From 701c875aded880013aacac608832995c4b052257 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 18 Jan 2023 12:06:23 +0200 Subject: [PATCH 001/898] iio: adc: qcom-spmi-adc5: Fix the channel name The node name can contain an address part which is unused by the driver. Moreover, this string is propagated into the userspace label, sysfs filenames *and breaking ABI*. Cut the address part out before assigning the channel name. Fixes: 4f47a236a23d ("iio: adc: qcom-spmi-adc5: convert to device properties") Reported-by: Marijn Suijten Signed-off-by: Andy Shevchenko Reviewed-by: Marijn Suijten Link: https://lore.kernel.org/r/20230118100623.42255-1-andriy.shevchenko@linux.intel.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/qcom-spmi-adc5.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/qcom-spmi-adc5.c b/drivers/iio/adc/qcom-spmi-adc5.c index 821fee60a7651..d1b86570768a9 100644 --- a/drivers/iio/adc/qcom-spmi-adc5.c +++ b/drivers/iio/adc/qcom-spmi-adc5.c @@ -626,12 +626,20 @@ static int adc5_get_fw_channel_data(struct adc5_chip *adc, struct fwnode_handle *fwnode, const struct adc5_data *data) { - const char *name = fwnode_get_name(fwnode), *channel_name; + const char *channel_name; + char *name; u32 chan, value, varr[2]; u32 sid = 0; int ret; struct device *dev = adc->dev; + name = devm_kasprintf(dev, GFP_KERNEL, "%pfwP", fwnode); + if (!name) + return -ENOMEM; + + /* Cut the address part */ + name[strchrnul(name, '@') - name] = '\0'; + ret = fwnode_property_read_u32(fwnode, "reg", &chan); if (ret) { dev_err(dev, "invalid channel number %s\n", name); From 6327a930ab7bfa1ab33bcdffd5f5f4b1e7131504 Mon Sep 17 00:00:00 2001 From: Ian Ray Date: Fri, 27 Jan 2023 14:57:14 +0200 Subject: [PATCH 002/898] drivers: iio: adc: ltc2497: fix LSB shift Correct the "sub_lsb" shift for the ltc2497 and drop the sub_lsb element which is now constant. An earlier version of the code shifted by 14 but this was a consequence of reading three bytes into a __be32 buffer and using be32_to_cpu(), so eight extra bits needed to be skipped. Now we use get_unaligned_be24() and thus the additional skip is wrong. Fixes: 2187cfeb3626 ("drivers: iio: adc: ltc2497: LTC2499 support") Signed-off-by: Ian Ray Link: https://lore.kernel.org/r/20230127125714.44608-1-ian.ray@ge.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ltc2497.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ltc2497.c b/drivers/iio/adc/ltc2497.c index 17370c5eb6fe3..ec198c6f13d6b 100644 --- a/drivers/iio/adc/ltc2497.c +++ b/drivers/iio/adc/ltc2497.c @@ -28,7 +28,6 @@ struct ltc2497_driverdata { struct ltc2497core_driverdata common_ddata; struct i2c_client *client; u32 recv_size; - u32 sub_lsb; /* * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. @@ -65,10 +64,10 @@ static int ltc2497_result_and_measure(struct ltc2497core_driverdata *ddata, * equivalent to a sign extension. */ if (st->recv_size == 3) { - *val = (get_unaligned_be24(st->data.d8) >> st->sub_lsb) + *val = (get_unaligned_be24(st->data.d8) >> 6) - BIT(ddata->chip_info->resolution + 1); } else { - *val = (be32_to_cpu(st->data.d32) >> st->sub_lsb) + *val = (be32_to_cpu(st->data.d32) >> 6) - BIT(ddata->chip_info->resolution + 1); } @@ -122,7 +121,6 @@ static int ltc2497_probe(struct i2c_client *client) st->common_ddata.chip_info = chip_info; resolution = chip_info->resolution; - st->sub_lsb = 31 - (resolution + 1); st->recv_size = BITS_TO_BYTES(resolution) + 1; return ltc2497core_probe(dev, indio_dev); From d9b540ee461cca7edca0dd2c2a42625c6b9ffb8f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 31 Jan 2023 10:46:11 +0100 Subject: [PATCH 003/898] iio: adis16480: select CONFIG_CRC32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In rare randconfig builds, the missing CRC32 helper causes a link error: ld.lld: error: undefined symbol: crc32_le >>> referenced by usercopy_64.c >>> vmlinux.o:(adis16480_trigger_handler) Fixes: 941f130881fa ("iio: adis16480: support burst read function") Signed-off-by: Arnd Bergmann Reviewed-by: Nuno Sá Link: https://lore.kernel.org/r/20230131094616.130238-1-arnd@kernel.org Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/imu/Kconfig b/drivers/iio/imu/Kconfig index f1d7d4b5e2224..c2f97629e9cdb 100644 --- a/drivers/iio/imu/Kconfig +++ b/drivers/iio/imu/Kconfig @@ -47,6 +47,7 @@ config ADIS16480 depends on SPI select IIO_ADIS_LIB select IIO_ADIS_LIB_BUFFER if IIO_BUFFER + select CRC32 help Say yes here to build support for Analog Devices ADIS16375, ADIS16480, ADIS16485, ADIS16488 inertial sensors. From 42ec40b0883c1cce58b06e8fa82049a61033151c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5rten=20Lindahl?= Date: Tue, 31 Jan 2023 15:01:09 +0100 Subject: [PATCH 004/898] iio: light: vcnl4000: Fix WARN_ON on uninitialized lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are different init functions for the sensors in this driver in which only one initializes the generic vcnl4000_lock. With commit e21b5b1f2669 ("iio: light: vcnl4000: Preserve conf bits when toggle power") the vcnl4040 sensor started to depend on the lock, but it was missed to initialize it in vcnl4040's init function. This has not been visible until we run lockdep on it: DEBUG_LOCKS_WARN_ON(lock->magic != lock) at kernel/locking/mutex.c:575 __mutex_lock+0x4f8/0x890 Call trace: __mutex_lock mutex_lock_nested vcnl4200_set_power_state vcnl4200_init vcnl4000_probe Fix this by initializing the lock in the probe function instead of doing it in the chip specific init functions. Fixes: e21b5b1f2669 ("iio: light: vcnl4000: Preserve conf bits when toggle power") Signed-off-by: Mårten Lindahl Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230131140109.2067577-1-marten.lindahl@axis.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/vcnl4000.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c index cc1a2062e76d6..69c5bc987e261 100644 --- a/drivers/iio/light/vcnl4000.c +++ b/drivers/iio/light/vcnl4000.c @@ -199,7 +199,6 @@ static int vcnl4000_init(struct vcnl4000_data *data) data->rev = ret & 0xf; data->al_scale = 250000; - mutex_init(&data->vcnl4000_lock); return data->chip_spec->set_power_state(data, true); }; @@ -1197,6 +1196,8 @@ static int vcnl4000_probe(struct i2c_client *client) data->id = id->driver_data; data->chip_spec = &vcnl4000_chip_spec_cfg[data->id]; + mutex_init(&data->vcnl4000_lock); + ret = data->chip_spec->init(data); if (ret < 0) return ret; From b5184a26a28fac1d708b0bfeeb958a9260c2924c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Thu, 16 Feb 2023 11:14:50 +0100 Subject: [PATCH 005/898] iio: buffer: correctly return bytes written in output buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If for some reason 'rb->access->write()' does not write the full requested data and the O_NONBLOCK is set, we would return 'n' to userspace which is not really truth. Hence, let's return the number of bytes we effectively wrote. Fixes: 9eeee3b0bf190 ("iio: Add output buffer support") Signed-off-by: Nuno Sá Reviewed-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20230216101452.591805-2-nuno.sa@analog.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index 80c78bd6bbef4..6340d8e1430b6 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -220,7 +220,7 @@ static ssize_t iio_buffer_write(struct file *filp, const char __user *buf, } while (ret == 0); remove_wait_queue(&rb->pollq, &wait); - return ret < 0 ? ret : n; + return ret < 0 ? ret : written; } /** From 3da1814184582ed0faf039275a3f02e6f69944ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Thu, 16 Feb 2023 11:14:51 +0100 Subject: [PATCH 006/898] iio: buffer: make sure O_NONBLOCK is respected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For output buffers, there's no guarantee that the buffer won't be full in the first iteration of the loop in which case we would block independently of userspace passing O_NONBLOCK or not. Fix it by always checking the flag before going to sleep. While at it (and as it's a bit related), refactored the loop so that the stop condition is 'written != n', i.e, run the loop until all data has been copied into the IIO buffers. This makes the code a bit simpler. Fixes: 9eeee3b0bf190 ("iio: Add output buffer support") Signed-off-by: Nuno Sá Reviewed-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20230216101452.591805-3-nuno.sa@analog.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-buffer.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index 6340d8e1430b6..a7a080bed1808 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -203,21 +203,24 @@ static ssize_t iio_buffer_write(struct file *filp, const char __user *buf, break; } + if (filp->f_flags & O_NONBLOCK) { + if (!written) + ret = -EAGAIN; + break; + } + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); continue; } ret = rb->access->write(rb, n - written, buf + written); - if (ret == 0 && (filp->f_flags & O_NONBLOCK)) - ret = -EAGAIN; + if (ret < 0) + break; - if (ret > 0) { - written += ret; - if (written != n && !(filp->f_flags & O_NONBLOCK)) - continue; - } - } while (ret == 0); + written += ret; + + } while (written != n); remove_wait_queue(&rb->pollq, &wait); return ret < 0 ? ret : written; From 0c6ef985a1fd8a74dcb5cad941ddcadd55cb8697 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Fri, 20 Jan 2023 13:46:45 +0100 Subject: [PATCH 007/898] iio: adc: ad7791: fix IRQ flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The interrupt is triggered on the falling edge rather than being a level low interrupt. Fixes: da4d3d6bb9f6 ("iio: adc: ad-sigma-delta: Allow custom IRQ flags") Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20230120124645.819910-1-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7791.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7791.c b/drivers/iio/adc/ad7791.c index fee8d129a5f08..86effe8501b44 100644 --- a/drivers/iio/adc/ad7791.c +++ b/drivers/iio/adc/ad7791.c @@ -253,7 +253,7 @@ static const struct ad_sigma_delta_info ad7791_sigma_delta_info = { .has_registers = true, .addr_shift = 4, .read_mask = BIT(3), - .irq_flags = IRQF_TRIGGER_LOW, + .irq_flags = IRQF_TRIGGER_FALLING, }; static int ad7791_read_raw(struct iio_dev *indio_dev, From 03fada47311a3e668f73efc9278c4a559e64ee85 Mon Sep 17 00:00:00 2001 From: Mehdi Djait Date: Sat, 18 Feb 2023 14:51:11 +0100 Subject: [PATCH 008/898] iio: accel: kionix-kx022a: Get the timestamp from the driver's private data in the trigger_handler The trigger_handler gets called from the IRQ thread handler using iio_trigger_poll_chained() which will only call the bottom half of the pollfunc and therefore pf->timestamp will not get set. Use instead the timestamp from the driver's private data which is always set in the IRQ handler. Fixes: 7c1d1677b322 ("iio: accel: Support Kionix/ROHM KX022A accelerometer") Link: https://lore.kernel.org/linux-iio/Y+6QoBLh1k82cJVN@carbian/ Reviewed-by: Matti Vaittinen Signed-off-by: Mehdi Djait Link: https://lore.kernel.org/r/20230218135111.90061-1-mehdi.djait.k@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kionix-kx022a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/kionix-kx022a.c b/drivers/iio/accel/kionix-kx022a.c index f866859855cdd..1c3a72380fb85 100644 --- a/drivers/iio/accel/kionix-kx022a.c +++ b/drivers/iio/accel/kionix-kx022a.c @@ -864,7 +864,7 @@ static irqreturn_t kx022a_trigger_handler(int irq, void *p) if (ret < 0) goto err_read; - iio_push_to_buffers_with_timestamp(idev, data->buffer, pf->timestamp); + iio_push_to_buffers_with_timestamp(idev, data->buffer, data->timestamp); err_read: iio_trigger_notify_done(idev->trig); From 6715df8d5d24655b9fd368e904028112b54c7de1 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sun, 19 Feb 2023 22:04:26 +0200 Subject: [PATCH 009/898] bpf: Allow reads from uninit stack This commits updates the following functions to allow reads from uninitialized stack locations when env->allow_uninit_stack option is enabled: - check_stack_read_fixed_off() - check_stack_range_initialized(), called from: - check_stack_read_var_off() - check_helper_mem_access() Such change allows to relax logic in stacksafe() to treat STACK_MISC and STACK_INVALID in a same way and make the following stack slot configurations equivalent: | Cached state | Current state | | stack slot | stack slot | |------------------+------------------| | STACK_INVALID or | STACK_INVALID or | | STACK_MISC | STACK_SPILL or | | | STACK_MISC or | | | STACK_ZERO or | | | STACK_DYNPTR | This leads to significant verification speed gains (see below). The idea was suggested by Andrii Nakryiko [1] and initial patch was created by Alexei Starovoitov [2]. Currently the env->allow_uninit_stack is allowed for programs loaded by users with CAP_PERFMON or CAP_SYS_ADMIN capabilities. A number of test cases from verifier/*.c were expecting uninitialized stack access to be an error. These test cases were updated to execute in unprivileged mode (thus preserving the tests). The test progs/test_global_func10.c expected "invalid indirect read from stack" error message because of the access to uninitialized memory region. This error is no longer possible in privileged mode. The test is updated to provoke an error "invalid indirect access to stack" because of access to invalid stack address (such error is not verified by progs/test_global_func*.c series of tests). The following tests had to be removed because these can't be made unprivileged: - verifier/sock.c: - "sk_storage_get(map, skb->sk, &stack_value, 1): partially init stack_value" BPF_PROG_TYPE_SCHED_CLS programs are not executed in unprivileged mode. - verifier/var_off.c: - "indirect variable-offset stack access, max_off+size > max_initialized" - "indirect variable-offset stack access, uninitialized" These tests verify that access to uninitialized stack values is detected when stack offset is not a constant. However, variable stack access is prohibited in unprivileged mode, thus these tests are no longer valid. * * * Here is veristat log comparing this patch with current master on a set of selftest binaries listed in tools/testing/selftests/bpf/veristat.cfg and cilium BPF binaries (see [3]): $ ./veristat -e file,prog,states -C -f 'states_pct<-30' master.log current.log File Program States (A) States (B) States (DIFF) -------------------------- -------------------------- ---------- ---------- ---------------- bpf_host.o tail_handle_ipv6_from_host 349 244 -105 (-30.09%) bpf_host.o tail_handle_nat_fwd_ipv4 1320 895 -425 (-32.20%) bpf_lxc.o tail_handle_nat_fwd_ipv4 1320 895 -425 (-32.20%) bpf_sock.o cil_sock4_connect 70 48 -22 (-31.43%) bpf_sock.o cil_sock4_sendmsg 68 46 -22 (-32.35%) bpf_xdp.o tail_handle_nat_fwd_ipv4 1554 803 -751 (-48.33%) bpf_xdp.o tail_lb_ipv4 6457 2473 -3984 (-61.70%) bpf_xdp.o tail_lb_ipv6 7249 3908 -3341 (-46.09%) pyperf600_bpf_loop.bpf.o on_event 287 145 -142 (-49.48%) strobemeta.bpf.o on_event 15915 4772 -11143 (-70.02%) strobemeta_nounroll2.bpf.o on_event 17087 3820 -13267 (-77.64%) xdp_synproxy_kern.bpf.o syncookie_tc 21271 6635 -14636 (-68.81%) xdp_synproxy_kern.bpf.o syncookie_xdp 23122 6024 -17098 (-73.95%) -------------------------- -------------------------- ---------- ---------- ---------------- Note: I limited selection by states_pct<-30%. Inspection of differences in pyperf600_bpf_loop behavior shows that the following patch for the test removes almost all differences: - a/tools/testing/selftests/bpf/progs/pyperf.h + b/tools/testing/selftests/bpf/progs/pyperf.h @ -266,8 +266,8 @ int __on_event(struct bpf_raw_tracepoint_args *ctx) } if (event->pthread_match || !pidData->use_tls) { - void* frame_ptr; - FrameData frame; + void* frame_ptr = 0; + FrameData frame = {}; Symbol sym = {}; int cur_cpu = bpf_get_smp_processor_id(); W/o this patch the difference comes from the following pattern (for different variables): static bool get_frame_data(... FrameData *frame ...) { ... bpf_probe_read_user(&frame->f_code, ...); if (!frame->f_code) return false; ... bpf_probe_read_user(&frame->co_name, ...); if (frame->co_name) ...; } int __on_event(struct bpf_raw_tracepoint_args *ctx) { FrameData frame; ... get_frame_data(... &frame ...) // indirectly via a bpf_loop & callback ... } SEC("raw_tracepoint/kfree_skb") int on_event(struct bpf_raw_tracepoint_args* ctx) { ... ret |= __on_event(ctx); ret |= __on_event(ctx); ... } With regards to value `frame->co_name` the following is important: - Because of the conditional `if (!frame->f_code)` each call to __on_event() produces two states, one with `frame->co_name` marked as STACK_MISC, another with it as is (and marked STACK_INVALID on a first call). - The call to bpf_probe_read_user() does not mark stack slots corresponding to `&frame->co_name` as REG_LIVE_WRITTEN but it marks these slots as BPF_MISC, this happens because of the following loop in the check_helper_call(): for (i = 0; i < meta.access_size; i++) { err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1, false); if (err) return err; } Note the size of the write, it is a one byte write for each byte touched by a helper. The BPF_B write does not lead to write marks for the target stack slot. - Which means that w/o this patch when second __on_event() call is verified `if (frame->co_name)` will propagate read marks first to a stack slot with STACK_MISC marks and second to a stack slot with STACK_INVALID marks and these states would be considered different. [1] https://lore.kernel.org/bpf/CAEf4BzY3e+ZuC6HUa8dCiUovQRg2SzEk7M-dSkqNZyn=xEmnPA@mail.gmail.com/ [2] https://lore.kernel.org/bpf/CAADnVQKs2i1iuZ5SUGuJtxWVfGYR9kDgYKhq3rNV+kBLQCu7rA@mail.gmail.com/ [3] git@github.com:anakryiko/cilium.git Suggested-by: Andrii Nakryiko Co-developed-by: Alexei Starovoitov Signed-off-by: Eduard Zingerman Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230219200427.606541-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 11 +- .../selftests/bpf/progs/test_global_func10.c | 8 +- tools/testing/selftests/bpf/verifier/calls.c | 13 ++- .../bpf/verifier/helper_access_var_len.c | 104 ++++++++++++------ .../testing/selftests/bpf/verifier/int_ptr.c | 9 +- .../selftests/bpf/verifier/search_pruning.c | 13 ++- tools/testing/selftests/bpf/verifier/sock.c | 27 ----- .../selftests/bpf/verifier/spill_fill.c | 7 +- .../testing/selftests/bpf/verifier/var_off.c | 52 --------- 9 files changed, 108 insertions(+), 136 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 272563a0b7702..d517d13878cfe 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3826,6 +3826,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, continue; if (type == STACK_MISC) continue; + if (type == STACK_INVALID && env->allow_uninit_stack) + continue; verbose(env, "invalid read from stack off %d+%d size %d\n", off, i, size); return -EACCES; @@ -3863,6 +3865,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, continue; if (type == STACK_ZERO) continue; + if (type == STACK_INVALID && env->allow_uninit_stack) + continue; verbose(env, "invalid read from stack off %d+%d size %d\n", off, i, size); return -EACCES; @@ -5754,7 +5758,8 @@ static int check_stack_range_initialized( stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; if (*stype == STACK_MISC) goto mark; - if (*stype == STACK_ZERO) { + if ((*stype == STACK_ZERO) || + (*stype == STACK_INVALID && env->allow_uninit_stack)) { if (clobber) { /* helper can write anything into the stack */ *stype = STACK_MISC; @@ -13936,6 +13941,10 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) continue; + if (env->allow_uninit_stack && + old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC) + continue; + /* explored stack has more populated slots than current stack * and these slots were used */ diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c index 98327bdbbfd24..8fba3f3649e22 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func10.c +++ b/tools/testing/selftests/bpf/progs/test_global_func10.c @@ -5,12 +5,12 @@ #include "bpf_misc.h" struct Small { - int x; + long x; }; struct Big { - int x; - int y; + long x; + long y; }; __noinline int foo(const struct Big *big) @@ -22,7 +22,7 @@ __noinline int foo(const struct Big *big) } SEC("cgroup_skb/ingress") -__failure __msg("invalid indirect read from stack") +__failure __msg("invalid indirect access to stack") int global_func10(struct __sk_buff *skb) { const struct Small small = {.x = skb->len }; diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 9d993926bf0ef..289ed202ec66a 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -2221,19 +2221,22 @@ * that fp-8 stack slot was unused in the fall-through * branch and will accept the program incorrectly */ - BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 2, 2), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_JMP_IMM(BPF_JA, 0, 0, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_48b = { 6 }, - .errstr = "invalid indirect read from stack R2 off -8+0 size 8", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map_hash_48b = { 7 }, + .errstr_unpriv = "invalid indirect read from stack R2 off -8+0 size 8", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "calls: ctx read at start of subprog", diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c index a6c869a7319cd..9c4885885aba0 100644 --- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c +++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c @@ -29,19 +29,30 @@ { "helper access to variable memory: stack, bitwise AND, zero included", .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), + /* set max stack size */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0), + /* set r3 to a random value */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + /* use bitwise AND to limit r3 range to [0, 64] */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 64), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_MOV64_IMM(BPF_REG_4, 0), + /* Call bpf_ringbuf_output(), it is one of a few helper functions with + * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode. + * For unpriv this should signal an error, because memory at &fp[-64] is + * not initialized. + */ + BPF_EMIT_CALL(BPF_FUNC_ringbuf_output), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack R1 off -64+0 size 64", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .fixup_map_ringbuf = { 4 }, + .errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "helper access to variable memory: stack, bitwise AND + JMP, wrong max", @@ -183,20 +194,31 @@ { "helper access to variable memory: stack, JMP, no min check", .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), + /* set max stack size */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0), + /* set r3 to a random value */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + /* use JMP to limit r3 range to [0, 64] */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 64, 6), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_MOV64_IMM(BPF_REG_4, 0), + /* Call bpf_ringbuf_output(), it is one of a few helper functions with + * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode. + * For unpriv this should signal an error, because memory at &fp[-64] is + * not initialized. + */ + BPF_EMIT_CALL(BPF_FUNC_ringbuf_output), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack R1 off -64+0 size 64", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .fixup_map_ringbuf = { 4 }, + .errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "helper access to variable memory: stack, JMP (signed), no min check", @@ -564,29 +586,41 @@ { "helper access to variable memory: 8 bytes leak", .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + /* set max stack size */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0), + /* set r3 to a random value */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), + /* Note: fp[-32] left uninitialized */ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + /* Limit r3 range to [1, 64] */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 63), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 1), + BPF_MOV64_IMM(BPF_REG_4, 0), + /* Call bpf_ringbuf_output(), it is one of a few helper functions with + * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode. + * For unpriv this should signal an error, because memory region [1, 64] + * at &fp[-64] is not fully initialized. + */ + BPF_EMIT_CALL(BPF_FUNC_ringbuf_output), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack R1 off -64+32 size 64", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .fixup_map_ringbuf = { 3 }, + .errstr_unpriv = "invalid indirect read from stack R2 off -64+32 size 64", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "helper access to variable memory: 8 bytes no leak (init memory)", diff --git a/tools/testing/selftests/bpf/verifier/int_ptr.c b/tools/testing/selftests/bpf/verifier/int_ptr.c index 070893fb29007..02d9e004260b3 100644 --- a/tools/testing/selftests/bpf/verifier/int_ptr.c +++ b/tools/testing/selftests/bpf/verifier/int_ptr.c @@ -54,12 +54,13 @@ /* bpf_strtoul() */ BPF_EMIT_CALL(BPF_FUNC_strtoul), - BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, - .errstr = "invalid indirect read from stack R4 off -16+4 size 8", + .result_unpriv = REJECT, + .errstr_unpriv = "invalid indirect read from stack R4 off -16+4 size 8", + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "ARG_PTR_TO_LONG misaligned", diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c index d63fd8991b03a..745d6b5842fd4 100644 --- a/tools/testing/selftests/bpf/verifier/search_pruning.c +++ b/tools/testing/selftests/bpf/verifier/search_pruning.c @@ -128,9 +128,10 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - .errstr = "invalid read from stack off -16+0 size 8", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr_unpriv = "invalid read from stack off -16+0 size 8", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "precision tracking for u32 spill/fill", @@ -258,6 +259,8 @@ BPF_EXIT_INSN(), }, .flags = BPF_F_TEST_STATE_FREQ, - .errstr = "invalid read from stack off -8+1 size 8", - .result = REJECT, + .errstr_unpriv = "invalid read from stack off -8+1 size 8", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index d11d0b28be416..108dd3ee1edda 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -530,33 +530,6 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, -{ - "sk_storage_get(map, skb->sk, &stack_value, 1): partially init stack_value", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_4, 1), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_sk_storage_get), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_sk_storage_map = { 14 }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = REJECT, - .errstr = "invalid indirect read from stack", -}, { "bpf_map_lookup_elem(smap, &key)", .insns = { diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index 9bb302dade237..d1463bf4949af 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -171,9 +171,10 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .result = REJECT, - .errstr = "invalid read from stack off -4+0 size 4", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result_unpriv = REJECT, + .errstr_unpriv = "invalid read from stack off -4+0 size 4", + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "Spill a u32 const scalar. Refill as u16. Offset to skb->data", diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c index d37f512fad16e..b183e26c03f10 100644 --- a/tools/testing/selftests/bpf/verifier/var_off.c +++ b/tools/testing/selftests/bpf/verifier/var_off.c @@ -212,31 +212,6 @@ .result = REJECT, .prog_type = BPF_PROG_TYPE_LWT_IN, }, -{ - "indirect variable-offset stack access, max_off+size > max_initialized", - .insns = { - /* Fill only the second from top 8 bytes of the stack. */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), - /* Get an unknown value. */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 4-byte aligned. */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), - /* Add it to fp. We now have either fp-12 or fp-16, but we don't know - * which. fp-12 size 8 is partially uninitialized stack. - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* Dereference it indirectly. */ - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "invalid indirect read from stack R2 var_off", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_LWT_IN, -}, { "indirect variable-offset stack access, min_off < min_initialized", .insns = { @@ -289,33 +264,6 @@ .result = ACCEPT, .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, -{ - "indirect variable-offset stack access, uninitialized", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 6), - BPF_MOV64_IMM(BPF_REG_3, 28), - /* Fill the top 16 bytes of the stack. */ - BPF_ST_MEM(BPF_W, BPF_REG_10, -16, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - /* Get an unknown value. */ - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 0), - /* Make it small and 4-byte aligned. */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16), - /* Add it to fp. We now have either fp-12 or fp-16, we don't know - * which, but either way it points to initialized stack. - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10), - BPF_MOV64_IMM(BPF_REG_5, 8), - /* Dereference it indirectly. */ - BPF_EMIT_CALL(BPF_FUNC_getsockopt), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid indirect read from stack R4 var_off", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SOCK_OPS, -}, { "indirect variable-offset stack access, ok", .insns = { From 6338a94d5ab42a94e96ea36edc5f7df1fe73e68e Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sun, 19 Feb 2023 22:04:27 +0200 Subject: [PATCH 010/898] selftests/bpf: Tests for uninitialized stack reads Three testcases to make sure that stack reads from uninitialized locations are accepted by verifier when executed in privileged mode: - read from a fixed offset; - read from a variable offset; - passing a pointer to stack to a helper converts STACK_INVALID to STACK_MISC. Signed-off-by: Eduard Zingerman Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230219200427.606541-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/uninit_stack.c | 9 ++ .../selftests/bpf/progs/uninit_stack.c | 87 +++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/uninit_stack.c create mode 100644 tools/testing/selftests/bpf/progs/uninit_stack.c diff --git a/tools/testing/selftests/bpf/prog_tests/uninit_stack.c b/tools/testing/selftests/bpf/prog_tests/uninit_stack.c new file mode 100644 index 0000000000000..e64c71948491f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/uninit_stack.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "uninit_stack.skel.h" + +void test_uninit_stack(void) +{ + RUN_TESTS(uninit_stack); +} diff --git a/tools/testing/selftests/bpf/progs/uninit_stack.c b/tools/testing/selftests/bpf/progs/uninit_stack.c new file mode 100644 index 0000000000000..8a403470e557f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uninit_stack.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "bpf_misc.h" + +/* Read an uninitialized value from stack at a fixed offset */ +SEC("socket") +__naked int read_uninit_stack_fixed_off(void *ctx) +{ + asm volatile (" \ + r0 = 0; \ + /* force stack depth to be 128 */ \ + *(u64*)(r10 - 128) = r1; \ + r1 = *(u8 *)(r10 - 8 ); \ + r0 += r1; \ + r1 = *(u8 *)(r10 - 11); \ + r1 = *(u8 *)(r10 - 13); \ + r1 = *(u8 *)(r10 - 15); \ + r1 = *(u16*)(r10 - 16); \ + r1 = *(u32*)(r10 - 32); \ + r1 = *(u64*)(r10 - 64); \ + /* read from a spill of a wrong size, it is a separate \ + * branch in check_stack_read_fixed_off() \ + */ \ + *(u32*)(r10 - 72) = r1; \ + r1 = *(u64*)(r10 - 72); \ + r0 = 0; \ + exit; \ +" + ::: __clobber_all); +} + +/* Read an uninitialized value from stack at a variable offset */ +SEC("socket") +__naked int read_uninit_stack_var_off(void *ctx) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + /* force stack depth to be 64 */ \ + *(u64*)(r10 - 64) = r0; \ + r0 = -r0; \ + /* give r0 a range [-31, -1] */ \ + if r0 s<= -32 goto exit_%=; \ + if r0 s>= 0 goto exit_%=; \ + /* access stack using r0 */ \ + r1 = r10; \ + r1 += r0; \ + r2 = *(u8*)(r1 + 0); \ +exit_%=: r0 = 0; \ + exit; \ +" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +static __noinline void dummy(void) {} + +/* Pass a pointer to uninitialized stack memory to a helper. + * Passed memory block should be marked as STACK_MISC after helper call. + */ +SEC("socket") +__log_level(7) __msg("fp-104=mmmmmmmm") +__naked int helper_uninit_to_misc(void *ctx) +{ + asm volatile (" \ + /* force stack depth to be 128 */ \ + *(u64*)(r10 - 128) = r1; \ + r1 = r10; \ + r1 += -128; \ + r2 = 32; \ + call %[bpf_trace_printk]; \ + /* Call to dummy() forces print_verifier_state(..., true), \ + * thus showing the stack state, matched by __msg(). \ + */ \ + call %[dummy]; \ + r0 = 0; \ + exit; \ +" + : + : __imm(bpf_trace_printk), + __imm(dummy) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; From 099cc90a5a62e68b2fe3a42da011ab929b98bf73 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 23 Feb 2023 10:00:59 +0800 Subject: [PATCH 011/898] iio: light: cm32181: Unregister second I2C client if present If a second dummy client that talks to the actual I2C address was created in probe(), there should be a proper cleanup on driver and device removal to avoid leakage. So unregister the dummy client via another callback. Reviewed-by: Hans de Goede Suggested-by: Hans de Goede Fixes: c1e62062ff54 ("iio: light: cm32181: Handle CM3218 ACPI devices with 2 I2C resources") Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2152281 Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20230223020059.2013993-1-kai.heng.feng@canonical.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/cm32181.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/iio/light/cm32181.c b/drivers/iio/light/cm32181.c index b1674a5bfa368..d4a34a3bf00d9 100644 --- a/drivers/iio/light/cm32181.c +++ b/drivers/iio/light/cm32181.c @@ -429,6 +429,14 @@ static const struct iio_info cm32181_info = { .attrs = &cm32181_attribute_group, }; +static void cm32181_unregister_dummy_client(void *data) +{ + struct i2c_client *client = data; + + /* Unregister the dummy client */ + i2c_unregister_device(client); +} + static int cm32181_probe(struct i2c_client *client) { struct device *dev = &client->dev; @@ -460,6 +468,10 @@ static int cm32181_probe(struct i2c_client *client) client = i2c_acpi_new_device(dev, 1, &board_info); if (IS_ERR(client)) return PTR_ERR(client); + + ret = devm_add_action_or_reset(dev, cm32181_unregister_dummy_client, client); + if (ret) + return ret; } cm32181 = iio_priv(indio_dev); From f8502fba45bd30e1a6a354d9d898bc99d1a11e6d Mon Sep 17 00:00:00 2001 From: Rijo Thomas Date: Tue, 28 Feb 2023 15:11:20 +0530 Subject: [PATCH 012/898] tee: amdtee: fix race condition in amdtee_open_session There is a potential race condition in amdtee_open_session that may lead to use-after-free. For instance, in amdtee_open_session() after sess->sess_mask is set, and before setting: sess->session_info[i] = session_info; if amdtee_close_session() closes this same session, then 'sess' data structure will be released, causing kernel panic when 'sess' is accessed within amdtee_open_session(). The solution is to set the bit sess->sess_mask as the last step in amdtee_open_session(). Fixes: 757cc3e9ff1d ("tee: add AMD-TEE driver") Cc: stable@vger.kernel.org Signed-off-by: Rijo Thomas Acked-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/amdtee/core.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c index 297dc62bca298..372d64756ed64 100644 --- a/drivers/tee/amdtee/core.c +++ b/drivers/tee/amdtee/core.c @@ -267,35 +267,34 @@ int amdtee_open_session(struct tee_context *ctx, goto out; } + /* Open session with loaded TA */ + handle_open_session(arg, &session_info, param); + if (arg->ret != TEEC_SUCCESS) { + pr_err("open_session failed %d\n", arg->ret); + handle_unload_ta(ta_handle); + kref_put(&sess->refcount, destroy_session); + goto out; + } + /* Find an empty session index for the given TA */ spin_lock(&sess->lock); i = find_first_zero_bit(sess->sess_mask, TEE_NUM_SESSIONS); - if (i < TEE_NUM_SESSIONS) + if (i < TEE_NUM_SESSIONS) { + sess->session_info[i] = session_info; + set_session_id(ta_handle, i, &arg->session); set_bit(i, sess->sess_mask); + } spin_unlock(&sess->lock); if (i >= TEE_NUM_SESSIONS) { pr_err("reached maximum session count %d\n", TEE_NUM_SESSIONS); + handle_close_session(ta_handle, session_info); handle_unload_ta(ta_handle); kref_put(&sess->refcount, destroy_session); rc = -ENOMEM; goto out; } - /* Open session with loaded TA */ - handle_open_session(arg, &session_info, param); - if (arg->ret != TEEC_SUCCESS) { - pr_err("open_session failed %d\n", arg->ret); - spin_lock(&sess->lock); - clear_bit(i, sess->sess_mask); - spin_unlock(&sess->lock); - handle_unload_ta(ta_handle); - kref_put(&sess->refcount, destroy_session); - goto out; - } - - sess->session_info[i] = session_info; - set_session_id(ta_handle, i, &arg->session); out: free_pages((u64)ta, get_order(ta_size)); return rc; From 4bb54c2ce48ffb3a06133ac0fb4086f7b48d9109 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 14 Feb 2023 15:05:49 +0100 Subject: [PATCH 013/898] arm64: tegra: Bump CBB ranges property on Tegra194 and Tegra234 Both Xavier (Tegra194) and Orin (Tegra234) support a 40-bit address map, so bump the CBB ranges property to cover all of the 1 TiB address space. This fixes an issue where some of the PCIe regions could not be remapped because of they were outside the memory specified by the CBB's ranges property. Reported-by: Jonathan Hunter Signed-off-by: Thierry Reding --- arch/arm64/boot/dts/nvidia/tegra194.dtsi | 2 +- arch/arm64/boot/dts/nvidia/tegra234.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 133dbe5b429d8..7096b999b33f8 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -22,7 +22,7 @@ #address-cells = <2>; #size-cells = <2>; - ranges = <0x0 0x0 0x0 0x0 0x0 0x40000000>; + ranges = <0x0 0x0 0x0 0x0 0x100 0x0>; apbmisc: misc@100000 { compatible = "nvidia,tegra194-misc"; diff --git a/arch/arm64/boot/dts/nvidia/tegra234.dtsi b/arch/arm64/boot/dts/nvidia/tegra234.dtsi index 8fe8eda7654d8..f1748cff8a33b 100644 --- a/arch/arm64/boot/dts/nvidia/tegra234.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra234.dtsi @@ -20,7 +20,7 @@ #address-cells = <2>; #size-cells = <2>; - ranges = <0x0 0x0 0x0 0x0 0x0 0x40000000>; + ranges = <0x0 0x0 0x0 0x0 0x100 0x0>; misc@100000 { compatible = "nvidia,tegra234-misc"; From f773f0a331d6c41733b17bebbc1b6cae12e016f5 Mon Sep 17 00:00:00 2001 From: ZhaoLong Wang Date: Sat, 4 Mar 2023 09:41:41 +0800 Subject: [PATCH 014/898] ubi: Fix deadlock caused by recursively holding work_sem During the processing of the bgt, if the sync_erase() return -EBUSY or some other error code in __erase_worker(),schedule_erase() called again lead to the down_read(ubi->work_sem) hold twice and may get block by down_write(ubi->work_sem) in ubi_update_fastmap(), which cause deadlock. ubi bgt other task do_work down_read(&ubi->work_sem) ubi_update_fastmap erase_worker # Blocked by down_read __erase_worker down_write(&ubi->work_sem) schedule_erase schedule_ubi_work down_read(&ubi->work_sem) Fix this by changing input parameter @nested of the schedule_erase() to 'true' to avoid recursively acquiring the down_read(&ubi->work_sem). Also, fix the incorrect comment about @nested parameter of the schedule_erase() because when down_write(ubi->work_sem) is held, the @nested is also need be true. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217093 Fixes: 2e8f08deabbc ("ubi: Fix races around ubi_refill_pools()") Signed-off-by: ZhaoLong Wang Reviewed-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/wl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 40f39e5d6dfcc..26a214f016c18 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -575,7 +575,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, * @vol_id: the volume ID that last used this PEB * @lnum: the last used logical eraseblock number for the PEB * @torture: if the physical eraseblock has to be tortured - * @nested: denotes whether the work_sem is already held in read mode + * @nested: denotes whether the work_sem is already held * * This function returns zero in case of success and a %-ENOMEM in case of * failure. @@ -1131,7 +1131,7 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk) int err1; /* Re-schedule the LEB for erasure */ - err1 = schedule_erase(ubi, e, vol_id, lnum, 0, false); + err1 = schedule_erase(ubi, e, vol_id, lnum, 0, true); if (err1) { spin_lock(&ubi->wl_lock); wl_entry_destroy(ubi, e); From fd4334a06d452ce89a0bb831b03130c51331d927 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 31 Jan 2023 11:35:58 +0100 Subject: [PATCH 015/898] arm64: dts: freescale: imx8-ss-lsio: Fix flexspi clock order The correct clock order is "fspi_en" and "fspi". As they are identical just reordering the names is sufficient. Fixes: 6276d66984e9 ("arm64: dts: imx8dxl: add flexspi0 support") Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi index 1f3d225e64ece..06b94bbc2b97d 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi @@ -117,7 +117,7 @@ lsio_subsys: bus@5d000000 { interrupts = ; clocks = <&clk IMX_SC_R_FSPI_0 IMX_SC_PM_CLK_PER>, <&clk IMX_SC_R_FSPI_0 IMX_SC_PM_CLK_PER>; - clock-names = "fspi", "fspi_en"; + clock-names = "fspi_en", "fspi"; power-domains = <&pd IMX_SC_R_FSPI_0>; status = "disabled"; }; From 916508c30e22f658f12dc736f8198d8a096cb24d Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 7 Feb 2023 14:10:20 +0100 Subject: [PATCH 016/898] Revert "arm64: dts: ls1028a: sl28: get MAC addresses from VPD" With commit b203e6f1e833 ("arm64: dts: ls1028a: sl28: get MAC addresses from VPD"), the network adapter now depends on the nvmem device to be present, which isn't the case and thus breaks networking on this board. Revert it. Fixes: b203e6f1e833 ("arm64: dts: ls1028a: sl28: get MAC addresses from VPD") Signed-off-by: Michael Walle Signed-off-by: Shawn Guo --- .../fsl-ls1028a-kontron-kbox-a-230-ls.dts | 12 ------------ .../freescale/fsl-ls1028a-kontron-sl28-var1.dts | 2 -- .../freescale/fsl-ls1028a-kontron-sl28-var2.dts | 8 -------- .../freescale/fsl-ls1028a-kontron-sl28-var4.dts | 2 -- .../dts/freescale/fsl-ls1028a-kontron-sl28.dts | 17 ----------------- 5 files changed, 41 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-kbox-a-230-ls.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-kbox-a-230-ls.dts index af9194eca5564..73eb6061c73ee 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-kbox-a-230-ls.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-kbox-a-230-ls.dts @@ -56,14 +56,10 @@ }; &enetc_port2 { - nvmem-cells = <&base_mac_address 2>; - nvmem-cell-names = "mac-address"; status = "okay"; }; &enetc_port3 { - nvmem-cells = <&base_mac_address 3>; - nvmem-cell-names = "mac-address"; status = "okay"; }; @@ -84,8 +80,6 @@ managed = "in-band-status"; phy-handle = <&qsgmii_phy0>; phy-mode = "qsgmii"; - nvmem-cells = <&base_mac_address 4>; - nvmem-cell-names = "mac-address"; status = "okay"; }; @@ -94,8 +88,6 @@ managed = "in-band-status"; phy-handle = <&qsgmii_phy1>; phy-mode = "qsgmii"; - nvmem-cells = <&base_mac_address 5>; - nvmem-cell-names = "mac-address"; status = "okay"; }; @@ -104,8 +96,6 @@ managed = "in-band-status"; phy-handle = <&qsgmii_phy2>; phy-mode = "qsgmii"; - nvmem-cells = <&base_mac_address 6>; - nvmem-cell-names = "mac-address"; status = "okay"; }; @@ -114,8 +104,6 @@ managed = "in-band-status"; phy-handle = <&qsgmii_phy3>; phy-mode = "qsgmii"; - nvmem-cells = <&base_mac_address 7>; - nvmem-cell-names = "mac-address"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts index 1f34c75534594..7cd29ab970d92 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts @@ -55,7 +55,5 @@ &enetc_port1 { phy-handle = <&phy0>; phy-mode = "rgmii-id"; - nvmem-cells = <&base_mac_address 0>; - nvmem-cell-names = "mac-address"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts index aac41192caa12..113b1df74bf87 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts @@ -36,14 +36,10 @@ }; &enetc_port2 { - nvmem-cells = <&base_mac_address 2>; - nvmem-cell-names = "mac-address"; status = "okay"; }; &enetc_port3 { - nvmem-cells = <&base_mac_address 3>; - nvmem-cell-names = "mac-address"; status = "okay"; }; @@ -56,8 +52,6 @@ managed = "in-band-status"; phy-handle = <&phy0>; phy-mode = "sgmii"; - nvmem-cells = <&base_mac_address 0>; - nvmem-cell-names = "mac-address"; status = "okay"; }; @@ -66,8 +60,6 @@ managed = "in-band-status"; phy-handle = <&phy1>; phy-mode = "sgmii"; - nvmem-cells = <&base_mac_address 1>; - nvmem-cell-names = "mac-address"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts index a4421db3784e3..9b5e92fb753e2 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts @@ -43,7 +43,5 @@ &enetc_port1 { phy-handle = <&phy1>; phy-mode = "rgmii-id"; - nvmem-cells = <&base_mac_address 1>; - nvmem-cell-names = "mac-address"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts index 8b65af4a7147b..4ab17b984b03b 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts @@ -92,8 +92,6 @@ phy-handle = <&phy0>; phy-mode = "sgmii"; managed = "in-band-status"; - nvmem-cells = <&base_mac_address 0>; - nvmem-cell-names = "mac-address"; status = "okay"; }; @@ -156,21 +154,6 @@ label = "bootloader environment"; }; }; - - otp-1 { - compatible = "user-otp"; - - nvmem-layout { - compatible = "kontron,sl28-vpd"; - - serial_number: serial-number { - }; - - base_mac_address: base-mac-address { - #nvmem-cell-cells = <1>; - }; - }; - }; }; }; From ec738ca127d07ecac6afae36e2880341ec89150e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 8 Feb 2023 17:02:30 +0100 Subject: [PATCH 017/898] mtd: spi-nor: fix memory leak when using debugfs_lookup() When calling debugfs_lookup() the result must have dput() called on it, otherwise the memory will leak over time. To solve this, remove the lookup and create the directory on the first device found, and then remove it when the module is unloaded. Cc: Tudor Ambarus Cc: Pratyush Yadav Cc: Miquel Raynal Cc: Richard Weinberger Cc: Vignesh Raghavendra Cc: linux-mtd@lists.infradead.org Reviewed-by: Michael Walle Link: https://lore.kernel.org/r/20230208160230.2179905-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/core.c | 14 +++++++++++++- drivers/mtd/spi-nor/core.h | 2 ++ drivers/mtd/spi-nor/debugfs.c | 11 ++++++++--- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 0a78045ca1d94..522d375aeccff 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -3343,7 +3343,19 @@ static struct spi_mem_driver spi_nor_driver = { .remove = spi_nor_remove, .shutdown = spi_nor_shutdown, }; -module_spi_mem_driver(spi_nor_driver); + +static int __init spi_nor_module_init(void) +{ + return spi_mem_driver_register(&spi_nor_driver); +} +module_init(spi_nor_module_init); + +static void __exit spi_nor_module_exit(void) +{ + spi_mem_driver_unregister(&spi_nor_driver); + spi_nor_debugfs_shutdown(); +} +module_exit(spi_nor_module_exit); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Huang Shijie "); diff --git a/drivers/mtd/spi-nor/core.h b/drivers/mtd/spi-nor/core.h index 25423225c29d3..e0cc42a4a0c84 100644 --- a/drivers/mtd/spi-nor/core.h +++ b/drivers/mtd/spi-nor/core.h @@ -711,8 +711,10 @@ static inline struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd) #ifdef CONFIG_DEBUG_FS void spi_nor_debugfs_register(struct spi_nor *nor); +void spi_nor_debugfs_shutdown(void); #else static inline void spi_nor_debugfs_register(struct spi_nor *nor) {} +static inline void spi_nor_debugfs_shutdown(void) {} #endif #endif /* __LINUX_MTD_SPI_NOR_INTERNAL_H */ diff --git a/drivers/mtd/spi-nor/debugfs.c b/drivers/mtd/spi-nor/debugfs.c index 845b78c7ecc7a..fc7ad203df128 100644 --- a/drivers/mtd/spi-nor/debugfs.c +++ b/drivers/mtd/spi-nor/debugfs.c @@ -226,13 +226,13 @@ static void spi_nor_debugfs_unregister(void *data) nor->debugfs_root = NULL; } +static struct dentry *rootdir; + void spi_nor_debugfs_register(struct spi_nor *nor) { - struct dentry *rootdir, *d; + struct dentry *d; int ret; - /* Create rootdir once. Will never be deleted again. */ - rootdir = debugfs_lookup(SPI_NOR_DEBUGFS_ROOT, NULL); if (!rootdir) rootdir = debugfs_create_dir(SPI_NOR_DEBUGFS_ROOT, NULL); @@ -247,3 +247,8 @@ void spi_nor_debugfs_register(struct spi_nor *nor) debugfs_create_file("capabilities", 0444, d, nor, &spi_nor_capabilities_fops); } + +void spi_nor_debugfs_shutdown(void) +{ + debugfs_remove(rootdir); +} From 8ab5059dc4f4c34325eba6270ef12a4ab1386019 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 23 Jan 2023 18:07:07 +0300 Subject: [PATCH 018/898] firmware: arm_scmi: Clean up a return statement in scmi_probe The comments say "enabled" where "disabled" is intended. Also it's cleaner to return zero explicitly instead of ret. Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/Y86im5M49p3ePGxj@kili Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index d21c7eafd641c..703f16ef39530 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2739,8 +2739,8 @@ static int scmi_probe(struct platform_device *pdev) if (ret) goto clear_dev_req_notifier; - /* Bail out anyway when coex enabled */ - return ret; + /* Bail out anyway when coex disabled. */ + return 0; } /* Coex enabled, carry on in any case. */ From 6bed395d7db2c039c0ef4123a379e27c528a3357 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 22 Feb 2023 18:17:06 +0300 Subject: [PATCH 019/898] firmware: arm_scmi: Return a literal instead of a variable In this context "return scmi_dev;" and "return NULL;" are equivalent but it is more readable to return a literal. Signed-off-by: Dan Carpenter Reviewed-by: Cristian Marussi Link: https://lore.kernel.org/r/Y/Yx8pOdf8rNhPVe@kili Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index 73140b854b313..ac306ca48b07d 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -436,7 +436,7 @@ struct scmi_device *scmi_device_create(struct device_node *np, /* Nothing to do. */ if (!phead) { mutex_unlock(&scmi_requested_devices_mtx); - return scmi_dev; + return NULL; } /* Walk the list of requested devices for protocol and create them */ From 418a406d92cc276ddf81d4223271af1ae09fa5af Mon Sep 17 00:00:00 2001 From: Ye Xingchen Date: Fri, 10 Feb 2023 15:20:07 +0800 Subject: [PATCH 020/898] firmware: arm_scmi: Remove duplicate include header inclusion linux/of.h is included more than once, just remove the duplicate include header inclusion. Signed-off-by: Ye Xingchen Link: https://lore.kernel.org/r/202302101520071730986@zte.com.cn Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/bus.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index ac306ca48b07d..c15928b8c5cc9 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -14,7 +14,6 @@ #include #include #include -#include #include "common.h" From b2b76e977fc6bc38e6a4dedb62b34bc90cc6ce97 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 23 Feb 2023 15:23:30 +0000 Subject: [PATCH 021/898] firmware: arm_scmi: Fix raw coexistence mode behaviour on failure path When SCMI raw coexistence mode is enabled make the core stack probe successfully even when the initial base protocol exchanges with the platform/server failed. This behaviour enables the system to boot with a broken regular SCMI stack but with a fully functional and accessible SCMI raw debugfs interface that can be used to further debug the issue. Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20230223152330.2707260-1-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 703f16ef39530..15a431639d820 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2657,6 +2657,7 @@ static int scmi_probe(struct platform_device *pdev) struct scmi_handle *handle; const struct scmi_desc *desc; struct scmi_info *info; + bool coex = IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT_COEX); struct device *dev = &pdev->dev; struct device_node *child, *np = dev->of_node; @@ -2731,9 +2732,6 @@ static int scmi_probe(struct platform_device *pdev) dev_warn(dev, "Failed to setup SCMI debugfs.\n"); if (IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) { - bool coex = - IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT_COEX); - ret = scmi_debugfs_raw_mode_setup(info); if (!coex) { if (ret) @@ -2764,6 +2762,8 @@ static int scmi_probe(struct platform_device *pdev) ret = scmi_protocol_acquire(handle, SCMI_PROTOCOL_BASE); if (ret) { dev_err(dev, "unable to communicate with SCMI\n"); + if (coex) + return 0; goto notification_exit; } From 7af9da8ce8f9a16221ecd8ba4280582f5bd452fc Mon Sep 17 00:00:00 2001 From: Sanjay R Mehta Date: Tue, 14 Feb 2023 13:13:50 -0600 Subject: [PATCH 022/898] thunderbolt: Add quirk to disable CLx Add QUIRK_NO_CLX to disable the CLx state for hardware which doesn't supports it. AMD Yellow Carp and Pink Sardine don't support CLx state, hence disabling it using QUIRK_NO_CLX. Cc: stable@vger.kernel.org Signed-off-by: Sanjay R Mehta Signed-off-by: Basavaraj Natikar [mw: added debug log when the quirk is run] Signed-off-by: Mika Westerberg --- drivers/thunderbolt/quirks.c | 13 +++++++++++++ drivers/thunderbolt/tb.h | 11 ++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/thunderbolt/quirks.c b/drivers/thunderbolt/quirks.c index b5f2ec79c4d6e..ae28a03fa890b 100644 --- a/drivers/thunderbolt/quirks.c +++ b/drivers/thunderbolt/quirks.c @@ -20,6 +20,12 @@ static void quirk_dp_credit_allocation(struct tb_switch *sw) } } +static void quirk_clx_disable(struct tb_switch *sw) +{ + sw->quirks |= QUIRK_NO_CLX; + tb_sw_dbg(sw, "disabling CL states\n"); +} + struct tb_quirk { u16 hw_vendor_id; u16 hw_device_id; @@ -37,6 +43,13 @@ static const struct tb_quirk tb_quirks[] = { * DP buffers. */ { 0x8087, 0x0b26, 0x0000, 0x0000, quirk_dp_credit_allocation }, + /* + * CLx is not supported on AMD USB4 Yellow Carp and Pink Sardine platforms. + */ + { 0x0438, 0x0208, 0x0000, 0x0000, quirk_clx_disable }, + { 0x0438, 0x0209, 0x0000, 0x0000, quirk_clx_disable }, + { 0x0438, 0x020a, 0x0000, 0x0000, quirk_clx_disable }, + { 0x0438, 0x020b, 0x0000, 0x0000, quirk_clx_disable }, }; /** diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index cbb20a2773462..64968c162ec7f 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -23,6 +23,11 @@ #define NVM_MAX_SIZE SZ_512K #define NVM_DATA_DWORDS 16 +/* Keep link controller awake during update */ +#define QUIRK_FORCE_POWER_LINK_CONTROLLER BIT(0) +/* Disable CLx if not supported */ +#define QUIRK_NO_CLX BIT(1) + /** * struct tb_nvm - Structure holding NVM information * @dev: Owner of the NVM @@ -1019,6 +1024,9 @@ static inline bool tb_switch_is_clx_enabled(const struct tb_switch *sw, */ static inline bool tb_switch_is_clx_supported(const struct tb_switch *sw) { + if (sw->quirks & QUIRK_NO_CLX) + return false; + return tb_switch_is_usb4(sw) || tb_switch_is_titan_ridge(sw); } @@ -1291,9 +1299,6 @@ struct usb4_port *usb4_port_device_add(struct tb_port *port); void usb4_port_device_remove(struct usb4_port *usb4); int usb4_port_device_resume(struct usb4_port *usb4); -/* Keep link controller awake during update */ -#define QUIRK_FORCE_POWER_LINK_CONTROLLER BIT(0) - void tb_check_quirks(struct tb_switch *sw); #ifdef CONFIG_ACPI From d49765b5f4320a402fbc4ed5edfd73d87640f27c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 25 Feb 2023 21:39:48 -0800 Subject: [PATCH 023/898] gpio: GPIO_REGMAP: select REGMAP instead of depending on it REGMAP is a hidden (not user visible) symbol. Users cannot set it directly thru "make *config", so drivers should select it instead of depending on it if they need it. Consistently using "select" or "depends on" can also help reduce Kconfig circular dependency issues. Therefore, change the use of "depends on REGMAP" to "select REGMAP". Fixes: ebe363197e52 ("gpio: add a reusable generic gpio_chip using regmap") Signed-off-by: Randy Dunlap Cc: Michael Walle Cc: Linus Walleij Cc: Bartosz Golaszewski Cc: linux-gpio@vger.kernel.org Acked-by: Michael Walle Signed-off-by: Bartosz Golaszewski --- drivers/gpio/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 13be729710f28..badbe05823180 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -100,7 +100,7 @@ config GPIO_GENERIC tristate config GPIO_REGMAP - depends on REGMAP + select REGMAP tristate # put drivers in the right section, in alphabetical order From c0ad453e94e5c404efbcf668648d07eaa1a71ed7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Sat, 18 Feb 2023 09:51:06 +0300 Subject: [PATCH 024/898] pinctrl: mediatek: add missing options to PINCTRL_MT7981 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are options missing from PINCTRL_MT7981 whilst being on every other pin controller. Add them. Signed-off-by: Arınç ÜNAL Acked-by: Daniel Golle Link: https://lore.kernel.org/r/20230218065108.8958-1-arinc.unal@arinc9.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/mediatek/Kconfig b/drivers/pinctrl/mediatek/Kconfig index f20c28334bcbf..67818ba14d4e4 100644 --- a/drivers/pinctrl/mediatek/Kconfig +++ b/drivers/pinctrl/mediatek/Kconfig @@ -130,6 +130,8 @@ config PINCTRL_MT7622 config PINCTRL_MT7981 bool "Mediatek MT7981 pin control" depends on OF + depends on ARM64 || COMPILE_TEST + default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_MOORE config PINCTRL_MT7986 From 6de67ca4dab7d855ef9598cd894cd7dfa4077f96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Sat, 18 Feb 2023 09:51:07 +0300 Subject: [PATCH 025/898] pinctrl: mediatek: fix naming inconsistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some options include "MediaTek", some "Mediatek". Rename all to "MediaTek" to address the naming inconsistency. Signed-off-by: Arınç ÜNAL Reviewed-by: Daniel Golle Link: https://lore.kernel.org/r/20230218065108.8958-2-arinc.unal@arinc9.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/Kconfig | 42 ++++++++++++++++---------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/pinctrl/mediatek/Kconfig b/drivers/pinctrl/mediatek/Kconfig index 67818ba14d4e4..a71874fed3d60 100644 --- a/drivers/pinctrl/mediatek/Kconfig +++ b/drivers/pinctrl/mediatek/Kconfig @@ -45,35 +45,35 @@ config PINCTRL_MTK_PARIS # For ARMv7 SoCs config PINCTRL_MT2701 - bool "Mediatek MT2701 pin control" + bool "MediaTek MT2701 pin control" depends on MACH_MT7623 || MACH_MT2701 || COMPILE_TEST depends on OF default MACH_MT2701 select PINCTRL_MTK config PINCTRL_MT7623 - bool "Mediatek MT7623 pin control with generic binding" + bool "MediaTek MT7623 pin control with generic binding" depends on MACH_MT7623 || COMPILE_TEST depends on OF default MACH_MT7623 select PINCTRL_MTK_MOORE config PINCTRL_MT7629 - bool "Mediatek MT7629 pin control" + bool "MediaTek MT7629 pin control" depends on MACH_MT7629 || COMPILE_TEST depends on OF default MACH_MT7629 select PINCTRL_MTK_MOORE config PINCTRL_MT8135 - bool "Mediatek MT8135 pin control" + bool "MediaTek MT8135 pin control" depends on MACH_MT8135 || COMPILE_TEST depends on OF default MACH_MT8135 select PINCTRL_MTK config PINCTRL_MT8127 - bool "Mediatek MT8127 pin control" + bool "MediaTek MT8127 pin control" depends on MACH_MT8127 || COMPILE_TEST depends on OF default MACH_MT8127 @@ -88,33 +88,33 @@ config PINCTRL_MT2712 select PINCTRL_MTK config PINCTRL_MT6765 - tristate "Mediatek MT6765 pin control" + tristate "MediaTek MT6765 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_PARIS config PINCTRL_MT6779 - tristate "Mediatek MT6779 pin control" + tristate "MediaTek MT6779 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_PARIS help Say yes here to support pin controller and gpio driver - on Mediatek MT6779 SoC. + on MediaTek MT6779 SoC. In MTK platform, we support virtual gpio and use it to map specific eint which doesn't have real gpio pin. config PINCTRL_MT6795 - bool "Mediatek MT6795 pin control" + bool "MediaTek MT6795 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_PARIS config PINCTRL_MT6797 - bool "Mediatek MT6797 pin control" + bool "MediaTek MT6797 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK @@ -128,42 +128,42 @@ config PINCTRL_MT7622 select PINCTRL_MTK_MOORE config PINCTRL_MT7981 - bool "Mediatek MT7981 pin control" + bool "MediaTek MT7981 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_MOORE config PINCTRL_MT7986 - bool "Mediatek MT7986 pin control" + bool "MediaTek MT7986 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_MOORE config PINCTRL_MT8167 - bool "Mediatek MT8167 pin control" + bool "MediaTek MT8167 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK config PINCTRL_MT8173 - bool "Mediatek MT8173 pin control" + bool "MediaTek MT8173 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK config PINCTRL_MT8183 - bool "Mediatek MT8183 pin control" + bool "MediaTek MT8183 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_PARIS config PINCTRL_MT8186 - bool "Mediatek MT8186 pin control" + bool "MediaTek MT8186 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK @@ -182,28 +182,28 @@ config PINCTRL_MT8188 map specific eint which doesn't have real gpio pin. config PINCTRL_MT8192 - bool "Mediatek MT8192 pin control" + bool "MediaTek MT8192 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_PARIS config PINCTRL_MT8195 - bool "Mediatek MT8195 pin control" + bool "MediaTek MT8195 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_PARIS config PINCTRL_MT8365 - bool "Mediatek MT8365 pin control" + bool "MediaTek MT8365 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK config PINCTRL_MT8516 - bool "Mediatek MT8516 pin control" + bool "MediaTek MT8516 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK @@ -211,7 +211,7 @@ config PINCTRL_MT8516 # For PMIC config PINCTRL_MT6397 - bool "Mediatek MT6397 pin control" + bool "MediaTek MT6397 pin control" depends on MFD_MT6397 || COMPILE_TEST depends on OF default MFD_MT6397 From 7bb97e360acdd38b68ad0a1defb89c6e89c85596 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 24 Feb 2023 14:08:28 +0100 Subject: [PATCH 026/898] pinctrl: at91-pio4: fix domain name assignment Since commit d59f6617eef0 ("genirq: Allow fwnode to carry name information only") an IRQ domain is always given a name during allocation (e.g. used for the debugfs entry). Drop the no longer valid name assignment, which would lead to an attempt to free a string constant when removing the domain on late probe failures (e.g. probe deferral). Fixes: d59f6617eef0 ("genirq: Allow fwnode to carry name information only") Cc: stable@vger.kernel.org # 4.13 Signed-off-by: Johan Hovold Reviewed-by: Claudiu Beznea Tested-by: Claudiu Beznea # on SAMA7G5 Link: https://lore.kernel.org/r/20230224130828.27985-1-johan+linaro@kernel.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-at91-pio4.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c index 373eed8bc4be9..c775d239444a6 100644 --- a/drivers/pinctrl/pinctrl-at91-pio4.c +++ b/drivers/pinctrl/pinctrl-at91-pio4.c @@ -1206,7 +1206,6 @@ static int atmel_pinctrl_probe(struct platform_device *pdev) dev_err(dev, "can't add the irq domain\n"); return -ENODEV; } - atmel_pioctrl->irq_domain->name = "atmel gpio"; for (i = 0; i < atmel_pioctrl->npins; i++) { int irq = irq_create_mapping(atmel_pioctrl->irq_domain, i); From 657fd9da2d4b4aa0a384105b236baa22fa0233bf Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 6 Feb 2023 21:37:20 +0100 Subject: [PATCH 027/898] pinctrl: ocelot: Fix alt mode for ocelot In case the driver was trying to set an alternate mode for gpio 0 or 32 then the mode was not set correctly. The reason is that there is computation error inside the function ocelot_pinmux_set_mux because in this case it was trying to shift to left by -1. Fix this by actually shifting the function bits and not the position. Fixes: 4b36082e2e09 ("pinctrl: ocelot: fix pinmuxing for pins after 31") Signed-off-by: Horatiu Vultur Link: https://lore.kernel.org/r/20230206203720.1177718-1-horatiu.vultur@microchip.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-ocelot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c index 29e4a6282a641..1dcbd0937ef5a 100644 --- a/drivers/pinctrl/pinctrl-ocelot.c +++ b/drivers/pinctrl/pinctrl-ocelot.c @@ -1204,7 +1204,7 @@ static int ocelot_pinmux_set_mux(struct pinctrl_dev *pctldev, regmap_update_bits(info->map, REG_ALT(0, info, pin->pin), BIT(p), f << p); regmap_update_bits(info->map, REG_ALT(1, info, pin->pin), - BIT(p), f << (p - 1)); + BIT(p), (f >> 1) << p); return 0; } From 913a956c4363c3b5fd13d3a00836fad4c46646a7 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Mon, 27 Feb 2023 21:51:31 +0100 Subject: [PATCH 028/898] pinctrl: stm32: use dynamic allocation of GPIO base Since commit 502df79b860563d7 ("gpiolib: Warn on drivers still using static gpiobase allocation"), one or more warnings are printed during boot on systems where static allocation of GPIO base is used: [ 0.197707] gpio gpiochip0: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.199942] stm32f429-pinctrl soc:pinctrl@40020000: GPIOA bank added [ 0.200711] gpio gpiochip1: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.202855] stm32f429-pinctrl soc:pinctrl@40020000: GPIOB bank added [ 0.203591] gpio gpiochip2: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.205704] stm32f429-pinctrl soc:pinctrl@40020000: GPIOC bank added [ 0.206338] gpio gpiochip3: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.208448] stm32f429-pinctrl soc:pinctrl@40020000: GPIOD bank added [ 0.209182] gpio gpiochip4: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.211282] stm32f429-pinctrl soc:pinctrl@40020000: GPIOE bank added [ 0.212094] gpio gpiochip5: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.214270] stm32f429-pinctrl soc:pinctrl@40020000: GPIOF bank added [ 0.215005] gpio gpiochip6: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.217110] stm32f429-pinctrl soc:pinctrl@40020000: GPIOG bank added [ 0.217845] gpio gpiochip7: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.219959] stm32f429-pinctrl soc:pinctrl@40020000: GPIOH bank added [ 0.220602] gpio gpiochip8: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.222714] stm32f429-pinctrl soc:pinctrl@40020000: GPIOI bank added [ 0.223483] gpio gpiochip9: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.225594] stm32f429-pinctrl soc:pinctrl@40020000: GPIOJ bank added [ 0.226336] gpio gpiochip10: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.228490] stm32f429-pinctrl soc:pinctrl@40020000: GPIOK bank added So let's follow the suggestion and use dynamic allocation. Tested on STM32F429I-DISC1 board. Signed-off-by: Dario Binacchi Link: https://lore.kernel.org/r/20230227205131.2104082-1-dario.binacchi@amarulasolutions.com Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/pinctrl-stm32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index cb33a23ab0c11..04ace4c7bd582 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1330,7 +1330,7 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, struct fwnode if (fwnode_property_read_u32(fwnode, "st,bank-ioport", &bank_ioport_nr)) bank_ioport_nr = bank_nr; - bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK; + bank->gpio_chip.base = -1; bank->gpio_chip.ngpio = npins; bank->gpio_chip.fwnode = fwnode; From ceac10c83b330680cc01ceaaab86cd49f4f30d81 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Wed, 22 Feb 2023 00:10:14 +0100 Subject: [PATCH 029/898] ARM: 9290/1: uaccess: Fix KASAN false-positives __copy_to_user_memcpy() and __clear_user_memset() had been calling memcpy() and memset() respectively, leading to false-positive KASAN reports when starting userspace: [ 10.707901] Run /init as init process [ 10.731892] process '/bin/busybox' started with executable stack [ 10.745234] ================================================================== [ 10.745796] BUG: KASAN: user-memory-access in __clear_user_memset+0x258/0x3ac [ 10.747260] Write of size 2687 at addr 000de581 by task init/1 Use __memcpy() and __memset() instead to allow userspace access, which is of course the intent of these functions. Signed-off-by: Andrew Jeffery Signed-off-by: Zev Weiss Reviewed-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/lib/uaccess_with_memcpy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 14eecaaf295fa..e4c2677cc1e9e 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -116,7 +116,7 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n) tocopy = n; ua_flags = uaccess_save_and_enable(); - memcpy((void *)to, from, tocopy); + __memcpy((void *)to, from, tocopy); uaccess_restore(ua_flags); to += tocopy; from += tocopy; @@ -178,7 +178,7 @@ __clear_user_memset(void __user *addr, unsigned long n) tocopy = n; ua_flags = uaccess_save_and_enable(); - memset((void *)addr, 0, tocopy); + __memset((void *)addr, 0, tocopy); uaccess_restore(ua_flags); addr += tocopy; n -= tocopy; From 1eb65c8687316c65140b48fad27133d583178e15 Mon Sep 17 00:00:00 2001 From: Mohammed Gamal Date: Fri, 17 Feb 2023 22:44:11 +0200 Subject: [PATCH 030/898] Drivers: vmbus: Check for channel allocation before looking up relids relid2channel() assumes vmbus channel array to be allocated when called. However, in cases such as kdump/kexec, not all relids will be reset by the host. When the second kernel boots and if the guest receives a vmbus interrupt during vmbus driver initialization before vmbus_connect() is called, before it finishes, or if it fails, the vmbus interrupt service routine is called which in turn calls relid2channel() and can cause a null pointer dereference. Print a warning and error out in relid2channel() for a channel id that's invalid in the second kernel. Fixes: 8b6a877c060e ("Drivers: hv: vmbus: Replace the per-CPU channel lists with a global array of channels") Signed-off-by: Mohammed Gamal Reviewed-by: Dexuan Cui Link: https://lore.kernel.org/r/20230217204411.212709-1-mgamal@redhat.com Signed-off-by: Wei Liu --- drivers/hv/connection.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 9dc27e5d367a2..da51b50787dff 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -409,6 +409,10 @@ void vmbus_disconnect(void) */ struct vmbus_channel *relid2channel(u32 relid) { + if (vmbus_connection.channels == NULL) { + pr_warn_once("relid2channel: relid=%d: No channels mapped!\n", relid); + return NULL; + } if (WARN_ON(relid >= MAX_CHANNEL_RELIDS)) return NULL; return READ_ONCE(vmbus_connection.channels[relid]); From 4ce341de6c02d02aba7c78a6447ccfcaa9eeb328 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Mon, 27 Feb 2023 13:24:25 +0300 Subject: [PATCH 031/898] mtd: rawnand: meson: initialize struct with zeroes This structure must be zeroed, because it's field 'hw->core' is used as 'parent' in 'clk_core_fill_parent_index()', but it will be uninitialized. This happens, because when this struct is not zeroed, pointer 'hw' is "initialized" by garbage, which is valid pointer, but points to some garbage. So 'hw' will be dereferenced, but 'core' contains some random data which will be interpreted as a pointer. The following backtrace is result of dereference of such pointer: [ 1.081319] __clk_register+0x414/0x820 [ 1.085113] devm_clk_register+0x64/0xd0 [ 1.088995] meson_nfc_probe+0x258/0x6ec [ 1.092875] platform_probe+0x70/0xf0 [ 1.096498] really_probe+0xc8/0x3e0 [ 1.100034] __driver_probe_device+0x84/0x190 [ 1.104346] driver_probe_device+0x44/0x120 [ 1.108487] __driver_attach+0xb4/0x220 [ 1.112282] bus_for_each_dev+0x78/0xd0 [ 1.116077] driver_attach+0x2c/0x40 [ 1.119613] bus_add_driver+0x184/0x240 [ 1.123408] driver_register+0x80/0x140 [ 1.127203] __platform_driver_register+0x30/0x40 [ 1.131860] meson_nfc_driver_init+0x24/0x30 Fixes: 1e4d3ba66888 ("mtd: rawnand: meson: fix the clock") Signed-off-by: Arseniy Krasnov Acked-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230227102425.793841-1-AVKrasnov@sberdevices.ru --- drivers/mtd/nand/raw/meson_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index 5ee01231ac4cd..30e326adabfc1 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -991,7 +991,7 @@ static const struct mtd_ooblayout_ops meson_ooblayout_ops = { static int meson_nfc_clk_init(struct meson_nfc *nfc) { - struct clk_parent_data nfc_divider_parent_data[1]; + struct clk_parent_data nfc_divider_parent_data[1] = {0}; struct clk_init_data init = {0}; int ret; From 75dce6a941e3f16c3b4878c8b2f46d5d07c619ce Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 15 Feb 2023 12:08:45 +0100 Subject: [PATCH 032/898] mtd: nand: mxic-ecc: Fix mxic_ecc_data_xfer_wait_for_completion() when irq is used wait_for_completion_timeout() and readl_poll_timeout() don't handle their return value the same way. wait_for_completion_timeout() returns 0 on time out (and >0 in all other cases) readl_poll_timeout() returns 0 on success and -ETIMEDOUT upon a timeout. In order for the error handling path to work in both cases, the logic against wait_for_completion_timeout() needs to be inverted. Fixes: 48e6633a9fa2 ("mtd: nand: mxic-ecc: Add Macronix external ECC engine support") Signed-off-by: Christophe JAILLET Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/beddbc374557e44ceec897e68c4a5d12764ddbb9.1676459308.git.christophe.jaillet@wanadoo.fr --- drivers/mtd/nand/ecc-mxic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/nand/ecc-mxic.c b/drivers/mtd/nand/ecc-mxic.c index 8afdca731b874..6b487ffe2f2dc 100644 --- a/drivers/mtd/nand/ecc-mxic.c +++ b/drivers/mtd/nand/ecc-mxic.c @@ -429,6 +429,7 @@ static int mxic_ecc_data_xfer_wait_for_completion(struct mxic_ecc_engine *mxic) mxic_ecc_enable_int(mxic); ret = wait_for_completion_timeout(&mxic->complete, msecs_to_jiffies(1000)); + ret = ret ? 0 : -ETIMEDOUT; mxic_ecc_disable_int(mxic); } else { ret = readl_poll_timeout(mxic->regs + INTRPT_STS, val, From 9bbf5feecc7eab2c370496c1c161bbfe62084028 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 27 Feb 2023 23:23:17 +0800 Subject: [PATCH 033/898] dm thin: fix deadlock when swapping to thin device This is an already known issue that dm-thin volume cannot be used as swap, otherwise a deadlock may happen when dm-thin internal memory demand triggers swap I/O on the dm-thin volume itself. But thanks to commit a666e5c05e7c ("dm: fix deadlock when swapping to encrypted device"), the limit_swap_bios target flag can also be used for dm-thin to avoid the recursive I/O when it is used as swap. Fix is to simply set ti->limit_swap_bios to true in both pool_ctr() and thin_ctr(). In my test, I create a dm-thin volume /dev/vg/swap and use it as swap device. Then I run fio on another dm-thin volume /dev/vg/main and use large --blocksize to trigger swap I/O onto /dev/vg/swap. The following fio command line is used in my test, fio --name recursive-swap-io --lockmem 1 --iodepth 128 \ --ioengine libaio --filename /dev/vg/main --rw randrw \ --blocksize 1M --numjobs 32 --time_based --runtime=12h Without this fix, the whole system can be locked up within 15 seconds. With this fix, there is no any deadlock or hung task observed after 2 hours of running fio. Furthermore, if blocksize is changed from 1M to 128M, after around 30 seconds fio has no visible I/O, and the out-of-memory killer message shows up in kernel message. After around 20 minutes all fio processes are killed and the whole system is back to being alive. This is exactly what is expected when recursive I/O happens on dm-thin volume when it is used as swap. Depends-on: a666e5c05e7c ("dm: fix deadlock when swapping to encrypted device") Cc: stable@vger.kernel.org Signed-off-by: Coly Li Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-thin.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 6cd105c1cef35..13d4677baafd1 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3369,6 +3369,7 @@ static int pool_ctr(struct dm_target *ti, unsigned int argc, char **argv) pt->low_water_blocks = low_water_blocks; pt->adjusted_pf = pt->requested_pf = pf; ti->num_flush_bios = 1; + ti->limit_swap_bios = true; /* * Only need to enable discards if the pool should pass @@ -4249,6 +4250,7 @@ static int thin_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; ti->num_flush_bios = 1; + ti->limit_swap_bios = true; ti->flush_supported = true; ti->accounts_remapped_io = true; ti->per_io_data_size = sizeof(struct dm_thin_endio_hook); From fb294b1c0ba982144ca467a75e7d01ff26304e2b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 6 Mar 2023 11:17:58 -0500 Subject: [PATCH 034/898] dm crypt: add cond_resched() to dmcrypt_write() The loop in dmcrypt_write may be running for unbounded amount of time, thus we need cond_resched() in it. This commit fixes the following warning: [ 3391.153255][ C12] watchdog: BUG: soft lockup - CPU#12 stuck for 23s! [dmcrypt_write/2:2897] ... [ 3391.387210][ C12] Call trace: [ 3391.390338][ C12] blk_attempt_bio_merge.part.6+0x38/0x158 [ 3391.395970][ C12] blk_attempt_plug_merge+0xc0/0x1b0 [ 3391.401085][ C12] blk_mq_submit_bio+0x398/0x550 [ 3391.405856][ C12] submit_bio_noacct+0x308/0x380 [ 3391.410630][ C12] dmcrypt_write+0x1e4/0x208 [dm_crypt] [ 3391.416005][ C12] kthread+0x130/0x138 [ 3391.419911][ C12] ret_from_fork+0x10/0x18 Reported-by: yangerkun Fixes: dc2676210c42 ("dm crypt: offload writes to thread") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 87c5706131f28..faba1be572f90 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1937,6 +1937,7 @@ static int dmcrypt_write(void *data) io = crypt_io_from_node(rb_first(&write_tree)); rb_erase(&io->rb_node, &write_tree); kcryptd_io_write(io); + cond_resched(); } while (!RB_EMPTY_ROOT(&write_tree)); blk_finish_plug(&plug); } From 672a58fc7c477e59981653a11241566870fff852 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 24 Feb 2023 13:30:45 +0530 Subject: [PATCH 035/898] arm64: dts: qcom: sm8150: Fix the iommu mask used for PCIe controllers The iommu mask should be 0x3f as per Qualcomm internal documentation. Without the correct mask, the PCIe transactions from the endpoint will result in SMMU faults. Hence, fix it! Cc: stable@vger.kernel.org # 5.19 Fixes: a1c86c680533 ("arm64: dts: qcom: sm8150: Add PCIe nodes") Signed-off-by: Manivannan Sadhasivam Reviewed-by: Konrad Dybcio Reviewed-by: Bhupesh Sharma Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230224080045.6577-1-manivannan.sadhasivam@linaro.org --- arch/arm64/boot/dts/qcom/sm8150.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index fd20096cfc6e3..13e0ce8286061 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -1826,7 +1826,7 @@ "slave_q2a", "tbu"; - iommus = <&apps_smmu 0x1d80 0x7f>; + iommus = <&apps_smmu 0x1d80 0x3f>; iommu-map = <0x0 &apps_smmu 0x1d80 0x1>, <0x100 &apps_smmu 0x1d81 0x1>; @@ -1925,7 +1925,7 @@ assigned-clocks = <&gcc GCC_PCIE_1_AUX_CLK>; assigned-clock-rates = <19200000>; - iommus = <&apps_smmu 0x1e00 0x7f>; + iommus = <&apps_smmu 0x1e00 0x3f>; iommu-map = <0x0 &apps_smmu 0x1e00 0x1>, <0x100 &apps_smmu 0x1e01 0x1>; From 8013295662f55696e5953ef14c31ba03721adf8f Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Sat, 11 Feb 2023 10:54:15 +0530 Subject: [PATCH 036/898] arm64: dts: qcom: sc8280xp: Add label property to vadc channel nodes For uniquely identifying the vadc channels, label property has to be used. The initial commit adding vadc support assumed that the driver will use the unit address along with the node name to identify the channels. But this assumption is now broken by, commit 701c875aded8 ("iio: adc: qcom-spmi-adc5: Fix the channel name") that stripped unit address from channel names. This results in probe failure of the vadc driver: [ 8.380370] iio iio:device0: tried to double register : in_temp_pmic-die-temp_input [ 8.380383] qcom-spmi-adc5 c440000.spmi:pmic@0:adc@3100: Failed to register sysfs interfaces [ 8.380386] qcom-spmi-adc5: probe of c440000.spmi:pmic@0:adc@3100 failed with error -16 Hence, let's get rid of the assumption about drivers and rely on label property to uniquely identify the channels. The labels are derived from the schematics for each PMIC. For internal adc channels such as die and xo, the PMIC names are used as a prefix. Fixes: 7c0151347401 ("arm64: dts: qcom: sc8280xp-x13s: Add PM8280_{1/2} ADC_TM5 channels") Fixes: 9d41cd17394a ("arm64: dts: qcom: sc8280xp-x13s: Add PMR735A VADC channel") Fixes: 3375151a7185 ("arm64: dts: qcom: sc8280xp-x13s: Add PM8280_{1/2} VADC channels") Fixes: 9a6b3042c533 ("arm64: dts: qcom: sc8280xp-x13s: Add PMK8280 VADC channels") Reported-by: Steev Klimaszewski Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230211052415.14581-1-manivannan.sadhasivam@linaro.org --- .../boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index 98e71b933437c..96b36ce94ce07 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -772,75 +772,88 @@ pmic-die-temp@3 { reg = ; qcom,pre-scaling = <1 1>; + label = "pmk8350_die_temp"; }; xo-therm@44 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "pmk8350_xo_therm"; }; pmic-die-temp@103 { reg = ; qcom,pre-scaling = <1 1>; + label = "pmc8280_1_die_temp"; }; sys-therm@144 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "sys_therm1"; }; sys-therm@145 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "sys_therm2"; }; sys-therm@146 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "sys_therm3"; }; sys-therm@147 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "sys_therm4"; }; pmic-die-temp@303 { reg = ; qcom,pre-scaling = <1 1>; + label = "pmc8280_2_die_temp"; }; sys-therm@344 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "sys_therm5"; }; sys-therm@345 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "sys_therm6"; }; sys-therm@346 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "sys_therm7"; }; sys-therm@347 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "sys_therm8"; }; pmic-die-temp@403 { reg = ; qcom,pre-scaling = <1 1>; + label = "pmr735a_die_temp"; }; }; From 205c91fb6aca5f8bad5346181575a7ef78e43cea Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 16 Feb 2023 13:49:21 +0100 Subject: [PATCH 037/898] arm64: dts: qcom: sm6115: Un-enable SPI5 by default The commit mentioned in the fixes tag erroneously enabled SPI5 unconditionally. Undo it. Fixes: 25aab0b852d6 ("arm64: dts: qcom: sm6115: Add geni debug uart node for qup0") Signed-off-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230216124921.3985834-1-konrad.dybcio@linaro.org --- arch/arm64/boot/dts/qcom/sm6115.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sm6115.dtsi b/arch/arm64/boot/dts/qcom/sm6115.dtsi index 4d6ec815b78b1..fbd67d2c8d781 100644 --- a/arch/arm64/boot/dts/qcom/sm6115.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6115.dtsi @@ -1078,6 +1078,7 @@ dma-names = "tx", "rx"; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; }; From 11d5e41f5e129e39bddedc7244a0946a802d2e8e Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 9 Jan 2023 14:56:47 +0100 Subject: [PATCH 038/898] arm64: dts: qcom: sm6375: Add missing power-domain-named to CDSP This was omitted when first introducing the node. Fix it. Fixes: fe6fd26aeddf ("arm64: dts: qcom: sm6375: Add ADSP&CDSP") Signed-off-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230109135647.339224-5-konrad.dybcio@linaro.org --- arch/arm64/boot/dts/qcom/sm6375.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sm6375.dtsi b/arch/arm64/boot/dts/qcom/sm6375.dtsi index 31b88c7385102..068ee4f724855 100644 --- a/arch/arm64/boot/dts/qcom/sm6375.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6375.dtsi @@ -1209,6 +1209,7 @@ clock-names = "xo"; power-domains = <&rpmpd SM6375_VDDCX>; + power-domain-names = "cx"; memory-region = <&pil_cdsp_mem>; From 4059297ed0a5adf8e5fd0bd734d702a24202c02e Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Thu, 9 Feb 2023 09:45:10 +0200 Subject: [PATCH 039/898] arm64: dts: qcom: sm8550: Add bias pull up value to tlmm i2c data clk states The default bias pull up value for the tlmm i2c data clk states is 2.2kOhms. Add this value to make sure the driver factors in the i2c pull up bit when writing the config register. Signed-off-by: Abel Vesa Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230209074510.4153294-2-abel.vesa@linaro.org --- arch/arm64/boot/dts/qcom/sm8550.dtsi | 30 ++++++++++++++-------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index ff4d342c07257..aa84a36c394a8 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -2691,7 +2691,7 @@ pins = "gpio28", "gpio29"; function = "qup1_se0"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c1_data_clk: qup-i2c1-data-clk-state { @@ -2699,7 +2699,7 @@ pins = "gpio32", "gpio33"; function = "qup1_se1"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c2_data_clk: qup-i2c2-data-clk-state { @@ -2707,7 +2707,7 @@ pins = "gpio36", "gpio37"; function = "qup1_se2"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c3_data_clk: qup-i2c3-data-clk-state { @@ -2715,7 +2715,7 @@ pins = "gpio40", "gpio41"; function = "qup1_se3"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c4_data_clk: qup-i2c4-data-clk-state { @@ -2723,7 +2723,7 @@ pins = "gpio44", "gpio45"; function = "qup1_se4"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c5_data_clk: qup-i2c5-data-clk-state { @@ -2731,7 +2731,7 @@ pins = "gpio52", "gpio53"; function = "qup1_se5"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c6_data_clk: qup-i2c6-data-clk-state { @@ -2739,7 +2739,7 @@ pins = "gpio48", "gpio49"; function = "qup1_se6"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c8_data_clk: qup-i2c8-data-clk-state { @@ -2747,14 +2747,14 @@ pins = "gpio57"; function = "qup2_se0_l1_mira"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; sda-pins { pins = "gpio56"; function = "qup2_se0_l0_mira"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; }; @@ -2763,7 +2763,7 @@ pins = "gpio60", "gpio61"; function = "qup2_se1"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c10_data_clk: qup-i2c10-data-clk-state { @@ -2771,7 +2771,7 @@ pins = "gpio64", "gpio65"; function = "qup2_se2"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c11_data_clk: qup-i2c11-data-clk-state { @@ -2779,7 +2779,7 @@ pins = "gpio68", "gpio69"; function = "qup2_se3"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c12_data_clk: qup-i2c12-data-clk-state { @@ -2787,7 +2787,7 @@ pins = "gpio2", "gpio3"; function = "qup2_se4"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c13_data_clk: qup-i2c13-data-clk-state { @@ -2795,7 +2795,7 @@ pins = "gpio80", "gpio81"; function = "qup2_se5"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c15_data_clk: qup-i2c15-data-clk-state { @@ -2803,7 +2803,7 @@ pins = "gpio72", "gpio106"; function = "qup2_se7"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_spi0_cs: qup-spi0-cs-state { From 27072f2ffb29283b9a44d878204c86c08d86b37f Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 16 Feb 2023 12:08:03 +0100 Subject: [PATCH 040/898] arm64: dts: qcom: sm8550: Use correct CPU compatibles Use the correct compatibles for the four kinds of CPU cores used on SM8550, based on the value of their MIDR_EL1 registers: CPU7: 0x411fd4e0 - CX3 r1p1 CPU5-6: 0x412fd470 - CA710 r?p? CPU3-4: 0x411fd4d0 - CA715 r?p? CPU0-2: 0x411fd461 - CA510 r?p? Signed-off-by: Konrad Dybcio Acked-by: Rob Herring Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230216110803.3945747-2-konrad.dybcio@linaro.org --- arch/arm64/boot/dts/qcom/sm8550.dtsi | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index aa84a36c394a8..25f51245fe9bb 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -66,7 +66,7 @@ CPU0: cpu@0 { device_type = "cpu"; - compatible = "qcom,kryo"; + compatible = "arm,cortex-a510"; reg = <0 0>; enable-method = "psci"; next-level-cache = <&L2_0>; @@ -89,7 +89,7 @@ CPU1: cpu@100 { device_type = "cpu"; - compatible = "qcom,kryo"; + compatible = "arm,cortex-a510"; reg = <0 0x100>; enable-method = "psci"; next-level-cache = <&L2_100>; @@ -108,7 +108,7 @@ CPU2: cpu@200 { device_type = "cpu"; - compatible = "qcom,kryo"; + compatible = "arm,cortex-a510"; reg = <0 0x200>; enable-method = "psci"; next-level-cache = <&L2_200>; @@ -127,7 +127,7 @@ CPU3: cpu@300 { device_type = "cpu"; - compatible = "qcom,kryo"; + compatible = "arm,cortex-a715"; reg = <0 0x300>; enable-method = "psci"; next-level-cache = <&L2_300>; @@ -146,7 +146,7 @@ CPU4: cpu@400 { device_type = "cpu"; - compatible = "qcom,kryo"; + compatible = "arm,cortex-a715"; reg = <0 0x400>; enable-method = "psci"; next-level-cache = <&L2_400>; @@ -165,7 +165,7 @@ CPU5: cpu@500 { device_type = "cpu"; - compatible = "qcom,kryo"; + compatible = "arm,cortex-a710"; reg = <0 0x500>; enable-method = "psci"; next-level-cache = <&L2_500>; @@ -184,7 +184,7 @@ CPU6: cpu@600 { device_type = "cpu"; - compatible = "qcom,kryo"; + compatible = "arm,cortex-a710"; reg = <0 0x600>; enable-method = "psci"; next-level-cache = <&L2_600>; @@ -203,7 +203,7 @@ CPU7: cpu@700 { device_type = "cpu"; - compatible = "qcom,kryo"; + compatible = "arm,cortex-x3"; reg = <0 0x700>; enable-method = "psci"; next-level-cache = <&L2_700>; From 052750a4444577e6067bdf73dd5ff92876f59ef6 Mon Sep 17 00:00:00 2001 From: Jianhua Lu Date: Tue, 21 Feb 2023 20:36:33 +0800 Subject: [PATCH 041/898] arm64: dts: qcom: sm8250-xiaomi-elish: Correct venus firmware path Missing vendor name for venus firmware path. Add it. Fixes: a41b617530bf ("arm64: dts: qcom: sm8250: Add device tree for Xiaomi Mi Pad 5 Pro") Signed-off-by: Jianhua Lu Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230221123633.25145-1-lujianhua000@gmail.com --- arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish.dts b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish.dts index acaa99c5ff8b1..a85d47f7a9e82 100644 --- a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish.dts +++ b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish.dts @@ -625,6 +625,6 @@ }; &venus { - firmware-name = "qcom/sm8250/elish/venus.mbn"; + firmware-name = "qcom/sm8250/xiaomi/elish/venus.mbn"; status = "okay"; }; From 77bf4b3ed42e31d29b255fcd6530fb7a1e217e89 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Mon, 6 Mar 2023 15:55:27 +0200 Subject: [PATCH 042/898] soc: qcom: llcc: Fix slice configuration values for SC8280XP The slice IDs for CVPFW, CPUSS1 and CPUWHT currently overflow the 32bit LLCC config registers, which means it is writing beyond the upper limit of the ATTR0_CFGn and ATTR1_CFGn range of registers. But the most obvious impact is the fact that the mentioned slices do not get configured at all, which will result in reduced performance. Fix that by using the slice ID values taken from the latest LLCC SC table. Fixes: ec69dfbdc426 ("soc: qcom: llcc: Add sc8180x and sc8280xp configurations") Cc: stable@vger.kernel.org # 5.19+ Signed-off-by: Abel Vesa Tested-by: Juerg Haefliger Reviewed-by: Sai Prakash Ranjan Acked-by: Konrad Dybcio Reviewed-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230306135527.509796-1-abel.vesa@linaro.org --- drivers/soc/qcom/llcc-qcom.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c index 23ce2f78c4ed4..26efe12012a0d 100644 --- a/drivers/soc/qcom/llcc-qcom.c +++ b/drivers/soc/qcom/llcc-qcom.c @@ -191,9 +191,9 @@ static const struct llcc_slice_config sc8280xp_data[] = { { LLCC_CVP, 28, 512, 3, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 }, { LLCC_APTCM, 30, 1024, 3, 1, 0x0, 0x1, 1, 0, 0, 1, 0, 0 }, { LLCC_WRCACHE, 31, 1024, 1, 1, 0xfff, 0x0, 0, 0, 0, 0, 1, 0 }, - { LLCC_CVPFW, 32, 512, 1, 0, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 }, - { LLCC_CPUSS1, 33, 2048, 1, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 }, - { LLCC_CPUHWT, 36, 512, 1, 1, 0xfff, 0x0, 0, 0, 0, 0, 1, 0 }, + { LLCC_CVPFW, 17, 512, 1, 0, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 }, + { LLCC_CPUSS1, 3, 2048, 1, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 }, + { LLCC_CPUHWT, 5, 512, 1, 1, 0xfff, 0x0, 0, 0, 0, 0, 1, 0 }, }; static const struct llcc_slice_config sdm845_data[] = { From 947007419b60d5e06aa54b0f411c123db7f45a44 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Sun, 5 Mar 2023 11:32:33 +0100 Subject: [PATCH 043/898] soc: qcom: rmtfs: fix error handling reading qcom,vmid of_property_count_u32_elems returns a negative integer when an error happens , but since the value was assigned to an unsigned integer, the check never worked correctly. Also print the correct variable in the error print, ret isn't used here. Fixes: e656cd0bcf3d ("soc: qcom: rmtfs: Optionally map RMTFS to more VMs") Signed-off-by: Luca Weiss Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230305-rmtfs-vmid-fix-v1-1-6a7206081602@z3ntu.xyz --- drivers/soc/qcom/rmtfs_mem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/soc/qcom/rmtfs_mem.c b/drivers/soc/qcom/rmtfs_mem.c index 2d3ee22b92494..f57756220198c 100644 --- a/drivers/soc/qcom/rmtfs_mem.c +++ b/drivers/soc/qcom/rmtfs_mem.c @@ -176,7 +176,8 @@ static int qcom_rmtfs_mem_probe(struct platform_device *pdev) struct reserved_mem *rmem; struct qcom_rmtfs_mem *rmtfs_mem; u32 client_id; - u32 num_vmids, vmid[NUM_MAX_VMIDS]; + u32 vmid[NUM_MAX_VMIDS]; + int num_vmids; int ret, i; rmem = of_reserved_mem_lookup(node); @@ -229,7 +230,7 @@ static int qcom_rmtfs_mem_probe(struct platform_device *pdev) num_vmids = of_property_count_u32_elems(node, "qcom,vmid"); if (num_vmids < 0) { - dev_err(&pdev->dev, "failed to count qcom,vmid elements: %d\n", ret); + dev_err(&pdev->dev, "failed to count qcom,vmid elements: %d\n", num_vmids); goto remove_cdev; } else if (num_vmids > NUM_MAX_VMIDS) { dev_warn(&pdev->dev, From 749d56bd5cf311dd9b50cfc092d7a39309454077 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Sun, 5 Mar 2023 11:32:34 +0100 Subject: [PATCH 044/898] soc: qcom: rmtfs: handle optional qcom,vmid correctly Older platforms don't have qcom,vmid set, handle -EINVAL return value correctly. And since num_vmids is passed to of_property_read_u32_array later we should make sure it has a sane value before continuing. Fixes: e656cd0bcf3d ("soc: qcom: rmtfs: Optionally map RMTFS to more VMs") Signed-off-by: Luca Weiss Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230305-rmtfs-vmid-fix-v1-2-6a7206081602@z3ntu.xyz --- drivers/soc/qcom/rmtfs_mem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/soc/qcom/rmtfs_mem.c b/drivers/soc/qcom/rmtfs_mem.c index f57756220198c..538fa182169a4 100644 --- a/drivers/soc/qcom/rmtfs_mem.c +++ b/drivers/soc/qcom/rmtfs_mem.c @@ -229,7 +229,10 @@ static int qcom_rmtfs_mem_probe(struct platform_device *pdev) } num_vmids = of_property_count_u32_elems(node, "qcom,vmid"); - if (num_vmids < 0) { + if (num_vmids == -EINVAL) { + /* qcom,vmid is optional */ + num_vmids = 0; + } else if (num_vmids < 0) { dev_err(&pdev->dev, "failed to count qcom,vmid elements: %d\n", num_vmids); goto remove_cdev; } else if (num_vmids > NUM_MAX_VMIDS) { From eaba416688f4f074ea3bf2ef975c9e2dbb06712b Mon Sep 17 00:00:00 2001 From: Yang Xiwen Date: Wed, 1 Mar 2023 16:53:50 +0800 Subject: [PATCH 045/898] arm64: dts: qcom: msm8916-ufi: Fix sim card selection pinctrl The previous commit mistakenly introduced sim_ctrl_default as pinctrl, this is incorrect, the interface for sim card selection varies between different devices and should not be placed in the dtsi. This commit selects external SIM card slot for ufi001c as default. uf896 selects the correct SIM card slot automatically, thus does not need this pinctrl node. Fixes: faf69431464b ("arm64: dts: qcom: msm8916-thwc: Add initial device trees") Signed-off-by: Yang Xiwen Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/tencent_7036BCA256055D05F8C49D86DF7F0E2D1A05@qq.com --- .../boot/dts/qcom/msm8916-thwc-uf896.dts | 4 --- .../boot/dts/qcom/msm8916-thwc-ufi001c.dts | 28 +++++++++++++++++-- arch/arm64/boot/dts/qcom/msm8916-ufi.dtsi | 10 ------- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8916-thwc-uf896.dts b/arch/arm64/boot/dts/qcom/msm8916-thwc-uf896.dts index c492db8561904..82e260375174d 100644 --- a/arch/arm64/boot/dts/qcom/msm8916-thwc-uf896.dts +++ b/arch/arm64/boot/dts/qcom/msm8916-thwc-uf896.dts @@ -33,7 +33,3 @@ &gpio_leds_default { pins = "gpio81", "gpio82", "gpio83"; }; - -&sim_ctrl_default { - pins = "gpio1", "gpio2"; -}; diff --git a/arch/arm64/boot/dts/qcom/msm8916-thwc-ufi001c.dts b/arch/arm64/boot/dts/qcom/msm8916-thwc-ufi001c.dts index 700cf81cbf8c0..8433c9710b1cf 100644 --- a/arch/arm64/boot/dts/qcom/msm8916-thwc-ufi001c.dts +++ b/arch/arm64/boot/dts/qcom/msm8916-thwc-ufi001c.dts @@ -25,6 +25,11 @@ gpios = <&msmgpio 20 GPIO_ACTIVE_HIGH>; }; +&mpss { + pinctrl-0 = <&sim_ctrl_default>; + pinctrl-names = "default"; +}; + &button_default { pins = "gpio37"; bias-pull-down; @@ -34,6 +39,25 @@ pins = "gpio20", "gpio21", "gpio22"; }; -&sim_ctrl_default { - pins = "gpio1", "gpio2"; +/* This selects the external SIM card slot by default */ +&msmgpio { + sim_ctrl_default: sim-ctrl-default-state { + esim-sel-pins { + pins = "gpio0", "gpio3"; + bias-disable; + output-low; + }; + + sim-en-pins { + pins = "gpio1"; + bias-disable; + output-low; + }; + + sim-sel-pins { + pins = "gpio2"; + bias-disable; + output-high; + }; + }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8916-ufi.dtsi b/arch/arm64/boot/dts/qcom/msm8916-ufi.dtsi index 790a9696da9de..cdf34b74fa8fa 100644 --- a/arch/arm64/boot/dts/qcom/msm8916-ufi.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916-ufi.dtsi @@ -92,9 +92,6 @@ }; &mpss { - pinctrl-0 = <&sim_ctrl_default>; - pinctrl-names = "default"; - status = "okay"; }; @@ -240,11 +237,4 @@ drive-strength = <2>; bias-disable; }; - - sim_ctrl_default: sim-ctrl-default-state { - function = "gpio"; - drive-strength = <2>; - bias-disable; - output-low; - }; }; From 8a63441e83724fee1ef3fd37b237d40d90780766 Mon Sep 17 00:00:00 2001 From: Krishna chaitanya chundru Date: Tue, 28 Feb 2023 17:19:12 +0530 Subject: [PATCH 046/898] arm64: dts: qcom: sc7280: Mark PCIe controller as cache coherent If the controller is not marked as cache coherent, then kernel will try to ensure coherency during dma-ops and that may cause data corruption. So, mark the PCIe node as dma-coherent as the devices on PCIe bus are cache coherent. Cc: stable@vger.kernel.org Fixes: 92e0ee9f83b3 ("arm64: dts: qcom: sc7280: Add PCIe and PHY related node") Signed-off-by: Krishna chaitanya chundru Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/1677584952-17496-1-git-send-email-quic_krichai@quicinc.com --- arch/arm64/boot/dts/qcom/sc7280.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index bdcb749253130..8f4ab6bd28864 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -2131,6 +2131,8 @@ pinctrl-names = "default"; pinctrl-0 = <&pcie1_clkreq_n>; + dma-coherent; + iommus = <&apps_smmu 0x1c80 0x1>; iommu-map = <0x0 &apps_smmu 0x1c80 0x1>, From 2ab4f4018cb6b8010ca5002c3bdc37783b5d28c2 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 7 Mar 2023 16:23:24 +0000 Subject: [PATCH 047/898] firmware: arm_scmi: Fix device node validation for mailbox transport When mailboxes are used as a transport it is possible to setup the SCMI transport layer, depending on the underlying channels configuration, to use one or two mailboxes, associated, respectively, to one or two, distinct, shared memory areas: any other combination should be treated as invalid. Add more strict checking of SCMI mailbox transport device node descriptors. Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type") Cc: # 4.19 Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20230307162324.891866-1-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/mailbox.c | 37 +++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c index 0d9c9538b7f41..112c285deb97b 100644 --- a/drivers/firmware/arm_scmi/mailbox.c +++ b/drivers/firmware/arm_scmi/mailbox.c @@ -52,6 +52,39 @@ static bool mailbox_chan_available(struct device_node *of_node, int idx) "#mbox-cells", idx, NULL); } +static int mailbox_chan_validate(struct device *cdev) +{ + int num_mb, num_sh, ret = 0; + struct device_node *np = cdev->of_node; + + num_mb = of_count_phandle_with_args(np, "mboxes", "#mbox-cells"); + num_sh = of_count_phandle_with_args(np, "shmem", NULL); + /* Bail out if mboxes and shmem descriptors are inconsistent */ + if (num_mb <= 0 || num_sh > 2 || num_mb != num_sh) { + dev_warn(cdev, "Invalid channel descriptor for '%s'\n", + of_node_full_name(np)); + return -EINVAL; + } + + if (num_sh > 1) { + struct device_node *np_tx, *np_rx; + + np_tx = of_parse_phandle(np, "shmem", 0); + np_rx = of_parse_phandle(np, "shmem", 1); + /* SCMI Tx and Rx shared mem areas have to be distinct */ + if (!np_tx || !np_rx || np_tx == np_rx) { + dev_warn(cdev, "Invalid shmem descriptor for '%s'\n", + of_node_full_name(np)); + ret = -EINVAL; + } + + of_node_put(np_tx); + of_node_put(np_rx); + } + + return ret; +} + static int mailbox_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, bool tx) { @@ -64,6 +97,10 @@ static int mailbox_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, resource_size_t size; struct resource res; + ret = mailbox_chan_validate(cdev); + if (ret) + return ret; + smbox = devm_kzalloc(dev, sizeof(*smbox), GFP_KERNEL); if (!smbox) return -ENOMEM; From d617808e3b8324eacebabefec49dc75536ee39cc Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 8 Jul 2022 21:30:01 +0200 Subject: [PATCH 048/898] firmware: arm_scmi: Use the bitmap API to allocate bitmaps Use devm_bitmap_zalloc() instead of hand-writing them. It is less verbose and it improves the semantic. Signed-off-by: Christophe JAILLET Reviewed-by: Cristian Marussi Tested-by: Cristian Marussi Link: https://lore.kernel.org/r/c073b1607ada34d5bde6ce1009179cf15bbf0da3.1657308593.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 15a431639d820..dbc474ff62b71 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2221,8 +2221,8 @@ static int __scmi_xfer_info_init(struct scmi_info *sinfo, hash_init(info->pending_xfers); /* Allocate a bitmask sized to hold MSG_TOKEN_MAX tokens */ - info->xfer_alloc_table = devm_kcalloc(dev, BITS_TO_LONGS(MSG_TOKEN_MAX), - sizeof(long), GFP_KERNEL); + info->xfer_alloc_table = devm_bitmap_zalloc(dev, MSG_TOKEN_MAX, + GFP_KERNEL); if (!info->xfer_alloc_table) return -ENOMEM; From f87fb985452ab2083967103ac00bfd68fb182764 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 8 Mar 2023 16:42:42 +0100 Subject: [PATCH 049/898] usb: ucsi: Fix NULL pointer deref in ucsi_connector_change() When ucsi_init() fails, ucsi->connector is NULL, yet in case of ucsi_acpi we may still get events which cause the ucs_acpi code to call ucsi_connector_change(), which then derefs the NULL ucsi->connector pointer. Fix this by not setting ucsi->ntfy inside ucsi_init() until ucsi_init() has succeeded, so that ucsi_connector_change() ignores the events because UCSI_ENABLE_NTFY_CONNECTOR_CHANGE is not set in the ntfy mask. Fixes: bdc62f2bae8f ("usb: typec: ucsi: Simplified registration and I/O API") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217106 Cc: stable@vger.kernel.org Reviewed-by: Heikki Krogerus Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230308154244.722337-2-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index f632350f6dcb2..0623861c597b7 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1329,7 +1329,7 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) static int ucsi_init(struct ucsi *ucsi) { struct ucsi_connector *con; - u64 command; + u64 command, ntfy; int ret; int i; @@ -1341,8 +1341,8 @@ static int ucsi_init(struct ucsi *ucsi) } /* Enable basic notifications */ - ucsi->ntfy = UCSI_ENABLE_NTFY_CMD_COMPLETE | UCSI_ENABLE_NTFY_ERROR; - command = UCSI_SET_NOTIFICATION_ENABLE | ucsi->ntfy; + ntfy = UCSI_ENABLE_NTFY_CMD_COMPLETE | UCSI_ENABLE_NTFY_ERROR; + command = UCSI_SET_NOTIFICATION_ENABLE | ntfy; ret = ucsi_send_command(ucsi, command, NULL, 0); if (ret < 0) goto err_reset; @@ -1374,12 +1374,13 @@ static int ucsi_init(struct ucsi *ucsi) } /* Enable all notifications */ - ucsi->ntfy = UCSI_ENABLE_NTFY_ALL; - command = UCSI_SET_NOTIFICATION_ENABLE | ucsi->ntfy; + ntfy = UCSI_ENABLE_NTFY_ALL; + command = UCSI_SET_NOTIFICATION_ENABLE | ntfy; ret = ucsi_send_command(ucsi, command, NULL, 0); if (ret < 0) goto err_unregister; + ucsi->ntfy = ntfy; return 0; err_unregister: From 0482c34ec6f8557e06cd0f8e2d0e20e8ede6a22c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 8 Mar 2023 16:42:43 +0100 Subject: [PATCH 050/898] usb: ucsi: Fix ucsi->connector race ucsi_init() which runs from a workqueue sets ucsi->connector and on an error will clear it again. ucsi->connector gets dereferenced by ucsi_resume(), this checks for ucsi->connector being NULL in case ucsi_init() has not finished yet; or in case ucsi_init() has failed. ucsi_init() setting ucsi->connector and then clearing it again on an error creates a race where the check in ucsi_resume() may pass, only to have ucsi->connector free-ed underneath it when ucsi_init() hits an error. Fix this race by making ucsi_init() store the connector array in a local variable and only assign it to ucsi->connector on success. Fixes: bdc62f2bae8f ("usb: typec: ucsi: Simplified registration and I/O API") Cc: stable@vger.kernel.org Reviewed-by: Heikki Krogerus Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230308154244.722337-3-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 0623861c597b7..8d1baf28df55c 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1125,12 +1125,11 @@ static struct fwnode_handle *ucsi_find_fwnode(struct ucsi_connector *con) return NULL; } -static int ucsi_register_port(struct ucsi *ucsi, int index) +static int ucsi_register_port(struct ucsi *ucsi, struct ucsi_connector *con) { struct usb_power_delivery_desc desc = { ucsi->cap.pd_version}; struct usb_power_delivery_capabilities_desc pd_caps; struct usb_power_delivery_capabilities *pd_cap; - struct ucsi_connector *con = &ucsi->connector[index]; struct typec_capability *cap = &con->typec_cap; enum typec_accessory *accessory = cap->accessory; enum usb_role u_role = USB_ROLE_NONE; @@ -1151,7 +1150,6 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) init_completion(&con->complete); mutex_init(&con->lock); INIT_LIST_HEAD(&con->partner_tasks); - con->num = index + 1; con->ucsi = ucsi; cap->fwnode = ucsi_find_fwnode(con); @@ -1328,7 +1326,7 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) */ static int ucsi_init(struct ucsi *ucsi) { - struct ucsi_connector *con; + struct ucsi_connector *con, *connector; u64 command, ntfy; int ret; int i; @@ -1359,16 +1357,16 @@ static int ucsi_init(struct ucsi *ucsi) } /* Allocate the connectors. Released in ucsi_unregister() */ - ucsi->connector = kcalloc(ucsi->cap.num_connectors + 1, - sizeof(*ucsi->connector), GFP_KERNEL); - if (!ucsi->connector) { + connector = kcalloc(ucsi->cap.num_connectors + 1, sizeof(*connector), GFP_KERNEL); + if (!connector) { ret = -ENOMEM; goto err_reset; } /* Register all connectors */ for (i = 0; i < ucsi->cap.num_connectors; i++) { - ret = ucsi_register_port(ucsi, i); + connector[i].num = i + 1; + ret = ucsi_register_port(ucsi, &connector[i]); if (ret) goto err_unregister; } @@ -1380,11 +1378,12 @@ static int ucsi_init(struct ucsi *ucsi) if (ret < 0) goto err_unregister; + ucsi->connector = connector; ucsi->ntfy = ntfy; return 0; err_unregister: - for (con = ucsi->connector; con->port; con++) { + for (con = connector; con->port; con++) { ucsi_unregister_partner(con); ucsi_unregister_altmodes(con, UCSI_RECIPIENT_CON); ucsi_unregister_port_psy(con); @@ -1400,10 +1399,7 @@ static int ucsi_init(struct ucsi *ucsi) typec_unregister_port(con->port); con->port = NULL; } - - kfree(ucsi->connector); - ucsi->connector = NULL; - + kfree(connector); err_reset: memset(&ucsi->cap, 0, sizeof(ucsi->cap)); ucsi_reset_ppm(ucsi); From 02d210f434249a7edbc160969b75df030dc6934d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 8 Mar 2023 16:42:44 +0100 Subject: [PATCH 051/898] usb: ucsi_acpi: Increase the command completion timeout Commit 130a96d698d7 ("usb: typec: ucsi: acpi: Increase command completion timeout value") increased the timeout from 5 seconds to 60 seconds due to issues related to alternate mode discovery. After the alternate mode discovery switch to polled mode the timeout was reduced, but instead of being set back to 5 seconds it was reduced to 1 second. This is causing problems when using a Lenovo ThinkPad X1 yoga gen7 connected over Type-C to a LG 27UL850-W (charging DP over Type-C). When the monitor is already connected at boot the following error is logged: "PPM init failed (-110)", /sys/class/typec is empty and on unplugging the NULL pointer deref fixed earlier in this series happens. When the monitor is connected after boot the following error is logged instead: "GET_CONNECTOR_STATUS failed (-110)". Setting the timeout back to 5 seconds fixes both cases. Fixes: e08065069fc7 ("usb: typec: ucsi: acpi: Reduce the command completion timeout") Cc: stable@vger.kernel.org Reviewed-by: Heikki Krogerus Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230308154244.722337-4-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index ce0c8ef80c043..62206a6b8ea75 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -78,7 +78,7 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, if (ret) goto out_clear_bit; - if (!wait_for_completion_timeout(&ua->complete, HZ)) + if (!wait_for_completion_timeout(&ua->complete, 5 * HZ)) ret = -ETIMEDOUT; out_clear_bit: From 02c1820345e795148e6b497ef85090915401698e Mon Sep 17 00:00:00 2001 From: Vincenzo Palazzo Date: Thu, 2 Mar 2023 16:07:06 +0100 Subject: [PATCH 052/898] usb: dwc3: Fix a typo in field name Fix a typo inside the dwc3 struct docs. Fixes: 63d7f9810a38 ("usb: dwc3: core: Enable GUCTL1 bit 10 for fixing termination error after resume bug") Signed-off-by: Vincenzo Palazzo Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20230302150706.229008-1-vincenzopalazzodev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 582ebd9cf9c2e..4743e918dcafa 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1098,7 +1098,7 @@ struct dwc3_scratchpad_array { * change quirk. * @dis_tx_ipgap_linecheck_quirk: set if we disable u2mac linestate * check during HS transmit. - * @resume-hs-terminations: Set if we enable quirk for fixing improper crc + * @resume_hs_terminations: Set if we enable quirk for fixing improper crc * generation after resume from suspend. * @parkmode_disable_ss_quirk: set if we need to disable all SuperSpeed * instances in park mode. From d8a2bb4eb75866275b5cf7de2e593ac3449643e2 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Mon, 6 Mar 2023 12:05:57 -0800 Subject: [PATCH 053/898] usb: dwc3: gadget: Add 1ms delay after end transfer command without IOC Previously, there was a 100uS delay inserted after issuing an end transfer command for specific controller revisions. This was due to the fact that there was a GUCTL2 bit field which enabled synchronous completion of the end transfer command once the CMDACT bit was cleared in the DEPCMD register. Since this bit does not exist for all controller revisions and the current implementation heavily relies on utizling the EndTransfer command completion interrupt, add the delay back in for uses where the interrupt on completion bit is not set, and increase the duration to 1ms for the controller to complete the command. An issue was seen where the USB request buffer was unmapped while the DWC3 controller was still accessing the TRB. However, it was confirmed that the end transfer command was successfully submitted. (no end transfer timeout) In situations, such as dwc3_gadget_soft_disconnect() and __dwc3_gadget_ep_disable(), the dwc3_remove_request() is utilized, which will issue the end transfer command, and follow up with dwc3_gadget_giveback(). At least for the USB ep disable path, it is required for any pending and started requests to be completed and returned to the function driver in the same context of the disable call. Without the GUCTL2 bit, it is not ensured that the end transfer is completed before the buffers are unmapped. Fixes: cf2f8b63f7f1 ("usb: dwc3: gadget: Remove END_TRANSFER delay") Cc: stable Signed-off-by: Wesley Cheng Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20230306200557.29387-1-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 3c63fa97a6800..cf5b4f49c3ed8 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1699,6 +1699,7 @@ static int __dwc3_gadget_get_frame(struct dwc3 *dwc) */ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool interrupt) { + struct dwc3 *dwc = dep->dwc; struct dwc3_gadget_ep_cmd_params params; u32 cmd; int ret; @@ -1722,10 +1723,13 @@ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool int WARN_ON_ONCE(ret); dep->resource_index = 0; - if (!interrupt) + if (!interrupt) { + if (!DWC3_IP_IS(DWC3) || DWC3_VER_IS_PRIOR(DWC3, 310A)) + mdelay(1); dep->flags &= ~DWC3_EP_TRANSFER_STARTED; - else if (!ret) + } else if (!ret) { dep->flags |= DWC3_EP_END_TRANSFER_PENDING; + } dep->flags &= ~DWC3_EP_DELAY_STOP; return ret; @@ -3774,7 +3778,11 @@ void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, * enabled, the EndTransfer command will have completed upon * returning from this function. * - * This mode is NOT available on the DWC_usb31 IP. + * This mode is NOT available on the DWC_usb31 IP. In this + * case, if the IOC bit is not set, then delay by 1ms + * after issuing the EndTransfer command. This allows for the + * controller to handle the command completely before DWC3 + * remove requests attempts to unmap USB request buffers. */ __dwc3_stop_active_transfer(dep, force, interrupt); From f7c13cb48e85538709850589b496c4ddb3d3898e Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 23 Feb 2023 08:39:20 +0100 Subject: [PATCH 054/898] usb: misc: onboard-hub: add support for Microchip USB2517 USB 2.0 hub Add support for Microchip USB2517 USB 2.0 hub to the onboard usb hub driver. Adopt the generic usb-device compatible ("usbVID,PID"). This hub has the same reset timings as USB2514, so reuse that one. There is also an USB2517I which just has industrial temperature range. Signed-off-by: Alexander Stein Cc: stable Acked-by: Matthias Kaehlcke Link: https://lore.kernel.org/r/20230223073920.2912298-1-alexander.stein@ew.tq-group.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/onboard_usb_hub.c | 1 + drivers/usb/misc/onboard_usb_hub.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c index 5402e4b7267b9..12fc6eb67c3bf 100644 --- a/drivers/usb/misc/onboard_usb_hub.c +++ b/drivers/usb/misc/onboard_usb_hub.c @@ -410,6 +410,7 @@ static const struct usb_device_id onboard_hub_id_table[] = { { USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 */ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0610) }, /* Genesys Logic GL852G USB 2.0 */ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */ + { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2517) }, /* USB2517 USB 2.0 */ { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */ { USB_DEVICE(VENDOR_ID_REALTEK, 0x5411) }, /* RTS5411 USB 2.1 */ { USB_DEVICE(VENDOR_ID_REALTEK, 0x0414) }, /* RTS5414 USB 3.2 */ diff --git a/drivers/usb/misc/onboard_usb_hub.h b/drivers/usb/misc/onboard_usb_hub.h index 0a943a1546490..aca5f50eb0da7 100644 --- a/drivers/usb/misc/onboard_usb_hub.h +++ b/drivers/usb/misc/onboard_usb_hub.h @@ -36,6 +36,7 @@ static const struct onboard_hub_pdata vialab_vl817_data = { static const struct of_device_id onboard_hub_match[] = { { .compatible = "usb424,2514", .data = µchip_usb424_data, }, + { .compatible = "usb424,2517", .data = µchip_usb424_data, }, { .compatible = "usb451,8140", .data = &ti_tusb8041_data, }, { .compatible = "usb451,8142", .data = &ti_tusb8041_data, }, { .compatible = "usb5e3,608", .data = &genesys_gl850g_data, }, From 6c67ed9ad9b83e453e808f9b31a931a20a25629b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Thu, 2 Mar 2023 17:36:47 +0100 Subject: [PATCH 055/898] usb: gadget: u_audio: don't let userspace block driver unbind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the unbind callback for f_uac1 and f_uac2, a call to snd_card_free() via g_audio_cleanup() will disconnect the card and then wait for all resources to be released, which happens when the refcount falls to zero. Since userspace can keep the refcount incremented by not closing the relevant file descriptor, the call to unbind may block indefinitely. This can cause a deadlock during reboot, as evidenced by the following blocked task observed on my machine: task:reboot state:D stack:0 pid:2827 ppid:569 flags:0x0000000c Call trace: __switch_to+0xc8/0x140 __schedule+0x2f0/0x7c0 schedule+0x60/0xd0 schedule_timeout+0x180/0x1d4 wait_for_completion+0x78/0x180 snd_card_free+0x90/0xa0 g_audio_cleanup+0x2c/0x64 afunc_unbind+0x28/0x60 ... kernel_restart+0x4c/0xac __do_sys_reboot+0xcc/0x1ec __arm64_sys_reboot+0x28/0x30 invoke_syscall+0x4c/0x110 ... The issue can also be observed by opening the card with arecord and then stopping the process through the shell before unbinding: # arecord -D hw:UAC2Gadget -f S32_LE -c 2 -r 48000 /dev/null Recording WAVE '/dev/null' : Signed 32 bit Little Endian, Rate 48000 Hz, Stereo ^Z[1]+ Stopped arecord -D hw:UAC2Gadget -f S32_LE -c 2 -r 48000 /dev/null # echo gadget.0 > /sys/bus/gadget/drivers/configfs-gadget/unbind (observe that the unbind command never finishes) Fix the problem by using snd_card_free_when_closed() instead, which will still disconnect the card as desired, but defer the task of freeing the resources to the core once userspace closes its file descriptor. Fixes: 132fcb460839 ("usb: gadget: Add Audio Class 2.0 Driver") Cc: stable@vger.kernel.org Signed-off-by: Alvin Šipraga Reviewed-by: Ruslan Bilovol Reviewed-by: John Keeping Link: https://lore.kernel.org/r/20230302163648.3349669-1-alvin@pqrs.dk Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index c1f62e91b0126..4a42574b4a7fe 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -1422,7 +1422,7 @@ void g_audio_cleanup(struct g_audio *g_audio) uac = g_audio->uac; card = uac->card; if (card) - snd_card_free(card); + snd_card_free_when_closed(card); kfree(uac->p_prm.reqs); kfree(uac->c_prm.reqs); From a826492fc9dfe32afd70fff93955ae8174bbf14b Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Wed, 15 Feb 2023 13:49:51 +0800 Subject: [PATCH 056/898] usb: typec: tcpm: fix create duplicate source-capabilities file The kernel will dump in the below cases: sysfs: cannot create duplicate filename '/devices/virtual/usb_power_delivery/pd1/source-capabilities' 1. After soft reset has completed, an Explicit Contract negotiation occurs. The sink device will receive source capabilitys again. This will cause a duplicate source-capabilities file be created. 2. Power swap twice on a device that is initailly sink role. This will unregister existing capabilities when above cases occurs. Fixes: 8203d26905ee ("usb: typec: tcpm: Register USB Power Delivery Capabilities") cc: Signed-off-by: Xu Yang Reviewed-by: Heikki Krogerus Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20230215054951.238394-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index a0d943d785800..7f39cb9b34290 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4570,6 +4570,9 @@ static void run_state_machine(struct tcpm_port *port) case SOFT_RESET: port->message_id = 0; port->rx_msgid = -1; + /* remove existing capabilities */ + usb_power_delivery_unregister_capabilities(port->partner_source_caps); + port->partner_source_caps = NULL; tcpm_pd_send_control(port, PD_CTRL_ACCEPT); tcpm_ams_finish(port); if (port->pwr_role == TYPEC_SOURCE) { @@ -4589,6 +4592,9 @@ static void run_state_machine(struct tcpm_port *port) case SOFT_RESET_SEND: port->message_id = 0; port->rx_msgid = -1; + /* remove existing capabilities */ + usb_power_delivery_unregister_capabilities(port->partner_source_caps); + port->partner_source_caps = NULL; if (tcpm_pd_send_control(port, PD_CTRL_SOFT_RESET)) tcpm_set_state_cond(port, hard_reset_state(port), 0); else @@ -4718,6 +4724,9 @@ static void run_state_machine(struct tcpm_port *port) tcpm_set_state(port, SNK_STARTUP, 0); break; case PR_SWAP_SNK_SRC_SINK_OFF: + /* will be source, remove existing capabilities */ + usb_power_delivery_unregister_capabilities(port->partner_source_caps); + port->partner_source_caps = NULL; /* * Prevent vbus discharge circuit from turning on during PR_SWAP * as this is not a disconnect. From abfc4fa28f0160df61c7149567da4f6494dfb488 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Thu, 16 Feb 2023 11:15:15 +0800 Subject: [PATCH 057/898] usb: typec: tcpm: fix warning when handle discover_identity message Since both source and sink device can send discover_identity message in PD3, kernel may dump below warning: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 169 at drivers/usb/typec/tcpm/tcpm.c:1446 tcpm_queue_vdm+0xe0/0xf0 Modules linked in: CPU: 0 PID: 169 Comm: 1-0050 Not tainted 6.1.1-00038-g6a3c36cf1da2-dirty #567 Hardware name: NXP i.MX8MPlus EVK board (DT) pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : tcpm_queue_vdm+0xe0/0xf0 lr : tcpm_queue_vdm+0x2c/0xf0 sp : ffff80000c19bcd0 x29: ffff80000c19bcd0 x28: 0000000000000001 x27: ffff0000d11c8ab8 x26: ffff0000d11cc000 x25: 0000000000000000 x24: 00000000ff008081 x23: 0000000000000001 x22: 00000000ff00a081 x21: ffff80000c19bdbc x20: 0000000000000000 x19: ffff0000d11c8080 x18: ffffffffffffffff x17: 0000000000000000 x16: 0000000000000000 x15: ffff0000d716f580 x14: 0000000000000001 x13: ffff0000d716f507 x12: 0000000000000001 x11: 0000000000000000 x10: 0000000000000020 x9 : 00000000000ee098 x8 : 00000000ffffffff x7 : 000000000000001c x6 : ffff0000d716f580 x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 x2 : ffff80000c19bdbc x1 : 00000000ff00a081 x0 : 0000000000000004 Call trace: tcpm_queue_vdm+0xe0/0xf0 tcpm_pd_rx_handler+0x340/0x1ab0 kthread_worker_fn+0xcc/0x18c kthread+0x10c/0x110 ret_from_fork+0x10/0x20 ---[ end trace 0000000000000000 ]--- Below sequences may trigger this warning: tcpm_send_discover_work(work) tcpm_send_vdm(port, USB_SID_PD, CMD_DISCOVER_IDENT, NULL, 0); tcpm_queue_vdm(port, header, data, count); port->vdm_state = VDM_STATE_READY; vdm_state_machine_work(work); <-- received discover_identity from partner vdm_run_state_machine(port); port->vdm_state = VDM_STATE_SEND_MESSAGE; mod_vdm_delayed_work(port, x); tcpm_pd_rx_handler(work); tcpm_pd_data_request(port, msg); tcpm_handle_vdm_request(port, msg->payload, cnt); tcpm_queue_vdm(port, response[0], &response[1], rlen - 1); --> WARN_ON(port->vdm_state > VDM_STATE_DONE); For this case, the state machine could still send out discover identity message later if we skip current discover_identity message. So we should handle the received message firstly and override the pending discover_identity message without warning in this case. Then, a delayed send_discover work will send discover_identity message again. Fixes: e00943e91678 ("usb: typec: tcpm: PD3.0 sinks can send Discover Identity even in device mode") cc: Signed-off-by: Xu Yang Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20230216031515.4151117-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 7f39cb9b34290..1ee774c263f08 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -1445,10 +1445,18 @@ static int tcpm_ams_start(struct tcpm_port *port, enum tcpm_ams ams) static void tcpm_queue_vdm(struct tcpm_port *port, const u32 header, const u32 *data, int cnt) { + u32 vdo_hdr = port->vdo_data[0]; + WARN_ON(!mutex_is_locked(&port->lock)); - /* Make sure we are not still processing a previous VDM packet */ - WARN_ON(port->vdm_state > VDM_STATE_DONE); + /* If is sending discover_identity, handle received message first */ + if (PD_VDO_SVDM(vdo_hdr) && PD_VDO_CMD(vdo_hdr) == CMD_DISCOVER_IDENT) { + port->send_discover = true; + mod_send_discover_delayed_work(port, SEND_DISCOVER_RETRY_MS); + } else { + /* Make sure we are not still processing a previous VDM packet */ + WARN_ON(port->vdm_state > VDM_STATE_DONE); + } port->vdo_count = cnt + 1; port->vdo_data[0] = header; @@ -1948,11 +1956,13 @@ static void vdm_run_state_machine(struct tcpm_port *port) switch (PD_VDO_CMD(vdo_hdr)) { case CMD_DISCOVER_IDENT: res = tcpm_ams_start(port, DISCOVER_IDENTITY); - if (res == 0) + if (res == 0) { port->send_discover = false; - else if (res == -EAGAIN) + } else if (res == -EAGAIN) { + port->vdo_data[0] = 0; mod_send_discover_delayed_work(port, SEND_DISCOVER_RETRY_MS); + } break; case CMD_DISCOVER_SVID: res = tcpm_ams_start(port, DISCOVER_SVIDS); @@ -2035,6 +2045,7 @@ static void vdm_run_state_machine(struct tcpm_port *port) unsigned long timeout; port->vdm_retries = 0; + port->vdo_data[0] = 0; port->vdm_state = VDM_STATE_BUSY; timeout = vdm_ready_timeout(vdo_hdr); mod_vdm_delayed_work(port, timeout); From 094f391013ba9cc77b4b1ae1617f0a832e598d67 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Wed, 8 Mar 2023 16:52:13 +0000 Subject: [PATCH 058/898] docs: usb: Add documentation for the UVC Gadget The UVC Gadget function has become quite complex, but documentation for it is fairly sparse. Add some more detailed documentation to improve the situation. Signed-off-by: Daniel Scally Link: https://lore.kernel.org/r/20230308165213.139315-1-dan.scally@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/gadget_uvc.rst | 352 +++++++++++++++++++++++++++++++ Documentation/usb/index.rst | 1 + 2 files changed, 353 insertions(+) create mode 100644 Documentation/usb/gadget_uvc.rst diff --git a/Documentation/usb/gadget_uvc.rst b/Documentation/usb/gadget_uvc.rst new file mode 100644 index 0000000000000..6d22faceb1a0b --- /dev/null +++ b/Documentation/usb/gadget_uvc.rst @@ -0,0 +1,352 @@ +======================= +Linux UVC Gadget Driver +======================= + +Overview +-------- +The UVC Gadget driver is a driver for hardware on the *device* side of a USB +connection. It is intended to run on a Linux system that has USB device-side +hardware such as boards with an OTG port. + +On the device system, once the driver is bound it appears as a V4L2 device with +the output capability. + +On the host side (once connected via USB cable), a device running the UVC Gadget +driver *and controlled by an appropriate userspace program* should appear as a UVC +specification compliant camera, and function appropriately with any program +designed to handle them. The userspace program running on the device system can +queue image buffers from a variety of sources to be transmitted via the USB +connection. Typically this would mean forwarding the buffers from a camera sensor +peripheral, but the source of the buffer is entirely dependent on the userspace +companion program. + +Configuring the device kernel +----------------------------- +The Kconfig options USB_CONFIGFS, USB_LIBCOMPOSITE, USB_CONFIGFS_F_UVC and +USB_F_UVC must be selected to enable support for the UVC gadget. + +Configuring the gadget through configfs +--------------------------------------- +The UVC Gadget expects to be configured through configfs using the UVC function. +This allows a significant degree of flexibility, as many of a UVC device's +settings can be controlled this way. + +Not all of the available attributes are described here. For a complete enumeration +see Documentation/ABI/testing/configfs-usb-gadget-uvc + +Assumptions +~~~~~~~~~~~ +This section assumes that you have mounted configfs at `/sys/kernel/config` and +created a gadget as `/sys/kernel/config/usb_gadget/g1`. + +The UVC Function +~~~~~~~~~~~~~~~~ + +The first step is to create the UVC function: + +.. code-block:: bash + + # These variables will be assumed throughout the rest of the document + CONFIGFS="/sys/kernel/config" + GADGET="$CONFIGFS/usb_gadget/g1" + FUNCTION="$GADGET/functions/uvc.0" + + mkdir -p $FUNCTION + +Formats and Frames +~~~~~~~~~~~~~~~~~~ + +You must configure the gadget by telling it which formats you support, as well +as the frame sizes and frame intervals that are supported for each format. In +the current implementation there is no way for the gadget to refuse to set a +format that the host instructs it to set, so it is important that this step is +completed *accurately* to ensure that the host never asks for a format that +can't be provided. + +Formats are created under the streaming/uncompressed and streaming/mjpeg configfs +groups, with the framesizes created under the formats in the following +structure: + +:: + + uvc.0 + + | + + streaming + + | + + mjpeg + + | | + | + mjpeg + + | | + | + 720p + | | + | + 1080p + | + + uncompressed + + | + + yuyv + + | + + 720p + | + + 1080p + +Each frame can then be configured with a width and height, plus the maximum +buffer size required to store a single frame, and finally with the supported +frame intervals for that format and framesize. Width and height are enumerated in +units of pixels, frame interval in units of 100ns. To create the structure +above with 2, 15 and 100 fps frameintervals for each framesize for example you +might do: + +.. code-block:: bash + + create_frame() { + # Example usage: + # create_frame + + WIDTH=$1 + HEIGHT=$2 + FORMAT=$3 + NAME=$4 + + wdir=$FUNCTION/streaming/$FORMAT/$NAME/${HEIGHT}p + + mkdir -p $wdir + echo $WIDTH > $wdir/wWidth + echo $HEIGHT > $wdir/wHeight + echo $(( $WIDTH * $HEIGHT * 2 )) > $wdir/dwMaxVideoFrameBufferSize + cat < $wdir/dwFrameInterval + 666666 + 100000 + 5000000 + EOF + } + + create_frame 1280 720 mjpeg mjpeg + create_frame 1920 1080 mjpeg mjpeg + create_frame 1280 720 uncompressed yuyv + create_frame 1920 1080 uncompressed yuyv + +The only uncompressed format currently supported is YUYV, which is detailed at +Documentation/userspace-api/media/v4l/pixfmt-packed.yuv.rst. + +Color Matching Descriptors +~~~~~~~~~~~~~~~~~~~~~~~~~~ +It's possible to specify some colometry information for each format you create. +This step is optional, and default information will be included if this step is +skipped; those default values follow those defined in the Color Matching Descriptor +section of the UVC specification. + +To create a Color Matching Descriptor, create a configfs item and set its three +attributes to your desired settings and then link to it from the format you wish +it to be associated with: + +.. code-block:: bash + + # Create a new Color Matching Descriptor + + mkdir $FUNCTION/streaming/color_matching/yuyv + pushd $FUNCTION/streaming/color_matching/yuyv + + echo 1 > bColorPrimaries + echo 1 > bTransferCharacteristics + echo 4 > bMatrixCoefficients + + popd + + # Create a symlink to the Color Matching Descriptor from the format's config item + ln -s $FUNCTION/streaming/color_matching/yuyv $FUNCTION/streaming/uncompressed/yuyv + +For details about the valid values, consult the UVC specification. Note that a +default color matching descriptor exists and is used by any format which does +not have a link to a different Color Matching Descriptor. It's possible to +change the attribute settings for the default descriptor, so bear in mind that if +you do that you are altering the defaults for any format that does not link to +a different one. + + +Header linking +~~~~~~~~~~~~~~ + +The UVC specification requires that Format and Frame descriptors be preceded by +Headers detailing things such as the number and cumulative size of the different +Format descriptors that follow. This and similar operations are acheived in +configfs by linking between the configfs item representing the header and the +config items representing those other descriptors, in this manner: + +.. code-block:: bash + + mkdir $FUNCTION/streaming/header/h + + # This section links the format descriptors and their associated frames + # to the header + cd $FUNCTION/streaming/header/h + ln -s ../../uncompressed/yuyv + ln -s ../../mjpeg/mjpeg + + # This section ensures that the header will be transmitted for each + # speed's set of descriptors. If support for a particular speed is not + # needed then it can be skipped here. + cd ../../class/fs + ln -s ../../header/h + cd ../../class/hs + ln -s ../../header/h + cd ../../class/ss + ln -s ../../header/h + cd ../../../control + mkdir header/h + ln -s header/h class/fs + ln -s header/h class/ss + + +Extension Unit Support +~~~~~~~~~~~~~~~~~~~~~~ + +A UVC Extension Unit (XU) basically provides a distinct unit to which control set +and get requests can be addressed. The meaning of those control requests is +entirely implementation dependent, but may be used to control settings outside +of the UVC specification (for example enabling or disabling video effects). An +XU can be inserted into the UVC unit chain or left free-hanging. + +Configuring an extension unit involves creating an entry in the appropriate +directory and setting its attributes appropriately, like so: + +.. code-block:: bash + + mkdir $FUNCTION/control/extensions/xu.0 + pushd $FUNCTION/control/extensions/xu.0 + + # Set the bUnitID of the Processing Unit as the source for this + # Extension Unit + echo 2 > baSourceID + + # Set this XU as the source of the default output terminal. This inserts + # the XU into the UVC chain between the PU and OT such that the final + # chain is IT > PU > XU.0 > OT + cat bUnitID > ../../terminal/output/default/baSourceID + + # Flag some controls as being available for use. The bmControl field is + # a bitmap with each bit denoting the availability of a particular + # control. For example to flag the 0th, 2nd and 3rd controls available: + echo 0x0d > bmControls + + # Set the GUID; this is a vendor-specific code identifying the XU. + echo -e -n "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" > guidExtensionCode + + popd + +The bmControls attribute and the baSourceID attribute are multi-value attributes. +This means that you may write multiple newline separated values to them. For +example to flag the 1st, 2nd, 9th and 10th controls as being available you would +need to write two values to bmControls, like so: + +.. code-block:: bash + + cat << EOF > bmControls + 0x03 + 0x03 + EOF + +The multi-value nature of the baSourceID attribute belies the fact that XUs can +be multiple-input, though note that this currently has no significant effect. + +The bControlSize attribute reflects the size of the bmControls attribute, and +similarly bNrInPins reflects the size of the baSourceID attributes. Both +attributes are automatically increased / decreased as you set bmControls and +baSourceID. It is also possible to manually increase or decrease bControlSize +which has the effect of truncating entries to the new size, or padding entries +out with 0x00, for example: + +:: + + $ cat bmControls + 0x03 + 0x05 + + $ cat bControlSize + 2 + + $ echo 1 > bControlSize + $ cat bmControls + 0x03 + + $ echo 2 > bControlSize + $ cat bmControls + 0x03 + 0x00 + +bNrInPins and baSourceID function in the same way. + +Custom Strings Support +~~~~~~~~~~~~~~~~~~~~~~ + +String descriptors that provide a textual description for various parts of a +USB device can be defined in the usual place within USB configfs, and may then +be linked to from the UVC function root or from Extension Unit directories to +assign those strings as descriptors: + +.. code-block:: bash + + # Create a string descriptor in us-EN and link to it from the function + # root. The name of the link is significant here, as it declares this + # descriptor to be intended for the Interface Association Descriptor. + # Other significant link names at function root are vs0_desc and vs1_desc + # For the VideoStreaming Interface 0/1 Descriptors. + + mkdir -p $GADGET/strings/0x409/iad_desc + echo -n "Interface Associaton Descriptor" > $GADGET/strings/0x409/iad_desc/s + ln -s $GADGET/strings/0x409/iad_desc $FUNCTION/iad_desc + + # Because the link to a String Descriptor from an Extension Unit clearly + # associates the two, the name of this link is not significant and may + # be set freely. + + mkdir -p $GADGET/strings/0x409/xu.0 + echo -n "A Very Useful Extension Unit" > $GADGET/strings/0x409/xu.0/s + ln -s $GADGET/strings/0x409/xu.0 $FUNCTION/control/extensions/xu.0 + +The interrupt endpoint +~~~~~~~~~~~~~~~~~~~~~~ + +The VideoControl interface has an optional interrupt endpoint which is by default +disabled. This is intended to support delayed response control set requests for +UVC (which should respond through the interrupt endpoint rather than tying up +endpoint 0). At present support for sending data through this endpoint is missing +and so it is left disabled to avoid confusion. If you wish to enable it you can +do so through the configfs attribute: + +.. code-block:: bash + + echo 1 > $FUNCTION/control/enable_interrupt_ep + +Bandwidth configuration +~~~~~~~~~~~~~~~~~~~~~~~ + +There are three attributes which control the bandwidth of the USB connection. +These live in the function root and can be set within limits: + +.. code-block:: bash + + # streaming_interval sets bInterval. Values range from 1..255 + echo 1 > $FUNCTION/streaming_interval + + # streaming_maxpacket sets wMaxPacketSize. Valid values are 1024/2048/3072 + echo 3072 > $FUNCTION/streaming_maxpacket + + # streaming_maxburst sets bMaxBurst. Valid values are 1..15 + echo 1 > $FUNCTION/streaming_maxburst + + +The values passed here will be clamped to valid values according to the UVC +specification (which depend on the speed of the USB connection). To understand +how the settings influence bandwidth you should consult the UVC specifications, +but a rule of thumb is that increasing the streaming_maxpacket setting will +improve bandwidth (and thus the maximum possible framerate), whilst the same is +true for streaming_maxburst provided the USB connection is running at SuperSpeed. +Increasing streaming_interval will reduce bandwidth and framerate. + +The userspace application +------------------------- +By itself, the UVC Gadget driver cannot do anything particularly interesting. It +must be paired with a userspace program that responds to UVC control requests and +fills buffers to be queued to the V4L2 device that the driver creates. How those +things are achieved is implementation dependent and beyond the scope of this +document, but a reference application can be found at https://gitlab.freedesktop.org/camera/uvc-gadget diff --git a/Documentation/usb/index.rst b/Documentation/usb/index.rst index b656c9be23ed2..27955dad95e12 100644 --- a/Documentation/usb/index.rst +++ b/Documentation/usb/index.rst @@ -16,6 +16,7 @@ USB support gadget_multi gadget_printer gadget_serial + gadget_uvc gadget-testing iuu_phoenix mass-storage From 82f5332d3b9872ab5b287e85c57b76d8bb640cd1 Mon Sep 17 00:00:00 2001 From: Ziyang Huang Date: Tue, 21 Feb 2023 18:30:04 +0800 Subject: [PATCH 059/898] usb: dwc2: drd: fix inconsistent mode if role-switch-default-mode="host" Some boards might use USB-A female connector for USB ports, however, the port could be connected to a dual-mode USB controller, making it also behaves as a peripheral device if male-to-male cable is connected. In this case, the dts looks like this: &usb0 { status = "okay"; dr_mode = "otg"; usb-role-switch; role-switch-default-mode = "host"; }; After boot, dwc2_ovr_init() sets GOTGCTL to GOTGCTL_AVALOVAL and call dwc2_force_mode() with parameter host=false, which causes inconsistent mode - The hardware is in peripheral mode while the kernel status is in host mode. What we can do now is to call dwc2_drd_role_sw_set() to switch to device mode, and everything should work just fine now, even switching back to none(default) mode afterwards. Fixes: e14acb876985 ("usb: dwc2: drd: add role-switch-default-node support") Cc: stable Signed-off-by: Ziyang Huang Tested-by: Fabrice Gasnier Acked-by: Minas Harutyunyan Reviewed-by: Amelie Delaunay Link: https://lore.kernel.org/r/SG2PR01MB204837BF68EDB0E343D2A375C9A59@SG2PR01MB2048.apcprd01.prod.exchangelabs.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/drd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/drd.c b/drivers/usb/dwc2/drd.c index d8d6493bc4576..a8605b02115b1 100644 --- a/drivers/usb/dwc2/drd.c +++ b/drivers/usb/dwc2/drd.c @@ -35,7 +35,8 @@ static void dwc2_ovr_init(struct dwc2_hsotg *hsotg) spin_unlock_irqrestore(&hsotg->lock, flags); - dwc2_force_mode(hsotg, (hsotg->dr_mode == USB_DR_MODE_HOST)); + dwc2_force_mode(hsotg, (hsotg->dr_mode == USB_DR_MODE_HOST) || + (hsotg->role_sw_default_mode == USB_DR_MODE_HOST)); } static int dwc2_ovr_avalid(struct dwc2_hsotg *hsotg, bool valid) From d9a02e016aaf5a57fb44e9a5e6da8ccd3b9e2e70 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 8 Mar 2023 14:39:54 -0500 Subject: [PATCH 060/898] dm crypt: avoid accessing uninitialized tasklet When neither "no_read_workqueue" nor "no_write_workqueue" are enabled, tasklet_trylock() in crypt_dec_pending() may still return false due to an uninitialized state, and dm-crypt will unnecessarily do io completion in io_queue workqueue instead of current context. Fix this by adding an 'in_tasklet' flag to dm_crypt_io struct and initialize it to false in crypt_io_init(). Set this flag to true in kcryptd_queue_crypt() before calling tasklet_schedule(). If set crypt_dec_pending() will punt io completion to a workqueue. This also nicely avoids the tasklet_trylock/unlock hack when tasklets aren't in use. Fixes: 8e14f610159d ("dm crypt: do not call bio_endio() from the dm-crypt tasklet") Cc: stable@vger.kernel.org Reported-by: Hou Tao Suggested-by: Ignat Korchagin Reviewed-by: Ignat Korchagin Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index faba1be572f90..2764b4ea18a3c 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -72,7 +72,9 @@ struct dm_crypt_io { struct crypt_config *cc; struct bio *base_bio; u8 *integrity_metadata; - bool integrity_metadata_from_pool; + bool integrity_metadata_from_pool:1; + bool in_tasklet:1; + struct work_struct work; struct tasklet_struct tasklet; @@ -1731,6 +1733,7 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, io->ctx.r.req = NULL; io->integrity_metadata = NULL; io->integrity_metadata_from_pool = false; + io->in_tasklet = false; atomic_set(&io->io_pending, 0); } @@ -1777,14 +1780,13 @@ static void crypt_dec_pending(struct dm_crypt_io *io) * our tasklet. In this case we need to delay bio_endio() * execution to after the tasklet is done and dequeued. */ - if (tasklet_trylock(&io->tasklet)) { - tasklet_unlock(&io->tasklet); - bio_endio(base_bio); + if (io->in_tasklet) { + INIT_WORK(&io->work, kcryptd_io_bio_endio); + queue_work(cc->io_queue, &io->work); return; } - INIT_WORK(&io->work, kcryptd_io_bio_endio); - queue_work(cc->io_queue, &io->work); + bio_endio(base_bio); } /* @@ -2233,6 +2235,7 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) * it is being executed with irqs disabled. */ if (in_hardirq() || irqs_disabled()) { + io->in_tasklet = true; tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); tasklet_schedule(&io->tasklet); return; From ecd240875e877d78fd03efbc62292f550872df3f Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Wed, 8 Mar 2023 22:06:03 +0100 Subject: [PATCH 061/898] ARM: dts: qcom: apq8026-lg-lenok: add missing reserved memory Turns out these two memory regions also need to be avoided, otherwise weird things will happen when Linux tries to use this memory. Signed-off-by: Luca Weiss Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230308-lenok-reserved-memory-v1-1-b8bf6ff01207@z3ntu.xyz --- arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts b/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts index de2fb1c01b6e3..b82381229adf6 100644 --- a/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts +++ b/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts @@ -27,6 +27,16 @@ }; reserved-memory { + sbl_region: sbl@2f00000 { + reg = <0x02f00000 0x100000>; + no-map; + }; + + external_image_region: external-image@3100000 { + reg = <0x03100000 0x200000>; + no-map; + }; + adsp_region: adsp@3300000 { reg = <0x03300000 0x1400000>; no-map; From 6df6fab9320bc9ebdf50136a01e7bf0ee5984c62 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 8 Mar 2023 13:31:29 +0100 Subject: [PATCH 062/898] arm64: dts: qcom: sm8450: correct WSA2 assigned clocks The WSA2 assigned-clocks were copied from WSA, but the WSA2 uses its own. Fixes: 14341e76dbc7 ("arm64: dts: qcom: sm8450: add Soundwire and LPASS") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230308123129.232642-1-krzysztof.kozlowski@linaro.org --- arch/arm64/boot/dts/qcom/sm8450.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index 1a744a33bcf4b..e77d188508fd9 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -2143,8 +2143,8 @@ <&q6prmcc LPASS_HW_DCODEC_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>, <&vamacro>; clock-names = "mclk", "npl", "macro", "dcodec", "fsgen"; - assigned-clocks = <&q6prmcc LPASS_CLK_ID_WSA_CORE_TX_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>, - <&q6prmcc LPASS_CLK_ID_WSA_CORE_TX_2X_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>; + assigned-clocks = <&q6prmcc LPASS_CLK_ID_WSA2_CORE_TX_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>, + <&q6prmcc LPASS_CLK_ID_WSA2_CORE_TX_2X_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>; assigned-clock-rates = <19200000>, <19200000>; #clock-cells = <0>; From 670b7d6569bf439c90d7aac48ec36ee3e3013754 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 2 Mar 2023 11:57:38 +0000 Subject: [PATCH 063/898] arm64: dts: qcom: sc8280xp: fix rx frame shapping info Some of the SoundWire frameshapping data seems incorrect, fix these values. Fixes: 1749a8ae49a3 ("arm64: dts: qcom: sc8280xp: add SoundWire and LPASS") Signed-off-by: Srinivas Kandagatla Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230302115741.7726-2-srinivas.kandagatla@linaro.org --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 0d02599d88672..906cbd81d268e 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -2504,12 +2504,12 @@ qcom,ports-sinterval-low = /bits/ 8 <0x03 0x1f 0x1f 0x07 0x00>; qcom,ports-offset1 = /bits/ 8 <0x00 0x00 0x0B 0x01 0x00>; qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x0B 0x00 0x00>; - qcom,ports-hstart = /bits/ 8 <0xff 0x03 0xff 0xff 0xff>; - qcom,ports-hstop = /bits/ 8 <0xff 0x06 0xff 0xff 0xff>; + qcom,ports-hstart = /bits/ 8 <0xff 0x03 0x00 0xff 0xff>; + qcom,ports-hstop = /bits/ 8 <0xff 0x06 0x0f 0xff 0xff>; qcom,ports-word-length = /bits/ 8 <0x01 0x07 0x04 0xff 0xff>; - qcom,ports-block-pack-mode = /bits/ 8 <0xff 0x00 0x01 0xff 0xff>; + qcom,ports-block-pack-mode = /bits/ 8 <0xff 0xff 0x01 0xff 0xff>; qcom,ports-lane-control = /bits/ 8 <0x01 0x00 0x00 0x00 0x00>; - qcom,ports-block-group-count = /bits/ 8 <0xff 0xff 0xff 0xff 0x00>; + qcom,ports-block-group-count = /bits/ 8 <0xff 0xff 0xff 0xff 0xff>; #sound-dai-cells = <1>; #address-cells = <2>; @@ -2609,15 +2609,15 @@ qcom,din-ports = <4>; qcom,dout-ports = <0>; - qcom,ports-sinterval-low = /bits/ 8 <0x01 0x03 0x03 0x03>; - qcom,ports-offset1 = /bits/ 8 <0x01 0x00 0x02 0x01>; + qcom,ports-sinterval-low = /bits/ 8 <0x01 0x01 0x03 0x03>; + qcom,ports-offset1 = /bits/ 8 <0x01 0x00 0x02 0x00>; qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x00 0x00>; qcom,ports-block-pack-mode = /bits/ 8 <0xff 0xff 0xff 0xff>; qcom,ports-hstart = /bits/ 8 <0xff 0xff 0xff 0xff>; qcom,ports-hstop = /bits/ 8 <0xff 0xff 0xff 0xff>; - qcom,ports-word-length = /bits/ 8 <0xff 0x00 0xff 0xff>; + qcom,ports-word-length = /bits/ 8 <0xff 0xff 0xff 0xff>; qcom,ports-block-group-count = /bits/ 8 <0xff 0xff 0xff 0xff>; - qcom,ports-lane-control = /bits/ 8 <0x00 0x01 0x00 0x00>; + qcom,ports-lane-control = /bits/ 8 <0x00 0x01 0x00 0x01>; status = "disabled"; }; From e43bd22cb377bf4c4e5b12daacaf02f5c24fbb16 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 2 Mar 2023 11:57:39 +0000 Subject: [PATCH 064/898] arm64: dts: qcom: sc8280xp: fix lpass tx macro clocks Tx macro soundwire clock is for some reason is incorrectly assigned to va macro, fix this and use tx macro clock instead. Fixes: 1749a8ae49a3 ("arm64: dts: qcom: sc8280xp: add SoundWire and LPASS") Signed-off-by: Srinivas Kandagatla Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230302115741.7726-3-srinivas.kandagatla@linaro.org --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 906cbd81d268e..42bfa9fa5b967 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -2600,7 +2600,7 @@ <&intc GIC_SPI 520 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "core", "wake"; - clocks = <&vamacro>; + clocks = <&txmacro>; clock-names = "iface"; label = "TX"; #sound-dai-cells = <1>; From 4def7aa377ba1dbe66335ca3ebe3aa5a5bc3fe67 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 2 Mar 2023 11:57:40 +0000 Subject: [PATCH 065/898] arm64: dts: qcom: sc8280xp-x13s: fix dmic sample rate The version of dmic that is on X13s panel supports clock frequency of range 1 Mhz to 4.8 MHz for normal operation. So correct the existing node to reflect this. Fixes: 8c1ea87e80b4 ("arm64: dts: qcom: sc8280xp-x13s: Add soundcard support") Signed-off-by: Srinivas Kandagatla Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230302115741.7726-4-srinivas.kandagatla@linaro.org --- arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index 96b36ce94ce07..7d076b9e85bf7 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -1075,7 +1075,7 @@ vdd-micb-supply = <&vreg_s10b>; - qcom,dmic-sample-rate = <600000>; + qcom,dmic-sample-rate = <4800000>; status = "okay"; }; From 2e498f35c385654396e94cf12e097522d3973d41 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 2 Mar 2023 11:57:41 +0000 Subject: [PATCH 066/898] arm64: dts: qcom: sc8280xp-x13s: fix va dmic dai links and routing VA dmics 0, 1, 2 micbias on X13s are connected to WCD MICBIAS1, WCD MICBIAS1 and WCD MICBIAS3 respectively. Reflect this in dt to get dmics working. Also fix dmics to go via VA Macro instead of TX macro to fix device switching. Fixes: 8c1ea87e80b4 ("arm64: dts: qcom: sc8280xp-x13s: Add soundcard support") Signed-off-by: Srinivas Kandagatla Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230302115741.7726-5-srinivas.kandagatla@linaro.org --- .../arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index 7d076b9e85bf7..fa412bea8985b 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -897,9 +897,9 @@ "VA DMIC0", "MIC BIAS1", "VA DMIC1", "MIC BIAS1", "VA DMIC2", "MIC BIAS3", - "TX DMIC0", "MIC BIAS1", - "TX DMIC1", "MIC BIAS2", - "TX DMIC2", "MIC BIAS3", + "VA DMIC0", "VA MIC BIAS1", + "VA DMIC1", "VA MIC BIAS1", + "VA DMIC2", "VA MIC BIAS3", "TX SWR_ADC1", "ADC2_OUTPUT"; wcd-playback-dai-link { @@ -950,7 +950,7 @@ va-dai-link { link-name = "VA Capture"; cpu { - sound-dai = <&q6apmbedai TX_CODEC_DMA_TX_3>; + sound-dai = <&q6apmbedai VA_CODEC_DMA_TX_0>; }; platform { From a5982b3971007161b423b39aa843bdb6713a9d44 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 2 Mar 2023 16:47:24 +0100 Subject: [PATCH 067/898] arm64: dts: qcom: sm8550: fix LPASS pinctrl slew base address The second LPASS pin controller IO address is supposed to be the MCC range which contains the slew rate registers. The Linux driver then accesses slew rate register with hard-coded offset (0xa000). However the DTS contained the address of slew rate register as the second IO address, thus any reads were effectively pass the memory space and lead to "Internal error: synchronous external aborts" when applying pin configuration. Fixes: 6de7f9c34358 ("arm64: dts: qcom: sm8550: add GPR and LPASS pin controller") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Neil Armstrong Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230302154724.856062-1-krzysztof.kozlowski@linaro.org --- arch/arm64/boot/dts/qcom/sm8550.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index 25f51245fe9bb..24aa724c12ea8 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -1997,7 +1997,7 @@ lpass_tlmm: pinctrl@6e80000 { compatible = "qcom,sm8550-lpass-lpi-pinctrl"; reg = <0 0x06e80000 0 0x20000>, - <0 0x0725a000 0 0x10000>; + <0 0x07250000 0 0x10000>; gpio-controller; #gpio-cells = <2>; gpio-ranges = <&lpass_tlmm 0 0 23>; From e607b3c1fa0e1579951acd00f9559a77f97d0927 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 7 Mar 2023 21:02:00 +0530 Subject: [PATCH 068/898] arm64: dts: qcom: sm8350: Mark UFS controller as cache coherent The UFS controller on SM8350 supports cache coherency, hence add the "dma-coherent" property to mark it as such. Fixes: 59c7cf814783 ("arm64: dts: qcom: sm8350: Add UFS nodes") Signed-off-by: Manivannan Sadhasivam Reviewed-by: Neil Armstrong Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230307153201.180626-1-manivannan.sadhasivam@linaro.org --- arch/arm64/boot/dts/qcom/sm8350.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index 1c97e28da6ad8..1a5a612d4234b 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -1664,6 +1664,7 @@ power-domains = <&gcc UFS_PHY_GDSC>; iommus = <&apps_smmu 0xe0 0x0>; + dma-coherent; clock-names = "core_clk", From 8ba961d4339c5db0e69ff6627606fe1f34c838e5 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 7 Mar 2023 21:02:01 +0530 Subject: [PATCH 069/898] arm64: dts: qcom: sm8450: Mark UFS controller as cache coherent The UFS controller on SM8450 supports cache coherency, hence add the "dma-coherent" property to mark it as such. Fixes: 07fa917a335e ("arm64: dts: qcom: sm8450: add ufs nodes") Signed-off-by: Manivannan Sadhasivam Reviewed-by: Neil Armstrong Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230307153201.180626-2-manivannan.sadhasivam@linaro.org --- arch/arm64/boot/dts/qcom/sm8450.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index e77d188508fd9..b285b1530c109 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -4003,6 +4003,7 @@ power-domains = <&gcc UFS_PHY_GDSC>; iommus = <&apps_smmu 0xe0 0x0>; + dma-coherent; interconnects = <&aggre1_noc MASTER_UFS_MEM 0 &mc_virt SLAVE_EBI1 0>, <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_UFS_MEM_CFG 0>; From b891251b40d4dc4cfd28341f62f6784c02ad3a78 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Tue, 7 Mar 2023 18:23:40 -0500 Subject: [PATCH 070/898] arm64: dts: qcom: sa8540p-ride: correct name of remoteproc_nsp0 firmware The cdsp.mbn firmware that's referenced in sa8540p-ride.dts is actually named cdsp0.mbn in the deliverables from Qualcomm. Let's go ahead and correct the name to match what's in Qualcomm's deliverable. Signed-off-by: Brian Masney Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230307232340.2370476-1-bmasney@redhat.com --- arch/arm64/boot/dts/qcom/sa8540p-ride.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sa8540p-ride.dts b/arch/arm64/boot/dts/qcom/sa8540p-ride.dts index 3ccb5ffdb3ca3..24fa449d48a66 100644 --- a/arch/arm64/boot/dts/qcom/sa8540p-ride.dts +++ b/arch/arm64/boot/dts/qcom/sa8540p-ride.dts @@ -241,7 +241,7 @@ }; &remoteproc_nsp0 { - firmware-name = "qcom/sa8540p/cdsp.mbn"; + firmware-name = "qcom/sa8540p/cdsp0.mbn"; status = "okay"; }; From ee1d5100c37e7a95af506c7addf018f652545ce6 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 8 Mar 2023 11:16:30 +0530 Subject: [PATCH 071/898] arm64: dts: qcom: sm8550: Mark UFS controller as cache coherent The UFS controller on SM8550 supports cache coherency, hence add the "dma-coherent" property to mark it as such. Fixes: 35cf1aaab169 ("arm64: dts: qcom: sm8550: Add UFS host controller and phy nodes") Signed-off-by: Manivannan Sadhasivam Reviewed-by: Neil Armstrong Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230308054630.7202-1-manivannan.sadhasivam@linaro.org --- arch/arm64/boot/dts/qcom/sm8550.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index 24aa724c12ea8..5d0888398b3c3 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -1905,6 +1905,7 @@ required-opps = <&rpmhpd_opp_nom>; iommus = <&apps_smmu 0x60 0x0>; + dma-coherent; interconnects = <&aggre1_noc MASTER_UFS_MEM 0 &mc_virt SLAVE_EBI1 0>, <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_UFS_MEM_CFG 0>; From f3d0fbad6765da25de7ecf6481af9b6ddb0b3793 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 9 Mar 2023 12:12:09 +0100 Subject: [PATCH 072/898] firmware: qcom: scm: fix bogus irq error at probe A recent commit added support for an optional interrupt which is only available on some platforms. Stop spamming the logs with bogus error messages on platforms that do not use this new optional resource: qcom_scm firmware:scm: error -ENXIO: IRQ index 0 not found Fixes: 6bf325992236 ("firmware: qcom: scm: Add wait-queue handling logic") Cc: Guru Das Srinagesh Cc: Sibi Sankar Signed-off-by: Johan Hovold Tested-by: Steev Klimaszewski # Thinkpad X13s Acked-by: Guru Das Srinagesh Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230309111209.31606-1-johan+linaro@kernel.org --- drivers/firmware/qcom_scm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c index 468d4d5ab550c..b1e11f85b8054 100644 --- a/drivers/firmware/qcom_scm.c +++ b/drivers/firmware/qcom_scm.c @@ -1479,7 +1479,7 @@ static int qcom_scm_probe(struct platform_device *pdev) init_completion(&__scm->waitq_comp); - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq < 0) { if (irq != -ENXIO) return irq; From 3268a4d9b0b85a4382e93bdf7be5400a73db74c5 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 14 Dec 2022 11:23:16 +0800 Subject: [PATCH 073/898] power: supply: rk817: Fix unsigned comparison with less than zero The tmp is defined as u32 type, which results in invalid processing of tmp<0 in function rk817_read_or_set_full_charge_on_boot(). Therefore, drop the comparison. drivers/power/supply/rk817_charger.c:828 rk817_read_or_set_full_charge_on_boot() warn: unsigned 'tmp' is never less than zero. drivers/power/supply/rk817_charger.c:788 rk817_read_or_set_full_charge_on_boot() warn: unsigned 'tmp' is never less than zero. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3444 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Tested-by: Chris Morgan Signed-off-by: Sebastian Reichel --- drivers/power/supply/rk817_charger.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/power/supply/rk817_charger.c b/drivers/power/supply/rk817_charger.c index 4f9c1c4179165..36f807b5ec442 100644 --- a/drivers/power/supply/rk817_charger.c +++ b/drivers/power/supply/rk817_charger.c @@ -785,8 +785,6 @@ rk817_read_or_set_full_charge_on_boot(struct rk817_charger *charger, regmap_bulk_read(rk808->regmap, RK817_GAS_GAUGE_Q_PRES_H3, bulk_reg, 4); tmp = get_unaligned_be32(bulk_reg); - if (tmp < 0) - tmp = 0; boot_charge_mah = ADC_TO_CHARGE_UAH(tmp, charger->res_div) / 1000; /* @@ -825,8 +823,6 @@ rk817_read_or_set_full_charge_on_boot(struct rk817_charger *charger, regmap_bulk_read(rk808->regmap, RK817_GAS_GAUGE_Q_PRES_H3, bulk_reg, 4); tmp = get_unaligned_be32(bulk_reg); - if (tmp < 0) - tmp = 0; boot_charge_mah = ADC_TO_CHARGE_UAH(tmp, charger->res_div) / 1000; regmap_bulk_read(rk808->regmap, RK817_GAS_GAUGE_OCV_VOL_H, bulk_reg, 2); From 14c76b2e75bca4d96e2b85a0c12aa43e84fe3f74 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Mon, 12 Dec 2022 13:38:57 -0800 Subject: [PATCH 074/898] power: supply: cros_usbpd: reclassify "default case!" as debug This doesn't need to be printed every second as an error: ... <3>[17438.628385] cros-usbpd-charger cros-usbpd-charger.3.auto: Port 1: default case! <3>[17439.634176] cros-usbpd-charger cros-usbpd-charger.3.auto: Port 1: default case! <3>[17440.640298] cros-usbpd-charger cros-usbpd-charger.3.auto: Port 1: default case! ... Reduce priority from ERROR to DEBUG. Signed-off-by: Grant Grundler Reviewed-by: Guenter Roeck Signed-off-by: Sebastian Reichel --- drivers/power/supply/cros_usbpd-charger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/cros_usbpd-charger.c b/drivers/power/supply/cros_usbpd-charger.c index cadb6a0c2cc7e..b6c96376776a9 100644 --- a/drivers/power/supply/cros_usbpd-charger.c +++ b/drivers/power/supply/cros_usbpd-charger.c @@ -276,7 +276,7 @@ static int cros_usbpd_charger_get_power_info(struct port_data *port) port->psy_current_max = 0; break; default: - dev_err(dev, "Port %d: default case!\n", port->port_number); + dev_dbg(dev, "Port %d: default case!\n", port->port_number); port->psy_usb_type = POWER_SUPPLY_USB_TYPE_SDP; } From bf6c880d5d1448489ebf92e2d13d5713ff644930 Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Tue, 6 Dec 2022 12:17:23 +0300 Subject: [PATCH 075/898] power: supply: axp288_fuel_gauge: Added check for negative values Variable 'pirq', which may receive negative value in platform_get_irq(). Used as an index in a function regmap_irq_get_virq(). Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Denis Arefev Reviewed-by: Hans de Goede Signed-off-by: Sebastian Reichel --- drivers/power/supply/axp288_fuel_gauge.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/power/supply/axp288_fuel_gauge.c b/drivers/power/supply/axp288_fuel_gauge.c index 8e6f8a6550790..05f4131784629 100644 --- a/drivers/power/supply/axp288_fuel_gauge.c +++ b/drivers/power/supply/axp288_fuel_gauge.c @@ -724,6 +724,8 @@ static int axp288_fuel_gauge_probe(struct platform_device *pdev) for (i = 0; i < AXP288_FG_INTR_NUM; i++) { pirq = platform_get_irq(pdev, i); + if (pirq < 0) + continue; ret = regmap_irq_get_virq(axp20x->regmap_irqc, pirq); if (ret < 0) return dev_err_probe(dev, ret, "getting vIRQ %d\n", pirq); From 47c29d69212911f50bdcdd0564b5999a559010d4 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Fri, 10 Mar 2023 01:47:28 +0800 Subject: [PATCH 076/898] power: supply: bq24190: Fix use after free bug in bq24190_remove due to race condition In bq24190_probe, &bdi->input_current_limit_work is bound with bq24190_input_current_limit_work. When external power changed, it will call bq24190_charger_external_power_changed to start the work. If we remove the module which will call bq24190_remove to make cleanup, there may be a unfinished work. The possible sequence is as follows: CPU0 CPUc1 |bq24190_input_current_limit_work bq24190_remove | power_supply_unregister | device_unregister | power_supply_dev_release| kfree(psy) | | | power_supply_get_property_from_supplier | //use Fix it by finishing the work before cleanup in the bq24190_remove Fixes: 97774672573a ("power_supply: Initialize changed_work before calling device_add") Signed-off-by: Zheng Wang Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq24190_charger.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c index be34b98484508..de67b985f0a91 100644 --- a/drivers/power/supply/bq24190_charger.c +++ b/drivers/power/supply/bq24190_charger.c @@ -1906,6 +1906,7 @@ static void bq24190_remove(struct i2c_client *client) struct bq24190_dev_info *bdi = i2c_get_clientdata(client); int error; + cancel_delayed_work_sync(&bdi->input_current_limit_work); error = pm_runtime_resume_and_get(bdi->dev); if (error < 0) dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error); From c7d9e628b8ff4d52a365a441bdacb3209ee83c81 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 10 Mar 2023 12:15:24 +0100 Subject: [PATCH 077/898] efi/libstub: zboot: Mark zboot EFI application as NX compatible Now that the zboot loader will invoke the EFI memory attributes protocol to remap the decompressed code and rodata as read-only/executable, we can set the PE/COFF header flag that indicates to the firmware that the application does not rely on writable memory being executable at the same time. Cc: # v6.2+ Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/zboot-header.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/zboot-header.S b/drivers/firmware/efi/libstub/zboot-header.S index ec4525d40e0cf..445cb646eaaaf 100644 --- a/drivers/firmware/efi/libstub/zboot-header.S +++ b/drivers/firmware/efi/libstub/zboot-header.S @@ -63,7 +63,7 @@ __efistub_efi_zboot_header: .long .Lefi_header_end - .Ldoshdr .long 0 .short IMAGE_SUBSYSTEM_EFI_APPLICATION - .short 0 + .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT #ifdef CONFIG_64BIT .quad 0, 0, 0, 0 #else From 3c60f67b4bd1bc01fa9194e9dc925ac6cb56156c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 10 Mar 2023 12:55:41 +0100 Subject: [PATCH 078/898] efi/libstub: arm64: Remap relocated image with strict permissions After relocating the executable image, use the EFI memory attributes protocol to remap the code and data regions with the appropriate permissions. Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/arm64-stub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index d4a6b12a87413..b996553cdb4c3 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -139,6 +139,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, *image_addr = *reserve_addr; memcpy((void *)*image_addr, _text, kernel_size); caches_clean_inval_pou(*image_addr, *image_addr + kernel_codesize); + efi_remap_image(*image_addr, *reserve_size, kernel_codesize); return EFI_SUCCESS; } From 3c66bb1918c262dd52fb4221a8d372619c5da70a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 10 Mar 2023 13:30:05 +0100 Subject: [PATCH 079/898] arm64: efi: Set NX compat flag in PE/COFF header The PE/COFF header has a NX compat flag which informs the firmware that the application does not rely on memory regions being mapped with both executable and writable permissions at the same time. This is typically used by the firmware to decide whether it can set the NX attribute on all allocations it returns, but going forward, it may be used to enforce a policy that only permits applications with the NX flag set to be loaded to begin wiht in some configurations, e.g., when Secure Boot is in effect. Even though the arm64 version of the EFI stub may relocate the kernel before executing it, it always did so after disabling the MMU, and so we were always in line with what the NX compat flag conveys, we just never bothered to set it. So let's set the flag now. Cc: Signed-off-by: Ard Biesheuvel --- arch/arm64/kernel/efi-header.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S index 28d8a5dca5f12..d731b4655df8e 100644 --- a/arch/arm64/kernel/efi-header.S +++ b/arch/arm64/kernel/efi-header.S @@ -66,7 +66,7 @@ .long .Lefi_header_end - .L_head // SizeOfHeaders .long 0 // CheckSum .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem - .short 0 // DllCharacteristics + .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT // DllCharacteristics .quad 0 // SizeOfStackReserve .quad 0 // SizeOfStackCommit .quad 0 // SizeOfHeapReserve From e57d06527738798039b8e91af762fbd33881b34d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Mar 2023 09:45:09 -0500 Subject: [PATCH 080/898] NFS & NFSD: Update GSS dependencies Geert reports that: > On v6.2, "make ARCH=m68k defconfig" gives you > CONFIG_RPCSEC_GSS_KRB5=m > On v6.3, it became builtin, due to dropping the dependencies on > the individual crypto modules. > > $ grep -E "CRYPTO_(MD5|DES|CBC|CTS|ECB|HMAC|SHA1|AES)" .config > CONFIG_CRYPTO_AES=y > CONFIG_CRYPTO_AES_TI=m > CONFIG_CRYPTO_DES=m > CONFIG_CRYPTO_CBC=m > CONFIG_CRYPTO_CTS=m > CONFIG_CRYPTO_ECB=m > CONFIG_CRYPTO_HMAC=m > CONFIG_CRYPTO_MD5=m > CONFIG_CRYPTO_SHA1=m This behavior is triggered by the "default y" in the definition of RPCSEC_GSS. The "default y" was added in 2010 by commit df486a25900f ("NFS: Fix the selection of security flavours in Kconfig"). However, svc_gss_principal was removed in 2012 by commit 03a4e1f6ddf2 ("nfsd4: move principal name into svc_cred"), so the 2010 fix is no longer necessary. We can safely change the NFS_V4 and NFSD_V4 dependencies back to RPCSEC_GSS_KRB5 to get the nicer v6.2 behavior back. Selecting KRB5 symbolically represents the true requirement here: that all spec-compliant NFSv4 implementations must have Kerberos available to use. Reported-by: Geert Uytterhoeven Fixes: dfe9a123451a ("SUNRPC: Enable rpcsec_gss_krb5.ko to be built without CRYPTO_DES") Signed-off-by: Chuck Lever --- fs/nfs/Kconfig | 2 +- fs/nfsd/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 14a72224b6571..450d6c3bc05e2 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -75,7 +75,7 @@ config NFS_V3_ACL config NFS_V4 tristate "NFS client support for NFS version 4" depends on NFS_FS - select SUNRPC_GSS + select RPCSEC_GSS_KRB5 select KEYS help This option enables support for version 4 of the NFS protocol diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 7c441f2bd4440..43b88eaf0673a 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -73,7 +73,7 @@ config NFSD_V4 bool "NFS server support for NFS version 4" depends on NFSD && PROC_FS select FS_POSIX_ACL - select SUNRPC_GSS + select RPCSEC_GSS_KRB5 select CRYPTO select CRYPTO_MD5 select CRYPTO_SHA256 From 50c6b976fb66f623440066f8e44309b4c2f687a0 Mon Sep 17 00:00:00 2001 From: Philippe Troin Date: Mon, 6 Mar 2023 10:18:36 -0800 Subject: [PATCH 081/898] HID: add HP 13t-aw100 & 14t-ea100 digitizer battery quirks Similar to many other devices using the Synopsys Designware Elantech hardware, HP Spectre x360 13t-aw100 and 14t-ea100 report an empty battery devices, supposedly for the active stylus. Apply the HID_BATTERY_QUIRK_IGNORE quirk to ignore the battery reports from these devices. Note that there are multiple versions of the panel installed in the 14t-ea100. Signed-off-by: Philippe Troin Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/hid-input.c | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 63545cd307e5f..22e716b66fb83 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -420,6 +420,9 @@ #define I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN 0x261A #define I2C_DEVICE_ID_SURFACE_GO2_TOUCHSCREEN 0x2A1C #define I2C_DEVICE_ID_LENOVO_YOGA_C630_TOUCHSCREEN 0x279F +#define I2C_DEVICE_ID_HP_SPECTRE_X360_13T_AW100 0x29F5 +#define I2C_DEVICE_ID_HP_SPECTRE_X360_14T_EA100_V1 0x2BED +#define I2C_DEVICE_ID_HP_SPECTRE_X360_14T_EA100_V2 0x2BEE #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 7fc967964dd81..5c65a584b3fa0 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -398,6 +398,12 @@ static const struct hid_device_id hid_battery_quirks[] = { HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_LENOVO_YOGA_C630_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, + { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_13T_AW100), + HID_BATTERY_QUIRK_IGNORE }, + { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_14T_EA100_V1), + HID_BATTERY_QUIRK_IGNORE }, + { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_14T_EA100_V2), + HID_BATTERY_QUIRK_IGNORE }, {} }; From 0b04d4c0542e8573a837b1d81b94209e48723b25 Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Mon, 6 Mar 2023 12:25:49 +0000 Subject: [PATCH 082/898] f2fs: Fix f2fs_truncate_partial_nodes ftrace event Fix the nid_t field so that its size is correctly reported in the text format embedded in trace.dat files. As it stands, it is reported as being of size 4: field:nid_t nid[3]; offset:24; size:4; signed:0; Instead of 12: field:nid_t nid[3]; offset:24; size:12; signed:0; This also fixes the reported offset of subsequent fields so that they match with the actual struct layout. Signed-off-by: Douglas Raillard Reviewed-by: Mukesh Ojha Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 1322d34a5dfc1..99cbc5949e3cd 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -512,7 +512,7 @@ TRACE_EVENT(f2fs_truncate_partial_nodes, TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(nid_t, nid[3]) + __array(nid_t, nid, 3) __field(int, depth) __field(int, err) ), From dbf56d2fb53b7397002219bd221cfc72aadfdc82 Mon Sep 17 00:00:00 2001 From: Alessandro Manca Date: Fri, 10 Mar 2023 17:49:33 +0100 Subject: [PATCH 083/898] HID: topre: Add support for 87 keys Realforce R2 The tenkeyless version of the Realforce R2 has the same issue of the full size one, the report fixup is needed to make n-key rollover work instead of 6 key rollover Signed-off-by: Alessandro Manca Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 2 +- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-topre.c | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 82f64fb31fdab..4ce012f83253e 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -1122,7 +1122,7 @@ config HID_TOPRE tristate "Topre REALFORCE keyboards" depends on HID help - Say Y for N-key rollover support on Topre REALFORCE R2 108 key keyboards. + Say Y for N-key rollover support on Topre REALFORCE R2 108/87 key keyboards. config HID_THINGM tristate "ThingM blink(1) USB RGB LED" diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 22e716b66fb83..c2e9b6d1fd7d3 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1252,6 +1252,7 @@ #define USB_VENDOR_ID_TOPRE 0x0853 #define USB_DEVICE_ID_TOPRE_REALFORCE_R2_108 0x0148 +#define USB_DEVICE_ID_TOPRE_REALFORCE_R2_87 0x0146 #define USB_VENDOR_ID_TOPSEED 0x0766 #define USB_DEVICE_ID_TOPSEED_CYBERLINK 0x0204 diff --git a/drivers/hid/hid-topre.c b/drivers/hid/hid-topre.c index 88a91cdad5f80..d1d5ca310eadc 100644 --- a/drivers/hid/hid-topre.c +++ b/drivers/hid/hid-topre.c @@ -36,6 +36,8 @@ static __u8 *topre_report_fixup(struct hid_device *hdev, __u8 *rdesc, static const struct hid_device_id topre_id_table[] = { { HID_USB_DEVICE(USB_VENDOR_ID_TOPRE, USB_DEVICE_ID_TOPRE_REALFORCE_R2_108) }, + { HID_USB_DEVICE(USB_VENDOR_ID_TOPRE, + USB_DEVICE_ID_TOPRE_REALFORCE_R2_87) }, { } }; MODULE_DEVICE_TABLE(hid, topre_id_table); From d6f7ff9dd387861fa30cbc6375d15b586da17d33 Mon Sep 17 00:00:00 2001 From: Jesus Sanchez-Palencia Date: Wed, 8 Mar 2023 16:48:36 -0800 Subject: [PATCH 084/898] libbpf: Revert poisoning of strlcpy This reverts commit 6d0c4b11e743("libbpf: Poison strlcpy()"). It added the pragma poison directive to libbpf_internal.h to protect against accidental usage of strlcpy but ended up breaking the build for toolchains based on libcs which provide the strlcpy() declaration from string.h (e.g. uClibc-ng). The include order which causes the issue is: string.h, from Iibbpf_common.h:12, from libbpf.h:20, from libbpf_internal.h:26, from strset.c:9: Fixes: 6d0c4b11e743 ("libbpf: Poison strlcpy()") Signed-off-by: Jesus Sanchez-Palencia Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230309004836.2808610-1-jesussanp@google.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf_internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index fbaf683353945..e4d05662a96ce 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -20,8 +20,8 @@ /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 -/* prevent accidental re-addition of reallocarray()/strlcpy() */ -#pragma GCC poison reallocarray strlcpy +/* prevent accidental re-addition of reallocarray() */ +#pragma GCC poison reallocarray #include "libbpf.h" #include "btf.h" From 32513d40d908b267508d37994753d9bd1600914b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 10 Mar 2023 12:41:18 -0800 Subject: [PATCH 085/898] selftests/bpf: Fix progs/find_vma_fail1.c build error. The commit 11e456cae91e ("selftests/bpf: Fix compilation errors: Assign a value to a constant") fixed the issue cleanly in bpf-next. This is an alternative fix in bpf tree to avoid merge conflict between bpf and bpf-next. Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/find_vma_fail1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail1.c b/tools/testing/selftests/bpf/progs/find_vma_fail1.c index b3b326b8e2d1c..6dab9cffda132 100644 --- a/tools/testing/selftests/bpf/progs/find_vma_fail1.c +++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c @@ -2,6 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include "vmlinux.h" #include +#define vm_flags vm_start char _license[] SEC("license") = "GPL"; From e8c8361cfdbf450f760e8a2bdbd4222d1947366b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 10 Mar 2023 12:47:51 -0800 Subject: [PATCH 086/898] selftests/bpf: Fix progs/test_deny_namespace.c issues. The following build error can be seen: progs/test_deny_namespace.c:22:19: error: call to undeclared function 'BIT_LL'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] __u64 cap_mask = BIT_LL(CAP_SYS_ADMIN); The struct kernel_cap_struct no longer exists in the kernel as well. Adjust bpf prog to fix both issues. Fixes: f122a08b197d ("capability: just use a 'u64' instead of a 'u32[2]' array") Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/test_deny_namespace.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c index 591104e79812e..e96b901a733c5 100644 --- a/tools/testing/selftests/bpf/progs/test_deny_namespace.c +++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c @@ -5,12 +5,10 @@ #include #include -struct kernel_cap_struct { - __u64 val; -} __attribute__((preserve_access_index)); +typedef struct { unsigned long long val; } kernel_cap_t; struct cred { - struct kernel_cap_struct cap_effective; + kernel_cap_t cap_effective; } __attribute__((preserve_access_index)); char _license[] SEC("license") = "GPL"; @@ -18,8 +16,8 @@ char _license[] SEC("license") = "GPL"; SEC("lsm.s/userns_create") int BPF_PROG(test_userns_create, const struct cred *cred, int ret) { - struct kernel_cap_struct caps = cred->cap_effective; - __u64 cap_mask = BIT_LL(CAP_SYS_ADMIN); + kernel_cap_t caps = cred->cap_effective; + __u64 cap_mask = 1ULL << CAP_SYS_ADMIN; if (ret) return 0; From c3701185ee1973845db088d8b0fc443397ab0eb2 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Fri, 10 Mar 2023 19:22:48 -0500 Subject: [PATCH 087/898] iio: dac: cio-dac: Fix max DAC write value check for 12-bit The CIO-DAC series of devices only supports DAC values up to 12-bit rather than 16-bit. Trying to write a 16-bit value results in only the lower 12 bits affecting the DAC output which is not what the user expects. Instead, adjust the DAC write value check to reject values larger than 12-bit so that they fail explicitly as invalid for the user. Fixes: 3b8df5fd526e ("iio: Add IIO support for the Measurement Computing CIO-DAC family") Cc: stable@vger.kernel.org Signed-off-by: William Breathitt Gray Link: https://lore.kernel.org/r/20230311002248.8548-1-william.gray@linaro.org Signed-off-by: Jonathan Cameron --- drivers/iio/dac/cio-dac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/dac/cio-dac.c b/drivers/iio/dac/cio-dac.c index 791dd999cf291..18a64f72fc188 100644 --- a/drivers/iio/dac/cio-dac.c +++ b/drivers/iio/dac/cio-dac.c @@ -66,8 +66,8 @@ static int cio_dac_write_raw(struct iio_dev *indio_dev, if (mask != IIO_CHAN_INFO_RAW) return -EINVAL; - /* DAC can only accept up to a 16-bit value */ - if ((unsigned int)val > 65535) + /* DAC can only accept up to a 12-bit value */ + if ((unsigned int)val > 4095) return -EINVAL; priv->chan_out_states[chan->channel] = val; From 44ac5abac86b20856e6d9e5e5e40dcc2623fe330 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sun, 5 Mar 2023 23:00:04 +0100 Subject: [PATCH 088/898] Documentation/security-bugs: move from admin-guide/ to process/ Jiri Kosina, Jonathan Corbet, and Willy Tarreau all expressed a desire to move this document under process/. Create a new section for security issues in the index and group it with embargoed-hardware-issues. I'm doing this at the start of the series to make all the subsequent changes show up in 'git blame'. Existing references were updated using: git grep -l security-bugs ':!Documentation/translations/' | xargs sed -i 's|admin-guide/security-bugs|process/security-bugs|g' git grep -l security-bugs Documentation/translations/ | xargs sed -i 's|Documentation/admin-guide/security-bugs|Documentation/process/security-bugs|g' git grep -l security-bugs Documentation/translations/ | xargs sed -i '/Original:/s|\.\./admin-guide/security-bugs|\.\./process/security-bugs|g' Notably, the page is not moved in the translations (due to my lack of knowledge of these languages), but the translations have been updated to point to the new location of the original document where these references exist. Link: https://lore.kernel.org/all/nycvar.YFH.7.76.2206062326230.10851@cbobk.fhfr.pm/ Suggested-by: Jiri Kosina Cc: Alex Shi Cc: Yanteng Si Cc: Hu Haowen Cc: Federico Vaga Cc: Tsugikazu Shibata Cc: Minchan Kim Cc: Jeimi Lee Cc: Carlos Bilbao Cc: Akira Yokosawa Signed-off-by: Vegard Nossum Acked-by: Carlos Bilbao Reviewed-by: Yanteng Si Reviewed-by: Akira Yokosawa Acked-by: Federico Vaga Reviewed-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20230305220010.20895-2-vegard.nossum@oracle.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/index.rst | 1 - Documentation/admin-guide/reporting-issues.rst | 4 ++-- Documentation/process/howto.rst | 2 +- Documentation/process/index.rst | 9 ++++++++- Documentation/process/researcher-guidelines.rst | 2 +- Documentation/{admin-guide => process}/security-bugs.rst | 0 Documentation/process/stable-kernel-rules.rst | 2 +- Documentation/process/submitting-patches.rst | 2 +- .../translations/it_IT/admin-guide/security-bugs.rst | 2 +- .../translations/it_IT/process/submitting-patches.rst | 2 +- Documentation/translations/ja_JP/howto.rst | 2 +- Documentation/translations/ko_KR/howto.rst | 2 +- Documentation/translations/sp_SP/howto.rst | 2 +- .../translations/sp_SP/process/submitting-patches.rst | 2 +- .../translations/zh_CN/admin-guide/security-bugs.rst | 2 +- Documentation/translations/zh_CN/process/howto.rst | 2 +- .../translations/zh_TW/admin-guide/security-bugs.rst | 2 +- Documentation/translations/zh_TW/process/howto.rst | 2 +- MAINTAINERS | 4 ++-- 19 files changed, 26 insertions(+), 20 deletions(-) rename Documentation/{admin-guide => process}/security-bugs.rst (100%) diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index 0ad7e7ec0d274..09a563bbe3e77 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -36,7 +36,6 @@ problems and bugs in particular. reporting-issues reporting-regressions - security-bugs bug-hunting bug-bisect tainted-kernels diff --git a/Documentation/admin-guide/reporting-issues.rst b/Documentation/admin-guide/reporting-issues.rst index ec62151fe6720..2fd5a030235ad 100644 --- a/Documentation/admin-guide/reporting-issues.rst +++ b/Documentation/admin-guide/reporting-issues.rst @@ -395,7 +395,7 @@ might want to be aware of; it for example explains how to add your issue to the list of tracked regressions, to ensure it won't fall through the cracks. What qualifies as security issue is left to your judgment. Consider reading -Documentation/admin-guide/security-bugs.rst before proceeding, as it +Documentation/process/security-bugs.rst before proceeding, as it provides additional details how to best handle security issues. An issue is a 'really severe problem' when something totally unacceptably bad @@ -1269,7 +1269,7 @@ them when sending the report by mail. If you filed it in a bug tracker, forward the report's text to these addresses; but on top of it put a small note where you mention that you filed it with a link to the ticket. -See Documentation/admin-guide/security-bugs.rst for more information. +See Documentation/process/security-bugs.rst for more information. Duties after the report went out diff --git a/Documentation/process/howto.rst b/Documentation/process/howto.rst index cb6abcb2b6d07..deb8235e20ff1 100644 --- a/Documentation/process/howto.rst +++ b/Documentation/process/howto.rst @@ -138,7 +138,7 @@ required reading: philosophy and is very important for people moving to Linux from development on other Operating Systems. - :ref:`Documentation/admin-guide/security-bugs.rst ` + :ref:`Documentation/process/security-bugs.rst ` If you feel you have found a security problem in the Linux kernel, please follow the steps in this document to help notify the kernel developers, and help solve the issue. diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst index d4b6217472b0a..565df595152e1 100644 --- a/Documentation/process/index.rst +++ b/Documentation/process/index.rst @@ -35,6 +35,14 @@ Below are the essential guides that every developer should read. kernel-enforcement-statement kernel-driver-statement +For security issues, see: + +.. toctree:: + :maxdepth: 1 + + security-bugs + embargoed-hardware-issues + Other guides to the community that are of interest to most developers are: .. toctree:: @@ -47,7 +55,6 @@ Other guides to the community that are of interest to most developers are: submit-checklist kernel-docs deprecated - embargoed-hardware-issues maintainers researcher-guidelines diff --git a/Documentation/process/researcher-guidelines.rst b/Documentation/process/researcher-guidelines.rst index afc944e0e8986..9fcfed3c350be 100644 --- a/Documentation/process/researcher-guidelines.rst +++ b/Documentation/process/researcher-guidelines.rst @@ -68,7 +68,7 @@ Before contributing, carefully read the appropriate documentation: * Documentation/process/development-process.rst * Documentation/process/submitting-patches.rst * Documentation/admin-guide/reporting-issues.rst -* Documentation/admin-guide/security-bugs.rst +* Documentation/process/security-bugs.rst Then send a patch (including a commit log with all the details listed below) and follow up on any feedback from other developers. diff --git a/Documentation/admin-guide/security-bugs.rst b/Documentation/process/security-bugs.rst similarity index 100% rename from Documentation/admin-guide/security-bugs.rst rename to Documentation/process/security-bugs.rst diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst index 2fd8aa593a285..51df1197d5abd 100644 --- a/Documentation/process/stable-kernel-rules.rst +++ b/Documentation/process/stable-kernel-rules.rst @@ -39,7 +39,7 @@ Procedure for submitting patches to the -stable tree Security patches should not be handled (solely) by the -stable review process but should follow the procedures in - :ref:`Documentation/admin-guide/security-bugs.rst `. + :ref:`Documentation/process/security-bugs.rst `. For all other submissions, choose one of the following procedures ----------------------------------------------------------------- diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index eac7167dce83d..7b223f306efa2 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -254,7 +254,7 @@ If you have a patch that fixes an exploitable security bug, send that patch to security@kernel.org. For severe bugs, a short embargo may be considered to allow distributors to get the patch out to users; in such cases, obviously, the patch should not be sent to any public lists. See also -Documentation/admin-guide/security-bugs.rst. +Documentation/process/security-bugs.rst. Patches that fix a severe bug in a released kernel should be directed toward the stable maintainers by putting a line like this:: diff --git a/Documentation/translations/it_IT/admin-guide/security-bugs.rst b/Documentation/translations/it_IT/admin-guide/security-bugs.rst index 18a5822c7d9a8..20994f4bfa31e 100644 --- a/Documentation/translations/it_IT/admin-guide/security-bugs.rst +++ b/Documentation/translations/it_IT/admin-guide/security-bugs.rst @@ -1,6 +1,6 @@ .. include:: ../disclaimer-ita.rst -:Original: :ref:`Documentation/admin-guide/security-bugs.rst ` +:Original: :ref:`Documentation/process/security-bugs.rst ` .. _it_securitybugs: diff --git a/Documentation/translations/it_IT/process/submitting-patches.rst b/Documentation/translations/it_IT/process/submitting-patches.rst index c2cfa0948b2bc..167fce813032c 100644 --- a/Documentation/translations/it_IT/process/submitting-patches.rst +++ b/Documentation/translations/it_IT/process/submitting-patches.rst @@ -272,7 +272,7 @@ embargo potrebbe essere preso in considerazione per dare il tempo alle distribuzioni di prendere la patch e renderla disponibile ai loro utenti; in questo caso, ovviamente, la patch non dovrebbe essere inviata su alcuna lista di discussione pubblica. Leggete anche -Documentation/admin-guide/security-bugs.rst. +Documentation/process/security-bugs.rst. Patch che correggono bachi importanti su un kernel già rilasciato, dovrebbero essere inviate ai manutentori dei kernel stabili aggiungendo la seguente riga:: diff --git a/Documentation/translations/ja_JP/howto.rst b/Documentation/translations/ja_JP/howto.rst index 9b0b3436dfcf7..8d856ebe873c6 100644 --- a/Documentation/translations/ja_JP/howto.rst +++ b/Documentation/translations/ja_JP/howto.rst @@ -167,7 +167,7 @@ linux-api@vger.kernel.org に送ることを勧めます。 このドキュメントは Linux 開発の思想を理解するのに非常に重要です。 そして、他のOSでの開発者が Linux に移る時にとても重要です。 - :ref:`Documentation/admin-guide/security-bugs.rst ` + :ref:`Documentation/process/security-bugs.rst ` もし Linux カーネルでセキュリティ問題を発見したように思ったら、こ のドキュメントのステップに従ってカーネル開発者に連絡し、問題解決を 支援してください。 diff --git a/Documentation/translations/ko_KR/howto.rst b/Documentation/translations/ko_KR/howto.rst index 969e91a95bb0c..34f14899c1559 100644 --- a/Documentation/translations/ko_KR/howto.rst +++ b/Documentation/translations/ko_KR/howto.rst @@ -157,7 +157,7 @@ mtk.manpages@gmail.com의 메인테이너에게 보낼 것을 권장한다. 리눅스로 전향하는 사람들에게는 매우 중요하다. - :ref:`Documentation/admin-guide/security-bugs.rst ` + :ref:`Documentation/process/security-bugs.rst ` 여러분들이 리눅스 커널의 보안 문제를 발견했다고 생각한다면 이 문서에 나온 단계에 따라서 커널 개발자들에게 알리고 그 문제를 해결할 수 있도록 도와 달라. diff --git a/Documentation/translations/sp_SP/howto.rst b/Documentation/translations/sp_SP/howto.rst index f9818d687b540..f1629738b49d0 100644 --- a/Documentation/translations/sp_SP/howto.rst +++ b/Documentation/translations/sp_SP/howto.rst @@ -135,7 +135,7 @@ de obligada lectura: de Linux y es muy importante para las personas que se mudan a Linux tras desarrollar otros sistemas operativos. - :ref:`Documentation/admin-guide/security-bugs.rst ` + :ref:`Documentation/process/security-bugs.rst ` Si cree que ha encontrado un problema de seguridad en el kernel de Linux, siga los pasos de este documento para ayudar a notificar a los desarrolladores del kernel y ayudar a resolver el problema. diff --git a/Documentation/translations/sp_SP/process/submitting-patches.rst b/Documentation/translations/sp_SP/process/submitting-patches.rst index bf95ceb5e865a..c2757d9ab2168 100644 --- a/Documentation/translations/sp_SP/process/submitting-patches.rst +++ b/Documentation/translations/sp_SP/process/submitting-patches.rst @@ -276,7 +276,7 @@ parche a security@kernel.org. Para errores graves, se debe mantener un poco de discreción y permitir que los distribuidores entreguen el parche a los usuarios; en esos casos, obviamente, el parche no debe enviarse a ninguna lista pública. Revise también -Documentation/admin-guide/security-bugs.rst. +Documentation/process/security-bugs.rst. Los parches que corrigen un error grave en un kernel en uso deben dirigirse hacia los maintainers estables poniendo una línea como esta:: diff --git a/Documentation/translations/zh_CN/admin-guide/security-bugs.rst b/Documentation/translations/zh_CN/admin-guide/security-bugs.rst index b8120391755d4..d6b8f8a4e7f63 100644 --- a/Documentation/translations/zh_CN/admin-guide/security-bugs.rst +++ b/Documentation/translations/zh_CN/admin-guide/security-bugs.rst @@ -1,6 +1,6 @@ .. include:: ../disclaimer-zh_CN.rst -:Original: :doc:`../../../admin-guide/security-bugs` +:Original: :doc:`../../../process/security-bugs` :译者: diff --git a/Documentation/translations/zh_CN/process/howto.rst b/Documentation/translations/zh_CN/process/howto.rst index 10254751df6a2..cc47be356dd32 100644 --- a/Documentation/translations/zh_CN/process/howto.rst +++ b/Documentation/translations/zh_CN/process/howto.rst @@ -125,7 +125,7 @@ Linux内核代码中包含有大量的文档。这些文档对于学习如何与 这篇文档对于理解Linux的开发哲学至关重要。对于将开发平台从其他操作系 统转移到Linux的人来说也很重要。 - :ref:`Documentation/admin-guide/security-bugs.rst ` + :ref:`Documentation/process/security-bugs.rst ` 如果你认为自己发现了Linux内核的安全性问题,请根据这篇文档中的步骤来 提醒其他内核开发者并帮助解决这个问题。 diff --git a/Documentation/translations/zh_TW/admin-guide/security-bugs.rst b/Documentation/translations/zh_TW/admin-guide/security-bugs.rst index eed260ef0c373..15f8e90050718 100644 --- a/Documentation/translations/zh_TW/admin-guide/security-bugs.rst +++ b/Documentation/translations/zh_TW/admin-guide/security-bugs.rst @@ -2,7 +2,7 @@ .. include:: ../disclaimer-zh_TW.rst -:Original: :doc:`../../../admin-guide/security-bugs` +:Original: :doc:`../../../process/security-bugs` :譯者: diff --git a/Documentation/translations/zh_TW/process/howto.rst b/Documentation/translations/zh_TW/process/howto.rst index 8fb8edcaee665..ea2f468d3e587 100644 --- a/Documentation/translations/zh_TW/process/howto.rst +++ b/Documentation/translations/zh_TW/process/howto.rst @@ -128,7 +128,7 @@ Linux內核代碼中包含有大量的文檔。這些文檔對於學習如何與 這篇文檔對於理解Linux的開發哲學至關重要。對於將開發平台從其他操作系 統轉移到Linux的人來說也很重要。 - :ref:`Documentation/admin-guide/security-bugs.rst ` + :ref:`Documentation/process/security-bugs.rst ` 如果你認爲自己發現了Linux內核的安全性問題,請根據這篇文檔中的步驟來 提醒其他內核開發者並幫助解決這個問題。 diff --git a/MAINTAINERS b/MAINTAINERS index 8d5bc223f3053..f9f0366a6190f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -73,7 +73,7 @@ Tips for patch submitters and ideally, should come with a patch proposal. Please do not send automated reports to this list either. Such bugs will be handled better and faster in the usual public places. See - Documentation/admin-guide/security-bugs.rst for details. + Documentation/process/security-bugs.rst for details. 8. Happy hacking. @@ -18801,7 +18801,7 @@ F: include/uapi/linux/sed* SECURITY CONTACT M: Security Officers S: Supported -F: Documentation/admin-guide/security-bugs.rst +F: Documentation/process/security-bugs.rst SECURITY SUBSYSTEM M: Paul Moore From 7b3825e9487d77e83bf1e27b10a74cd729b8f972 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 7 Mar 2023 10:53:03 +0100 Subject: [PATCH 089/898] iio: adc: max11410: fix read_poll_timeout() usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even though we are passing 'ret' as stop condition for read_poll_timeout(), that return code is still being ignored. The reason is that the poll will stop if the passed condition is true which will happen if the passed op() returns error. However, read_poll_timeout() returns 0 if the *complete* condition evaluates to true. Therefore, the error code returned by op() will be ignored. To fix this we need to check for both error codes: * The one returned by read_poll_timeout() which is either 0 or ETIMEDOUT. * The one returned by the passed op(). Fixes: a44ef7c46097 ("iio: adc: add max11410 adc driver") Signed-off-by: Nuno Sá Acked-by: Ibrahim Tilki Link: https://lore.kernel.org/r/20230307095303.713251-1-nuno.sa@analog.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/max11410.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/iio/adc/max11410.c b/drivers/iio/adc/max11410.c index fdc9f03135b54..e64cd979688d0 100644 --- a/drivers/iio/adc/max11410.c +++ b/drivers/iio/adc/max11410.c @@ -413,13 +413,17 @@ static int max11410_sample(struct max11410_state *st, int *sample_raw, if (!ret) return -ETIMEDOUT; } else { + int ret2; + /* Wait for status register Conversion Ready flag */ - ret = read_poll_timeout(max11410_read_reg, ret, - ret || (val & MAX11410_STATUS_CONV_READY_BIT), + ret = read_poll_timeout(max11410_read_reg, ret2, + ret2 || (val & MAX11410_STATUS_CONV_READY_BIT), 5000, MAX11410_CONVERSION_TIMEOUT_MS * 1000, true, st, MAX11410_REG_STATUS, &val); if (ret) return ret; + if (ret2) + return ret2; } /* Read ADC Data */ @@ -850,17 +854,21 @@ static int max11410_init_vref(struct device *dev, static int max11410_calibrate(struct max11410_state *st, u32 cal_type) { - int ret, val; + int ret, ret2, val; ret = max11410_write_reg(st, MAX11410_REG_CAL_START, cal_type); if (ret) return ret; /* Wait for status register Calibration Ready flag */ - return read_poll_timeout(max11410_read_reg, ret, - ret || (val & MAX11410_STATUS_CAL_READY_BIT), - 50000, MAX11410_CALIB_TIMEOUT_MS * 1000, true, - st, MAX11410_REG_STATUS, &val); + ret = read_poll_timeout(max11410_read_reg, ret2, + ret2 || (val & MAX11410_STATUS_CAL_READY_BIT), + 50000, MAX11410_CALIB_TIMEOUT_MS * 1000, true, + st, MAX11410_REG_STATUS, &val); + if (ret) + return ret; + + return ret2; } static int max11410_self_calibrate(struct max11410_state *st) From 06615d11cc78162dfd5116efb71f29eb29502d37 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Sun, 12 Mar 2023 01:46:50 +0800 Subject: [PATCH 090/898] power: supply: da9150: Fix use after free bug in da9150_charger_remove due to race condition In da9150_charger_probe, &charger->otg_work is bound with da9150_charger_otg_work. da9150_charger_otg_ncb may be called to start the work. If we remove the module which will call da9150_charger_remove to make cleanup, there may be a unfinished work. The possible sequence is as follows: Fix it by canceling the work before cleanup in the da9150_charger_remove CPU0 CPUc1 |da9150_charger_otg_work da9150_charger_remove | power_supply_unregister | device_unregister | power_supply_dev_release| kfree(psy) | | | power_supply_changed(charger->usb); | //use Fixes: c1a281e34dae ("power: Add support for DA9150 Charger") Signed-off-by: Zheng Wang Signed-off-by: Sebastian Reichel --- drivers/power/supply/da9150-charger.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/supply/da9150-charger.c b/drivers/power/supply/da9150-charger.c index 14da5c595dd9f..a87aeaea38e13 100644 --- a/drivers/power/supply/da9150-charger.c +++ b/drivers/power/supply/da9150-charger.c @@ -657,6 +657,7 @@ static int da9150_charger_remove(struct platform_device *pdev) if (!IS_ERR_OR_NULL(charger->usb_phy)) usb_unregister_notifier(charger->usb_phy, &charger->otg_nb); + cancel_work_sync(&charger->otg_work); power_supply_unregister(charger->battery); power_supply_unregister(charger->usb); From acec726473822bc6b585961f4ca2a11fa7f28341 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 3 Mar 2023 11:25:08 +0200 Subject: [PATCH 091/898] thunderbolt: Fix memory leak in margining Memory for the usb4->margining needs to be relased for the upstream port of the router as well, even though the debugfs directory gets released with the router device removal. Fix this. Fixes: d0f1e0c2a699 ("thunderbolt: Add support for receiver lane margining") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/debugfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/thunderbolt/debugfs.c b/drivers/thunderbolt/debugfs.c index 4339e706cc3a1..f92ad71ef9831 100644 --- a/drivers/thunderbolt/debugfs.c +++ b/drivers/thunderbolt/debugfs.c @@ -942,7 +942,8 @@ static void margining_port_remove(struct tb_port *port) snprintf(dir_name, sizeof(dir_name), "port%d", port->port); parent = debugfs_lookup(dir_name, port->sw->debugfs_dir); - debugfs_remove_recursive(debugfs_lookup("margining", parent)); + if (parent) + debugfs_remove_recursive(debugfs_lookup("margining", parent)); kfree(port->usb4->margining); port->usb4->margining = NULL; @@ -967,19 +968,18 @@ static void margining_switch_init(struct tb_switch *sw) static void margining_switch_remove(struct tb_switch *sw) { + struct tb_port *upstream, *downstream; struct tb_switch *parent_sw; - struct tb_port *downstream; u64 route = tb_route(sw); if (!route) return; - /* - * Upstream is removed with the router itself but we need to - * remove the downstream port margining directory. - */ + upstream = tb_upstream_port(sw); parent_sw = tb_switch_parent(sw); downstream = tb_port_at(route, parent_sw); + + margining_port_remove(upstream); margining_port_remove(downstream); } From cd0c1e582b055dea615001b8bd8eccaf6f69f7ce Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Fri, 3 Mar 2023 00:17:24 +0200 Subject: [PATCH 092/898] thunderbolt: Add missing UNSET_INBOUND_SBTX for retimer access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to USB4 retimer specification, the process of firmware update sequence requires issuing a SET_INBOUND_SBTX port operation that later shall be followed by UNSET_INBOUND_SBTX port operation. This last step is not currently issued by the driver but it is necessary to make sure the retimers are put back to passthrough mode even during enumeration. If this step is missing the link may not come up properly after soft-reboot for example. For this reason issue UNSET_INBOUND_SBTX after SET_INBOUND_SBTX for enumeration and also when the NVM upgrade is run. Reported-by: Christian Schaubschläger Link: https://lore.kernel.org/linux-usb/b556f5ed-5ee8-9990-9910-afd60db93310@gmx.at/ Cc: stable@vger.kernel.org Signed-off-by: Gil Fine Signed-off-by: Mika Westerberg --- drivers/thunderbolt/retimer.c | 23 +++++++++++++++++++++-- drivers/thunderbolt/sb_regs.h | 1 + drivers/thunderbolt/tb.h | 1 + drivers/thunderbolt/usb4.c | 14 ++++++++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/retimer.c b/drivers/thunderbolt/retimer.c index 56008eb91e2e4..9cc28197dbc45 100644 --- a/drivers/thunderbolt/retimer.c +++ b/drivers/thunderbolt/retimer.c @@ -187,6 +187,22 @@ static ssize_t nvm_authenticate_show(struct device *dev, return ret; } +static void tb_retimer_set_inbound_sbtx(struct tb_port *port) +{ + int i; + + for (i = 1; i <= TB_MAX_RETIMER_INDEX; i++) + usb4_port_retimer_set_inbound_sbtx(port, i); +} + +static void tb_retimer_unset_inbound_sbtx(struct tb_port *port) +{ + int i; + + for (i = TB_MAX_RETIMER_INDEX; i >= 1; i--) + usb4_port_retimer_unset_inbound_sbtx(port, i); +} + static ssize_t nvm_authenticate_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -213,6 +229,7 @@ static ssize_t nvm_authenticate_store(struct device *dev, rt->auth_status = 0; if (val) { + tb_retimer_set_inbound_sbtx(rt->port); if (val == AUTHENTICATE_ONLY) { ret = tb_retimer_nvm_authenticate(rt, true); } else { @@ -232,6 +249,7 @@ static ssize_t nvm_authenticate_store(struct device *dev, } exit_unlock: + tb_retimer_unset_inbound_sbtx(rt->port); mutex_unlock(&rt->tb->lock); exit_rpm: pm_runtime_mark_last_busy(&rt->dev); @@ -440,8 +458,7 @@ int tb_retimer_scan(struct tb_port *port, bool add) * Enable sideband channel for each retimer. We can do this * regardless whether there is device connected or not. */ - for (i = 1; i <= TB_MAX_RETIMER_INDEX; i++) - usb4_port_retimer_set_inbound_sbtx(port, i); + tb_retimer_set_inbound_sbtx(port); /* * Before doing anything else, read the authentication status. @@ -464,6 +481,8 @@ int tb_retimer_scan(struct tb_port *port, bool add) break; } + tb_retimer_unset_inbound_sbtx(port); + if (!last_idx) return 0; diff --git a/drivers/thunderbolt/sb_regs.h b/drivers/thunderbolt/sb_regs.h index 5185cf3e4d978..f37a4320f10a5 100644 --- a/drivers/thunderbolt/sb_regs.h +++ b/drivers/thunderbolt/sb_regs.h @@ -20,6 +20,7 @@ enum usb4_sb_opcode { USB4_SB_OPCODE_ROUTER_OFFLINE = 0x4e45534c, /* "LSEN" */ USB4_SB_OPCODE_ENUMERATE_RETIMERS = 0x4d554e45, /* "ENUM" */ USB4_SB_OPCODE_SET_INBOUND_SBTX = 0x5055534c, /* "LSUP" */ + USB4_SB_OPCODE_UNSET_INBOUND_SBTX = 0x50555355, /* "USUP" */ USB4_SB_OPCODE_QUERY_LAST_RETIMER = 0x5453414c, /* "LAST" */ USB4_SB_OPCODE_GET_NVM_SECTOR_SIZE = 0x53534e47, /* "GNSS" */ USB4_SB_OPCODE_NVM_SET_OFFSET = 0x53504f42, /* "BOPS" */ diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index 64968c162ec7f..b3cd13dc783b9 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -1242,6 +1242,7 @@ int usb4_port_sw_margin(struct tb_port *port, unsigned int lanes, bool timing, int usb4_port_sw_margin_errors(struct tb_port *port, u32 *errors); int usb4_port_retimer_set_inbound_sbtx(struct tb_port *port, u8 index); +int usb4_port_retimer_unset_inbound_sbtx(struct tb_port *port, u8 index); int usb4_port_retimer_read(struct tb_port *port, u8 index, u8 reg, void *buf, u8 size); int usb4_port_retimer_write(struct tb_port *port, u8 index, u8 reg, diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index 1e5e9c147a310..95ff02395822a 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -1578,6 +1578,20 @@ int usb4_port_retimer_set_inbound_sbtx(struct tb_port *port, u8 index) 500); } +/** + * usb4_port_retimer_unset_inbound_sbtx() - Disable sideband channel transactions + * @port: USB4 port + * @index: Retimer index + * + * Disables sideband channel transations on SBTX. The reverse of + * usb4_port_retimer_set_inbound_sbtx(). + */ +int usb4_port_retimer_unset_inbound_sbtx(struct tb_port *port, u8 index) +{ + return usb4_port_retimer_op(port, index, + USB4_SB_OPCODE_UNSET_INBOUND_SBTX, 500); +} + /** * usb4_port_retimer_read() - Read from retimer sideband registers * @port: USB4 port From d2d6ddf188f609861489d5d188d545856a3ed399 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 3 Feb 2023 15:55:41 +0200 Subject: [PATCH 093/898] thunderbolt: Call tb_check_quirks() after initializing adapters In order to apply quirks based on certain adapter types move call to tb_check_quirks() happen after the adapters are initialized. This should not affect the existing quirks. Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/switch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 3370e18ba05f9..da373ac38285c 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -2968,8 +2968,6 @@ int tb_switch_add(struct tb_switch *sw) dev_warn(&sw->dev, "reading DROM failed: %d\n", ret); tb_sw_dbg(sw, "uid: %#llx\n", sw->uid); - tb_check_quirks(sw); - ret = tb_switch_set_uuid(sw); if (ret) { dev_err(&sw->dev, "failed to set UUID\n"); @@ -2988,6 +2986,8 @@ int tb_switch_add(struct tb_switch *sw) } } + tb_check_quirks(sw); + tb_switch_default_link_ports(sw); ret = tb_switch_update_link_attributes(sw); From f0a57dd33b3eadf540912cd130db727ea824d174 Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Tue, 31 Jan 2023 13:04:52 +0200 Subject: [PATCH 094/898] thunderbolt: Limit USB3 bandwidth of certain Intel USB4 host routers Current Intel USB4 host routers have hardware limitation that the USB3 bandwidth cannot go higher than 16376 Mb/s. Work this around by adding a new quirk that limits the bandwidth for the affected host routers. Cc: stable@vger.kernel.org Signed-off-by: Gil Fine Signed-off-by: Mika Westerberg --- drivers/thunderbolt/quirks.c | 31 +++++++++++++++++++++++++++++++ drivers/thunderbolt/tb.h | 3 +++ drivers/thunderbolt/usb4.c | 17 +++++++++++++++-- 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/quirks.c b/drivers/thunderbolt/quirks.c index ae28a03fa890b..1157b8869bcca 100644 --- a/drivers/thunderbolt/quirks.c +++ b/drivers/thunderbolt/quirks.c @@ -26,6 +26,19 @@ static void quirk_clx_disable(struct tb_switch *sw) tb_sw_dbg(sw, "disabling CL states\n"); } +static void quirk_usb3_maximum_bandwidth(struct tb_switch *sw) +{ + struct tb_port *port; + + tb_switch_for_each_port(sw, port) { + if (!tb_port_is_usb3_down(port)) + continue; + port->max_bw = 16376; + tb_port_dbg(port, "USB3 maximum bandwidth limited to %u Mb/s\n", + port->max_bw); + } +} + struct tb_quirk { u16 hw_vendor_id; u16 hw_device_id; @@ -43,6 +56,24 @@ static const struct tb_quirk tb_quirks[] = { * DP buffers. */ { 0x8087, 0x0b26, 0x0000, 0x0000, quirk_dp_credit_allocation }, + /* + * Limit the maximum USB3 bandwidth for the following Intel USB4 + * host routers due to a hardware issue. + */ + { 0x8087, PCI_DEVICE_ID_INTEL_ADL_NHI0, 0x0000, 0x0000, + quirk_usb3_maximum_bandwidth }, + { 0x8087, PCI_DEVICE_ID_INTEL_ADL_NHI1, 0x0000, 0x0000, + quirk_usb3_maximum_bandwidth }, + { 0x8087, PCI_DEVICE_ID_INTEL_RPL_NHI0, 0x0000, 0x0000, + quirk_usb3_maximum_bandwidth }, + { 0x8087, PCI_DEVICE_ID_INTEL_RPL_NHI1, 0x0000, 0x0000, + quirk_usb3_maximum_bandwidth }, + { 0x8087, PCI_DEVICE_ID_INTEL_MTL_M_NHI0, 0x0000, 0x0000, + quirk_usb3_maximum_bandwidth }, + { 0x8087, PCI_DEVICE_ID_INTEL_MTL_P_NHI0, 0x0000, 0x0000, + quirk_usb3_maximum_bandwidth }, + { 0x8087, PCI_DEVICE_ID_INTEL_MTL_P_NHI1, 0x0000, 0x0000, + quirk_usb3_maximum_bandwidth }, /* * CLx is not supported on AMD USB4 Yellow Carp and Pink Sardine platforms. */ diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index b3cd13dc783b9..275ff5219a3a3 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -272,6 +272,8 @@ struct tb_bandwidth_group { * @group: Bandwidth allocation group the adapter is assigned to. Only * used for DP IN adapters for now. * @group_list: The adapter is linked to the group's list of ports through this + * @max_bw: Maximum possible bandwidth through this adapter if set to + * non-zero. * * In USB4 terminology this structure represents an adapter (protocol or * lane adapter). @@ -299,6 +301,7 @@ struct tb_port { unsigned int dma_credits; struct tb_bandwidth_group *group; struct list_head group_list; + unsigned int max_bw; }; /** diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index 95ff02395822a..6e87cf993c68c 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -1882,6 +1882,15 @@ int usb4_port_retimer_nvm_read(struct tb_port *port, u8 index, usb4_port_retimer_nvm_read_block, &info); } +static inline unsigned int +usb4_usb3_port_max_bandwidth(const struct tb_port *port, unsigned int bw) +{ + /* Take the possible bandwidth limitation into account */ + if (port->max_bw) + return min(bw, port->max_bw); + return bw; +} + /** * usb4_usb3_port_max_link_rate() - Maximum support USB3 link rate * @port: USB3 adapter port @@ -1903,7 +1912,9 @@ int usb4_usb3_port_max_link_rate(struct tb_port *port) return ret; lr = (val & ADP_USB3_CS_4_MSLR_MASK) >> ADP_USB3_CS_4_MSLR_SHIFT; - return lr == ADP_USB3_CS_4_MSLR_20G ? 20000 : 10000; + ret = lr == ADP_USB3_CS_4_MSLR_20G ? 20000 : 10000; + + return usb4_usb3_port_max_bandwidth(port, ret); } /** @@ -1930,7 +1941,9 @@ int usb4_usb3_port_actual_link_rate(struct tb_port *port) return 0; lr = val & ADP_USB3_CS_4_ALR_MASK; - return lr == ADP_USB3_CS_4_ALR_20G ? 20000 : 10000; + ret = lr == ADP_USB3_CS_4_ALR_20G ? 20000 : 10000; + + return usb4_usb3_port_max_bandwidth(port, ret); } static int usb4_usb3_port_cm_request(struct tb_port *port, bool request) From c82510b1d87bdebfe916048857d2ef46f1778aa5 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 27 Dec 2022 11:55:26 +0200 Subject: [PATCH 095/898] thunderbolt: Use scale field when allocating USB3 bandwidth When tunneling aggregated USB3 (20 Gb/s) the bandwidth values that are programmed to the ADP_USB3_CS_2 go higher than 4096 and that does not fit anymore to the 12-bit field. Fix this by scaling the value using the scale field accordingly. Fixes: 3b1d8d577ca8 ("thunderbolt: Implement USB3 bandwidth negotiation routines") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/usb4.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index 6e87cf993c68c..a0996cb2893c8 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -2094,18 +2094,30 @@ static int usb4_usb3_port_write_allocated_bandwidth(struct tb_port *port, int downstream_bw) { u32 val, ubw, dbw, scale; - int ret; + int ret, max_bw; - /* Read the used scale, hardware default is 0 */ - ret = tb_port_read(port, &scale, TB_CFG_PORT, - port->cap_adap + ADP_USB3_CS_3, 1); + /* Figure out suitable scale */ + scale = 0; + max_bw = max(upstream_bw, downstream_bw); + while (scale < 64) { + if (mbps_to_usb3_bw(max_bw, scale) < 4096) + break; + scale++; + } + + if (WARN_ON(scale >= 64)) + return -EINVAL; + + ret = tb_port_write(port, &scale, TB_CFG_PORT, + port->cap_adap + ADP_USB3_CS_3, 1); if (ret) return ret; - scale &= ADP_USB3_CS_3_SCALE_MASK; ubw = mbps_to_usb3_bw(upstream_bw, scale); dbw = mbps_to_usb3_bw(downstream_bw, scale); + tb_port_dbg(port, "scaled bandwidth %u/%u, scale %u\n", ubw, dbw, scale); + ret = tb_port_read(port, &val, TB_CFG_PORT, port->cap_adap + ADP_USB3_CS_2, 1); if (ret) From 41130c32f3a18fcc930316da17f3a5f3bc326aa1 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 23 Feb 2023 00:10:25 +0100 Subject: [PATCH 096/898] wifi: mt76: do not run mt76_unregister_device() on unregistered hw Trying to probe a mt7921e pci card without firmware results in a successful probe where ieee80211_register_hw hasn't been called. When removing the driver, ieee802111_unregister_hw is called unconditionally leading to a kernel NULL pointer dereference. Fix the issue running mt76_unregister_device routine just for registered hw. Link: https://bugs.debian.org/1029116 Link: https://bugs.kali.org/view.php?id=8140 Reported-by: Stuart Hayhurst Fixes: 1c71e03afe4b ("mt76: mt7921: move mt7921_init_hw in a dedicated work") Tested-by: Helmut Grohne Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/be3457d82f4e44bb71a22b2b5db27b644a37b1e1.1677107277.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/mac80211.c | 8 ++++++++ drivers/net/wireless/mediatek/mt76/mt76.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index b117e4467c870..34abf70f44aff 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -539,6 +539,7 @@ int mt76_register_phy(struct mt76_phy *phy, bool vht, if (ret) return ret; + set_bit(MT76_STATE_REGISTERED, &phy->state); phy->dev->phys[phy->band_idx] = phy; return 0; @@ -549,6 +550,9 @@ void mt76_unregister_phy(struct mt76_phy *phy) { struct mt76_dev *dev = phy->dev; + if (!test_bit(MT76_STATE_REGISTERED, &phy->state)) + return; + if (IS_ENABLED(CONFIG_MT76_LEDS)) mt76_led_cleanup(phy); mt76_tx_status_check(dev, true); @@ -719,6 +723,7 @@ int mt76_register_device(struct mt76_dev *dev, bool vht, return ret; WARN_ON(mt76_worker_setup(hw, &dev->tx_worker, NULL, "tx")); + set_bit(MT76_STATE_REGISTERED, &phy->state); sched_set_fifo_low(dev->tx_worker.task); return 0; @@ -729,6 +734,9 @@ void mt76_unregister_device(struct mt76_dev *dev) { struct ieee80211_hw *hw = dev->hw; + if (!test_bit(MT76_STATE_REGISTERED, &dev->phy.state)) + return; + if (IS_ENABLED(CONFIG_MT76_LEDS)) mt76_led_cleanup(&dev->phy); mt76_tx_status_check(dev, true); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index ccca0162c8f82..183b0fc5a2d48 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -402,6 +402,7 @@ struct mt76_tx_cb { enum { MT76_STATE_INITIALIZED, + MT76_STATE_REGISTERED, MT76_STATE_RUNNING, MT76_STATE_MCU_RUNNING, MT76_SCANNING, From c2f73eacee3bf1df2cfe25e1f08a3cae98b1df3d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 1 Mar 2023 17:37:39 +0100 Subject: [PATCH 097/898] wifi: mt76: mt7915: add back 160MHz channel width support for MT7915 A number of users reported that this support was working fine before it got removed. Add it back, but leave out the unsupported 80+80 mode. Fixes: ac922bd60ace ("wifi: mt76: mt7915: remove BW160 and BW80+80 support") Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230301163739.52314-1-nbd@nbd.name --- .../net/wireless/mediatek/mt76/mt7915/init.c | 40 ++++++++++++++----- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c index 1ab768feccaac..5e288116b1b01 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c @@ -383,7 +383,6 @@ mt7915_init_wiphy(struct mt7915_phy *phy) ieee80211_hw_set(hw, SUPPORTS_RX_DECAP_OFFLOAD); ieee80211_hw_set(hw, SUPPORTS_MULTI_BSSID); ieee80211_hw_set(hw, WANT_MONITOR_VIF); - ieee80211_hw_set(hw, SUPPORTS_VHT_EXT_NSS_BW); hw->max_tx_fragments = 4; @@ -396,6 +395,9 @@ mt7915_init_wiphy(struct mt7915_phy *phy) } if (phy->mt76->cap.has_5ghz) { + struct ieee80211_sta_vht_cap *vht_cap; + + vht_cap = &phy->mt76->sband_5g.sband.vht_cap; phy->mt76->sband_5g.sband.ht_cap.cap |= IEEE80211_HT_CAP_LDPC_CODING | IEEE80211_HT_CAP_MAX_AMSDU; @@ -403,19 +405,28 @@ mt7915_init_wiphy(struct mt7915_phy *phy) IEEE80211_HT_MPDU_DENSITY_4; if (is_mt7915(&dev->mt76)) { - phy->mt76->sband_5g.sband.vht_cap.cap |= + vht_cap->cap |= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; + + if (!dev->dbdc_support) + vht_cap->cap |= + IEEE80211_VHT_CAP_SHORT_GI_160 | + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ | + FIELD_PREP(IEEE80211_VHT_CAP_EXT_NSS_BW_MASK, 1); } else { - phy->mt76->sband_5g.sband.vht_cap.cap |= + vht_cap->cap |= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; /* mt7916 dbdc with 2g 2x2 bw40 and 5g 2x2 bw160c */ - phy->mt76->sband_5g.sband.vht_cap.cap |= + vht_cap->cap |= IEEE80211_VHT_CAP_SHORT_GI_160 | IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ; } + + if (!is_mt7915(&dev->mt76) || !dev->dbdc_support) + ieee80211_hw_set(hw, SUPPORTS_VHT_EXT_NSS_BW); } mt76_set_stream_caps(phy->mt76, true); @@ -841,9 +852,13 @@ mt7915_set_stream_he_txbf_caps(struct mt7915_phy *phy, int sts = hweight8(phy->mt76->chainmask); u8 c, sts_160 = sts; - /* mt7915 doesn't support bw160 */ - if (is_mt7915(&dev->mt76)) - sts_160 = 0; + /* Can do 1/2 of STS in 160Mhz mode for mt7915 */ + if (is_mt7915(&dev->mt76)) { + if (!dev->dbdc_support) + sts_160 /= 2; + else + sts_160 = 0; + } #ifdef CONFIG_MAC80211_MESH if (vif == NL80211_IFTYPE_MESH_POINT) @@ -944,10 +959,15 @@ mt7915_init_he_caps(struct mt7915_phy *phy, enum nl80211_band band, int i, idx = 0, nss = hweight8(phy->mt76->antenna_mask); u16 mcs_map = 0; u16 mcs_map_160 = 0; - u8 nss_160 = nss; + u8 nss_160; - /* Can't do 160MHz with mt7915 */ - if (is_mt7915(&dev->mt76)) + if (!is_mt7915(&dev->mt76)) + nss_160 = nss; + else if (!dev->dbdc_support) + /* Can do 1/2 of NSS streams in 160Mhz mode for mt7915 */ + nss_160 = nss / 2; + else + /* Can't do 160MHz with mt7915 dbdc */ nss_160 = 0; for (i = 0; i < 8; i++) { From 5683e1488aa9b0805a9403d215e48fed29d6d923 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 6 Mar 2023 18:42:51 +0100 Subject: [PATCH 098/898] wifi: mt76: connac: do not check WED status for non-mmio devices WED is supported just for mmio devices, so do not check it for usb or sdio devices. This patch fixes the crash reported below: [ 21.946627] wlp0s3u1i3: authenticate with c4:41:1e:f5:2b:1d [ 22.525298] wlp0s3u1i3: send auth to c4:41:1e:f5:2b:1d (try 1/3) [ 22.548274] wlp0s3u1i3: authenticate with c4:41:1e:f5:2b:1d [ 22.557694] wlp0s3u1i3: send auth to c4:41:1e:f5:2b:1d (try 1/3) [ 22.565885] wlp0s3u1i3: authenticated [ 22.569502] wlp0s3u1i3: associate with c4:41:1e:f5:2b:1d (try 1/3) [ 22.578966] wlp0s3u1i3: RX AssocResp from c4:41:1e:f5:2b:1d (capab=0x11 status=30 aid=3) [ 22.579113] wlp0s3u1i3: c4:41:1e:f5:2b:1d rejected association temporarily; comeback duration 1000 TU (1024 ms) [ 23.649518] wlp0s3u1i3: associate with c4:41:1e:f5:2b:1d (try 2/3) [ 23.752528] wlp0s3u1i3: RX AssocResp from c4:41:1e:f5:2b:1d (capab=0x11 status=0 aid=3) [ 23.797450] wlp0s3u1i3: associated [ 24.959527] kernel tried to execute NX-protected page - exploit attempt? (uid: 0) [ 24.959640] BUG: unable to handle page fault for address: ffff88800c223200 [ 24.959706] #PF: supervisor instruction fetch in kernel mode [ 24.959788] #PF: error_code(0x0011) - permissions violation [ 24.959846] PGD 2c01067 P4D 2c01067 PUD 2c02067 PMD c2a8063 PTE 800000000c223163 [ 24.959957] Oops: 0011 [#1] PREEMPT SMP [ 24.960009] CPU: 0 PID: 391 Comm: wpa_supplicant Not tainted 6.2.0-kvm #18 [ 24.960089] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.1-2.fc37 04/01/2014 [ 24.960191] RIP: 0010:0xffff88800c223200 [ 24.960446] RSP: 0018:ffffc90000ff7698 EFLAGS: 00010282 [ 24.960513] RAX: ffff888028397010 RBX: ffff88800c26e630 RCX: 0000000000000058 [ 24.960598] RDX: ffff88800c26f844 RSI: 0000000000000006 RDI: ffff888028397010 [ 24.960682] RBP: ffff88800ea72f00 R08: 18b873fbab2b964c R09: be06b38235f3c63c [ 24.960766] R10: 18b873fbab2b964c R11: be06b38235f3c63c R12: 0000000000000001 [ 24.960853] R13: ffff88800c26f84c R14: ffff8880063f0ff8 R15: ffff88800c26e644 [ 24.960950] FS: 00007effcea327c0(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 [ 24.961036] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 24.961106] CR2: ffff88800c223200 CR3: 000000000eaa2000 CR4: 00000000000006b0 [ 24.961190] Call Trace: [ 24.961219] [ 24.961245] ? mt76_connac_mcu_add_key+0x2cf/0x310 [ 24.961313] ? mt7921_set_key+0x150/0x200 [ 24.961365] ? drv_set_key+0xa9/0x1b0 [ 24.961418] ? ieee80211_key_enable_hw_accel+0xd9/0x240 [ 24.961485] ? ieee80211_key_replace+0x3f3/0x730 [ 24.961541] ? crypto_shash_setkey+0x89/0xd0 [ 24.961597] ? ieee80211_key_link+0x2d7/0x3a0 [ 24.961664] ? crypto_aead_setauthsize+0x31/0x50 [ 24.961730] ? sta_info_hash_lookup+0xa6/0xf0 [ 24.961785] ? ieee80211_add_key+0x1fc/0x250 [ 24.961842] ? rdev_add_key+0x41/0x140 [ 24.961882] ? nl80211_parse_key+0x6c/0x2f0 [ 24.961940] ? nl80211_new_key+0x24a/0x290 [ 24.961984] ? genl_rcv_msg+0x36c/0x3a0 [ 24.962036] ? rdev_mod_link_station+0xe0/0xe0 [ 24.962102] ? nl80211_set_key+0x410/0x410 [ 24.962143] ? nl80211_pre_doit+0x200/0x200 [ 24.962187] ? genl_bind+0xc0/0xc0 [ 24.962217] ? netlink_rcv_skb+0xaa/0xd0 [ 24.962259] ? genl_rcv+0x24/0x40 [ 24.962300] ? netlink_unicast+0x224/0x2f0 [ 24.962345] ? netlink_sendmsg+0x30b/0x3d0 [ 24.962388] ? ____sys_sendmsg+0x109/0x1b0 [ 24.962388] ? ____sys_sendmsg+0x109/0x1b0 [ 24.962440] ? __import_iovec+0x2e/0x110 [ 24.962482] ? ___sys_sendmsg+0xbe/0xe0 [ 24.962525] ? mod_objcg_state+0x25c/0x330 [ 24.962576] ? __dentry_kill+0x19e/0x1d0 [ 24.962618] ? call_rcu+0x18f/0x270 [ 24.962660] ? __dentry_kill+0x19e/0x1d0 [ 24.962702] ? __x64_sys_sendmsg+0x70/0x90 [ 24.962744] ? do_syscall_64+0x3d/0x80 [ 24.962796] ? exit_to_user_mode_prepare+0x1b/0x70 [ 24.962852] ? entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 24.962913] [ 24.962939] Modules linked in: [ 24.962981] CR2: ffff88800c223200 [ 24.963022] ---[ end trace 0000000000000000 ]--- [ 24.963087] RIP: 0010:0xffff88800c223200 [ 24.963323] RSP: 0018:ffffc90000ff7698 EFLAGS: 00010282 [ 24.963376] RAX: ffff888028397010 RBX: ffff88800c26e630 RCX: 0000000000000058 [ 24.963458] RDX: ffff88800c26f844 RSI: 0000000000000006 RDI: ffff888028397010 [ 24.963538] RBP: ffff88800ea72f00 R08: 18b873fbab2b964c R09: be06b38235f3c63c [ 24.963622] R10: 18b873fbab2b964c R11: be06b38235f3c63c R12: 0000000000000001 [ 24.963705] R13: ffff88800c26f84c R14: ffff8880063f0ff8 R15: ffff88800c26e644 [ 24.963788] FS: 00007effcea327c0(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 [ 24.963871] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 24.963941] CR2: ffff88800c223200 CR3: 000000000eaa2000 CR4: 00000000000006b0 [ 24.964018] note: wpa_supplicant[391] exited with irqs disabled Fixes: d1369e515efe ("wifi: mt76: connac: introduce mt76_connac_mcu_sta_wed_update utility routine") Signed-off-by: Lorenzo Bianconi Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/c42168429453474213fa8244bf4b069de4531f40.1678124335.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c index efb9bfaa187f2..008ece1b16f8e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c @@ -1221,6 +1221,9 @@ EXPORT_SYMBOL_GPL(mt76_connac_mcu_sta_ba_tlv); int mt76_connac_mcu_sta_wed_update(struct mt76_dev *dev, struct sk_buff *skb) { + if (!mt76_is_mmio(dev)) + return 0; + if (!mtk_wed_device_active(&dev->mmio.wed)) return 0; From 71f8afa2b66e356f435b6141b4a9ccf953e18356 Mon Sep 17 00:00:00 2001 From: Kees Jan Koster Date: Sat, 18 Feb 2023 15:18:30 +0100 Subject: [PATCH 099/898] USB: serial: cp210x: add Silicon Labs IFS-USB-DATACABLE IDs The Silicon Labs IFS-USB-DATACABLE is used in conjunction with for example the Quint UPSes. It is used to enable Modbus communication with the UPS to query configuration, power and battery status. Signed-off-by: Kees Jan Koster Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 832ad592b7ef3..cdea1bff3b708 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -120,6 +120,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demonstration module */ { USB_DEVICE(0x10C4, 0x8281) }, /* Nanotec Plug & Drive */ { USB_DEVICE(0x10C4, 0x8293) }, /* Telegesis ETRX2USB */ + { USB_DEVICE(0x10C4, 0x82AA) }, /* Silicon Labs IFS-USB-DATACABLE used with Quint UPS */ { USB_DEVICE(0x10C4, 0x82EF) }, /* CESINEL FALCO 6105 AC Power Supply */ { USB_DEVICE(0x10C4, 0x82F1) }, /* CESINEL MEDCAL EFD Earth Fault Detector */ { USB_DEVICE(0x10C4, 0x82F2) }, /* CESINEL MEDCAL ST Network Analyzer */ From 9228b26194d1cc00449f12f306f53ef2e234a55b Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Sun, 12 Mar 2023 20:32:08 -0700 Subject: [PATCH 100/898] KVM: arm64: PMU: Fix GET_ONE_REG for vPMC regs to return the current value Have KVM_GET_ONE_REG for vPMU counter (vPMC) registers (PMCCNTR_EL0 and PMEVCNTR_EL0) return the sum of the register value in the sysreg file and the current perf event counter value. Values of vPMC registers are saved in sysreg files on certain occasions. These saved values don't represent the current values of the vPMC registers if the perf events for the vPMCs count events after the save. The current values of those registers are the sum of the sysreg file value and the current perf event counter value. But, when userspace reads those registers (using KVM_GET_ONE_REG), KVM returns the sysreg file value to userspace (not the sum value). Fix this to return the sum value for KVM_GET_ONE_REG. Fixes: 051ff581ce70 ("arm64: KVM: Add access handler for event counter register") Cc: stable@vger.kernel.org Reviewed-by: Marc Zyngier Signed-off-by: Reiji Watanabe Link: https://lore.kernel.org/r/20230313033208.1475499-1-reijiw@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 53749d3a0996d..1b2c161120bea 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -856,6 +856,22 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx) return true; } +static int get_pmu_evcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 *val) +{ + u64 idx; + + if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 0) + /* PMCCNTR_EL0 */ + idx = ARMV8_PMU_CYCLE_IDX; + else + /* PMEVCNTRn_EL0 */ + idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); + + *val = kvm_pmu_get_counter_value(vcpu, idx); + return 0; +} + static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -1072,7 +1088,7 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, /* Macro to expand the PMEVCNTRn_EL0 register */ #define PMU_PMEVCNTR_EL0(n) \ { PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)), \ - .reset = reset_pmevcntr, \ + .reset = reset_pmevcntr, .get_user = get_pmu_evcntr, \ .access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), } /* Macro to expand the PMEVTYPERn_EL0 register */ @@ -1982,7 +1998,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { PMU_SYS_REG(SYS_PMCEID1_EL0), .access = access_pmceid, .reset = NULL }, { PMU_SYS_REG(SYS_PMCCNTR_EL0), - .access = access_pmu_evcntr, .reset = reset_unknown, .reg = PMCCNTR_EL0 }, + .access = access_pmu_evcntr, .reset = reset_unknown, + .reg = PMCCNTR_EL0, .get_user = get_pmu_evcntr}, { PMU_SYS_REG(SYS_PMXEVTYPER_EL0), .access = access_pmu_evtyper, .reset = NULL }, { PMU_SYS_REG(SYS_PMXEVCNTR_EL0), From f6da81f650fa47b61b847488f3938d43f90d093d Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Sun, 12 Mar 2023 20:32:34 -0700 Subject: [PATCH 101/898] KVM: arm64: PMU: Don't save PMCR_EL0.{C,P} for the vCPU Presently, when a guest writes 1 to PMCR_EL0.{C,P}, which is WO/RAZ, KVM saves the register value, including these bits. When userspace reads the register using KVM_GET_ONE_REG, KVM returns the saved register value as it is (the saved value might have these bits set). This could result in userspace setting these bits on the destination during migration. Consequently, KVM may end up resetting the vPMU counter registers (PMCCNTR_EL0 and/or PMEVCNTR_EL0) to zero on the first KVM_RUN after migration. Fix this by not saving those bits when a guest writes 1 to those bits. Fixes: ab9468340d2b ("arm64: KVM: Add access handler for PMCR register") Cc: stable@vger.kernel.org Reviewed-by: Marc Zyngier Signed-off-by: Reiji Watanabe Link: https://lore.kernel.org/r/20230313033234.1475987-1-reijiw@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/pmu-emul.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 24908400e1906..c243b10f3e150 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -538,7 +538,8 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) if (!kvm_pmu_is_3p5(vcpu)) val &= ~ARMV8_PMU_PMCR_LP; - __vcpu_sys_reg(vcpu, PMCR_EL0) = val; + /* The reset bits don't indicate any state, and shouldn't be saved. */ + __vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P); if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, From 4928f67bc911e46a43004251a4d7eb2259ba6077 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 8 Mar 2023 17:57:23 +0200 Subject: [PATCH 102/898] vfio/mlx5: Fix the report of dirty_bytes upon pre-copy Fix the report of dirty_bytes upon pre-copy to include both the existing data on the migration file and the device extra bytes. This gives a better close estimation to what can be passed any more as part of pre-copy. Fixes: 0dce165b1adf ("vfio/mlx5: Introduce vfio precopy ioctl implementation") Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20230308155723.108218-1-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/main.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index e897537a9e8ad..d95fd382814c8 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -442,16 +442,10 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd, if (migf->pre_copy_initial_bytes > *pos) { info.initial_bytes = migf->pre_copy_initial_bytes - *pos; } else { - buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data); - if (buf) { - info.dirty_bytes = buf->start_pos + buf->length - *pos; - } else { - if (!end_of_data) { - ret = -EINVAL; - goto err_migf_unlock; - } - info.dirty_bytes = inc_length; - } + info.dirty_bytes = migf->max_pos - *pos; + if (!info.dirty_bytes) + end_of_data = true; + info.dirty_bytes += inc_length; } if (!end_of_data || !inc_length) { From 8b3a149db461d3286d1e211112de3b44ccaeaf71 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 12 Mar 2023 23:00:03 +0100 Subject: [PATCH 103/898] efi: earlycon: Reprobe after parsing config tables Commit 732ea9db9d8a ("efi: libstub: Move screen_info handling to common code") reorganized the earlycon handling so that all architectures pass the screen_info data via a EFI config table instead of populating struct screen_info directly, as the latter is only possible when the EFI stub is baked into the kernel (and not into the decompressor). However, this means that struct screen_info may not have been populated yet by the time the earlycon probe takes place, and this results in a non-functional early console. So let's probe again right after parsing the config tables and populating struct screen_info. Note that this means that earlycon output starts a bit later than before, and so it may fail to capture issues that occur while doing the early EFI initialization. Fixes: 732ea9db9d8a ("efi: libstub: Move screen_info handling to common code") Reported-by: Shawn Guo Tested-by: Shawn Guo Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/earlycon.c | 16 +++++++++++++--- drivers/firmware/efi/efi-init.c | 3 +++ include/linux/efi.h | 1 + 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/earlycon.c b/drivers/firmware/efi/earlycon.c index f54e6fdf08e2b..f80a9af3d16e9 100644 --- a/drivers/firmware/efi/earlycon.c +++ b/drivers/firmware/efi/earlycon.c @@ -215,6 +215,14 @@ efi_earlycon_write(struct console *con, const char *str, unsigned int num) } } +static bool __initdata fb_probed; + +void __init efi_earlycon_reprobe(void) +{ + if (fb_probed) + setup_earlycon("efifb"); +} + static int __init efi_earlycon_setup(struct earlycon_device *device, const char *opt) { @@ -222,15 +230,17 @@ static int __init efi_earlycon_setup(struct earlycon_device *device, u16 xres, yres; u32 i; - if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI) + fb_wb = opt && !strcmp(opt, "ram"); + + if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI) { + fb_probed = true; return -ENODEV; + } fb_base = screen_info.lfb_base; if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE) fb_base |= (u64)screen_info.ext_lfb_base << 32; - fb_wb = opt && !strcmp(opt, "ram"); - si = &screen_info; xres = si->lfb_width; yres = si->lfb_height; diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c index 2c16080e1f719..ef0820f1a9246 100644 --- a/drivers/firmware/efi/efi-init.c +++ b/drivers/firmware/efi/efi-init.c @@ -72,6 +72,9 @@ static void __init init_screen_info(void) if (memblock_is_map_memory(screen_info.lfb_base)) memblock_mark_nomap(screen_info.lfb_base, screen_info.lfb_size); + + if (IS_ENABLED(CONFIG_EFI_EARLYCON)) + efi_earlycon_reprobe(); } } diff --git a/include/linux/efi.h b/include/linux/efi.h index 04a733f0ba956..7aa62c92185f6 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -693,6 +693,7 @@ efi_guid_to_str(efi_guid_t *guid, char *out) } extern void efi_init (void); +extern void efi_earlycon_reprobe(void); #ifdef CONFIG_EFI extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ #else From feafeb53140af3cde3fba46b292b15b3a0c0635c Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Tue, 14 Feb 2023 11:15:05 -0600 Subject: [PATCH 104/898] arm64: dts: imx8dxl-evk: Fix eqos phy reset gpio The deprecated property is named snps,reset-gpio, but this devicetree used snps,reset-gpios instead which results in the reset not being used and the following make dtbs_check error: ./arch/arm64/boot/dts/freescale/imx8dxl-evk.dtb: ethernet@5b050000: 'snps,reset-gpio' is a dependency of 'snps,reset-delays-us' From schema: ./Documentation/devicetree/bindings/net/snps,dwmac.yaml Use the preferred method of defining the reset gpio in the phy node itself. Note that this drops the 10 us pre-delay, but prior this wasn't used at all and a pre-delay doesn't make much sense in this context so it should be fine. Fixes: 8dd495d12374 ("arm64: dts: freescale: add support for i.MX8DXL EVK board") Signed-off-by: Andrew Halaney Acked-by: Krzysztof Kozlowski Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8dxl-evk.dts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8dxl-evk.dts b/arch/arm64/boot/dts/freescale/imx8dxl-evk.dts index 1bcf228a22b8b..852420349c013 100644 --- a/arch/arm64/boot/dts/freescale/imx8dxl-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8dxl-evk.dts @@ -121,8 +121,6 @@ phy-handle = <ðphy0>; nvmem-cells = <&fec_mac1>; nvmem-cell-names = "mac-address"; - snps,reset-gpios = <&pca6416_1 2 GPIO_ACTIVE_LOW>; - snps,reset-delays-us = <10 20 200000>; status = "okay"; mdio { @@ -136,6 +134,9 @@ eee-broken-1000t; qca,disable-smarteee; qca,disable-hibernation-mode; + reset-gpios = <&pca6416_1 2 GPIO_ACTIVE_LOW>; + reset-assert-us = <20>; + reset-deassert-us = <200000>; vddio-supply = <&vddio0>; vddio0: vddio-regulator { From 32f86da7c86b27ebed31c24453a0713f612e43fb Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 17 Feb 2023 16:06:26 +0100 Subject: [PATCH 105/898] arm64: dts: imx8mm-nitrogen-r2: fix WM8960 clock name The WM8960 Linux driver expects the clock to be named "mclk". Otherwise the clock will be ignored and not prepared/enabled by the driver. Fixes: 40ba2eda0a7b ("arm64: dts: imx8mm-nitrogen-r2: add audio") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts b/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts index 6357078185edd..0e8f0d7161ad0 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts @@ -247,7 +247,7 @@ compatible = "wlf,wm8960"; reg = <0x1a>; clocks = <&clk IMX8MM_CLK_SAI1_ROOT>; - clock-names = "mclk1"; + clock-names = "mclk"; wlf,shared-lrclk; #sound-dai-cells = <0>; }; From 1d0d5b917d6af71fa3c9599c3a7198a4175156a5 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 17 Feb 2023 20:15:38 +0100 Subject: [PATCH 106/898] arm64: dts: imx8mp: Fix LCDIF2 node clock order The 'axi' clock are the bus APB clock, the 'disp_axi' clock are the pixel data AXI clock. The naming is confusing. Fix the clock order. Fixes: 94e6197dadc9 ("arm64: dts: imx8mp: Add LCDIF2 & LDB nodes") Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index a19224fe1a6ad..2dd60e3252f35 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -1131,8 +1131,8 @@ reg = <0x32e90000 0x238>; interrupts = ; clocks = <&clk IMX8MP_CLK_MEDIA_DISP2_PIX_ROOT>, - <&clk IMX8MP_CLK_MEDIA_AXI_ROOT>, - <&clk IMX8MP_CLK_MEDIA_APB_ROOT>; + <&clk IMX8MP_CLK_MEDIA_APB_ROOT>, + <&clk IMX8MP_CLK_MEDIA_AXI_ROOT>; clock-names = "pix", "axi", "disp_axi"; assigned-clocks = <&clk IMX8MP_CLK_MEDIA_DISP2_PIX>, <&clk IMX8MP_VIDEO_PLL1>; From 194c3e7d7e1230201fb341f40cfd87760a30e42a Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 23 Feb 2023 07:05:43 +0100 Subject: [PATCH 107/898] arm64: dts: imx93: Fix eqos properties 'macirq' is supposed to be listed first. Also only 'snps,clk-csr' is listed in the bindings while 'clk_csr' is only supported for legacy reasons. See commit 83936ea8d8ad2 ("net: stmmac: add a parse for new property 'snps,clk-csr'") Fixes: 1f4263ea6a4b ("arm64: dts: imx93: add eqos support") Signed-off-by: Alexander Stein Reviewed-by: Peng Fan Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index 2076f9c9983ad..92e93c8af7f7b 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -580,9 +580,9 @@ eqos: ethernet@428a0000 { compatible = "nxp,imx93-dwmac-eqos", "snps,dwmac-5.10a"; reg = <0x428a0000 0x10000>; - interrupts = , - ; - interrupt-names = "eth_wake_irq", "macirq"; + interrupts = , + ; + interrupt-names = "macirq", "eth_wake_irq"; clocks = <&clk IMX93_CLK_ENET_QOS_GATE>, <&clk IMX93_CLK_ENET_QOS_GATE>, <&clk IMX93_CLK_ENET_TIMER2>, @@ -595,7 +595,7 @@ <&clk IMX93_CLK_SYS_PLL_PFD0_DIV2>; assigned-clock-rates = <100000000>, <250000000>; intf_mode = <&wakeupmix_gpr 0x28>; - clk_csr = <0>; + snps,clk-csr = <0>; status = "disabled"; }; From 3d37f7685d525e58674c23d607020e66d501dcd1 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 26 Feb 2023 21:12:12 +0800 Subject: [PATCH 108/898] ARM: dts: imx6sll: e70k02: fix usbotg1 pinctrl usb@2184000: 'pinctrl-0' is a dependency of 'pinctrl-names' Signed-off-by: Peng Fan Fixes: 3bb3fd856505 ("ARM: dts: add Netronix E70K02 board common file") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/e70k02.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/e70k02.dtsi b/arch/arm/boot/dts/e70k02.dtsi index ace3eb8a97b87..4e1bf080eaca0 100644 --- a/arch/arm/boot/dts/e70k02.dtsi +++ b/arch/arm/boot/dts/e70k02.dtsi @@ -321,6 +321,7 @@ &usbotg1 { pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usbotg1>; disable-over-current; srp-disable; hnp-disable; From 957c04e9784c7c757e8cc293d7fb2a60cdf461b6 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 26 Feb 2023 21:12:13 +0800 Subject: [PATCH 109/898] ARM: dts: imx6sll: e60k02: fix usbotg1 pinctrl usb@2184000: 'pinctrl-0' is a dependency of 'pinctrl-names' Signed-off-by: Peng Fan Fixes: c100ea86e6ab ("ARM: dts: add Netronix E60K02 board common file") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/e60k02.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/e60k02.dtsi b/arch/arm/boot/dts/e60k02.dtsi index 94944cc219317..dd03e3860f97f 100644 --- a/arch/arm/boot/dts/e60k02.dtsi +++ b/arch/arm/boot/dts/e60k02.dtsi @@ -311,6 +311,7 @@ &usbotg1 { pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usbotg1>; disable-over-current; srp-disable; hnp-disable; From 1cd489e1ada1cffa56bd06fd4609f5a60a985d43 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 26 Feb 2023 21:12:14 +0800 Subject: [PATCH 110/898] ARM: dts: imx6sl: tolino-shine2hd: fix usbotg1 pinctrl usb@2184000: 'pinctrl-0' is a dependency of 'pinctrl-names' Signed-off-by: Peng Fan Fixes: 9c7016f1ca6d ("ARM: dts: imx: add devicetree for Tolino Shine 2 HD") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts b/arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts index da1399057634a..815119c12bd48 100644 --- a/arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts +++ b/arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts @@ -625,6 +625,7 @@ &usbotg1 { pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usbotg1>; disable-over-current; srp-disable; hnp-disable; From 62fb54148cd6eb456ff031be8fb447c98cf0bd9b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 28 Feb 2023 22:52:44 +0100 Subject: [PATCH 111/898] arm64: dts: imx8mn: specify #sound-dai-cells for SAI nodes Add #sound-dai-cells properties to SAI nodes. Reviewed-by: Adam Ford Reviewed-by: Fabio Estevam Fixes: 9e9860069725 ("arm64: dts: imx8mn: Add SAI nodes") Signed-off-by: Marek Vasut Reviewed-by: Marco Felsch Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index ed9ac6c5047c0..9e0ddd6b7a322 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -296,6 +296,7 @@ sai2: sai@30020000 { compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x30020000 0x10000>; + #sound-dai-cells = <0>; interrupts = ; clocks = <&clk IMX8MN_CLK_SAI2_IPG>, <&clk IMX8MN_CLK_DUMMY>, @@ -310,6 +311,7 @@ sai3: sai@30030000 { compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x30030000 0x10000>; + #sound-dai-cells = <0>; interrupts = ; clocks = <&clk IMX8MN_CLK_SAI3_IPG>, <&clk IMX8MN_CLK_DUMMY>, @@ -324,6 +326,7 @@ sai5: sai@30050000 { compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x30050000 0x10000>; + #sound-dai-cells = <0>; interrupts = ; clocks = <&clk IMX8MN_CLK_SAI5_IPG>, <&clk IMX8MN_CLK_DUMMY>, @@ -340,6 +343,7 @@ sai6: sai@30060000 { compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x30060000 0x10000>; + #sound-dai-cells = <0>; interrupts = ; clocks = <&clk IMX8MN_CLK_SAI6_IPG>, <&clk IMX8MN_CLK_DUMMY>, @@ -397,6 +401,7 @@ sai7: sai@300b0000 { compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x300b0000 0x10000>; + #sound-dai-cells = <0>; interrupts = ; clocks = <&clk IMX8MN_CLK_SAI7_IPG>, <&clk IMX8MN_CLK_DUMMY>, From 1adab2922c58e7ff4fa9f0b43695079402cce876 Mon Sep 17 00:00:00 2001 From: Ivan Bornyakov Date: Mon, 6 Mar 2023 16:25:26 +0300 Subject: [PATCH 112/898] bus: imx-weim: fix branch condition evaluates to a garbage value If bus type is other than imx50_weim_devtype and have no child devices, variable 'ret' in function weim_parse_dt() will not be initialized, but will be used as branch condition and return value. Fix this by initializing 'ret' with 0. This was discovered with help of clang-analyzer, but the situation is quite possible in real life. Fixes: 52c47b63412b ("bus: imx-weim: improve error handling upon child probe-failure") Signed-off-by: Ivan Bornyakov Cc: stable@vger.kernel.org Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- drivers/bus/imx-weim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c index 2a6b4f6764586..36d42484142ae 100644 --- a/drivers/bus/imx-weim.c +++ b/drivers/bus/imx-weim.c @@ -204,8 +204,8 @@ static int weim_parse_dt(struct platform_device *pdev) const struct of_device_id *of_id = of_match_device(weim_id_table, &pdev->dev); const struct imx_weim_devtype *devtype = of_id->data; + int ret = 0, have_child = 0; struct device_node *child; - int ret, have_child = 0; struct weim_priv *priv; void __iomem *base; u32 reg; From b3cdf730486b048ca0bf23bef050550d9fd40422 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 8 Mar 2023 11:17:20 +0100 Subject: [PATCH 113/898] arm64: dts: imx93: add missing #address-cells and #size-cells to i2c nodes Add them to the SoC .dtsi, so that not every board has to specify them. Fixes: 1225396fefea ("arm64: dts: imx93: add lpi2c nodes") Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93.dtsi | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index 92e93c8af7f7b..41efd97dd6d6d 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -164,6 +164,8 @@ lpi2c1: i2c@44340000 { compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c"; reg = <0x44340000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; interrupts = ; clocks = <&clk IMX93_CLK_LPI2C1_GATE>, <&clk IMX93_CLK_BUS_AON>; @@ -174,6 +176,8 @@ lpi2c2: i2c@44350000 { compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c"; reg = <0x44350000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; interrupts = ; clocks = <&clk IMX93_CLK_LPI2C2_GATE>, <&clk IMX93_CLK_BUS_AON>; @@ -343,6 +347,8 @@ lpi2c3: i2c@42530000 { compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c"; reg = <0x42530000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; interrupts = ; clocks = <&clk IMX93_CLK_LPI2C3_GATE>, <&clk IMX93_CLK_BUS_WAKEUP>; @@ -353,6 +359,8 @@ lpi2c4: i2c@42540000 { compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c"; reg = <0x42540000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; interrupts = ; clocks = <&clk IMX93_CLK_LPI2C4_GATE>, <&clk IMX93_CLK_BUS_WAKEUP>; @@ -455,6 +463,8 @@ lpi2c5: i2c@426b0000 { compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c"; reg = <0x426b0000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; interrupts = ; clocks = <&clk IMX93_CLK_LPI2C5_GATE>, <&clk IMX93_CLK_BUS_WAKEUP>; @@ -465,6 +475,8 @@ lpi2c6: i2c@426c0000 { compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c"; reg = <0x426c0000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; interrupts = ; clocks = <&clk IMX93_CLK_LPI2C6_GATE>, <&clk IMX93_CLK_BUS_WAKEUP>; @@ -475,6 +487,8 @@ lpi2c7: i2c@426d0000 { compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c"; reg = <0x426d0000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; interrupts = ; clocks = <&clk IMX93_CLK_LPI2C7_GATE>, <&clk IMX93_CLK_BUS_WAKEUP>; @@ -485,6 +499,8 @@ lpi2c8: i2c@426e0000 { compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c"; reg = <0x426e0000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; interrupts = ; clocks = <&clk IMX93_CLK_LPI2C8_GATE>, <&clk IMX93_CLK_BUS_WAKEUP>; From 773e8e7d07b753474b2ccd605ff092faaa9e65b9 Mon Sep 17 00:00:00 2001 From: Enrico Sau Date: Tue, 14 Mar 2023 10:00:59 +0100 Subject: [PATCH 114/898] USB: serial: option: add Telit FE990 compositions Add the following Telit FE990 compositions: 0x1080: tty, adb, rmnet, tty, tty, tty, tty 0x1081: tty, adb, mbim, tty, tty, tty, tty 0x1082: rndis, tty, adb, tty, tty, tty, tty 0x1083: tty, adb, ecm, tty, tty, tty, tty Signed-off-by: Enrico Sau Link: https://lore.kernel.org/r/20230314090059.77876-1-enrico.sau@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index e6d8d9b35ad0e..1621f66db25f7 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1300,6 +1300,14 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff), /* Telit FN990 (PCIe) */ .driver_info = RSVD(0) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1080, 0xff), /* Telit FE990 (rmnet) */ + .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1081, 0xff), /* Telit FE990 (MBIM) */ + .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1082, 0xff), /* Telit FE990 (RNDIS) */ + .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1083, 0xff), /* Telit FE990 (ECM) */ + .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), From 3c728b1bc5b99c5275ac5c7788ef814c0e51ef54 Mon Sep 17 00:00:00 2001 From: Eugene Huang Date: Tue, 14 Mar 2023 17:05:52 +0800 Subject: [PATCH 115/898] ASOC: Intel: sof_sdw: add quirk for Intel 'Rooks County' NUC M15 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same quirks as the 'Bishop County' NUC M15, except the rt711 is in the 'JD2 100K' jack detection mode. Link: https://github.com/thesofproject/linux/issues/4088 Signed-off-by: Eugene Huang Reviewed-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20230314090553.498664-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index d2ed807abde95..767fa89d08708 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -213,6 +213,17 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { SOF_SDW_PCH_DMIC | RT711_JD1), }, + { + /* NUC15 'Rooks County' LAPRC510 and LAPRC710 skews */ + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Intel(R) Client Systems"), + DMI_MATCH(DMI_PRODUCT_NAME, "LAPRC"), + }, + .driver_data = (void *)(SOF_SDW_TGL_HDMI | + SOF_SDW_PCH_DMIC | + RT711_JD2_100K), + }, /* TigerLake-SDCA devices */ { .callback = sof_sdw_quirk_cb, From 9c691a42b8926c8966561265cdae3ddc7464d3a2 Mon Sep 17 00:00:00 2001 From: Eugene Huang Date: Tue, 14 Mar 2023 17:05:53 +0800 Subject: [PATCH 116/898] ASoC: Intel: soc-acpi: add table for Intel 'Rooks County' NUC M15 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same topology as the HP Omen 16-k0005TX, except with the rt1316 amp on link2. Link: https://github.com/thesofproject/linux/issues/4088 Signed-off-by: Eugene Huang Reviewed-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20230314090553.498664-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- .../intel/common/soc-acpi-intel-adl-match.c | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sound/soc/intel/common/soc-acpi-intel-adl-match.c b/sound/soc/intel/common/soc-acpi-intel-adl-match.c index 28dd2046e4ac5..d8c80041388a7 100644 --- a/sound/soc/intel/common/soc-acpi-intel-adl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-adl-match.c @@ -354,6 +354,20 @@ static const struct snd_soc_acpi_link_adr adl_sdw_rt711_link0_rt1316_link3[] = { {} }; +static const struct snd_soc_acpi_link_adr adl_sdw_rt711_link0_rt1316_link2[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(rt711_sdca_0_adr), + .adr_d = rt711_sdca_0_adr, + }, + { + .mask = BIT(2), + .num_adr = ARRAY_SIZE(rt1316_2_single_adr), + .adr_d = rt1316_2_single_adr, + }, + {} +}; + static const struct snd_soc_acpi_adr_device mx8373_2_adr[] = { { .adr = 0x000223019F837300ull, @@ -624,6 +638,12 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_sdw_machines[] = { .drv_name = "sof_sdw", .sof_tplg_filename = "sof-adl-rt711-l0-rt1316-l3.tplg", }, + { + .link_mask = 0x5, /* 2 active links required */ + .links = adl_sdw_rt711_link0_rt1316_link2, + .drv_name = "sof_sdw", + .sof_tplg_filename = "sof-adl-rt711-l0-rt1316-l2.tplg", + }, { .link_mask = 0x1, /* link0 required */ .links = adl_rvp, From 083a25b18d6ad9f1f540e629909aa3eaaaf01823 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 9 Mar 2023 15:13:37 +0800 Subject: [PATCH 117/898] ASoC: soc-pcm: fix hw->formats cleared by soc_pcm_hw_init() for dpcm The hw->formats may be set by snd_dmaengine_pcm_refine_runtime_hwparams() in component's startup()/open(), but soc_pcm_hw_init() will init hw->formats in dpcm_runtime_setup_fe() after component's startup()/open(), which causes the valuable hw->formats to be cleared. So need to store the hw->formats before initialization, then restore it after initialization. Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1678346017-3660-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 5eb056b942ce8..7958c9defd492 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1661,10 +1661,14 @@ static void dpcm_runtime_setup_fe(struct snd_pcm_substream *substream) struct snd_pcm_hardware *hw = &runtime->hw; struct snd_soc_dai *dai; int stream = substream->stream; + u64 formats = hw->formats; int i; soc_pcm_hw_init(hw); + if (formats) + hw->formats &= formats; + for_each_rtd_cpu_dais(fe, i, dai) { struct snd_soc_pcm_stream *cpu_stream; From 13ec9308a85702af7c31f3638a2720863848a7f2 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Mon, 13 Mar 2023 16:54:54 -0700 Subject: [PATCH 118/898] KVM: arm64: Retry fault if vma_lookup() results become invalid Read mmu_invalidate_seq before dropping the mmap_lock so that KVM can detect if the results of vma_lookup() (e.g. vma_shift) become stale before it acquires kvm->mmu_lock. This fixes a theoretical bug where a VMA could be changed by userspace after vma_lookup() and before KVM reads the mmu_invalidate_seq, causing KVM to install page table entries based on a (possibly) no-longer-valid vma_shift. Re-order the MMU cache top-up to earlier in user_mem_abort() so that it is not done after KVM has read mmu_invalidate_seq (i.e. so as to avoid inducing spurious fault retries). This bug has existed since KVM/ARM's inception. It's unlikely that any sane userspace currently modifies VMAs in such a way as to trigger this race. And even with directed testing I was unable to reproduce it. But a sufficiently motivated host userspace might be able to exploit this race. Fixes: 94f8e6418d39 ("KVM: ARM: Handle guest faults in KVM") Cc: stable@vger.kernel.org Reported-by: Sean Christopherson Signed-off-by: David Matlack Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20230313235454.2964067-1-dmatlack@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 48 +++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 7113587222ffe..f54408355d1dc 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1217,6 +1217,20 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } + /* + * Permission faults just need to update the existing leaf entry, + * and so normally don't require allocations from the memcache. The + * only exception to this is when dirty logging is enabled at runtime + * and a write fault needs to collapse a block entry into a table. + */ + if (fault_status != ESR_ELx_FSC_PERM || + (logging_active && write_fault)) { + ret = kvm_mmu_topup_memory_cache(memcache, + kvm_mmu_cache_min_pages(kvm)); + if (ret) + return ret; + } + /* * Let's check if we will get back a huge page backed by hugetlbfs, or * get block mapping for device MMIO region. @@ -1269,37 +1283,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, fault_ipa &= ~(vma_pagesize - 1); gfn = fault_ipa >> PAGE_SHIFT; - mmap_read_unlock(current->mm); - - /* - * Permission faults just need to update the existing leaf entry, - * and so normally don't require allocations from the memcache. The - * only exception to this is when dirty logging is enabled at runtime - * and a write fault needs to collapse a block entry into a table. - */ - if (fault_status != ESR_ELx_FSC_PERM || - (logging_active && write_fault)) { - ret = kvm_mmu_topup_memory_cache(memcache, - kvm_mmu_cache_min_pages(kvm)); - if (ret) - return ret; - } - mmu_seq = vcpu->kvm->mmu_invalidate_seq; /* - * Ensure the read of mmu_invalidate_seq happens before we call - * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk - * the page we just got a reference to gets unmapped before we have a - * chance to grab the mmu_lock, which ensure that if the page gets - * unmapped afterwards, the call to kvm_unmap_gfn will take it away - * from us again properly. This smp_rmb() interacts with the smp_wmb() - * in kvm_mmu_notifier_invalidate_. + * Read mmu_invalidate_seq so that KVM can detect if the results of + * vma_lookup() or __gfn_to_pfn_memslot() become stale prior to + * acquiring kvm->mmu_lock. * - * Besides, __gfn_to_pfn_memslot() instead of gfn_to_pfn_prot() is - * used to avoid unnecessary overhead introduced to locate the memory - * slot because it's always fixed even @gfn is adjusted for huge pages. + * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs + * with the smp_wmb() in kvm_mmu_invalidate_end(). */ - smp_rmb(); + mmu_seq = vcpu->kvm->mmu_invalidate_seq; + mmap_read_unlock(current->mm); pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL, write_fault, &writable, NULL); From ccb820dc7d2236b1af0d54ae038a27b5b6d5ae5a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 13 Mar 2023 15:12:29 -0700 Subject: [PATCH 119/898] fscrypt: destroy keyring after security_sb_delete() fscrypt_destroy_keyring() must be called after all potentially-encrypted inodes were evicted; otherwise it cannot safely destroy the keyring. Since inodes that are in-use by the Landlock LSM don't get evicted until security_sb_delete(), this means that fscrypt_destroy_keyring() must be called *after* security_sb_delete(). This fixes a WARN_ON followed by a NULL dereference, only possible if Landlock was being used on encrypted files. Fixes: d7e7b9af104c ("fscrypt: stop using keyrings subsystem for fscrypt_master_key") Cc: stable@vger.kernel.org Reported-by: syzbot+93e495f6a4f748827c88@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/00000000000044651705f6ca1e30@google.com Reviewed-by: Christian Brauner Link: https://lore.kernel.org/r/20230313221231.272498-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/super.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/super.c b/fs/super.c index 84332d5cb817a..04bc62ab7dfea 100644 --- a/fs/super.c +++ b/fs/super.c @@ -475,13 +475,22 @@ void generic_shutdown_super(struct super_block *sb) cgroup_writeback_umount(); - /* evict all inodes with zero refcount */ + /* Evict all inodes with zero refcount. */ evict_inodes(sb); - /* only nonzero refcount inodes can have marks */ + + /* + * Clean up and evict any inodes that still have references due + * to fsnotify or the security policy. + */ fsnotify_sb_delete(sb); - fscrypt_destroy_keyring(sb); security_sb_delete(sb); + /* + * Now that all potentially-encrypted inodes have been evicted, + * the fscrypt keyring can be destroyed. + */ + fscrypt_destroy_keyring(sb); + if (sb->s_dio_done_wq) { destroy_workqueue(sb->s_dio_done_wq); sb->s_dio_done_wq = NULL; From 9c88ea00fef03031ce6554531e89be82f6a42835 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Thu, 9 Mar 2023 13:58:52 -0500 Subject: [PATCH 120/898] NFS: Fix /proc/PID/io read_bytes for buffered reads Prior to commit 8786fde8421c ("Convert NFS from readpages to readahead"), nfs_readpages() used the old mm interface read_cache_pages() which called task_io_account_read() for each NFS page read. After this commit, nfs_readpages() is converted to nfs_readahead(), which now uses the new mm interface readahead_page(). The new interface requires callers to call task_io_account_read() themselves. In addition, to nfs_readahead() task_io_account_read() should also be called from nfs_read_folio(). Fixes: 8786fde8421c ("Convert NFS from readpages to readahead") Link: https://lore.kernel.org/linux-nfs/CAPt2mGNEYUk5u8V4abe=5MM5msZqmvzCVrtCP4Qw1n=gCHCnww@mail.gmail.com/ Signed-off-by: Dave Wysochanski Signed-off-by: Anna Schumaker --- fs/nfs/read.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c380cff4108e0..e90988591df4f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -337,6 +338,7 @@ int nfs_read_folio(struct file *file, struct folio *folio) trace_nfs_aop_readpage(inode, folio); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); + task_io_account_read(folio_size(folio)); /* * Try to flush any pending writes to the file.. @@ -393,6 +395,7 @@ void nfs_readahead(struct readahead_control *ractl) trace_nfs_aop_readahead(inode, readahead_pos(ractl), nr_pages); nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); + task_io_account_read(readahead_length(ractl)); ret = -ESTALE; if (NFS_STALE(inode)) From 7ff84910c66c9144cc0de9d9deed9fb84c03aff0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 14 Mar 2023 06:20:58 -0400 Subject: [PATCH 121/898] lockd: set file_lock start and end when decoding nlm4 testargs Commit 6930bcbfb6ce dropped the setting of the file_lock range when decoding a nlm_lock off the wire. This causes the client side grant callback to miss matching blocks and reject the lock, only to rerequest it 30s later. Add a helper function to set the file_lock range from the start and end values that the protocol uses, and have the nlm_lock decoder call that to set up the file_lock args properly. Fixes: 6930bcbfb6ce ("lockd: detect and reject lock arguments that overflow") Reported-by: Amir Goldstein Signed-off-by: Jeff Layton Tested-by: Amir Goldstein Cc: stable@vger.kernel.org #6.0 Signed-off-by: Anna Schumaker --- fs/lockd/clnt4xdr.c | 9 +-------- fs/lockd/xdr4.c | 13 ++++++++++++- include/linux/lockd/xdr4.h | 1 + 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index 7df6324ccb8ab..8161667c976f8 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -261,7 +261,6 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result) u32 exclusive; int error; __be32 *p; - s32 end; memset(lock, 0, sizeof(*lock)); locks_init_lock(fl); @@ -285,13 +284,7 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result) fl->fl_type = exclusive != 0 ? F_WRLCK : F_RDLCK; p = xdr_decode_hyper(p, &l_offset); xdr_decode_hyper(p, &l_len); - end = l_offset + l_len - 1; - - fl->fl_start = (loff_t)l_offset; - if (l_len == 0 || end < 0) - fl->fl_end = OFFSET_MAX; - else - fl->fl_end = (loff_t)end; + nlm4svc_set_file_lock_range(fl, l_offset, l_len); error = 0; out: return error; diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 712fdfeb8ef06..5fcbf30cd2759 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -33,6 +33,17 @@ loff_t_to_s64(loff_t offset) return res; } +void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len) +{ + s64 end = off + len - 1; + + fl->fl_start = off; + if (len == 0 || end < 0) + fl->fl_end = OFFSET_MAX; + else + fl->fl_end = end; +} + /* * NLM file handles are defined by specification to be a variable-length * XDR opaque no longer than 1024 bytes. However, this implementation @@ -80,7 +91,7 @@ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) locks_init_lock(fl); fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; - + nlm4svc_set_file_lock_range(fl, lock->lock_start, lock->lock_len); return true; } diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 9a6b55da8fd64..72831e35dca32 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -22,6 +22,7 @@ #define nlm4_fbig cpu_to_be32(NLM_FBIG) #define nlm4_failed cpu_to_be32(NLM_FAILED) +void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len); bool nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); From 21fd9e8700de86d1169f6336e97d7a74916ed04a Mon Sep 17 00:00:00 2001 From: Chengen Du Date: Wed, 8 Mar 2023 16:03:27 +0800 Subject: [PATCH 122/898] NFS: Correct timing for assigning access cache timestamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the user's login time is newer than the cache's timestamp, the original entry in the RB-tree will be replaced by a new entry. Currently, the timestamp is only set if the entry is not found in the RB-tree, which can cause the timestamp to be undefined when the entry exists. This may result in a significant increase in ACCESS operations if the timestamp is set to zero. Signed-off-by: Chengen Du Fixes: 0eb43812c027 ("NFS: Clear the file access cache upon login”) Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a41c3ee4549c0..6fbcbb8d6587a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -3089,7 +3089,6 @@ static void nfs_access_add_rbtree(struct inode *inode, else goto found; } - set->timestamp = ktime_get_ns(); rb_link_node(&set->rb_node, parent, p); rb_insert_color(&set->rb_node, root_node); list_add_tail(&set->lru, &nfsi->access_cache_entry_lru); @@ -3114,6 +3113,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set, cache->fsgid = cred->fsgid; cache->group_info = get_group_info(cred->group_info); cache->mask = set->mask; + cache->timestamp = ktime_get_ns(); /* The above field assignments must be visible * before this item appears on the lru. We cannot easily From f959325e6ac3f499450088b8d9c626d1177be160 Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Fri, 10 Mar 2023 11:33:25 -0800 Subject: [PATCH 123/898] fsverity: Remove WQ_UNBOUND from fsverity read workqueue WQ_UNBOUND causes significant scheduler latency on ARM64/Android. This is problematic for latency sensitive workloads, like I/O post-processing. Removing WQ_UNBOUND gives a 96% reduction in fsverity workqueue related scheduler latency and improves app cold startup times by ~30ms. WQ_UNBOUND was also removed from the dm-verity workqueue for the same reason [1]. This code was tested by running Android app startup benchmarks and measuring how long the fsverity workqueue spent in the runnable state. Before Total workqueue scheduler latency: 553800us After Total workqueue scheduler latency: 18962us [1]: https://lore.kernel.org/all/20230202012348.885402-1-nhuck@google.com/ Signed-off-by: Nathan Huckleberry Fixes: 8a1d0f9cacc9 ("fs-verity: add data verification hooks for ->readpages()") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230310193325.620493-1-nhuck@google.com Signed-off-by: Eric Biggers --- fs/verity/verify.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/verity/verify.c b/fs/verity/verify.c index f50e3b5b52c93..e2508222750b3 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -387,15 +387,15 @@ EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work); int __init fsverity_init_workqueue(void) { /* - * Use an unbound workqueue to allow bios to be verified in parallel - * even when they happen to complete on the same CPU. This sacrifices - * locality, but it's worthwhile since hashing is CPU-intensive. + * Use a high-priority workqueue to prioritize verification work, which + * blocks reads from completing, over regular application tasks. * - * Also use a high-priority workqueue to prioritize verification work, - * which blocks reads from completing, over regular application tasks. + * For performance reasons, don't use an unbound workqueue. Using an + * unbound workqueue for crypto operations causes excessive scheduler + * latency on ARM64. */ fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue", - WQ_UNBOUND | WQ_HIGHPRI, + WQ_HIGHPRI, num_online_cpus()); if (!fsverity_read_workqueue) return -ENOMEM; From 139f6973bf140c65d4d1d4bde5485badb4454d7a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 12 Mar 2023 14:25:23 +0100 Subject: [PATCH 124/898] wifi: mwifiex: mark OF related data as maybe unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can be compile tested with !CONFIG_OF making certain data unused: drivers/net/wireless/marvell/mwifiex/sdio.c:498:34: error: ‘mwifiex_sdio_of_match_table’ defined but not used [-Werror=unused-const-variable=] drivers/net/wireless/marvell/mwifiex/pcie.c:175:34: error: ‘mwifiex_pcie_of_match_table’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Reviewed-by: Simon Horman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230312132523.352182-1-krzysztof.kozlowski@linaro.org --- drivers/net/wireless/marvell/mwifiex/pcie.c | 2 +- drivers/net/wireless/marvell/mwifiex/sdio.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 5dcf61761a165..9a698a16a8f38 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -172,7 +172,7 @@ static const struct mwifiex_pcie_device mwifiex_pcie8997 = { .can_ext_scan = true, }; -static const struct of_device_id mwifiex_pcie_of_match_table[] = { +static const struct of_device_id mwifiex_pcie_of_match_table[] __maybe_unused = { { .compatible = "pci11ab,2b42" }, { .compatible = "pci1b4b,2b42" }, { } diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index c64e24c10ea65..a24bd40dd41ab 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -495,7 +495,7 @@ static struct memory_type_mapping mem_type_mapping_tbl[] = { {"EXTLAST", NULL, 0, 0xFE}, }; -static const struct of_device_id mwifiex_sdio_of_match_table[] = { +static const struct of_device_id mwifiex_sdio_of_match_table[] __maybe_unused = { { .compatible = "marvell,sd8787" }, { .compatible = "marvell,sd8897" }, { .compatible = "marvell,sd8978" }, From 007ae9b268ba7553e479608cf9735d3c4672a2ab Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Tue, 14 Mar 2023 22:11:22 +0100 Subject: [PATCH 125/898] wifi: mac80211: Serialize ieee80211_handle_wake_tx_queue() ieee80211_handle_wake_tx_queue must not run concurrent multiple times. It calls ieee80211_txq_schedule_start() and the drivers migrated to iTXQ do not expect overlapping drv_tx() calls. This fixes 'c850e31f79f0 ("wifi: mac80211: add internal handler for wake_tx_queue")', which introduced ieee80211_handle_wake_tx_queue. Drivers started to use it with 'a790cc3a4fad ("wifi: mac80211: add wake_tx_queue callback to drivers")'. But only after fixing an independent bug with '4444bc2116ae ("wifi: mac80211: Proper mark iTXQs for resumption")' problematic concurrent calls really happened and exposed the initial issue. Fixes: c850e31f79f0 ("wifi: mac80211: add internal handler for wake_tx_queue") Reported-by: Thomas Mann Link: https://bugzilla.kernel.org/show_bug.cgi?id=217119 Link: https://lore.kernel.org/r/b8efebc6-4399-d0b8-b2a0-66843314616b@leemhuis.info/ Link: https://lore.kernel.org/r/b7445607128a6b9ed7c17fcdcf3679bfaf4aaea.camel@sipsolutions.net> CC: Signed-off-by: Alexander Wetzel Link: https://lore.kernel.org/r/20230314211122.111688-1-alexander@wetzel-home.de [add missing spin_lock_init() noticed by Felix] Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 3 +++ net/mac80211/main.c | 2 ++ net/mac80211/util.c | 3 +++ 3 files changed, 8 insertions(+) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ecc232eb1ee82..e082582e0aa28 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1284,6 +1284,9 @@ struct ieee80211_local { struct list_head active_txqs[IEEE80211_NUM_ACS]; u16 schedule_round[IEEE80211_NUM_ACS]; + /* serializes ieee80211_handle_wake_tx_queue */ + spinlock_t handle_wake_tx_queue_lock; + u16 airtime_flags; u32 aql_txq_limit_low[IEEE80211_NUM_ACS]; u32 aql_txq_limit_high[IEEE80211_NUM_ACS]; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 846528850612a..ddf2b7811c557 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -802,6 +802,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, local->aql_threshold = IEEE80211_AQL_THRESHOLD; atomic_set(&local->aql_total_pending_airtime, 0); + spin_lock_init(&local->handle_wake_tx_queue_lock); + INIT_LIST_HEAD(&local->chanctx_list); mutex_init(&local->chanctx_mtx); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1a28fe5cb614f..3aceb3b731bf4 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -314,6 +314,8 @@ void ieee80211_handle_wake_tx_queue(struct ieee80211_hw *hw, struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->vif); struct ieee80211_txq *queue; + spin_lock(&local->handle_wake_tx_queue_lock); + /* Use ieee80211_next_txq() for airtime fairness accounting */ ieee80211_txq_schedule_start(hw, txq->ac); while ((queue = ieee80211_next_txq(hw, txq->ac))) { @@ -321,6 +323,7 @@ void ieee80211_handle_wake_tx_queue(struct ieee80211_hw *hw, ieee80211_return_txq(hw, queue, false); } ieee80211_txq_schedule_end(hw, txq->ac); + spin_unlock(&local->handle_wake_tx_queue_lock); } EXPORT_SYMBOL(ieee80211_handle_wake_tx_queue); From 7c3940bf81e5664cdb50c3fedfec8f0a756a34fb Mon Sep 17 00:00:00 2001 From: "GuoRui.Yu" Date: Thu, 23 Feb 2023 00:53:15 +0800 Subject: [PATCH 126/898] swiotlb: fix the deadlock in swiotlb_do_find_slots In general, if swiotlb is sufficient, the logic of index = wrap_area_index(mem, index + 1) is fine, it will quickly take a slot and release the area->lock; But if swiotlb is insufficient and the device has min_align_mask requirements, such as NVME, we may not be able to satisfy index == wrap and exit the loop properly. In this case, other kernel threads will not be able to acquire the area->lock and release the slot, resulting in a deadlock. The current implementation of wrap_area_index does not involve a modulo operation, so adjusting the wrap to ensure the loop ends is not trivial. Introduce a new variable to record the number of loops and exit the loop after completing the traversal. Backtraces: Other CPUs are waiting this core to exit the swiotlb_do_find_slots loop. [10199.924391] RIP: 0010:swiotlb_do_find_slots+0x1fe/0x3e0 [10199.924403] Call Trace: [10199.924404] [10199.924405] swiotlb_tbl_map_single+0xec/0x1f0 [10199.924407] swiotlb_map+0x5c/0x260 [10199.924409] ? nvme_pci_setup_prps+0x1ed/0x340 [10199.924411] dma_direct_map_page+0x12e/0x1c0 [10199.924413] nvme_map_data+0x304/0x370 [10199.924415] nvme_prep_rq.part.0+0x31/0x120 [10199.924417] nvme_queue_rq+0x77/0x1f0 ... [ 9639.596311] NMI backtrace for cpu 48 [ 9639.596336] Call Trace: [ 9639.596337] [ 9639.596338] _raw_spin_lock_irqsave+0x37/0x40 [ 9639.596341] swiotlb_do_find_slots+0xef/0x3e0 [ 9639.596344] swiotlb_tbl_map_single+0xec/0x1f0 [ 9639.596347] swiotlb_map+0x5c/0x260 [ 9639.596349] dma_direct_map_sg+0x7a/0x280 [ 9639.596352] __dma_map_sg_attrs+0x30/0x70 [ 9639.596355] dma_map_sgtable+0x1d/0x30 [ 9639.596356] nvme_map_data+0xce/0x370 ... [ 9639.595665] NMI backtrace for cpu 50 [ 9639.595682] Call Trace: [ 9639.595682] [ 9639.595683] _raw_spin_lock_irqsave+0x37/0x40 [ 9639.595686] swiotlb_release_slots.isra.0+0x86/0x180 [ 9639.595688] dma_direct_unmap_sg+0xcf/0x1a0 [ 9639.595690] nvme_unmap_data.part.0+0x43/0xc0 Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask") Signed-off-by: GuoRui.Yu Signed-off-by: Xiaokang Hu Signed-off-by: Christoph Hellwig --- kernel/dma/swiotlb.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 03e3251cd9d2b..91454b513db06 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -625,8 +625,8 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, unsigned int iotlb_align_mask = dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); unsigned int nslots = nr_slots(alloc_size), stride; - unsigned int index, wrap, count = 0, i; unsigned int offset = swiotlb_align_offset(dev, orig_addr); + unsigned int index, slots_checked, count = 0, i; unsigned long flags; unsigned int slot_base; unsigned int slot_index; @@ -649,15 +649,16 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, goto not_found; slot_base = area_index * mem->area_nslabs; - index = wrap = wrap_area_index(mem, ALIGN(area->index, stride)); + index = wrap_area_index(mem, ALIGN(area->index, stride)); - do { + for (slots_checked = 0; slots_checked < mem->area_nslabs; ) { slot_index = slot_base + index; if (orig_addr && (slot_addr(tbl_dma_addr, slot_index) & iotlb_align_mask) != (orig_addr & iotlb_align_mask)) { index = wrap_area_index(mem, index + 1); + slots_checked++; continue; } @@ -673,7 +674,8 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, goto found; } index = wrap_area_index(mem, index + stride); - } while (index != wrap); + slots_checked += stride; + } not_found: spin_unlock_irqrestore(&area->lock, flags); From 1c3ab6dfa0692c3626580a508cf84e794201b357 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 2 Mar 2023 09:54:12 +0800 Subject: [PATCH 127/898] btrfs: handle missing chunk mapping more gracefully [BUG] During my scrub rework, I did a stupid thing like this: bio->bi_iter.bi_sector = stripe->logical; btrfs_submit_bio(fs_info, bio, stripe->mirror_num); Above bi_sector assignment is using logical address directly, which lacks ">> SECTOR_SHIFT". This results a read on a range which has no chunk mapping. This results the following crash: BTRFS critical (device dm-1): unable to find logical 11274289152 length 65536 assertion failed: !IS_ERR(em), in fs/btrfs/volumes.c:6387 Sure this is all my fault, but this shows a possible problem in real world, that some bit flip in file extents/tree block can point to unmapped ranges, and trigger above ASSERT(), or if CONFIG_BTRFS_ASSERT is not configured, cause invalid pointer access. [PROBLEMS] In the above call chain, we just don't handle the possible error from btrfs_get_chunk_map() inside __btrfs_map_block(). [FIX] The fix is straightforward, replace the ASSERT() with proper error handling (callers handle errors already). Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7823168c08a6a..6d0124b6e79e3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6363,7 +6363,8 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, ASSERT(op != BTRFS_MAP_DISCARD); em = btrfs_get_chunk_map(fs_info, logical, *length); - ASSERT(!IS_ERR(em)); + if (IS_ERR(em)) + return PTR_ERR(em); map = em->map_lookup; data_stripes = nr_data_stripes(map); From 10a8857a1beaa015efba7d56e06243d484549fb6 Mon Sep 17 00:00:00 2001 From: Sweet Tea Dorminy Date: Wed, 8 Mar 2023 10:58:36 -0500 Subject: [PATCH 128/898] btrfs: fix compiler warning on SPARC/PA-RISC handling fscrypt_setup_filename Commit 1ec49744ba83 ("btrfs: turn on -Wmaybe-uninitialized") exposed that on SPARC and PA-RISC, gcc is unaware that fscrypt_setup_filename() only returns negative error values or 0. This ultimately results in a maybe-uninitialized warning in btrfs_lookup_dentry(). Change to only return negative error values or 0 from fscrypt_setup_filename() at the relevant call site, and assert that no positive error codes are returned (which would have wider implications involving other users). Reported-by: Guenter Roeck Link: https://lore.kernel.org/all/481b19b5-83a0-4793-b4fd-194ad7b978c3@roeck-us.net/ Signed-off-by: Sweet Tea Dorminy Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 44e9acc77a748..e99432e4912e7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5421,8 +5421,13 @@ static int btrfs_inode_by_name(struct btrfs_inode *dir, struct dentry *dentry, return -ENOMEM; ret = fscrypt_setup_filename(&dir->vfs_inode, &dentry->d_name, 1, &fname); - if (ret) + if (ret < 0) goto out; + /* + * fscrypt_setup_filename() should never return a positive value, but + * gcc on sparc/parisc thinks it can, so assert that doesn't happen. + */ + ASSERT(ret == 0); /* This needs to handle no-key deletions later on */ From 9e1cdf0c354e46e428c0e0cab008abbe81b6013d Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Mon, 13 Mar 2023 16:29:49 +0900 Subject: [PATCH 129/898] btrfs: zoned: fix btrfs_can_activate_zone() to support DUP profile btrfs_can_activate_zone() returns true if at least one device has one zone available for activation. This is OK for the single profile, but not OK for DUP profile. We need two zones to create a DUP block group. Fix it by properly handling the case with the profile flags. Fixes: 265f7237dd25 ("btrfs: zoned: allow DUP on meta-data block groups") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index f95b2c94d6199..0a330d5410a02 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2086,11 +2086,21 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags) if (!device->bdev) continue; - if (!zinfo->max_active_zones || - atomic_read(&zinfo->active_zones_left)) { + if (!zinfo->max_active_zones) { ret = true; break; } + + switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + case 0: /* single */ + ret = (atomic_read(&zinfo->active_zones_left) >= 1); + break; + case BTRFS_BLOCK_GROUP_DUP: + ret = (atomic_read(&zinfo->active_zones_left) >= 2); + break; + } + if (ret) + break; } mutex_unlock(&fs_info->chunk_mutex); From bf1f1fec2724a33b67ec12032402ea75f2a83622 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 1 Mar 2023 16:14:42 -0500 Subject: [PATCH 130/898] btrfs: rename BTRFS_FS_NO_OVERCOMMIT to BTRFS_FS_ACTIVE_ZONE_TRACKING This flag only gets set when we're doing active zone tracking, and we're going to need to use this flag for things related to this behavior. Rename the flag to represent what it actually means for the file system so it can be used in other ways and still make sense. Reviewed-by: Naohiro Aota Reviewed-by: Johannes Thumshirn Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/fs.h | 7 ++----- fs/btrfs/space-info.c | 2 +- fs/btrfs/zoned.c | 3 +-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index 4c477eae68914..24cd492294086 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -120,11 +120,8 @@ enum { /* Indicate that we want to commit the transaction. */ BTRFS_FS_NEED_TRANS_COMMIT, - /* - * Indicate metadata over-commit is disabled. This is set when active - * zone tracking is needed. - */ - BTRFS_FS_NO_OVERCOMMIT, + /* This is set when active zone tracking is needed. */ + BTRFS_FS_ACTIVE_ZONE_TRACKING, /* * Indicate if we have some features changed, this is mostly for diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 69c09508afb50..2237685d1ed0c 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -407,7 +407,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, return 0; used = btrfs_space_info_used(space_info, true); - if (test_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags) && + if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags) && (space_info->flags & BTRFS_BLOCK_GROUP_METADATA)) avail = 0; else diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 0a330d5410a02..22d1e930a9165 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -524,8 +524,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) } atomic_set(&zone_info->active_zones_left, max_active_zones - nactive); - /* Overcommit does not work well with active zone tacking. */ - set_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags); + set_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags); } /* Validate superblock log */ From df384da5a49cace5c5e3100803dfd563fd982f93 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 1 Mar 2023 16:14:43 -0500 Subject: [PATCH 131/898] btrfs: use temporary variable for space_info in btrfs_update_block_group We do cache->space_info->counter += num_bytes; everywhere in here. This is makes the lines longer than they need to be, and will be especially noticeable when we add the active tracking in, so add a temp variable for the space_info so this is cleaner. Reviewed-by: Naohiro Aota Reviewed-by: Johannes Thumshirn Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 0ef8b8926bfa7..1a31bcd554d0f 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3476,6 +3476,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, spin_unlock(&info->delalloc_root_lock); while (total) { + struct btrfs_space_info *space_info; bool reclaim = false; cache = btrfs_lookup_block_group(info, bytenr); @@ -3483,6 +3484,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, ret = -ENOENT; break; } + space_info = cache->space_info; factor = btrfs_bg_type_to_factor(cache->flags); /* @@ -3497,7 +3499,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, byte_in_group = bytenr - cache->start; WARN_ON(byte_in_group > cache->length); - spin_lock(&cache->space_info->lock); + spin_lock(&space_info->lock); spin_lock(&cache->lock); if (btrfs_test_opt(info, SPACE_CACHE) && @@ -3510,24 +3512,24 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, old_val += num_bytes; cache->used = old_val; cache->reserved -= num_bytes; - cache->space_info->bytes_reserved -= num_bytes; - cache->space_info->bytes_used += num_bytes; - cache->space_info->disk_used += num_bytes * factor; + space_info->bytes_reserved -= num_bytes; + space_info->bytes_used += num_bytes; + space_info->disk_used += num_bytes * factor; spin_unlock(&cache->lock); - spin_unlock(&cache->space_info->lock); + spin_unlock(&space_info->lock); } else { old_val -= num_bytes; cache->used = old_val; cache->pinned += num_bytes; - btrfs_space_info_update_bytes_pinned(info, - cache->space_info, num_bytes); - cache->space_info->bytes_used -= num_bytes; - cache->space_info->disk_used -= num_bytes * factor; + btrfs_space_info_update_bytes_pinned(info, space_info, + num_bytes); + space_info->bytes_used -= num_bytes; + space_info->disk_used -= num_bytes * factor; reclaim = should_reclaim_block_group(cache, num_bytes); spin_unlock(&cache->lock); - spin_unlock(&cache->space_info->lock); + spin_unlock(&space_info->lock); set_extent_dirty(&trans->transaction->pinned_extents, bytenr, bytenr + num_bytes - 1, From fa2068d7e922b434eba5bfb0131e6d39febfdb48 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Mon, 13 Mar 2023 16:06:13 +0900 Subject: [PATCH 132/898] btrfs: zoned: count fresh BG region as zone unusable The naming of space_info->active_total_bytes is misleading. It counts not only active block groups but also full ones which are previously active but now inactive. That confusion results in a bug not counting the full BGs into active_total_bytes on mount time. For a background, there are three kinds of block groups in terms of activation. 1. Block groups never activated 2. Block groups currently active 3. Block groups previously active and currently inactive (due to fully written or zone finish) What we really wanted to exclude from "total_bytes" is the total size of BGs #1. They seem empty and allocatable but since they are not activated, we cannot rely on them to do the space reservation. And, since BGs #1 never get activated, they should have no "used", "reserved" and "pinned" bytes. OTOH, BGs #3 can be counted in the "total", since they are already full we cannot allocate from them anyway. For them, "total_bytes == used + reserved + pinned + zone_unusable" should hold. Tracking #2 and #3 as "active_total_bytes" (current implementation) is confusing. And, tracking #1 and subtract that properly from "total_bytes" every time you need space reservation is cumbersome. Instead, we can count the whole region of a newly allocated block group as zone_unusable. Then, once that block group is activated, release [0 .. zone_capacity] from the zone_unusable counters. With this, we can eliminate the confusing ->active_total_bytes and the code will be common among regular and the zoned mode. Also, no additional counter is needed with this approach. Fixes: 6a921de58992 ("btrfs: zoned: introduce space_info->active_total_bytes") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 8 +++++++- fs/btrfs/zoned.c | 24 +++++++++++++++++++----- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 0d250d052487c..d84cef89cdff5 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2693,8 +2693,13 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold); spin_lock(&ctl->tree_lock); + /* Count initial region as zone_unusable until it gets activated. */ if (!used) to_free = size; + else if (initial && + test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &block_group->fs_info->flags) && + (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM))) + to_free = 0; else if (initial) to_free = block_group->zone_capacity; else if (offset >= block_group->alloc_offset) @@ -2722,7 +2727,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, reclaimable_unusable = block_group->zone_unusable - (block_group->length - block_group->zone_capacity); /* All the region is now unusable. Mark it as unused and reclaim */ - if (block_group->zone_unusable == block_group->length) { + if (block_group->zone_unusable == block_group->length && + block_group->alloc_offset) { btrfs_mark_bg_unused(block_group); } else if (bg_reclaim_threshold && reclaimable_unusable >= diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 22d1e930a9165..6828712578ca5 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1580,9 +1580,19 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) return; WARN_ON(cache->bytes_super != 0); - unusable = (cache->alloc_offset - cache->used) + - (cache->length - cache->zone_capacity); - free = cache->zone_capacity - cache->alloc_offset; + + /* Check for block groups never get activated */ + if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &cache->fs_info->flags) && + cache->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM) && + !test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags) && + cache->alloc_offset == 0) { + unusable = cache->length; + free = 0; + } else { + unusable = (cache->alloc_offset - cache->used) + + (cache->length - cache->zone_capacity); + free = cache->zone_capacity - cache->alloc_offset; + } /* We only need ->free_space in ALLOC_SEQ block groups */ cache->cached = BTRFS_CACHE_FINISHED; @@ -1901,7 +1911,11 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) /* Successfully activated all the zones */ set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); - space_info->active_total_bytes += block_group->length; + WARN_ON(block_group->alloc_offset != 0); + if (block_group->zone_unusable == block_group->length) { + block_group->zone_unusable = block_group->length - block_group->zone_capacity; + space_info->bytes_zone_unusable -= block_group->zone_capacity; + } spin_unlock(&block_group->lock); btrfs_try_granting_tickets(fs_info, space_info); spin_unlock(&space_info->lock); @@ -2265,7 +2279,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info) u64 avail; spin_lock(&block_group->lock); - if (block_group->reserved || + if (block_group->reserved || block_group->alloc_offset == 0 || (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) { spin_unlock(&block_group->lock); continue; From e15acc25880cf048dba9df94d76ed7e7e10040e6 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Mon, 13 Mar 2023 16:06:14 +0900 Subject: [PATCH 133/898] btrfs: zoned: drop space_info->active_total_bytes The space_info->active_total_bytes is no longer necessary as we now count the region of newly allocated block group as zone_unusable. Drop its usage. Fixes: 6a921de58992 ("btrfs: zoned: introduce space_info->active_total_bytes") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 6 ------ fs/btrfs/space-info.c | 40 +++++++++------------------------------- fs/btrfs/space-info.h | 2 -- fs/btrfs/zoned.c | 4 ---- 4 files changed, 9 insertions(+), 43 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 1a31bcd554d0f..5fc670c27f864 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1175,14 +1175,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, < block_group->zone_unusable); WARN_ON(block_group->space_info->disk_total < block_group->length * factor); - WARN_ON(test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, - &block_group->runtime_flags) && - block_group->space_info->active_total_bytes - < block_group->length); } block_group->space_info->total_bytes -= block_group->length; - if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) - block_group->space_info->active_total_bytes -= block_group->length; block_group->space_info->bytes_readonly -= (block_group->length - block_group->zone_unusable); block_group->space_info->bytes_zone_unusable -= diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 2237685d1ed0c..3eecce86f63fc 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -308,8 +308,6 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, ASSERT(found); spin_lock(&found->lock); found->total_bytes += block_group->length; - if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) - found->active_total_bytes += block_group->length; found->disk_total += block_group->length * factor; found->bytes_used += block_group->used; found->disk_used += block_group->used * factor; @@ -379,22 +377,6 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, return avail; } -static inline u64 writable_total_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info) -{ - /* - * On regular filesystem, all total_bytes are always writable. On zoned - * filesystem, there may be a limitation imposed by max_active_zones. - * For metadata allocation, we cannot finish an existing active block - * group to avoid a deadlock. Thus, we need to consider only the active - * groups to be writable for metadata space. - */ - if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA)) - return space_info->total_bytes; - - return space_info->active_total_bytes; -} - int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info, u64 bytes, enum btrfs_reserve_flush_enum flush) @@ -413,7 +395,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, else avail = calc_available_free_space(fs_info, space_info, flush); - if (used + bytes < writable_total_bytes(fs_info, space_info) + avail) + if (used + bytes < space_info->total_bytes + avail) return 1; return 0; } @@ -449,7 +431,7 @@ void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info, ticket = list_first_entry(head, struct reserve_ticket, list); /* Check and see if our ticket can be satisfied now. */ - if ((used + ticket->bytes <= writable_total_bytes(fs_info, space_info)) || + if ((used + ticket->bytes <= space_info->total_bytes) || btrfs_can_overcommit(fs_info, space_info, ticket->bytes, flush)) { btrfs_space_info_update_bytes_may_use(fs_info, @@ -829,7 +811,6 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, { u64 used; u64 avail; - u64 total; u64 to_reclaim = space_info->reclaim_size; lockdep_assert_held(&space_info->lock); @@ -844,9 +825,8 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, * space. If that's the case add in our overage so we make sure to put * appropriate pressure on the flushing state machine. */ - total = writable_total_bytes(fs_info, space_info); - if (total + avail < used) - to_reclaim += used - (total + avail); + if (space_info->total_bytes + avail < used) + to_reclaim += used - (space_info->total_bytes + avail); return to_reclaim; } @@ -856,11 +836,10 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, { u64 global_rsv_size = fs_info->global_block_rsv.reserved; u64 ordered, delalloc; - u64 total = writable_total_bytes(fs_info, space_info); u64 thresh; u64 used; - thresh = mult_perc(total, 90); + thresh = mult_perc(space_info->total_bytes, 90); lockdep_assert_held(&space_info->lock); @@ -923,8 +902,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, BTRFS_RESERVE_FLUSH_ALL); used = space_info->bytes_used + space_info->bytes_reserved + space_info->bytes_readonly + global_rsv_size; - if (used < total) - thresh += total - used; + if (used < space_info->total_bytes) + thresh += space_info->total_bytes - used; thresh >>= space_info->clamp; used = space_info->bytes_pinned; @@ -1651,7 +1630,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info, * can_overcommit() to ensure we can overcommit to continue. */ if (!pending_tickets && - ((used + orig_bytes <= writable_total_bytes(fs_info, space_info)) || + ((used + orig_bytes <= space_info->total_bytes) || btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) { btrfs_space_info_update_bytes_may_use(fs_info, space_info, orig_bytes); @@ -1665,8 +1644,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info, */ if (ret && unlikely(flush == BTRFS_RESERVE_FLUSH_EMERGENCY)) { used = btrfs_space_info_used(space_info, false); - if (used + orig_bytes <= - writable_total_bytes(fs_info, space_info)) { + if (used + orig_bytes <= space_info->total_bytes) { btrfs_space_info_update_bytes_may_use(fs_info, space_info, orig_bytes); ret = 0; diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index fc99ea2b0c34f..2033b71b18cec 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -96,8 +96,6 @@ struct btrfs_space_info { u64 bytes_may_use; /* number of bytes that may be used for delalloc/allocations */ u64 bytes_readonly; /* total bytes that are read only */ - /* Total bytes in the space, but only accounts active block groups. */ - u64 active_total_bytes; u64 bytes_zone_unusable; /* total bytes that are unusable until resetting the device zone */ diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 6828712578ca5..45d04092f2f8c 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2316,10 +2316,6 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info, if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA)) return 0; - /* No more block groups to activate */ - if (space_info->active_total_bytes == space_info->total_bytes) - return 0; - for (;;) { int ret; bool need_finish = false; From a075bacde257f755bea0e53400c9f1cdd1b8e8e6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 14 Mar 2023 16:31:32 -0700 Subject: [PATCH 134/898] fsverity: don't drop pagecache at end of FS_IOC_ENABLE_VERITY The full pagecache drop at the end of FS_IOC_ENABLE_VERITY is causing performance problems and is hindering adoption of fsverity. It was intended to solve a race condition where unverified pages might be left in the pagecache. But actually it doesn't solve it fully. Since the incomplete solution for this race condition has too much performance impact for it to be worth it, let's remove it for now. Fixes: 3fda4c617e84 ("fs-verity: implement FS_IOC_ENABLE_VERITY ioctl") Cc: stable@vger.kernel.org Reviewed-by: Victor Hsieh Link: https://lore.kernel.org/r/20230314235332.50270-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/verity/enable.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/fs/verity/enable.c b/fs/verity/enable.c index e13db6507b38b..7a0e3a84d370b 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -8,7 +8,6 @@ #include "fsverity_private.h" #include -#include #include #include @@ -367,25 +366,27 @@ int fsverity_ioctl_enable(struct file *filp, const void __user *uarg) goto out_drop_write; err = enable_verity(filp, &arg); - if (err) - goto out_allow_write_access; /* - * Some pages of the file may have been evicted from pagecache after - * being used in the Merkle tree construction, then read into pagecache - * again by another process reading from the file concurrently. Since - * these pages didn't undergo verification against the file digest which - * fs-verity now claims to be enforcing, we have to wipe the pagecache - * to ensure that all future reads are verified. + * We no longer drop the inode's pagecache after enabling verity. This + * used to be done to try to avoid a race condition where pages could be + * evicted after being used in the Merkle tree construction, then + * re-instantiated by a concurrent read. Such pages are unverified, and + * the backing storage could have filled them with different content, so + * they shouldn't be used to fulfill reads once verity is enabled. + * + * But, dropping the pagecache has a big performance impact, and it + * doesn't fully solve the race condition anyway. So for those reasons, + * and also because this race condition isn't very important relatively + * speaking (especially for small-ish files, where the chance of a page + * being used, evicted, *and* re-instantiated all while enabling verity + * is quite small), we no longer drop the inode's pagecache. */ - filemap_write_and_wait(inode->i_mapping); - invalidate_inode_pages2(inode->i_mapping); /* * allow_write_access() is needed to pair with deny_write_access(). * Regardless, the filesystem won't allow writing to verity files. */ -out_allow_write_access: allow_write_access(filp); out_drop_write: mnt_drop_write_file(filp); From c3aa32ac86fe5f27659f07474995ec743a3251b0 Mon Sep 17 00:00:00 2001 From: Hongren Zheng Date: Thu, 16 Mar 2023 01:31:53 +0800 Subject: [PATCH 135/898] MAINTAINERS: make me a reviewer of USB/IP I think I am familiar enough with USB/IP and is adequate as a reviewer. Every time there is some patch/bug, I wish I can get pinged and I will feedback on that. I had some contributions to USBIP and some support for it. Contribution: Commit 17af79321 ("docs: usbip: Fix major fields and descriptions in protocol") Commit b737eecd4 ("usbip: tools: add options and examples in man page related to device mode") Commit a58977b2f ("usbip: tools: add usage of device mode in usbip_list.c") Support: Commit 8f36b3b4e1 ("usbip: add USBIP_URB_* URB transfer flags") Bug report: https://lore.kernel.org/lkml/ZBHxfUX60EyCMw5l@Sun/ I also have implemented a userspace usbip server in https://github.com/canokeys/canokey-usbip and maintain a list of usbip implementations https://github.com/usbip/implementations Signed-off-by: Hongren (Zenithal) Zheng Acked-by: Shuah Khan Link: https://lore.kernel.org/r/ZBIBCRiFGSqQcOon@Sun Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d5bc223f3053..110944cea89b0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21651,6 +21651,7 @@ USB OVER IP DRIVER M: Valentina Manea M: Shuah Khan M: Shuah Khan +R: Hongren Zheng L: linux-usb@vger.kernel.org S: Maintained F: Documentation/usb/usbip_protocol.rst From 5bc38d33a5a1209fd4de65101d1ae8255ea12c6e Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 7 Mar 2023 06:14:20 -0500 Subject: [PATCH 136/898] usb: cdnsp: Fixes issue with redundant Status Stage In some cases, driver trees to send Status Stage twice. The first one from upper layer of gadget usb subsystem and second time from controller driver. This patch fixes this issue and remove tricky handling of SET_INTERFACE from controller driver which is no longer needed. cc: Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20230307111420.376056-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-ep0.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-ep0.c b/drivers/usb/cdns3/cdnsp-ep0.c index 9b8325f824992..d63d5d92f2554 100644 --- a/drivers/usb/cdns3/cdnsp-ep0.c +++ b/drivers/usb/cdns3/cdnsp-ep0.c @@ -403,20 +403,6 @@ static int cdnsp_ep0_std_request(struct cdnsp_device *pdev, case USB_REQ_SET_ISOCH_DELAY: ret = cdnsp_ep0_set_isoch_delay(pdev, ctrl); break; - case USB_REQ_SET_INTERFACE: - /* - * Add request into pending list to block sending status stage - * by libcomposite. - */ - list_add_tail(&pdev->ep0_preq.list, - &pdev->ep0_preq.pep->pending_list); - - ret = cdnsp_ep0_delegate_req(pdev, ctrl); - if (ret == -EBUSY) - ret = 0; - - list_del(&pdev->ep0_preq.list); - break; default: ret = cdnsp_ep0_delegate_req(pdev, ctrl); break; @@ -474,9 +460,6 @@ void cdnsp_setup_analyze(struct cdnsp_device *pdev) else ret = cdnsp_ep0_delegate_req(pdev, ctrl); - if (!len) - pdev->ep0_stage = CDNSP_STATUS_STAGE; - if (ret == USB_GADGET_DELAYED_STATUS) { trace_cdnsp_ep0_status_stage("delayed"); return; @@ -484,6 +467,6 @@ void cdnsp_setup_analyze(struct cdnsp_device *pdev) out: if (ret < 0) cdnsp_ep0_stall(pdev); - else if (pdev->ep0_stage == CDNSP_STATUS_STAGE) + else if (!len && pdev->ep0_stage != CDNSP_STATUS_STAGE) cdnsp_status_stage(pdev); } From 1272fd652a226ccb34e9f47371b6121948048438 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Wed, 8 Mar 2023 07:44:27 -0500 Subject: [PATCH 137/898] usb: cdns3: Fix issue with using incorrect PCI device function PCI based platform can have more than two PCI functions. USBSS PCI Glue driver during initialization should consider only DRD/HOST/DEVICE PCI functions and all other should be ignored. This patch adds additional condition which causes that only DRD and HOST/DEVICE function will be accepted. cc: Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20230308124427.311245-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-pci-wrap.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/cdns3/cdns3-pci-wrap.c b/drivers/usb/cdns3/cdns3-pci-wrap.c index deeea618ba33b..1f6320d98a76b 100644 --- a/drivers/usb/cdns3/cdns3-pci-wrap.c +++ b/drivers/usb/cdns3/cdns3-pci-wrap.c @@ -60,6 +60,11 @@ static struct pci_dev *cdns3_get_second_fun(struct pci_dev *pdev) return NULL; } + if (func->devfn != PCI_DEV_FN_HOST_DEVICE && + func->devfn != PCI_DEV_FN_OTG) { + return NULL; + } + return func; } From 96b96b2a567fb34dd41c87e6cf01f6902ce8cae4 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Thu, 9 Mar 2023 01:30:48 -0500 Subject: [PATCH 138/898] usb: cdnsp: changes PCI Device ID to fix conflict with CNDS3 driver Patch changes CDNS_DEVICE_ID in USBSSP PCI Glue driver to remove the conflict with Cadence USBSS driver. cc: Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20230309063048.299378-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-pci.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-pci.c b/drivers/usb/cdns3/cdnsp-pci.c index efd54ed918b97..7b151f5af3ccb 100644 --- a/drivers/usb/cdns3/cdnsp-pci.c +++ b/drivers/usb/cdns3/cdnsp-pci.c @@ -29,30 +29,23 @@ #define PLAT_DRIVER_NAME "cdns-usbssp" #define CDNS_VENDOR_ID 0x17cd -#define CDNS_DEVICE_ID 0x0100 +#define CDNS_DEVICE_ID 0x0200 +#define CDNS_DRD_ID 0x0100 #define CDNS_DRD_IF (PCI_CLASS_SERIAL_USB << 8 | 0x80) static struct pci_dev *cdnsp_get_second_fun(struct pci_dev *pdev) { - struct pci_dev *func; - /* * Gets the second function. - * It's little tricky, but this platform has two function. - * The fist keeps resources for Host/Device while the second - * keeps resources for DRD/OTG. + * Platform has two function. The fist keeps resources for + * Host/Device while the secon keeps resources for DRD/OTG. */ - func = pci_get_device(pdev->vendor, pdev->device, NULL); - if (!func) - return NULL; + if (pdev->device == CDNS_DEVICE_ID) + return pci_get_device(pdev->vendor, CDNS_DRD_ID, NULL); + else if (pdev->device == CDNS_DRD_ID) + return pci_get_device(pdev->vendor, CDNS_DEVICE_ID, NULL); - if (func->devfn == pdev->devfn) { - func = pci_get_device(pdev->vendor, pdev->device, func); - if (!func) - return NULL; - } - - return func; + return NULL; } static int cdnsp_pci_probe(struct pci_dev *pdev, @@ -230,6 +223,8 @@ static const struct pci_device_id cdnsp_pci_ids[] = { PCI_CLASS_SERIAL_USB_DEVICE, PCI_ANY_ID }, { PCI_VENDOR_ID_CDNS, CDNS_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, CDNS_DRD_IF, PCI_ANY_ID }, + { PCI_VENDOR_ID_CDNS, CDNS_DRD_ID, PCI_ANY_ID, PCI_ANY_ID, + CDNS_DRD_IF, PCI_ANY_ID }, { 0, } }; From a37eb61b6ec064ac794b8a1e89fd33eb582fe51d Mon Sep 17 00:00:00 2001 From: Yaroslav Furman Date: Sun, 12 Mar 2023 11:07:45 +0200 Subject: [PATCH 139/898] uas: Add US_FL_NO_REPORT_OPCODES for JMicron JMS583Gen 2 Just like other JMicron JMS5xx enclosures, it chokes on report-opcodes, let's avoid them. Signed-off-by: Yaroslav Furman Cc: stable Link: https://lore.kernel.org/r/20230312090745.47962-1-yaro330@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index c7b763d6d1023..1f8c9b16a0fb8 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -111,6 +111,13 @@ UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BROKEN_FUA), +/* Reported by: Yaroslav Furman */ +UNUSUAL_DEV(0x152d, 0x0583, 0x0000, 0x9999, + "JMicron", + "JMS583Gen 2", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_REPORT_OPCODES), + /* Reported-by: Thinh Nguyen */ UNUSUAL_DEV(0x154b, 0xf00b, 0x0000, 0x9999, "PNY", From bbf860ed710bacc0279c4cda2817f70e1200d04b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 13 Mar 2023 17:45:22 +0200 Subject: [PATCH 140/898] usb: gadget: Use correct endianness of the wLength field for WebUSB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WebUSB code uses wLength directly without proper endianness conversion. Update it to use already prepared temporary variable w_length instead. Fixes: 93c473948c58 ("usb: gadget: add WebUSB landing page support") Signed-off-by: Andy Shevchenko Tested-By: Jó Ágila Bitsch Link: https://lore.kernel.org/r/20230313154522.52684-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index fa7dd6cf014d7..5377d873c08eb 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2079,10 +2079,9 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) sizeof(url_descriptor->URL) - WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH + landing_page_offset); - if (ctrl->wLength < WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH - + landing_page_length) - landing_page_length = ctrl->wLength - - WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH + landing_page_offset; + if (w_length < WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH + landing_page_length) + landing_page_length = w_length + - WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH + landing_page_offset; memcpy(url_descriptor->URL, cdev->landing_page + landing_page_offset, From c7df4813b149362248d6ef7be41a311e27bf75fe Mon Sep 17 00:00:00 2001 From: Kal Conley Date: Wed, 8 Mar 2023 18:40:13 +0100 Subject: [PATCH 141/898] xsk: Add missing overflow check in xdp_umem_reg The number of chunks can overflow u32. Make sure to return -EINVAL on overflow. Also remove a redundant u32 cast assigning umem->npgs. Fixes: bbff2f321a86 ("xsk: new descriptor addressing scheme") Signed-off-by: Kal Conley Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20230308174013.1114745-1-kal.conley@dectris.com --- net/xdp/xdp_umem.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 4681e8e8ad943..02207e852d796 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -150,10 +150,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { - u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom; bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; - u64 npgs, addr = mr->addr, size = mr->len; - unsigned int chunks, chunks_rem; + u32 chunk_size = mr->chunk_size, headroom = mr->headroom; + u64 addr = mr->addr, size = mr->len; + u32 chunks_rem, npgs_rem; + u64 chunks, npgs; int err; if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { @@ -188,8 +189,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (npgs > U32_MAX) return -EINVAL; - chunks = (unsigned int)div_u64_rem(size, chunk_size, &chunks_rem); - if (chunks == 0) + chunks = div_u64_rem(size, chunk_size, &chunks_rem); + if (!chunks || chunks > U32_MAX) return -EINVAL; if (!unaligned_chunks && chunks_rem) @@ -202,7 +203,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) umem->headroom = headroom; umem->chunk_size = chunk_size; umem->chunks = chunks; - umem->npgs = (u32)npgs; + umem->npgs = npgs; umem->pgs = NULL; umem->user = NULL; umem->flags = mr->flags; From 32d57f667f871bc5a8babbe27ea4c5e668ee0ea8 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 1 Mar 2023 12:59:07 +0100 Subject: [PATCH 142/898] iavf: fix inverted Rx hash condition leading to disabled hash Condition, which checks whether the netdev has hashing enabled is inverted. Basically, the tagged commit effectively disabled passing flow hash from descriptor to skb, unless user *disables* it via Ethtool. Commit a876c3ba59a6 ("i40e/i40evf: properly report Rx packet hash") fixed this problem, but only for i40e. Invert the condition now in iavf and unblock passing hash to skbs again. Fixes: 857942fd1aa1 ("i40e: Fix Rx hash reported to the stack by our driver") Reviewed-by: Larysa Zaremba Reviewed-by: Michal Kubiak Signed-off-by: Alexander Lobakin Tested-by: Rafal Romanowski Reviewed-by: Leon Romanovsky Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 18b6a702a1d6d..e989feda133c1 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -1096,7 +1096,7 @@ static inline void iavf_rx_hash(struct iavf_ring *ring, cpu_to_le64((u64)IAVF_RX_DESC_FLTSTAT_RSS_HASH << IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT); - if (ring->netdev->features & NETIF_F_RXHASH) + if (!(ring->netdev->features & NETIF_F_RXHASH)) return; if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) { From de58647b4301fe181f9c38e8b46f7021584ae427 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 1 Mar 2023 12:59:08 +0100 Subject: [PATCH 143/898] iavf: fix non-tunneled IPv6 UDP packet type and hashing Currently, IAVF's decode_rx_desc_ptype() correctly reports payload type of L4 for IPv4 UDP packets and IPv{4,6} TCP, but only L3 for IPv6 UDP. Originally, i40e, ice and iavf were affected. Commit 73df8c9e3e3d ("i40e: Correct UDP packet header for non_tunnel-ipv6") fixed that in i40e, then commit 638a0c8c8861 ("ice: fix incorrect payload indicator on PTYPE") fixed that for ice. IPv6 UDP is L4 obviously. Fix it and make iavf report correct L4 hash type for such packets, so that the stack won't calculate it on CPU when needs it. Fixes: 206812b5fccb ("i40e/i40evf: i40e implementation for skb_set_hash") Reviewed-by: Larysa Zaremba Reviewed-by: Michal Kubiak Signed-off-by: Alexander Lobakin Tested-by: Rafal Romanowski Reviewed-by: Leon Romanovsky Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c index 16c490965b61a..dd11dbbd5551a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_common.c +++ b/drivers/net/ethernet/intel/iavf/iavf_common.c @@ -661,7 +661,7 @@ struct iavf_rx_ptype_decoded iavf_ptype_lookup[BIT(8)] = { /* Non Tunneled IPv6 */ IAVF_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), IAVF_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY3), + IAVF_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), IAVF_PTT_UNUSED_ENTRY(91), IAVF_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), IAVF_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), From 964290ff32d132bf971d45b29f7de39756dab7c8 Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Wed, 15 Mar 2023 13:59:25 -0600 Subject: [PATCH 144/898] iavf: do not track VLAN 0 filters When an interface with the maximum number of VLAN filters is brought up, a spurious error is logged: [257.483082] 8021q: adding VLAN 0 to HW filter on device enp0s3 [257.483094] iavf 0000:00:03.0 enp0s3: Max allowed VLAN filters 8. Remove existing VLANs or disable filtering via Ethtool if supported. The VF driver complains that it cannot add the VLAN 0 filter. On the other hand, the PF driver always adds VLAN 0 filter on VF initialization. The VF does not need to ask the PF for that filter at all. Fix the error by not tracking VLAN 0 filters altogether. With that, the check added by commit 0e710a3ffd0c ("iavf: Fix VF driver counting VLAN 0 filters") in iavf_virtchnl.c is useless and might be confusing if left as it suggests that we track VLAN 0. Fixes: 0e710a3ffd0c ("iavf: Fix VF driver counting VLAN 0 filters") Signed-off-by: Ahmed Zaki Reviewed-by: Michal Kubiak Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 8 ++++++++ drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 2 -- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 3273aeb8fa676..327cd9b1af2c8 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -893,6 +893,10 @@ static int iavf_vlan_rx_add_vid(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); + /* Do not track VLAN 0 filter, always added by the PF on VF init */ + if (!vid) + return 0; + if (!VLAN_FILTERING_ALLOWED(adapter)) return -EIO; @@ -919,6 +923,10 @@ static int iavf_vlan_rx_kill_vid(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); + /* We do not track VLAN 0 filter */ + if (!vid) + return 0; + iavf_del_vlan(adapter, IAVF_VLAN(vid, be16_to_cpu(proto))); if (proto == cpu_to_be16(ETH_P_8021Q)) clear_bit(vid, adapter->vsi.active_cvlans); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 6d23338604bb3..4e17d006c52d4 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -2446,8 +2446,6 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, list_for_each_entry(f, &adapter->vlan_filter_list, list) { if (f->is_new_vlan) { f->is_new_vlan = false; - if (!f->vlan.vid) - continue; if (f->vlan.tpid == ETH_P_8021Q) set_bit(f->vlan.vid, adapter->vsi.active_cvlans); From 65f69851e44d71248b952a687e44759a7abb5016 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Tue, 7 Mar 2023 23:29:17 +0800 Subject: [PATCH 145/898] igb: revert rtnl_lock() that causes deadlock The commit 6faee3d4ee8b ("igb: Add lock to avoid data race") adds rtnl_lock to eliminate a false data race shown below (FREE from device detaching) | (USE from netdev core) igb_remove | igb_ndo_get_vf_config igb_disable_sriov | vf >= adapter->vfs_allocated_count? kfree(adapter->vf_data) | adapter->vfs_allocated_count = 0 | | memcpy(... adapter->vf_data[vf] The above race will never happen and the extra rtnl_lock causes deadlock below [ 141.420169] [ 141.420672] __schedule+0x2dd/0x840 [ 141.421427] schedule+0x50/0xc0 [ 141.422041] schedule_preempt_disabled+0x11/0x20 [ 141.422678] __mutex_lock.isra.13+0x431/0x6b0 [ 141.423324] unregister_netdev+0xe/0x20 [ 141.423578] igbvf_remove+0x45/0xe0 [igbvf] [ 141.423791] pci_device_remove+0x36/0xb0 [ 141.423990] device_release_driver_internal+0xc1/0x160 [ 141.424270] pci_stop_bus_device+0x6d/0x90 [ 141.424507] pci_stop_and_remove_bus_device+0xe/0x20 [ 141.424789] pci_iov_remove_virtfn+0xba/0x120 [ 141.425452] sriov_disable+0x2f/0xf0 [ 141.425679] igb_disable_sriov+0x4e/0x100 [igb] [ 141.426353] igb_remove+0xa0/0x130 [igb] [ 141.426599] pci_device_remove+0x36/0xb0 [ 141.426796] device_release_driver_internal+0xc1/0x160 [ 141.427060] driver_detach+0x44/0x90 [ 141.427253] bus_remove_driver+0x55/0xe0 [ 141.427477] pci_unregister_driver+0x2a/0xa0 [ 141.428296] __x64_sys_delete_module+0x141/0x2b0 [ 141.429126] ? mntput_no_expire+0x4a/0x240 [ 141.429363] ? syscall_trace_enter.isra.19+0x126/0x1a0 [ 141.429653] do_syscall_64+0x5b/0x80 [ 141.429847] ? exit_to_user_mode_prepare+0x14d/0x1c0 [ 141.430109] ? syscall_exit_to_user_mode+0x12/0x30 [ 141.430849] ? do_syscall_64+0x67/0x80 [ 141.431083] ? syscall_exit_to_user_mode_prepare+0x183/0x1b0 [ 141.431770] ? syscall_exit_to_user_mode+0x12/0x30 [ 141.432482] ? do_syscall_64+0x67/0x80 [ 141.432714] ? exc_page_fault+0x64/0x140 [ 141.432911] entry_SYSCALL_64_after_hwframe+0x72/0xdc Since the igb_disable_sriov() will call pci_disable_sriov() before releasing any resources, the netdev core will synchronize the cleanup to avoid any races. This patch removes the useless rtnl_(un)lock to guarantee correctness. CC: stable@vger.kernel.org Fixes: 6faee3d4ee8b ("igb: Add lock to avoid data race") Reported-by: Corinna Vinschen Link: https://lore.kernel.org/intel-wired-lan/ZAcJvkEPqWeJHO2r@calimero.vinschen.de/ Signed-off-by: Lin Ma Tested-by: Corinna Vinschen Reviewed-by: Jacob Keller Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 03bc1e8af575f..5532361b0e945 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3863,9 +3863,7 @@ static void igb_remove(struct pci_dev *pdev) igb_release_hw_control(adapter); #ifdef CONFIG_PCI_IOV - rtnl_lock(); igb_disable_sriov(pdev); - rtnl_unlock(); #endif unregister_netdev(netdev); From 50f303496d92e25b79bdfb73e3707ad0684ad67f Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Tue, 22 Nov 2022 18:28:03 +0900 Subject: [PATCH 146/898] igb: Enable SR-IOV after reinit Enabling SR-IOV causes the virtual functions to make requests to the PF via the mailbox. Notably, E1000_VF_RESET request will happen during the initialization of the VF. However, unless the reinit is done, the VMMB interrupt, which delivers mailbox interrupt from VF to PF will be kept masked and such requests will be silently ignored. Enable SR-IOV at the very end of the procedure to configure the device for SR-IOV so that the PF is configured properly for SR-IOV when a VF is activated. Fixes: fa44f2f185f7 ("igb: Enable SR-IOV configuration via PCI sysfs interface") Signed-off-by: Akihiko Odaki Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 135 ++++++++++------------ 1 file changed, 58 insertions(+), 77 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 5532361b0e945..274c781b55473 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -109,6 +109,7 @@ static void igb_free_all_rx_resources(struct igb_adapter *); static void igb_setup_mrqc(struct igb_adapter *); static int igb_probe(struct pci_dev *, const struct pci_device_id *); static void igb_remove(struct pci_dev *pdev); +static void igb_init_queue_configuration(struct igb_adapter *adapter); static int igb_sw_init(struct igb_adapter *); int igb_open(struct net_device *); int igb_close(struct net_device *); @@ -175,9 +176,7 @@ static void igb_nfc_filter_restore(struct igb_adapter *adapter); #ifdef CONFIG_PCI_IOV static int igb_vf_configure(struct igb_adapter *adapter, int vf); -static int igb_pci_enable_sriov(struct pci_dev *dev, int num_vfs); -static int igb_disable_sriov(struct pci_dev *dev); -static int igb_pci_disable_sriov(struct pci_dev *dev); +static int igb_disable_sriov(struct pci_dev *dev, bool reinit); #endif static int igb_suspend(struct device *); @@ -3665,7 +3664,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) kfree(adapter->shadow_vfta); igb_clear_interrupt_scheme(adapter); #ifdef CONFIG_PCI_IOV - igb_disable_sriov(pdev); + igb_disable_sriov(pdev, false); #endif pci_iounmap(pdev, adapter->io_addr); err_ioremap: @@ -3679,7 +3678,38 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } #ifdef CONFIG_PCI_IOV -static int igb_disable_sriov(struct pci_dev *pdev) +static int igb_sriov_reinit(struct pci_dev *dev) +{ + struct net_device *netdev = pci_get_drvdata(dev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct pci_dev *pdev = adapter->pdev; + + rtnl_lock(); + + if (netif_running(netdev)) + igb_close(netdev); + else + igb_reset(adapter); + + igb_clear_interrupt_scheme(adapter); + + igb_init_queue_configuration(adapter); + + if (igb_init_interrupt_scheme(adapter, true)) { + rtnl_unlock(); + dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); + return -ENOMEM; + } + + if (netif_running(netdev)) + igb_open(netdev); + + rtnl_unlock(); + + return 0; +} + +static int igb_disable_sriov(struct pci_dev *pdev, bool reinit) { struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); @@ -3713,10 +3743,10 @@ static int igb_disable_sriov(struct pci_dev *pdev) adapter->flags |= IGB_FLAG_DMAC; } - return 0; + return reinit ? igb_sriov_reinit(pdev) : 0; } -static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs) +static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs, bool reinit) { struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); @@ -3781,12 +3811,6 @@ static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs) "Unable to allocate memory for VF MAC filter list\n"); } - /* only call pci_enable_sriov() if no VFs are allocated already */ - if (!old_vfs) { - err = pci_enable_sriov(pdev, adapter->vfs_allocated_count); - if (err) - goto err_out; - } dev_info(&pdev->dev, "%d VFs allocated\n", adapter->vfs_allocated_count); for (i = 0; i < adapter->vfs_allocated_count; i++) @@ -3794,6 +3818,17 @@ static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs) /* DMA Coalescing is not supported in IOV mode. */ adapter->flags &= ~IGB_FLAG_DMAC; + + if (reinit) { + err = igb_sriov_reinit(pdev); + if (err) + goto err_out; + } + + /* only call pci_enable_sriov() if no VFs are allocated already */ + if (!old_vfs) + err = pci_enable_sriov(pdev, adapter->vfs_allocated_count); + goto out; err_out: @@ -3863,7 +3898,7 @@ static void igb_remove(struct pci_dev *pdev) igb_release_hw_control(adapter); #ifdef CONFIG_PCI_IOV - igb_disable_sriov(pdev); + igb_disable_sriov(pdev, false); #endif unregister_netdev(netdev); @@ -3909,7 +3944,7 @@ static void igb_probe_vfs(struct igb_adapter *adapter) igb_reset_interrupt_capability(adapter); pci_sriov_set_totalvfs(pdev, 7); - igb_enable_sriov(pdev, max_vfs); + igb_enable_sriov(pdev, max_vfs, false); #endif /* CONFIG_PCI_IOV */ } @@ -9518,71 +9553,17 @@ static void igb_shutdown(struct pci_dev *pdev) } } -#ifdef CONFIG_PCI_IOV -static int igb_sriov_reinit(struct pci_dev *dev) -{ - struct net_device *netdev = pci_get_drvdata(dev); - struct igb_adapter *adapter = netdev_priv(netdev); - struct pci_dev *pdev = adapter->pdev; - - rtnl_lock(); - - if (netif_running(netdev)) - igb_close(netdev); - else - igb_reset(adapter); - - igb_clear_interrupt_scheme(adapter); - - igb_init_queue_configuration(adapter); - - if (igb_init_interrupt_scheme(adapter, true)) { - rtnl_unlock(); - dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); - return -ENOMEM; - } - - if (netif_running(netdev)) - igb_open(netdev); - - rtnl_unlock(); - - return 0; -} - -static int igb_pci_disable_sriov(struct pci_dev *dev) -{ - int err = igb_disable_sriov(dev); - - if (!err) - err = igb_sriov_reinit(dev); - - return err; -} - -static int igb_pci_enable_sriov(struct pci_dev *dev, int num_vfs) -{ - int err = igb_enable_sriov(dev, num_vfs); - - if (err) - goto out; - - err = igb_sriov_reinit(dev); - if (!err) - return num_vfs; - -out: - return err; -} - -#endif static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs) { #ifdef CONFIG_PCI_IOV - if (num_vfs == 0) - return igb_pci_disable_sriov(dev); - else - return igb_pci_enable_sriov(dev, num_vfs); + int err; + + if (num_vfs == 0) { + return igb_disable_sriov(dev, true); + } else { + err = igb_enable_sriov(dev, num_vfs, true); + return err ? err : num_vfs; + } #endif return 0; } From 85eb39bb39cbb5c086df1e19ba67cc1366693a77 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Tue, 22 Nov 2022 10:28:52 +0800 Subject: [PATCH 147/898] intel/igbvf: free irq on the error path in igbvf_request_msix() In igbvf_request_msix(), irqs have not been freed on the err path, we need to free it. Fix it. Fixes: d4e0fe01a38a ("igbvf: add new driver to support 82576 virtual functions") Signed-off-by: Gaosheng Cui Reviewed-by: Maciej Fijalkowski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igbvf/netdev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 3a32809510fc6..72cb1b56e9f24 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -1074,7 +1074,7 @@ static int igbvf_request_msix(struct igbvf_adapter *adapter) igbvf_intr_msix_rx, 0, adapter->rx_ring->name, netdev); if (err) - goto out; + goto free_irq_tx; adapter->rx_ring->itr_register = E1000_EITR(vector); adapter->rx_ring->itr_val = adapter->current_itr; @@ -1083,10 +1083,14 @@ static int igbvf_request_msix(struct igbvf_adapter *adapter) err = request_irq(adapter->msix_entries[vector].vector, igbvf_msix_other, 0, netdev->name, netdev); if (err) - goto out; + goto free_irq_rx; igbvf_configure_msix(adapter); return 0; +free_irq_rx: + free_irq(adapter->msix_entries[--vector].vector, netdev); +free_irq_tx: + free_irq(adapter->msix_entries[--vector].vector, netdev); out: return err; } From 02c83791ef969c6a8a150b4927193d0d0e50fb23 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 1 Dec 2022 19:20:03 +0900 Subject: [PATCH 148/898] igbvf: Regard vf reset nack as success vf reset nack actually represents the reset operation itself is performed but no address is assigned. Therefore, e1000_reset_hw_vf should fill the "perm_addr" with the zero address and return success on such an occasion. This prevents its callers in netdev.c from saying PF still resetting, and instead allows them to correctly report that no address is assigned. Fixes: 6ddbc4cf1f4d ("igb: Indicate failure on vf reset for empty mac address") Signed-off-by: Akihiko Odaki Reviewed-by: Leon Romanovsky Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igbvf/vf.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/igbvf/vf.c b/drivers/net/ethernet/intel/igbvf/vf.c index b8ba3f94c3632..a47a2e3e548cf 100644 --- a/drivers/net/ethernet/intel/igbvf/vf.c +++ b/drivers/net/ethernet/intel/igbvf/vf.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2009 - 2018 Intel Corporation. */ +#include + #include "vf.h" static s32 e1000_check_for_link_vf(struct e1000_hw *hw); @@ -131,11 +133,16 @@ static s32 e1000_reset_hw_vf(struct e1000_hw *hw) /* set our "perm_addr" based on info provided by PF */ ret_val = mbx->ops.read_posted(hw, msgbuf, 3); if (!ret_val) { - if (msgbuf[0] == (E1000_VF_RESET | - E1000_VT_MSGTYPE_ACK)) + switch (msgbuf[0]) { + case E1000_VF_RESET | E1000_VT_MSGTYPE_ACK: memcpy(hw->mac.perm_addr, addr, ETH_ALEN); - else + break; + case E1000_VF_RESET | E1000_VT_MSGTYPE_NACK: + eth_zero_addr(hw->mac.perm_addr); + break; + default: ret_val = -E1000_ERR_MAC_INIT; + } } } From 2b4cc3d3f4d8ec42961e98568a0afeee96a943ab Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Tue, 7 Mar 2023 15:45:31 +0900 Subject: [PATCH 149/898] igc: fix the validation logic for taprio's gate list The check introduced in the commit a5fd39464a40 ("igc: Lift TAPRIO schedule restriction") can detect a false positive error in some corner case. For instance, tc qdisc replace ... taprio num_tc 4 ... sched-entry S 0x01 100000 # slot#1 sched-entry S 0x03 100000 # slot#2 sched-entry S 0x04 100000 # slot#3 sched-entry S 0x08 200000 # slot#4 flags 0x02 # hardware offload Here the queue#0 (the first queue) is on at the slot#1 and #2, and off at the slot#3 and #4. Under the current logic, when the slot#4 is examined, validate_schedule() returns *false* since the enablement count for the queue#0 is two and it is already off at the previous slot (i.e. #3). But this definition is truely correct. Let's fix the logic to enforce a strict validation for consecutively-opened slots. Fixes: a5fd39464a40 ("igc: Lift TAPRIO schedule restriction") Signed-off-by: AKASHI Takahiro Reviewed-by: Kurt Kanzenbach Acked-by: Vinicius Costa Gomes Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 2928a6c736928..25fc6c65209bf 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6010,18 +6010,18 @@ static bool validate_schedule(struct igc_adapter *adapter, if (e->command != TC_TAPRIO_CMD_SET_GATES) return false; - for (i = 0; i < adapter->num_tx_queues; i++) { - if (e->gate_mask & BIT(i)) + for (i = 0; i < adapter->num_tx_queues; i++) + if (e->gate_mask & BIT(i)) { queue_uses[i]++; - /* There are limitations: A single queue cannot be - * opened and closed multiple times per cycle unless the - * gate stays open. Check for it. - */ - if (queue_uses[i] > 1 && - !(prev->gate_mask & BIT(i))) - return false; - } + /* There are limitations: A single queue cannot + * be opened and closed multiple times per cycle + * unless the gate stays open. Check for it. + */ + if (queue_uses[i] > 1 && + !(prev->gate_mask & BIT(i))) + return false; + } } return true; From 748b2f5e82d17480404b3e2895388fc2925f7caf Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 6 Mar 2023 11:18:24 -0800 Subject: [PATCH 150/898] ca8210: Fix unsigned mac_len comparison with zero in ca8210_skb_tx() mac_len is of type unsigned, which can never be less than zero. mac_len = ieee802154_hdr_peek_addrs(skb, &header); if (mac_len < 0) return mac_len; Change this to type int as ieee802154_hdr_peek_addrs() can return negative integers, this is found by static analysis with smatch. Fixes: 6c993779ea1d ("ca8210: fix mac_len negative array access") Signed-off-by: Harshit Mogalapalli Acked-by: Alexander Aring Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230306191824.4115839-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 0b0c6c0764fe9..d0b5129439ed6 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -1902,10 +1902,9 @@ static int ca8210_skb_tx( struct ca8210_priv *priv ) { - int status; struct ieee802154_hdr header = { }; struct secspec secspec; - unsigned int mac_len; + int mac_len, status; dev_dbg(&priv->spi->dev, "%s called\n", __func__); From 6de4b1ab470fe52351415217ac6dffddee571c45 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 10 Mar 2023 13:42:08 -0800 Subject: [PATCH 151/898] xfs: try to idiot-proof the allocators In porting his development branch to 6.3-rc1, yours truly has repeatedly screwed up the args->pag being fed to the xfs_alloc_vextent* functions. Add some debugging assertions to test the preconditions required of the callers. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 6a037173d20d9..8999e38e1bed2 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3279,6 +3279,9 @@ xfs_alloc_vextent_this_ag( xfs_agnumber_t minimum_agno; int error; + ASSERT(args->pag != NULL); + ASSERT(args->pag->pag_agno == agno); + args->agno = agno; args->agbno = 0; error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0), @@ -3394,6 +3397,8 @@ xfs_alloc_vextent_start_ag( bool bump_rotor = false; int error; + ASSERT(args->pag == NULL); + args->agno = NULLAGNUMBER; args->agbno = NULLAGBLOCK; error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); @@ -3442,6 +3447,8 @@ xfs_alloc_vextent_first_ag( xfs_agnumber_t start_agno; int error; + ASSERT(args->pag == NULL); + args->agno = NULLAGNUMBER; args->agbno = NULLAGBLOCK; error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); @@ -3470,6 +3477,9 @@ xfs_alloc_vextent_exact_bno( xfs_agnumber_t minimum_agno; int error; + ASSERT(args->pag != NULL); + ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target)); + args->agno = XFS_FSB_TO_AGNO(mp, target); args->agbno = XFS_FSB_TO_AGBNO(mp, target); error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); @@ -3502,6 +3512,9 @@ xfs_alloc_vextent_near_bno( bool needs_perag = args->pag == NULL; int error; + if (!needs_perag) + ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target)); + args->agno = XFS_FSB_TO_AGNO(mp, target); args->agbno = XFS_FSB_TO_AGBNO(mp, target); error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); From d3aa3e060c4a80827eb801fc448debc9daa7c46b Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Thu, 16 Mar 2023 14:55:06 +0800 Subject: [PATCH 152/898] dm stats: check for and propagate alloc_percpu failure Check alloc_precpu()'s return value and return an error from dm_stats_init() if it fails. Update alloc_dev() to fail if dm_stats_init() does. Otherwise, a NULL pointer dereference will occur in dm_stats_cleanup() even if dm-stats isn't being actively used. Fixes: fd2ed4d25270 ("dm: add statistics support") Cc: stable@vger.kernel.org Signed-off-by: Jiasheng Jiang Signed-off-by: Mike Snitzer --- drivers/md/dm-stats.c | 7 ++++++- drivers/md/dm-stats.h | 2 +- drivers/md/dm.c | 4 +++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index c21a19ab73f70..db2d997a6c181 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -188,7 +188,7 @@ static int dm_stat_in_flight(struct dm_stat_shared *shared) atomic_read(&shared->in_flight[WRITE]); } -void dm_stats_init(struct dm_stats *stats) +int dm_stats_init(struct dm_stats *stats) { int cpu; struct dm_stats_last_position *last; @@ -197,11 +197,16 @@ void dm_stats_init(struct dm_stats *stats) INIT_LIST_HEAD(&stats->list); stats->precise_timestamps = false; stats->last = alloc_percpu(struct dm_stats_last_position); + if (!stats->last) + return -ENOMEM; + for_each_possible_cpu(cpu) { last = per_cpu_ptr(stats->last, cpu); last->last_sector = (sector_t)ULLONG_MAX; last->last_rw = UINT_MAX; } + + return 0; } void dm_stats_cleanup(struct dm_stats *stats) diff --git a/drivers/md/dm-stats.h b/drivers/md/dm-stats.h index 0bc152c8e4f31..c6728c8b41594 100644 --- a/drivers/md/dm-stats.h +++ b/drivers/md/dm-stats.h @@ -21,7 +21,7 @@ struct dm_stats_aux { unsigned long long duration_ns; }; -void dm_stats_init(struct dm_stats *st); +int dm_stats_init(struct dm_stats *st); void dm_stats_cleanup(struct dm_stats *st); struct mapped_device; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index eace45a18d456..b6ace995b9ca3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2097,7 +2097,9 @@ static struct mapped_device *alloc_dev(int minor) if (!md->pending_io) goto bad; - dm_stats_init(&md->stats); + r = dm_stats_init(&md->stats); + if (r < 0) + goto bad; /* Populate the mapping, nobody knows we exist yet */ spin_lock(&_minor_lock); From 987dd36c0141f6ab9f0fbf14d6b2ec3342dedb2f Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 30 Jan 2023 16:32:46 +0100 Subject: [PATCH 153/898] i2c: imx-lpi2c: clean rx/tx buffers upon new message When start sending a new message clear the Rx & Tx buffer pointers in order to avoid using stale pointers. Signed-off-by: Alexander Stein Tested-by: Emanuele Ghidoli Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx-lpi2c.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index 188f2a36d2fd6..c6d0225246e69 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -463,6 +463,8 @@ static int lpi2c_imx_xfer(struct i2c_adapter *adapter, if (num == 1 && msgs[0].len == 0) goto stop; + lpi2c_imx->rx_buf = NULL; + lpi2c_imx->tx_buf = NULL; lpi2c_imx->delivered = 0; lpi2c_imx->msglen = msgs[i].len; init_completion(&lpi2c_imx->complete); From 1c7885004567e8951d65a983be095f254dd20bef Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 30 Jan 2023 16:32:47 +0100 Subject: [PATCH 154/898] i2c: imx-lpi2c: check only for enabled interrupt flags When reading from I2C, the Tx watermark is set to 0. Unfortunately the TDF (transmit data flag) is enabled when Tx FIFO entries is equal or less than watermark. So it is set in every case, hence the reset default of 1. This results in the MSR_RDF _and_ MSR_TDF flags to be set thus trying to send Tx data on a read message. Mask the IRQ status to filter for wanted flags only. Fixes: a55fa9d0e42e ("i2c: imx-lpi2c: add low power i2c bus driver") Signed-off-by: Alexander Stein Tested-by: Emanuele Ghidoli Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx-lpi2c.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index c6d0225246e69..a49b14d52a986 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -505,10 +505,14 @@ static int lpi2c_imx_xfer(struct i2c_adapter *adapter, static irqreturn_t lpi2c_imx_isr(int irq, void *dev_id) { struct lpi2c_imx_struct *lpi2c_imx = dev_id; + unsigned int enabled; unsigned int temp; + enabled = readl(lpi2c_imx->base + LPI2C_MIER); + lpi2c_imx_intctrl(lpi2c_imx, 0); temp = readl(lpi2c_imx->base + LPI2C_MSR); + temp &= enabled; if (temp & MSR_RDF) lpi2c_imx_read_rxfifo(lpi2c_imx); From 5190417bdf72c71b65bd9892103c6186816a6e8b Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 13 Feb 2023 16:25:50 +0100 Subject: [PATCH 155/898] i2c: mxs: ensure that DMA buffers are safe for DMA We found that after commit 9c46929e7989 ("ARM: implement THREAD_INFO_IN_TASK for uniprocessor systems"), the PCF85063 RTC driver stopped working on i.MX28 due to regmap_bulk_read() reading bogus data into a stack buffer. This is caused by the i2c-mxs driver using DMA transfers even for messages without the I2C_M_DMA_SAFE flag, and the aforementioned commit enabling vmapped stacks. As the MXS I2C controller requires DMA for reads of >4 bytes, DMA can't be disabled, so the issue is fixed by using i2c_get_dma_safe_msg_buf() to create a bounce buffer when needed. Fixes: 9c46929e7989 ("ARM: implement THREAD_INFO_IN_TASK for uniprocessor systems") Signed-off-by: Matthias Schiffer Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mxs.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c index d113bed795452..e0f3b3545cfe4 100644 --- a/drivers/i2c/busses/i2c-mxs.c +++ b/drivers/i2c/busses/i2c-mxs.c @@ -171,7 +171,7 @@ static void mxs_i2c_dma_irq_callback(void *param) } static int mxs_i2c_dma_setup_xfer(struct i2c_adapter *adap, - struct i2c_msg *msg, uint32_t flags) + struct i2c_msg *msg, u8 *buf, uint32_t flags) { struct dma_async_tx_descriptor *desc; struct mxs_i2c_dev *i2c = i2c_get_adapdata(adap); @@ -226,7 +226,7 @@ static int mxs_i2c_dma_setup_xfer(struct i2c_adapter *adap, } /* Queue the DMA data transfer. */ - sg_init_one(&i2c->sg_io[1], msg->buf, msg->len); + sg_init_one(&i2c->sg_io[1], buf, msg->len); dma_map_sg(i2c->dev, &i2c->sg_io[1], 1, DMA_FROM_DEVICE); desc = dmaengine_prep_slave_sg(i2c->dmach, &i2c->sg_io[1], 1, DMA_DEV_TO_MEM, @@ -259,7 +259,7 @@ static int mxs_i2c_dma_setup_xfer(struct i2c_adapter *adap, /* Queue the DMA data transfer. */ sg_init_table(i2c->sg_io, 2); sg_set_buf(&i2c->sg_io[0], &i2c->addr_data, 1); - sg_set_buf(&i2c->sg_io[1], msg->buf, msg->len); + sg_set_buf(&i2c->sg_io[1], buf, msg->len); dma_map_sg(i2c->dev, i2c->sg_io, 2, DMA_TO_DEVICE); desc = dmaengine_prep_slave_sg(i2c->dmach, i2c->sg_io, 2, DMA_MEM_TO_DEV, @@ -563,6 +563,7 @@ static int mxs_i2c_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, struct mxs_i2c_dev *i2c = i2c_get_adapdata(adap); int ret; int flags; + u8 *dma_buf; int use_pio = 0; unsigned long time_left; @@ -588,13 +589,20 @@ static int mxs_i2c_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, if (ret && (ret != -ENXIO)) mxs_i2c_reset(i2c); } else { + dma_buf = i2c_get_dma_safe_msg_buf(msg, 1); + if (!dma_buf) + return -ENOMEM; + reinit_completion(&i2c->cmd_complete); - ret = mxs_i2c_dma_setup_xfer(adap, msg, flags); - if (ret) + ret = mxs_i2c_dma_setup_xfer(adap, msg, dma_buf, flags); + if (ret) { + i2c_put_dma_safe_msg_buf(dma_buf, msg, false); return ret; + } time_left = wait_for_completion_timeout(&i2c->cmd_complete, msecs_to_jiffies(1000)); + i2c_put_dma_safe_msg_buf(dma_buf, msg, true); if (!time_left) goto timeout; From cc9812a3096d1986caca9a23bee99effc45c08df Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 13 Mar 2023 15:45:51 +0800 Subject: [PATCH 156/898] i2c: hisi: Avoid redundant interrupts After issuing all the messages we can disable the TX_EMPTY interrupts to avoid handling redundant interrupts. For doing a sinlge bus detection (i2cdetect -y -r 0) we can reduce ~97% interrupts (before ~12000 after ~400). Signed-off-by: Sheng Feng Signed-off-by: Yicong Yang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-hisi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/i2c/busses/i2c-hisi.c b/drivers/i2c/busses/i2c-hisi.c index 8c6c7075c765c..1b7609a34f4a7 100644 --- a/drivers/i2c/busses/i2c-hisi.c +++ b/drivers/i2c/busses/i2c-hisi.c @@ -316,6 +316,13 @@ static void hisi_i2c_xfer_msg(struct hisi_i2c_controller *ctlr) max_write == 0) break; } + + /* + * Disable the TX_EMPTY interrupt after finishing all the messages to + * avoid overwhelming the CPU. + */ + if (ctlr->msg_tx_idx == ctlr->msg_num) + hisi_i2c_disable_int(ctlr, HISI_I2C_INT_TX_EMPTY); } static irqreturn_t hisi_i2c_irq(int irq, void *context) From d98263512684a47e81bcb72a5408958ecd1e60b0 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 13 Mar 2023 15:45:52 +0800 Subject: [PATCH 157/898] i2c: hisi: Only use the completion interrupt to finish the transfer The controller will always generate a completion interrupt when the transfer is finished normally or not. Currently we use either error or completion interrupt to finish, this may result the completion interrupt unhandled and corrupt the next transfer, especially at low speed mode. Since on error case, the error interrupt will come first then is the completion interrupt. So only use the completion interrupt to finish the whole transfer process. Fixes: d62fbdb99a85 ("i2c: add support for HiSilicon I2C controller") Reported-by: Sheng Feng Signed-off-by: Sheng Feng Signed-off-by: Yicong Yang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-hisi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-hisi.c b/drivers/i2c/busses/i2c-hisi.c index 1b7609a34f4a7..e067671b3ce2e 100644 --- a/drivers/i2c/busses/i2c-hisi.c +++ b/drivers/i2c/busses/i2c-hisi.c @@ -348,7 +348,11 @@ static irqreturn_t hisi_i2c_irq(int irq, void *context) hisi_i2c_read_rx_fifo(ctlr); out: - if (int_stat & HISI_I2C_INT_TRANS_CPLT || ctlr->xfer_err) { + /* + * Only use TRANS_CPLT to indicate the completion. On error cases we'll + * get two interrupts, INT_ERR first then TRANS_CPLT. + */ + if (int_stat & HISI_I2C_INT_TRANS_CPLT) { hisi_i2c_disable_int(ctlr, HISI_I2C_INT_ALL); hisi_i2c_clear_int(ctlr, HISI_I2C_INT_ALL); complete(ctlr->completion); From 92fbb6d1296f81f41f65effd7f5f8c0f74943d15 Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Tue, 14 Mar 2023 16:54:21 +0000 Subject: [PATCH 158/898] i2c: xgene-slimpro: Fix out-of-bounds bug in xgene_slimpro_i2c_xfer() The data->block[0] variable comes from user and is a number between 0-255. Without proper check, the variable may be very large to cause an out-of-bounds when performing memcpy in slimpro_i2c_blkwr. Fix this bug by checking the value of writelen. Fixes: f6505fbabc42 ("i2c: add SLIMpro I2C device driver on APM X-Gene platform") Signed-off-by: Wei Chen Cc: stable@vger.kernel.org Reviewed-by: Andi Shyti Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xgene-slimpro.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c index 63259b3ea5abd..3538d36368a90 100644 --- a/drivers/i2c/busses/i2c-xgene-slimpro.c +++ b/drivers/i2c/busses/i2c-xgene-slimpro.c @@ -308,6 +308,9 @@ static int slimpro_i2c_blkwr(struct slimpro_i2c_dev *ctx, u32 chip, u32 msg[3]; int rc; + if (writelen > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + memcpy(ctx->dma_buffer, data, writelen); paddr = dma_map_single(ctx->dev, ctx->dma_buffer, writelen, DMA_TO_DEVICE); From 47d43086531f10539470a63e8ad92803e686a3dd Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Thu, 16 Mar 2023 10:36:24 +0800 Subject: [PATCH 159/898] clk: sprd: set max_register according to mapping range In sprd clock driver, regmap_config.max_register was set to a fixed value which is likely larger than the address range configured in device tree, when reading registers through debugfs it would cause access violation. Fixes: d41f59fd92f2 ("clk: sprd: Add common infrastructure") Signed-off-by: Chunyan Zhang Link: https://lore.kernel.org/r/20230316023624.758204-1-chunyan.zhang@unisoc.com Signed-off-by: Stephen Boyd --- drivers/clk/sprd/common.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/clk/sprd/common.c b/drivers/clk/sprd/common.c index ce81e4087a8fc..2bfbab8db94bf 100644 --- a/drivers/clk/sprd/common.c +++ b/drivers/clk/sprd/common.c @@ -17,7 +17,6 @@ static const struct regmap_config sprdclk_regmap_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .max_register = 0xffff, .fast_io = true, }; @@ -43,6 +42,8 @@ int sprd_clk_regmap_init(struct platform_device *pdev, struct device *dev = &pdev->dev; struct device_node *node = dev->of_node, *np; struct regmap *regmap; + struct resource *res; + struct regmap_config reg_config = sprdclk_regmap_config; if (of_find_property(node, "sprd,syscon", NULL)) { regmap = syscon_regmap_lookup_by_phandle(node, "sprd,syscon"); @@ -59,12 +60,14 @@ int sprd_clk_regmap_init(struct platform_device *pdev, return PTR_ERR(regmap); } } else { - base = devm_platform_ioremap_resource(pdev, 0); + base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(base)) return PTR_ERR(base); + reg_config.max_register = resource_size(res) - reg_config.reg_stride; + regmap = devm_regmap_init_mmio(&pdev->dev, base, - &sprdclk_regmap_config); + ®_config); if (IS_ERR(regmap)) { pr_err("failed to init regmap\n"); return PTR_ERR(regmap); From e86fc1a3a3e9b4850fe74d738e3cfcf4297d8bba Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Mar 2023 17:45:45 +0000 Subject: [PATCH 160/898] KVM: arm64: Disable interrupts while walking userspace PTs We walk the userspace PTs to discover what mapping size was used there. However, this can race against the userspace tables being freed, and we end-up in the weeds. Thankfully, the mm code is being generous and will IPI us when doing so. So let's implement our part of the bargain and disable interrupts around the walk. This ensures that nothing terrible happens during that time. We still need to handle the removal of the page tables before the walk. For that, allow get_user_mapping_size() to return an error, and make sure this error can be propagated all the way to the the exit handler. Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230316174546.3777507-2-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 45 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index f54408355d1dc..cd819725193bb 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -666,14 +666,33 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr) CONFIG_PGTABLE_LEVELS), .mm_ops = &kvm_user_mm_ops, }; + unsigned long flags; kvm_pte_t pte = 0; /* Keep GCC quiet... */ u32 level = ~0; int ret; + /* + * Disable IRQs so that we hazard against a concurrent + * teardown of the userspace page tables (which relies on + * IPI-ing threads). + */ + local_irq_save(flags); ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level); - VM_BUG_ON(ret); - VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS); - VM_BUG_ON(!(pte & PTE_VALID)); + local_irq_restore(flags); + + if (ret) + return ret; + + /* + * Not seeing an error, but not updating level? Something went + * deeply wrong... + */ + if (WARN_ON(level >= KVM_PGTABLE_MAX_LEVELS)) + return -EFAULT; + + /* Oops, the userspace PTs are gone... Replay the fault */ + if (!kvm_pte_valid(pte)) + return -EAGAIN; return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level)); } @@ -1079,7 +1098,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot, * * Returns the size of the mapping. */ -static unsigned long +static long transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long hva, kvm_pfn_t *pfnp, phys_addr_t *ipap) @@ -1091,8 +1110,15 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot, * sure that the HVA and IPA are sufficiently aligned and that the * block map is contained within the memslot. */ - if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) && - get_user_mapping_size(kvm, hva) >= PMD_SIZE) { + if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) { + int sz = get_user_mapping_size(kvm, hva); + + if (sz < 0) + return sz; + + if (sz < PMD_SIZE) + return PAGE_SIZE; + /* * The address we faulted on is backed by a transparent huge * page. However, because we map the compound huge page and @@ -1203,7 +1229,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_pfn_t pfn; bool logging_active = memslot_is_logging(memslot); unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu); - unsigned long vma_pagesize, fault_granule; + long vma_pagesize, fault_granule; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; struct kvm_pgtable *pgt; @@ -1344,6 +1370,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_pagesize = transparent_hugepage_adjust(kvm, memslot, hva, &pfn, &fault_ipa); + + if (vma_pagesize < 0) { + ret = vma_pagesize; + goto out_unlock; + } } if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) { From 8c2e8ac8ad4be68409e806ce1cc78fc7a04539f3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Mar 2023 17:45:46 +0000 Subject: [PATCH 161/898] KVM: arm64: Check for kvm_vma_mte_allowed in the critical section On page fault, we find about the VMA that backs the page fault early on, and quickly release the mmap_read_lock. However, using the VMA pointer after the critical section is pretty dangerous, as a teardown may happen in the meantime and the VMA be long gone. Move the sampling of the MTE permission early, and NULL-ify the VMA pointer after that, just to be on the safe side. Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230316174546.3777507-3-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index cd819725193bb..3b9d4d24c361a 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1218,7 +1218,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, { int ret = 0; bool write_fault, writable, force_pte = false; - bool exec_fault; + bool exec_fault, mte_allowed; bool device = false; unsigned long mmu_seq; struct kvm *kvm = vcpu->kvm; @@ -1309,6 +1309,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, fault_ipa &= ~(vma_pagesize - 1); gfn = fault_ipa >> PAGE_SHIFT; + mte_allowed = kvm_vma_mte_allowed(vma); + + /* Don't use the VMA after the unlock -- it may have vanished */ + vma = NULL; /* * Read mmu_invalidate_seq so that KVM can detect if the results of @@ -1379,7 +1383,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) { /* Check the VMM hasn't introduced a new disallowed VMA */ - if (kvm_vma_mte_allowed(vma)) { + if (mte_allowed) { sanitise_mte_tags(kvm, pfn, vma_pagesize); } else { ret = -EFAULT; From a204b490595de71016b2360a1886ec8c12d0afac Mon Sep 17 00:00:00 2001 From: Joel Selvaraj Date: Sun, 12 Mar 2023 23:14:02 -0500 Subject: [PATCH 162/898] scsi: core: Add BLIST_SKIP_VPD_PAGES for SKhynix H28U74301AMR Xiaomi Poco F1 (qcom/sdm845-xiaomi-beryllium*.dts) comes with a SKhynix H28U74301AMR UFS. The sd_read_cpr() operation leads to a 120 second timeout, making the device bootup very slow: [ 121.457736] sd 0:0:0:1: [sdb] tag#23 timing out command, waited 120s Setting the BLIST_SKIP_VPD_PAGES allows the device to skip the failing sd_read_cpr operation and boot normally. Signed-off-by: Joel Selvaraj Link: https://lore.kernel.org/r/20230313041402.39330-1-joelselvaraj.oss@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_devinfo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index bc9d280417f6a..3fcaf10a9dfe7 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -234,6 +234,7 @@ static struct { {"SGI", "RAID5", "*", BLIST_SPARSELUN}, {"SGI", "TP9100", "*", BLIST_REPORTLUN2}, {"SGI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, + {"SKhynix", "H28U74301AMR", NULL, BLIST_SKIP_VPD_PAGES}, {"IBM", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"SUN", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"DELL", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, From 0367076b0817d5c75dfb83001ce7ce5c64d803a9 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Sun, 12 Mar 2023 21:37:10 -0700 Subject: [PATCH 163/898] scsi: qla2xxx: Perform lockless command completion in abort path While adding and removing the controller, the following call trace was observed: WARNING: CPU: 3 PID: 623596 at kernel/dma/mapping.c:532 dma_free_attrs+0x33/0x50 CPU: 3 PID: 623596 Comm: sh Kdump: loaded Not tainted 5.14.0-96.el9.x86_64 #1 RIP: 0010:dma_free_attrs+0x33/0x50 Call Trace: qla2x00_async_sns_sp_done+0x107/0x1b0 [qla2xxx] qla2x00_abort_srb+0x8e/0x250 [qla2xxx] ? ql_dbg+0x70/0x100 [qla2xxx] __qla2x00_abort_all_cmds+0x108/0x190 [qla2xxx] qla2x00_abort_all_cmds+0x24/0x70 [qla2xxx] qla2x00_abort_isp_cleanup+0x305/0x3e0 [qla2xxx] qla2x00_remove_one+0x364/0x400 [qla2xxx] pci_device_remove+0x36/0xa0 __device_release_driver+0x17a/0x230 device_release_driver+0x24/0x30 pci_stop_bus_device+0x68/0x90 pci_stop_and_remove_bus_device_locked+0x16/0x30 remove_store+0x75/0x90 kernfs_fop_write_iter+0x11c/0x1b0 new_sync_write+0x11f/0x1b0 vfs_write+0x1eb/0x280 ksys_write+0x5f/0xe0 do_syscall_64+0x5c/0x80 ? do_user_addr_fault+0x1d8/0x680 ? do_syscall_64+0x69/0x80 ? exc_page_fault+0x62/0x140 ? asm_exc_page_fault+0x8/0x30 entry_SYSCALL_64_after_hwframe+0x44/0xae The command was completed in the abort path during driver unload with a lock held, causing the warning in abort path. Hence complete the command without any lock held. Reported-by: Lin Li Tested-by: Lin Li Cc: stable@vger.kernel.org Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230313043711.13500-2-njavali@marvell.com Reviewed-by: Himanshu Madhani Reviewed-by: John Meneghini Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 80c4ee9df2a4f..bee1b8a820207 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1865,6 +1865,17 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res) for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { sp = req->outstanding_cmds[cnt]; if (sp) { + /* + * perform lockless completion during driver unload + */ + if (qla2x00_chip_is_down(vha)) { + req->outstanding_cmds[cnt] = NULL; + spin_unlock_irqrestore(qp->qp_lock_ptr, flags); + sp->done(sp, res); + spin_lock_irqsave(qp->qp_lock_ptr, flags); + continue; + } + switch (sp->cmd_type) { case TYPE_SRB: qla2x00_abort_srb(qp, sp, res, &flags); From d3affdeb400f3adc925bd996f3839481f5291839 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Sun, 12 Mar 2023 21:37:11 -0700 Subject: [PATCH 164/898] scsi: qla2xxx: Synchronize the IOCB count to be in order A system hang was observed with the following call trace: BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 15 PID: 86747 Comm: nvme Kdump: loaded Not tainted 6.2.0+ #1 Hardware name: Dell Inc. PowerEdge R6515/04F3CJ, BIOS 2.7.3 03/31/2022 RIP: 0010:__wake_up_common+0x55/0x190 Code: 41 f6 01 04 0f 85 b2 00 00 00 48 8b 43 08 4c 8d 40 e8 48 8d 43 08 48 89 04 24 48 89 c6\ 49 8d 40 18 48 39 c6 0f 84 e9 00 00 00 <49> 8b 40 18 89 6c 24 14 31 ed 4c 8d 60 e8 41 8b 18 f6 c3 04 75 5d RSP: 0018:ffffb05a82afbba0 EFLAGS: 00010082 RAX: 0000000000000000 RBX: ffff8f9b83a00018 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff8f9b83a00020 RDI: ffff8f9b83a00018 RBP: 0000000000000001 R08: ffffffffffffffe8 R09: ffffb05a82afbbf8 R10: 70735f7472617473 R11: 5f30307832616c71 R12: 0000000000000001 R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f815cf4c740(0000) GS:ffff8f9eeed80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000010633a000 CR4: 0000000000350ee0 Call Trace: __wake_up_common_lock+0x83/0xd0 qla_nvme_ls_req+0x21b/0x2b0 [qla2xxx] __nvme_fc_send_ls_req+0x1b5/0x350 [nvme_fc] nvme_fc_xmt_disconnect_assoc+0xca/0x110 [nvme_fc] nvme_fc_delete_association+0x1bf/0x220 [nvme_fc] ? nvme_remove_namespaces+0x9f/0x140 [nvme_core] nvme_do_delete_ctrl+0x5b/0xa0 [nvme_core] nvme_sysfs_delete+0x5f/0x70 [nvme_core] kernfs_fop_write_iter+0x12b/0x1c0 vfs_write+0x2a3/0x3b0 ksys_write+0x5f/0xe0 do_syscall_64+0x5c/0x90 ? syscall_exit_work+0x103/0x130 ? syscall_exit_to_user_mode+0x12/0x30 ? do_syscall_64+0x69/0x90 ? exit_to_user_mode_loop+0xd0/0x130 ? exit_to_user_mode_prepare+0xec/0x100 ? syscall_exit_to_user_mode+0x12/0x30 ? do_syscall_64+0x69/0x90 ? syscall_exit_to_user_mode+0x12/0x30 ? do_syscall_64+0x69/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7f815cd3eb97 The IOCB counts are out of order and that would block any commands from going out and subsequently hang the system. Synchronize the IOCB count to be in correct order. Fixes: 5f63a163ed2f ("scsi: qla2xxx: Fix exchange oversubscription for management commands") Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230313043711.13500-3-njavali@marvell.com Reviewed-by: Himanshu Madhani Reviewed-by: John Meneghini Tested-by: Lin Li Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_isr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 030625ebb4e65..71feda2cdb630 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1900,6 +1900,8 @@ qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func, } req->outstanding_cmds[index] = NULL; + + qla_put_fw_resources(sp->qpair, &sp->iores); return sp; } @@ -3112,7 +3114,6 @@ qla25xx_process_bidir_status_iocb(scsi_qla_host_t *vha, void *pkt, } bsg_reply->reply_payload_rcv_len = 0; - qla_put_fw_resources(sp->qpair, &sp->iores); done: /* Return the vendor specific reply to API */ bsg_reply->reply_data.vendor_reply.vendor_rsp[0] = rval; From a13faca032acbf2699293587085293bdfaafc8ae Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 15 Mar 2023 14:21:54 +0800 Subject: [PATCH 165/898] scsi: scsi_dh_alua: Fix memleak for 'qdata' in alua_activate() If alua_rtpg_queue() failed from alua_activate(), then 'qdata' is not freed, which will cause following memleak: unreferenced object 0xffff88810b2c6980 (size 32): comm "kworker/u16:2", pid 635322, jiffies 4355801099 (age 1216426.076s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 40 39 24 c1 ff ff ff ff 00 f8 ea 0a 81 88 ff ff @9$............. backtrace: [<0000000098f3a26d>] alua_activate+0xb0/0x320 [<000000003b529641>] scsi_dh_activate+0xb2/0x140 [<000000007b296db3>] activate_path_work+0xc6/0xe0 [dm_multipath] [<000000007adc9ace>] process_one_work+0x3c5/0x730 [<00000000c457a985>] worker_thread+0x93/0x650 [<00000000cb80e628>] kthread+0x1ba/0x210 [<00000000a1e61077>] ret_from_fork+0x22/0x30 Fix the problem by freeing 'qdata' in error path. Fixes: 625fe857e4fa ("scsi: scsi_dh_alua: Check scsi_device_get() return value") Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20230315062154.668812-1-yukuai1@huaweicloud.com Reviewed-by: Benjamin Block Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 362fa631f39b2..a226dc1b65d71 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -1145,10 +1145,12 @@ static int alua_activate(struct scsi_device *sdev, rcu_read_unlock(); mutex_unlock(&h->init_mutex); - if (alua_rtpg_queue(pg, sdev, qdata, true)) + if (alua_rtpg_queue(pg, sdev, qdata, true)) { fn = NULL; - else + } else { + kfree(qdata); err = SCSI_DH_DEV_OFFLINED; + } kref_put(&pg->kref, release_port_group); out: if (fn) From 6eff38048944cadc3cddcf117acfa5199ec32490 Mon Sep 17 00:00:00 2001 From: Rajnesh Kanwal Date: Fri, 10 Feb 2023 14:27:11 +0000 Subject: [PATCH 166/898] riscv/kvm: Fix VM hang in case of timer delta being zero. In case when VCPU is blocked due to WFI, we schedule the timer from `kvm_riscv_vcpu_timer_blocking()` to keep timer interrupt ticking. But in case when delta_ns comes to be zero, we never schedule the timer and VCPU keeps sleeping indefinitely until any activity is done with VM console. This is easily reproduce-able using kvmtool. ./lkvm-static run -c1 --console virtio -p "earlycon root=/dev/vda" \ -k ./Image -d rootfs.ext4 Also, just add a print in kvm_riscv_vcpu_vstimer_expired() to check the interrupt delivery and run `top` or similar auto-upating cmd from guest. Within sometime one can notice that print from timer expiry routine stops and the `top` cmd output will stop updating. This change fixes this by making sure we schedule the timer even with delta_ns being zero to bring the VCPU out of sleep immediately. Fixes: 8f5cb44b1bae ("RISC-V: KVM: Support sstc extension") Signed-off-by: Rajnesh Kanwal Reviewed-by: Atish Patra Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_timer.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index ad34519c8a13d..3ac2ff6a65dac 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -147,10 +147,8 @@ static void kvm_riscv_vcpu_timer_blocking(struct kvm_vcpu *vcpu) return; delta_ns = kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t); - if (delta_ns) { - hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL); - t->next_set = true; - } + hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL); + t->next_set = true; } static void kvm_riscv_vcpu_timer_unblocking(struct kvm_vcpu *vcpu) From 984cfd55e0c99e80b2e5b1dc6b2bf98608af7ff9 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Wed, 8 Mar 2023 16:32:31 +0800 Subject: [PATCH 167/898] net: ieee802154: remove an unnecessary null pointer check llsec_parse_seclevel has the null pointer check at its begining. Compared with nl802154_add_llsec_seclevel, nl802154_del_llsec_seclevel has a redundant null pointer check of info->attrs[NL802154_ATTR_SEC_LEVEL] before llsec_parse_seclevel. Fix this issue by removing the null pointer check in nl802154_del_llsec_seclevel. Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20230308083231.460015-1-dzm91@hust.edu.cn Signed-off-by: Stefan Schmidt --- net/ieee802154/nl802154.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index d8f4379d4fa68..832e3c50816c4 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -2488,8 +2488,7 @@ static int nl802154_del_llsec_seclevel(struct sk_buff *skb, if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) return -EOPNOTSUPP; - if (!info->attrs[NL802154_ATTR_SEC_LEVEL] || - llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL], + if (llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL], &sl) < 0) return -EINVAL; From f5bad62f9107b701a6def7cac1f5f65862219b83 Mon Sep 17 00:00:00 2001 From: Jonathan Denose Date: Fri, 17 Mar 2023 03:19:51 -0700 Subject: [PATCH 168/898] Input: i8042 - add quirk for Fujitsu Lifebook A574/H Fujitsu Lifebook A574/H requires the nomux option to properly probe the touchpad, especially when waking from sleep. Signed-off-by: Jonathan Denose Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20230303152623.45859-1-jdenose@google.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index efc61736099b9..fe7ffe30997c2 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -610,6 +610,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, .driver_data = (void *)(SERIO_QUIRK_NOMUX) }, + { + /* Fujitsu Lifebook A574/H */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "FMVA0501PZ"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX) + }, { /* Gigabyte M912 */ .matches = { From 8a0432bab6ea3203d220785da7ab3c7677f70ecb Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 17 Mar 2023 03:13:12 -0700 Subject: [PATCH 169/898] Input: goodix - add Lenovo Yoga Book X90F to nine_bytes_report DMI table The Android Lenovo Yoga Book X90F / X90L uses the same goodix touchscreen with 9 bytes touch reports for its touch keyboard as the already supported Windows Lenovo Yoga Book X91F/L, add a DMI match for this to the nine_bytes_report DMI table. When the quirk for the X91F/L was initially added it was written to also apply to the X90F/L but this does not work because the Android version of the Yoga Book uses completely different DMI strings. Also adjust the X91F/L quirk to reflect that it only applies to the X91F/L models. Signed-off-by: Hans de Goede Reviewed-by: Bastien Nocera Link: https://lore.kernel.org/r/20230315134442.71787-1-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index b348172f19c3d..d77f116680a0a 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -124,10 +124,18 @@ static const unsigned long goodix_irq_flags[] = { static const struct dmi_system_id nine_bytes_report[] = { #if defined(CONFIG_DMI) && defined(CONFIG_X86) { - .ident = "Lenovo YogaBook", - /* YB1-X91L/F and YB1-X90L/F */ + /* Lenovo Yoga Book X90F / X90L */ .matches = { - DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9") + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"), + } + }, + { + /* Lenovo Yoga Book X91F / X91L */ + .matches = { + /* Non exact match to match F + L versions */ + DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"), } }, #endif From f8acb24aaf89fc46cd953229462ea8abe31b395f Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 15 Mar 2023 08:34:13 -0700 Subject: [PATCH 170/898] x86/hyperv: Block root partition functionality in a Confidential VM Hyper-V should never specify a VM that is a Confidential VM and also running in the root partition. Nonetheless, explicitly block such a combination to guard against a compromised Hyper-V maliciously trying to exploit root partition functionality in a Confidential VM to expose Confidential VM secrets. No known bug is being fixed, but the attack surface for Confidential VMs on Hyper-V is reduced. Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1678894453-95392-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- arch/x86/kernel/cpu/mshyperv.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index f36dc2f796c5c..f1197366a97dc 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -358,12 +358,16 @@ static void __init ms_hyperv_init_platform(void) * To mirror what Windows does we should extract CPU management * features and use the ReservedIdentityBit to detect if Linux is the * root partition. But that requires negotiating CPU management - * interface (a process to be finalized). + * interface (a process to be finalized). For now, use the privilege + * flag as the indicator for running as root. * - * For now, use the privilege flag as the indicator for running as - * root. + * Hyper-V should never specify running as root and as a Confidential + * VM. But to protect against a compromised/malicious Hyper-V trying + * to exploit root behavior to expose Confidential VM memory, ignore + * the root partition setting if also a Confidential VM. */ - if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_CPU_MANAGEMENT) { + if ((ms_hyperv.priv_high & HV_CPU_MANAGEMENT) && + !(ms_hyperv.priv_high & HV_ISOLATION)) { hv_root_partition = true; pr_info("Hyper-V: running as root partition\n"); } From 205efd4619b860404ebb5882e5a119eb3b3b3716 Mon Sep 17 00:00:00 2001 From: Ge-org Brohammer Date: Fri, 17 Mar 2023 00:38:51 +0200 Subject: [PATCH 171/898] ASoC: amd: yc: Add DMI entries to support Victus by HP Laptop 16-e1xxx (8A22) This model requires an additional detection quirk to enable the internal microphone. Tried to use git send-email this time. Signed-off-by: Ge-org Brohammer Link: https://lore.kernel.org/r/PAVP195MB2261322C220E95D7F4B2732ADABC9@PAVP195MB2261.EURP195.PROD.OUTLOOK.COM Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 4a69ce702360c..0acdf0156f075 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -269,6 +269,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_BOARD_NAME, "8A43"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8A22"), + } + }, {} }; From 91e78b2585348284ef7cdb6a8736fd21389914fe Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 13 Feb 2023 13:21:38 +0200 Subject: [PATCH 172/898] dmaengine: Actually use devm_add_action_or_reset() It appears that the commit a1beaa50b583 ("dmaengine: Simplify dmaenginem_async_device_register() function") mentions devm_add_action_or_reset() the actual change utilised devm_add_action() call by mistake. Fix the issue by switching to devm_add_action_or_reset(). Fixes: a1beaa50b583 ("dmaengine: Simplify dmaenginem_async_device_register() function") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230213112138.32118-1-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index c24bca210104c..826b98284fa1f 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1342,7 +1342,7 @@ int dmaenginem_async_device_register(struct dma_device *device) if (ret) return ret; - return devm_add_action(device->dev, dmaenginem_async_device_unregister, device); + return devm_add_action_or_reset(device->dev, dmaenginem_async_device_unregister, device); } EXPORT_SYMBOL(dmaenginem_async_device_register); From b771baf3988a094c5129b05f1e5eaa383f1ec185 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 14 Feb 2023 09:03:44 +0800 Subject: [PATCH 173/898] dmaengine: xilinx: xdma: Fix some kernel-doc comments Make the description of @xdma_chan to @xchan to silence the warnings: drivers/dma/xilinx/xdma.c:283: warning: Function parameter or member 'xchan' not described in 'xdma_xfer_start' drivers/dma/xilinx/xdma.c:283: warning: Excess function parameter 'xdma_chan' description in 'xdma_xfer_start' Reported-by: Abaci Robot Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4051 Signed-off-by: Yang Li Acked-by: Peter Korsgaard Link: https://lore.kernel.org/r/20230214010344.5354-1-yang.lee@linux.alibaba.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c index 462109c616537..93ee298d52b89 100644 --- a/drivers/dma/xilinx/xdma.c +++ b/drivers/dma/xilinx/xdma.c @@ -277,7 +277,7 @@ xdma_alloc_desc(struct xdma_chan *chan, u32 desc_num) /** * xdma_xfer_start - Start DMA transfer - * @xdma_chan: DMA channel pointer + * @xchan: DMA channel pointer */ static int xdma_xfer_start(struct xdma_chan *xchan) { From 2f0e4f0342201fe2228fcc2301cc2b42ae04b8e3 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 16 Mar 2023 10:45:12 +0000 Subject: [PATCH 174/898] cifs: check only tcon status on tcon related functions We had a couple of checks for session in cifs_tree_connect and cifs_mark_open_files_invalid, which were unnecessary. And that was done with ses_lock. Changed that to tc_lock too. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/connect.c | 10 +++++++--- fs/cifs/dfs.c | 10 +++++++--- fs/cifs/dfs_cache.c | 2 +- fs/cifs/file.c | 8 ++++---- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0eceddde7140a..49b37594e991f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4036,9 +4036,13 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru /* only send once per connect */ spin_lock(&tcon->tc_lock); - if (tcon->ses->ses_status != SES_GOOD || - (tcon->status != TID_NEW && - tcon->status != TID_NEED_TCON)) { + if (tcon->status != TID_NEW && + tcon->status != TID_NEED_TCON) { + spin_unlock(&tcon->tc_lock); + return -EHOSTDOWN; + } + + if (tcon->status == TID_GOOD) { spin_unlock(&tcon->tc_lock); return 0; } diff --git a/fs/cifs/dfs.c b/fs/cifs/dfs.c index c8bda52fa096c..3a11716b6e13e 100644 --- a/fs/cifs/dfs.c +++ b/fs/cifs/dfs.c @@ -502,9 +502,13 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru /* only send once per connect */ spin_lock(&tcon->tc_lock); - if (tcon->ses->ses_status != SES_GOOD || - (tcon->status != TID_NEW && - tcon->status != TID_NEED_TCON)) { + if (tcon->status != TID_NEW && + tcon->status != TID_NEED_TCON) { + spin_unlock(&tcon->tc_lock); + return -EHOSTDOWN; + } + + if (tcon->status == TID_GOOD) { spin_unlock(&tcon->tc_lock); return 0; } diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 1c59811bfa73a..30cbdf8514a59 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -1191,7 +1191,7 @@ static int __refresh_tcon(const char *path, struct cifs_tcon *tcon, bool force_r } spin_lock(&ipc->tc_lock); - if (ses->ses_status != SES_GOOD || ipc->status != TID_GOOD) { + if (ipc->status != TID_GOOD) { spin_unlock(&ipc->tc_lock); cifs_dbg(FYI, "%s: skip cache refresh due to disconnected ipc\n", __func__); goto out; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4d4a2d82636d2..6831a9949c430 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -174,13 +174,13 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) struct list_head *tmp1; /* only send once per connect */ - spin_lock(&tcon->ses->ses_lock); - if ((tcon->ses->ses_status != SES_GOOD) || (tcon->status != TID_NEED_RECON)) { - spin_unlock(&tcon->ses->ses_lock); + spin_lock(&tcon->tc_lock); + if (tcon->status != TID_NEED_RECON) { + spin_unlock(&tcon->tc_lock); return; } tcon->status = TID_IN_FILES_INVALIDATE; - spin_unlock(&tcon->ses->ses_lock); + spin_unlock(&tcon->tc_lock); /* list all files open on tree connection and mark them invalid */ spin_lock(&tcon->open_file_lock); From 27c934dd8832dd40fd34776f916dc201e18b319b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 17 Mar 2023 13:13:08 -0400 Subject: [PATCH 175/898] nfsd: don't replace page in rq_pages if it's a continuation of last page The splice read calls nfsd_splice_actor to put the pages containing file data into the svc_rqst->rq_pages array. It's possible however to get a splice result that only has a partial page at the end, if (e.g.) the filesystem hands back a short read that doesn't cover the whole page. nfsd_splice_actor will plop the partial page into its rq_pages array and return. Then later, when nfsd_splice_actor is called again, the remainder of the page may end up being filled out. At this point, nfsd_splice_actor will put the page into the array _again_ corrupting the reply. If this is done enough times, rq_next_page will overrun the array and corrupt the trailing fields -- the rq_respages and rq_next_page pointers themselves. If we've already added the page to the array in the last pass, don't add it to the array a second time when dealing with a splice continuation. This was originally handled properly in nfsd_splice_actor, but commit 91e23b1c3982 ("NFSD: Clean up nfsd_splice_actor()") removed the check for it. Fixes: 91e23b1c3982 ("NFSD: Clean up nfsd_splice_actor()") Cc: Al Viro Reported-by: Dario Lesca Tested-by: David Critch Link: https://bugzilla.redhat.com/show_bug.cgi?id=2150630 Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/vfs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ba34a31a7c702..cd0dbea335d97 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -941,8 +941,15 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct page *last_page; last_page = page + (offset + sd->len - 1) / PAGE_SIZE; - for (page += offset / PAGE_SIZE; page <= last_page; page++) + for (page += offset / PAGE_SIZE; page <= last_page; page++) { + /* + * Skip page replacement when extending the contents + * of the current page. + */ + if (page == *(rqstp->rq_next_page - 1)) + continue; svc_rqst_replace_page(rqstp, page); + } if (rqstp->rq_res.page_len == 0) // first call rqstp->rq_res.page_base = offset % PAGE_SIZE; rqstp->rq_res.page_len += sd->len; From fcdb1eda5302599045bb366e679cccb4216f3873 Mon Sep 17 00:00:00 2001 From: Josh Don Date: Wed, 15 Mar 2023 14:40:29 -0700 Subject: [PATCH 176/898] cgroup: fix display of forceidle time at root We need to reset forceidle_sum to 0 when reading from root, since the bstat we accumulate into is stack allocated. To make this more robust, just replace the existing cputime reset with a memset of the overall bstat. Signed-off-by: Josh Don Fixes: 1fcf54deb767 ("sched/core: add forced idle accounting for cgroups") Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Tejun Heo --- kernel/cgroup/rstat.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 831f1f472bb81..0a2b4967e3334 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -457,9 +457,7 @@ static void root_cgroup_cputime(struct cgroup_base_stat *bstat) struct task_cputime *cputime = &bstat->cputime; int i; - cputime->stime = 0; - cputime->utime = 0; - cputime->sum_exec_runtime = 0; + memset(bstat, 0, sizeof(*bstat)); for_each_possible_cpu(i) { struct kernel_cpustat kcpustat; u64 *cpustat = kcpustat.cpustat; From 1d3b7a788ca7435156809a6bd5b20c95b2370d45 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 16 Mar 2023 23:00:21 -0700 Subject: [PATCH 177/898] xtensa: fix KASAN report for show_stack show_stack dumps raw stack contents which may trigger an unnecessary KASAN report. Fix it by copying stack contents to a temporary buffer with __memcpy and then printing that buffer instead of passing stack pointer directly to the print_hex_dump. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/kernel/traps.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index cd98366a9b238..f0a7d1c2641e0 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -539,7 +539,7 @@ static size_t kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { - size_t len; + size_t len, off = 0; if (!sp) sp = stack_pointer(task); @@ -548,9 +548,17 @@ void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) kstack_depth_to_print * STACK_DUMP_ENTRY_SIZE); printk("%sStack:\n", loglvl); - print_hex_dump(loglvl, " ", DUMP_PREFIX_NONE, - STACK_DUMP_LINE_SIZE, STACK_DUMP_ENTRY_SIZE, - sp, len, false); + while (off < len) { + u8 line[STACK_DUMP_LINE_SIZE]; + size_t line_len = len - off > STACK_DUMP_LINE_SIZE ? + STACK_DUMP_LINE_SIZE : len - off; + + __memcpy(line, (u8 *)sp + off, line_len); + print_hex_dump(loglvl, " ", DUMP_PREFIX_NONE, + STACK_DUMP_LINE_SIZE, STACK_DUMP_ENTRY_SIZE, + line, line_len, false); + off += STACK_DUMP_LINE_SIZE; + } show_trace(task, sp, loglvl); } From 30796d0dcb6e41c6558a07950f2ce60c209da867 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Thu, 16 Mar 2023 18:28:07 +0100 Subject: [PATCH 178/898] net: dsa: b53: mmap: fix device tree support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CPU port should also be enabled in order to get a working switch. Fixes: a5538a777b73 ("net: dsa: b53: mmap: Add device tree support") Signed-off-by: Álvaro Fernández Rojas Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20230316172807.460146-1-noltari@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c index e968322dfbf0b..70887e0aece33 100644 --- a/drivers/net/dsa/b53/b53_mmap.c +++ b/drivers/net/dsa/b53/b53_mmap.c @@ -263,7 +263,7 @@ static int b53_mmap_probe_of(struct platform_device *pdev, if (of_property_read_u32(of_port, "reg", ®)) continue; - if (reg < B53_CPU_PORT) + if (reg < B53_N_PORTS) pdata->enabled_ports |= BIT(reg); } From ff821092cf02a70c2bccd2d19269f01e29aa52cf Mon Sep 17 00:00:00 2001 From: Szymon Heidrich Date: Thu, 16 Mar 2023 11:19:54 +0100 Subject: [PATCH 179/898] net: usb: smsc95xx: Limit packet length to skb->len Packet length retrieved from descriptor may be larger than the actual socket buffer length. In such case the cloned skb passed up the network stack will leak kernel memory contents. Fixes: 2f7ca802bdae ("net: Add SMSC LAN9500 USB2.0 10/100 ethernet adapter driver") Signed-off-by: Szymon Heidrich Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20230316101954.75836-1-szymon.heidrich@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/smsc95xx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 32d2c60d334dc..563ecd27b93ea 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1833,6 +1833,12 @@ static int smsc95xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb) size = (u16)((header & RX_STS_FL_) >> 16); align_count = (4 - ((size + NET_IP_ALIGN) % 4)) % 4; + if (unlikely(size > skb->len)) { + netif_dbg(dev, rx_err, dev->net, + "size err header=0x%08x\n", header); + return 0; + } + if (unlikely(header & RX_STS_ES_)) { netif_dbg(dev, rx_err, dev->net, "Error header=0x%08x\n", header); From 3dacc5bb81472905a7f4f9879cb95477c22dc359 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Thu, 16 Mar 2023 15:22:32 +0530 Subject: [PATCH 180/898] net: ethernet: ti: am65-cpts: reset pps genf adj settings on enable The CPTS PPS GENf adjustment settings are invalid after it has been disabled for a while, so reset them. Fixes: eb9233ce6751 ("net: ethernet: ti: am65-cpts: adjust pps following ptp changes") Signed-off-by: Grygorii Strashko Signed-off-by: Siddharth Vadapalli Reviewed-by: Roger Quadros Reviewed-by: Michal Swiatkowski Link: https://lore.kernel.org/r/20230316095232.2002680-1-s-vadapalli@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpts.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c index 16ee9c29cb35a..8caf85acbb6af 100644 --- a/drivers/net/ethernet/ti/am65-cpts.c +++ b/drivers/net/ethernet/ti/am65-cpts.c @@ -636,6 +636,10 @@ static void am65_cpts_perout_enable_hw(struct am65_cpts *cpts, val = lower_32_bits(cycles); am65_cpts_write32(cpts, val, genf[req->index].length); + am65_cpts_write32(cpts, 0, genf[req->index].control); + am65_cpts_write32(cpts, 0, genf[req->index].ppm_hi); + am65_cpts_write32(cpts, 0, genf[req->index].ppm_low); + cpts->genf_enable |= BIT(req->index); } else { am65_cpts_write32(cpts, 0, genf[req->index].length); From 34343eb06afc04af9178a9883d9354dc12beede0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 28 Feb 2023 19:23:09 +0100 Subject: [PATCH 181/898] efi/libstub: smbios: Use length member instead of record struct size The type 1 SMBIOS record happens to always be the same size, but there are other record types which have been augmented over time, and so we should really use the length field in the header to decide where the string table starts. Fixes: 550b33cfd4452968 ("arm64: efi: Force the use of ...") Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/smbios.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/smbios.c b/drivers/firmware/efi/libstub/smbios.c index 460418b7f5f5e..aadb422b9637d 100644 --- a/drivers/firmware/efi/libstub/smbios.c +++ b/drivers/firmware/efi/libstub/smbios.c @@ -36,7 +36,7 @@ const u8 *__efi_get_smbios_string(u8 type, int offset, int recsize) if (status != EFI_SUCCESS) return NULL; - strtable = (u8 *)record + recsize; + strtable = (u8 *)record + record->length; for (int i = 1; i < ((u8 *)record)[offset]; i++) { int len = strlen(strtable); From eb684408f3ea4856639675d6465f0024e498e4b1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 28 Feb 2023 17:00:49 +0100 Subject: [PATCH 182/898] arm64: efi: Use SMBIOS processor version to key off Ampere quirk Instead of using the SMBIOS type 1 record 'family' field, which is often modified by OEMs, use the type 4 'processor ID' and 'processor version' fields, which are set to a small set of probe-able values on all known Ampere EFI systems in the field. Fixes: 550b33cfd4452968 ("arm64: efi: Force the use of ...") Tested-by: Andrea Righi Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/arm64.c | 39 +++++++++++++++++++----- drivers/firmware/efi/libstub/efistub.h | 41 ++++++++++++++++++++++++-- drivers/firmware/efi/libstub/smbios.c | 13 ++++++-- 3 files changed, 80 insertions(+), 13 deletions(-) diff --git a/drivers/firmware/efi/libstub/arm64.c b/drivers/firmware/efi/libstub/arm64.c index 3997702663727..8aad8c49d43f1 100644 --- a/drivers/firmware/efi/libstub/arm64.c +++ b/drivers/firmware/efi/libstub/arm64.c @@ -16,20 +16,43 @@ static bool system_needs_vamap(void) { - const u8 *type1_family = efi_get_smbios_string(1, family); + const struct efi_smbios_type4_record *record; + const u32 __aligned(1) *socid; + const u8 *version; /* * Ampere eMAG, Altra, and Altra Max machines crash in SetTime() if - * SetVirtualAddressMap() has not been called prior. + * SetVirtualAddressMap() has not been called prior. Most Altra systems + * can be identified by the SMCCC soc ID, which is conveniently exposed + * via the type 4 SMBIOS records. Otherwise, test the processor version + * field. eMAG systems all appear to have the processor version field + * set to "eMAG". */ - if (!type1_family || ( - strcmp(type1_family, "eMAG") && - strcmp(type1_family, "Altra") && - strcmp(type1_family, "Altra Max"))) + record = (struct efi_smbios_type4_record *)efi_get_smbios_record(4); + if (!record) return false; - efi_warn("Working around broken SetVirtualAddressMap()\n"); - return true; + socid = (u32 *)record->processor_id; + switch (*socid & 0xffff000f) { + static char const altra[] = "Ampere(TM) Altra(TM) Processor"; + static char const emag[] = "eMAG"; + + default: + version = efi_get_smbios_string(&record->header, 4, + processor_version); + if (!version || (strncmp(version, altra, sizeof(altra) - 1) && + strncmp(version, emag, sizeof(emag) - 1))) + break; + + fallthrough; + + case 0x0a160001: // Altra + case 0x0a160002: // Altra Max + efi_warn("Working around broken SetVirtualAddressMap()\n"); + return true; + } + + return false; } efi_status_t check_platform_features(void) diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 6bd3bb86d9679..330565b9263a6 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -1074,6 +1074,8 @@ struct efi_smbios_record { u16 handle; }; +const struct efi_smbios_record *efi_get_smbios_record(u8 type); + struct efi_smbios_type1_record { struct efi_smbios_record header; @@ -1087,14 +1089,47 @@ struct efi_smbios_type1_record { u8 family; }; -#define efi_get_smbios_string(__type, __name) ({ \ +struct efi_smbios_type4_record { + struct efi_smbios_record header; + + u8 socket; + u8 processor_type; + u8 processor_family; + u8 processor_manufacturer; + u8 processor_id[8]; + u8 processor_version; + u8 voltage; + u16 external_clock; + u16 max_speed; + u16 current_speed; + u8 status; + u8 processor_upgrade; + u16 l1_cache_handle; + u16 l2_cache_handle; + u16 l3_cache_handle; + u8 serial_number; + u8 asset_tag; + u8 part_number; + u8 core_count; + u8 enabled_core_count; + u8 thread_count; + u16 processor_characteristics; + u16 processor_family2; + u16 core_count2; + u16 enabled_core_count2; + u16 thread_count2; + u16 thread_enabled; +}; + +#define efi_get_smbios_string(__record, __type, __name) ({ \ int size = sizeof(struct efi_smbios_type ## __type ## _record); \ int off = offsetof(struct efi_smbios_type ## __type ## _record, \ __name); \ - __efi_get_smbios_string(__type, off, size); \ + __efi_get_smbios_string((__record), __type, off, size); \ }) -const u8 *__efi_get_smbios_string(u8 type, int offset, int recsize); +const u8 *__efi_get_smbios_string(const struct efi_smbios_record *record, + u8 type, int offset, int recsize); void efi_remap_image(unsigned long image_base, unsigned alloc_size, unsigned long code_size); diff --git a/drivers/firmware/efi/libstub/smbios.c b/drivers/firmware/efi/libstub/smbios.c index aadb422b9637d..f9c159c28f461 100644 --- a/drivers/firmware/efi/libstub/smbios.c +++ b/drivers/firmware/efi/libstub/smbios.c @@ -22,19 +22,28 @@ struct efi_smbios_protocol { u8 minor_version; }; -const u8 *__efi_get_smbios_string(u8 type, int offset, int recsize) +const struct efi_smbios_record *efi_get_smbios_record(u8 type) { struct efi_smbios_record *record; efi_smbios_protocol_t *smbios; efi_status_t status; u16 handle = 0xfffe; - const u8 *strtable; status = efi_bs_call(locate_protocol, &EFI_SMBIOS_PROTOCOL_GUID, NULL, (void **)&smbios) ?: efi_call_proto(smbios, get_next, &handle, &type, &record, NULL); if (status != EFI_SUCCESS) return NULL; + return record; +} + +const u8 *__efi_get_smbios_string(const struct efi_smbios_record *record, + u8 type, int offset, int recsize) +{ + const u8 *strtable; + + if (!record) + return NULL; strtable = (u8 *)record + record->length; for (int i = 1; i < ((u8 *)record)[offset]; i++) { From f59a7ec1e69fc23946175b8c0d7e0fd21f94f8c9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 28 Feb 2023 19:33:14 +0100 Subject: [PATCH 183/898] efi/libstub: smbios: Drop unused 'recsize' parameter We no longer use the recsize argument for locating the string table in an SMBIOS record, so we can drop it from the internal API. Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/efistub.h | 5 ++--- drivers/firmware/efi/libstub/smbios.c | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 330565b9263a6..bd9c38a93bbce 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -1122,14 +1122,13 @@ struct efi_smbios_type4_record { }; #define efi_get_smbios_string(__record, __type, __name) ({ \ - int size = sizeof(struct efi_smbios_type ## __type ## _record); \ int off = offsetof(struct efi_smbios_type ## __type ## _record, \ __name); \ - __efi_get_smbios_string((__record), __type, off, size); \ + __efi_get_smbios_string((__record), __type, off); \ }) const u8 *__efi_get_smbios_string(const struct efi_smbios_record *record, - u8 type, int offset, int recsize); + u8 type, int offset); void efi_remap_image(unsigned long image_base, unsigned alloc_size, unsigned long code_size); diff --git a/drivers/firmware/efi/libstub/smbios.c b/drivers/firmware/efi/libstub/smbios.c index f9c159c28f461..c217de2cc8d56 100644 --- a/drivers/firmware/efi/libstub/smbios.c +++ b/drivers/firmware/efi/libstub/smbios.c @@ -38,7 +38,7 @@ const struct efi_smbios_record *efi_get_smbios_record(u8 type) } const u8 *__efi_get_smbios_string(const struct efi_smbios_record *record, - u8 type, int offset, int recsize) + u8 type, int offset) { const u8 *strtable; From 3615c78673c332b69aaacefbcde5937c5c706686 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Mar 2023 13:31:02 +0100 Subject: [PATCH 184/898] efi: sysfb_efi: Fix DMI quirks not working for simpledrm Commit 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup for all arches") moved the sysfb_apply_efi_quirks() call in sysfb_init() from before the [sysfb_]parse_mode() call to after it. But sysfb_apply_efi_quirks() modifies the global screen_info struct which [sysfb_]parse_mode() parses, so doing it later is too late. This has broken all DMI based quirks for correcting wrong firmware efifb settings when simpledrm is used. To fix this move the sysfb_apply_efi_quirks() call back to its old place and split the new setup of the efifb_fwnode (which requires the platform_device) into its own function and call that at the place of the moved sysfb_apply_efi_quirks(pd) calls. Fixes: 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup for all arches") Cc: stable@vger.kernel.org Cc: Javier Martinez Canillas Cc: Thomas Zimmermann Signed-off-by: Hans de Goede Reviewed-by: Javier Martinez Canillas Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/sysfb_efi.c | 5 ++++- drivers/firmware/sysfb.c | 4 +++- drivers/firmware/sysfb_simplefb.c | 2 +- include/linux/sysfb.h | 9 +++++++-- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/efi/sysfb_efi.c b/drivers/firmware/efi/sysfb_efi.c index f06fdacc9bc83..e76d6803bdd08 100644 --- a/drivers/firmware/efi/sysfb_efi.c +++ b/drivers/firmware/efi/sysfb_efi.c @@ -341,7 +341,7 @@ static const struct fwnode_operations efifb_fwnode_ops = { #ifdef CONFIG_EFI static struct fwnode_handle efifb_fwnode; -__init void sysfb_apply_efi_quirks(struct platform_device *pd) +__init void sysfb_apply_efi_quirks(void) { if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || !(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS)) @@ -355,7 +355,10 @@ __init void sysfb_apply_efi_quirks(struct platform_device *pd) screen_info.lfb_height = temp; screen_info.lfb_linelength = 4 * screen_info.lfb_width; } +} +__init void sysfb_set_efifb_fwnode(struct platform_device *pd) +{ if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI && IS_ENABLED(CONFIG_PCI)) { fwnode_init(&efifb_fwnode, &efifb_fwnode_ops); pd->dev.fwnode = &efifb_fwnode; diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 3fd3563d962b8..3c197db42c9d9 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -81,6 +81,8 @@ static __init int sysfb_init(void) if (disabled) goto unlock_mutex; + sysfb_apply_efi_quirks(); + /* try to create a simple-framebuffer device */ compatible = sysfb_parse_mode(si, &mode); if (compatible) { @@ -107,7 +109,7 @@ static __init int sysfb_init(void) goto unlock_mutex; } - sysfb_apply_efi_quirks(pd); + sysfb_set_efifb_fwnode(pd); ret = platform_device_add_data(pd, si, sizeof(*si)); if (ret) diff --git a/drivers/firmware/sysfb_simplefb.c b/drivers/firmware/sysfb_simplefb.c index ce9c007ed66ff..82c64cb9f5316 100644 --- a/drivers/firmware/sysfb_simplefb.c +++ b/drivers/firmware/sysfb_simplefb.c @@ -141,7 +141,7 @@ __init struct platform_device *sysfb_create_simplefb(const struct screen_info *s if (!pd) return ERR_PTR(-ENOMEM); - sysfb_apply_efi_quirks(pd); + sysfb_set_efifb_fwnode(pd); ret = platform_device_add_resources(pd, &res, 1); if (ret) diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index 8ba8b5be55675..c1ef5fc60a3cb 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -70,11 +70,16 @@ static inline void sysfb_disable(void) #ifdef CONFIG_EFI extern struct efifb_dmi_info efifb_dmi_list[]; -void sysfb_apply_efi_quirks(struct platform_device *pd); +void sysfb_apply_efi_quirks(void); +void sysfb_set_efifb_fwnode(struct platform_device *pd); #else /* CONFIG_EFI */ -static inline void sysfb_apply_efi_quirks(struct platform_device *pd) +static inline void sysfb_apply_efi_quirks(void) +{ +} + +static inline void sysfb_set_efifb_fwnode(struct platform_device *pd) { } From 5ed213dd64681f84a01ceaa82fb336cf7d59ddcf Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Mar 2023 13:31:03 +0100 Subject: [PATCH 185/898] efi: sysfb_efi: Add quirk for Lenovo Yoga Book X91F/L Another Lenovo convertable which reports a landscape resolution of 1920x1200 with a pitch of (1920 * 4) bytes, while the actual framebuffer has a resolution of 1200x1920 with a pitch of (1200 * 4) bytes. Signed-off-by: Hans de Goede Reviewed-by: Javier Martinez Canillas Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/sysfb_efi.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/firmware/efi/sysfb_efi.c b/drivers/firmware/efi/sysfb_efi.c index e76d6803bdd08..456d0e5eaf78b 100644 --- a/drivers/firmware/efi/sysfb_efi.c +++ b/drivers/firmware/efi/sysfb_efi.c @@ -272,6 +272,14 @@ static const struct dmi_system_id efifb_dmi_swap_width_height[] __initconst = { "IdeaPad Duet 3 10IGL5"), }, }, + { + /* Lenovo Yoga Book X91F / X91L */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + /* Non exact match to match F + L versions */ + DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"), + }, + }, {}, }; From 4aa3b75c74603c3374877d5fd18ad9cc3a9a62ed Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Sun, 12 Mar 2023 19:15:49 -0400 Subject: [PATCH 186/898] counter: 104-quad-8: Fix race condition between FLAG and CNTR reads The Counter (CNTR) register is 24 bits wide, but we can have an effective 25-bit count value by setting bit 24 to the XOR of the Borrow flag and Carry flag. The flags can be read from the FLAG register, but a race condition exists: the Borrow flag and Carry flag are instantaneous and could change by the time the count value is read from the CNTR register. Since the race condition could result in an incorrect 25-bit count value, remove support for 25-bit count values from this driver; hard-coded maximum count values are replaced by a LS7267_CNTR_MAX define for consistency and clarity. Fixes: 28e5d3bb0325 ("iio: 104-quad-8: Add IIO support for the ACCES 104-QUAD-8") Cc: # 6.1.x Cc: # 6.2.x Link: https://lore.kernel.org/r/20230312231554.134858-1-william.gray@linaro.org/ Signed-off-by: William Breathitt Gray --- drivers/counter/104-quad-8.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/drivers/counter/104-quad-8.c b/drivers/counter/104-quad-8.c index deed4afadb298..d59e4f34a680c 100644 --- a/drivers/counter/104-quad-8.c +++ b/drivers/counter/104-quad-8.c @@ -97,10 +97,6 @@ struct quad8 { struct quad8_reg __iomem *reg; }; -/* Borrow Toggle flip-flop */ -#define QUAD8_FLAG_BT BIT(0) -/* Carry Toggle flip-flop */ -#define QUAD8_FLAG_CT BIT(1) /* Error flag */ #define QUAD8_FLAG_E BIT(4) /* Up/Down flag */ @@ -133,6 +129,9 @@ struct quad8 { #define QUAD8_CMR_QUADRATURE_X2 0x10 #define QUAD8_CMR_QUADRATURE_X4 0x18 +/* Each Counter is 24 bits wide */ +#define LS7267_CNTR_MAX GENMASK(23, 0) + static int quad8_signal_read(struct counter_device *counter, struct counter_signal *signal, enum counter_signal_level *level) @@ -156,18 +155,10 @@ static int quad8_count_read(struct counter_device *counter, { struct quad8 *const priv = counter_priv(counter); struct channel_reg __iomem *const chan = priv->reg->channel + count->id; - unsigned int flags; - unsigned int borrow; - unsigned int carry; unsigned long irqflags; int i; - flags = ioread8(&chan->control); - borrow = flags & QUAD8_FLAG_BT; - carry = !!(flags & QUAD8_FLAG_CT); - - /* Borrow XOR Carry effectively doubles count range */ - *val = (unsigned long)(borrow ^ carry) << 24; + *val = 0; spin_lock_irqsave(&priv->lock, irqflags); @@ -191,8 +182,7 @@ static int quad8_count_write(struct counter_device *counter, unsigned long irqflags; int i; - /* Only 24-bit values are supported */ - if (val > 0xFFFFFF) + if (val > LS7267_CNTR_MAX) return -ERANGE; spin_lock_irqsave(&priv->lock, irqflags); @@ -806,8 +796,7 @@ static int quad8_count_preset_write(struct counter_device *counter, struct quad8 *const priv = counter_priv(counter); unsigned long irqflags; - /* Only 24-bit values are supported */ - if (preset > 0xFFFFFF) + if (preset > LS7267_CNTR_MAX) return -ERANGE; spin_lock_irqsave(&priv->lock, irqflags); @@ -834,8 +823,7 @@ static int quad8_count_ceiling_read(struct counter_device *counter, *ceiling = priv->preset[count->id]; break; default: - /* By default 0x1FFFFFF (25 bits unsigned) is maximum count */ - *ceiling = 0x1FFFFFF; + *ceiling = LS7267_CNTR_MAX; break; } @@ -850,8 +838,7 @@ static int quad8_count_ceiling_write(struct counter_device *counter, struct quad8 *const priv = counter_priv(counter); unsigned long irqflags; - /* Only 24-bit values are supported */ - if (ceiling > 0xFFFFFF) + if (ceiling > LS7267_CNTR_MAX) return -ERANGE; spin_lock_irqsave(&priv->lock, irqflags); From 00f4bc5184c19cb33f468f1ea409d70d19f8f502 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Thu, 16 Mar 2023 16:34:26 -0400 Subject: [PATCH 187/898] counter: 104-quad-8: Fix Synapse action reported for Index signals Signal 16 and higher represent the device's Index lines. The priv->preset_enable array holds the device configuration for these Index lines. The preset_enable configuration is active low on the device, so invert the conditional check in quad8_action_read() to properly handle the logical state of preset_enable. Fixes: f1d8a071d45b ("counter: 104-quad-8: Add Generic Counter interface support") Cc: Link: https://lore.kernel.org/r/20230316203426.224745-1-william.gray@linaro.org/ Signed-off-by: William Breathitt Gray --- drivers/counter/104-quad-8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/counter/104-quad-8.c b/drivers/counter/104-quad-8.c index d59e4f34a680c..d9cb937665cfc 100644 --- a/drivers/counter/104-quad-8.c +++ b/drivers/counter/104-quad-8.c @@ -368,7 +368,7 @@ static int quad8_action_read(struct counter_device *counter, /* Handle Index signals */ if (synapse->signal->id >= 16) { - if (priv->preset_enable[count->id]) + if (!priv->preset_enable[count->id]) *action = COUNTER_SYNAPSE_ACTION_RISING_EDGE; else *action = COUNTER_SYNAPSE_ACTION_NONE; From ab327f8acdf8d06601fbf058859a539a9422afff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Fri, 17 Mar 2023 11:20:04 +0100 Subject: [PATCH 188/898] mips: bmips: BCM6358: disable RAC flush for TP1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RAC flush causes kernel panics on BCM6358 with EHCI/OHCI when booting from TP1: [ 3.881739] usb 1-1: new high-speed USB device number 2 using ehci-platform [ 3.895011] Reserved instruction in kernel code[#1]: [ 3.900113] CPU: 0 PID: 1 Comm: init Not tainted 5.10.16 #0 [ 3.905829] $ 0 : 00000000 10008700 00000000 77d94060 [ 3.911238] $ 4 : 7fd1f088 00000000 81431cac 81431ca0 [ 3.916641] $ 8 : 00000000 ffffefff 8075cd34 00000000 [ 3.922043] $12 : 806f8d40 f3e812b7 00000000 000d9aaa [ 3.927446] $16 : 7fd1f068 7fd1f080 7ff559b8 81428470 [ 3.932848] $20 : 00000000 00000000 55590000 77d70000 [ 3.938251] $24 : 00000018 00000010 [ 3.943655] $28 : 81430000 81431e60 81431f28 800157fc [ 3.949058] Hi : 00000000 [ 3.952013] Lo : 00000000 [ 3.955019] epc : 80015808 setup_sigcontext+0x54/0x24c [ 3.960464] ra : 800157fc setup_sigcontext+0x48/0x24c [ 3.965913] Status: 10008703 KERNEL EXL IE [ 3.970216] Cause : 00800028 (ExcCode 0a) [ 3.974340] PrId : 0002a010 (Broadcom BMIPS4350) [ 3.979170] Modules linked in: ohci_platform ohci_hcd fsl_mph_dr_of ehci_platform ehci_fsl ehci_hcd gpio_button_hotplug usbcore nls_base usb_common [ 3.992907] Process init (pid: 1, threadinfo=(ptrval), task=(ptrval), tls=77e22ec8) [ 4.000776] Stack : 81431ef4 7fd1f080 81431f28 81428470 7fd1f068 81431edc 7ff559b8 81428470 [ 4.009467] 81431f28 7fd1f080 55590000 77d70000 77d5498c 80015c70 806f0000 8063ae74 [ 4.018149] 08100002 81431f28 0000000a 08100002 81431f28 0000000a 77d6b418 00000003 [ 4.026831] ffffffff 80016414 80080734 81431ecc 81431ecc 00000001 00000000 04000000 [ 4.035512] 77d54874 00000000 00000000 00000000 00000000 00000012 00000002 00000000 [ 4.044196] ... [ 4.046706] Call Trace: [ 4.049238] [<80015808>] setup_sigcontext+0x54/0x24c [ 4.054356] [<80015c70>] setup_frame+0xdc/0x124 [ 4.059015] [<80016414>] do_notify_resume+0x1dc/0x288 [ 4.064207] [<80011b50>] work_notifysig+0x10/0x18 [ 4.069036] [ 4.070538] Code: 8fc300b4 00001025 26240008 ac830004 3c048063 0c0228aa 24846a00 26240010 [ 4.080686] [ 4.082517] ---[ end trace 22a8edb41f5f983b ]--- [ 4.087374] Kernel panic - not syncing: Fatal exception [ 4.092753] Rebooting in 1 seconds.. Because the bootloader (CFE) is not initializing the Read-ahead cache properly on the second thread (TP1). Since the RAC was not initialized properly, we should avoid flushing it at the risk of corrupting the instruction stream as seen in the trace above. Fixes: d59098a0e9cb ("MIPS: bmips: use generic dma noncoherent ops") Signed-off-by: Álvaro Fernández Rojas Signed-off-by: Thomas Bogendoerfer --- arch/mips/bmips/dma.c | 5 +++++ arch/mips/bmips/setup.c | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/arch/mips/bmips/dma.c b/arch/mips/bmips/dma.c index 33788668cbdbf..3779e7855bd75 100644 --- a/arch/mips/bmips/dma.c +++ b/arch/mips/bmips/dma.c @@ -5,6 +5,8 @@ #include #include +bool bmips_rac_flush_disable; + void arch_sync_dma_for_cpu_all(void) { void __iomem *cbr = BMIPS_GET_CBR(); @@ -15,6 +17,9 @@ void arch_sync_dma_for_cpu_all(void) boot_cpu_type() != CPU_BMIPS4380) return; + if (unlikely(bmips_rac_flush_disable)) + return; + /* Flush stale data out of the readahead cache */ cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG); __raw_writel(cfg | 0x100, cbr + BMIPS_RAC_CONFIG); diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c index e95b3f78e7cd4..549a6392a3d2d 100644 --- a/arch/mips/bmips/setup.c +++ b/arch/mips/bmips/setup.c @@ -35,6 +35,8 @@ #define REG_BCM6328_OTP ((void __iomem *)CKSEG1ADDR(0x1000062c)) #define BCM6328_TP1_DISABLED BIT(9) +extern bool bmips_rac_flush_disable; + static const unsigned long kbase = VMLINUX_LOAD_ADDRESS & 0xfff00000; struct bmips_quirk { @@ -104,6 +106,12 @@ static void bcm6358_quirks(void) * disable SMP for now */ bmips_smp_enabled = 0; + + /* + * RAC flush causes kernel panics on BCM6358 when booting from TP1 + * because the bootloader is not initializing it properly. + */ + bmips_rac_flush_disable = !!(read_c0_brcm_cmt_local() & (1 << 31)); } static void bcm6368_quirks(void) From 4985e7b2c002eb4c5c794a1d3acd91b82c89a0fd Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 18 Mar 2023 22:12:31 +0800 Subject: [PATCH 189/898] block: ublk_drv: mark device as LIVE before adding disk IO can be started before add_disk() returns, such as reading parititon table, then the monitor work should work for making forward progress. So mark device as LIVE before adding disk, meantime change to DEAD if add_disk() fails. Fixed: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver") Reviewed-by: Ziyang Zhang Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20230318141231.55562-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index d1d1c8d606c8d..fb5a557afde8b 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1602,17 +1602,18 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) set_bit(GD_SUPPRESS_PART_SCAN, &disk->state); get_device(&ub->cdev_dev); + ub->dev_info.state = UBLK_S_DEV_LIVE; ret = add_disk(disk); if (ret) { /* * Has to drop the reference since ->free_disk won't be * called in case of add_disk failure. */ + ub->dev_info.state = UBLK_S_DEV_DEAD; ublk_put_device(ub); goto out_put_disk; } set_bit(UB_STATE_USED, &ub->state); - ub->dev_info.state = UBLK_S_DEV_LIVE; out_put_disk: if (ret) put_disk(disk); From 49f76c499d38bf67803438eee88c8300d0f6ce09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20Dahlstr=C3=B6m?= Date: Mon, 13 Mar 2023 21:50:29 +0100 Subject: [PATCH 190/898] iio: adc: palmas_gpadc: fix NULL dereference on rmmod MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling dev_to_iio_dev() on a platform device pointer is undefined and will make adc NULL. Signed-off-by: Patrik Dahlström Link: https://lore.kernel.org/r/20230313205029.1881745-1-risca@dalakolonin.se Signed-off-by: Jonathan Cameron --- drivers/iio/adc/palmas_gpadc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/palmas_gpadc.c b/drivers/iio/adc/palmas_gpadc.c index fd000345ec5cf..849a697a467e5 100644 --- a/drivers/iio/adc/palmas_gpadc.c +++ b/drivers/iio/adc/palmas_gpadc.c @@ -639,7 +639,7 @@ static int palmas_gpadc_probe(struct platform_device *pdev) static int palmas_gpadc_remove(struct platform_device *pdev) { - struct iio_dev *indio_dev = dev_to_iio_dev(&pdev->dev); + struct iio_dev *indio_dev = dev_get_drvdata(&pdev->dev); struct palmas_gpadc *adc = iio_priv(indio_dev); if (adc->wakeup1_enable || adc->wakeup2_enable) From 363c7dc72f79edd55bf1c4380e0fbf7f1bbc2c86 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sun, 12 Mar 2023 14:09:33 -0700 Subject: [PATCH 191/898] iio: adc: ti-ads7950: Set `can_sleep` flag for GPIO chip The ads7950 uses a mutex as well as SPI transfers in its GPIO callbacks. This means these callbacks can sleep and the `can_sleep` flag should be set. Having the flag set will make sure that warnings are generated when calling any of the callbacks from a potentially non-sleeping context. Fixes: c97dce792dc8 ("iio: adc: ti-ads7950: add GPIO support") Signed-off-by: Lars-Peter Clausen Acked-by: David Lechner Link: https://lore.kernel.org/r/20230312210933.2275376-1-lars@metafoo.de Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti-ads7950.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/ti-ads7950.c b/drivers/iio/adc/ti-ads7950.c index 2cc9a9bd9db60..263fc3a1b87e1 100644 --- a/drivers/iio/adc/ti-ads7950.c +++ b/drivers/iio/adc/ti-ads7950.c @@ -634,6 +634,7 @@ static int ti_ads7950_probe(struct spi_device *spi) st->chip.label = dev_name(&st->spi->dev); st->chip.parent = &st->spi->dev; st->chip.owner = THIS_MODULE; + st->chip.can_sleep = true; st->chip.base = -1; st->chip.ngpio = TI_ADS7950_NUM_GPIOS; st->chip.get_direction = ti_ads7950_get_direction; From 43e5f1d5921128373743585e3275ed9044ef8b8f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 13 Mar 2023 15:12:30 -0700 Subject: [PATCH 192/898] fscrypt: improve fscrypt_destroy_keyring() documentation Document that fscrypt_destroy_keyring() must be called after all potentially-encrypted inodes have been evicted. Link: https://lore.kernel.org/r/20230313221231.272498-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/keyring.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 78086f8dbda52..bb15709ac9a40 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -207,10 +207,11 @@ static int allocate_filesystem_keyring(struct super_block *sb) * Release all encryption keys that have been added to the filesystem, along * with the keyring that contains them. * - * This is called at unmount time. The filesystem's underlying block device(s) - * are still available at this time; this is important because after user file - * accesses have been allowed, this function may need to evict keys from the - * keyslots of an inline crypto engine, which requires the block device(s). + * This is called at unmount time, after all potentially-encrypted inodes have + * been evicted. The filesystem's underlying block device(s) are still + * available at this time; this is important because after user file accesses + * have been allowed, this function may need to evict keys from the keyslots of + * an inline crypto engine, which requires the block device(s). */ void fscrypt_destroy_keyring(struct super_block *sb) { @@ -227,12 +228,12 @@ void fscrypt_destroy_keyring(struct super_block *sb) hlist_for_each_entry_safe(mk, tmp, bucket, mk_node) { /* - * Since all inodes were already evicted, every key - * remaining in the keyring should have an empty inode - * list, and should only still be in the keyring due to - * the single active ref associated with ->mk_secret. - * There should be no structural refs beyond the one - * associated with the active ref. + * Since all potentially-encrypted inodes were already + * evicted, every key remaining in the keyring should + * have an empty inode list, and should only still be in + * the keyring due to the single active ref associated + * with ->mk_secret. There should be no structural refs + * beyond the one associated with the active ref. */ WARN_ON(refcount_read(&mk->mk_active_refs) != 1); WARN_ON(refcount_read(&mk->mk_struct_refs) != 1); From 4bcf6f827a79c59806c695dc280e763c5b6a6813 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 13 Mar 2023 15:12:31 -0700 Subject: [PATCH 193/898] fscrypt: check for NULL keyring in fscrypt_put_master_key_activeref() It is a bug for fscrypt_put_master_key_activeref() to see a NULL keyring. But it used to be possible due to the bug, now fixed, where fscrypt_destroy_keyring() was called before security_sb_delete(). To be consistent with how fscrypt_destroy_keyring() uses WARN_ON for the same issue, WARN and leak the fscrypt_master_key if the keyring is NULL instead of dereferencing the NULL pointer. This is a robustness improvement, not a fix. Link: https://lore.kernel.org/r/20230313221231.272498-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/keyring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index bb15709ac9a40..13d336a6cc5da 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -92,6 +92,8 @@ void fscrypt_put_master_key_activeref(struct super_block *sb, * destroying any subkeys embedded in it. */ + if (WARN_ON(!sb->s_master_keys)) + return; spin_lock(&sb->s_master_keys->lock); hlist_del_rcu(&mk->mk_node); spin_unlock(&sb->s_master_keys->lock); From b7a5822810c4398515300d614d988cf638adecad Mon Sep 17 00:00:00 2001 From: Tim Crawford Date: Fri, 17 Mar 2023 08:18:25 -0600 Subject: [PATCH 194/898] ALSA: hda/realtek: Add quirks for some Clevo laptops Add the audio quirk for some of Clevo's latest RPL laptops: - NP50RNJS (ALC256) - NP70SNE (ALC256) - PD50SNE (ALC1220) - PE60RNE (ALC1220) Co-authored-by: Jeremy Soller Signed-off-by: Tim Crawford Cc: Link: https://lore.kernel.org/r/20230317141825.11807-1-tcrawford@system76.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f09a1d7c1b186..0ec2c59bb8d56 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2631,6 +2631,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x65e5, "Clevo PC50D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65f1, "Clevo PC50HS", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65f5, "Clevo PD50PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x66a2, "Clevo PE60RNE", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), @@ -2651,6 +2652,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x96e1, "Clevo P960[ER][CDFN]-K", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x97e1, "Clevo P970[ER][CDFN]", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x97e2, "Clevo P970RC-M", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0xd502, "Clevo PD50SNE", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530), @@ -9575,6 +9577,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x5101, "Clevo S510WU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x5157, "Clevo W517GU1", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x51a1, "Clevo NS50MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x5630, "Clevo NP50RNJS", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70f2, "Clevo NH79EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -9609,6 +9612,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x971d, "Clevo N970T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa500, "Clevo NL5[03]RU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa600, "Clevo NL50NU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xa671, "Clevo NP70SN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xb018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xb019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xb022, "Clevo NH77D[DC][QW]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), From 25143b6a01d0cc5319edd3de22ffa2578b045550 Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Thu, 16 Mar 2023 13:29:21 +0300 Subject: [PATCH 195/898] qed/qed_sriov: guard against NULL derefs from qed_iov_get_vf_info We have to make sure that the info returned by the helper is valid before using it. Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Fixes: f990c82c385b ("qed*: Add support for ndo_set_vf_trust") Fixes: 733def6a04bf ("qed*: IOV link control") Signed-off-by: Daniil Tatianin Reviewed-by: Michal Swiatkowski Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 2bf18748581d3..fa167b1aa0190 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -4404,6 +4404,9 @@ qed_iov_configure_min_tx_rate(struct qed_dev *cdev, int vfid, u32 rate) } vf = qed_iov_get_vf_info(QED_LEADING_HWFN(cdev), (u16)vfid, true); + if (!vf) + return -EINVAL; + vport_id = vf->vport_id; return qed_configure_vport_wfq(cdev, vport_id, rate); @@ -5152,7 +5155,7 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) /* Validate that the VF has a configured vport */ vf = qed_iov_get_vf_info(hwfn, i, true); - if (!vf->vport_instance) + if (!vf || !vf->vport_instance) continue; memset(¶ms, 0, sizeof(params)); From 30ed9ee9a10a90ae719dcfcacead1d0506fa45ed Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 15 Mar 2023 09:52:28 -0500 Subject: [PATCH 196/898] RDMA/irdma: Do not generate SW completions for NOPs Currently, artificial SW completions are generated for NOP wqes which can generate unexpected completions with wr_id = 0. Skip the generation of artificial completions for NOPs. Fixes: 81091d7696ae ("RDMA/irdma: Add SW mechanism to generate completions on error") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230315145231.931-2-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/utils.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 445e69e864097..7887230c867b1 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -2595,7 +2595,10 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp) /* remove the SQ WR by moving SQ tail*/ IRDMA_RING_SET_TAIL(*sq_ring, sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta); - + if (cmpl->cpi.op_type == IRDMAQP_OP_NOP) { + kfree(cmpl); + continue; + } ibdev_dbg(iwqp->iwscq->ibcq.device, "DEV: %s: adding wr_id = 0x%llx SQ Completion to list qp_id=%d\n", __func__, cmpl->cpi.wr_id, qp->qp_id); From b69a6979dbaa2453675fe9c71bdc2497fedb11f9 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 15 Mar 2023 09:52:29 -0500 Subject: [PATCH 197/898] RDMA/irdma: Fix memory leak of PBLE objects On rmmod of irdma, the PBLE object memory is not being freed. PBLE object memory are not statically pre-allocated at function initialization time unlike other HMC objects. PBLEs objects and the Segment Descriptors (SD) for it can be dynamically allocated during scale up and SD's remain allocated till function deinitialization. Fix this leak by adding IRDMA_HMC_IW_PBLE to the iw_hmc_obj_types[] table and skip pbles in irdma_create_hmc_obj but not in irdma_del_hmc_objects(). Fixes: 44d9e52977a1 ("RDMA/irdma: Implement device initialization definitions") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230315145231.931-3-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/hw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 2e1e2bad04011..43dfa4761f069 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -41,6 +41,7 @@ static enum irdma_hmc_rsrc_type iw_hmc_obj_types[] = { IRDMA_HMC_IW_XFFL, IRDMA_HMC_IW_Q1, IRDMA_HMC_IW_Q1FL, + IRDMA_HMC_IW_PBLE, IRDMA_HMC_IW_TIMER, IRDMA_HMC_IW_FSIMC, IRDMA_HMC_IW_FSIAV, @@ -827,6 +828,8 @@ static int irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged, info.entry_type = rf->sd_type; for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) { + if (iw_hmc_obj_types[i] == IRDMA_HMC_IW_PBLE) + continue; if (dev->hmc_info->hmc_obj[iw_hmc_obj_types[i]].cnt) { info.rsrc_type = iw_hmc_obj_types[i]; info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt; From 8385a875c9eecc429b2f72970efcbb0e5cb5b547 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 15 Mar 2023 09:52:30 -0500 Subject: [PATCH 198/898] RDMA/irdma: Increase iWARP CM default rexmit count When running perftest with large number of connections in iWARP mode, the passive side could be slow to respond. Increase the rexmit counter default to allow scaling connections. Fixes: 146b9756f14c ("RDMA/irdma: Add connection manager") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230315145231.931-4-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/cm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/cm.h b/drivers/infiniband/hw/irdma/cm.h index 19c284975fc7c..7feadb3e1eda3 100644 --- a/drivers/infiniband/hw/irdma/cm.h +++ b/drivers/infiniband/hw/irdma/cm.h @@ -41,7 +41,7 @@ #define TCP_OPTIONS_PADDING 3 #define IRDMA_DEFAULT_RETRYS 64 -#define IRDMA_DEFAULT_RETRANS 8 +#define IRDMA_DEFAULT_RETRANS 32 #define IRDMA_DEFAULT_TTL 0x40 #define IRDMA_DEFAULT_RTT_VAR 6 #define IRDMA_DEFAULT_SS_THRESH 0x3fffffff From e4522c097ec10f23ea0933e9e69d4fa9d8ae9441 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Wed, 15 Mar 2023 09:52:31 -0500 Subject: [PATCH 199/898] RDMA/irdma: Add ipv4 check to irdma_find_listener() Add ipv4 check to irdma_find_listener(). Otherwise the function incorrectly finds and returns a listener with a different addr family for the zero IP addr, if a listener with a zero IP addr and the same port as the one searched for has already been created. Fixes: 146b9756f14c ("RDMA/irdma: Add connection manager") Signed-off-by: Tatyana Nikolova Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230315145231.931-5-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/cm.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 195aa9ea18b6c..8817864154af1 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -1458,13 +1458,15 @@ static int irdma_send_fin(struct irdma_cm_node *cm_node) * irdma_find_listener - find a cm node listening on this addr-port pair * @cm_core: cm's core * @dst_addr: listener ip addr + * @ipv4: flag indicating IPv4 when true * @dst_port: listener tcp port num * @vlan_id: virtual LAN ID * @listener_state: state to match with listen node's */ static struct irdma_cm_listener * -irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port, - u16 vlan_id, enum irdma_cm_listener_state listener_state) +irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, bool ipv4, + u16 dst_port, u16 vlan_id, + enum irdma_cm_listener_state listener_state) { struct irdma_cm_listener *listen_node; static const u32 ip_zero[4] = { 0, 0, 0, 0 }; @@ -1477,7 +1479,7 @@ irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port, list_for_each_entry (listen_node, &cm_core->listen_list, list) { memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr)); listen_port = listen_node->loc_port; - if (listen_port != dst_port || + if (listen_node->ipv4 != ipv4 || listen_port != dst_port || !(listener_state & listen_node->listener_state)) continue; /* compare node pair, return node handle if a match */ @@ -2902,9 +2904,10 @@ irdma_make_listen_node(struct irdma_cm_core *cm_core, unsigned long flags; /* cannot have multiple matching listeners */ - listener = irdma_find_listener(cm_core, cm_info->loc_addr, - cm_info->loc_port, cm_info->vlan_id, - IRDMA_CM_LISTENER_EITHER_STATE); + listener = + irdma_find_listener(cm_core, cm_info->loc_addr, cm_info->ipv4, + cm_info->loc_port, cm_info->vlan_id, + IRDMA_CM_LISTENER_EITHER_STATE); if (listener && listener->listener_state == IRDMA_CM_LISTENER_ACTIVE_STATE) { refcount_dec(&listener->refcnt); @@ -3153,6 +3156,7 @@ void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf) listener = irdma_find_listener(cm_core, cm_info.loc_addr, + cm_info.ipv4, cm_info.loc_port, cm_info.vlan_id, IRDMA_CM_LISTENER_ACTIVE_STATE); From e8d20c3ded59a092532513c9bd030d1ea66f5f44 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Fri, 17 Mar 2023 00:15:26 +0800 Subject: [PATCH 200/898] xirc2ps_cs: Fix use after free bug in xirc2ps_detach In xirc2ps_probe, the local->tx_timeout_task was bounded with xirc2ps_tx_timeout_task. When timeout occurs, it will call xirc_tx_timeout->schedule_work to start the work. When we call xirc2ps_detach to remove the driver, there may be a sequence as follows: Stop responding to timeout tasks and complete scheduled tasks before cleanup in xirc2ps_detach, which will fix the problem. CPU0 CPU1 |xirc2ps_tx_timeout_task xirc2ps_detach | free_netdev | kfree(dev); | | | do_reset | //use dev Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zheng Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/xircom/xirc2ps_cs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index 894e92ef415b9..9f505cf02d965 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -503,6 +503,11 @@ static void xirc2ps_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; + struct local_info *local = netdev_priv(dev); + + netif_carrier_off(dev); + netif_tx_disable(dev); + cancel_work_sync(&local->tx_timeout_task); dev_dbg(&link->dev, "detach\n"); From 4203d84032e28f893594a453bd8bc9c3b15c7334 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 16 Mar 2023 13:33:24 -0700 Subject: [PATCH 201/898] net: phy: Ensure state transitions are processed from phy_stop() In the phy_disconnect() -> phy_stop() path, we will be forcibly setting the PHY state machine to PHY_HALTED. This invalidates the old_state != phydev->state condition in phy_state_machine() such that we will neither display the state change for debugging, nor will we invoke the link_change_notify() callback. Factor the code by introducing phy_process_state_change(), and ensure that we process the state change from phy_stop() as well. Fixes: 5c5f626bcace ("net: phy: improve handling link_change_notify callback") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index b33e55a7364e6..99a07eb54c441 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -57,6 +57,18 @@ static const char *phy_state_to_str(enum phy_state st) return NULL; } +static void phy_process_state_change(struct phy_device *phydev, + enum phy_state old_state) +{ + if (old_state != phydev->state) { + phydev_dbg(phydev, "PHY state change %s -> %s\n", + phy_state_to_str(old_state), + phy_state_to_str(phydev->state)); + if (phydev->drv && phydev->drv->link_change_notify) + phydev->drv->link_change_notify(phydev); + } +} + static void phy_link_up(struct phy_device *phydev) { phydev->phy_link_change(phydev, true); @@ -1301,6 +1313,7 @@ EXPORT_SYMBOL(phy_free_interrupt); void phy_stop(struct phy_device *phydev) { struct net_device *dev = phydev->attached_dev; + enum phy_state old_state; if (!phy_is_started(phydev) && phydev->state != PHY_DOWN) { WARN(1, "called from state %s\n", @@ -1309,6 +1322,7 @@ void phy_stop(struct phy_device *phydev) } mutex_lock(&phydev->lock); + old_state = phydev->state; if (phydev->state == PHY_CABLETEST) { phy_abort_cable_test(phydev); @@ -1319,6 +1333,7 @@ void phy_stop(struct phy_device *phydev) sfp_upstream_stop(phydev->sfp_bus); phydev->state = PHY_HALTED; + phy_process_state_change(phydev, old_state); mutex_unlock(&phydev->lock); @@ -1436,13 +1451,7 @@ void phy_state_machine(struct work_struct *work) if (err < 0) phy_error(phydev); - if (old_state != phydev->state) { - phydev_dbg(phydev, "PHY state change %s -> %s\n", - phy_state_to_str(old_state), - phy_state_to_str(phydev->state)); - if (phydev->drv && phydev->drv->link_change_notify) - phydev->drv->link_change_notify(phydev); - } + phy_process_state_change(phydev, old_state); /* Only re-schedule a PHY state machine change if we are polling the * PHY, if PHY_MAC_INTERRUPT is set, then we will be moving From 99669259f3361d759219811e670b7e0742668556 Mon Sep 17 00:00:00 2001 From: Maxime Bizon Date: Thu, 16 Mar 2023 16:33:16 -0700 Subject: [PATCH 202/898] net: mdio: fix owner field for mdio buses registered using device-tree Bus ownership is wrong when using of_mdiobus_register() to register an mdio bus. That function is not inline, so when it calls mdiobus_register() the wrong THIS_MODULE value is captured. Signed-off-by: Maxime Bizon Fixes: 90eff9096c01 ("net: phy: Allow splitting MDIO bus/device support from PHYs") [florian: fix kdoc, added Fixes tag] Signed-off-by: Florian Fainelli Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/mdio/of_mdio.c | 12 +++++++----- drivers/net/phy/mdio_devres.c | 11 ++++++----- include/linux/of_mdio.h | 22 +++++++++++++++++++--- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c index 510822d6d0d90..1e46e39f5f46a 100644 --- a/drivers/net/mdio/of_mdio.c +++ b/drivers/net/mdio/of_mdio.c @@ -139,21 +139,23 @@ bool of_mdiobus_child_is_phy(struct device_node *child) EXPORT_SYMBOL(of_mdiobus_child_is_phy); /** - * of_mdiobus_register - Register mii_bus and create PHYs from the device tree + * __of_mdiobus_register - Register mii_bus and create PHYs from the device tree * @mdio: pointer to mii_bus structure * @np: pointer to device_node of MDIO bus. + * @owner: module owning the @mdio object. * * This function registers the mii_bus structure and registers a phy_device * for each child node of @np. */ -int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) +int __of_mdiobus_register(struct mii_bus *mdio, struct device_node *np, + struct module *owner) { struct device_node *child; bool scanphys = false; int addr, rc; if (!np) - return mdiobus_register(mdio); + return __mdiobus_register(mdio, owner); /* Do not continue if the node is disabled */ if (!of_device_is_available(np)) @@ -172,7 +174,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) of_property_read_u32(np, "reset-post-delay-us", &mdio->reset_post_delay_us); /* Register the MDIO bus */ - rc = mdiobus_register(mdio); + rc = __mdiobus_register(mdio, owner); if (rc) return rc; @@ -236,7 +238,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) mdiobus_unregister(mdio); return rc; } -EXPORT_SYMBOL(of_mdiobus_register); +EXPORT_SYMBOL(__of_mdiobus_register); /** * of_mdio_find_device - Given a device tree node, find the mdio_device diff --git a/drivers/net/phy/mdio_devres.c b/drivers/net/phy/mdio_devres.c index b560e99695dfd..69b829e6ab35b 100644 --- a/drivers/net/phy/mdio_devres.c +++ b/drivers/net/phy/mdio_devres.c @@ -98,13 +98,14 @@ EXPORT_SYMBOL(__devm_mdiobus_register); #if IS_ENABLED(CONFIG_OF_MDIO) /** - * devm_of_mdiobus_register - Resource managed variant of of_mdiobus_register() + * __devm_of_mdiobus_register - Resource managed variant of of_mdiobus_register() * @dev: Device to register mii_bus for * @mdio: MII bus structure to register * @np: Device node to parse + * @owner: Owning module */ -int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, - struct device_node *np) +int __devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, + struct device_node *np, struct module *owner) { struct mdiobus_devres *dr; int ret; @@ -117,7 +118,7 @@ int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, if (!dr) return -ENOMEM; - ret = of_mdiobus_register(mdio, np); + ret = __of_mdiobus_register(mdio, np, owner); if (ret) { devres_free(dr); return ret; @@ -127,7 +128,7 @@ int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, devres_add(dev, dr); return 0; } -EXPORT_SYMBOL(devm_of_mdiobus_register); +EXPORT_SYMBOL(__devm_of_mdiobus_register); #endif /* CONFIG_OF_MDIO */ MODULE_LICENSE("GPL"); diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index da633d34ab866..8a52ef2e6fa6b 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -14,9 +14,25 @@ #if IS_ENABLED(CONFIG_OF_MDIO) bool of_mdiobus_child_is_phy(struct device_node *child); -int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np); -int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, - struct device_node *np); +int __of_mdiobus_register(struct mii_bus *mdio, struct device_node *np, + struct module *owner); + +static inline int of_mdiobus_register(struct mii_bus *mdio, + struct device_node *np) +{ + return __of_mdiobus_register(mdio, np, THIS_MODULE); +} + +int __devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, + struct device_node *np, struct module *owner); + +static inline int devm_of_mdiobus_register(struct device *dev, + struct mii_bus *mdio, + struct device_node *np) +{ + return __devm_of_mdiobus_register(dev, mdio, np, THIS_MODULE); +} + struct mdio_device *of_mdio_find_device(struct device_node *np); struct phy_device *of_phy_find_device(struct device_node *phy_np); struct phy_device * From 30b605b8501e321f79e19c3238aa6ca31da6087c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 16 Mar 2023 16:33:17 -0700 Subject: [PATCH 203/898] net: mdio: fix owner field for mdio buses registered using ACPI Bus ownership is wrong when using acpi_mdiobus_register() to register an mdio bus. That function is not inline, so when it calls mdiobus_register() the wrong THIS_MODULE value is captured. CC: Maxime Bizon Fixes: 803ca24d2f92 ("net: mdio: Add ACPI support code for mdio") Signed-off-by: Florian Fainelli Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/mdio/acpi_mdio.c | 10 ++++++---- include/linux/acpi_mdio.h | 9 ++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/mdio/acpi_mdio.c b/drivers/net/mdio/acpi_mdio.c index d77c987fda9cd..4630dde019749 100644 --- a/drivers/net/mdio/acpi_mdio.c +++ b/drivers/net/mdio/acpi_mdio.c @@ -18,16 +18,18 @@ MODULE_AUTHOR("Calvin Johnson "); MODULE_LICENSE("GPL"); /** - * acpi_mdiobus_register - Register mii_bus and create PHYs from the ACPI ASL. + * __acpi_mdiobus_register - Register mii_bus and create PHYs from the ACPI ASL. * @mdio: pointer to mii_bus structure * @fwnode: pointer to fwnode of MDIO bus. This fwnode is expected to represent + * @owner: module owning this @mdio object. * an ACPI device object corresponding to the MDIO bus and its children are * expected to correspond to the PHY devices on that bus. * * This function registers the mii_bus structure and registers a phy_device * for each child node of @fwnode. */ -int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode) +int __acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode, + struct module *owner) { struct fwnode_handle *child; u32 addr; @@ -35,7 +37,7 @@ int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode) /* Mask out all PHYs from auto probing. */ mdio->phy_mask = GENMASK(31, 0); - ret = mdiobus_register(mdio); + ret = __mdiobus_register(mdio, owner); if (ret) return ret; @@ -55,4 +57,4 @@ int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode) } return 0; } -EXPORT_SYMBOL(acpi_mdiobus_register); +EXPORT_SYMBOL(__acpi_mdiobus_register); diff --git a/include/linux/acpi_mdio.h b/include/linux/acpi_mdio.h index 0a24ab7cb66fa..8e2eefa9fbc0f 100644 --- a/include/linux/acpi_mdio.h +++ b/include/linux/acpi_mdio.h @@ -9,7 +9,14 @@ #include #if IS_ENABLED(CONFIG_ACPI_MDIO) -int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode); +int __acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode, + struct module *owner); + +static inline int +acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *handle) +{ + return __acpi_mdiobus_register(mdio, handle, THIS_MODULE); +} #else /* CONFIG_ACPI_MDIO */ static inline int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode) From 070246e4674b125860d311c18ce2623e73e2bd51 Mon Sep 17 00:00:00 2001 From: Jochen Henneberg Date: Fri, 17 Mar 2023 09:08:17 +0100 Subject: [PATCH 204/898] net: stmmac: Fix for mismatched host/device DMA address width Currently DMA address width is either read from a RO device register or force set from the platform data. This breaks DMA when the host DMA address width is <=32it but the device is >32bit. Right now the driver may decide to use a 2nd DMA descriptor for another buffer (happens in case of TSO xmit) assuming that 32bit addressing is used due to platform configuration but the device will still use both descriptor addresses as one address. This can be observed with the Intel EHL platform driver that sets 32bit for addr64 but the MAC reports 40bit. The TX queue gets stuck in case of TCP with iptables NAT configuration on TSO packets. The logic should be like this: Whatever we do on the host side (memory allocation GFP flags) should happen with the host DMA width, whenever we decide how to set addresses on the device registers we must use the device DMA address width. This patch renames the platform address width field from addr64 (term used in device datasheet) to host_addr and uses this value exclusively for host side operations while all chip operations consider the device DMA width as read from the device register. Fixes: 7cfc4486e7ea ("stmmac: intel: Configure EHL PSE0 GbE and PSE1 GbE to 32 bits DMA addressing") Signed-off-by: Jochen Henneberg Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 1 + .../net/ethernet/stmicro/stmmac/dwmac-imx.c | 2 +- .../net/ethernet/stmicro/stmmac/dwmac-intel.c | 4 +-- .../ethernet/stmicro/stmmac/dwmac-mediatek.c | 2 +- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 30 ++++++++++--------- include/linux/stmmac.h | 2 +- 6 files changed, 22 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 6b5d96bced475..ec9c130276d89 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -418,6 +418,7 @@ struct dma_features { unsigned int frpbs; unsigned int frpes; unsigned int addr64; + unsigned int host_dma_width; unsigned int rssen; unsigned int vlhash; unsigned int sphen; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index ac550d1ac0159..2a2be65d65a03 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -288,7 +288,7 @@ static int imx_dwmac_probe(struct platform_device *pdev) goto err_parse_dt; } - plat_dat->addr64 = dwmac->ops->addr_width; + plat_dat->host_dma_width = dwmac->ops->addr_width; plat_dat->init = imx_dwmac_init; plat_dat->exit = imx_dwmac_exit; plat_dat->clks_config = imx_dwmac_clks_config; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 7deb1f817dacc..13aa919633b47 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -684,7 +684,7 @@ static int ehl_pse0_common_data(struct pci_dev *pdev, intel_priv->is_pse = true; plat->bus_id = 2; - plat->addr64 = 32; + plat->host_dma_width = 32; plat->clk_ptp_rate = 200000000; @@ -725,7 +725,7 @@ static int ehl_pse1_common_data(struct pci_dev *pdev, intel_priv->is_pse = true; plat->bus_id = 3; - plat->addr64 = 32; + plat->host_dma_width = 32; plat->clk_ptp_rate = 200000000; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 2f7d8e4561d92..9ae31e3dc8218 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -591,7 +591,7 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev, plat->use_phy_wol = priv_plat->mac_wol ? 0 : 1; plat->riwt_off = 1; plat->maxmtu = ETH_DATA_LEN; - plat->addr64 = priv_plat->variant->dma_bit_mask; + plat->host_dma_width = priv_plat->variant->dma_bit_mask; plat->bsp_priv = priv_plat; plat->init = mediatek_dwmac_init; plat->clks_config = mediatek_dwmac_clks_config; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8f543c3ab5c56..17310ade88dda 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1431,7 +1431,7 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); - if (priv->dma_cap.addr64 <= 32) + if (priv->dma_cap.host_dma_width <= 32) gfp |= GFP_DMA32; if (!buf->page) { @@ -4587,7 +4587,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) unsigned int entry = rx_q->dirty_rx; gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); - if (priv->dma_cap.addr64 <= 32) + if (priv->dma_cap.host_dma_width <= 32) gfp |= GFP_DMA32; while (dirty-- > 0) { @@ -6205,7 +6205,7 @@ static int stmmac_dma_cap_show(struct seq_file *seq, void *v) seq_printf(seq, "\tFlexible RX Parser: %s\n", priv->dma_cap.frpsel ? "Y" : "N"); seq_printf(seq, "\tEnhanced Addressing: %d\n", - priv->dma_cap.addr64); + priv->dma_cap.host_dma_width); seq_printf(seq, "\tReceive Side Scaling: %s\n", priv->dma_cap.rssen ? "Y" : "N"); seq_printf(seq, "\tVLAN Hash Filtering: %s\n", @@ -7178,20 +7178,22 @@ int stmmac_dvr_probe(struct device *device, dev_info(priv->device, "SPH feature enabled\n"); } - /* The current IP register MAC_HW_Feature1[ADDR64] only define - * 32/40/64 bit width, but some SOC support others like i.MX8MP - * support 34 bits but it map to 40 bits width in MAC_HW_Feature1[ADDR64]. - * So overwrite dma_cap.addr64 according to HW real design. + /* Ideally our host DMA address width is the same as for the + * device. However, it may differ and then we have to use our + * host DMA width for allocation and the device DMA width for + * register handling. */ - if (priv->plat->addr64) - priv->dma_cap.addr64 = priv->plat->addr64; + if (priv->plat->host_dma_width) + priv->dma_cap.host_dma_width = priv->plat->host_dma_width; + else + priv->dma_cap.host_dma_width = priv->dma_cap.addr64; - if (priv->dma_cap.addr64) { + if (priv->dma_cap.host_dma_width) { ret = dma_set_mask_and_coherent(device, - DMA_BIT_MASK(priv->dma_cap.addr64)); + DMA_BIT_MASK(priv->dma_cap.host_dma_width)); if (!ret) { - dev_info(priv->device, "Using %d bits DMA width\n", - priv->dma_cap.addr64); + dev_info(priv->device, "Using %d/%d bits DMA host/device width\n", + priv->dma_cap.host_dma_width, priv->dma_cap.addr64); /* * If more than 32 bits can be addressed, make sure to @@ -7206,7 +7208,7 @@ int stmmac_dvr_probe(struct device *device, goto error_hw_init; } - priv->dma_cap.addr64 = 32; + priv->dma_cap.host_dma_width = 32; } } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index a152678b82b71..a2414c1874837 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -215,7 +215,7 @@ struct plat_stmmacenet_data { int unicast_filter_entries; int tx_fifo_size; int rx_fifo_size; - u32 addr64; + u32 host_dma_width; u32 rx_queues_to_use; u32 tx_queues_to_use; u8 rx_sched_algorithm; From 04361b8bb81819efb68bf39c276025e2250ac537 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 17 Mar 2023 07:28:00 +0000 Subject: [PATCH 205/898] net: sfp: fix state loss when updating state_hw_mask Andrew reports that the SFF modules on one of the ZII platforms do not indicate link up due to the SFP code believing that LOS indicating that there is no signal being received from the remote end, but in fact the LOS signal is showing that there is signal. What makes SFF modules different from SFPs is they typically have an inverted LOS, which uncovered this issue. When we read the hardware state, we mask it with state_hw_mask so we ignore anything we're not interested in. However, we don't re-read when state_hw_mask changes, leading to sfp->state being stale. Arrange for a software poll of the module state after we have parsed the EEPROM in sfp_sm_mod_probe() and updated state_*_mask. This will generate any necessary events for signal changes for the state machine as well as updating sfp->state. Reported-by: Andrew Lunn Tested-by: Andrew Lunn Fixes: 8475c4b70b04 ("net: sfp: re-implement soft state polling setup") Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index c02cad6478a81..fb98db61e06cb 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -2190,6 +2190,11 @@ static void sfp_sm_module(struct sfp *sfp, unsigned int event) break; } + /* Force a poll to re-read the hardware signal state after + * sfp_sm_mod_probe() changed state_hw_mask. + */ + mod_delayed_work(system_wq, &sfp->poll, 1); + err = sfp_hwmon_insert(sfp); if (err) dev_warn(sfp->dev, "hwmon probe failed: %pe\n", From 6d206b1ea9f48433a96edec7028586db1d947911 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 17 Mar 2023 16:32:59 +0100 Subject: [PATCH 206/898] mlxsw: core_thermal: Fix fan speed in maximum cooling state The cooling levels array is supposed to prevent the system fans from being configured below a 20% duty cycle as otherwise some of them get stuck at 0 RPM. Due to an off-by-one error, the last element in the array was not initialized, causing it to be set to zero, which in turn lead to fans being configured with a 0% duty cycle in maximum cooling state. Since commit 332fdf951df8 ("mlxsw: thermal: Fix out-of-bounds memory accesses") the contents of the array are static. Therefore, instead of fixing the initialization of the array, simply remove it and adjust thermal_cooling_device_ops::set_cur_state() so that the configured duty cycle is never set below 20%. Before: # cat /sys/class/thermal/thermal_zone0/cdev0/type mlxsw_fan # echo 10 > /sys/class/thermal/thermal_zone0/cdev0/cur_state # cat /sys/class/hwmon/hwmon0/name mlxsw # cat /sys/class/hwmon/hwmon0/pwm1 0 After: # cat /sys/class/thermal/thermal_zone0/cdev0/type mlxsw_fan # echo 10 > /sys/class/thermal/thermal_zone0/cdev0/cur_state # cat /sys/class/hwmon/hwmon0/name mlxsw # cat /sys/class/hwmon/hwmon0/pwm1 255 This bug was uncovered when the thermal subsystem repeatedly tried to configure the cooling devices to their maximum state due to another issue [1]. This resulted in the fans being stuck at 0 RPM, which eventually lead to the system undergoing thermal shutdown. [1] https://lore.kernel.org/netdev/ZA3CFNhU4AbtsP4G@shredder/ Fixes: a421ce088ac8 ("mlxsw: core: Extend cooling device with cooling levels") Signed-off-by: Ido Schimmel Reviewed-by: Vadim Pasternak Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index c5240d38c9dbd..09ed6e5fa6c34 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -105,7 +105,6 @@ struct mlxsw_thermal { struct thermal_zone_device *tzdev; int polling_delay; struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX]; - u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1]; struct thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; struct mlxsw_cooling_states cooling_states[MLXSW_THERMAL_NUM_TRIPS]; struct mlxsw_thermal_area line_cards[]; @@ -468,7 +467,7 @@ static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev, return idx; /* Normalize the state to the valid speed range. */ - state = thermal->cooling_levels[state]; + state = max_t(unsigned long, MLXSW_THERMAL_MIN_STATE, state); mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state)); err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl); if (err) { @@ -859,10 +858,6 @@ int mlxsw_thermal_init(struct mlxsw_core *core, } } - /* Initialize cooling levels per PWM state. */ - for (i = 0; i < MLXSW_THERMAL_MAX_STATE; i++) - thermal->cooling_levels[i] = max(MLXSW_THERMAL_MIN_STATE, i); - thermal->polling_delay = bus_info->low_frequency ? MLXSW_THERMAL_SLOW_POLL_INT : MLXSW_THERMAL_POLL_INT; From 9eb775968b68d049fb3b00353f12cd10308527c7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 15 Mar 2023 17:30:33 -0700 Subject: [PATCH 207/898] xfs: walk all AGs if TRYLOCK passed to xfs_alloc_vextent_iterate_ags Callers of xfs_alloc_vextent_iterate_ags that pass in the TRYLOCK flag want us to perform a non-blocking scan of the AGs for free space. There are no ordering constraints for non-blocking AGF lock acquisition, so the scan can freely start over at AG 0 even when minimum_agno > 0. This manifests fairly reliably on xfs/294 on 6.3-rc2 with the parent pointer patchset applied and the realtime volume enabled. I observed the following sequence as part of an xfs_dir_createname call: 0. Fragment the free space, then allocate nearly all the free space in all AGs except AG 0. 1. Create a directory in AG 2 and let it grow for a while. 2. Try to allocate 2 blocks to expand the dirent part of a directory. The space will be allocated out of AG 0, but the allocation will not be contiguous. This (I think) activates the LOWMODE allocator. 3. The bmapi call decides to convert from extents to bmbt format and tries to allocate 1 block. This allocation request calls xfs_alloc_vextent_start_ag with the inode number, which starts the scan at AG 2. We ignore AG 0 (with all its free space) and instead scrape AG 2 and 3 for more space. We find one block, but this now kicks t_highest_agno to 3. 4. The createname call decides it needs to split the dabtree. It tries to allocate even more space with xfs_alloc_vextent_start_ag, but now we're constrained to AG 3, and we don't find the space. The createname returns ENOSPC and the filesystem shuts down. This change fixes the problem by making the trylock scan wrap around to AG 0 if it doesn't like the AGs that it finds. Since the current transaction itself holds AGF 0, the trylock of AGF 0 will succeed, and we take space from the AG that has plenty. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 8999e38e1bed2..bd7112d430b68 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3326,11 +3326,14 @@ xfs_alloc_vextent_iterate_ags( uint32_t flags) { struct xfs_mount *mp = args->mp; + xfs_agnumber_t restart_agno = minimum_agno; xfs_agnumber_t agno; int error = 0; + if (flags & XFS_ALLOC_FLAG_TRYLOCK) + restart_agno = 0; restart: - for_each_perag_wrap_range(mp, start_agno, minimum_agno, + for_each_perag_wrap_range(mp, start_agno, restart_agno, mp->m_sb.sb_agcount, agno, args->pag) { args->agno = agno; error = xfs_alloc_vextent_prepare_ag(args); @@ -3369,6 +3372,7 @@ xfs_alloc_vextent_iterate_ags( */ if (flags) { flags = 0; + restart_agno = minimum_agno; goto restart; } From e6fbb7167ed005783ac5aef3e75699f45ffe2af8 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 15 Mar 2023 17:30:33 -0700 Subject: [PATCH 208/898] xfs: add tracepoints for each of the externally visible allocators There are now five separate space allocator interfaces exposed to the rest of XFS for five different strategies to find space. Add tracepoints for each of them so that I can tell from a trace dump exactly which ones got called and what happened underneath them. Add a sixth so it's more obvious if an allocation actually happened. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 17 +++++++++++++++++ fs/xfs/xfs_trace.h | 7 +++++++ 2 files changed, 24 insertions(+) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index bd7112d430b68..55ae08a6144c3 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3255,6 +3255,8 @@ xfs_alloc_vextent_finish( XFS_STATS_INC(mp, xs_allocx); XFS_STATS_ADD(mp, xs_allocb, args->len); + trace_xfs_alloc_vextent_finish(args); + out_drop_perag: if (drop_perag && args->pag) { xfs_perag_rele(args->pag); @@ -3284,6 +3286,9 @@ xfs_alloc_vextent_this_ag( args->agno = agno; args->agbno = 0; + + trace_xfs_alloc_vextent_this_ag(args); + error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0), &minimum_agno); if (error) { @@ -3405,6 +3410,9 @@ xfs_alloc_vextent_start_ag( args->agno = NULLAGNUMBER; args->agbno = NULLAGBLOCK; + + trace_xfs_alloc_vextent_first_ag(args); + error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { if (error == -ENOSPC) @@ -3455,6 +3463,9 @@ xfs_alloc_vextent_first_ag( args->agno = NULLAGNUMBER; args->agbno = NULLAGBLOCK; + + trace_xfs_alloc_vextent_start_ag(args); + error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { if (error == -ENOSPC) @@ -3486,6 +3497,9 @@ xfs_alloc_vextent_exact_bno( args->agno = XFS_FSB_TO_AGNO(mp, target); args->agbno = XFS_FSB_TO_AGBNO(mp, target); + + trace_xfs_alloc_vextent_near_bno(args); + error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { if (error == -ENOSPC) @@ -3521,6 +3535,9 @@ xfs_alloc_vextent_near_bno( args->agno = XFS_FSB_TO_AGNO(mp, target); args->agbno = XFS_FSB_TO_AGBNO(mp, target); + + trace_xfs_alloc_vextent_exact_bno(args); + error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { if (error == -ENOSPC) diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 7dc0fd6a65047..9c0006c55fec3 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1883,6 +1883,13 @@ DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_this_ag); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_start_ag); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_first_ag); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_exact_bno); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_near_bno); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_finish); + TRACE_EVENT(xfs_alloc_cur_check, TP_PROTO(struct xfs_mount *mp, xfs_btnum_t btnum, xfs_agblock_t bno, xfs_extlen_t len, xfs_extlen_t diff, bool new), From 3cfb9290da3d87a5877b03bda96c3d5d3ed9fcb0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 16 Mar 2023 09:31:20 -0700 Subject: [PATCH 209/898] xfs: test dir/attr hash when loading module Back in the 6.2-rc1 days, Eric Whitney reported a fstests regression in ext4 against generic/454. The cause of this test failure was the unfortunate combination of setting an xattr name containing UTF8 encoded emoji, an xattr hash function that accepted a char pointer with no explicit signedness, signed type extension of those chars to an int, and the 6.2 build tools maintainers deciding to mandate -funsigned-char across the board. As a result, the ondisk extended attribute structure written out by 6.1 and 6.2 were not the same. This discrepancy, in fact, had been noticeable if a filesystem with such an xattr were moved between any two architectures that don't employ the same signedness of a raw "char" declaration. The only reason anyone noticed is that x86 gcc defaults to signed, and no such -funsigned-char update was made to e2fsprogs, so e2fsck immediately started reporting data corruption. After a day and a half of discussing how to handle this use case (xattrs with bit 7 set anywhere in the name) without breaking existing users, Linus merged his own patch and didn't tell the maintainer. None of the ext4 developers realized this until AUTOSEL announced that the commit had been backported to stable. In the end, this problem could have been detected much earlier if there had been any useful tests of hash function(s) in use inside ext4 to make sure that they always produce the same outputs given the same inputs. The XFS dirent/xattr name hash takes a uint8_t*, so I don't think it's vulnerable to this problem. However, let's avoid all this drama by adding our own self test to check that the da hash produces the same outputs for a static pile of inputs on various platforms. This enables us to fix any breakage that may result in a controlled fashion. The buffer and test data are identical to the patches submitted to xfsprogs. Link: https://lore.kernel.org/linux-ext4/Y8bpkm3jA3bDm3eL@debian-BULLSEYE-live-builder-AMD64/ Link: https://lore.kernel.org/linux-xfs/ZBUKCRR7xvIqPrpX@destitution/T/#md38272cc684e2c0d61494435ccbb91f022e8dee4 Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/Makefile | 1 + fs/xfs/xfs_dahash_test.c | 662 +++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_dahash_test.h | 12 + fs/xfs/xfs_super.c | 5 + 4 files changed, 680 insertions(+) create mode 100644 fs/xfs/xfs_dahash_test.c create mode 100644 fs/xfs/xfs_dahash_test.h diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 03135a1c31b67..92d88dc3c9f71 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -63,6 +63,7 @@ xfs-y += xfs_aops.o \ xfs_bmap_util.o \ xfs_bio_io.o \ xfs_buf.o \ + xfs_dahash_test.o \ xfs_dir2_readdir.o \ xfs_discard.o \ xfs_error.o \ diff --git a/fs/xfs/xfs_dahash_test.c b/fs/xfs/xfs_dahash_test.c new file mode 100644 index 0000000000000..230651ab5ce43 --- /dev/null +++ b/fs/xfs/xfs_dahash_test.c @@ -0,0 +1,662 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dahash_test.h" + +/* 4096 random bytes */ +static uint8_t __initdata __attribute__((__aligned__(8))) test_buf[] = +{ + 0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30, + 0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4, + 0xc9, 0x6e, 0x8b, 0xdb, 0x98, 0x6b, 0xaa, 0x60, + 0xa8, 0xb5, 0xbc, 0x6c, 0xa9, 0xb1, 0x5b, 0x2c, + 0xea, 0xb4, 0x92, 0x6a, 0x3f, 0x79, 0x91, 0xe4, + 0xe9, 0x70, 0x51, 0x8c, 0x7f, 0x95, 0x6f, 0x1a, + 0x56, 0xa1, 0x5c, 0x27, 0x03, 0x67, 0x9f, 0x3a, + 0xe2, 0x31, 0x11, 0x29, 0x6b, 0x98, 0xfc, 0xc4, + 0x53, 0x24, 0xc5, 0x8b, 0xce, 0x47, 0xb2, 0xb9, + 0x32, 0xcb, 0xc1, 0xd0, 0x03, 0x57, 0x4e, 0xd4, + 0xe9, 0x3c, 0xa1, 0x63, 0xcf, 0x12, 0x0e, 0xca, + 0xe1, 0x13, 0xd1, 0x93, 0xa6, 0x88, 0x5c, 0x61, + 0x5b, 0xbb, 0xf0, 0x19, 0x46, 0xb4, 0xcf, 0x9e, + 0xb6, 0x6b, 0x4c, 0x3a, 0xcf, 0x60, 0xf9, 0x7a, + 0x8d, 0x07, 0x63, 0xdb, 0x40, 0xe9, 0x0b, 0x6f, + 0xad, 0x97, 0xf1, 0xed, 0xd0, 0x1e, 0x26, 0xfd, + 0xbf, 0xb7, 0xc8, 0x04, 0x94, 0xf8, 0x8b, 0x8c, + 0xf1, 0xab, 0x7a, 0xd4, 0xdd, 0xf3, 0xe8, 0x88, + 0xc3, 0xed, 0x17, 0x8a, 0x9b, 0x40, 0x0d, 0x53, + 0x62, 0x12, 0x03, 0x5f, 0x1b, 0x35, 0x32, 0x1f, + 0xb4, 0x7b, 0x93, 0x78, 0x0d, 0xdb, 0xce, 0xa4, + 0xc0, 0x47, 0xd5, 0xbf, 0x68, 0xe8, 0x5d, 0x74, + 0x8f, 0x8e, 0x75, 0x1c, 0xb2, 0x4f, 0x9a, 0x60, + 0xd1, 0xbe, 0x10, 0xf4, 0x5c, 0xa1, 0x53, 0x09, + 0xa5, 0xe0, 0x09, 0x54, 0x85, 0x5c, 0xdc, 0x07, + 0xe7, 0x21, 0x69, 0x7b, 0x8a, 0xfd, 0x90, 0xf1, + 0x22, 0xd0, 0xb4, 0x36, 0x28, 0xe6, 0xb8, 0x0f, + 0x39, 0xde, 0xc8, 0xf3, 0x86, 0x60, 0x34, 0xd2, + 0x5e, 0xdf, 0xfd, 0xcf, 0x0f, 0xa9, 0x65, 0xf0, + 0xd5, 0x4d, 0x96, 0x40, 0xe3, 0xdf, 0x3f, 0x95, + 0x5a, 0x39, 0x19, 0x93, 0xf4, 0x75, 0xce, 0x22, + 0x00, 0x1c, 0x93, 0xe2, 0x03, 0x66, 0xf4, 0x93, + 0x73, 0x86, 0x81, 0x8e, 0x29, 0x44, 0x48, 0x86, + 0x61, 0x7c, 0x48, 0xa3, 0x43, 0xd2, 0x9c, 0x8d, + 0xd4, 0x95, 0xdd, 0xe1, 0x22, 0x89, 0x3a, 0x40, + 0x4c, 0x1b, 0x8a, 0x04, 0xa8, 0x09, 0x69, 0x8b, + 0xea, 0xc6, 0x55, 0x8e, 0x57, 0xe6, 0x64, 0x35, + 0xf0, 0xc7, 0x16, 0x9f, 0x5d, 0x5e, 0x86, 0x40, + 0x46, 0xbb, 0xe5, 0x45, 0x88, 0xfe, 0xc9, 0x63, + 0x15, 0xfb, 0xf5, 0xbd, 0x71, 0x61, 0xeb, 0x7b, + 0x78, 0x70, 0x07, 0x31, 0x03, 0x9f, 0xb2, 0xc8, + 0xa7, 0xab, 0x47, 0xfd, 0xdf, 0xa0, 0x78, 0x72, + 0xa4, 0x2a, 0xe4, 0xb6, 0xba, 0xc0, 0x1e, 0x86, + 0x71, 0xe6, 0x3d, 0x18, 0x37, 0x70, 0xe6, 0xff, + 0xe0, 0xbc, 0x0b, 0x22, 0xa0, 0x1f, 0xd3, 0xed, + 0xa2, 0x55, 0x39, 0xab, 0xa8, 0x13, 0x73, 0x7c, + 0x3f, 0xb2, 0xd6, 0x19, 0xac, 0xff, 0x99, 0xed, + 0xe8, 0xe6, 0xa6, 0x22, 0xe3, 0x9c, 0xf1, 0x30, + 0xdc, 0x01, 0x0a, 0x56, 0xfa, 0xe4, 0xc9, 0x99, + 0xdd, 0xa8, 0xd8, 0xda, 0x35, 0x51, 0x73, 0xb4, + 0x40, 0x86, 0x85, 0xdb, 0x5c, 0xd5, 0x85, 0x80, + 0x14, 0x9c, 0xfd, 0x98, 0xa9, 0x82, 0xc5, 0x37, + 0xff, 0x32, 0x5d, 0xd0, 0x0b, 0xfa, 0xdc, 0x04, + 0x5e, 0x09, 0xd2, 0xca, 0x17, 0x4b, 0x1a, 0x8e, + 0x15, 0xe1, 0xcc, 0x4e, 0x52, 0x88, 0x35, 0xbd, + 0x48, 0xfe, 0x15, 0xa0, 0x91, 0xfd, 0x7e, 0x6c, + 0x0e, 0x5d, 0x79, 0x1b, 0x81, 0x79, 0xd2, 0x09, + 0x34, 0x70, 0x3d, 0x81, 0xec, 0xf6, 0x24, 0xbb, + 0xfb, 0xf1, 0x7b, 0xdf, 0x54, 0xea, 0x80, 0x9b, + 0xc7, 0x99, 0x9e, 0xbd, 0x16, 0x78, 0x12, 0x53, + 0x5e, 0x01, 0xa7, 0x4e, 0xbd, 0x67, 0xe1, 0x9b, + 0x4c, 0x0e, 0x61, 0x45, 0x97, 0xd2, 0xf0, 0x0f, + 0xfe, 0x15, 0x08, 0xb7, 0x11, 0x4c, 0xe7, 0xff, + 0x81, 0x53, 0xff, 0x91, 0x25, 0x38, 0x7e, 0x40, + 0x94, 0xe5, 0xe0, 0xad, 0xe6, 0xd9, 0x79, 0xb6, + 0x92, 0xc9, 0xfc, 0xde, 0xc3, 0x1a, 0x23, 0xbb, + 0xdd, 0xc8, 0x51, 0x0c, 0x3a, 0x72, 0xfa, 0x73, + 0x6f, 0xb7, 0xee, 0x61, 0x39, 0x03, 0x01, 0x3f, + 0x7f, 0x94, 0x2e, 0x2e, 0xba, 0x3a, 0xbb, 0xb4, + 0xfa, 0x6a, 0x17, 0xfe, 0xea, 0xef, 0x5e, 0x66, + 0x97, 0x3f, 0x32, 0x3d, 0xd7, 0x3e, 0xb1, 0xf1, + 0x6c, 0x14, 0x4c, 0xfd, 0x37, 0xd3, 0x38, 0x80, + 0xfb, 0xde, 0xa6, 0x24, 0x1e, 0xc8, 0xca, 0x7f, + 0x3a, 0x93, 0xd8, 0x8b, 0x18, 0x13, 0xb2, 0xe5, + 0xe4, 0x93, 0x05, 0x53, 0x4f, 0x84, 0x66, 0xa7, + 0x58, 0x5c, 0x7b, 0x86, 0x52, 0x6d, 0x0d, 0xce, + 0xa4, 0x30, 0x7d, 0xb6, 0x18, 0x9f, 0xeb, 0xff, + 0x22, 0xbb, 0x72, 0x29, 0xb9, 0x44, 0x0b, 0x48, + 0x1e, 0x84, 0x71, 0x81, 0xe3, 0x6d, 0x73, 0x26, + 0x92, 0xb4, 0x4d, 0x2a, 0x29, 0xb8, 0x1f, 0x72, + 0xed, 0xd0, 0xe1, 0x64, 0x77, 0xea, 0x8e, 0x88, + 0x0f, 0xef, 0x3f, 0xb1, 0x3b, 0xad, 0xf9, 0xc9, + 0x8b, 0xd0, 0xac, 0xc6, 0xcc, 0xa9, 0x40, 0xcc, + 0x76, 0xf6, 0x3b, 0x53, 0xb5, 0x88, 0xcb, 0xc8, + 0x37, 0xf1, 0xa2, 0xba, 0x23, 0x15, 0x99, 0x09, + 0xcc, 0xe7, 0x7a, 0x3b, 0x37, 0xf7, 0x58, 0xc8, + 0x46, 0x8c, 0x2b, 0x2f, 0x4e, 0x0e, 0xa6, 0x5c, + 0xea, 0x85, 0x55, 0xba, 0x02, 0x0e, 0x0e, 0x48, + 0xbc, 0xe1, 0xb1, 0x01, 0x35, 0x79, 0x13, 0x3d, + 0x1b, 0xc0, 0x53, 0x68, 0x11, 0xe7, 0x95, 0x0f, + 0x9d, 0x3f, 0x4c, 0x47, 0x7b, 0x4d, 0x1c, 0xae, + 0x50, 0x9b, 0xcb, 0xdd, 0x05, 0x8d, 0x9a, 0x97, + 0xfd, 0x8c, 0xef, 0x0c, 0x1d, 0x67, 0x73, 0xa8, + 0x28, 0x36, 0xd5, 0xb6, 0x92, 0x33, 0x40, 0x75, + 0x0b, 0x51, 0xc3, 0x64, 0xba, 0x1d, 0xc2, 0xcc, + 0xee, 0x7d, 0x54, 0x0f, 0x27, 0x69, 0xa7, 0x27, + 0x63, 0x30, 0x29, 0xd9, 0xc8, 0x84, 0xd8, 0xdf, + 0x9f, 0x68, 0x8d, 0x04, 0xca, 0xa6, 0xc5, 0xc7, + 0x7a, 0x5c, 0xc8, 0xd1, 0xcb, 0x4a, 0xec, 0xd0, + 0xd8, 0x20, 0x69, 0xc5, 0x17, 0xcd, 0x78, 0xc8, + 0x75, 0x23, 0x30, 0x69, 0xc9, 0xd4, 0xea, 0x5c, + 0x4f, 0x6b, 0x86, 0x3f, 0x8b, 0xfe, 0xee, 0x44, + 0xc9, 0x7c, 0xb7, 0xdd, 0x3e, 0xe5, 0xec, 0x54, + 0x03, 0x3e, 0xaa, 0x82, 0xc6, 0xdf, 0xb2, 0x38, + 0x0e, 0x5d, 0xb3, 0x88, 0xd9, 0xd3, 0x69, 0x5f, + 0x8f, 0x70, 0x8a, 0x7e, 0x11, 0xd9, 0x1e, 0x7b, + 0x38, 0xf1, 0x42, 0x1a, 0xc0, 0x35, 0xf5, 0xc7, + 0x36, 0x85, 0xf5, 0xf7, 0xb8, 0x7e, 0xc7, 0xef, + 0x18, 0xf1, 0x63, 0xd6, 0x7a, 0xc6, 0xc9, 0x0e, + 0x4d, 0x69, 0x4f, 0x84, 0xef, 0x26, 0x41, 0x0c, + 0xec, 0xc7, 0xe0, 0x7e, 0x3c, 0x67, 0x01, 0x4c, + 0x62, 0x1a, 0x20, 0x6f, 0xee, 0x47, 0x4d, 0xc0, + 0x99, 0x13, 0x8d, 0x91, 0x4a, 0x26, 0xd4, 0x37, + 0x28, 0x90, 0x58, 0x75, 0x66, 0x2b, 0x0a, 0xdf, + 0xda, 0xee, 0x92, 0x25, 0x90, 0x62, 0x39, 0x9e, + 0x44, 0x98, 0xad, 0xc1, 0x88, 0xed, 0xe4, 0xb4, + 0xaf, 0xf5, 0x8c, 0x9b, 0x48, 0x4d, 0x56, 0x60, + 0x97, 0x0f, 0x61, 0x59, 0x9e, 0xa6, 0x27, 0xfe, + 0xc1, 0x91, 0x15, 0x38, 0xb8, 0x0f, 0xae, 0x61, + 0x7d, 0x26, 0x13, 0x5a, 0x73, 0xff, 0x1c, 0xa3, + 0x61, 0x04, 0x58, 0x48, 0x55, 0x44, 0x11, 0xfe, + 0x15, 0xca, 0xc3, 0xbd, 0xca, 0xc5, 0xb4, 0x40, + 0x5d, 0x1b, 0x7f, 0x39, 0xb5, 0x9c, 0x35, 0xec, + 0x61, 0x15, 0x32, 0x32, 0xb8, 0x4e, 0x40, 0x9f, + 0x17, 0x1f, 0x0a, 0x4d, 0xa9, 0x91, 0xef, 0xb7, + 0xb0, 0xeb, 0xc2, 0x83, 0x9a, 0x6c, 0xd2, 0x79, + 0x43, 0x78, 0x5e, 0x2f, 0xe5, 0xdd, 0x1a, 0x3c, + 0x45, 0xab, 0x29, 0x40, 0x3a, 0x37, 0x5b, 0x6f, + 0xd7, 0xfc, 0x48, 0x64, 0x3c, 0x49, 0xfb, 0x21, + 0xbe, 0xc3, 0xff, 0x07, 0xfb, 0x17, 0xe9, 0xc9, + 0x0c, 0x4c, 0x5c, 0x15, 0x9e, 0x8e, 0x22, 0x30, + 0x0a, 0xde, 0x48, 0x7f, 0xdb, 0x0d, 0xd1, 0x2b, + 0x87, 0x38, 0x9e, 0xcc, 0x5a, 0x01, 0x16, 0xee, + 0x75, 0x49, 0x0d, 0x30, 0x01, 0x34, 0x6a, 0xb6, + 0x9a, 0x5a, 0x2a, 0xec, 0xbb, 0x48, 0xac, 0xd3, + 0x77, 0x83, 0xd8, 0x08, 0x86, 0x4f, 0x48, 0x09, + 0x29, 0x41, 0x79, 0xa1, 0x03, 0x12, 0xc4, 0xcd, + 0x90, 0x55, 0x47, 0x66, 0x74, 0x9a, 0xcc, 0x4f, + 0x35, 0x8c, 0xd6, 0x98, 0xef, 0xeb, 0x45, 0xb9, + 0x9a, 0x26, 0x2f, 0x39, 0xa5, 0x70, 0x6d, 0xfc, + 0xb4, 0x51, 0xee, 0xf4, 0x9c, 0xe7, 0x38, 0x59, + 0xad, 0xf4, 0xbc, 0x46, 0xff, 0x46, 0x8e, 0x60, + 0x9c, 0xa3, 0x60, 0x1d, 0xf8, 0x26, 0x72, 0xf5, + 0x72, 0x9d, 0x68, 0x80, 0x04, 0xf6, 0x0b, 0xa1, + 0x0a, 0xd5, 0xa7, 0x82, 0x3a, 0x3e, 0x47, 0xa8, + 0x5a, 0xde, 0x59, 0x4f, 0x7b, 0x07, 0xb3, 0xe9, + 0x24, 0x19, 0x3d, 0x34, 0x05, 0xec, 0xf1, 0xab, + 0x6e, 0x64, 0x8f, 0xd3, 0xe6, 0x41, 0x86, 0x80, + 0x70, 0xe3, 0x8d, 0x60, 0x9c, 0x34, 0x25, 0x01, + 0x07, 0x4d, 0x19, 0x41, 0x4e, 0x3d, 0x5c, 0x7e, + 0xa8, 0xf5, 0xcc, 0xd5, 0x7b, 0xe2, 0x7d, 0x3d, + 0x49, 0x86, 0x7d, 0x07, 0xb7, 0x10, 0xe3, 0x35, + 0xb8, 0x84, 0x6d, 0x76, 0xab, 0x17, 0xc6, 0x38, + 0xb4, 0xd3, 0x28, 0x57, 0xad, 0xd3, 0x88, 0x5a, + 0xda, 0xea, 0xc8, 0x94, 0xcc, 0x37, 0x19, 0xac, + 0x9c, 0x9f, 0x4b, 0x00, 0x15, 0xc0, 0xc8, 0xca, + 0x1f, 0x15, 0xaa, 0xe0, 0xdb, 0xf9, 0x2f, 0x57, + 0x1b, 0x24, 0xc7, 0x6f, 0x76, 0x29, 0xfb, 0xed, + 0x25, 0x0d, 0xc0, 0xfe, 0xbd, 0x5a, 0xbf, 0x20, + 0x08, 0x51, 0x05, 0xec, 0x71, 0xa3, 0xbf, 0xef, + 0x5e, 0x99, 0x75, 0xdb, 0x3c, 0x5f, 0x9a, 0x8c, + 0xbb, 0x19, 0x5c, 0x0e, 0x93, 0x19, 0xf8, 0x6a, + 0xbc, 0xf2, 0x12, 0x54, 0x2f, 0xcb, 0x28, 0x64, + 0x88, 0xb3, 0x92, 0x0d, 0x96, 0xd1, 0xa6, 0xe4, + 0x1f, 0xf1, 0x4d, 0xa4, 0xab, 0x1c, 0xee, 0x54, + 0xf2, 0xad, 0x29, 0x6d, 0x32, 0x37, 0xb2, 0x16, + 0x77, 0x5c, 0xdc, 0x2e, 0x54, 0xec, 0x75, 0x26, + 0xc6, 0x36, 0xd9, 0x17, 0x2c, 0xf1, 0x7a, 0xdc, + 0x4b, 0xf1, 0xe2, 0xd9, 0x95, 0xba, 0xac, 0x87, + 0xc1, 0xf3, 0x8e, 0x58, 0x08, 0xd8, 0x87, 0x60, + 0xc9, 0xee, 0x6a, 0xde, 0xa4, 0xd2, 0xfc, 0x0d, + 0xe5, 0x36, 0xc4, 0x5c, 0x52, 0xb3, 0x07, 0x54, + 0x65, 0x24, 0xc1, 0xb1, 0xd1, 0xb1, 0x53, 0x13, + 0x31, 0x79, 0x7f, 0x05, 0x76, 0xeb, 0x37, 0x59, + 0x15, 0x2b, 0xd1, 0x3f, 0xac, 0x08, 0x97, 0xeb, + 0x91, 0x98, 0xdf, 0x6c, 0x09, 0x0d, 0x04, 0x9f, + 0xdc, 0x3b, 0x0e, 0x60, 0x68, 0x47, 0x23, 0x15, + 0x16, 0xc6, 0x0b, 0x35, 0xf8, 0x77, 0xa2, 0x78, + 0x50, 0xd4, 0x64, 0x22, 0x33, 0xff, 0xfb, 0x93, + 0x71, 0x46, 0x50, 0x39, 0x1b, 0x9c, 0xea, 0x4e, + 0x8d, 0x0c, 0x37, 0xe5, 0x5c, 0x51, 0x3a, 0x31, + 0xb2, 0x85, 0x84, 0x3f, 0x41, 0xee, 0xa2, 0xc1, + 0xc6, 0x13, 0x3b, 0x54, 0x28, 0xd2, 0x18, 0x37, + 0xcc, 0x46, 0x9f, 0x6a, 0x91, 0x3d, 0x5a, 0x15, + 0x3c, 0x89, 0xa3, 0x61, 0x06, 0x7d, 0x2e, 0x78, + 0xbe, 0x7d, 0x40, 0xba, 0x2f, 0x95, 0xb1, 0x2f, + 0x87, 0x3b, 0x8a, 0xbe, 0x6a, 0xf4, 0xc2, 0x31, + 0x74, 0xee, 0x91, 0xe0, 0x23, 0xaa, 0x5d, 0x7f, + 0xdd, 0xf0, 0x44, 0x8c, 0x0b, 0x59, 0x2b, 0xfc, + 0x48, 0x3a, 0xdf, 0x07, 0x05, 0x38, 0x6c, 0xc9, + 0xeb, 0x18, 0x24, 0x68, 0x8d, 0x58, 0x98, 0xd3, + 0x31, 0xa3, 0xe4, 0x70, 0x59, 0xb1, 0x21, 0xbe, + 0x7e, 0x65, 0x7d, 0xb8, 0x04, 0xab, 0xf6, 0xe4, + 0xd7, 0xda, 0xec, 0x09, 0x8f, 0xda, 0x6d, 0x24, + 0x07, 0xcc, 0x29, 0x17, 0x05, 0x78, 0x1a, 0xc1, + 0xb1, 0xce, 0xfc, 0xaa, 0x2d, 0xe7, 0xcc, 0x85, + 0x84, 0x84, 0x03, 0x2a, 0x0c, 0x3f, 0xa9, 0xf8, + 0xfd, 0x84, 0x53, 0x59, 0x5c, 0xf0, 0xd4, 0x09, + 0xf0, 0xd2, 0x6c, 0x32, 0x03, 0xb0, 0xa0, 0x8c, + 0x52, 0xeb, 0x23, 0x91, 0x88, 0x43, 0x13, 0x46, + 0xf6, 0x1e, 0xb4, 0x1b, 0xf5, 0x8e, 0x3a, 0xb5, + 0x3d, 0x00, 0xf6, 0xe5, 0x08, 0x3d, 0x5f, 0x39, + 0xd3, 0x21, 0x69, 0xbc, 0x03, 0x22, 0x3a, 0xd2, + 0x5c, 0x84, 0xf8, 0x15, 0xc4, 0x80, 0x0b, 0xbc, + 0x29, 0x3c, 0xf3, 0x95, 0x98, 0xcd, 0x8f, 0x35, + 0xbc, 0xa5, 0x3e, 0xfc, 0xd4, 0x13, 0x9e, 0xde, + 0x4f, 0xce, 0x71, 0x9d, 0x09, 0xad, 0xf2, 0x80, + 0x6b, 0x65, 0x7f, 0x03, 0x00, 0x14, 0x7c, 0x15, + 0x85, 0x40, 0x6d, 0x70, 0xea, 0xdc, 0xb3, 0x63, + 0x35, 0x4f, 0x4d, 0xe0, 0xd9, 0xd5, 0x3c, 0x58, + 0x56, 0x23, 0x80, 0xe2, 0x36, 0xdd, 0x75, 0x1d, + 0x94, 0x11, 0x41, 0x8e, 0xe0, 0x81, 0x8e, 0xcf, + 0xe0, 0xe5, 0xf6, 0xde, 0xd1, 0xe7, 0x04, 0x12, + 0x79, 0x92, 0x2b, 0x71, 0x2a, 0x79, 0x8b, 0x7c, + 0x44, 0x79, 0x16, 0x30, 0x4e, 0xf4, 0xf6, 0x9b, + 0xb7, 0x40, 0xa3, 0x5a, 0xa7, 0x69, 0x3e, 0xc1, + 0x3a, 0x04, 0xd0, 0x88, 0xa0, 0x3b, 0xdd, 0xc6, + 0x9e, 0x7e, 0x1e, 0x1e, 0x8f, 0x44, 0xf7, 0x73, + 0x67, 0x1e, 0x1a, 0x78, 0xfa, 0x62, 0xf4, 0xa9, + 0xa8, 0xc6, 0x5b, 0xb8, 0xfa, 0x06, 0x7d, 0x5e, + 0x38, 0x1c, 0x9a, 0x39, 0xe9, 0x39, 0x98, 0x22, + 0x0b, 0xa7, 0xac, 0x0b, 0xf3, 0xbc, 0xf1, 0xeb, + 0x8c, 0x81, 0xe3, 0x48, 0x8a, 0xed, 0x42, 0xc2, + 0x38, 0xcf, 0x3e, 0xda, 0xd2, 0x89, 0x8d, 0x9c, + 0x53, 0xb5, 0x2f, 0x41, 0x01, 0x26, 0x84, 0x9c, + 0xa3, 0x56, 0xf6, 0x49, 0xc7, 0xd4, 0x9f, 0x93, + 0x1b, 0x96, 0x49, 0x5e, 0xad, 0xb3, 0x84, 0x1f, + 0x3c, 0xa4, 0xe0, 0x9b, 0xd1, 0x90, 0xbc, 0x38, + 0x6c, 0xdd, 0x95, 0x4d, 0x9d, 0xb1, 0x71, 0x57, + 0x2d, 0x34, 0xe8, 0xb8, 0x42, 0xc7, 0x99, 0x03, + 0xc7, 0x07, 0x30, 0x65, 0x91, 0x55, 0xd5, 0x90, + 0x70, 0x97, 0x37, 0x68, 0xd4, 0x11, 0xf9, 0xe8, + 0xce, 0xec, 0xdc, 0x34, 0xd5, 0xd3, 0xb7, 0xc4, + 0xb8, 0x97, 0x05, 0x92, 0xad, 0xf8, 0xe2, 0x36, + 0x64, 0x41, 0xc9, 0xc5, 0x41, 0x77, 0x52, 0xd7, + 0x2c, 0xa5, 0x24, 0x2f, 0xd9, 0x34, 0x0b, 0x47, + 0x35, 0xa7, 0x28, 0x8b, 0xc5, 0xcd, 0xe9, 0x46, + 0xac, 0x39, 0x94, 0x3c, 0x10, 0xc6, 0x29, 0x73, + 0x0e, 0x0e, 0x5d, 0xe0, 0x71, 0x03, 0x8a, 0x72, + 0x0e, 0x26, 0xb0, 0x7d, 0x84, 0xed, 0x95, 0x23, + 0x49, 0x5a, 0x45, 0x83, 0x45, 0x60, 0x11, 0x4a, + 0x46, 0x31, 0xd4, 0xd8, 0x16, 0x54, 0x98, 0x58, + 0xed, 0x6d, 0xcc, 0x5d, 0xd6, 0x50, 0x61, 0x9f, + 0x9d, 0xc5, 0x3e, 0x9d, 0x32, 0x47, 0xde, 0x96, + 0xe1, 0x5d, 0xd8, 0xf8, 0xb4, 0x69, 0x6f, 0xb9, + 0x15, 0x90, 0x57, 0x7a, 0xf6, 0xad, 0xb0, 0x5b, + 0xf5, 0xa6, 0x36, 0x94, 0xfd, 0x84, 0xce, 0x1c, + 0x0f, 0x4b, 0xd0, 0xc2, 0x5b, 0x6b, 0x56, 0xef, + 0x73, 0x93, 0x0b, 0xc3, 0xee, 0xd9, 0xcf, 0xd3, + 0xa4, 0x22, 0x58, 0xcd, 0x50, 0x6e, 0x65, 0xf4, + 0xe9, 0xb7, 0x71, 0xaf, 0x4b, 0xb3, 0xb6, 0x2f, + 0x0f, 0x0e, 0x3b, 0xc9, 0x85, 0x14, 0xf5, 0x17, + 0xe8, 0x7a, 0x3a, 0xbf, 0x5f, 0x5e, 0xf8, 0x18, + 0x48, 0xa6, 0x72, 0xab, 0x06, 0x95, 0xe9, 0xc8, + 0xa7, 0xf4, 0x32, 0x44, 0x04, 0x0c, 0x84, 0x98, + 0x73, 0xe3, 0x89, 0x8d, 0x5f, 0x7e, 0x4a, 0x42, + 0x8f, 0xc5, 0x28, 0xb1, 0x82, 0xef, 0x1c, 0x97, + 0x31, 0x3b, 0x4d, 0xe0, 0x0e, 0x10, 0x10, 0x97, + 0x93, 0x49, 0x78, 0x2f, 0x0d, 0x86, 0x8b, 0xa1, + 0x53, 0xa9, 0x81, 0x20, 0x79, 0xe7, 0x07, 0x77, + 0xb6, 0xac, 0x5e, 0xd2, 0x05, 0xcd, 0xe9, 0xdb, + 0x8a, 0x94, 0x82, 0x8a, 0x23, 0xb9, 0x3d, 0x1c, + 0xa9, 0x7d, 0x72, 0x4a, 0xed, 0x33, 0xa3, 0xdb, + 0x21, 0xa7, 0x86, 0x33, 0x45, 0xa5, 0xaa, 0x56, + 0x45, 0xb5, 0x83, 0x29, 0x40, 0x47, 0x79, 0x04, + 0x6e, 0xb9, 0x95, 0xd0, 0x81, 0x77, 0x2d, 0x48, + 0x1e, 0xfe, 0xc3, 0xc2, 0x1e, 0xe5, 0xf2, 0xbe, + 0xfd, 0x3b, 0x94, 0x9f, 0xc4, 0xc4, 0x26, 0x9d, + 0xe4, 0x66, 0x1e, 0x19, 0xee, 0x6c, 0x79, 0x97, + 0x11, 0x31, 0x4b, 0x0d, 0x01, 0xcb, 0xde, 0xa8, + 0xf6, 0x6d, 0x7c, 0x39, 0x46, 0x4e, 0x7e, 0x3f, + 0x94, 0x17, 0xdf, 0xa1, 0x7d, 0xd9, 0x1c, 0x8e, + 0xbc, 0x7d, 0x33, 0x7d, 0xe3, 0x12, 0x40, 0xca, + 0xab, 0x37, 0x11, 0x46, 0xd4, 0xae, 0xef, 0x44, + 0xa2, 0xb3, 0x6a, 0x66, 0x0e, 0x0c, 0x90, 0x7f, + 0xdf, 0x5c, 0x66, 0x5f, 0xf2, 0x94, 0x9f, 0xa6, + 0x73, 0x4f, 0xeb, 0x0d, 0xad, 0xbf, 0xc0, 0x63, + 0x5c, 0xdc, 0x46, 0x51, 0xe8, 0x8e, 0x90, 0x19, + 0xa8, 0xa4, 0x3c, 0x91, 0x79, 0xfa, 0x7e, 0x58, + 0x85, 0x13, 0x55, 0xc5, 0x19, 0x82, 0x37, 0x1b, + 0x0a, 0x02, 0x1f, 0x99, 0x6b, 0x18, 0xf1, 0x28, + 0x08, 0xa2, 0x73, 0xb8, 0x0f, 0x2e, 0xcd, 0xbf, + 0xf3, 0x86, 0x7f, 0xea, 0xef, 0xd0, 0xbb, 0xa6, + 0x21, 0xdf, 0x49, 0x73, 0x51, 0xcc, 0x36, 0xd3, + 0x3e, 0xa0, 0xf8, 0x44, 0xdf, 0xd3, 0xa6, 0xbe, + 0x8a, 0xd4, 0x57, 0xdd, 0x72, 0x94, 0x61, 0x0f, + 0x82, 0xd1, 0x07, 0xb8, 0x7c, 0x18, 0x83, 0xdf, + 0x3a, 0xe5, 0x50, 0x6a, 0x82, 0x20, 0xac, 0xa9, + 0xa8, 0xff, 0xd9, 0xf3, 0x77, 0x33, 0x5a, 0x9e, + 0x7f, 0x6d, 0xfe, 0x5d, 0x33, 0x41, 0x42, 0xe7, + 0x6c, 0x19, 0xe0, 0x44, 0x8a, 0x15, 0xf6, 0x70, + 0x98, 0xb7, 0x68, 0x4d, 0xfa, 0x97, 0x39, 0xb0, + 0x8e, 0xe8, 0x84, 0x8b, 0x75, 0x30, 0xb7, 0x7d, + 0x92, 0x69, 0x20, 0x9c, 0x81, 0xfb, 0x4b, 0xf4, + 0x01, 0x50, 0xeb, 0xce, 0x0c, 0x1c, 0x6c, 0xb5, + 0x4a, 0xd7, 0x27, 0x0c, 0xce, 0xbb, 0xe5, 0x85, + 0xf0, 0xb6, 0xee, 0xd5, 0x70, 0xdd, 0x3b, 0xfc, + 0xd4, 0x99, 0xf1, 0x33, 0xdd, 0x8b, 0xc4, 0x2f, + 0xae, 0xab, 0x74, 0x96, 0x32, 0xc7, 0x4c, 0x56, + 0x3c, 0x89, 0x0f, 0x96, 0x0b, 0x42, 0xc0, 0xcb, + 0xee, 0x0f, 0x0b, 0x8c, 0xfb, 0x7e, 0x47, 0x7b, + 0x64, 0x48, 0xfd, 0xb2, 0x00, 0x80, 0x89, 0xa5, + 0x13, 0x55, 0x62, 0xfc, 0x8f, 0xe2, 0x42, 0x03, + 0xb7, 0x4e, 0x2a, 0x79, 0xb4, 0x82, 0xea, 0x23, + 0x49, 0xda, 0xaf, 0x52, 0x63, 0x1e, 0x60, 0x03, + 0x89, 0x06, 0x44, 0x46, 0x08, 0xc3, 0xc4, 0x87, + 0x70, 0x2e, 0xda, 0x94, 0xad, 0x6b, 0xe0, 0xe4, + 0xd1, 0x8a, 0x06, 0xc2, 0xa8, 0xc0, 0xa7, 0x43, + 0x3c, 0x47, 0x52, 0x0e, 0xc3, 0x77, 0x81, 0x11, + 0x67, 0x0e, 0xa0, 0x70, 0x04, 0x47, 0x29, 0x40, + 0x86, 0x0d, 0x34, 0x56, 0xa7, 0xc9, 0x35, 0x59, + 0x68, 0xdc, 0x93, 0x81, 0x70, 0xee, 0x86, 0xd9, + 0x80, 0x06, 0x40, 0x4f, 0x1a, 0x0d, 0x40, 0x30, + 0x0b, 0xcb, 0x96, 0x47, 0xc1, 0xb7, 0x52, 0xfd, + 0x56, 0xe0, 0x72, 0x4b, 0xfb, 0xbd, 0x92, 0x45, + 0x61, 0x71, 0xc2, 0x33, 0x11, 0xbf, 0x52, 0x83, + 0x79, 0x26, 0xe0, 0x49, 0x6b, 0xb7, 0x05, 0x8b, + 0xe8, 0x0e, 0x87, 0x31, 0xd7, 0x9d, 0x8a, 0xf5, + 0xc0, 0x5f, 0x2e, 0x58, 0x4a, 0xdb, 0x11, 0xb3, + 0x6c, 0x30, 0x2a, 0x46, 0x19, 0xe3, 0x27, 0x84, + 0x1f, 0x63, 0x6e, 0xf6, 0x57, 0xc7, 0xc9, 0xd8, + 0x5e, 0xba, 0xb3, 0x87, 0xd5, 0x83, 0x26, 0x34, + 0x21, 0x9e, 0x65, 0xde, 0x42, 0xd3, 0xbe, 0x7b, + 0xbc, 0x91, 0x71, 0x44, 0x4d, 0x99, 0x3b, 0x31, + 0xe5, 0x3f, 0x11, 0x4e, 0x7f, 0x13, 0x51, 0x3b, + 0xae, 0x79, 0xc9, 0xd3, 0x81, 0x8e, 0x25, 0x40, + 0x10, 0xfc, 0x07, 0x1e, 0xf9, 0x7b, 0x9a, 0x4b, + 0x6c, 0xe3, 0xb3, 0xad, 0x1a, 0x0a, 0xdd, 0x9e, + 0x59, 0x0c, 0xa2, 0xcd, 0xae, 0x48, 0x4a, 0x38, + 0x5b, 0x47, 0x41, 0x94, 0x65, 0x6b, 0xbb, 0xeb, + 0x5b, 0xe3, 0xaf, 0x07, 0x5b, 0xd4, 0x4a, 0xa2, + 0xc9, 0x5d, 0x2f, 0x64, 0x03, 0xd7, 0x3a, 0x2c, + 0x6e, 0xce, 0x76, 0x95, 0xb4, 0xb3, 0xc0, 0xf1, + 0xe2, 0x45, 0x73, 0x7a, 0x5c, 0xab, 0xc1, 0xfc, + 0x02, 0x8d, 0x81, 0x29, 0xb3, 0xac, 0x07, 0xec, + 0x40, 0x7d, 0x45, 0xd9, 0x7a, 0x59, 0xee, 0x34, + 0xf0, 0xe9, 0xd5, 0x7b, 0x96, 0xb1, 0x3d, 0x95, + 0xcc, 0x86, 0xb5, 0xb6, 0x04, 0x2d, 0xb5, 0x92, + 0x7e, 0x76, 0xf4, 0x06, 0xa9, 0xa3, 0x12, 0x0f, + 0xb1, 0xaf, 0x26, 0xba, 0x7c, 0xfc, 0x7e, 0x1c, + 0xbc, 0x2c, 0x49, 0x97, 0x53, 0x60, 0x13, 0x0b, + 0xa6, 0x61, 0x83, 0x89, 0x42, 0xd4, 0x17, 0x0c, + 0x6c, 0x26, 0x52, 0xc3, 0xb3, 0xd4, 0x67, 0xf5, + 0xe3, 0x04, 0xb7, 0xf4, 0xcb, 0x80, 0xb8, 0xcb, + 0x77, 0x56, 0x3e, 0xaa, 0x57, 0x54, 0xee, 0xb4, + 0x2c, 0x67, 0xcf, 0xf2, 0xdc, 0xbe, 0x55, 0xf9, + 0x43, 0x1f, 0x6e, 0x22, 0x97, 0x67, 0x7f, 0xc4, + 0xef, 0xb1, 0x26, 0x31, 0x1e, 0x27, 0xdf, 0x41, + 0x80, 0x47, 0x6c, 0xe2, 0xfa, 0xa9, 0x8c, 0x2a, + 0xf6, 0xf2, 0xab, 0xf0, 0x15, 0xda, 0x6c, 0xc8, + 0xfe, 0xb5, 0x23, 0xde, 0xa9, 0x05, 0x3f, 0x06, + 0x54, 0x4c, 0xcd, 0xe1, 0xab, 0xfc, 0x0e, 0x62, + 0x33, 0x31, 0x73, 0x2c, 0x76, 0xcb, 0xb4, 0x47, + 0x1e, 0x20, 0xad, 0xd8, 0xf2, 0x31, 0xdd, 0xc4, + 0x8b, 0x0c, 0x77, 0xbe, 0xe1, 0x8b, 0x26, 0x00, + 0x02, 0x58, 0xd6, 0x8d, 0xef, 0xad, 0x74, 0x67, + 0xab, 0x3f, 0xef, 0xcb, 0x6f, 0xb0, 0xcc, 0x81, + 0x44, 0x4c, 0xaf, 0xe9, 0x49, 0x4f, 0xdb, 0xa0, + 0x25, 0xa4, 0xf0, 0x89, 0xf1, 0xbe, 0xd8, 0x10, + 0xff, 0xb1, 0x3b, 0x4b, 0xfa, 0x98, 0xf5, 0x79, + 0x6d, 0x1e, 0x69, 0x4d, 0x57, 0xb1, 0xc8, 0x19, + 0x1b, 0xbd, 0x1e, 0x8c, 0x84, 0xb7, 0x7b, 0xe8, + 0xd2, 0x2d, 0x09, 0x41, 0x41, 0x37, 0x3d, 0xb1, + 0x6f, 0x26, 0x5d, 0x71, 0x16, 0x3d, 0xb7, 0x83, + 0x27, 0x2c, 0xa7, 0xb6, 0x50, 0xbd, 0x91, 0x86, + 0xab, 0x24, 0xa1, 0x38, 0xfd, 0xea, 0x71, 0x55, + 0x7e, 0x9a, 0x07, 0x77, 0x4b, 0xfa, 0x61, 0x66, + 0x20, 0x1e, 0x28, 0x95, 0x18, 0x1b, 0xa4, 0xa0, + 0xfd, 0xc0, 0x89, 0x72, 0x43, 0xd9, 0x3b, 0x49, + 0x5a, 0x3f, 0x9d, 0xbf, 0xdb, 0xb4, 0x46, 0xea, + 0x42, 0x01, 0x77, 0x23, 0x68, 0x95, 0xb6, 0x24, + 0xb3, 0xa8, 0x6c, 0x28, 0x3b, 0x11, 0x40, 0x7e, + 0x18, 0x65, 0x6d, 0xd8, 0x24, 0x42, 0x7d, 0x88, + 0xc0, 0x52, 0xd9, 0x05, 0xe4, 0x95, 0x90, 0x87, + 0x8c, 0xf4, 0xd0, 0x6b, 0xb9, 0x83, 0x99, 0x34, + 0x6d, 0xfe, 0x54, 0x40, 0x94, 0x52, 0x21, 0x4f, + 0x14, 0x25, 0xc5, 0xd6, 0x5e, 0x95, 0xdc, 0x0a, + 0x2b, 0x89, 0x20, 0x11, 0x84, 0x48, 0xd6, 0x3a, + 0xcd, 0x5c, 0x24, 0xad, 0x62, 0xe3, 0xb1, 0x93, + 0x25, 0x8d, 0xcd, 0x7e, 0xfc, 0x27, 0xa3, 0x37, + 0xfd, 0x84, 0xfc, 0x1b, 0xb2, 0xf1, 0x27, 0x38, + 0x5a, 0xb7, 0xfc, 0xf2, 0xfa, 0x95, 0x66, 0xd4, + 0xfb, 0xba, 0xa7, 0xd7, 0xa3, 0x72, 0x69, 0x48, + 0x48, 0x8c, 0xeb, 0x28, 0x89, 0xfe, 0x33, 0x65, + 0x5a, 0x36, 0x01, 0x7e, 0x06, 0x79, 0x0a, 0x09, + 0x3b, 0x74, 0x11, 0x9a, 0x6e, 0xbf, 0xd4, 0x9e, + 0x58, 0x90, 0x49, 0x4f, 0x4d, 0x08, 0xd4, 0xe5, + 0x4a, 0x09, 0x21, 0xef, 0x8b, 0xb8, 0x74, 0x3b, + 0x91, 0xdd, 0x36, 0x85, 0x60, 0x2d, 0xfa, 0xd4, + 0x45, 0x7b, 0x45, 0x53, 0xf5, 0x47, 0x87, 0x7e, + 0xa6, 0x37, 0xc8, 0x78, 0x7a, 0x68, 0x9d, 0x8d, + 0x65, 0x2c, 0x0e, 0x91, 0x5c, 0xa2, 0x60, 0xf0, + 0x8e, 0x3f, 0xe9, 0x1a, 0xcd, 0xaa, 0xe7, 0xd5, + 0x77, 0x18, 0xaf, 0xc9, 0xbc, 0x18, 0xea, 0x48, + 0x1b, 0xfb, 0x22, 0x48, 0x70, 0x16, 0x29, 0x9e, + 0x5b, 0xc1, 0x2c, 0x66, 0x23, 0xbc, 0xf0, 0x1f, + 0xef, 0xaf, 0xe4, 0xd6, 0x04, 0x19, 0x82, 0x7a, + 0x0b, 0xba, 0x4b, 0x46, 0xb1, 0x6a, 0x85, 0x5d, + 0xb4, 0x73, 0xd6, 0x21, 0xa1, 0x71, 0x60, 0x14, + 0xee, 0x0a, 0x77, 0xc4, 0x66, 0x2e, 0xf9, 0x69, + 0x30, 0xaf, 0x41, 0x0b, 0xc8, 0x83, 0x3c, 0x53, + 0x99, 0x19, 0x27, 0x46, 0xf7, 0x41, 0x6e, 0x56, + 0xdc, 0x94, 0x28, 0x67, 0x4e, 0xb7, 0x25, 0x48, + 0x8a, 0xc2, 0xe0, 0x60, 0x96, 0xcc, 0x18, 0xf4, + 0x84, 0xdd, 0xa7, 0x5e, 0x3e, 0x05, 0x0b, 0x26, + 0x26, 0xb2, 0x5c, 0x1f, 0x57, 0x1a, 0x04, 0x7e, + 0x6a, 0xe3, 0x2f, 0xb4, 0x35, 0xb6, 0x38, 0x40, + 0x40, 0xcd, 0x6f, 0x87, 0x2e, 0xef, 0xa3, 0xd7, + 0xa9, 0xc2, 0xe8, 0x0d, 0x27, 0xdf, 0x44, 0x62, + 0x99, 0xa0, 0xfc, 0xcf, 0x81, 0x78, 0xcb, 0xfe, + 0xe5, 0xa0, 0x03, 0x4e, 0x6c, 0xd7, 0xf4, 0xaf, + 0x7a, 0xbb, 0x61, 0x82, 0xfe, 0x71, 0x89, 0xb2, + 0x22, 0x7c, 0x8e, 0x83, 0x04, 0xce, 0xf6, 0x5d, + 0x84, 0x8f, 0x95, 0x6a, 0x7f, 0xad, 0xfd, 0x32, + 0x9c, 0x5e, 0xe4, 0x9c, 0x89, 0x60, 0x54, 0xaa, + 0x96, 0x72, 0xd2, 0xd7, 0x36, 0x85, 0xa9, 0x45, + 0xd2, 0x2a, 0xa1, 0x81, 0x49, 0x6f, 0x7e, 0x04, + 0xfa, 0xe2, 0xfe, 0x90, 0x26, 0x77, 0x5a, 0x33, + 0xb8, 0x04, 0x9a, 0x7a, 0xe6, 0x4c, 0x4f, 0xad, + 0x72, 0x96, 0x08, 0x28, 0x58, 0x13, 0xf8, 0xc4, + 0x1c, 0xf0, 0xc3, 0x45, 0x95, 0x49, 0x20, 0x8c, + 0x9f, 0x39, 0x70, 0xe1, 0x77, 0xfe, 0xd5, 0x4b, + 0xaf, 0x86, 0xda, 0xef, 0x22, 0x06, 0x83, 0x36, + 0x29, 0x12, 0x11, 0x40, 0xbc, 0x3b, 0x86, 0xaa, + 0xaa, 0x65, 0x60, 0xc3, 0x80, 0xca, 0xed, 0xa9, + 0xf3, 0xb0, 0x79, 0x96, 0xa2, 0x55, 0x27, 0x28, + 0x55, 0x73, 0x26, 0xa5, 0x50, 0xea, 0x92, 0x4b, + 0x3c, 0x5c, 0x82, 0x33, 0xf0, 0x01, 0x3f, 0x03, + 0xc1, 0x08, 0x05, 0xbf, 0x98, 0xf4, 0x9b, 0x6d, + 0xa5, 0xa8, 0xb4, 0x82, 0x0c, 0x06, 0xfa, 0xff, + 0x2d, 0x08, 0xf3, 0x05, 0x4f, 0x57, 0x2a, 0x39, + 0xd4, 0x83, 0x0d, 0x75, 0x51, 0xd8, 0x5b, 0x1b, + 0xd3, 0x51, 0x5a, 0x32, 0x2a, 0x9b, 0x32, 0xb2, + 0xf2, 0xa4, 0x96, 0x12, 0xf2, 0xae, 0x40, 0x34, + 0x67, 0xa8, 0xf5, 0x44, 0xd5, 0x35, 0x53, 0xfe, + 0xa3, 0x60, 0x96, 0x63, 0x0f, 0x1f, 0x6e, 0xb0, + 0x5a, 0x42, 0xa6, 0xfc, 0x51, 0x0b, 0x60, 0x27, + 0xbc, 0x06, 0x71, 0xed, 0x65, 0x5b, 0x23, 0x86, + 0x4a, 0x07, 0x3b, 0x22, 0x07, 0x46, 0xe6, 0x90, + 0x3e, 0xf3, 0x25, 0x50, 0x1b, 0x4c, 0x7f, 0x03, + 0x08, 0xa8, 0x36, 0x6b, 0x87, 0xe5, 0xe3, 0xdb, + 0x9a, 0x38, 0x83, 0xff, 0x9f, 0x1a, 0x9f, 0x57, + 0xa4, 0x2a, 0xf6, 0x37, 0xbc, 0x1a, 0xff, 0xc9, + 0x1e, 0x35, 0x0c, 0xc3, 0x7c, 0xa3, 0xb2, 0xe5, + 0xd2, 0xc6, 0xb4, 0x57, 0x47, 0xe4, 0x32, 0x16, + 0x6d, 0xa9, 0xae, 0x64, 0xe6, 0x2d, 0x8d, 0xc5, + 0x8d, 0x50, 0x8e, 0xe8, 0x1a, 0x22, 0x34, 0x2a, + 0xd9, 0xeb, 0x51, 0x90, 0x4a, 0xb1, 0x41, 0x7d, + 0x64, 0xf9, 0xb9, 0x0d, 0xf6, 0x23, 0x33, 0xb0, + 0x33, 0xf4, 0xf7, 0x3f, 0x27, 0x84, 0xc6, 0x0f, + 0x54, 0xa5, 0xc0, 0x2e, 0xec, 0x0b, 0x3a, 0x48, + 0x6e, 0x80, 0x35, 0x81, 0x43, 0x9b, 0x90, 0xb1, + 0xd0, 0x2b, 0xea, 0x21, 0xdc, 0xda, 0x5b, 0x09, + 0xf4, 0xcc, 0x10, 0xb4, 0xc7, 0xfe, 0x79, 0x51, + 0xc3, 0xc5, 0xac, 0x88, 0x74, 0x84, 0x0b, 0x4b, + 0xca, 0x79, 0x16, 0x29, 0xfb, 0x69, 0x54, 0xdf, + 0x41, 0x7e, 0xe9, 0xc7, 0x8e, 0xea, 0xa5, 0xfe, + 0xfc, 0x76, 0x0e, 0x90, 0xc4, 0x92, 0x38, 0xad, + 0x7b, 0x48, 0xe6, 0x6e, 0xf7, 0x21, 0xfd, 0x4e, + 0x93, 0x0a, 0x7b, 0x41, 0x83, 0x68, 0xfb, 0x57, + 0x51, 0x76, 0x34, 0xa9, 0x6c, 0x00, 0xaa, 0x4f, + 0x66, 0x65, 0x98, 0x4a, 0x4f, 0xa3, 0xa0, 0xef, + 0x69, 0x3f, 0xe3, 0x1c, 0x92, 0x8c, 0xfd, 0xd8, + 0xe8, 0xde, 0x7c, 0x7f, 0x3e, 0x84, 0x8e, 0x69, + 0x3c, 0xf1, 0xf2, 0x05, 0x46, 0xdc, 0x2f, 0x9d, + 0x5e, 0x6e, 0x4c, 0xfb, 0xb5, 0x99, 0x2a, 0x59, + 0x63, 0xc1, 0x34, 0xbc, 0x57, 0xc0, 0x0d, 0xb9, + 0x61, 0x25, 0xf3, 0x33, 0x23, 0x51, 0xb6, 0x0d, + 0x07, 0xa6, 0xab, 0x94, 0x4a, 0xb7, 0x2a, 0xea, + 0xee, 0xac, 0xa3, 0xc3, 0x04, 0x8b, 0x0e, 0x56, + 0xfe, 0x44, 0xa7, 0x39, 0xe2, 0xed, 0xed, 0xb4, + 0x22, 0x2b, 0xac, 0x12, 0x32, 0x28, 0x91, 0xd8, + 0xa5, 0xab, 0xff, 0x5f, 0xe0, 0x4b, 0xda, 0x78, + 0x17, 0xda, 0xf1, 0x01, 0x5b, 0xcd, 0xe2, 0x5f, + 0x50, 0x45, 0x73, 0x2b, 0xe4, 0x76, 0x77, 0xf4, + 0x64, 0x1d, 0x43, 0xfb, 0x84, 0x7a, 0xea, 0x91, + 0xae, 0xf9, 0x9e, 0xb7, 0xb4, 0xb0, 0x91, 0x5f, + 0x16, 0x35, 0x9a, 0x11, 0xb8, 0xc7, 0xc1, 0x8c, + 0xc6, 0x10, 0x8d, 0x2f, 0x63, 0x4a, 0xa7, 0x57, + 0x3a, 0x51, 0xd6, 0x32, 0x2d, 0x64, 0x72, 0xd4, + 0x66, 0xdc, 0x10, 0xa6, 0x67, 0xd6, 0x04, 0x23, + 0x9d, 0x0a, 0x11, 0x77, 0xdd, 0x37, 0x94, 0x17, + 0x3c, 0xbf, 0x8b, 0x65, 0xb0, 0x2e, 0x5e, 0x66, + 0x47, 0x64, 0xac, 0xdd, 0xf0, 0x84, 0xfd, 0x39, + 0xfa, 0x15, 0x5d, 0xef, 0xae, 0xca, 0xc1, 0x36, + 0xa7, 0x5c, 0xbf, 0xc7, 0x08, 0xc2, 0x66, 0x00, + 0x74, 0x74, 0x4e, 0x27, 0x3f, 0x55, 0x8a, 0xb7, + 0x38, 0x66, 0x83, 0x6d, 0xcf, 0x99, 0x9e, 0x60, + 0x8f, 0xdd, 0x2e, 0x62, 0x22, 0x0e, 0xef, 0x0c, + 0x98, 0xa7, 0x85, 0x74, 0x3b, 0x9d, 0xec, 0x9e, + 0xa9, 0x19, 0x72, 0xa5, 0x7f, 0x2c, 0x39, 0xb7, + 0x7d, 0xb7, 0xf1, 0x12, 0x65, 0x27, 0x4b, 0x5a, + 0xde, 0x17, 0xfe, 0xad, 0x44, 0xf3, 0x20, 0x4d, + 0xfd, 0xe4, 0x1f, 0xb5, 0x81, 0xb0, 0x36, 0x37, + 0x08, 0x6f, 0xc3, 0x0c, 0xe9, 0x85, 0x98, 0x82, + 0xa9, 0x62, 0x0c, 0xc4, 0x97, 0xc0, 0x50, 0xc8, + 0xa7, 0x3c, 0x50, 0x9f, 0x43, 0xb9, 0xcd, 0x5e, + 0x4d, 0xfa, 0x1c, 0x4b, 0x0b, 0xa9, 0x98, 0x85, + 0x38, 0x92, 0xac, 0x8d, 0xe4, 0xad, 0x9b, 0x98, + 0xab, 0xd9, 0x38, 0xac, 0x62, 0x52, 0xa3, 0x22, + 0x63, 0x0f, 0xbf, 0x95, 0x48, 0xdf, 0x69, 0xe7, + 0x8b, 0x33, 0xd5, 0xb2, 0xbd, 0x05, 0x49, 0x49, + 0x9d, 0x57, 0x73, 0x19, 0x33, 0xae, 0xfa, 0x33, + 0xf1, 0x19, 0xa8, 0x80, 0xce, 0x04, 0x9f, 0xbc, + 0x1d, 0x65, 0x82, 0x1b, 0xe5, 0x3a, 0x51, 0xc8, + 0x1c, 0x21, 0xe3, 0x5d, 0xf3, 0x7d, 0x9b, 0x2f, + 0x2c, 0x1d, 0x4a, 0x7f, 0x9b, 0x68, 0x35, 0xa3, + 0xb2, 0x50, 0xf7, 0x62, 0x79, 0xcd, 0xf4, 0x98, + 0x4f, 0xe5, 0x63, 0x7c, 0x3e, 0x45, 0x31, 0x8c, + 0x16, 0xa0, 0x12, 0xc8, 0x58, 0xce, 0x39, 0xa6, + 0xbc, 0x54, 0xdb, 0xc5, 0xe0, 0xd5, 0xba, 0xbc, + 0xb9, 0x04, 0xf4, 0x8d, 0xe8, 0x2f, 0x15, 0x9d, +}; + +/* 100 test cases */ +static struct dahash_test { + uint16_t start; /* random 12 bit offset in buf */ + uint16_t length; /* random 8 bit length of test */ + xfs_dahash_t dahash; /* expected dahash result */ +} test[] __initdata = +{ + {0x0567, 0x0097, 0x96951389}, + {0x0869, 0x0055, 0x6455ab4f}, + {0x0c51, 0x00be, 0x8663afde}, + {0x044a, 0x00fc, 0x98fbe432}, + {0x0f29, 0x0079, 0x42371997}, + {0x08ba, 0x0052, 0x942be4f7}, + {0x01f2, 0x0013, 0x5262687e}, + {0x09e3, 0x00e2, 0x8ffb0908}, + {0x007c, 0x0051, 0xb3158491}, + {0x0854, 0x001f, 0x83bb20d9}, + {0x031b, 0x0008, 0x98970bdf}, + {0x0de7, 0x0027, 0xbfbf6f6c}, + {0x0f76, 0x0005, 0x906a7105}, + {0x092e, 0x00d0, 0x86631850}, + {0x0233, 0x0082, 0xdbdd914e}, + {0x04c9, 0x0075, 0x5a400a9e}, + {0x0b66, 0x0099, 0xae128b45}, + {0x000d, 0x00ed, 0xe61c216a}, + {0x0a31, 0x003d, 0xf69663b9}, + {0x00a3, 0x0052, 0x643c39ae}, + {0x0125, 0x00d5, 0x7c310b0d}, + {0x0105, 0x004a, 0x06a77e74}, + {0x0858, 0x008e, 0x265bc739}, + {0x045e, 0x0095, 0x13d6b192}, + {0x0dab, 0x003c, 0xc4498704}, + {0x00cd, 0x00b5, 0x802a4e2d}, + {0x069b, 0x008c, 0x5df60f71}, + {0x0454, 0x006c, 0x5f03d8bb}, + {0x040e, 0x0032, 0x0ce513b5}, + {0x0874, 0x00e2, 0x6a811fb3}, + {0x0521, 0x00b4, 0x93296833}, + {0x0ddc, 0x00cf, 0xf9305338}, + {0x0a70, 0x0023, 0x239549ea}, + {0x083e, 0x0027, 0x2d88ba97}, + {0x0241, 0x00a7, 0xfe0b32e1}, + {0x0dfc, 0x0096, 0x1a11e815}, + {0x023e, 0x001e, 0xebc9a1f3}, + {0x067e, 0x0066, 0xb1067f81}, + {0x09ea, 0x000e, 0x46fd7247}, + {0x036b, 0x008c, 0x1a39acdf}, + {0x078f, 0x0030, 0x964042ab}, + {0x085c, 0x008f, 0x1829edab}, + {0x02ec, 0x009f, 0x6aefa72d}, + {0x043b, 0x00ce, 0x65642ff5}, + {0x0a32, 0x00b8, 0xbd82759e}, + {0x0d3c, 0x0087, 0xf4d66d54}, + {0x09ec, 0x008a, 0x06bfa1ff}, + {0x0902, 0x0015, 0x755025d2}, + {0x08fe, 0x000e, 0xf690ce2d}, + {0x00fb, 0x00dc, 0xe55f1528}, + {0x0eaa, 0x003a, 0x0fe0a8d7}, + {0x05fb, 0x0006, 0x86281cfb}, + {0x0dd1, 0x00a7, 0x60ab51b4}, + {0x0005, 0x001b, 0xf51d969b}, + {0x077c, 0x00dd, 0xc2fed268}, + {0x0575, 0x00f5, 0x432c0b1a}, + {0x05be, 0x0088, 0x78baa04b}, + {0x0c89, 0x0068, 0xeda9e428}, + {0x0f5c, 0x0068, 0xec143c76}, + {0x06a8, 0x0009, 0xd72651ce}, + {0x060f, 0x008e, 0x765426cd}, + {0x07b1, 0x0047, 0x2cfcfa0c}, + {0x04f1, 0x0041, 0x55b172f9}, + {0x0e05, 0x00ac, 0x61efde93}, + {0x0bf7, 0x0097, 0x05b83eee}, + {0x04e9, 0x00f3, 0x9928223a}, + {0x023a, 0x0005, 0xdfada9bc}, + {0x0acb, 0x000e, 0x2217cecd}, + {0x0148, 0x0060, 0xbc3f7405}, + {0x0764, 0x0059, 0xcbc201b1}, + {0x021f, 0x0059, 0x5d6b2256}, + {0x0f1e, 0x006c, 0xdefeeb45}, + {0x071c, 0x00b9, 0xb9b59309}, + {0x0564, 0x0063, 0xae064271}, + {0x0b14, 0x0044, 0xdb867d9b}, + {0x0e5a, 0x0055, 0xff06b685}, + {0x015e, 0x00ba, 0x1115ccbc}, + {0x0379, 0x00e6, 0x5f4e58dd}, + {0x013b, 0x0067, 0x4897427e}, + {0x0e64, 0x0071, 0x7af2b7a4}, + {0x0a11, 0x0050, 0x92105726}, + {0x0109, 0x0055, 0xd0d000f9}, + {0x00aa, 0x0022, 0x815d229d}, + {0x09ac, 0x004f, 0x02f9d985}, + {0x0e1b, 0x00ce, 0x5cf92ab4}, + {0x08af, 0x00d8, 0x17ca72d1}, + {0x0e33, 0x000a, 0xda2dba6b}, + {0x0ee3, 0x006a, 0xb00048e5}, + {0x0648, 0x001a, 0x2364b8cb}, + {0x0315, 0x0085, 0x0596fd0d}, + {0x0fbb, 0x003e, 0x298230ca}, + {0x0422, 0x006a, 0x78ada4ab}, + {0x04ba, 0x0073, 0xced1fbc2}, + {0x007d, 0x0061, 0x4b7ff236}, + {0x070b, 0x00d0, 0x261cf0ae}, + {0x0c1a, 0x0035, 0x8be92ee2}, + {0x0af8, 0x0063, 0x824dcf03}, + {0x08f8, 0x006d, 0xd289710c}, + {0x021b, 0x00ee, 0x6ac1c41d}, + {0x05b5, 0x00da, 0x8e52f0e2}, +}; + +int __init +xfs_dahash_test(void) +{ + unsigned int i; + unsigned int errors = 0; + + for (i = 0; i < ARRAY_SIZE(test); i++) { + xfs_dahash_t hash; + + hash = xfs_da_hashname(test_buf + test[i].start, + test[i].length); + if (hash != test[i].dahash) + errors++; + } + + if (errors) { + printk(KERN_ERR "xfs dir/attr hash test failed %u times!", + errors); + return -ERANGE; + } + + return 0; +} diff --git a/fs/xfs/xfs_dahash_test.h b/fs/xfs/xfs_dahash_test.h new file mode 100644 index 0000000000000..1a05bf4bd9e12 --- /dev/null +++ b/fs/xfs/xfs_dahash_test.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_DAHASH_TEST_H__ +#define __XFS_DAHASH_TEST_H__ + +int xfs_dahash_test(void); + +#endif /* __XFS_DAHASH_TEST_H__ */ + diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 2479b5cbd75ec..4f814f9e12ab5 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -41,6 +41,7 @@ #include "xfs_attr_item.h" #include "xfs_xattr.h" #include "xfs_iunlink_item.h" +#include "xfs_dahash_test.h" #include #include @@ -2286,6 +2287,10 @@ init_xfs_fs(void) xfs_check_ondisk_structs(); + error = xfs_dahash_test(); + if (error) + return error; + printk(KERN_INFO XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n"); From 1470afefc3c42df5d1662f87d079b46651bdc95b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 15 Mar 2023 17:31:02 -0700 Subject: [PATCH 210/898] cpumask: introduce for_each_cpu_or Equivalent of for_each_cpu_and, except it ORs the two masks together so it iterates all the CPUs present in either mask. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/cpumask.h | 17 +++++++++++++++++ include/linux/find.h | 37 +++++++++++++++++++++++++++++++++++++ lib/find_bit.c | 9 +++++++++ 3 files changed, 63 insertions(+) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 8fbe76607965e..220974ef1bf58 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -350,6 +350,23 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta #define for_each_cpu_andnot(cpu, mask1, mask2) \ for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) +/** + * for_each_cpu_or - iterate over every cpu present in either mask + * @cpu: the (optionally unsigned) integer iterator + * @mask1: the first cpumask pointer + * @mask2: the second cpumask pointer + * + * This saves a temporary CPU mask in many places. It is equivalent to: + * struct cpumask tmp; + * cpumask_or(&tmp, &mask1, &mask2); + * for_each_cpu(cpu, &tmp) + * ... + * + * After the loop, cpu is >= nr_cpu_ids. + */ +#define for_each_cpu_or(cpu, mask1, mask2) \ + for_each_or_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) + /** * cpumask_any_but - return a "random" in a cpumask, but not this one. * @mask: the cpumask to search diff --git a/include/linux/find.h b/include/linux/find.h index 4647864a5ffdb..5e4f39ef2e72c 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -14,6 +14,8 @@ unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long unsigned long nbits, unsigned long start); unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start); +unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long nbits, unsigned long start); unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits, unsigned long start); extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); @@ -127,6 +129,36 @@ unsigned long find_next_andnot_bit(const unsigned long *addr1, } #endif +#ifndef find_next_or_bit +/** + * find_next_or_bit - find the next set bit in either memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * @offset: The bitnumber to start searching at + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_next_or_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = (*addr1 | *addr2) & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_or_bit(addr1, addr2, size, offset); +} +#endif + #ifndef find_next_zero_bit /** * find_next_zero_bit - find the next cleared bit in a memory region @@ -536,6 +568,11 @@ unsigned long find_next_bit_le(const void *addr, unsigned (bit) = find_next_andnot_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\ (bit)++) +#define for_each_or_bit(bit, addr1, addr2, size) \ + for ((bit) = 0; \ + (bit) = find_next_or_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\ + (bit)++) + /* same as for_each_set_bit() but use bit as value to start with */ #define for_each_set_bit_from(bit, addr, size) \ for (; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++) diff --git a/lib/find_bit.c b/lib/find_bit.c index c10920e667889..32f99e9a670e6 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -182,6 +182,15 @@ unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned l EXPORT_SYMBOL(_find_next_andnot_bit); #endif +#ifndef find_next_or_bit +unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long nbits, unsigned long start) +{ + return FIND_NEXT_BIT(addr1[idx] | addr2[idx], /* nop */, nbits, start); +} +EXPORT_SYMBOL(_find_next_or_bit); +#endif + #ifndef find_next_zero_bit unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits, unsigned long start) From 8b57b11cca88f397035a95b9e12b03511847b0e8 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 15 Mar 2023 17:31:02 -0700 Subject: [PATCH 211/898] pcpcntrs: fix dying cpu summation race In commit f689054aace2 ("percpu_counter: add percpu_counter_sum_all interface") a race condition between a cpu dying and percpu_counter_sum() iterating online CPUs was identified. The solution was to iterate all possible CPUs for summation via percpu_counter_sum_all(). We recently had a percpu_counter_sum() call in XFS trip over this same race condition and it fired a debug assert because the filesystem was unmounting and the counter *should* be zero just before we destroy it. That was reported here: https://lore.kernel.org/linux-kernel/20230314090649.326642-1-yebin@huaweicloud.com/ likely as a result of running generic/648 which exercises filesystems in the presence of CPU online/offline events. The solution to use percpu_counter_sum_all() is an awful one. We use percpu counters and percpu_counter_sum() for accurate and reliable threshold detection for space management, so a summation race condition during these operations can result in overcommit of available space and that may result in filesystem shutdowns. As percpu_counter_sum_all() iterates all possible CPUs rather than just those online or even those present, the mask can include CPUs that aren't even installed in the machine, or in the case of machines that can hot-plug CPU capable nodes, even have physical sockets present in the machine. Fundamentally, this race condition is caused by the CPU being offlined being removed from the cpu_online_mask before the notifier that cleans up per-cpu state is run. Hence percpu_counter_sum() will not sum the count for a cpu currently being taken offline, regardless of whether the notifier has run or not. This is the root cause of the bug. The percpu counter notifier iterates all the registered counters, locks the counter and moves the percpu count to the global sum. This is serialised against other operations that move the percpu counter to the global sum as well as percpu_counter_sum() operations that sum the percpu counts while holding the counter lock. Hence the notifier is safe to run concurrently with sum operations, and the only thing we actually need to care about is that percpu_counter_sum() iterates dying CPUs. That's trivial to do, and when there are no CPUs dying, it has no addition overhead except for a cpumask_or() operation. This change makes percpu_counter_sum() always do the right thing in the presence of CPU hot unplug events and makes percpu_counter_sum_all() unnecessary. This, in turn, means that filesystems like XFS, ext4, and btrfs don't have to work out when they should use percpu_counter_sum() vs percpu_counter_sum_all() in their space accounting algorithms Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- lib/percpu_counter.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index dba56c5c18379..0e096311e0c08 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -131,7 +131,7 @@ static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc, raw_spin_lock_irqsave(&fbc->lock, flags); ret = fbc->count; - for_each_cpu(cpu, cpu_mask) { + for_each_cpu_or(cpu, cpu_online_mask, cpu_mask) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; } @@ -141,11 +141,20 @@ static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc, /* * Add up all the per-cpu counts, return the result. This is a more accurate - * but much slower version of percpu_counter_read_positive() + * but much slower version of percpu_counter_read_positive(). + * + * We use the cpu mask of (cpu_online_mask | cpu_dying_mask) to capture sums + * from CPUs that are in the process of being taken offline. Dying cpus have + * been removed from the online mask, but may not have had the hotplug dead + * notifier called to fold the percpu count back into the global counter sum. + * By including dying CPUs in the iteration mask, we avoid this race condition + * so __percpu_counter_sum() just does the right thing when CPUs are being taken + * offline. */ s64 __percpu_counter_sum(struct percpu_counter *fbc) { - return __percpu_counter_sum_mask(fbc, cpu_online_mask); + + return __percpu_counter_sum_mask(fbc, cpu_dying_mask); } EXPORT_SYMBOL(__percpu_counter_sum); From 7ba85fba47bd89618fdb7dc322bdf823b1b56efb Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 15 Mar 2023 17:31:03 -0700 Subject: [PATCH 212/898] fork: remove use of percpu_counter_sum_all This effectively reverts the change made in commit f689054aace2 ("percpu_counter: add percpu_counter_sum_all interface") as the race condition percpu_counter_sum_all() was invented to avoid is now handled directly in percpu_counter_sum() and nobody needs to care about summing racing with cpu unplug anymore. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- kernel/fork.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index f68954d05e89d..bcc3085e79ef4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -755,11 +755,6 @@ static void check_mm(struct mm_struct *mm) for (i = 0; i < NR_MM_COUNTERS; i++) { long x = percpu_counter_sum(&mm->rss_stat[i]); - if (likely(!x)) - continue; - - /* Making sure this is not due to race with CPU offlining. */ - x = percpu_counter_sum_all(&mm->rss_stat[i]); if (unlikely(x)) pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", mm, resident_page_types[i], x); From e9b60c7f97130795c7aa81a649ae4b93a172a277 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 15 Mar 2023 17:31:03 -0700 Subject: [PATCH 213/898] pcpcntr: remove percpu_counter_sum_all() percpu_counter_sum_all() is now redundant as the race condition it was invented to handle is now dealt with by percpu_counter_sum() directly and all users of percpu_counter_sum_all() have been removed. Remove it. This effectively reverts the changes made in f689054aace2 ("percpu_counter: add percpu_counter_sum_all interface") except for the cpumask iteration that fixes percpu_counter_sum() made earlier in this series. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/percpu_counter.h | 6 ----- lib/percpu_counter.c | 40 ++++++++++------------------------ 2 files changed, 11 insertions(+), 35 deletions(-) diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 521a733e21a92..75b73c83bc9d0 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -45,7 +45,6 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); -s64 percpu_counter_sum_all(struct percpu_counter *fbc); int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); void percpu_counter_sync(struct percpu_counter *fbc); @@ -196,11 +195,6 @@ static inline s64 percpu_counter_sum(struct percpu_counter *fbc) return percpu_counter_read(fbc); } -static inline s64 percpu_counter_sum_all(struct percpu_counter *fbc) -{ - return percpu_counter_read(fbc); -} - static inline bool percpu_counter_initialized(struct percpu_counter *fbc) { return true; diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 0e096311e0c08..5004463c4f9f1 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -122,23 +122,6 @@ void percpu_counter_sync(struct percpu_counter *fbc) } EXPORT_SYMBOL(percpu_counter_sync); -static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc, - const struct cpumask *cpu_mask) -{ - s64 ret; - int cpu; - unsigned long flags; - - raw_spin_lock_irqsave(&fbc->lock, flags); - ret = fbc->count; - for_each_cpu_or(cpu, cpu_online_mask, cpu_mask) { - s32 *pcount = per_cpu_ptr(fbc->counters, cpu); - ret += *pcount; - } - raw_spin_unlock_irqrestore(&fbc->lock, flags); - return ret; -} - /* * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive(). @@ -153,22 +136,21 @@ static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc, */ s64 __percpu_counter_sum(struct percpu_counter *fbc) { + s64 ret; + int cpu; + unsigned long flags; - return __percpu_counter_sum_mask(fbc, cpu_dying_mask); + raw_spin_lock_irqsave(&fbc->lock, flags); + ret = fbc->count; + for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) { + s32 *pcount = per_cpu_ptr(fbc->counters, cpu); + ret += *pcount; + } + raw_spin_unlock_irqrestore(&fbc->lock, flags); + return ret; } EXPORT_SYMBOL(__percpu_counter_sum); -/* - * This is slower version of percpu_counter_sum as it traverses all possible - * cpus. Use this only in the cases where accurate data is needed in the - * presense of CPUs getting offlined. - */ -s64 percpu_counter_sum_all(struct percpu_counter *fbc) -{ - return __percpu_counter_sum_mask(fbc, cpu_possible_mask); -} -EXPORT_SYMBOL(percpu_counter_sum_all); - int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key) { From ffa6206ebf8d39e83d87ac226df68dbbe155819a Mon Sep 17 00:00:00 2001 From: Matthias Benkmann Date: Sun, 19 Mar 2023 21:30:15 -0700 Subject: [PATCH 214/898] Input: xpad - fix incorrectly applied patch for MAP_PROFILE_BUTTON When commit commit fff1011a26d6 ("Input: xpad - add X-Box Adaptive Profile button") was applied, one hunk ended up in the wrong function; move it to where it belongs. Fixes: fff1011a26d6 ("Input: xpad - add X-Box Adaptive Profile button") Signed-off-by: Matthias Benkmann Link: https://lore.kernel.org/r/20230318162106.0aef4ba5@ninja Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index f642ec8e92ddd..29131f1a2f067 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -781,9 +781,6 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d input_report_key(dev, BTN_C, data[8]); input_report_key(dev, BTN_Z, data[9]); - /* Profile button has a value of 0-3, so it is reported as an axis */ - if (xpad->mapping & MAP_PROFILE_BUTTON) - input_report_abs(dev, ABS_PROFILE, data[34]); input_sync(dev); } @@ -1061,6 +1058,10 @@ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char (__u16) le16_to_cpup((__le16 *)(data + 8))); } + /* Profile button has a value of 0-3, so it is reported as an axis */ + if (xpad->mapping & MAP_PROFILE_BUTTON) + input_report_abs(dev, ABS_PROFILE, data[34]); + /* paddle handling */ /* based on SDL's SDL_hidapi_xboxone.c */ if (xpad->mapping & MAP_PADDLES) { From 8980f190947ba29f23110408e712444884b74251 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 19 Mar 2023 21:36:36 -0700 Subject: [PATCH 215/898] Input: focaltech - use explicitly signed char type The recent change of -funsigned-char causes additions of negative numbers to become additions of large positive numbers, leading to wrong calculations of mouse movement. Change these casts to be explicitly signed, to take into account negative offsets. Fixes: 3bc753c06dd0 ("kbuild: treat char as always unsigned") Signed-off-by: Jason A. Donenfeld Reviewed-by: Hans de Goede Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=217211 Link: https://lore.kernel.org/r/20230318133010.1285202-1-Jason@zx2c4.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/focaltech.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/input/mouse/focaltech.c b/drivers/input/mouse/focaltech.c index 6fd5fff0cbfff..c74b99077d16a 100644 --- a/drivers/input/mouse/focaltech.c +++ b/drivers/input/mouse/focaltech.c @@ -202,8 +202,8 @@ static void focaltech_process_rel_packet(struct psmouse *psmouse, state->pressed = packet[0] >> 7; finger1 = ((packet[0] >> 4) & 0x7) - 1; if (finger1 < FOC_MAX_FINGERS) { - state->fingers[finger1].x += (char)packet[1]; - state->fingers[finger1].y += (char)packet[2]; + state->fingers[finger1].x += (s8)packet[1]; + state->fingers[finger1].y += (s8)packet[2]; } else { psmouse_err(psmouse, "First finger in rel packet invalid: %d\n", finger1); @@ -218,8 +218,8 @@ static void focaltech_process_rel_packet(struct psmouse *psmouse, */ finger2 = ((packet[3] >> 4) & 0x7) - 1; if (finger2 < FOC_MAX_FINGERS) { - state->fingers[finger2].x += (char)packet[4]; - state->fingers[finger2].y += (char)packet[5]; + state->fingers[finger2].x += (s8)packet[4]; + state->fingers[finger2].y += (s8)packet[5]; } } From 754ff5060daf5a1cf4474eff9b4edeb6c17ef7ab Mon Sep 17 00:00:00 2001 From: msizanoen Date: Sun, 19 Mar 2023 23:02:56 -0700 Subject: [PATCH 216/898] Input: alps - fix compatibility with -funsigned-char The AlpsPS/2 code previously relied on the assumption that `char` is a signed type, which was true on x86 platforms (the only place where this driver is used) before kernel 6.2. However, on 6.2 and later, this assumption is broken due to the introduction of -funsigned-char as a new global compiler flag. Fix this by explicitly specifying the signedness of `char` when sign extending the values received from the device. Fixes: f3f33c677699 ("Input: alps - Rushmore and v7 resolution support") Signed-off-by: msizanoen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230320045228.182259-1-msizanoen@qtmlabs.xyz Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/alps.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 989228b5a0a44..e2c11d9f3868f 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -852,8 +852,8 @@ static void alps_process_packet_v6(struct psmouse *psmouse) x = y = z = 0; /* Divide 4 since trackpoint's speed is too fast */ - input_report_rel(dev2, REL_X, (char)x / 4); - input_report_rel(dev2, REL_Y, -((char)y / 4)); + input_report_rel(dev2, REL_X, (s8)x / 4); + input_report_rel(dev2, REL_Y, -((s8)y / 4)); psmouse_report_standard_buttons(dev2, packet[3]); @@ -1104,8 +1104,8 @@ static void alps_process_trackstick_packet_v7(struct psmouse *psmouse) ((packet[3] & 0x20) << 1); z = (packet[5] & 0x3f) | ((packet[3] & 0x80) >> 1); - input_report_rel(dev2, REL_X, (char)x); - input_report_rel(dev2, REL_Y, -((char)y)); + input_report_rel(dev2, REL_X, (s8)x); + input_report_rel(dev2, REL_Y, -((s8)y)); input_report_abs(dev2, ABS_PRESSURE, z); psmouse_report_standard_buttons(dev2, packet[1]); @@ -2294,20 +2294,20 @@ static int alps_get_v3_v7_resolution(struct psmouse *psmouse, int reg_pitch) if (reg < 0) return reg; - x_pitch = (char)(reg << 4) >> 4; /* sign extend lower 4 bits */ + x_pitch = (s8)(reg << 4) >> 4; /* sign extend lower 4 bits */ x_pitch = 50 + 2 * x_pitch; /* In 0.1 mm units */ - y_pitch = (char)reg >> 4; /* sign extend upper 4 bits */ + y_pitch = (s8)reg >> 4; /* sign extend upper 4 bits */ y_pitch = 36 + 2 * y_pitch; /* In 0.1 mm units */ reg = alps_command_mode_read_reg(psmouse, reg_pitch + 1); if (reg < 0) return reg; - x_electrode = (char)(reg << 4) >> 4; /* sign extend lower 4 bits */ + x_electrode = (s8)(reg << 4) >> 4; /* sign extend lower 4 bits */ x_electrode = 17 + x_electrode; - y_electrode = (char)reg >> 4; /* sign extend upper 4 bits */ + y_electrode = (s8)reg >> 4; /* sign extend upper 4 bits */ y_electrode = 13 + y_electrode; x_phys = x_pitch * (x_electrode - 1); /* In 0.1 mm units */ From 1716efdb07938bd6510e1127d02012799112c433 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 10 Mar 2023 11:20:49 -0600 Subject: [PATCH 217/898] thunderbolt: Use const qualifier for `ring_interrupt_index` `ring_interrupt_index` doesn't change the data for `ring` so mark it as const. This is needed by the following patch that disables interrupt auto clear for rings. Cc: Sanju Mehta Cc: stable@vger.kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Mika Westerberg --- drivers/thunderbolt/nhi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 4dce2edd86ea0..fdc0c3ba2ef01 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -46,7 +46,7 @@ #define QUIRK_AUTO_CLEAR_INT BIT(0) #define QUIRK_E2E BIT(1) -static int ring_interrupt_index(struct tb_ring *ring) +static int ring_interrupt_index(const struct tb_ring *ring) { int bit = ring->hop; if (!ring->is_tx) From 468c49f44759720a312e52d44a71c3949ed63d7c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 10 Mar 2023 11:20:50 -0600 Subject: [PATCH 218/898] thunderbolt: Disable interrupt auto clear for rings When interrupt auto clear is programmed, any read to the interrupt status register will clear all interrupts. If two interrupts have come in before one can be serviced then this will cause lost interrupts. On AMD USB4 routers this has manifested in odd problems particularly with long strings of control tranfers such as reading the DROM via bit banging. Instead of clearing interrupts automatically, clear the bit corresponding to the given ring's interrupt in the ISR. Fixes: 7a1808f82a37 ("thunderbolt: Handle ring interrupt by reading interrupt status register") Cc: Sanju Mehta Cc: stable@vger.kernel.org Tested-by: Anson Tsao Signed-off-by: Mario Limonciello Signed-off-by: Mika Westerberg --- drivers/thunderbolt/nhi.c | 40 +++++++++++++++++++++------------- drivers/thunderbolt/nhi_regs.h | 6 +++-- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index fdc0c3ba2ef01..318d20bd5b695 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -71,24 +71,31 @@ static void ring_interrupt_active(struct tb_ring *ring, bool active) u32 step, shift, ivr, misc; void __iomem *ivr_base; int index; + int bit; if (ring->is_tx) index = ring->hop; else index = ring->hop + ring->nhi->hop_count; - if (ring->nhi->quirks & QUIRK_AUTO_CLEAR_INT) { - /* - * Ask the hardware to clear interrupt status - * bits automatically since we already know - * which interrupt was triggered. - */ - misc = ioread32(ring->nhi->iobase + REG_DMA_MISC); - if (!(misc & REG_DMA_MISC_INT_AUTO_CLEAR)) { - misc |= REG_DMA_MISC_INT_AUTO_CLEAR; - iowrite32(misc, ring->nhi->iobase + REG_DMA_MISC); - } - } + /* + * Intel routers support a bit that isn't part of + * the USB4 spec to ask the hardware to clear + * interrupt status bits automatically since + * we already know which interrupt was triggered. + * + * Other routers explicitly disable auto-clear + * to prevent conditions that may occur where two + * MSIX interrupts are simultaneously active and + * reading the register clears both of them. + */ + misc = ioread32(ring->nhi->iobase + REG_DMA_MISC); + if (ring->nhi->quirks & QUIRK_AUTO_CLEAR_INT) + bit = REG_DMA_MISC_INT_AUTO_CLEAR; + else + bit = REG_DMA_MISC_DISABLE_AUTO_CLEAR; + if (!(misc & bit)) + iowrite32(misc | bit, ring->nhi->iobase + REG_DMA_MISC); ivr_base = ring->nhi->iobase + REG_INT_VEC_ALLOC_BASE; step = index / REG_INT_VEC_ALLOC_REGS * REG_INT_VEC_ALLOC_BITS; @@ -393,14 +400,17 @@ EXPORT_SYMBOL_GPL(tb_ring_poll_complete); static void ring_clear_msix(const struct tb_ring *ring) { + int bit; + if (ring->nhi->quirks & QUIRK_AUTO_CLEAR_INT) return; + bit = ring_interrupt_index(ring) & 31; if (ring->is_tx) - ioread32(ring->nhi->iobase + REG_RING_NOTIFY_BASE); + iowrite32(BIT(bit), ring->nhi->iobase + REG_RING_INT_CLEAR); else - ioread32(ring->nhi->iobase + REG_RING_NOTIFY_BASE + - 4 * (ring->nhi->hop_count / 32)); + iowrite32(BIT(bit), ring->nhi->iobase + REG_RING_INT_CLEAR + + 4 * (ring->nhi->hop_count / 32)); } static irqreturn_t ring_msix(int irq, void *data) diff --git a/drivers/thunderbolt/nhi_regs.h b/drivers/thunderbolt/nhi_regs.h index 0d4970dcef842..faef165a919cc 100644 --- a/drivers/thunderbolt/nhi_regs.h +++ b/drivers/thunderbolt/nhi_regs.h @@ -77,12 +77,13 @@ struct ring_desc { /* * three bitfields: tx, rx, rx overflow - * Every bitfield contains one bit for every hop (REG_HOP_COUNT). Registers are - * cleared on read. New interrupts are fired only after ALL registers have been + * Every bitfield contains one bit for every hop (REG_HOP_COUNT). + * New interrupts are fired only after ALL registers have been * read (even those containing only disabled rings). */ #define REG_RING_NOTIFY_BASE 0x37800 #define RING_NOTIFY_REG_COUNT(nhi) ((31 + 3 * nhi->hop_count) / 32) +#define REG_RING_INT_CLEAR 0x37808 /* * two bitfields: rx, tx @@ -105,6 +106,7 @@ struct ring_desc { #define REG_DMA_MISC 0x39864 #define REG_DMA_MISC_INT_AUTO_CLEAR BIT(2) +#define REG_DMA_MISC_DISABLE_AUTO_CLEAR BIT(17) #define REG_INMAIL_DATA 0x39900 From 88c9483faf15ada14eca82714114656893063458 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Thu, 16 Mar 2023 15:40:49 +0200 Subject: [PATCH 219/898] IB/mlx5: Add support for 400G_8X lane speed Currently, when driver queries PTYS to report which link speed is being used on its RoCE ports, it does not check the case of having 400Gbps transmitted over 8 lanes. Thus it fails to report the said speed and instead it defaults to report 10G over 4 lanes. Add a check for the said speed when querying PTYS and report it back correctly when needed. Fixes: 08e8676f1607 ("IB/mlx5: Add support for 50Gbps per lane link modes") Signed-off-by: Maher Sanalla Reviewed-by: Aya Levin Reviewed-by: Saeed Mahameed Link: https://lore.kernel.org/r/ec9040548d119d22557d6a4b4070d6f421701fd4.1678973994.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5b988db66b8fd..5d45de223c43a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -442,6 +442,10 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed, *active_width = IB_WIDTH_2X; *active_speed = IB_SPEED_NDR; break; + case MLX5E_PROT_MASK(MLX5E_400GAUI_8): + *active_width = IB_WIDTH_8X; + *active_speed = IB_SPEED_HDR; + break; case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4): *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_NDR; From 364ac7863fc161841e86388884bb7d5f4048031a Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Wed, 1 Mar 2023 12:10:49 -0800 Subject: [PATCH 220/898] drm/i915/mtl: Fix Wa_16015201720 implementation The commit 2357f2b271ad ("drm/i915/mtl: Initial display workarounds") extended the workaround Wa_16015201720 to MTL. However the registers that the original WA implemented moved for MTL. Implement the workaround with the correct register. v3: Skip clock gating for pipe C, D DMC's and fix the title Fixes: 2357f2b271ad ("drm/i915/mtl: Initial display workarounds") Cc: Matt Atwood Cc: Lucas De Marchi Signed-off-by: Radhakrishna Sripada Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230301201053.928709-2-radhakrishna.sripada@intel.com (cherry picked from commit 0188be507b973e36f637ba010a369057c8cb7282) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dmc.c | 26 +++++++++++++++++++----- drivers/gpu/drm/i915/i915_reg.h | 8 +++++--- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 257aa2b7cf204..3485d5e6dd3c7 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -384,15 +384,12 @@ static void disable_all_event_handlers(struct drm_i915_private *i915) } } -static void pipedmc_clock_gating_wa(struct drm_i915_private *i915, bool enable) +static void adlp_pipedmc_clock_gating_wa(struct drm_i915_private *i915, bool enable) { enum pipe pipe; - if (DISPLAY_VER(i915) < 13) - return; - /* - * Wa_16015201720:adl-p,dg2, mtl + * Wa_16015201720:adl-p,dg2 * The WA requires clock gating to be disabled all the time * for pipe A and B. * For pipe C and D clock gating needs to be disabled only @@ -408,6 +405,25 @@ static void pipedmc_clock_gating_wa(struct drm_i915_private *i915, bool enable) PIPEDMC_GATING_DIS, 0); } +static void mtl_pipedmc_clock_gating_wa(struct drm_i915_private *i915) +{ + /* + * Wa_16015201720 + * The WA requires clock gating to be disabled all the time + * for pipe A and B. + */ + intel_de_rmw(i915, GEN9_CLKGATE_DIS_0, 0, + MTL_PIPEDMC_GATING_DIS_A | MTL_PIPEDMC_GATING_DIS_B); +} + +static void pipedmc_clock_gating_wa(struct drm_i915_private *i915, bool enable) +{ + if (DISPLAY_VER(i915) >= 14 && enable) + mtl_pipedmc_clock_gating_wa(i915); + else if (DISPLAY_VER(i915) == 13) + adlp_pipedmc_clock_gating_wa(i915, enable); +} + void intel_dmc_enable_pipe(struct drm_i915_private *i915, enum pipe pipe) { if (!has_dmc_id_fw(i915, PIPE_TO_DMC_ID(pipe))) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3b2642397b828..19b047875e62f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1786,9 +1786,11 @@ * GEN9 clock gating regs */ #define GEN9_CLKGATE_DIS_0 _MMIO(0x46530) -#define DARBF_GATING_DIS (1 << 27) -#define PWM2_GATING_DIS (1 << 14) -#define PWM1_GATING_DIS (1 << 13) +#define DARBF_GATING_DIS REG_BIT(27) +#define MTL_PIPEDMC_GATING_DIS_A REG_BIT(15) +#define MTL_PIPEDMC_GATING_DIS_B REG_BIT(14) +#define PWM2_GATING_DIS REG_BIT(14) +#define PWM1_GATING_DIS REG_BIT(13) #define GEN9_CLKGATE_DIS_3 _MMIO(0x46538) #define TGL_VRH_GATING_DIS REG_BIT(31) From ed00eba03474adbf525ff03d69705d8c78b76456 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 1 Mar 2023 12:10:52 -0800 Subject: [PATCH 221/898] drm/i915/fbdev: lock the fbdev obj before vma pin lock the fbdev obj before calling into i915_vma_pin_iomap(). This helps to solve below : <7>[ 93.563308] i915 0000:00:02.0: [drm:intelfb_create [i915]] no BIOS fb, allocating a new one <4>[ 93.581844] ------------[ cut here ]------------ <4>[ 93.581855] WARNING: CPU: 12 PID: 625 at drivers/gpu/drm/i915/gem/i915_gem_pages.c:424 i915_gem_object_pin_map+0x152/0x1c0 [i915] Fixes: f0b6b01b3efe ("drm/i915: Add ww context to intel_dpt_pin, v2.") Cc: Chris Wilson Cc: Matthew Auld Cc: Maarten Lankhorst Signed-off-by: Tejas Upadhyay Signed-off-by: Radhakrishna Sripada Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230301201053.928709-5-radhakrishna.sripada@intel.com (cherry picked from commit 561b31acfd65502a2cda2067513240fc57ccdbdc) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbdev.c | 24 ++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index f76b06293eb94..38825b30db16c 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -210,6 +210,7 @@ static int intelfb_create(struct drm_fb_helper *helper, bool prealloc = false; void __iomem *vaddr; struct drm_i915_gem_object *obj; + struct i915_gem_ww_ctx ww; int ret; mutex_lock(&ifbdev->hpd_lock); @@ -283,13 +284,24 @@ static int intelfb_create(struct drm_fb_helper *helper, info->fix.smem_len = vma->size; } - vaddr = i915_vma_pin_iomap(vma); - if (IS_ERR(vaddr)) { - drm_err(&dev_priv->drm, - "Failed to remap framebuffer into virtual memory (%pe)\n", vaddr); - ret = PTR_ERR(vaddr); - goto out_unpin; + for_i915_gem_ww(&ww, ret, false) { + ret = i915_gem_object_lock(vma->obj, &ww); + + if (ret) + continue; + + vaddr = i915_vma_pin_iomap(vma); + if (IS_ERR(vaddr)) { + drm_err(&dev_priv->drm, + "Failed to remap framebuffer into virtual memory (%pe)\n", vaddr); + ret = PTR_ERR(vaddr); + continue; + } } + + if (ret) + goto out_unpin; + info->screen_base = vaddr; info->screen_size = vma->size; From 3a84f2c6c9558c554a90ec26ad25df92fc5e05b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Feb 2023 17:20:48 +0200 Subject: [PATCH 222/898] drm/i915: Preserve crtc_state->inherited during state clearing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_crtc_prepare_cleared_state() is unintentionally losing the "inherited" flag. This will happen if intel_initial_commit() is forced to go through the full modeset calculations for whatever reason. Afterwards the first real commit from userspace will not get forced to the full modeset path, and thus eg. audio state may not get recomputed properly. So if the monitor was already enabled during boot audio will not work until userspace itself does an explicit full modeset. Cc: stable@vger.kernel.org Tested-by: Lee Shawn C Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230223152048.20878-1-ville.syrjala@linux.intel.com Reviewed-by: Uma Shankar (cherry picked from commit 2553bacaf953b48c59357f5a622282bc0c45adae) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index d3994e2a7d636..208b1b5b15dd4 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5145,6 +5145,7 @@ intel_crtc_prepare_cleared_state(struct intel_atomic_state *state, * only fields that are know to not cause problems are preserved. */ saved_state->uapi = crtc_state->uapi; + saved_state->inherited = crtc_state->inherited; saved_state->scaler_state = crtc_state->scaler_state; saved_state->shared_dpll = crtc_state->shared_dpll; saved_state->dpll_hw_state = crtc_state->dpll_hw_state; From 088a422c3fa3ee9268d400078626b0c202cfe9dd Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Fri, 10 Mar 2023 11:43:39 +0530 Subject: [PATCH 223/898] drm/i915/mtl: Disable MC6 for MTL A step The Wa_14017073508 require to send Media Busy/Idle mailbox while accessing Media tile. As of now it is getting handled while __gt_unpark, __gt_park. But there are various corner cases where forcewakes are taken without __gt_unpark i.e. without sending Busy Mailbox especially during register reads. Forcewakes are taken without busy mailbox leads to GPU HANG. So bringing mailbox calls under forcewake calls are no feasible option as forcewake calls are atomic and mailbox calls are blocking. The issue already fixed in B step so disabling MC6 on A step and reverting previous commit which handles Wa_14017073508 Fixes: 8f70f1ec587d ("drm/i915/mtl: Add Wa_14017073508 for SAMedia") Cc: Rodrigo Vivi Signed-off-by: Badal Nilawar Reviewed-by: Rodrigo Vivi Signed-off-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20230310061339.2495416-2-badal.nilawar@intel.com (cherry picked from commit 038a24835ab68f341eaa7a0e3bcc6ce0f9b22e17) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 27 ----------------------- drivers/gpu/drm/i915/gt/intel_rc6.c | 8 +++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c | 13 +---------- drivers/gpu/drm/i915/i915_reg.h | 9 -------- 4 files changed, 9 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index cef3d6f5c34e0..56b993f6e7dc9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -21,31 +21,10 @@ #include "intel_rc6.h" #include "intel_rps.h" #include "intel_wakeref.h" -#include "intel_pcode.h" #include "pxp/intel_pxp_pm.h" #define I915_GT_SUSPEND_IDLE_TIMEOUT (HZ / 2) -static void mtl_media_busy(struct intel_gt *gt) -{ - /* Wa_14017073508: mtl */ - if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) && - gt->type == GT_MEDIA) - snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE, - PCODE_MBOX_GT_STATE_MEDIA_BUSY, - PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0); -} - -static void mtl_media_idle(struct intel_gt *gt) -{ - /* Wa_14017073508: mtl */ - if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) && - gt->type == GT_MEDIA) - snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE, - PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY, - PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0); -} - static void user_forcewake(struct intel_gt *gt, bool suspend) { int count = atomic_read(>->user_wakeref); @@ -93,9 +72,6 @@ static int __gt_unpark(struct intel_wakeref *wf) GT_TRACE(gt, "\n"); - /* Wa_14017073508: mtl */ - mtl_media_busy(gt); - /* * It seems that the DMC likes to transition between the DC states a lot * when there are no connected displays (no active power domains) during @@ -145,9 +121,6 @@ static int __gt_park(struct intel_wakeref *wf) GEM_BUG_ON(!wakeref); intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref); - /* Wa_14017073508: mtl */ - mtl_media_idle(gt); - return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 5c91622dfca42..f4150f61f39c0 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -486,6 +486,7 @@ static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6) static bool rc6_supported(struct intel_rc6 *rc6) { struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_gt *gt = rc6_to_gt(rc6); if (!HAS_RC6(i915)) return false; @@ -502,6 +503,13 @@ static bool rc6_supported(struct intel_rc6 *rc6) return false; } + if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0) && + gt->type == GT_MEDIA) { + drm_notice(&i915->drm, + "Media RC6 disabled on A step\n"); + return false; + } + return true; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c index b5855091cf6a9..8f8dd05835c5a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c @@ -11,20 +11,9 @@ static bool __guc_rc_supported(struct intel_guc *guc) { - struct intel_gt *gt = guc_to_gt(guc); - - /* - * Wa_14017073508: mtl - * Do not enable gucrc to avoid additional interrupts which - * may disrupt pcode wa. - */ - if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) && - gt->type == GT_MEDIA) - return false; - /* GuC RC is unavailable for pre-Gen12 */ return guc->submission_supported && - GRAPHICS_VER(gt->i915) >= 12; + GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12; } static bool __guc_rc_selected(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 19b047875e62f..747b53b567a09 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6598,15 +6598,6 @@ /* XEHP_PCODE_FREQUENCY_CONFIG param2 */ #define PCODE_MBOX_DOMAIN_NONE 0x0 #define PCODE_MBOX_DOMAIN_MEDIAFF 0x3 - -/* Wa_14017210380: mtl */ -#define PCODE_MBOX_GT_STATE 0x50 -/* sub-commands (param1) */ -#define PCODE_MBOX_GT_STATE_MEDIA_BUSY 0x1 -#define PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY 0x2 -/* param2 */ -#define PCODE_MBOX_GT_STATE_DOMAIN_MEDIA 0x1 - #define GEN6_PCODE_DATA _MMIO(0x138128) #define GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8 #define GEN6_PCODE_FREQ_RING_RATIO_SHIFT 16 From 8df23e4c4f72f4e201c28e6fb0a67e2dbf30628a Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 10 Mar 2023 22:37:12 -0800 Subject: [PATCH 224/898] drm/i915/guc: Fix missing ecodes Error captures are tagged with an 'ecode'. This is a pseduo-unique magic number that is meant to distinguish similar seeming bugs with different underlying signatures. It is a combination of two ring state registers. Unfortunately, the register state being used is only valid in execlist mode. In GuC mode, the register state exists in a separate list of arbitrary register address/value pairs rather than the named entry structure. So, search through that list to find the two exciting registers and copy them over to the structure's named members. v2: if else if instead of if if (Alan) Signed-off-by: John Harrison Reviewed-by: Alan Previn Fixes: a6f0f9cf330a ("drm/i915/guc: Plumb GuC-capture into gpu_coredump") Cc: Alan Previn Cc: Umesh Nerlige Ramappa Cc: Lucas De Marchi Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: Matt Roper Cc: Aravind Iddamsetty Cc: Michael Cheng Cc: Matthew Brost Cc: Bruce Chang Cc: Daniele Ceraolo Spurio Cc: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20230311063714.570389-2-John.C.Harrison@Intel.com (cherry picked from commit 9724ecdbb9ddd6da3260e4a442574b90fc75188a) Signed-off-by: Jani Nikula --- .../gpu/drm/i915/gt/uc/intel_guc_capture.c | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index fc3b994626a4f..710999d7189ee 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -1571,6 +1571,27 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf, #endif //CONFIG_DRM_I915_CAPTURE_ERROR +static void guc_capture_find_ecode(struct intel_engine_coredump *ee) +{ + struct gcap_reg_list_info *reginfo; + struct guc_mmio_reg *regs; + i915_reg_t reg_ipehr = RING_IPEHR(0); + i915_reg_t reg_instdone = RING_INSTDONE(0); + int i; + + if (!ee->guc_capture_node) + return; + + reginfo = ee->guc_capture_node->reginfo + GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE; + regs = reginfo->regs; + for (i = 0; i < reginfo->num_regs; i++) { + if (regs[i].offset == reg_ipehr.reg) + ee->ipehr = regs[i].value; + else if (regs[i].offset == reg_instdone.reg) + ee->instdone.instdone = regs[i].value; + } +} + void intel_guc_capture_free_node(struct intel_engine_coredump *ee) { if (!ee || !ee->guc_capture_node) @@ -1612,6 +1633,7 @@ void intel_guc_capture_get_matching_node(struct intel_gt *gt, list_del(&n->link); ee->guc_capture_node = n; ee->guc_capture = guc->capture; + guc_capture_find_ecode(ee); return; } } From e92eb246feb9019b0b137706c934b8891cdfe3c2 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 14 Mar 2023 15:29:14 +0100 Subject: [PATCH 225/898] drm/i915/active: Fix missing debug object activation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit debug_active_activate() expected ref->count to be zero which is not true anymore as __i915_active_activate() calls debug_active_activate() after incrementing the count. v2: No need to check for "ref->count == 1" as __i915_active_activate() already make sure of that(Janusz). References: https://gitlab.freedesktop.org/drm/intel/-/issues/6733 Fixes: 04240e30ed06 ("drm/i915: Skip taking acquire mutex for no ref->active callback") Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Thomas Hellström Cc: Andi Shyti Cc: intel-gfx@lists.freedesktop.org Cc: Janusz Krzysztofik Cc: # v5.10+ Signed-off-by: Nirmoy Das Reviewed-by: Janusz Krzysztofik Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230313114613.9874-1-nirmoy.das@intel.com (cherry picked from commit bfad380c542438a9b642f8190b7fd37bc77e2723) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_active.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index a9fea115f2d26..8ef93889061a6 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -92,8 +92,7 @@ static void debug_active_init(struct i915_active *ref) static void debug_active_activate(struct i915_active *ref) { lockdep_assert_held(&ref->tree_lock); - if (!atomic_read(&ref->count)) /* before the first inc */ - debug_object_activate(ref, &active_debug_desc); + debug_object_activate(ref, &active_debug_desc); } static void debug_active_deactivate(struct i915_active *ref) From 150784f9285e656373cf3953ef4a7663f1e1a0f2 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 14 Mar 2023 16:19:20 +0100 Subject: [PATCH 226/898] drm/i915/gt: perform uc late init after probe error injection Probe pseudo errors should be injected only in places where real errors can be encountered, otherwise unwinding code can be broken. Placing intel_uc_init_late before i915_inject_probe_error violated this rule, resulting in following bug: __intel_gt_disable:655 GEM_BUG_ON(intel_gt_pm_is_awake(gt)) Fixes: 481d458caede ("drm/i915/guc: Add golden context to GuC ADS") Acked-by: Nirmoy Das Reviewed-by: Andi Shyti Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230314151920.1065847-1-andrzej.hajda@intel.com (cherry picked from commit c4252a11131c7f27a158294241466e2a4e7ff94e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_gt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f0dbfc434e077..40d357cf8b042 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -737,12 +737,12 @@ int intel_gt_init(struct intel_gt *gt) if (err) goto err_gt; - intel_uc_init_late(>->uc); - err = i915_inject_probe_error(gt->i915, -EIO); if (err) goto err_gt; + intel_uc_init_late(>->uc); + intel_migrate_init(>->migrate, gt); goto out_fw; From f8d62aa8d24d9883df738e450bfe6be396e11979 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Tue, 14 Mar 2023 19:29:06 -0700 Subject: [PATCH 227/898] drm/i915: Fix format for perf_limit_reasons Use hex format so that it is easier to decode. Fixes: fe5979665f64 ("drm/i915/debugfs: Add perf_limit_reasons in debugfs") Signed-off-by: Vinay Belgaumkar Reviewed-by: Ashutosh Dixit Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20230315022906.2467408-1-vinay.belgaumkar@intel.com (cherry picked from commit 5e008ba67cb80084e99b40ccd46f9029ae421632) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 83df4cd5e06cb..80dbbef86b1db 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -580,7 +580,7 @@ static bool perf_limit_reasons_eval(void *data) } DEFINE_SIMPLE_ATTRIBUTE(perf_limit_reasons_fops, perf_limit_reasons_get, - perf_limit_reasons_clear, "%llu\n"); + perf_limit_reasons_clear, "0x%llx\n"); void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root) { From 59ad01c786a4c94afacc7feb0ab97bf8d6672a46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 11 Mar 2023 01:58:25 +0200 Subject: [PATCH 228/898] drm/i915: Update vblank timestamping stuff on seamless M/N change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we change the M/N values seamlessly during a fastset we should also update the vblank timestamping stuff to make sure the vblank timestamp corrections/guesstimations come out exact. Note that only crtc_clock and framedur_ns can actually end up changing here during fastsets. Everything else we touch can only change during full modesets. Technically we should try to do this exactly at the start of vblank, but that would require some kind of double buffering scheme. Let's skip that for now and just update things right after the commit has been submitted to the hardware. This means the information will be properly up to date when the vblank irq handler goes to work. Only if someone ends up querying some vblanky stuff in between the commit and start of vblank may we see a slight discrepancy. Also this same problem really exists for the DRRS downclocking stuff. But as that is supposed to be more or less transparent to the user, and it only drops to low gear after a long delay (1 sec currently) we probably don't have to worry about it. Any time something is actively submitting updates DRRS will remain in high gear and so the timestamping constants will match the hardware state. Reviewed-by: Jani Nikula Reviewed-by: Mitul Golani Fixes: e6f29923c048 ("drm/i915: Allow M/N change during fastset on bdw+") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230310235828.17439-1-ville.syrjala@linux.intel.com (cherry picked from commit 8cb1f95cca68421b08333175719fdd3615372ca8) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_crtc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index 82be0fbe99342..d5b5d40ed817f 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -683,6 +683,14 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) */ intel_vrr_send_push(new_crtc_state); + /* + * Seamless M/N update may need to update frame timings. + * + * FIXME Should be synchronized with the start of vblank somehow... + */ + if (new_crtc_state->seamless_m_n && intel_crtc_needs_fastset(new_crtc_state)) + intel_crtc_update_active_timings(new_crtc_state); + local_irq_enable(); if (intel_vgpu_active(dev_priv)) From a8eff03545d4cef12ae66a1905627c1818a0f81a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 18 Mar 2023 01:19:00 +0200 Subject: [PATCH 229/898] net: dsa: report rx_bytes unadjusted for ETH_HLEN We collect the software statistics counters for RX bytes (reported to /proc/net/dev and to ethtool -S $dev | grep 'rx_bytes: ") at a time when skb->len has already been adjusted by the eth_type_trans() -> skb_pull_inline(skb, ETH_HLEN) call to exclude the L2 header. This means that when connecting 2 DSA interfaces back to back and sending 1 packet with length 100, the sending interface will report tx_bytes as incrementing by 100, and the receiving interface will report rx_bytes as incrementing by 86. Since accounting for that in scripts is quirky and is something that would be DSA-specific behavior (requiring users to know that they are running on a DSA interface in the first place), the proposal is that we treat it as a bug and fix it. This design bug has always existed in DSA, according to my analysis: commit 91da11f870f0 ("net: Distributed Switch Architecture protocol support") also updates skb->dev->stats.rx_bytes += skb->len after the eth_type_trans() call. Technically, prior to Florian's commit a86d8becc3f0 ("net: dsa: Factor bottom tag receive functions"), each and every vendor-specific tagging protocol driver open-coded the same bug, until the buggy code was consolidated into something resembling what can be seen now. So each and every driver should have its own Fixes: tag, because of their different histories until the convergence point. I'm not going to do that, for the sake of simplicity, but just blame the oldest appearance of buggy code. There are 2 ways to fix the problem. One is the obvious way, and the other is how I ended up doing it. Obvious would have been to move dev_sw_netstats_rx_add() one line above eth_type_trans(), and below skb_push(skb, ETH_HLEN). But DSA processing is not as simple as that. We count the bytes after removing everything DSA-related from the packet, to emulate what the packet's length was, on the wire, when the user port received it. When eth_type_trans() executes, dsa_untag_bridge_pvid() has not run yet, so in case the switch driver requests this behavior - commit 412a1526d067 ("net: dsa: untag the bridge pvid from rx skbs") has the details - the obvious variant of the fix wouldn't have worked, because the positioning there would have also counted the not-yet-stripped VLAN header length, something which is absent from the packet as seen on the wire (there it may be untagged, whereas software will see it as PVID-tagged). Fixes: f613ed665bb3 ("net: dsa: Add support for 64-bit statistics") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/tag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/tag.c b/net/dsa/tag.c index b2fba1a003ce3..5105a5ff58fa2 100644 --- a/net/dsa/tag.c +++ b/net/dsa/tag.c @@ -114,7 +114,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, skb = nskb; } - dev_sw_netstats_rx_add(skb->dev, skb->len); + dev_sw_netstats_rx_add(skb->dev, skb->len + ETH_HLEN); if (dsa_skb_defer_rx_timestamp(p, skb)) return 0; From 6b6bc5b8bd2d4ca9e1efa9ae0f98a0b0687ace75 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Sat, 18 Mar 2023 16:05:26 +0800 Subject: [PATCH 230/898] net: qcom/emac: Fix use after free bug in emac_remove due to race condition In emac_probe, &adpt->work_thread is bound with emac_work_thread. Then it will be started by timeout handler emac_tx_timeout or a IRQ handler emac_isr. If we remove the driver which will call emac_remove to make cleanup, there may be a unfinished work. The possible sequence is as follows: Fix it by finishing the work before cleanup in the emac_remove and disable timeout response. CPU0 CPU1 |emac_work_thread emac_remove | free_netdev | kfree(netdev); | |emac_reinit_locked |emac_mac_down |//use netdev Fixes: b9b17debc69d ("net: emac: emac gigabit ethernet controller driver") Signed-off-by: Zheng Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 3115b2c128980..eaa50050aa0b7 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -724,9 +724,15 @@ static int emac_remove(struct platform_device *pdev) struct net_device *netdev = dev_get_drvdata(&pdev->dev); struct emac_adapter *adpt = netdev_priv(netdev); + netif_carrier_off(netdev); + netif_tx_disable(netdev); + unregister_netdev(netdev); netif_napi_del(&adpt->rx_q.napi); + free_irq(adpt->irq.irq, &adpt->irq); + cancel_work_sync(&adpt->work_thread); + emac_clks_teardown(adpt); put_device(&adpt->phydev->mdio.dev); From 7f247f5a2c18b3f21206cdd51193df4f38e1b9f5 Mon Sep 17 00:00:00 2001 From: Szymon Heidrich Date: Sat, 18 Mar 2023 10:25:52 +0100 Subject: [PATCH 231/898] net: usb: lan78xx: Limit packet length to skb->len Packet length retrieved from descriptor may be larger than the actual socket buffer length. In such case the cloned skb passed up the network stack will leak kernel memory contents. Additionally prevent integer underflow when size is less than ETH_FCS_LEN. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Szymon Heidrich Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 068488890d57b..c458c030fadf6 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3579,13 +3579,29 @@ static int lan78xx_rx(struct lan78xx_net *dev, struct sk_buff *skb, size = (rx_cmd_a & RX_CMD_A_LEN_MASK_); align_count = (4 - ((size + RXW_PADDING) % 4)) % 4; + if (unlikely(size > skb->len)) { + netif_dbg(dev, rx_err, dev->net, + "size err rx_cmd_a=0x%08x\n", + rx_cmd_a); + return 0; + } + if (unlikely(rx_cmd_a & RX_CMD_A_RED_)) { netif_dbg(dev, rx_err, dev->net, "Error rx_cmd_a=0x%08x", rx_cmd_a); } else { - u32 frame_len = size - ETH_FCS_LEN; + u32 frame_len; struct sk_buff *skb2; + if (unlikely(size < ETH_FCS_LEN)) { + netif_dbg(dev, rx_err, dev->net, + "size err rx_cmd_a=0x%08x\n", + rx_cmd_a); + return 0; + } + + frame_len = size - ETH_FCS_LEN; + skb2 = napi_alloc_skb(&dev->napi, frame_len); if (!skb2) return 0; From 7d722c9802d4bd61a1f1614e07413b6a5fac382d Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sat, 18 Mar 2023 09:13:42 -0400 Subject: [PATCH 232/898] usb: plusb: remove unused pl_clear_QuickLink_features function clang with W=1 reports drivers/net/usb/plusb.c:65:1: error: unused function 'pl_clear_QuickLink_features' [-Werror,-Wunused-function] pl_clear_QuickLink_features(struct usbnet *dev, int val) ^ This static function is not used, so remove it. Signed-off-by: Tom Rix Signed-off-by: David S. Miller --- drivers/net/usb/plusb.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c index 7a2b0094de51f..2894114858a29 100644 --- a/drivers/net/usb/plusb.c +++ b/drivers/net/usb/plusb.c @@ -61,12 +61,6 @@ pl_vendor_req(struct usbnet *dev, u8 req, u8 val, u8 index) val, index, NULL, 0); } -static inline int -pl_clear_QuickLink_features(struct usbnet *dev, int val) -{ - return pl_vendor_req(dev, 1, (u8) val, 0); -} - static inline int pl_set_QuickLink_features(struct usbnet *dev, int val) { From 19b3bb51c3bc288b3f2c6f8c4450b0f548320625 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Sat, 18 Mar 2023 17:39:16 +0000 Subject: [PATCH 233/898] net/ps3_gelic_net: Fix RX sk_buff length The Gelic Ethernet device needs to have the RX sk_buffs aligned to GELIC_NET_RXBUF_ALIGN, and also the length of the RX sk_buffs must be a multiple of GELIC_NET_RXBUF_ALIGN. The current Gelic Ethernet driver was not allocating sk_buffs large enough to allow for this alignment. Also, correct the maximum and minimum MTU sizes, and add a new preprocessor macro for the maximum frame size, GELIC_NET_MAX_FRAME. Fixes various randomly occurring runtime network errors. Fixes: 02c1889166b4 ("ps3: gigabit ethernet driver for PS3, take3") Signed-off-by: Geoff Levand Signed-off-by: David S. Miller --- drivers/net/ethernet/toshiba/ps3_gelic_net.c | 19 ++++++++++--------- drivers/net/ethernet/toshiba/ps3_gelic_net.h | 5 +++-- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index cf8de8a7a8a1e..dffd664e65f4e 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -365,26 +365,27 @@ static int gelic_card_init_chain(struct gelic_card *card, * * allocates a new rx skb, iommu-maps it and attaches it to the descriptor. * Activate the descriptor state-wise + * + * Gelic RX sk_buffs must be aligned to GELIC_NET_RXBUF_ALIGN and the length + * must be a multiple of GELIC_NET_RXBUF_ALIGN. */ static int gelic_descr_prepare_rx(struct gelic_card *card, struct gelic_descr *descr) { + static const unsigned int rx_skb_size = + ALIGN(GELIC_NET_MAX_FRAME, GELIC_NET_RXBUF_ALIGN) + + GELIC_NET_RXBUF_ALIGN - 1; int offset; - unsigned int bufsize; if (gelic_descr_get_status(descr) != GELIC_DESCR_DMA_NOT_IN_USE) dev_info(ctodev(card), "%s: ERROR status\n", __func__); - /* we need to round up the buffer size to a multiple of 128 */ - bufsize = ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN); - /* and we need to have it 128 byte aligned, therefore we allocate a - * bit more */ - descr->skb = dev_alloc_skb(bufsize + GELIC_NET_RXBUF_ALIGN - 1); + descr->skb = netdev_alloc_skb(*card->netdev, rx_skb_size); if (!descr->skb) { descr->buf_addr = 0; /* tell DMAC don't touch memory */ return -ENOMEM; } - descr->buf_size = cpu_to_be32(bufsize); + descr->buf_size = cpu_to_be32(rx_skb_size); descr->dmac_cmd_status = 0; descr->result_size = 0; descr->valid_size = 0; @@ -397,7 +398,7 @@ static int gelic_descr_prepare_rx(struct gelic_card *card, /* io-mmu-map the skb */ descr->buf_addr = cpu_to_be32(dma_map_single(ctodev(card), descr->skb->data, - GELIC_NET_MAX_MTU, + GELIC_NET_MAX_FRAME, DMA_FROM_DEVICE)); if (!descr->buf_addr) { dev_kfree_skb_any(descr->skb); @@ -915,7 +916,7 @@ static void gelic_net_pass_skb_up(struct gelic_descr *descr, data_error = be32_to_cpu(descr->data_error); /* unmap skb buffer */ dma_unmap_single(ctodev(card), be32_to_cpu(descr->buf_addr), - GELIC_NET_MAX_MTU, + GELIC_NET_MAX_FRAME, DMA_FROM_DEVICE); skb_put(skb, be32_to_cpu(descr->valid_size)? diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h index 68f324ed4eaf0..0d98defb011ed 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h @@ -19,8 +19,9 @@ #define GELIC_NET_RX_DESCRIPTORS 128 /* num of descriptors */ #define GELIC_NET_TX_DESCRIPTORS 128 /* num of descriptors */ -#define GELIC_NET_MAX_MTU VLAN_ETH_FRAME_LEN -#define GELIC_NET_MIN_MTU VLAN_ETH_ZLEN +#define GELIC_NET_MAX_FRAME 2312 +#define GELIC_NET_MAX_MTU 2294 +#define GELIC_NET_MIN_MTU 64 #define GELIC_NET_RXBUF_ALIGN 128 #define GELIC_CARD_RX_CSUM_DEFAULT 1 /* hw chksum */ #define GELIC_NET_WATCHDOG_TIMEOUT 5*HZ From bebe933d35a63d4f042fbf4dce4f22e689ba0fcd Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Sat, 18 Mar 2023 17:39:16 +0000 Subject: [PATCH 234/898] net/ps3_gelic_net: Use dma_mapping_error The current Gelic Etherenet driver was checking the return value of its dma_map_single call, and not using the dma_mapping_error() routine. Fixes runtime problems like these: DMA-API: ps3_gelic_driver sb_05: device driver failed to check map error WARNING: CPU: 0 PID: 0 at kernel/dma/debug.c:1027 .check_unmap+0x888/0x8dc Fixes: 02c1889166b4 ("ps3: gigabit ethernet driver for PS3, take3") Reviewed-by: Alexander Duyck Signed-off-by: Geoff Levand Signed-off-by: David S. Miller --- drivers/net/ethernet/toshiba/ps3_gelic_net.c | 24 +++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index dffd664e65f4e..9d535ae596266 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -317,15 +317,17 @@ static int gelic_card_init_chain(struct gelic_card *card, /* set up the hardware pointers in each descriptor */ for (i = 0; i < no; i++, descr++) { + dma_addr_t cpu_addr; + gelic_descr_set_status(descr, GELIC_DESCR_DMA_NOT_IN_USE); - descr->bus_addr = - dma_map_single(ctodev(card), descr, - GELIC_DESCR_SIZE, - DMA_BIDIRECTIONAL); - if (!descr->bus_addr) + cpu_addr = dma_map_single(ctodev(card), descr, + GELIC_DESCR_SIZE, DMA_BIDIRECTIONAL); + + if (dma_mapping_error(ctodev(card), cpu_addr)) goto iommu_error; + descr->bus_addr = cpu_to_be32(cpu_addr); descr->next = descr + 1; descr->prev = descr - 1; } @@ -375,6 +377,7 @@ static int gelic_descr_prepare_rx(struct gelic_card *card, static const unsigned int rx_skb_size = ALIGN(GELIC_NET_MAX_FRAME, GELIC_NET_RXBUF_ALIGN) + GELIC_NET_RXBUF_ALIGN - 1; + dma_addr_t cpu_addr; int offset; if (gelic_descr_get_status(descr) != GELIC_DESCR_DMA_NOT_IN_USE) @@ -396,11 +399,10 @@ static int gelic_descr_prepare_rx(struct gelic_card *card, if (offset) skb_reserve(descr->skb, GELIC_NET_RXBUF_ALIGN - offset); /* io-mmu-map the skb */ - descr->buf_addr = cpu_to_be32(dma_map_single(ctodev(card), - descr->skb->data, - GELIC_NET_MAX_FRAME, - DMA_FROM_DEVICE)); - if (!descr->buf_addr) { + cpu_addr = dma_map_single(ctodev(card), descr->skb->data, + GELIC_NET_MAX_FRAME, DMA_FROM_DEVICE); + descr->buf_addr = cpu_to_be32(cpu_addr); + if (dma_mapping_error(ctodev(card), cpu_addr)) { dev_kfree_skb_any(descr->skb); descr->skb = NULL; dev_info(ctodev(card), @@ -780,7 +782,7 @@ static int gelic_descr_prepare_tx(struct gelic_card *card, buf = dma_map_single(ctodev(card), skb->data, skb->len, DMA_TO_DEVICE); - if (!buf) { + if (dma_mapping_error(ctodev(card), buf)) { dev_err(ctodev(card), "dma map 2 failed (%p, %i). Dropping packet\n", skb->data, skb->len); From 22aa20e4c5dcbe6fdc480eb4fb27039b1f43217f Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Sun, 19 Mar 2023 07:03:00 -0700 Subject: [PATCH 235/898] Revert "drm/i915/hwmon: Enable PL1 power limit" This reverts commit ee892ea83d99610fa33bea612de058e0955eec3a. It was accidentally picked up for backporting. Revert. Cc: Jani Nikula Cc: Rodrigo Vivi Signed-off-by: Ashutosh Dixit Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230319140300.2892032-1-ashutosh.dixit@intel.com --- drivers/gpu/drm/i915/i915_hwmon.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 4683a5b96eff1..1225bc432f0d5 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -687,11 +687,6 @@ hwm_get_preregistration_info(struct drm_i915_private *i915) for_each_gt(gt, i915, i) hwm_energy(&hwmon->ddat_gt[i], &energy); } - - /* Enable PL1 power limit */ - if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) - hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, - PKG_PWR_LIM_1_EN, PKG_PWR_LIM_1_EN); } void i915_hwmon_register(struct drm_i915_private *i915) From 3fe26c0493e4c2da4b7d8ba8c975a6f48fb75ec2 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Mon, 20 Mar 2023 16:46:49 +0800 Subject: [PATCH 236/898] RDMA/erdma: Fix some typos FAA is short for atomic fetch and add, not FAD. Fix this. Fixes: 0ca9c2e2844a ("RDMA/erdma: Implement atomic operations support") Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20230320084652.16807-2-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_cq.c | 2 +- drivers/infiniband/hw/erdma/erdma_hw.h | 2 +- drivers/infiniband/hw/erdma/erdma_qp.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c index cabd8678b3558..7bc354273d4ec 100644 --- a/drivers/infiniband/hw/erdma/erdma_cq.c +++ b/drivers/infiniband/hw/erdma/erdma_cq.c @@ -65,7 +65,7 @@ static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = { [ERDMA_OP_LOCAL_INV] = IB_WC_LOCAL_INV, [ERDMA_OP_READ_WITH_INV] = IB_WC_RDMA_READ, [ERDMA_OP_ATOMIC_CAS] = IB_WC_COMP_SWAP, - [ERDMA_OP_ATOMIC_FAD] = IB_WC_FETCH_ADD, + [ERDMA_OP_ATOMIC_FAA] = IB_WC_FETCH_ADD, }; static const struct { diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 4c38d99c73f1c..5d3a541db9417 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -491,7 +491,7 @@ enum erdma_opcode { ERDMA_OP_LOCAL_INV = 15, ERDMA_OP_READ_WITH_INV = 16, ERDMA_OP_ATOMIC_CAS = 17, - ERDMA_OP_ATOMIC_FAD = 18, + ERDMA_OP_ATOMIC_FAA = 18, ERDMA_NUM_OPCODES = 19, ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1 }; diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index d088d6bef431a..ff473b208acfb 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -439,7 +439,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, cpu_to_le64(atomic_wr(send_wr)->compare_add); } else { wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, - ERDMA_OP_ATOMIC_FAD); + ERDMA_OP_ATOMIC_FAA); atomic_sqe->fetchadd_swap_data = cpu_to_le64(atomic_wr(send_wr)->compare_add); } From 6256aa9ae955d10ec73a434533ca62034eff1b76 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Mon, 20 Mar 2023 16:46:50 +0800 Subject: [PATCH 237/898] RDMA/erdma: Update default EQ depth to 4096 and max_send_wr to 8192 Max EQ depth of hardware is 32K, the current default EQ depth is too small for some applications, so change the default depth to 4096. Max send WRs the hardware can support is 8K, but the driver limits the value to 4K. Remove this limitation. Fixes: be3cff0f242d ("RDMA/erdma: Add the hardware related definitions") Fixes: db23ae64caac ("RDMA/erdma: Add verbs header file") Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20230320084652.16807-3-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_hw.h | 2 +- drivers/infiniband/hw/erdma/erdma_verbs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 5d3a541db9417..37ad1bb1917c4 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -441,7 +441,7 @@ struct erdma_reg_mr_sqe { }; /* EQ related. */ -#define ERDMA_DEFAULT_EQ_DEPTH 256 +#define ERDMA_DEFAULT_EQ_DEPTH 4096 /* ceqe */ #define ERDMA_CEQE_HDR_DB_MASK BIT_ULL(63) diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index e0a993bc032a4..131cf5f409822 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -11,7 +11,7 @@ /* RDMA Capability. */ #define ERDMA_MAX_PD (128 * 1024) -#define ERDMA_MAX_SEND_WR 4096 +#define ERDMA_MAX_SEND_WR 8192 #define ERDMA_MAX_ORD 128 #define ERDMA_MAX_IRD 128 #define ERDMA_MAX_SGE_RD 1 From 0dd83a4d7756713f81990d6c5547500f212a1190 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Mon, 20 Mar 2023 16:46:51 +0800 Subject: [PATCH 238/898] RDMA/erdma: Inline mtt entries into WQE if supported The max inline mtt count supported is ERDMA_MAX_INLINE_MTT_ENTRIES. When mr->mem.mtt_nents == ERDMA_MAX_INLINE_MTT_ENTRIES, inline mtt is also supported, fix it. Fixes: 155055771704 ("RDMA/erdma: Add verbs implementation") Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20230320084652.16807-4-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index ff473b208acfb..44923c51a01b4 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -405,7 +405,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK, mr->mem.mtt_nents); - if (mr->mem.mtt_nents < ERDMA_MAX_INLINE_MTT_ENTRIES) { + if (mr->mem.mtt_nents <= ERDMA_MAX_INLINE_MTT_ENTRIES) { attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 0); /* Copy SGLs to SQE content to accelerate */ memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1, From 6bd1bca858f1734a75572a788213d1e1143f2f0a Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Mon, 20 Mar 2023 16:46:52 +0800 Subject: [PATCH 239/898] RDMA/erdma: Defer probing if netdevice can not be found ERDMA device may be probed before its associated netdevice, returning -EPROBE_DEFER allows OS try to probe erdma device later. Fixes: d55e6fb4803c ("RDMA/erdma: Add the erdma module") Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20230320084652.16807-5-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index 5dc31e5df5cba..4a29a53a6652e 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -56,7 +56,7 @@ static int erdma_netdev_event(struct notifier_block *nb, unsigned long event, static int erdma_enum_and_get_netdev(struct erdma_dev *dev) { struct net_device *netdev; - int ret = -ENODEV; + int ret = -EPROBE_DEFER; /* Already binded to a net_device, so we skip. */ if (dev->netdev) From da0ba0ccce54059d6c6b788a75099bfce95126da Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Thu, 9 Mar 2023 12:01:07 +0800 Subject: [PATCH 240/898] platform/x86/intel: vsec: Fix a memory leak in intel_vsec_add_aux The first error handling code in intel_vsec_add_aux misses the deallocation of intel_vsec_dev->resource. Fix this by adding kfree(intel_vsec_dev->resource) in the error handling code. Reviewed-by: David E. Box Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20230309040107.534716-4-dzm91@hust.edu.cn Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/vsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c index 13decf36c6ded..2311c16cb975d 100644 --- a/drivers/platform/x86/intel/vsec.c +++ b/drivers/platform/x86/intel/vsec.c @@ -154,6 +154,7 @@ int intel_vsec_add_aux(struct pci_dev *pdev, struct device *parent, ret = ida_alloc(intel_vsec_dev->ida, GFP_KERNEL); mutex_unlock(&vsec_ida_lock); if (ret < 0) { + kfree(intel_vsec_dev->resource); kfree(intel_vsec_dev); return ret; } From 4d5a2a7d2c97dbd658533eea5f79dab1ad5dc0ee Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Thu, 9 Mar 2023 12:01:05 +0800 Subject: [PATCH 241/898] platform/x86/intel: tpmi: Fix double free in tpmi_create_device() The previous commit 6a192c0cbf38 ("platform/x86/intel/tpmi: Fix double free reported by Smatch") incorrectly handle the deallocation of res variable. As shown in the comment, intel_vsec_add_aux handles all the deallocation of res and feature_vsec_dev. Therefore, kfree(res) can still cause double free if intel_vsec_add_aux returns error. Fix this by adjusting the error handling part in tpmi_create_device, following the function intel_vsec_add_dev. Fixes: 6a192c0cbf38 ("platform/x86/intel/tpmi: Fix double free reported by Smatch") Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20230309040107.534716-2-dzm91@hust.edu.cn Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/tpmi.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/platform/x86/intel/tpmi.c b/drivers/platform/x86/intel/tpmi.c index c999732b0f1e5..a8733c43e4abe 100644 --- a/drivers/platform/x86/intel/tpmi.c +++ b/drivers/platform/x86/intel/tpmi.c @@ -203,7 +203,7 @@ static int tpmi_create_device(struct intel_tpmi_info *tpmi_info, struct intel_vsec_device *feature_vsec_dev; struct resource *res, *tmp; const char *name; - int ret, i; + int i; name = intel_tpmi_name(pfs->pfs_header.tpmi_id); if (!name) @@ -215,8 +215,8 @@ static int tpmi_create_device(struct intel_tpmi_info *tpmi_info, feature_vsec_dev = kzalloc(sizeof(*feature_vsec_dev), GFP_KERNEL); if (!feature_vsec_dev) { - ret = -ENOMEM; - goto free_res; + kfree(res); + return -ENOMEM; } snprintf(feature_id_name, sizeof(feature_id_name), "tpmi-%s", name); @@ -242,17 +242,8 @@ static int tpmi_create_device(struct intel_tpmi_info *tpmi_info, * feature_vsec_dev memory is also freed as part of device * delete. */ - ret = intel_vsec_add_aux(vsec_dev->pcidev, &vsec_dev->auxdev.dev, - feature_vsec_dev, feature_id_name); - if (ret) - goto free_res; - - return 0; - -free_res: - kfree(res); - - return ret; + return intel_vsec_add_aux(vsec_dev->pcidev, &vsec_dev->auxdev.dev, + feature_vsec_dev, feature_id_name); } static int tpmi_create_devices(struct intel_tpmi_info *tpmi_info) From 8d13d50b157655247cdb3a69aca7836b58ff8735 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Thu, 9 Mar 2023 12:01:06 +0800 Subject: [PATCH 242/898] platform/x86/intel: tpmi: Revise the comment of intel_vsec_add_aux intel_vsec_add_aux() is resource managed including res and feature_vsec_dev memory. Fix this by revising the comment of intel_vsec_add_aux since res variable will also be freed in the intel_vsec_add_aux. Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20230309040107.534716-3-dzm91@hust.edu.cn Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/tpmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/tpmi.c b/drivers/platform/x86/intel/tpmi.c index a8733c43e4abe..a5227951decce 100644 --- a/drivers/platform/x86/intel/tpmi.c +++ b/drivers/platform/x86/intel/tpmi.c @@ -239,8 +239,8 @@ static int tpmi_create_device(struct intel_tpmi_info *tpmi_info, /* * intel_vsec_add_aux() is resource managed, no explicit * delete is required on error or on module unload. - * feature_vsec_dev memory is also freed as part of device - * delete. + * feature_vsec_dev and res memory are also freed as part of + * device deletion. */ return intel_vsec_add_aux(vsec_dev->pcidev, &vsec_dev->auxdev.dev, feature_vsec_dev, feature_id_name); From b7c994f8c35e916e27c60803bb21457bc1373500 Mon Sep 17 00:00:00 2001 From: Frank Crawford Date: Sat, 18 Mar 2023 20:14:41 +1100 Subject: [PATCH 243/898] platform/x86 (gigabyte-wmi): Add support for A320M-S2H V2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for A320M-S2H V2. Tested using module force_load option. Signed-off-by: Frank Crawford Acked-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20230318091441.1240921-1-frank@crawford.emu.id.au Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/gigabyte-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index 322cfaeda17ba..4dd39ab6ecfa2 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -140,6 +140,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev) }} static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("A320M-S2H V2-CF"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M DS3H-CF"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M DS3H WIFI-CF"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M S2H V2"), From 58cdfe6f58b35f17f56386f5fcf937168a423ad1 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 15 Mar 2023 18:04:50 -0400 Subject: [PATCH 244/898] thunderbolt: Rename shadowed variables bit to interrupt_bit and auto_clear_bit cppcheck reports drivers/thunderbolt/nhi.c:74:7: style: Local variable 'bit' shadows outer variable [shadowVariable] int bit; ^ drivers/thunderbolt/nhi.c:66:6: note: Shadowed declaration int bit = ring_interrupt_index(ring) & 31; ^ drivers/thunderbolt/nhi.c:74:7: note: Shadow variable int bit; ^ For readablity rename the outer to interrupt_bit and the innner to auto_clear_bit. Fixes: 468c49f44759 ("thunderbolt: Disable interrupt auto clear for ring") Cc: stable@vger.kernel.org Signed-off-by: Tom Rix Signed-off-by: Mika Westerberg --- drivers/thunderbolt/nhi.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 318d20bd5b695..cfebec107f3fc 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -63,15 +63,15 @@ static void ring_interrupt_active(struct tb_ring *ring, bool active) { int reg = REG_RING_INTERRUPT_BASE + ring_interrupt_index(ring) / 32 * 4; - int bit = ring_interrupt_index(ring) & 31; - int mask = 1 << bit; + int interrupt_bit = ring_interrupt_index(ring) & 31; + int mask = 1 << interrupt_bit; u32 old, new; if (ring->irq > 0) { u32 step, shift, ivr, misc; void __iomem *ivr_base; + int auto_clear_bit; int index; - int bit; if (ring->is_tx) index = ring->hop; @@ -91,11 +91,12 @@ static void ring_interrupt_active(struct tb_ring *ring, bool active) */ misc = ioread32(ring->nhi->iobase + REG_DMA_MISC); if (ring->nhi->quirks & QUIRK_AUTO_CLEAR_INT) - bit = REG_DMA_MISC_INT_AUTO_CLEAR; + auto_clear_bit = REG_DMA_MISC_INT_AUTO_CLEAR; else - bit = REG_DMA_MISC_DISABLE_AUTO_CLEAR; - if (!(misc & bit)) - iowrite32(misc | bit, ring->nhi->iobase + REG_DMA_MISC); + auto_clear_bit = REG_DMA_MISC_DISABLE_AUTO_CLEAR; + if (!(misc & auto_clear_bit)) + iowrite32(misc | auto_clear_bit, + ring->nhi->iobase + REG_DMA_MISC); ivr_base = ring->nhi->iobase + REG_INT_VEC_ALLOC_BASE; step = index / REG_INT_VEC_ALLOC_REGS * REG_INT_VEC_ALLOC_BITS; @@ -115,7 +116,7 @@ static void ring_interrupt_active(struct tb_ring *ring, bool active) dev_dbg(&ring->nhi->pdev->dev, "%s interrupt at register %#x bit %d (%#x -> %#x)\n", - active ? "enabling" : "disabling", reg, bit, old, new); + active ? "enabling" : "disabling", reg, interrupt_bit, old, new); if (new == old) dev_WARN(&ring->nhi->pdev->dev, From 5e7a3bf65db57461d0f47955248fcadf37321a74 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Mar 2023 16:59:46 +0100 Subject: [PATCH 245/898] ACPI: video: Add backlight=native DMI quirk for Acer Aspire 3830TG The Acer Aspire 3830TG predates Windows 8, so it defaults to using acpi_video# for backlight control, but this is non functional on this model. Add a DMI quirk to use the native backlight interface which does work properly. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 14d6d81e536fe..fd7cbce8076e2 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -495,6 +495,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision 7510"), }, }, + { + .callback = video_detect_force_native, + /* Acer Aspire 3830TG */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 3830TG"), + }, + }, { .callback = video_detect_force_native, /* Acer Aspire 4810T */ From 7d31677bb7b1944ac89e9155110dc1b9acbb3895 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 27 Jan 2023 23:14:00 +0100 Subject: [PATCH 246/898] gpu: host1x: fix uninitialized variable use The error handling for platform_get_irq() failing no longer works after a recent change, clang now points this out with a warning: drivers/gpu/host1x/dev.c:520:6: error: variable 'syncpt_irq' is uninitialized when used here [-Werror,-Wuninitialized] if (syncpt_irq < 0) ^~~~~~~~~~ Fix this by removing the variable and checking the correct error status. Fixes: 625d4ffb438c ("gpu: host1x: Rewrite syncpoint interrupt handling") Signed-off-by: Arnd Bergmann Reviewed-by: Jon Hunter Reviewed-by: Nick Desaulniers Reviewed-by: Mikko Perttunen Reviewed-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- drivers/gpu/host1x/dev.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 4872d183d8604..aae2efeef5032 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -487,7 +487,6 @@ static int host1x_get_resets(struct host1x *host) static int host1x_probe(struct platform_device *pdev) { struct host1x *host; - int syncpt_irq; int err; host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL); @@ -517,8 +516,8 @@ static int host1x_probe(struct platform_device *pdev) } host->syncpt_irq = platform_get_irq(pdev, 0); - if (syncpt_irq < 0) - return syncpt_irq; + if (host->syncpt_irq < 0) + return host->syncpt_irq; mutex_init(&host->devices_lock); INIT_LIST_HEAD(&host->devices); From f420f47e56c67587d9bc8f94267327b6fb214c1d Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 10 Mar 2023 17:45:23 +0100 Subject: [PATCH 247/898] clk: imx6ul: fix "failed to get parent" error On some configuration we may get following error: [ 0.000000] imx:clk-gpr-mux: failed to get parent (-EINVAL) This happens if selector is configured to not supported value. To avoid this warnings add dummy parents for not supported values. Fixes: 4e197ee880c2 ("clk: imx6ul: add ethernet refclock mux support") Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20230310164523.534571-1-o.rempel@pengutronix.de Tested-by: Stefan Wahren Reported-by: Stefan Wahren Reviewed-by: Peng Fan Signed-off-by: Stephen Boyd --- drivers/clk/imx/clk-imx6ul.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c index 2836adb817b70..e3696a88b5a36 100644 --- a/drivers/clk/imx/clk-imx6ul.c +++ b/drivers/clk/imx/clk-imx6ul.c @@ -95,14 +95,16 @@ static const struct clk_div_table video_div_table[] = { { } }; -static const char * enet1_ref_sels[] = { "enet1_ref_125m", "enet1_ref_pad", }; +static const char * enet1_ref_sels[] = { "enet1_ref_125m", "enet1_ref_pad", "dummy", "dummy"}; static const u32 enet1_ref_sels_table[] = { IMX6UL_GPR1_ENET1_TX_CLK_DIR, - IMX6UL_GPR1_ENET1_CLK_SEL }; + IMX6UL_GPR1_ENET1_CLK_SEL, 0, + IMX6UL_GPR1_ENET1_TX_CLK_DIR | IMX6UL_GPR1_ENET1_CLK_SEL }; static const u32 enet1_ref_sels_table_mask = IMX6UL_GPR1_ENET1_TX_CLK_DIR | IMX6UL_GPR1_ENET1_CLK_SEL; -static const char * enet2_ref_sels[] = { "enet2_ref_125m", "enet2_ref_pad", }; +static const char * enet2_ref_sels[] = { "enet2_ref_125m", "enet2_ref_pad", "dummy", "dummy"}; static const u32 enet2_ref_sels_table[] = { IMX6UL_GPR1_ENET2_TX_CLK_DIR, - IMX6UL_GPR1_ENET2_CLK_SEL }; + IMX6UL_GPR1_ENET2_CLK_SEL, 0, + IMX6UL_GPR1_ENET2_TX_CLK_DIR | IMX6UL_GPR1_ENET2_CLK_SEL }; static const u32 enet2_ref_sels_table_mask = IMX6UL_GPR1_ENET2_TX_CLK_DIR | IMX6UL_GPR1_ENET2_CLK_SEL; From d7e673c2a900206bea3461a4b4ecc74ea930f80e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 20 Mar 2023 15:35:06 +0900 Subject: [PATCH 248/898] zonefs: Prevent uninitialized symbol 'size' warning In zonefs_file_dio_append(), initialize the variable size to 0 to prevent compilation and static code analizers warning such as: New smatch warnings: fs/zonefs/file.c:441 zonefs_file_dio_append() error: uninitialized symbol 'size'. The warning is a false positive as size is never actually used uninitialized. No functional change. Reported-by: kernel test robot Reported-by: Dan Carpenter Link: https://lore.kernel.org/r/202303191227.GL8Dprbi-lkp@intel.com/ Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Himanshu Madhani --- fs/zonefs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index 738b0e28d74b5..a545a6d9a32ef 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -383,7 +383,7 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) struct block_device *bdev = inode->i_sb->s_bdev; unsigned int max = bdev_max_zone_append_sectors(bdev); struct bio *bio; - ssize_t size; + ssize_t size = 0; int nr_pages; ssize_t ret; From 88b170088ad2c3e27086fe35769aa49f8a512564 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 20 Mar 2023 22:49:15 +0900 Subject: [PATCH 249/898] zonefs: Fix error message in zonefs_file_dio_append() Since the expected write location in a sequential file is always at the end of the file (append write), when an invalid write append location is detected in zonefs_file_dio_append(), print the invalid written location instead of the expected write location. Fixes: a608da3bd730 ("zonefs: Detect append writes at invalid locations") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Himanshu Madhani --- fs/zonefs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index a545a6d9a32ef..617e4f9db42ea 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -426,7 +426,7 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) if (bio->bi_iter.bi_sector != wpsector) { zonefs_warn(inode->i_sb, "Corrupted write pointer %llu for zone at %llu\n", - wpsector, z->z_sector); + bio->bi_iter.bi_sector, z->z_sector); ret = -EIO; } } From 05107edc910135d27fe557267dc45be9630bf3dd Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 8 Mar 2023 11:59:33 -0800 Subject: [PATCH 250/898] selftests: sigaltstack: fix -Wuninitialized Building sigaltstack with clang via: $ ARCH=x86 make LLVM=1 -C tools/testing/selftests/sigaltstack/ produces the following warning: warning: variable 'sp' is uninitialized when used here [-Wuninitialized] if (sp < (unsigned long)sstack || ^~ Clang expects these to be declared at global scope; we've fixed this in the kernel proper by using the macro `current_stack_pointer`. This is defined in different headers for different target architectures, so just create a new header that defines the arch-specific register names for the stack pointer register, and define it for more targets (at least the ones that support current_stack_pointer/ARCH_HAS_CURRENT_STACK_POINTER). Reported-by: Linux Kernel Functional Testing Link: https://lore.kernel.org/lkml/CA+G9fYsi3OOu7yCsMutpzKDnBMAzJBCPimBp86LhGBa0eCnEpA@mail.gmail.com/ Signed-off-by: Nick Desaulniers Reviewed-by: Kees Cook Tested-by: Linux Kernel Functional Testing Tested-by: Anders Roxell Signed-off-by: Shuah Khan --- .../sigaltstack/current_stack_pointer.h | 23 +++++++++++++++++++ tools/testing/selftests/sigaltstack/sas.c | 7 +----- 2 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 tools/testing/selftests/sigaltstack/current_stack_pointer.h diff --git a/tools/testing/selftests/sigaltstack/current_stack_pointer.h b/tools/testing/selftests/sigaltstack/current_stack_pointer.h new file mode 100644 index 0000000000000..ea9bdf3a90b16 --- /dev/null +++ b/tools/testing/selftests/sigaltstack/current_stack_pointer.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#if __alpha__ +register unsigned long sp asm("$30"); +#elif __arm__ || __aarch64__ || __csky__ || __m68k__ || __mips__ || __riscv +register unsigned long sp asm("sp"); +#elif __i386__ +register unsigned long sp asm("esp"); +#elif __loongarch64 +register unsigned long sp asm("$sp"); +#elif __ppc__ +register unsigned long sp asm("r1"); +#elif __s390x__ +register unsigned long sp asm("%15"); +#elif __sh__ +register unsigned long sp asm("r15"); +#elif __x86_64__ +register unsigned long sp asm("rsp"); +#elif __XTENSA__ +register unsigned long sp asm("a1"); +#else +#error "implement current_stack_pointer equivalent" +#endif diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c index c53b070755b65..98d37cb744fb2 100644 --- a/tools/testing/selftests/sigaltstack/sas.c +++ b/tools/testing/selftests/sigaltstack/sas.c @@ -20,6 +20,7 @@ #include #include "../kselftest.h" +#include "current_stack_pointer.h" #ifndef SS_AUTODISARM #define SS_AUTODISARM (1U << 31) @@ -46,12 +47,6 @@ void my_usr1(int sig, siginfo_t *si, void *u) stack_t stk; struct stk_data *p; -#if __s390x__ - register unsigned long sp asm("%15"); -#else - register unsigned long sp asm("sp"); -#endif - if (sp < (unsigned long)sstack || sp >= (unsigned long)sstack + stack_size) { ksft_exit_fail_msg("SP is not on sigaltstack\n"); From 9d2789ac9d60c049d26ef6d3005d9c94c5a559e9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 20 Mar 2023 20:01:25 -0600 Subject: [PATCH 251/898] block/io_uring: pass in issue_flags for uring_cmd task_work handling io_uring_cmd_done() currently assumes that the uring_lock is held when invoked, and while it generally is, this is not guaranteed. Pass in the issue_flags associated with it, so that we have IO_URING_F_UNLOCKED available to be able to lock the CQ ring appropriately when completing events. Cc: stable@vger.kernel.org Fixes: ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd") Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 31 ++++++++++++++++++------------- drivers/nvme/host/ioctl.c | 14 ++++++++------ include/linux/io_uring.h | 11 ++++++----- io_uring/uring_cmd.c | 10 ++++++---- 4 files changed, 38 insertions(+), 28 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index fb5a557afde8b..c73cc57ec5477 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -715,7 +715,8 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io, } } -static void ubq_complete_io_cmd(struct ublk_io *io, int res) +static void ubq_complete_io_cmd(struct ublk_io *io, int res, + unsigned issue_flags) { /* mark this cmd owned by ublksrv */ io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV; @@ -727,7 +728,7 @@ static void ubq_complete_io_cmd(struct ublk_io *io, int res) io->flags &= ~UBLK_IO_FLAG_ACTIVE; /* tell ublksrv one io request is coming */ - io_uring_cmd_done(io->cmd, res, 0); + io_uring_cmd_done(io->cmd, res, 0, issue_flags); } #define UBLK_REQUEUE_DELAY_MS 3 @@ -744,7 +745,8 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0); } -static inline void __ublk_rq_task_work(struct request *req) +static inline void __ublk_rq_task_work(struct request *req, + unsigned issue_flags) { struct ublk_queue *ubq = req->mq_hctx->driver_data; int tag = req->tag; @@ -782,7 +784,7 @@ static inline void __ublk_rq_task_work(struct request *req) pr_devel("%s: need get data. op %d, qid %d tag %d io_flags %x\n", __func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags); - ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA); + ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA, issue_flags); return; } /* @@ -820,17 +822,18 @@ static inline void __ublk_rq_task_work(struct request *req) mapped_bytes >> 9; } - ubq_complete_io_cmd(io, UBLK_IO_RES_OK); + ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags); } -static inline void ublk_forward_io_cmds(struct ublk_queue *ubq) +static inline void ublk_forward_io_cmds(struct ublk_queue *ubq, + unsigned issue_flags) { struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); struct ublk_rq_data *data, *tmp; io_cmds = llist_reverse_order(io_cmds); llist_for_each_entry_safe(data, tmp, io_cmds, node) - __ublk_rq_task_work(blk_mq_rq_from_pdu(data)); + __ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags); } static inline void ublk_abort_io_cmds(struct ublk_queue *ubq) @@ -842,12 +845,12 @@ static inline void ublk_abort_io_cmds(struct ublk_queue *ubq) __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data)); } -static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd) +static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags) { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; - ublk_forward_io_cmds(ubq); + ublk_forward_io_cmds(ubq, issue_flags); } static void ublk_rq_task_work_fn(struct callback_head *work) @@ -856,8 +859,9 @@ static void ublk_rq_task_work_fn(struct callback_head *work) struct ublk_rq_data, work); struct request *req = blk_mq_rq_from_pdu(data); struct ublk_queue *ubq = req->mq_hctx->driver_data; + unsigned issue_flags = IO_URING_F_UNLOCKED; - ublk_forward_io_cmds(ubq); + ublk_forward_io_cmds(ubq, issue_flags); } static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) @@ -1111,7 +1115,8 @@ static void ublk_cancel_queue(struct ublk_queue *ubq) struct ublk_io *io = &ubq->ios[i]; if (io->flags & UBLK_IO_FLAG_ACTIVE) - io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0); + io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0, + IO_URING_F_UNLOCKED); } /* all io commands are canceled */ @@ -1351,7 +1356,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) return -EIOCBQUEUED; out: - io_uring_cmd_done(cmd, ret, 0); + io_uring_cmd_done(cmd, ret, 0, issue_flags); pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n", __func__, cmd_op, tag, ret, io->flags); return -EIOCBQUEUED; @@ -2234,7 +2239,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, if (ub) ublk_put_device(ub); out: - io_uring_cmd_done(cmd, ret, 0); + io_uring_cmd_done(cmd, ret, 0, issue_flags); pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n", __func__, ret, cmd->cmd_op, header->dev_id, header->queue_id); return -EIOCBQUEUED; diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 723e7d5b778f2..d24ea2e051564 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -464,7 +464,8 @@ static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu; } -static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd) +static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd, + unsigned issue_flags) { struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); struct request *req = pdu->req; @@ -485,17 +486,18 @@ static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd) blk_rq_unmap_user(req->bio); blk_mq_free_request(req); - io_uring_cmd_done(ioucmd, status, result); + io_uring_cmd_done(ioucmd, status, result, issue_flags); } -static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd) +static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd, + unsigned issue_flags) { struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); if (pdu->bio) blk_rq_unmap_user(pdu->bio); - io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result); + io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result, issue_flags); } static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, @@ -517,7 +519,7 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, * Otherwise, move the completion to task work. */ if (cookie != NULL && blk_rq_is_poll(req)) - nvme_uring_task_cb(ioucmd); + nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED); else io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb); @@ -539,7 +541,7 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req, * Otherwise, move the completion to task work. */ if (cookie != NULL && blk_rq_is_poll(req)) - nvme_uring_task_meta_cb(ioucmd); + nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED); else io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_meta_cb); diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 934e5dd4ccc08..35b9328ca3352 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -27,7 +27,7 @@ struct io_uring_cmd { const void *cmd; union { /* callback to defer completions to task context */ - void (*task_work_cb)(struct io_uring_cmd *cmd); + void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned); /* used for polled completion */ void *cookie; }; @@ -39,9 +39,10 @@ struct io_uring_cmd { #if defined(CONFIG_IO_URING) int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd); -void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2); +void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, + unsigned issue_flags); void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *)); + void (*task_work_cb)(struct io_uring_cmd *, unsigned)); struct sock *io_uring_get_socket(struct file *file); void __io_uring_cancel(bool cancel_all); void __io_uring_free(struct task_struct *tsk); @@ -72,11 +73,11 @@ static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, return -EOPNOTSUPP; } static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, - ssize_t ret2) + ssize_t ret2, unsigned issue_flags) { } static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *)) + void (*task_work_cb)(struct io_uring_cmd *, unsigned)) { } static inline struct sock *io_uring_get_socket(struct file *file) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 446a189b78b03..e535e8db01e3e 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -15,12 +15,13 @@ static void io_uring_cmd_work(struct io_kiocb *req, bool *locked) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); + unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED; - ioucmd->task_work_cb(ioucmd); + ioucmd->task_work_cb(ioucmd, issue_flags); } void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *)) + void (*task_work_cb)(struct io_uring_cmd *, unsigned)) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); @@ -42,7 +43,8 @@ static inline void io_req_set_cqe32_extra(struct io_kiocb *req, * Called by consumers of io_uring_cmd, if they originally returned * -EIOCBQUEUED upon receiving the command. */ -void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2) +void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2, + unsigned issue_flags) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); @@ -56,7 +58,7 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2) /* order with io_iopoll_req_issued() checking ->iopoll_complete */ smp_store_release(&req->iopoll_completed, 1); else - io_req_complete_post(req, 0); + io_req_complete_post(req, issue_flags); } EXPORT_SYMBOL_GPL(io_uring_cmd_done); From 74e2e17ee1f8d8a0928b90434ad7e2df70f8483e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 20 Mar 2023 11:13:49 -0600 Subject: [PATCH 252/898] io_uring/net: avoid sending -ECONNABORTED on repeated connection requests Since io_uring does nonblocking connect requests, if we do two repeated ones without having a listener, the second will get -ECONNABORTED rather than the expected -ECONNREFUSED. Treat -ECONNABORTED like a normal retry condition if we're nonblocking, if we haven't already seen it. Cc: stable@vger.kernel.org Fixes: 3fb1bd688172 ("io_uring/net: handle -EINPROGRESS correct for IORING_OP_CONNECT") Link: https://github.com/axboe/liburing/issues/828 Reported-by: Hui, Chunyang Signed-off-by: Jens Axboe --- io_uring/net.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index b7f190ca528e6..4040cf093318c 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -47,6 +47,7 @@ struct io_connect { struct sockaddr __user *addr; int addr_len; bool in_progress; + bool seen_econnaborted; }; struct io_sr_msg { @@ -1424,7 +1425,7 @@ int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); conn->addr_len = READ_ONCE(sqe->addr2); - conn->in_progress = false; + conn->in_progress = conn->seen_econnaborted = false; return 0; } @@ -1461,18 +1462,24 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) ret = __sys_connect_file(req->file, &io->address, connect->addr_len, file_flags); - if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) { + if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED) + && force_nonblock) { if (ret == -EINPROGRESS) { connect->in_progress = true; - } else { - if (req_has_async_data(req)) - return -EAGAIN; - if (io_alloc_async_data(req)) { - ret = -ENOMEM; + return -EAGAIN; + } + if (ret == -ECONNABORTED) { + if (connect->seen_econnaborted) goto out; - } - memcpy(req->async_data, &__io, sizeof(__io)); + connect->seen_econnaborted = true; + } + if (req_has_async_data(req)) + return -EAGAIN; + if (io_alloc_async_data(req)) { + ret = -ENOMEM; + goto out; } + memcpy(req->async_data, &__io, sizeof(__io)); return -EAGAIN; } if (ret == -ERESTARTSYS) From f038f3917baf04835ba2b7bcf2a04ac93fbf8a9c Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Fri, 17 Mar 2023 14:43:37 +0800 Subject: [PATCH 253/898] octeontx2-vf: Add missing free for alloc_percpu Add the free_percpu for the allocated "vf->hw.lmt_info" in order to avoid memory leak, same as the "pf->hw.lmt_info" in `drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c`. Fixes: 5c0512072f65 ("octeontx2-pf: cn10k: Use runtime allocated LMTLINE region") Signed-off-by: Jiasheng Jiang Reviewed-by: Michal Swiatkowski Acked-by: Geethasowjanya Akula Link: https://lore.kernel.org/r/20230317064337.18198-1-jiasheng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 7f8ffbf79cf74..ab126f8706c74 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -709,6 +709,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_ptp_destroy: otx2_ptp_destroy(vf); err_detach_rsrc: + free_percpu(vf->hw.lmt_info); if (test_bit(CN10K_LMTST, &vf->hw.cap_flag)) qmem_free(vf->dev, vf->dync_lmt); otx2_detach_resources(&vf->mbox); @@ -762,6 +763,7 @@ static void otx2vf_remove(struct pci_dev *pdev) otx2_shutdown_tc(vf); otx2vf_disable_mbox_intr(vf); otx2_detach_resources(&vf->mbox); + free_percpu(vf->hw.lmt_info); if (test_bit(CN10K_LMTST, &vf->hw.cap_flag)) qmem_free(vf->dev, vf->dync_lmt); otx2vf_vfaf_mbox_destroy(vf); From b871cb971c683f7f212e7ca3c9a6709a75785116 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 20 Mar 2023 15:09:54 +0100 Subject: [PATCH 254/898] ALSA: hda/conexant: Partial revert of a quirk for Lenovo The recent commit f83bb2592482 ("ALSA: hda/conexant: Add quirk for LENOVO 20149 Notebook model") introduced a quirk for the device with 17aa:3977, but this caused a regression on another model (Lenovo Ideadpad U31) with the very same PCI SSID. And, through skimming over the net, it seems that this PCI SSID is used for multiple different models, so it's no good idea to apply the quirk with the SSID. Although we may take a different ID check (e.g. the codec SSID instead of the PCI SSID), unfortunately, the original patch author couldn't identify the hardware details any longer as the machine was returned, and we can't develop the further proper fix. In this patch, instead, we partially revert the change so that the quirk won't be applied as default for addressing the regression. Meanwhile, the quirk function itself is kept, and it's now made to be applicable via the explicit model=lenovo-20149 option. Fixes: f83bb2592482 ("ALSA: hda/conexant: Add quirk for LENOVO 20149 Notebook model") Reported-by: Jetro Jormalainen Link: https://lore.kernel.org/r/20230308215009.4d3e58a6@mopti Cc: Link: https://lore.kernel.org/r/20230320140954.31154-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 75e1d00074b9f..a889cccdd607c 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -980,7 +980,10 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x17aa, 0x3905, "Lenovo G50-30", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x390b, "Lenovo G50-80", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3975, "Lenovo U300s", CXT_FIXUP_STEREO_DMIC), - SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_PINCFG_LENOVO_NOTEBOOK), + /* NOTE: we'd need to extend the quirk for 17aa:3977 as the same + * PCI SSID is used on multiple Lenovo models + */ + SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo G50-70", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x397b, "Lenovo S205", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK_VENDOR(0x17aa, "Thinkpad", CXT_FIXUP_THINKPAD_ACPI), @@ -1003,6 +1006,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_FIXUP_MUTE_LED_GPIO, .name = "mute-led-gpio" }, { .id = CXT_FIXUP_HP_ZBOOK_MUTE_LED, .name = "hp-zbook-mute-led" }, { .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" }, + { .id = CXT_PINCFG_LENOVO_NOTEBOOK, .name = "lenovo-20149" }, {} }; From 8c721c53dda512fdd48eb24d6d99e56deee57898 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 20 Mar 2023 15:28:38 +0100 Subject: [PATCH 255/898] ALSA: usb-audio: Fix recursive locking at XRUN during syncing The recent support of low latency playback in USB-audio driver made the snd_usb_queue_pending_output_urbs() function to be called via PCM ack ops. In the new code path, the function is performed already in the PCM stream lock. The problem is that, when an XRUN is detected, the function calls snd_pcm_xrun() to notify, but snd_pcm_xrun() is supposed to be called only outside the stream lock. As a result, it leads to a deadlock of PCM stream locking. For avoiding such a recursive locking, this patch adds an additional check to the code paths in PCM core that call the ack callback; now it checks the error code from the callback, and if it's -EPIPE, the XRUN is handled in the PCM core side gracefully. Along with it, the USB-audio driver code is changed to follow that, i.e. -EPIPE is returned instead of the explicit snd_pcm_xrun() call when the function is performed already in the stream lock. Fixes: d5f871f89e21 ("ALSA: usb-audio: Improved lowlatency playback support") Reported-and-tested-by: John Keeping Link: https://lore.kernel.org/r/20230317195128.3911155-1-john@metanate.com Reviewed-by: Jaroslav Kysela Reviewed-by; Takashi Sakamoto Link: https://lore.kernel.org/r/20230320142838.494-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/pcm_lib.c | 2 ++ sound/usb/endpoint.c | 22 ++++++++++++++-------- sound/usb/endpoint.h | 4 ++-- sound/usb/pcm.c | 2 +- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 8b6aeb8a78f7d..02fd65993e7e5 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -2155,6 +2155,8 @@ int pcm_lib_apply_appl_ptr(struct snd_pcm_substream *substream, ret = substream->ops->ack(substream); if (ret < 0) { runtime->control->appl_ptr = old_appl_ptr; + if (ret == -EPIPE) + __snd_pcm_xrun(substream); return ret; } } diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 419302e2057e8..647fa054d8b1d 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -455,8 +455,8 @@ static void push_back_to_ready_list(struct snd_usb_endpoint *ep, * This function is used both for implicit feedback endpoints and in low- * latency playback mode. */ -void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep, - bool in_stream_lock) +int snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep, + bool in_stream_lock) { bool implicit_fb = snd_usb_endpoint_implicit_feedback_sink(ep); @@ -480,7 +480,7 @@ void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep, spin_unlock_irqrestore(&ep->lock, flags); if (ctx == NULL) - return; + break; /* copy over the length information */ if (implicit_fb) { @@ -495,11 +495,14 @@ void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep, break; if (err < 0) { /* push back to ready list again for -EAGAIN */ - if (err == -EAGAIN) + if (err == -EAGAIN) { push_back_to_ready_list(ep, ctx); - else + break; + } + + if (!in_stream_lock) notify_xrun(ep); - return; + return -EPIPE; } err = usb_submit_urb(ctx->urb, GFP_ATOMIC); @@ -507,13 +510,16 @@ void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep, usb_audio_err(ep->chip, "Unable to submit urb #%d: %d at %s\n", ctx->index, err, __func__); - notify_xrun(ep); - return; + if (!in_stream_lock) + notify_xrun(ep); + return -EPIPE; } set_bit(ctx->index, &ep->active_mask); atomic_inc(&ep->submitted_urbs); } + + return 0; } /* diff --git a/sound/usb/endpoint.h b/sound/usb/endpoint.h index 924f4351588ce..c09f68ce08b18 100644 --- a/sound/usb/endpoint.h +++ b/sound/usb/endpoint.h @@ -52,7 +52,7 @@ int snd_usb_endpoint_implicit_feedback_sink(struct snd_usb_endpoint *ep); int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep, struct snd_urb_ctx *ctx, int idx, unsigned int avail); -void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep, - bool in_stream_lock); +int snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep, + bool in_stream_lock); #endif /* __USBAUDIO_ENDPOINT_H */ diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index d959da7a1afba..eec5232f9fb29 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -1639,7 +1639,7 @@ static int snd_usb_pcm_playback_ack(struct snd_pcm_substream *substream) * outputs here */ if (!ep->active_mask) - snd_usb_queue_pending_output_urbs(ep, true); + return snd_usb_queue_pending_output_urbs(ep, true); return 0; } From 03aecb1acbcd7a660f97d645ca6c09d9de27ff9d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2023 10:52:18 +0100 Subject: [PATCH 256/898] drm: panel-orientation-quirks: Add quirk for Lenovo Yoga Book X90F Like the Windows Lenovo Yoga Book X91F/L the Android Lenovo Yoga Book X90F/L has a portrait 1200x1920 screen used in landscape mode, add a quirk for this. When the quirk for the X91F/L was initially added it was written to also apply to the X90F/L but this does not work because the Android version of the Yoga Book uses completely different DMI strings. Also adjust the X91F/L quirk to reflect that it only applies to the X91F/L models. Signed-off-by: Hans de Goede Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20230301095218.28457-1-hdegoede@redhat.com --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 5522d610c5cfd..b1a38e6ce2f8f 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -328,10 +328,17 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "IdeaPad Duet 3 10IGL5"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, - }, { /* Lenovo Yoga Book X90F / X91F / X91L */ + }, { /* Lenovo Yoga Book X90F / X90L */ .matches = { - /* Non exact match to match all versions */ - DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9"), + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"), + }, + .driver_data = (void *)&lcd1200x1920_rightside_up, + }, { /* Lenovo Yoga Book X91F / X91L */ + .matches = { + /* Non exact match to match F + L versions */ + DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, }, { /* Lenovo Yoga Tablet 2 830F / 830L */ From f87d28673b71b35b248231a2086f9404afbb7f28 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sat, 25 Feb 2023 16:01:36 -0800 Subject: [PATCH 257/898] entry: Fix noinstr warning in __enter_from_user_mode() __enter_from_user_mode() is triggering noinstr warnings with CONFIG_DEBUG_PREEMPT due to its call of preempt_count_add() via ct_state(). The preemption disable isn't needed as interrupts are already disabled. And the context_tracking_enabled() check in ct_state() also isn't needed as that's already being done by the CT_WARN_ON(). Just use __ct_state() instead. Fixes the following warnings: vmlinux.o: warning: objtool: enter_from_user_mode+0xba: call to preempt_count_add() leaves .noinstr.text section vmlinux.o: warning: objtool: syscall_enter_from_user_mode+0xf9: call to preempt_count_add() leaves .noinstr.text section vmlinux.o: warning: objtool: syscall_enter_from_user_mode_prepare+0xc7: call to preempt_count_add() leaves .noinstr.text section vmlinux.o: warning: objtool: irqentry_enter_from_user_mode+0xba: call to preempt_count_add() leaves .noinstr.text section Fixes: 171476775d32 ("context_tracking: Convert state to atomic_t") Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/d8955fa6d68dc955dda19baf13ae014ae27926f5.1677369694.git.jpoimboe@kernel.org --- include/linux/context_tracking.h | 1 + include/linux/context_tracking_state.h | 2 ++ kernel/entry/common.c | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index d4afa8508a806..3a7909ed54980 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -96,6 +96,7 @@ static inline void user_exit_irqoff(void) { } static inline int exception_enter(void) { return 0; } static inline void exception_exit(enum ctx_state prev_ctx) { } static inline int ct_state(void) { return -1; } +static inline int __ct_state(void) { return -1; } static __always_inline bool context_tracking_guest_enter(void) { return false; } static inline void context_tracking_guest_exit(void) { } #define CT_WARN_ON(cond) do { } while (0) diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 4a4d56f771802..fdd537ea513ff 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -46,7 +46,9 @@ struct context_tracking { #ifdef CONFIG_CONTEXT_TRACKING DECLARE_PER_CPU(struct context_tracking, context_tracking); +#endif +#ifdef CONFIG_CONTEXT_TRACKING_USER static __always_inline int __ct_state(void) { return arch_atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_STATE_MASK; diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 846add8394c41..1314894d2efad 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -21,7 +21,7 @@ static __always_inline void __enter_from_user_mode(struct pt_regs *regs) arch_enter_from_user_mode(regs); lockdep_hardirqs_off(CALLER_ADDR0); - CT_WARN_ON(ct_state() != CONTEXT_USER); + CT_WARN_ON(__ct_state() != CONTEXT_USER); user_exit_irqoff(); instrumentation_begin(); From bf84937e882009075f57fd213836256fc65d96bc Mon Sep 17 00:00:00 2001 From: Steve Clevenger Date: Mon, 27 Feb 2023 16:54:32 -0700 Subject: [PATCH 258/898] coresight-etm4: Fix for() loop drvdata->nr_addr_cmp range bug In etm4_enable_hw, fix for() loop range to represent address comparator pairs. Fixes: 2e1cdfe184b5 ("coresight-etm4x: Adding CoreSight ETM4x driver") Cc: stable@vger.kernel.org Signed-off-by: Steve Clevenger Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/4a4ee61ce8ef402615a4528b21a051de3444fb7b.1677540079.git.scclevenger@os.amperecomputing.com --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 1ea8f173cca0d..104333c2c8a36 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -472,7 +472,7 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata) if (etm4x_sspcicrn_present(drvdata, i)) etm4x_relaxed_write32(csa, config->ss_pe_cmp[i], TRCSSPCICRn(i)); } - for (i = 0; i < drvdata->nr_addr_cmp; i++) { + for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) { etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i)); etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i)); } From 735e7b30a53a1679c050cddb73f5e5316105d2e3 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 21 Mar 2023 10:45:30 +0000 Subject: [PATCH 259/898] coresight: etm4x: Do not access TRCIDR1 for identification CoreSight ETM4x architecture clearly provides ways to identify a device via registers in the "Management" class, TRCDEVARCH and TRCDEVTYPE. These registers can be accessed without the Trace domain being powered on. We additionally added TRCIDR1 as fallback in order to cover for any ETMs that may not have implemented TRCDEVARCH. So far, nobody has reported hitting a WARNING we placed to catch such systems. Also, more importantly it is problematic to access TRCIDR1, which is a "Trace" register via MMIO access, without clearing the OSLK. But we cannot mess with the OSLK until we know for sure that this is an ETMv4 device. Thus, this kind of creates a chicken and egg problem unnecessarily for systems "which are compliant" to the ETMv4 architecture. Let us remove the TRCIDR1 fall back check and rely only on TRCDEVARCH. Fixes: 8b94db1edaee ("coresight: etm4x: Use TRCDEVARCH for component discovery") Cc: stable@vger.kernel.org Reported-by: Steve Clevenger Link: https://lore.kernel.org/all/143540e5623d4c7393d24833f2b80600d8d745d2.1677881753.git.scclevenger@os.amperecomputing.com/ Cc: Mike Leach Cc: James Clark Reviewed-by: Mike Leach Reviewed-by: Anshuman Khandual Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230321104530.1547136-1-suzuki.poulose@arm.com --- .../coresight/coresight-etm4x-core.c | 22 ++++++++----------- drivers/hwtracing/coresight/coresight-etm4x.h | 20 +++++------------ 2 files changed, 15 insertions(+), 27 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 104333c2c8a36..4c15fae534f3a 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -1070,25 +1070,21 @@ static bool etm4_init_iomem_access(struct etmv4_drvdata *drvdata, struct csdev_access *csa) { u32 devarch = readl_relaxed(drvdata->base + TRCDEVARCH); - u32 idr1 = readl_relaxed(drvdata->base + TRCIDR1); /* * All ETMs must implement TRCDEVARCH to indicate that - * the component is an ETMv4. To support any broken - * implementations we fall back to TRCIDR1 check, which - * is not really reliable. + * the component is an ETMv4. Even though TRCIDR1 also + * contains the information, it is part of the "Trace" + * register and must be accessed with the OSLK cleared, + * with MMIO. But we cannot touch the OSLK until we are + * sure this is an ETM. So rely only on the TRCDEVARCH. */ - if ((devarch & ETM_DEVARCH_ID_MASK) == ETM_DEVARCH_ETMv4x_ARCH) { - drvdata->arch = etm_devarch_to_arch(devarch); - } else { - pr_warn("CPU%d: ETM4x incompatible TRCDEVARCH: %x, falling back to TRCIDR1\n", - smp_processor_id(), devarch); - - if (ETM_TRCIDR1_ARCH_MAJOR(idr1) != ETM_TRCIDR1_ARCH_ETMv4) - return false; - drvdata->arch = etm_trcidr_to_arch(idr1); + if ((devarch & ETM_DEVARCH_ID_MASK) != ETM_DEVARCH_ETMv4x_ARCH) { + pr_warn_once("TRCDEVARCH doesn't match ETMv4 architecture\n"); + return false; } + drvdata->arch = etm_devarch_to_arch(devarch); *csa = CSDEV_ACCESS_IOMEM(drvdata->base); return true; } diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index 434f4e95ee17b..27c8a99018680 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -753,14 +753,12 @@ * TRCDEVARCH - CoreSight architected register * - Bits[15:12] - Major version * - Bits[19:16] - Minor version - * TRCIDR1 - ETM architected register - * - Bits[11:8] - Major version - * - Bits[7:4] - Minor version - * We must rely on TRCDEVARCH for the version information, - * however we don't want to break the support for potential - * old implementations which might not implement it. Thus - * we fall back to TRCIDR1 if TRCDEVARCH is not implemented - * for memory mapped components. + * + * We must rely only on TRCDEVARCH for the version information. Even though, + * TRCIDR1 also provides the architecture version, it is a "Trace" register + * and as such must be accessed only with Trace power domain ON. This may + * not be available at probe time. + * * Now to make certain decisions easier based on the version * we use an internal representation of the version in the * driver, as follows : @@ -786,12 +784,6 @@ static inline u8 etm_devarch_to_arch(u32 devarch) ETM_DEVARCH_REVISION(devarch)); } -static inline u8 etm_trcidr_to_arch(u32 trcidr1) -{ - return ETM_ARCH_VERSION(ETM_TRCIDR1_ARCH_MAJOR(trcidr1), - ETM_TRCIDR1_ARCH_MINOR(trcidr1)); -} - enum etm_impdef_type { ETM4_IMPDEF_HISI_CORE_COMMIT, ETM4_IMPDEF_FEATURE_MAX, From a53ce18cacb477dd0513c607f187d16f0fa96f71 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Fri, 17 Mar 2023 17:08:10 +0100 Subject: [PATCH 260/898] sched/fair: Sanitize vruntime of entity being migrated Commit 829c1651e9c4 ("sched/fair: sanitize vruntime of entity being placed") fixes an overflowing bug, but ignore a case that se->exec_start is reset after a migration. For fixing this case, we delay the reset of se->exec_start after placing the entity which se->exec_start to detect long sleeping task. In order to take into account a possible divergence between the clock_task of 2 rqs, we increase the threshold to around 104 days. Fixes: 829c1651e9c4 ("sched/fair: sanitize vruntime of entity being placed") Originally-by: Zhang Qiao Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Tested-by: Zhang Qiao Link: https://lore.kernel.org/r/20230317160810.107988-1-vincent.guittot@linaro.org --- kernel/sched/core.c | 3 +++ kernel/sched/fair.c | 55 ++++++++++++++++++++++++++++++++++++--------- 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 488655f2319f5..0d18c3969f904 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2084,6 +2084,9 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) void activate_task(struct rq *rq, struct task_struct *p, int flags) { + if (task_on_rq_migrating(p)) + flags |= ENQUEUE_MIGRATED; + enqueue_task(rq, p, flags); p->on_rq = TASK_ON_RQ_QUEUED; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7a1b1f855b963..6986ea31c9844 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4648,11 +4648,33 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) #endif } +static inline bool entity_is_long_sleeper(struct sched_entity *se) +{ + struct cfs_rq *cfs_rq; + u64 sleep_time; + + if (se->exec_start == 0) + return false; + + cfs_rq = cfs_rq_of(se); + + sleep_time = rq_clock_task(rq_of(cfs_rq)); + + /* Happen while migrating because of clock task divergence */ + if (sleep_time <= se->exec_start) + return false; + + sleep_time -= se->exec_start; + if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD))) + return true; + + return false; +} + static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) { u64 vruntime = cfs_rq->min_vruntime; - u64 sleep_time; /* * The 'current' period is already promised to the current tasks, @@ -4684,13 +4706,24 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) /* * Pull vruntime of the entity being placed to the base level of - * cfs_rq, to prevent boosting it if placed backwards. If the entity - * slept for a long time, don't even try to compare its vruntime with - * the base as it may be too far off and the comparison may get - * inversed due to s64 overflow. - */ - sleep_time = rq_clock_task(rq_of(cfs_rq)) - se->exec_start; - if ((s64)sleep_time > 60LL * NSEC_PER_SEC) + * cfs_rq, to prevent boosting it if placed backwards. + * However, min_vruntime can advance much faster than real time, with + * the extreme being when an entity with the minimal weight always runs + * on the cfs_rq. If the waking entity slept for a long time, its + * vruntime difference from min_vruntime may overflow s64 and their + * comparison may get inversed, so ignore the entity's original + * vruntime in that case. + * The maximal vruntime speedup is given by the ratio of normal to + * minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES. + * When placing a migrated waking entity, its exec_start has been set + * from a different rq. In order to take into account a possible + * divergence between new and prev rq's clocks task because of irq and + * stolen time, we take an additional margin. + * So, cutting off on the sleep time of + * 2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days + * should be safe. + */ + if (entity_is_long_sleeper(se)) se->vruntime = vruntime; else se->vruntime = max_vruntime(se->vruntime, vruntime); @@ -4770,6 +4803,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) if (flags & ENQUEUE_WAKEUP) place_entity(cfs_rq, se, 0); + /* Entity has migrated, no longer consider this task hot */ + if (flags & ENQUEUE_MIGRATED) + se->exec_start = 0; check_schedstat_required(); update_stats_enqueue_fair(cfs_rq, se, flags); @@ -7657,9 +7693,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) /* Tell new CPU we are migrated */ se->avg.last_update_time = 0; - /* We have migrated, no longer consider this task hot */ - se->exec_start = 0; - update_scan_period(p, new_cpu); } From 263f5ecaf7080513efc248ec739b6d9e00f4129f Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 21 Mar 2023 04:33:38 -0700 Subject: [PATCH 261/898] perf/x86/amd/core: Always clear status for idx The variable 'status' (which contains the unhandled overflow bits) is not being properly masked in some cases, displaying the following warning: WARNING: CPU: 156 PID: 475601 at arch/x86/events/amd/core.c:972 amd_pmu_v2_handle_irq+0x216/0x270 This seems to be happening because the loop is being continued before the status bit being unset, in case x86_perf_event_set_period() returns 0. This is also causing an inconsistency because the "handled" counter is incremented, but the status bit is not cleaned. Move the bit cleaning together above, together when the "handled" counter is incremented. Fixes: 7685665c390d ("perf/x86/amd/core: Add PerfMonV2 overflow handling") Signed-off-by: Breno Leitao Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Sandipan Das Link: https://lore.kernel.org/r/20230321113338.1669660-1-leitao@debian.org --- arch/x86/events/amd/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 8c45b198b62f5..bccea57dee81e 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -923,6 +923,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) /* Event overflow */ handled++; + status &= ~mask; perf_sample_data_init(&data, 0, hwc->last_period); if (!x86_perf_event_set_period(event)) @@ -933,8 +934,6 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); - - status &= ~mask; } /* From b416514054810cf2d2cc348ae477cea619b64da7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 15 Mar 2023 19:43:43 +0000 Subject: [PATCH 262/898] entry/rcu: Check TIF_RESCHED _after_ delayed RCU wake-up RCU sometimes needs to perform a delayed wake up for specific kthreads handling offloaded callbacks (RCU_NOCB). These wakeups are performed by timers and upon entry to idle (also to guest and to user on nohz_full). However the delayed wake-up on kernel exit is actually performed after the thread flags are fetched towards the fast path check for work to do on exit to user. As a result, and if there is no other pending work to do upon that kernel exit, the current task will resume to userspace with TIF_RESCHED set and the pending wake up ignored. Fix this with fetching the thread flags _after_ the delayed RCU-nocb kthread wake-up. Fixes: 47b8ff194c1f ("entry: Explicitly flush pending rcuog wakeup before last rescheduling point") Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes (Google) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230315194349.10798-3-joel@joelfernandes.org --- kernel/entry/common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 1314894d2efad..be61332c66b54 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -192,13 +192,14 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, static void exit_to_user_mode_prepare(struct pt_regs *regs) { - unsigned long ti_work = read_thread_flags(); + unsigned long ti_work; lockdep_assert_irqs_disabled(); /* Flush pending rcuog wakeup before the last need_resched() check */ tick_nohz_user_enter_prepare(); + ti_work = read_thread_flags(); if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK)) ti_work = exit_to_user_mode_loop(regs, ti_work); From 97fd768e501fd5d377cb0bf46a35bad2cd21c153 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 21 Mar 2023 15:17:57 +0100 Subject: [PATCH 263/898] efi/libstub: zboot: Add compressed image to make targets Avoid needlessly rebuilding the compressed image by adding the file 'vmlinuz' to the 'targets' Kbuild make variable. Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/Makefile.zboot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/Makefile.zboot b/drivers/firmware/efi/libstub/Makefile.zboot index 43e9a4cab9f5d..ccdd6a130d986 100644 --- a/drivers/firmware/efi/libstub/Makefile.zboot +++ b/drivers/firmware/efi/libstub/Makefile.zboot @@ -44,4 +44,4 @@ OBJCOPYFLAGS_vmlinuz.efi := -O binary $(obj)/vmlinuz.efi: $(obj)/vmlinuz.efi.elf FORCE $(call if_changed,objcopy) -targets += zboot-header.o vmlinuz.o vmlinuz.efi.elf vmlinuz.efi +targets += zboot-header.o vmlinuz vmlinuz.o vmlinuz.efi.elf vmlinuz.efi From 2b91c4a870c9830eaf95e744454c9c218cccb736 Mon Sep 17 00:00:00 2001 From: Iwona Winiarska Date: Tue, 21 Mar 2023 10:04:10 +0100 Subject: [PATCH 264/898] hwmon: (peci/cputemp) Fix miscalculated DTS for SKX For Skylake, DTS temperature of the CPU is reported in S10.6 format instead of S8.8. Reported-by: Paul Fertser Link: https://lore.kernel.org/lkml/ZBhHS7v+98NK56is@home.paul.comp/ Signed-off-by: Iwona Winiarska Link: https://lore.kernel.org/r/20230321090410.866766-1-iwona.winiarska@intel.com Signed-off-by: Guenter Roeck --- drivers/hwmon/peci/cputemp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/peci/cputemp.c b/drivers/hwmon/peci/cputemp.c index 30850a479f61f..87d56f0fc888c 100644 --- a/drivers/hwmon/peci/cputemp.c +++ b/drivers/hwmon/peci/cputemp.c @@ -537,6 +537,12 @@ static const struct cpu_info cpu_hsx = { .thermal_margin_to_millidegree = &dts_eight_dot_eight_to_millidegree, }; +static const struct cpu_info cpu_skx = { + .reg = &resolved_cores_reg_hsx, + .min_peci_revision = 0x33, + .thermal_margin_to_millidegree = &dts_ten_dot_six_to_millidegree, +}; + static const struct cpu_info cpu_icx = { .reg = &resolved_cores_reg_icx, .min_peci_revision = 0x40, @@ -558,7 +564,7 @@ static const struct auxiliary_device_id peci_cputemp_ids[] = { }, { .name = "peci_cpu.cputemp.skx", - .driver_data = (kernel_ulong_t)&cpu_hsx, + .driver_data = (kernel_ulong_t)&cpu_skx, }, { .name = "peci_cpu.cputemp.icx", From 2315332efcbe7124252f080e03b57d3d2f1f4771 Mon Sep 17 00:00:00 2001 From: Phinex Hung Date: Tue, 21 Mar 2023 14:02:23 +0800 Subject: [PATCH 265/898] hwmon: fix potential sensor registration fail if of_node is missing It is not sufficient to check of_node in current device. In some cases, this would cause the sensor registration to fail. This patch looks for device's ancestors to find a valid of_node if any. Fixes: d560168b5d0f ("hwmon: (core) New hwmon registration API") Signed-off-by: Phinex Hung Link: https://lore.kernel.org/r/20230321060224.3819-1-phinex@realtek.com Signed-off-by: Guenter Roeck --- drivers/hwmon/hwmon.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 33edb5c02f7d7..d193ed3cb35e5 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -757,6 +757,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, struct hwmon_device *hwdev; const char *label; struct device *hdev; + struct device *tdev = dev; int i, err, id; /* Complain about invalid characters in hwmon name attribute */ @@ -826,7 +827,9 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, hwdev->name = name; hdev->class = &hwmon_class; hdev->parent = dev; - hdev->of_node = dev ? dev->of_node : NULL; + while (tdev && !tdev->of_node) + tdev = tdev->parent; + hdev->of_node = tdev ? tdev->of_node : NULL; hwdev->chip = chip; dev_set_drvdata(hdev, drvdata); dev_set_name(hdev, HWMON_ID_FORMAT, id); @@ -838,7 +841,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, INIT_LIST_HEAD(&hwdev->tzdata); - if (dev && dev->of_node && chip && chip->ops->read && + if (hdev->of_node && chip && chip->ops->read && chip->info[0]->type == hwmon_chip && (chip->info[0]->config[0] & HWMON_C_REGISTER_TZ)) { err = hwmon_thermal_register_sensors(hdev); From 813cc94c7847ae4a17e9f744fb4dbdf7df6bd732 Mon Sep 17 00:00:00 2001 From: Tianyi Jing Date: Sat, 18 Mar 2023 22:38:51 +0800 Subject: [PATCH 266/898] hwmon: (xgene) Fix ioremap and memremap leak Smatch reports: drivers/hwmon/xgene-hwmon.c:757 xgene_hwmon_probe() warn: 'ctx->pcc_comm_addr' from ioremap() not released on line: 757. This is because in drivers/hwmon/xgene-hwmon.c:701 xgene_hwmon_probe(), ioremap and memremap is not released, which may cause a leak. To fix this, ioremap and memremap is modified to devm_ioremap and devm_memremap. Signed-off-by: Tianyi Jing Reviewed-by: Dongliang Mu Link: https://lore.kernel.org/r/20230318143851.2191625-1-jingfelix@hust.edu.cn [groeck: Fixed formatting and subject] Signed-off-by: Guenter Roeck --- drivers/hwmon/xgene-hwmon.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c index d1abea49f01be..78d9f52e2a719 100644 --- a/drivers/hwmon/xgene-hwmon.c +++ b/drivers/hwmon/xgene-hwmon.c @@ -698,14 +698,14 @@ static int xgene_hwmon_probe(struct platform_device *pdev) ctx->comm_base_addr = pcc_chan->shmem_base_addr; if (ctx->comm_base_addr) { if (version == XGENE_HWMON_V2) - ctx->pcc_comm_addr = (void __force *)ioremap( - ctx->comm_base_addr, - pcc_chan->shmem_size); + ctx->pcc_comm_addr = (void __force *)devm_ioremap(&pdev->dev, + ctx->comm_base_addr, + pcc_chan->shmem_size); else - ctx->pcc_comm_addr = memremap( - ctx->comm_base_addr, - pcc_chan->shmem_size, - MEMREMAP_WB); + ctx->pcc_comm_addr = devm_memremap(&pdev->dev, + ctx->comm_base_addr, + pcc_chan->shmem_size, + MEMREMAP_WB); } else { dev_err(&pdev->dev, "Failed to get PCC comm region\n"); rc = -ENODEV; From b69245126a48e50882021180fa5d264dc7149ccc Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 15 Mar 2023 22:54:08 +0900 Subject: [PATCH 267/898] bootconfig: Fix testcase to increase max node Since commit 6c40624930c5 ("bootconfig: Increase max nodes of bootconfig from 1024 to 8192 for DCC support") increased the max number of bootconfig node to 8192, the bootconfig testcase of the max number of nodes fails. To fix this issue, we can not simply increase the number in the test script because the test bootconfig file becomes too big (>32KB). To fix that, we can use a combination of three alphabets (26^3 = 17576). But with that, we can not express the 8193 (just one exceed from the limitation) because it also exceeds the max size of bootconfig. So, the first 26 nodes will just use one alphabet. With this fix, test-bootconfig.sh passes all tests. Link: https://lore.kernel.org/all/167888844790.791176.670805252426835131.stgit@devnote2/ Reported-by: Heinz Wiesinger Link: https://lore.kernel.org/all/2463802.XAFRqVoOGU@amaterasu.liwjatan.org Fixes: 6c40624930c5 ("bootconfig: Increase max nodes of bootconfig from 1024 to 8192 for DCC support") Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) --- tools/bootconfig/test-bootconfig.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index f68e2e9eef8b2..a2c484c243f5d 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -87,10 +87,14 @@ xfail grep -i "error" $OUTFILE echo "Max node number check" -echo -n > $TEMPCONF -for i in `seq 1 1024` ; do - echo "node$i" >> $TEMPCONF -done +awk ' +BEGIN { + for (i = 0; i < 26; i += 1) + printf("%c\n", 65 + i % 26) + for (i = 26; i < 8192; i += 1) + printf("%c%c%c\n", 65 + i % 26, 65 + (i / 26) % 26, 65 + (i / 26 / 26)) +} +' > $TEMPCONF xpass $BOOTCONF -a $TEMPCONF $INITRD echo "badnode" >> $TEMPCONF From 47f9e4c924025c5be87959d3335e66fcbb7f6b5c Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Mar 2023 15:15:18 +0000 Subject: [PATCH 268/898] keys: Do not cache key in task struct if key is requested from kernel thread The key which gets cached in task structure from a kernel thread does not get invalidated even after expiry. Due to which, a new key request from kernel thread will be served with the cached key if it's present in task struct irrespective of the key validity. The change is to not cache key in task_struct when key requested from kernel thread so that kernel thread gets a valid key on every key request. The problem has been seen with the cifs module doing DNS lookups from a kernel thread and the results getting pinned by being attached to that kernel thread's cache - and thus not something that can be easily got rid of. The cache would ordinarily be cleared by notify-resume, but kernel threads don't do that. This isn't seen with AFS because AFS is doing request_key() within the kernel half of a user thread - which will do notify-resume. Fixes: 7743c48e54ee ("keys: Cache result of request_key*() temporarily in task_struct") Signed-off-by: Bharath SM Signed-off-by: David Howells Reviewed-by: Jarkko Sakkinen cc: Shyam Prasad N cc: Steve French cc: keyrings@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/CAGypqWw951d=zYRbdgNR4snUDvJhWL=q3=WOyh7HhSJupjz2vA@mail.gmail.com/ --- security/keys/request_key.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 2da4404276f0f..07a0ef2baacd8 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -38,9 +38,12 @@ static void cache_requested_key(struct key *key) #ifdef CONFIG_KEYS_REQUEST_CACHE struct task_struct *t = current; - key_put(t->cached_requested_key); - t->cached_requested_key = key_get(key); - set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); + /* Do not cache key if it is a kernel thread */ + if (!(t->flags & PF_KTHREAD)) { + key_put(t->cached_requested_key); + t->cached_requested_key = key_get(key); + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); + } #endif } From 4fc5c74dde69a7eda172514aaeb5a7df3600adb3 Mon Sep 17 00:00:00 2001 From: Robbie Harwood Date: Mon, 20 Feb 2023 12:12:53 -0500 Subject: [PATCH 269/898] verify_pefile: relax wrapper length check The PE Format Specification (section "The Attribute Certificate Table (Image Only)") states that `dwLength` is to be rounded up to 8-byte alignment when used for traversal. Therefore, the field is not required to be an 8-byte multiple in the first place. Accordingly, pesign has not performed this alignment since version 0.110. This causes kexec failure on pesign'd binaries with "PEFILE: Signature wrapper len wrong". Update the comment and relax the check. Signed-off-by: Robbie Harwood Signed-off-by: David Howells cc: Jarkko Sakkinen cc: Eric Biederman cc: Herbert Xu cc: keyrings@vger.kernel.org cc: linux-crypto@vger.kernel.org cc: kexec@lists.infradead.org Link: https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#the-attribute-certificate-table-image-only Link: https://github.com/rhboot/pesign Link: https://lore.kernel.org/r/20230220171254.592347-2-rharwood@redhat.com/ # v2 --- crypto/asymmetric_keys/verify_pefile.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/crypto/asymmetric_keys/verify_pefile.c b/crypto/asymmetric_keys/verify_pefile.c index 7553ab18db898..fe1bb374239d7 100644 --- a/crypto/asymmetric_keys/verify_pefile.c +++ b/crypto/asymmetric_keys/verify_pefile.c @@ -135,11 +135,15 @@ static int pefile_strip_sig_wrapper(const void *pebuf, pr_debug("sig wrapper = { %x, %x, %x }\n", wrapper.length, wrapper.revision, wrapper.cert_type); - /* Both pesign and sbsign round up the length of certificate table - * (in optional header data directories) to 8 byte alignment. + /* sbsign rounds up the length of certificate table (in optional + * header data directories) to 8 byte alignment. However, the PE + * specification states that while entries are 8-byte aligned, this is + * not included in their length, and as a result, pesign has not + * rounded up since 0.110. */ - if (round_up(wrapper.length, 8) != ctx->sig_len) { - pr_debug("Signature wrapper len wrong\n"); + if (wrapper.length > ctx->sig_len) { + pr_debug("Signature wrapper bigger than sig len (%x > %x)\n", + ctx->sig_len, wrapper.length); return -ELIBBAD; } if (wrapper.revision != WIN_CERT_REVISION_2_0) { From 3584c1dbfffdabf8e3dc1dd25748bb38dd01cd43 Mon Sep 17 00:00:00 2001 From: Robbie Harwood Date: Mon, 20 Feb 2023 12:12:54 -0500 Subject: [PATCH 270/898] asymmetric_keys: log on fatal failures in PE/pkcs7 These particular errors can be encountered while trying to kexec when secureboot lockdown is in place. Without this change, even with a signed debug build, one still needs to reboot the machine to add the appropriate dyndbg parameters (since lockdown blocks debugfs). Accordingly, upgrade all pr_debug() before fatal error into pr_warn(). Signed-off-by: Robbie Harwood Signed-off-by: David Howells cc: Jarkko Sakkinen cc: Eric Biederman cc: Herbert Xu cc: keyrings@vger.kernel.org cc: linux-crypto@vger.kernel.org cc: kexec@lists.infradead.org Link: https://lore.kernel.org/r/20230220171254.592347-3-rharwood@redhat.com/ # v2 --- crypto/asymmetric_keys/pkcs7_verify.c | 10 +++++----- crypto/asymmetric_keys/verify_pefile.c | 24 ++++++++++++------------ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c index 4fa769c4bcdb7..f0d4ff3c20a83 100644 --- a/crypto/asymmetric_keys/pkcs7_verify.c +++ b/crypto/asymmetric_keys/pkcs7_verify.c @@ -79,16 +79,16 @@ static int pkcs7_digest(struct pkcs7_message *pkcs7, } if (sinfo->msgdigest_len != sig->digest_size) { - pr_debug("Sig %u: Invalid digest size (%u)\n", - sinfo->index, sinfo->msgdigest_len); + pr_warn("Sig %u: Invalid digest size (%u)\n", + sinfo->index, sinfo->msgdigest_len); ret = -EBADMSG; goto error; } if (memcmp(sig->digest, sinfo->msgdigest, sinfo->msgdigest_len) != 0) { - pr_debug("Sig %u: Message digest doesn't match\n", - sinfo->index); + pr_warn("Sig %u: Message digest doesn't match\n", + sinfo->index); ret = -EKEYREJECTED; goto error; } @@ -478,7 +478,7 @@ int pkcs7_supply_detached_data(struct pkcs7_message *pkcs7, const void *data, size_t datalen) { if (pkcs7->data) { - pr_debug("Data already supplied\n"); + pr_warn("Data already supplied\n"); return -EINVAL; } pkcs7->data = data; diff --git a/crypto/asymmetric_keys/verify_pefile.c b/crypto/asymmetric_keys/verify_pefile.c index fe1bb374239d7..22beaf2213a22 100644 --- a/crypto/asymmetric_keys/verify_pefile.c +++ b/crypto/asymmetric_keys/verify_pefile.c @@ -74,7 +74,7 @@ static int pefile_parse_binary(const void *pebuf, unsigned int pelen, break; default: - pr_debug("Unknown PEOPT magic = %04hx\n", pe32->magic); + pr_warn("Unknown PEOPT magic = %04hx\n", pe32->magic); return -ELIBBAD; } @@ -95,7 +95,7 @@ static int pefile_parse_binary(const void *pebuf, unsigned int pelen, ctx->certs_size = ddir->certs.size; if (!ddir->certs.virtual_address || !ddir->certs.size) { - pr_debug("Unsigned PE binary\n"); + pr_warn("Unsigned PE binary\n"); return -ENODATA; } @@ -127,7 +127,7 @@ static int pefile_strip_sig_wrapper(const void *pebuf, unsigned len; if (ctx->sig_len < sizeof(wrapper)) { - pr_debug("Signature wrapper too short\n"); + pr_warn("Signature wrapper too short\n"); return -ELIBBAD; } @@ -142,16 +142,16 @@ static int pefile_strip_sig_wrapper(const void *pebuf, * rounded up since 0.110. */ if (wrapper.length > ctx->sig_len) { - pr_debug("Signature wrapper bigger than sig len (%x > %x)\n", - ctx->sig_len, wrapper.length); + pr_warn("Signature wrapper bigger than sig len (%x > %x)\n", + ctx->sig_len, wrapper.length); return -ELIBBAD; } if (wrapper.revision != WIN_CERT_REVISION_2_0) { - pr_debug("Signature is not revision 2.0\n"); + pr_warn("Signature is not revision 2.0\n"); return -ENOTSUPP; } if (wrapper.cert_type != WIN_CERT_TYPE_PKCS_SIGNED_DATA) { - pr_debug("Signature certificate type is not PKCS\n"); + pr_warn("Signature certificate type is not PKCS\n"); return -ENOTSUPP; } @@ -164,7 +164,7 @@ static int pefile_strip_sig_wrapper(const void *pebuf, ctx->sig_offset += sizeof(wrapper); ctx->sig_len -= sizeof(wrapper); if (ctx->sig_len < 4) { - pr_debug("Signature data missing\n"); + pr_warn("Signature data missing\n"); return -EKEYREJECTED; } @@ -198,7 +198,7 @@ static int pefile_strip_sig_wrapper(const void *pebuf, return 0; } not_pkcs7: - pr_debug("Signature data not PKCS#7\n"); + pr_warn("Signature data not PKCS#7\n"); return -ELIBBAD; } @@ -341,8 +341,8 @@ static int pefile_digest_pe(const void *pebuf, unsigned int pelen, digest_size = crypto_shash_digestsize(tfm); if (digest_size != ctx->digest_len) { - pr_debug("Digest size mismatch (%zx != %x)\n", - digest_size, ctx->digest_len); + pr_warn("Digest size mismatch (%zx != %x)\n", + digest_size, ctx->digest_len); ret = -EBADMSG; goto error_no_desc; } @@ -373,7 +373,7 @@ static int pefile_digest_pe(const void *pebuf, unsigned int pelen, * PKCS#7 certificate. */ if (memcmp(digest, ctx->digest, ctx->digest_len) != 0) { - pr_debug("Digest mismatch\n"); + pr_warn("Digest mismatch\n"); ret = -EKEYREJECTED; } else { pr_debug("The digests match!\n"); From 387d42ae6df76d2ae813432d05630535a5480038 Mon Sep 17 00:00:00 2001 From: Piotr Raczynski Date: Thu, 9 Mar 2023 13:38:56 -0800 Subject: [PATCH 271/898] ice: fix rx buffers handling for flow director packets Adding flow director filters stopped working correctly after commit 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side"). As a result, only first flow director filter can be added, adding next filter leads to NULL pointer dereference attached below. Rx buffer handling and reallocation logic has been optimized, however flow director specific traffic was not accounted for. As a result driver handled those packets incorrectly since new logic was based on ice_rx_ring::first_desc which was not set in this case. Fix this by setting struct ice_rx_ring::first_desc to next_to_clean for flow director received packets. [ 438.544867] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 438.551840] #PF: supervisor read access in kernel mode [ 438.556978] #PF: error_code(0x0000) - not-present page [ 438.562115] PGD 7c953b2067 P4D 0 [ 438.565436] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 438.569794] CPU: 0 PID: 0 Comm: swapper/0 Kdump: loaded Not tainted 6.2.0-net-bug #1 [ 438.577531] Hardware name: Intel Corporation M50CYP2SBSTD/M50CYP2SBSTD, BIOS SE5C620.86B.01.01.0005.2202160810 02/16/2022 [ 438.588470] RIP: 0010:ice_clean_rx_irq+0x2b9/0xf20 [ice] [ 438.593860] Code: 45 89 f7 e9 ac 00 00 00 8b 4d 78 41 31 4e 10 41 09 d5 4d 85 f6 0f 84 82 00 00 00 49 8b 4e 08 41 8b 76 1c 65 8b 3d 47 36 4a 3f <48> 8b 11 48 c1 ea 36 39 d7 0f 85 a6 00 00 00 f6 41 08 02 0f 85 9c [ 438.612605] RSP: 0018:ff8c732640003ec8 EFLAGS: 00010082 [ 438.617831] RAX: 0000000000000800 RBX: 00000000000007ff RCX: 0000000000000000 [ 438.624957] RDX: 0000000000000800 RSI: 0000000000000000 RDI: 0000000000000000 [ 438.632089] RBP: ff4ed275a2158200 R08: 00000000ffffffff R09: 0000000000000020 [ 438.639222] R10: 0000000000000000 R11: 0000000000000020 R12: 0000000000001000 [ 438.646356] R13: 0000000000000000 R14: ff4ed275d0daffe0 R15: 0000000000000000 [ 438.653485] FS: 0000000000000000(0000) GS:ff4ed2738fa00000(0000) knlGS:0000000000000000 [ 438.661563] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 438.667310] CR2: 0000000000000000 CR3: 0000007c9f0d6006 CR4: 0000000000771ef0 [ 438.674444] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 438.681573] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 438.688697] PKRU: 55555554 [ 438.691404] Call Trace: [ 438.693857] [ 438.695877] ? profile_tick+0x17/0x80 [ 438.699542] ice_msix_clean_ctrl_vsi+0x24/0x50 [ice] [ 438.702571] ice 0000:b1:00.0: VF 1: ctrl_vsi irq timeout [ 438.704542] __handle_irq_event_percpu+0x43/0x1a0 [ 438.704549] handle_irq_event+0x34/0x70 [ 438.704554] handle_edge_irq+0x9f/0x240 [ 438.709901] iavf 0000:b1:01.1: Failed to add Flow Director filter with status: 6 [ 438.714571] __common_interrupt+0x63/0x100 [ 438.714580] common_interrupt+0xb4/0xd0 [ 438.718424] iavf 0000:b1:01.1: Rule ID: 127 dst_ip: 0.0.0.0 src_ip 0.0.0.0 UDP: dst_port 4 src_port 0 [ 438.722255] [ 438.722257] [ 438.722257] asm_common_interrupt+0x22/0x40 [ 438.722262] RIP: 0010:cpuidle_enter_state+0xc8/0x430 [ 438.722267] Code: 6e e9 25 ff e8 f9 ef ff ff 8b 53 04 49 89 c5 0f 1f 44 00 00 31 ff e8 d7 f1 24 ff 45 84 ff 0f 85 57 02 00 00 fb 0f 1f 44 00 00 <45> 85 f6 0f 88 85 01 00 00 49 63 d6 48 8d 04 52 48 8d 04 82 49 8d [ 438.722269] RSP: 0018:ffffffff86003e50 EFLAGS: 00000246 [ 438.784108] RAX: ff4ed2738fa00000 RBX: ffbe72a64fc01020 RCX: 0000000000000000 [ 438.791234] RDX: 0000000000000000 RSI: ffffffff858d84de RDI: ffffffff85893641 [ 438.798365] RBP: 0000000000000002 R08: 0000000000000002 R09: 000000003158af9d [ 438.805490] R10: 0000000000000008 R11: 0000000000000354 R12: ffffffff862365a0 [ 438.812622] R13: 000000661b472a87 R14: 0000000000000002 R15: 0000000000000000 [ 438.819757] cpuidle_enter+0x29/0x40 [ 438.823333] do_idle+0x1b6/0x230 [ 438.826566] cpu_startup_entry+0x19/0x20 [ 438.830492] rest_init+0xcb/0xd0 [ 438.833717] arch_call_rest_init+0xa/0x30 [ 438.837731] start_kernel+0x776/0xb70 [ 438.841396] secondary_startup_64_no_verify+0xe5/0xeb [ 438.846449] Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") Signed-off-by: Piotr Raczynski Acked-by: Maciej Fijalkowski Reviewed-by: Simon Horman Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index dfd22862e926b..b61dd9f015405 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1210,6 +1210,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc); if (++ntc == cnt) ntc = 0; + rx_ring->first_desc = ntc; continue; } From 83b49e7f63da88a1544cba2b2e40bfabb24bd203 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 10 Mar 2023 12:33:44 +0100 Subject: [PATCH 272/898] ice: check if VF exists before mode check Setting trust on VF should return EINVAL when there is no VF. Move checking for switchdev mode after checking if VF exists. Fixes: c54d209c78b8 ("ice: Wait for VF to be reset/ready before configuration") Signed-off-by: Michal Swiatkowski Signed-off-by: Kalyan Kodamagula Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 96a64c25e2ef7..0cc05e54a7815 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1341,15 +1341,15 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) struct ice_vf *vf; int ret; + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) + return -EINVAL; + if (ice_is_eswitch_mode_switchdev(pf)) { dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n"); return -EOPNOTSUPP; } - vf = ice_get_vf_by_id(pf, vf_id); - if (!vf) - return -EINVAL; - ret = ice_check_vf_ready_for_cfg(vf); if (ret) goto out_put_vf; From 7d46c0e670d5f646879b52bacc387bf48ff0e7f1 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Mon, 13 Mar 2023 13:09:15 +0100 Subject: [PATCH 273/898] ice: remove filters only if VSI is deleted Filters shouldn't be removed in VSI rebuild path. Removing them on PF VSI results in no rule for PF MAC after changing for example queues amount. Remove all filters only in the VSI remove flow. As unload should also cause the filter to be removed introduce, a new function ice_stop_eth(). It will unroll ice_start_eth(), so remove filters and close VSI. Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions") Signed-off-by: Michal Swiatkowski Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 2 +- drivers/net/ethernet/intel/ice/ice_main.c | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 0f52ea38b6f3a..450317dfcca73 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -291,6 +291,7 @@ static void ice_vsi_delete_from_hw(struct ice_vsi *vsi) struct ice_vsi_ctx *ctxt; int status; + ice_fltr_remove_all(vsi); ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return; @@ -2892,7 +2893,6 @@ void ice_vsi_decfg(struct ice_vsi *vsi) !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) ice_cfg_sw_lldp(vsi, false, false); - ice_fltr_remove_all(vsi); ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); err = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx); if (err) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c233464b8f6bf..0d8b8c6f9bd35 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4641,6 +4641,12 @@ static int ice_start_eth(struct ice_vsi *vsi) return err; } +static void ice_stop_eth(struct ice_vsi *vsi) +{ + ice_fltr_remove_all(vsi); + ice_vsi_close(vsi); +} + static int ice_init_eth(struct ice_pf *pf) { struct ice_vsi *vsi = ice_get_main_vsi(pf); @@ -5129,7 +5135,7 @@ void ice_unload(struct ice_pf *pf) { ice_deinit_features(pf); ice_deinit_rdma(pf); - ice_vsi_close(ice_get_main_vsi(pf)); + ice_stop_eth(ice_get_main_vsi(pf)); ice_vsi_decfg(ice_get_main_vsi(pf)); ice_deinit_dev(pf); } From 632e04739c8f45c2d9ca4d4c5bd18d80c2ac9296 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Fri, 10 Mar 2023 08:49:40 +0100 Subject: [PATCH 274/898] clk: rs9: Fix suspend/resume Disabling the cache in commit 2ff4ba9e3702 ("clk: rs9: Fix I2C accessors") without removing cache synchronization in resume path results in a kernel panic as map->cache_ops is unset, due to REGCACHE_NONE. Enable flat cache again to support resume again. num_reg_defaults_raw is necessary to read the cache defaults from hardware. Some registers are strapped in hardware and cannot be provided in software. Fixes: 2ff4ba9e3702 ("clk: rs9: Fix I2C accessors") Signed-off-by: Alexander Stein Link: https://lore.kernel.org/r/20230310074940.3475703-1-alexander.stein@ew.tq-group.com Signed-off-by: Stephen Boyd --- drivers/clk/clk-renesas-pcie.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk-renesas-pcie.c b/drivers/clk/clk-renesas-pcie.c index f91f30560820d..ff3a52d484790 100644 --- a/drivers/clk/clk-renesas-pcie.c +++ b/drivers/clk/clk-renesas-pcie.c @@ -143,8 +143,9 @@ static int rs9_regmap_i2c_read(void *context, static const struct regmap_config rs9_regmap_config = { .reg_bits = 8, .val_bits = 8, - .cache_type = REGCACHE_NONE, + .cache_type = REGCACHE_FLAT, .max_register = RS9_REG_BCP, + .num_reg_defaults_raw = 0x8, .rd_table = &rs9_readable_table, .wr_table = &rs9_writeable_table, .reg_write = rs9_regmap_i2c_write, From cdce67099117ece371582f706c6eff7d3a65326d Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 10 Mar 2023 21:34:58 +0900 Subject: [PATCH 275/898] PCI: dwc: Fix PORT_LINK_CONTROL update when CDM check enabled If CDM_CHECK is enabled (by the DT "snps,enable-cdm-check" property), 'val' is overwritten by PCIE_PL_CHK_REG_CONTROL_STATUS initialization. Commit ec7b952f453c ("PCI: dwc: Always enable CDM check if "snps,enable-cdm-check" exists") did not account for further usage of 'val', so we wrote improper values to PCIE_PORT_LINK_CONTROL when the CDM check is enabled. Move the PCIE_PORT_LINK_CONTROL update to be completely after the PCIE_PL_CHK_REG_CONTROL_STATUS register initialization. [bhelgaas: commit log adapted from Serge's version] Fixes: ec7b952f453c ("PCI: dwc: Always enable CDM check if "snps,enable-cdm-check" exists") Link: https://lore.kernel.org/r/20230310123510.675685-2-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Yoshihiro Shimoda Signed-off-by: Bjorn Helgaas Reviewed-by: Serge Semin --- drivers/pci/controller/dwc/pcie-designware.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index 53a16b8b6ac23..8e33e6e59e686 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -1001,11 +1001,6 @@ void dw_pcie_setup(struct dw_pcie *pci) dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val); } - val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL); - val &= ~PORT_LINK_FAST_LINK_MODE; - val |= PORT_LINK_DLL_LINK_EN; - dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val); - if (dw_pcie_cap_is(pci, CDM_CHECK)) { val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS); val |= PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS | @@ -1013,6 +1008,11 @@ void dw_pcie_setup(struct dw_pcie *pci) dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val); } + val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL); + val &= ~PORT_LINK_FAST_LINK_MODE; + val |= PORT_LINK_DLL_LINK_EN; + dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val); + if (!pci->num_lanes) { dev_dbg(pci->dev, "Using h/w default number of lanes\n"); return; From 4e264be98b88a6d6f476c11087fe865696e8bef5 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Mon, 13 Mar 2023 17:06:45 +0100 Subject: [PATCH 276/898] iavf: fix hang on reboot with ice When a system with E810 with existing VFs gets rebooted the following hang may be observed. Pid 1 is hung in iavf_remove(), part of a network driver: PID: 1 TASK: ffff965400e5a340 CPU: 24 COMMAND: "systemd-shutdow" #0 [ffffaad04005fa50] __schedule at ffffffff8b3239cb #1 [ffffaad04005fae8] schedule at ffffffff8b323e2d #2 [ffffaad04005fb00] schedule_hrtimeout_range_clock at ffffffff8b32cebc #3 [ffffaad04005fb80] usleep_range_state at ffffffff8b32c930 #4 [ffffaad04005fbb0] iavf_remove at ffffffffc12b9b4c [iavf] #5 [ffffaad04005fbf0] pci_device_remove at ffffffff8add7513 #6 [ffffaad04005fc10] device_release_driver_internal at ffffffff8af08baa #7 [ffffaad04005fc40] pci_stop_bus_device at ffffffff8adcc5fc #8 [ffffaad04005fc60] pci_stop_and_remove_bus_device at ffffffff8adcc81e #9 [ffffaad04005fc70] pci_iov_remove_virtfn at ffffffff8adf9429 #10 [ffffaad04005fca8] sriov_disable at ffffffff8adf98e4 #11 [ffffaad04005fcc8] ice_free_vfs at ffffffffc04bb2c8 [ice] #12 [ffffaad04005fd10] ice_remove at ffffffffc04778fe [ice] #13 [ffffaad04005fd38] ice_shutdown at ffffffffc0477946 [ice] #14 [ffffaad04005fd50] pci_device_shutdown at ffffffff8add58f1 #15 [ffffaad04005fd70] device_shutdown at ffffffff8af05386 #16 [ffffaad04005fd98] kernel_restart at ffffffff8a92a870 #17 [ffffaad04005fda8] __do_sys_reboot at ffffffff8a92abd6 #18 [ffffaad04005fee0] do_syscall_64 at ffffffff8b317159 #19 [ffffaad04005ff08] __context_tracking_enter at ffffffff8b31b6fc #20 [ffffaad04005ff18] syscall_exit_to_user_mode at ffffffff8b31b50d #21 [ffffaad04005ff28] do_syscall_64 at ffffffff8b317169 #22 [ffffaad04005ff50] entry_SYSCALL_64_after_hwframe at ffffffff8b40009b RIP: 00007f1baa5c13d7 RSP: 00007fffbcc55a98 RFLAGS: 00000202 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f1baa5c13d7 RDX: 0000000001234567 RSI: 0000000028121969 RDI: 00000000fee1dead RBP: 00007fffbcc55ca0 R8: 0000000000000000 R9: 00007fffbcc54e90 R10: 00007fffbcc55050 R11: 0000000000000202 R12: 0000000000000005 R13: 0000000000000000 R14: 00007fffbcc55af0 R15: 0000000000000000 ORIG_RAX: 00000000000000a9 CS: 0033 SS: 002b During reboot all drivers PM shutdown callbacks are invoked. In iavf_shutdown() the adapter state is changed to __IAVF_REMOVE. In ice_shutdown() the call chain above is executed, which at some point calls iavf_remove(). However iavf_remove() expects the VF to be in one of the states __IAVF_RUNNING, __IAVF_DOWN or __IAVF_INIT_FAILED. If that's not the case it sleeps forever. So if iavf_shutdown() gets invoked before iavf_remove() the system will hang indefinitely because the adapter is already in state __IAVF_REMOVE. Fix this by returning from iavf_remove() if the state is __IAVF_REMOVE, as we already went through iavf_shutdown(). Fixes: 974578017fc1 ("iavf: Add waiting so the port is initialized in remove") Fixes: a8417330f8a5 ("iavf: Fix race condition between iavf_shutdown and iavf_remove") Reported-by: Marius Cornea Signed-off-by: Stefan Assmann Reviewed-by: Michal Kubiak Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 327cd9b1af2c8..095201e83c9db 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -5074,6 +5074,11 @@ static void iavf_remove(struct pci_dev *pdev) mutex_unlock(&adapter->crit_lock); break; } + /* Simply return if we already went through iavf_shutdown */ + if (adapter->state == __IAVF_REMOVE) { + mutex_unlock(&adapter->crit_lock); + return; + } mutex_unlock(&adapter->crit_lock); usleep_range(500, 1000); From c672297bbc0e86dbf88396b8053e2fbb173f16ff Mon Sep 17 00:00:00 2001 From: Radoslaw Tyl Date: Mon, 13 Mar 2023 15:07:33 +0100 Subject: [PATCH 277/898] i40e: fix flow director packet filter programming Initialize to zero structures to build a valid Tx Packet used for the filter programming. Fixes: a9219b332f52 ("i40e: VLAN field for flow director") Signed-off-by: Radoslaw Tyl Reviewed-by: Michal Swiatkowski Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 924f972b91faf..72b091f2509d8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -171,10 +171,10 @@ static char *i40e_create_dummy_packet(u8 *dummy_packet, bool ipv4, u8 l4proto, struct i40e_fdir_filter *data) { bool is_vlan = !!data->vlan_tag; - struct vlan_hdr vlan; - struct ipv6hdr ipv6; - struct ethhdr eth; - struct iphdr ip; + struct vlan_hdr vlan = {}; + struct ipv6hdr ipv6 = {}; + struct ethhdr eth = {}; + struct iphdr ip = {}; u8 *tmp; if (ipv4) { From fbaa38214cd9e150764ccaa82e04ecf42cc1140c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 11 Mar 2023 15:40:01 +0100 Subject: [PATCH 278/898] cxl/pci: Fix CDAT retrieval on big endian The CDAT exposed in sysfs differs between little endian and big endian arches: On big endian, every 4 bytes are byte-swapped. PCI Configuration Space is little endian (PCI r3.0 sec 6.1). Accessors such as pci_read_config_dword() implicitly swap bytes on big endian. That way, the macros in include/uapi/linux/pci_regs.h work regardless of the arch's endianness. For an example of implicit byte-swapping, see ppc4xx_pciex_read_config(), which calls in_le32(), which uses lwbrx (Load Word Byte-Reverse Indexed). DOE Read/Write Data Mailbox Registers are unlike other registers in Configuration Space in that they contain or receive a 4 byte portion of an opaque byte stream (a "Data Object" per PCIe r6.0 sec 7.9.24.5f). They need to be copied to or from the request/response buffer verbatim. So amend pci_doe_send_req() and pci_doe_recv_resp() to undo the implicit byte-swapping. The CXL_DOE_TABLE_ACCESS_* and PCI_DOE_DATA_OBJECT_DISC_* macros assume implicit byte-swapping. Byte-swap requests after constructing them with those macros and byte-swap responses before parsing them. Change the request and response type to __le32 to avoid sparse warnings. Per a request from Jonathan, replace sizeof(u32) with sizeof(__le32) for consistency. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny Signed-off-by: Lukas Wunner Reviewed-by: Dan Williams Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/3051114102f41d19df3debbee123129118fc5e6d.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 26 +++++++++++++------------- drivers/pci/doe.c | 25 ++++++++++++++----------- include/linux/pci-doe.h | 8 ++++++-- 3 files changed, 33 insertions(+), 26 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 7328a25524113..49a99a84b6aa6 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -462,7 +462,7 @@ static struct pci_doe_mb *find_cdat_doe(struct device *uport) return NULL; } -#define CDAT_DOE_REQ(entry_handle) \ +#define CDAT_DOE_REQ(entry_handle) cpu_to_le32 \ (FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE, \ CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) | \ FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE, \ @@ -475,8 +475,8 @@ static void cxl_doe_task_complete(struct pci_doe_task *task) } struct cdat_doe_task { - u32 request_pl; - u32 response_pl[32]; + __le32 request_pl; + __le32 response_pl[32]; struct completion c; struct pci_doe_task task; }; @@ -510,10 +510,10 @@ static int cxl_cdat_get_length(struct device *dev, return rc; } wait_for_completion(&t.c); - if (t.task.rv < sizeof(u32)) + if (t.task.rv < sizeof(__le32)) return -EIO; - *length = t.response_pl[1]; + *length = le32_to_cpu(t.response_pl[1]); dev_dbg(dev, "CDAT length %zu\n", *length); return 0; @@ -524,13 +524,13 @@ static int cxl_cdat_read_table(struct device *dev, struct cxl_cdat *cdat) { size_t length = cdat->length; - u32 *data = cdat->table; + __le32 *data = cdat->table; int entry_handle = 0; do { DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t); size_t entry_dw; - u32 *entry; + __le32 *entry; int rc; rc = pci_doe_submit_task(cdat_doe, &t.task); @@ -540,21 +540,21 @@ static int cxl_cdat_read_table(struct device *dev, } wait_for_completion(&t.c); /* 1 DW header + 1 DW data min */ - if (t.task.rv < (2 * sizeof(u32))) + if (t.task.rv < (2 * sizeof(__le32))) return -EIO; /* Get the CXL table access header entry handle */ entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, - t.response_pl[0]); + le32_to_cpu(t.response_pl[0])); entry = t.response_pl + 1; - entry_dw = t.task.rv / sizeof(u32); + entry_dw = t.task.rv / sizeof(__le32); /* Skip Header */ entry_dw -= 1; - entry_dw = min(length / sizeof(u32), entry_dw); + entry_dw = min(length / sizeof(__le32), entry_dw); /* Prevent length < 1 DW from causing a buffer overflow */ if (entry_dw) { - memcpy(data, entry, entry_dw * sizeof(u32)); - length -= entry_dw * sizeof(u32); + memcpy(data, entry, entry_dw * sizeof(__le32)); + length -= entry_dw * sizeof(__le32); data += entry_dw; } } while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY); diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c index 66d9ab2886468..6f097932ccbf9 100644 --- a/drivers/pci/doe.c +++ b/drivers/pci/doe.c @@ -128,7 +128,7 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, return -EIO; /* Length is 2 DW of header + length of payload in DW */ - length = 2 + task->request_pl_sz / sizeof(u32); + length = 2 + task->request_pl_sz / sizeof(__le32); if (length > PCI_DOE_MAX_LENGTH) return -EIO; if (length == PCI_DOE_MAX_LENGTH) @@ -141,9 +141,9 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH, length)); - for (i = 0; i < task->request_pl_sz / sizeof(u32); i++) + for (i = 0; i < task->request_pl_sz / sizeof(__le32); i++) pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, - task->request_pl[i]); + le32_to_cpu(task->request_pl[i])); pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_GO); @@ -195,11 +195,11 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas /* First 2 dwords have already been read */ length -= 2; - payload_length = min(length, task->response_pl_sz / sizeof(u32)); + payload_length = min(length, task->response_pl_sz / sizeof(__le32)); /* Read the rest of the response payload */ for (i = 0; i < payload_length; i++) { - pci_read_config_dword(pdev, offset + PCI_DOE_READ, - &task->response_pl[i]); + pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val); + task->response_pl[i] = cpu_to_le32(val); /* Prior to the last ack, ensure Data Object Ready */ if (i == (payload_length - 1) && !pci_doe_data_obj_ready(doe_mb)) return -EIO; @@ -217,7 +217,7 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas if (FIELD_GET(PCI_DOE_STATUS_ERROR, val)) return -EIO; - return min(length, task->response_pl_sz / sizeof(u32)) * sizeof(u32); + return min(length, task->response_pl_sz / sizeof(__le32)) * sizeof(__le32); } static void signal_task_complete(struct pci_doe_task *task, int rv) @@ -317,14 +317,16 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid, { u32 request_pl = FIELD_PREP(PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX, *index); + __le32 request_pl_le = cpu_to_le32(request_pl); + __le32 response_pl_le; u32 response_pl; DECLARE_COMPLETION_ONSTACK(c); struct pci_doe_task task = { .prot.vid = PCI_VENDOR_ID_PCI_SIG, .prot.type = PCI_DOE_PROTOCOL_DISCOVERY, - .request_pl = &request_pl, + .request_pl = &request_pl_le, .request_pl_sz = sizeof(request_pl), - .response_pl = &response_pl, + .response_pl = &response_pl_le, .response_pl_sz = sizeof(response_pl), .complete = pci_doe_task_complete, .private = &c, @@ -340,6 +342,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid, if (task.rv != sizeof(response_pl)) return -EIO; + response_pl = le32_to_cpu(response_pl_le); *vid = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID, response_pl); *protocol = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL, response_pl); @@ -533,8 +536,8 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task) * DOE requests must be a whole number of DW and the response needs to * be big enough for at least 1 DW */ - if (task->request_pl_sz % sizeof(u32) || - task->response_pl_sz < sizeof(u32)) + if (task->request_pl_sz % sizeof(__le32) || + task->response_pl_sz < sizeof(__le32)) return -EINVAL; if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags)) diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h index ed9b4df792b88..43765eaf2342c 100644 --- a/include/linux/pci-doe.h +++ b/include/linux/pci-doe.h @@ -34,6 +34,10 @@ struct pci_doe_mb; * @work: Used internally by the mailbox * @doe_mb: Used internally by the mailbox * + * Payloads are treated as opaque byte streams which are transmitted verbatim, + * without byte-swapping. If payloads contain little-endian register values, + * the caller is responsible for conversion with cpu_to_le32() / le32_to_cpu(). + * * The payload sizes and rv are specified in bytes with the following * restrictions concerning the protocol. * @@ -45,9 +49,9 @@ struct pci_doe_mb; */ struct pci_doe_task { struct pci_doe_protocol prot; - u32 *request_pl; + __le32 *request_pl; size_t request_pl_sz; - u32 *response_pl; + __le32 *response_pl; size_t response_pl_sz; int rv; void (*complete)(struct pci_doe_task *task); From 10ec8ca8ec1a2f04c4ed90897225231c58c124a7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 20 Mar 2023 15:37:25 +0100 Subject: [PATCH 279/898] bpf: Adjust insufficient default bpf_jit_limit We've seen recent AWS EKS (Kubernetes) user reports like the following: After upgrading EKS nodes from v20230203 to v20230217 on our 1.24 EKS clusters after a few days a number of the nodes have containers stuck in ContainerCreating state or liveness/readiness probes reporting the following error: Readiness probe errored: rpc error: code = Unknown desc = failed to exec in container: failed to start exec "4a11039f730203ffc003b7[...]": OCI runtime exec failed: exec failed: unable to start container process: unable to init seccomp: error loading seccomp filter into kernel: error loading seccomp filter: errno 524: unknown However, we had not been seeing this issue on previous AMIs and it only started to occur on v20230217 (following the upgrade from kernel 5.4 to 5.10) with no other changes to the underlying cluster or workloads. We tried the suggestions from that issue (sysctl net.core.bpf_jit_limit=452534528) which helped to immediately allow containers to be created and probes to execute but after approximately a day the issue returned and the value returned by cat /proc/vmallocinfo | grep bpf_jit | awk '{s+=$2} END {print s}' was steadily increasing. I tested bpf tree to observe bpf_jit_charge_modmem, bpf_jit_uncharge_modmem their sizes passed in as well as bpf_jit_current under tcpdump BPF filter, seccomp BPF and native (e)BPF programs, and the behavior all looks sane and expected, that is nothing "leaking" from an upstream perspective. The bpf_jit_limit knob was originally added in order to avoid a situation where unprivileged applications loading BPF programs (e.g. seccomp BPF policies) consuming all the module memory space via BPF JIT such that loading of kernel modules would be prevented. The default limit was defined back in 2018 and while good enough back then, we are generally seeing far more BPF consumers today. Adjust the limit for the BPF JIT pool from originally 1/4 to now 1/2 of the module memory space to better reflect today's needs and avoid more users running into potentially hard to debug issues. Fixes: fdadd04931c2 ("bpf: fix bpf_jit_limit knob for PAGE_SIZE >= 64K") Reported-by: Stephen Haynes Reported-by: Lefteris Alexakis Signed-off-by: Daniel Borkmann Link: https://github.com/awslabs/amazon-eks-ami/issues/1179 Link: https://github.com/awslabs/amazon-eks-ami/issues/1219 Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230320143725.8394-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b297e9f60ca10..e2d256c820723 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -972,7 +972,7 @@ static int __init bpf_jit_charge_init(void) { /* Only used as heuristic here to derive limit. */ bpf_jit_limit_max = bpf_jit_alloc_exec_limit(); - bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2, + bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 1, PAGE_SIZE), LONG_MAX); return 0; } From c83172b0639c8a005c0dd3b36252dc22ddd9f19c Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Fri, 25 Nov 2022 04:15:40 +0200 Subject: [PATCH 280/898] net/mlx5e: Set uplink rep as NETNS_LOCAL Previously, NETNS_LOCAL was not set for uplink representors, inconsistent with VF representors, and allowed the uplink representor to be moved between net namespaces and separated from the VF representors it shares the core device with. Such usage would break the isolation model of namespaces, as devices in different namespaces would have access to shared memory. To solve this issue, set NETNS_LOCAL for uplink representors if eswitch is in switchdev mode. Fixes: 7a9fb35e8c3a ("net/mlx5e: Do not reload ethernet ports when changing eswitch mode") Signed-off-by: Gavin Li Reviewed-by: Gavi Teitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a7f2ab22cc40c..7ca7e9b57607f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4150,8 +4150,12 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, } } - if (mlx5e_is_uplink_rep(priv)) + if (mlx5e_is_uplink_rep(priv)) { features = mlx5e_fix_uplink_rep_features(netdev, features); + features |= NETIF_F_NETNS_LOCAL; + } else { + features &= ~NETIF_F_NETNS_LOCAL; + } mutex_unlock(&priv->state_lock); From 662404b24a4c4d839839ed25e3097571f5938b9b Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Thu, 9 Feb 2023 12:48:52 +0200 Subject: [PATCH 281/898] net/mlx5e: Block entering switchdev mode with ns inconsistency Upon entering switchdev mode, VF/SF representors are spawned in the devlink instance's net namespace, whereas the PF net device transforms into the uplink representor, remaining in the net namespace the PF net device was in. Therefore, if a PF net device's namespace is different from its parent devlink net namespace, entering switchdev mode can create an illegal situation where all representors sharing the same core device are NOT in the same net namespace. To avoid this issue, block entering switchdev mode for devices whose child netdev net namespace has diverged from the parent devlink's. Fixes: 7768d1971de6 ("net/mlx5: E-Switch, Add control for encapsulation") Signed-off-by: Gavin Li Reviewed-by: Gavi Teitz Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 22075943bb582..25a8076a77bff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3405,6 +3405,18 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) return 0; } +static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink) +{ + struct net *devl_net, *netdev_net; + struct mlx5_eswitch *esw; + + esw = mlx5_devlink_eswitch_get(devlink); + netdev_net = dev_net(esw->dev->mlx5e_res.uplink_netdev); + devl_net = devlink_net(devlink); + + return net_eq(devl_net, netdev_net); +} + int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { @@ -3419,6 +3431,13 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && + !esw_offloads_devlink_ns_eq_netdev_ns(devlink)) { + NL_SET_ERR_MSG_MOD(extack, + "Can't change E-Switch mode to switchdev when netdev net namespace has diverged from the devlink's."); + return -EPERM; + } + mlx5_lag_disable_change(esw->dev); err = mlx5_esw_try_lock(esw); if (err < 0) { From 922f56e9a795d6f3dd72d3428ebdd7ee040fa855 Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Tue, 31 Jan 2023 14:07:03 +0200 Subject: [PATCH 282/898] net/mlx5: Fix steering rules cleanup vport's mc, uc and multicast rules are not deleted in teardown path when EEH happens. Since the vport's promisc settings(uc, mc and all) in firmware are reset after EEH, mlx5 driver will try to delete the above rules in the initialization path. This cause kernel crash because these software rules are no longer valid. Fix by nullifying these rules right after delete to avoid accessing any dangling pointers. Call Trace: __list_del_entry_valid+0xcc/0x100 (unreliable) tree_put_node+0xf4/0x1b0 [mlx5_core] tree_remove_node+0x30/0x70 [mlx5_core] mlx5_del_flow_rules+0x14c/0x1f0 [mlx5_core] esw_apply_vport_rx_mode+0x10c/0x200 [mlx5_core] esw_update_vport_rx_mode+0xb4/0x180 [mlx5_core] esw_vport_change_handle_locked+0x1ec/0x230 [mlx5_core] esw_enable_vport+0x130/0x260 [mlx5_core] mlx5_eswitch_enable_sriov+0x2a0/0x2f0 [mlx5_core] mlx5_device_enable_sriov+0x74/0x440 [mlx5_core] mlx5_load_one+0x114c/0x1550 [mlx5_core] mlx5_pci_resume+0x68/0xf0 [mlx5_core] eeh_report_resume+0x1a4/0x230 eeh_pe_dev_traverse+0x98/0x170 eeh_handle_normal_event+0x3e4/0x640 eeh_handle_event+0x4c/0x370 eeh_event_handler+0x14c/0x210 kthread+0x168/0x1b0 ret_from_kernel_thread+0x5c/0x84 Fixes: a35f71f27a61 ("net/mlx5: E-Switch, Implement promiscuous rx modes vf request handling") Signed-off-by: Huy Nguyen Signed-off-by: Lama Kayal Reviewed-by: Tariq Toukan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 0f052513fefa1..8bdf28762f412 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -959,6 +959,7 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) */ esw_vport_change_handle_locked(vport); vport->enabled_events = 0; + esw_apply_vport_rx_mode(esw, vport, false, false); esw_vport_cleanup(esw, vport); esw->enabled_vports--; From 6e9d51b1a5cb8d750c3daf89db4f4cdfd1051819 Mon Sep 17 00:00:00 2001 From: Roy Novich Date: Wed, 1 Mar 2023 15:47:11 +0200 Subject: [PATCH 283/898] net/mlx5e: Initialize link speed to zero mlx5e_port_max_linkspeed does not guarantee value assignment for speed. Avoid cases where link_speed might be used uninitialized. In case mlx5e_port_max_linkspeed fails, a default link speed of 50000 will be used for the calculations. Fixes: 3f6d08d196b2 ("net/mlx5e: Add RSS support for hairpin") Signed-off-by: Roy Novich Reviewed-by: Tariq Toukan Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 6bfed633343ab..87a2850b32d09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1103,8 +1103,8 @@ static void mlx5e_hairpin_params_init(struct mlx5e_hairpin_params *hairpin_params, struct mlx5_core_dev *mdev) { + u32 link_speed = 0; u64 link_speed64; - u32 link_speed; hairpin_params->mdev = mdev; /* set hairpin pair per each 50Gbs share of the link */ From 7e3fce82d945cf6e7f99034b113ff2d250d7524d Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Mon, 20 Mar 2023 13:13:55 +0200 Subject: [PATCH 284/898] net/mlx5e: Overcome slow response for first macsec ASO WQE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First ASO WQE poll causes a cache miss in hardware hence the resut is delayed. It causes to the situation where such WQE is polled earlier than it is needed. Add logic to retry ASO CQ polling operation. Fixes: 739cfa34518e ("net/mlx5: Make ASO poll CQ usable in atomic context")  Signed-off-by: Emeel Hakim Reviewed-by: Leon Romanovsky Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 8af53178e40d4..33b3620ea45c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -1412,6 +1412,7 @@ static int macsec_aso_query(struct mlx5_core_dev *mdev, struct mlx5e_macsec *mac struct mlx5e_macsec_aso *aso; struct mlx5_aso_wqe *aso_wqe; struct mlx5_aso *maso; + unsigned long expires; int err; aso = &macsec->aso; @@ -1425,7 +1426,13 @@ static int macsec_aso_query(struct mlx5_core_dev *mdev, struct mlx5e_macsec *mac macsec_aso_build_wqe_ctrl_seg(aso, &aso_wqe->aso_ctrl, NULL); mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl); - err = mlx5_aso_poll_cq(maso, false); + expires = jiffies + msecs_to_jiffies(10); + do { + err = mlx5_aso_poll_cq(maso, false); + if (err) + usleep_range(2, 10); + } while (err && time_is_after_jiffies(expires)); + if (err) goto err_out; From 44d553188c38ac74b799dfdcebafef2f7bb70942 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Wed, 15 Mar 2023 11:04:38 +0200 Subject: [PATCH 285/898] net/mlx5: Read the TC mapping of all priorities on ETS query When ETS configurations are queried by the user to get the mapping assignment between packet priority and traffic class, only priorities up to maximum TCs are queried from QTCT register in FW to retrieve their assigned TC, leaving the rest of the priorities mapped to the default TC #0 which might be misleading. Fix by querying the TC mapping of all priorities on each ETS query, regardless of the maximum number of TCs configured in FW. Fixes: 820c2c5e773d ("net/mlx5e: Read ETS settings directly from firmware") Signed-off-by: Maher Sanalla Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 2449731b7d79a..89de92d064836 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -117,12 +117,14 @@ static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev, if (!MLX5_CAP_GEN(priv->mdev, ets)) return -EOPNOTSUPP; - ets->ets_cap = mlx5_max_tc(priv->mdev) + 1; - for (i = 0; i < ets->ets_cap; i++) { + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlx5_query_port_prio_tc(mdev, i, &ets->prio_tc[i]); if (err) return err; + } + ets->ets_cap = mlx5_max_tc(priv->mdev) + 1; + for (i = 0; i < ets->ets_cap; i++) { err = mlx5_query_port_tc_group(mdev, i, &tc_group[i]); if (err) return err; From 640fcdbcf27fc62de9223f958ceb4e897a00e791 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 27 Feb 2023 14:16:10 +0300 Subject: [PATCH 286/898] net/mlx5: E-Switch, Fix an Oops in error handling code The error handling dereferences "vport". There is nothing we can do if it is an error pointer except returning the error code. Fixes: 133dcfc577ea ("net/mlx5: E-Switch, Alloc and free unique metadata for match") Signed-off-by: Dan Carpenter Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c index d55775627a473..50d2ea3239798 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c @@ -364,8 +364,7 @@ int mlx5_esw_acl_ingress_vport_metadata_update(struct mlx5_eswitch *esw, u16 vpo if (WARN_ON_ONCE(IS_ERR(vport))) { esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num); - err = PTR_ERR(vport); - goto out; + return PTR_ERR(vport); } esw_acl_ingress_ofld_rules_destroy(esw, vport); From 9a801afd3eb95e1a89aba17321062df06fb49d98 Mon Sep 17 00:00:00 2001 From: Dylan Jhong Date: Mon, 13 Mar 2023 11:49:06 +0800 Subject: [PATCH 287/898] riscv: mm: Fix incorrect ASID argument when flushing TLB Currently, we pass the CONTEXTID instead of the ASID to the TLB flush function. We should only take the ASID field to prevent from touching the reserved bit field. Fixes: 3f1e782998cd ("riscv: add ASID-based tlbflushing methods") Signed-off-by: Dylan Jhong Reviewed-by: Sergey Matyukevich Link: https://lore.kernel.org/r/20230313034906.2401730-1-dylan@andestech.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/tlbflush.h | 2 ++ arch/riscv/mm/context.c | 2 +- arch/riscv/mm/tlbflush.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 801019381dea3..a09196f8de688 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -12,6 +12,8 @@ #include #ifdef CONFIG_MMU +extern unsigned long asid_mask; + static inline void local_flush_tlb_all(void) { __asm__ __volatile__ ("sfence.vma" : : : "memory"); diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 0f784e3d307bb..12e22e7330e7b 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -22,7 +22,7 @@ DEFINE_STATIC_KEY_FALSE(use_asid_allocator); static unsigned long asid_bits; static unsigned long num_asids; -static unsigned long asid_mask; +unsigned long asid_mask; static atomic_long_t current_version; diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 37ed760d007c3..ef701fa83f368 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -42,7 +42,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, /* check if the tlbflush needs to be sent to other CPUs */ broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; if (static_branch_unlikely(&use_asid_allocator)) { - unsigned long asid = atomic_long_read(&mm->context.id); + unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask; if (broadcast) { sbi_remote_sfence_vma_asid(cmask, start, size, asid); From 032a954061afd4b7426c3eb6bfd2952ef1e9a384 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Sun, 19 Mar 2023 10:55:40 +0100 Subject: [PATCH 288/898] net: dsa: tag_brcm: legacy: fix daisy-chained switches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When BCM63xx internal switches are connected to switches with a 4-byte Broadcom tag, it does not identify the packet as VLAN tagged, so it adds one based on its PVID (which is likely 0). Right now, the packet is received by the BCM63xx internal switch and the 6-byte tag is properly processed. The next step would to decode the corresponding 4-byte tag. However, the internal switch adds an invalid VLAN tag after the 6-byte tag and the 4-byte tag handling fails. In order to fix this we need to remove the invalid VLAN tag after the 6-byte tag before passing it to the 4-byte tag decoding. Fixes: 964dbf186eaa ("net: dsa: tag_brcm: add support for legacy tags") Signed-off-by: Álvaro Fernández Rojas Reviewed-by: Michal Swiatkowski Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20230319095540.239064-1-noltari@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_brcm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 10239daa57454..cacdafb41200e 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -252,6 +253,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb, static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, struct net_device *dev) { + int len = BRCM_LEG_TAG_LEN; int source_port; u8 *brcm_tag; @@ -266,12 +268,16 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, if (!skb->dev) return NULL; + /* VLAN tag is added by BCM63xx internal switch */ + if (netdev_uses_dsa(skb->dev)) + len += VLAN_HLEN; + /* Remove Broadcom tag and update checksum */ - skb_pull_rcsum(skb, BRCM_LEG_TAG_LEN); + skb_pull_rcsum(skb, len); dsa_default_offload_fwd_mark(skb); - dsa_strip_etype_header(skb, BRCM_LEG_TAG_LEN); + dsa_strip_etype_header(skb, len); return skb; } From 3ced71d273f8edf07bf01a831a49ca6b988e06b3 Mon Sep 17 00:00:00 2001 From: Kevin Locke Date: Tue, 21 Mar 2023 15:39:22 -0600 Subject: [PATCH 289/898] kbuild: deb-pkg: set version for linux-headers paths As a result of the switch to dh_listpackages, $version is no longer set when install_kernel_headers() is called. This causes files in the linux-headers deb package to be installed to a path with an empty $version (e.g. /usr/src/linux-headers-/scripts/sign-file rather than /usr/src/linux-headers-6.3.0-rc3/scripts/sign-file). To avoid this, while continuing to use the version information from dh_listpackages, pass $version from $package as the second argument of install_kernel_headers(). Fixes: 36862e14e316 ("kbuild: deb-pkg: use dh_listpackages to know enabled packages") Signed-off-by: Kevin Locke Signed-off-by: Masahiro Yamada --- scripts/package/builddeb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index c5ae57167d7ce..7b23f52c70c5f 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -162,6 +162,7 @@ install_linux_image_dbg () { install_kernel_headers () { pdir=$1 + version=$2 rm -rf $pdir @@ -229,7 +230,7 @@ do linux-libc-dev) install_libc_headers debian/linux-libc-dev;; linux-headers-*) - install_kernel_headers debian/linux-headers;; + install_kernel_headers debian/linux-headers ${package#linux-headers-};; esac done From 968b66ffeb7956acc72836a7797aeb7b2444ec51 Mon Sep 17 00:00:00 2001 From: Frank Crawford Date: Sat, 18 Mar 2023 19:05:42 +1100 Subject: [PATCH 290/898] hwmon (it87): Fix voltage scaling for chips with 10.9mV ADCs Fix voltage scaling for chips that have 10.9mV ADCs, where scaling was not performed. Fixes: ead8080351c9 ("hwmon: (it87) Add support for IT8732F") Signed-off-by: Frank Crawford Link: https://lore.kernel.org/r/20230318080543.1226700-2-frank@crawford.emu.id.au [groeck: Update subject and description to focus on bug fix] Signed-off-by: Guenter Roeck --- drivers/hwmon/it87.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index 66f7ceaa7c3f5..e9614eb557d4e 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -515,6 +515,8 @@ static const struct it87_devices it87_devices[] = { #define has_six_temp(data) ((data)->features & FEAT_SIX_TEMP) #define has_vin3_5v(data) ((data)->features & FEAT_VIN3_5V) #define has_conf_noexit(data) ((data)->features & FEAT_CONF_NOEXIT) +#define has_scaling(data) ((data)->features & (FEAT_12MV_ADC | \ + FEAT_10_9MV_ADC)) struct it87_sio_data { int sioaddr; @@ -3134,7 +3136,7 @@ static int it87_probe(struct platform_device *pdev) "Detected broken BIOS defaults, disabling PWM interface\n"); /* Starting with IT8721F, we handle scaling of internal voltages */ - if (has_12mv_adc(data)) { + if (has_scaling(data)) { if (sio_data->internal & BIT(0)) data->in_scaled |= BIT(3); /* in3 is AVCC */ if (sio_data->internal & BIT(1)) From 4fe3c88552a3fbe1944426a4506a18cdeb457b5a Mon Sep 17 00:00:00 2001 From: Li Zetao Date: Mon, 20 Mar 2023 14:33:18 +0000 Subject: [PATCH 291/898] atm: idt77252: fix kmemleak when rmmod idt77252 There are memory leaks reported by kmemleak: unreferenced object 0xffff888106500800 (size 128): comm "modprobe", pid 1017, jiffies 4297787785 (age 67.152s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000970ce626>] __kmem_cache_alloc_node+0x20c/0x380 [<00000000fb5f78d9>] kmalloc_trace+0x2f/0xb0 [<000000000e947e2a>] idt77252_init_one+0x2847/0x3c90 [idt77252] [<000000006efb048e>] local_pci_probe+0xeb/0x1a0 ... unreferenced object 0xffff888106500b00 (size 128): comm "modprobe", pid 1017, jiffies 4297787785 (age 67.152s) hex dump (first 32 bytes): 00 20 3d 01 80 88 ff ff 00 20 3d 01 80 88 ff ff . =...... =..... f0 23 3d 01 80 88 ff ff 00 20 3d 01 00 00 00 00 .#=...... =..... backtrace: [<00000000970ce626>] __kmem_cache_alloc_node+0x20c/0x380 [<00000000fb5f78d9>] kmalloc_trace+0x2f/0xb0 [<00000000f451c5be>] alloc_scq.constprop.0+0x4a/0x400 [idt77252] [<00000000e6313849>] idt77252_init_one+0x28cf/0x3c90 [idt77252] The root cause is traced to the vc_maps which alloced in open_card_oam() are not freed in close_card_oam(). The vc_maps are used to record open connections, so when close a vc_map in close_card_oam(), the memory should be freed. Moreover, the ubr0 is not closed when close a idt77252 device, leading to the memory leak of vc_map and scq_info. Fix them by adding kfree in close_card_oam() and implementing new close_card_ubr0() to close ubr0. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Li Zetao Reviewed-by: Francois Romieu Link: https://lore.kernel.org/r/20230320143318.2644630-1-lizetao1@huawei.com Signed-off-by: Jakub Kicinski --- drivers/atm/idt77252.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index eec0cc2144e02..e327a0229dc17 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -2909,6 +2909,7 @@ close_card_oam(struct idt77252_dev *card) recycle_rx_pool_skb(card, &vc->rcv.rx_pool); } + kfree(vc); } } } @@ -2952,6 +2953,15 @@ open_card_ubr0(struct idt77252_dev *card) return 0; } +static void +close_card_ubr0(struct idt77252_dev *card) +{ + struct vc_map *vc = card->vcs[0]; + + free_scq(card, vc->scq); + kfree(vc); +} + static int idt77252_dev_open(struct idt77252_dev *card) { @@ -3001,6 +3011,7 @@ static void idt77252_dev_close(struct atm_dev *dev) struct idt77252_dev *card = dev->dev_data; u32 conf; + close_card_ubr0(card); close_card_oam(card); conf = SAR_CFG_RXPTH | /* enable receive path */ From 8e50ed774554f93d55426039b27b1e38d7fa64d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Mar 2023 16:34:27 +0000 Subject: [PATCH 292/898] erspan: do not use skb_mac_header() in ndo_start_xmit() Drivers should not assume skb_mac_header(skb) == skb->data in their ndo_start_xmit(). Use skb_network_offset() and skb_transport_offset() which better describe what is needed in erspan_fb_xmit() and ip6erspan_tunnel_xmit() syzbot reported: WARNING: CPU: 0 PID: 5083 at include/linux/skbuff.h:2873 skb_mac_header include/linux/skbuff.h:2873 [inline] WARNING: CPU: 0 PID: 5083 at include/linux/skbuff.h:2873 ip6erspan_tunnel_xmit+0x1d9c/0x2d90 net/ipv6/ip6_gre.c:962 Modules linked in: CPU: 0 PID: 5083 Comm: syz-executor406 Not tainted 6.3.0-rc2-syzkaller-00866-gd4671cb96fa3 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023 RIP: 0010:skb_mac_header include/linux/skbuff.h:2873 [inline] RIP: 0010:ip6erspan_tunnel_xmit+0x1d9c/0x2d90 net/ipv6/ip6_gre.c:962 Code: 04 02 41 01 de 84 c0 74 08 3c 03 0f 8e 1c 0a 00 00 45 89 b4 24 c8 00 00 00 c6 85 77 fe ff ff 01 e9 33 e7 ff ff e8 b4 27 a1 f8 <0f> 0b e9 b6 e7 ff ff e8 a8 27 a1 f8 49 8d bf f0 0c 00 00 48 b8 00 RSP: 0018:ffffc90003b2f830 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 000000000000ffff RCX: 0000000000000000 RDX: ffff888021273a80 RSI: ffffffff88e1bd4c RDI: 0000000000000003 RBP: ffffc90003b2f9d8 R08: 0000000000000003 R09: 000000000000ffff R10: 000000000000ffff R11: 0000000000000000 R12: ffff88802b28da00 R13: 00000000000000d0 R14: ffff88807e25b6d0 R15: ffff888023408000 FS: 0000555556a61300(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055e5b11eb6e8 CR3: 0000000027c1b000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __netdev_start_xmit include/linux/netdevice.h:4900 [inline] netdev_start_xmit include/linux/netdevice.h:4914 [inline] __dev_direct_xmit+0x504/0x730 net/core/dev.c:4300 dev_direct_xmit include/linux/netdevice.h:3088 [inline] packet_xmit+0x20a/0x390 net/packet/af_packet.c:285 packet_snd net/packet/af_packet.c:3075 [inline] packet_sendmsg+0x31a0/0x5150 net/packet/af_packet.c:3107 sock_sendmsg_nosec net/socket.c:724 [inline] sock_sendmsg+0xde/0x190 net/socket.c:747 __sys_sendto+0x23a/0x340 net/socket.c:2142 __do_sys_sendto net/socket.c:2154 [inline] __se_sys_sendto net/socket.c:2150 [inline] __x64_sys_sendto+0xe1/0x1b0 net/socket.c:2150 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f123aaa1039 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 b1 14 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffc15d12058 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f123aaa1039 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000020000040 R09: 0000000000000014 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f123aa648c0 R13: 431bde82d7b634db R14: 0000000000000000 R15: 0000000000000000 Fixes: 1baf5ebf8954 ("erspan: auto detect truncated packets.") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230320163427.8096-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ip_gre.c | 4 ++-- net/ipv6/ip6_gre.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ffff46cdcb58f..e55a202649608 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -552,7 +552,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) truncate = true; } - nhoff = skb_network_header(skb) - skb_mac_header(skb); + nhoff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP) && (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; @@ -561,7 +561,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) int thoff; if (skb_transport_header_was_set(skb)) - thoff = skb_transport_header(skb) - skb_mac_header(skb); + thoff = skb_transport_offset(skb); else thoff = nhoff + sizeof(struct ipv6hdr); if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 89f5f0f3f5d65..a4ecfc9d25930 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -959,7 +959,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, truncate = true; } - nhoff = skb_network_header(skb) - skb_mac_header(skb); + nhoff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP) && (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; @@ -968,7 +968,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, int thoff; if (skb_transport_header_was_set(skb)) - thoff = skb_transport_header(skb) - skb_mac_header(skb); + thoff = skb_transport_offset(skb); else thoff = nhoff + sizeof(struct ipv6hdr); if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) From 6acc72a43eac78a309160d0a7512bbc59bcdd757 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 21 Mar 2023 03:03:23 +0200 Subject: [PATCH 293/898] net: mscc: ocelot: fix stats region batching The blamed commit changed struct ocelot_stat_layout :: "u32 offset" to "u32 reg". However, "u32 reg" is not quite a register address, but an enum ocelot_reg, which in itself encodes an enum ocelot_target target in the upper bits, and an index into the ocelot->map[target][] array in the lower bits. So, whereas the previous code comparison between stats_layout[i].offset and last + 1 was correct (because those "offsets" at the time were 32-bit relative addresses), the new code, comparing layout[i].reg to last + 4 is not correct, because the "reg" here is an enum/index, not an actual register address. What we want to compare are indeed register addresses, but to do that, we need to actually go through the same motions as __ocelot_bulk_read_ix() itself. With this bug, all statistics counters are deemed by ocelot_prepare_stats_regions() as constituting their own region. (Truncated) log on VSC9959 (Felix) below (prints added by me): Before: region of 1 contiguous counters starting with SYS:STAT:CNT[0x000] region of 1 contiguous counters starting with SYS:STAT:CNT[0x001] region of 1 contiguous counters starting with SYS:STAT:CNT[0x002] ... region of 1 contiguous counters starting with SYS:STAT:CNT[0x041] region of 1 contiguous counters starting with SYS:STAT:CNT[0x042] region of 1 contiguous counters starting with SYS:STAT:CNT[0x080] region of 1 contiguous counters starting with SYS:STAT:CNT[0x081] ... region of 1 contiguous counters starting with SYS:STAT:CNT[0x0ac] region of 1 contiguous counters starting with SYS:STAT:CNT[0x100] region of 1 contiguous counters starting with SYS:STAT:CNT[0x101] ... region of 1 contiguous counters starting with SYS:STAT:CNT[0x111] After: region of 67 contiguous counters starting with SYS:STAT:CNT[0x000] region of 45 contiguous counters starting with SYS:STAT:CNT[0x080] region of 18 contiguous counters starting with SYS:STAT:CNT[0x100] Since commit d87b1c08f38a ("net: mscc: ocelot: use bulk reads for stats") intended bulking as a performance improvement, and since now, with trivial-sized regions, performance is even worse than without bulking at all, this could easily qualify as a performance regression. Fixes: d4c367650704 ("net: mscc: ocelot: keep ocelot_stat_layout by reg address, not offset") Signed-off-by: Vladimir Oltean Acked-by: Colin Foster Tested-by: Colin Foster Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_stats.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot_stats.c b/drivers/net/ethernet/mscc/ocelot_stats.c index bdb893476832b..096c81ec9dd61 100644 --- a/drivers/net/ethernet/mscc/ocelot_stats.c +++ b/drivers/net/ethernet/mscc/ocelot_stats.c @@ -899,7 +899,8 @@ static int ocelot_prepare_stats_regions(struct ocelot *ocelot) if (!layout[i].reg) continue; - if (region && layout[i].reg == last + 4) { + if (region && ocelot->map[SYS][layout[i].reg & REG_MASK] == + ocelot->map[SYS][last & REG_MASK] + 4) { region->count++; } else { region = devm_kzalloc(ocelot->dev, sizeof(*region), From 17dfd210459837b453c2a3c20406ee12082f151c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 21 Mar 2023 03:03:24 +0200 Subject: [PATCH 294/898] net: mscc: ocelot: fix transfer from region->buf to ocelot->stats To understand the problem, we need some definitions. The driver is aware of multiple counters (enum ocelot_stat), yet not all switches supported by the driver implement all counters. There are 2 statistics layouts: ocelot_stats_layout and ocelot_mm_stats_layout, the latter having 36 counters more than the former. ocelot->stats[] is not a compact array, i.e. there are elements within it which are not going to be populated for ocelot_stats_layout. On the other hand, ocelot->stats[] is easily indexable, for example "tx_octets" for port 3 can be found at ocelot->stats[3 * OCELOT_NUM_STATS + OCELOT_STAT_TX_OCTETS], and that is why we keep it sparse. Regions, as created by ocelot_prepare_stats_regions(), are compact (every element from region->buf will correspond to a counter that is present in this switch's layout) but are not easily indexable. Let's define holes as the ranges of values of enum ocelot_stat for which ocelot_stats_layout doesn't have a "reg" defined. For example, there is a hole between OCELOT_STAT_RX_GREEN_PRIO_7 and OCELOT_STAT_TX_OCTETS which is of 23 elements that are only present on ocelot_mm_stats_layout, and as such, they are also present in enum ocelot_stat. Let's define the left extremity of the hole - the last enum ocelot_stat still defined - as A (in this case OCELOT_STAT_RX_GREEN_PRIO_7) and the right extremity - the first enum ocelot_stat that is defined after a series of undefined ones - as B (in this case OCELOT_STAT_TX_OCTETS). There is a bug in the procedure which transfers stats from region->buf[] to ocelot->stats[]. For each hole in the ocelot_stats_layout, the logic transfers the stats starting with enum ocelot_stat B to ocelot->stats[] index A + 1. So all stats after a hole are saved to a position which is off by B - A + 1 elements. This causes 2 kinds of issues: (a) counters which shouldn't increment increment (b) counters which should increment don't Holes in the ocelot_stat_layout automatically imply the end of a region and the beginning of a new one; however the reverse is not necessarily true. For example, for ocelot_mm_stat_layout, there could be multiple regions (which indicate discontinuities in register addresses) while there is no hole (which indicates discontinuities in enum ocelot_stat values). In the example above, the stats from the second region->buf[] are not transferred to ocelot->stats starting with index "port * OCELOT_NUM_STATS + OCELOT_STAT_TX_OCTETS" as they should, but rather, starting with element "port * OCELOT_NUM_STATS + OCELOT_STAT_RX_GREEN_PRIO_7 + 1". That stats[] array element is not reported to user space for switches that use ocelot_stat_layout, and that is how issue (b) occurs. However, if the length of the second region is larger than the hole, then some stats will start to be transferred to the ocelot->stats[] indices which *are* reported to user space, but those indices contain wrong values (corresponding to unexpected counters). This is how issue (a) occurs. The procedure, as it was introduced in commit d87b1c08f38a ("net: mscc: ocelot: use bulk reads for stats"), was not buggy, because there were no holes in the struct ocelot_stat_layout instances at that time. The problem is that when those holes were introduced, the function was not updated to take them into consideration. To update the procedure, we need to know, for each region, which enum ocelot_stat corresponds to its region->base. We have no way of deducing that based on the contents of struct ocelot_stats_region, so we need to add this information. Fixes: ab3f97a9610a ("net: mscc: ocelot: export ethtool MAC Merge stats for Felix VSC9959") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_stats.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_stats.c b/drivers/net/ethernet/mscc/ocelot_stats.c index 096c81ec9dd61..f18371154475e 100644 --- a/drivers/net/ethernet/mscc/ocelot_stats.c +++ b/drivers/net/ethernet/mscc/ocelot_stats.c @@ -258,6 +258,7 @@ struct ocelot_stat_layout { struct ocelot_stats_region { struct list_head node; u32 base; + enum ocelot_stat first_stat; int count; u32 *buf; }; @@ -341,11 +342,12 @@ static int ocelot_port_update_stats(struct ocelot *ocelot, int port) */ static void ocelot_port_transfer_stats(struct ocelot *ocelot, int port) { - unsigned int idx = port * OCELOT_NUM_STATS; struct ocelot_stats_region *region; int j; list_for_each_entry(region, &ocelot->stats_regions, node) { + unsigned int idx = port * OCELOT_NUM_STATS + region->first_stat; + for (j = 0; j < region->count; j++) { u64 *stat = &ocelot->stats[idx + j]; u64 val = region->buf[j]; @@ -355,8 +357,6 @@ static void ocelot_port_transfer_stats(struct ocelot *ocelot, int port) *stat = (*stat & ~(u64)U32_MAX) + val; } - - idx += region->count; } } @@ -915,6 +915,7 @@ static int ocelot_prepare_stats_regions(struct ocelot *ocelot) WARN_ON(last >= layout[i].reg); region->base = layout[i].reg; + region->first_stat = i; region->count = 1; list_add_tail(®ion->node, &ocelot->stats_regions); } From 5291099e0f61a4a1f4d2e11ac2af8123ece17e0e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 21 Mar 2023 03:03:25 +0200 Subject: [PATCH 295/898] net: mscc: ocelot: add TX_MM_HOLD to ocelot_mm_stats_layout The lack of a definition for this counter is what initially prompted me to investigate a problem which really manifested itself as the previous change, "net: mscc: ocelot: fix transfer from region->buf to ocelot->stats". When TX_MM_HOLD is defined in enum ocelot_stat but not in struct ocelot_stat_layout ocelot_mm_stats_layout, this creates a hole, which due to the aforementioned bug, makes all counters following TX_MM_HOLD be recorded off by one compared to their correct position. So for example, a non-zero TX_PMAC_OCTETS would be reported as TX_MERGE_FRAGMENTS, TX_PMAC_UNICAST would be reported as TX_PMAC_OCTETS, TX_PMAC_64 would be reported as TX_PMAC_PAUSE, etc etc. This is because the size of the hole (1) is much smaller than the size of the region, so the phenomenon where the stats are off-by-one, rather than lost, prevails. However, the phenomenon where stats are lost can be seen too, for example with DROP_LOCAL, which is at the beginning of its own region (offset 0x000400 vs the previous 0x0002b0 constitutes a discontinuity). This is also reported as off by one and saved to TX_PMAC_1527_MAX, but that counter is not reported to the unstructured "ethtool -S", as opposed to DROP_LOCAL which is (as "drop_local"). Fixes: ab3f97a9610a ("net: mscc: ocelot: export ethtool MAC Merge stats for Felix VSC9959") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_stats.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mscc/ocelot_stats.c b/drivers/net/ethernet/mscc/ocelot_stats.c index f18371154475e..d0e6cd8dbe5c9 100644 --- a/drivers/net/ethernet/mscc/ocelot_stats.c +++ b/drivers/net/ethernet/mscc/ocelot_stats.c @@ -274,6 +274,7 @@ static const struct ocelot_stat_layout ocelot_mm_stats_layout[OCELOT_NUM_STATS] OCELOT_STAT(RX_ASSEMBLY_OK), OCELOT_STAT(RX_MERGE_FRAGMENTS), OCELOT_STAT(TX_MERGE_FRAGMENTS), + OCELOT_STAT(TX_MM_HOLD), OCELOT_STAT(RX_PMAC_OCTETS), OCELOT_STAT(RX_PMAC_UNICAST), OCELOT_STAT(RX_PMAC_MULTICAST), From 4107b8746d93ace135b8c4da4f19bbae81db785f Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Tue, 21 Mar 2023 14:45:43 +1100 Subject: [PATCH 296/898] net/sonic: use dma_mapping_error() for error check The DMA address returned by dma_map_single() should be checked with dma_mapping_error(). Fix it accordingly. Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Signed-off-by: Zhang Changzhong Tested-by: Stan Johnson Signed-off-by: Finn Thain Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/6645a4b5c1e364312103f48b7b36783b94e197a2.1679370343.git.fthain@linux-m68k.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/natsemi/sonic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index d17d1b4f2585f..825356ee3492e 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -292,7 +292,7 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) */ laddr = dma_map_single(lp->device, skb->data, length, DMA_TO_DEVICE); - if (!laddr) { + if (dma_mapping_error(lp->device, laddr)) { pr_err_ratelimited("%s: failed to map tx DMA buffer.\n", dev->name); dev_kfree_skb_any(skb); return NETDEV_TX_OK; @@ -509,7 +509,7 @@ static bool sonic_alloc_rb(struct net_device *dev, struct sonic_local *lp, *new_addr = dma_map_single(lp->device, skb_put(*new_skb, SONIC_RBSIZE), SONIC_RBSIZE, DMA_FROM_DEVICE); - if (!*new_addr) { + if (dma_mapping_error(lp->device, *new_addr)) { dev_kfree_skb(*new_skb); *new_skb = NULL; return false; From 39e7d2ab6ea9fd6b389091ec223d566934fe7be5 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 21 Mar 2023 09:31:26 +0100 Subject: [PATCH 297/898] swiotlb: use wrap_area_index() instead of open-coding it No functional change, just use an existing helper. Signed-off-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- kernel/dma/swiotlb.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 91454b513db06..3856e2b524b47 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -695,10 +695,7 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, /* * Update the indices to avoid searching in the next round. */ - if (index + nslots < mem->area_nslabs) - area->index = index + nslots; - else - area->index = 0; + area->index = wrap_area_index(mem, index + nslots); area->used += nslots; spin_unlock_irqrestore(&area->lock, flags); return slot_index; From 0eee5ae1025699ea93d44fdb6ef2365505082103 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 21 Mar 2023 09:31:27 +0100 Subject: [PATCH 298/898] swiotlb: fix slot alignment checks Explicit alignment and page alignment are used only to calculate the stride, not when checking actual slot physical address. Originally, only page alignment was implemented, and that worked, because the whole SWIOTLB is allocated on a page boundary, so aligning the start index was sufficient to ensure a page-aligned slot. When commit 1f221a0d0dbf ("swiotlb: respect min_align_mask") added support for min_align_mask, the index could be incremented in the search loop, potentially finding an unaligned slot if minimum device alignment is between IO_TLB_SIZE and PAGE_SIZE. The bug could go unnoticed, because the slot size is 2 KiB, and the most common page size is 4 KiB, so there is no alignment value in between. IIUC the intention has been to find a slot that conforms to all alignment constraints: device minimum alignment, an explicit alignment (given as function parameter) and optionally page alignment (if allocation size is >= PAGE_SIZE). The most restrictive mask can be trivially computed with logical AND. The rest can stay. Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask") Fixes: e81e99bacc9f ("swiotlb: Support aligned swiotlb buffers") Signed-off-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- kernel/dma/swiotlb.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 3856e2b524b47..5b919ef832b6f 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -634,22 +634,26 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, BUG_ON(!nslots); BUG_ON(area_index >= mem->nareas); + /* + * For allocations of PAGE_SIZE or larger only look for page aligned + * allocations. + */ + if (alloc_size >= PAGE_SIZE) + iotlb_align_mask &= PAGE_MASK; + iotlb_align_mask &= alloc_align_mask; + /* * For mappings with an alignment requirement don't bother looping to - * unaligned slots once we found an aligned one. For allocations of - * PAGE_SIZE or larger only look for page aligned allocations. + * unaligned slots once we found an aligned one. */ stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1; - if (alloc_size >= PAGE_SIZE) - stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT)); - stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1); spin_lock_irqsave(&area->lock, flags); if (unlikely(nslots > mem->area_nslabs - area->used)) goto not_found; slot_base = area_index * mem->area_nslabs; - index = wrap_area_index(mem, ALIGN(area->index, stride)); + index = area->index; for (slots_checked = 0; slots_checked < mem->area_nslabs; ) { slot_index = slot_base + index; From def84ab600b71ea3fcc422a876d5d0d0daa7d4f3 Mon Sep 17 00:00:00 2001 From: Martin George Date: Thu, 16 Mar 2023 17:20:09 +0530 Subject: [PATCH 299/898] nvme: send Identify with CNS 06h only to I/O controllers Identify CNS 06h (I/O Command Set Specific Identify Controller data structure) is supported only on i/o controllers. But nvme_init_non_mdts_limits() currently invokes this on all controllers. Correct this by ensuring this is sent to I/O controllers only. Signed-off-by: Martin George Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d4be525f8100a..53ef028596c61 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3063,7 +3063,8 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) else ctrl->max_zeroes_sectors = 0; - if (nvme_ctrl_limited_cns(ctrl)) + if (ctrl->subsys->subtype != NVME_NQN_NVME || + nvme_ctrl_limited_cns(ctrl)) return 0; id = kzalloc(sizeof(*id), GFP_KERNEL); From aa01c67de5926fdb276793180564f172c55fb0d7 Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Mon, 20 Mar 2023 09:57:36 -0600 Subject: [PATCH 300/898] nvme-tcp: fix nvme_tcp_term_pdu to match spec The FEI field of C2HTermReq/H2CTermReq is 4 bytes but not 4-byte-aligned in the NVMe/TCP specification (it is located at offset 10 in the PDU). Split it into two 16-bit integers in struct nvme_tcp_term_pdu so no padding is inserted. There should also be 10 reserved bytes after. There are currently no users of this type. Fixes: fc221d05447aa6db ("nvme-tcp: Add protocol header") Reported-by: Geert Uytterhoeven Signed-off-by: Caleb Sander Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/nvme-tcp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index 75470159a194d..57ebe1267f7fb 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -115,8 +115,9 @@ struct nvme_tcp_icresp_pdu { struct nvme_tcp_term_pdu { struct nvme_tcp_hdr hdr; __le16 fes; - __le32 fei; - __u8 rsvd[8]; + __le16 feil; + __le16 feiu; + __u8 rsvd[10]; }; /** From 66a1c22b709178e7b823d44465d0c2e5ed7492fb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 21 Mar 2023 09:30:59 +0100 Subject: [PATCH 301/898] mm/slab: Fix undefined init_cache_node_node() for NUMA and !SMP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sh/migor_defconfig: mm/slab.c: In function ‘slab_memory_callback’: mm/slab.c:1127:23: error: implicit declaration of function ‘init_cache_node_node’; did you mean ‘drain_cache_node_node’? [-Werror=implicit-function-declaration] 1127 | ret = init_cache_node_node(nid); | ^~~~~~~~~~~~~~~~~~~~ | drain_cache_node_node The #ifdef condition protecting the definition of init_cache_node_node() no longer matches the conditions protecting the (multiple) users. Fix this by syncing the conditions. Fixes: 76af6a054da40553 ("mm/migrate: add CPU hotplug to demotion #ifdef") Reported-by: Randy Dunlap Link: https://lore.kernel.org/r/b5bdea22-ed2f-3187-6efe-0c72330270a4@infradead.org Signed-off-by: Geert Uytterhoeven Reviewed-by: John Paul Adrian Glaubitz Acked-by: Randy Dunlap Cc: Signed-off-by: Vlastimil Babka --- mm/slab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index 74ece29e3a7ea..0d35747b9472c 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -839,7 +839,7 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp) return 0; } -#if (defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)) || defined(CONFIG_SMP) +#if defined(CONFIG_NUMA) || defined(CONFIG_SMP) /* * Allocates and initializes node for a node on each slab cache, used for * either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node From b58e3d4311b54b6dd0e37165277965da0c9eb21d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 17 Mar 2023 10:53:24 +0100 Subject: [PATCH 302/898] wifi: iwlwifi: mvm: fix mvmtxq->stopped handling This could race if the queue is redirected while full, then the flushing internally would start it while it's not yet usable again. Fix it by using two state bits instead of just one. Reviewed-by: Benjamin Berg Tested-by: Jose Ignacio Tornos Martinez Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 5 ++++- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 4 +++- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 5 ++++- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 4 ++-- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 565522466eba5..f81c609ecf58d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -732,7 +732,10 @@ void iwl_mvm_mac_itxq_xmit(struct ieee80211_hw *hw, struct ieee80211_txq *txq) rcu_read_lock(); do { - while (likely(!mvmtxq->stopped && + while (likely(!test_bit(IWL_MVM_TXQ_STATE_STOP_FULL, + &mvmtxq->state) && + !test_bit(IWL_MVM_TXQ_STATE_STOP_REDIRECT, + &mvmtxq->state) && !test_bit(IWL_MVM_STATUS_IN_D3, &mvm->status))) { skb = ieee80211_tx_dequeue(hw, txq); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 90bc95d96a78e..421d2649b0f0c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -729,7 +729,9 @@ struct iwl_mvm_txq { struct list_head list; u16 txq_id; atomic_t tx_request; - bool stopped; +#define IWL_MVM_TXQ_STATE_STOP_FULL 0 +#define IWL_MVM_TXQ_STATE_STOP_REDIRECT 1 + unsigned long state; }; static inline struct iwl_mvm_txq * diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index f4e9446d9dc2d..efad8f92d1321 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1691,7 +1691,10 @@ static void iwl_mvm_queue_state_change(struct iwl_op_mode *op_mode, txq = sta->txq[tid]; mvmtxq = iwl_mvm_txq_from_mac80211(txq); - mvmtxq->stopped = !start; + if (start) + clear_bit(IWL_MVM_TXQ_STATE_STOP_FULL, &mvmtxq->state); + else + set_bit(IWL_MVM_TXQ_STATE_STOP_FULL, &mvmtxq->state); if (start && mvmsta->sta_state != IEEE80211_STA_NOTEXIST) iwl_mvm_mac_itxq_xmit(mvm->hw, txq); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 69634fb82a9bf..21ad7b85c434c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -693,7 +693,7 @@ static int iwl_mvm_redirect_queue(struct iwl_mvm *mvm, int queue, int tid, queue, iwl_mvm_ac_to_tx_fifo[ac]); /* Stop the queue and wait for it to empty */ - txq->stopped = true; + set_bit(IWL_MVM_TXQ_STATE_STOP_REDIRECT, &txq->state); ret = iwl_trans_wait_tx_queues_empty(mvm->trans, BIT(queue)); if (ret) { @@ -736,7 +736,7 @@ static int iwl_mvm_redirect_queue(struct iwl_mvm *mvm, int queue, int tid, out: /* Continue using the queue */ - txq->stopped = false; + clear_bit(IWL_MVM_TXQ_STATE_STOP_REDIRECT, &txq->state); return ret; } From 923bf981eb6ecc027227716e30701bdcc1845fbf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 17 Mar 2023 10:53:25 +0100 Subject: [PATCH 303/898] wifi: iwlwifi: mvm: protect TXQ list manipulation Some recent upstream debugging uncovered the fact that in iwlwifi, the TXQ list manipulation is racy. Introduce a new state bit for when the TXQ is completely ready and can be used without locking, and if that's not set yet acquire the lock to check everything correctly. Reviewed-by: Benjamin Berg Tested-by: Jose Ignacio Tornos Martinez Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 45 ++++++------------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 + drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 1 + drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 25 +++++++++-- 4 files changed, 39 insertions(+), 34 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index f81c609ecf58d..b55b1b17f4d19 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -760,42 +760,25 @@ static void iwl_mvm_mac_wake_tx_queue(struct ieee80211_hw *hw, struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); struct iwl_mvm_txq *mvmtxq = iwl_mvm_txq_from_mac80211(txq); - /* - * Please note that racing is handled very carefully here: - * mvmtxq->txq_id is updated during allocation, and mvmtxq->list is - * deleted afterwards. - * This means that if: - * mvmtxq->txq_id != INVALID_QUEUE && list_empty(&mvmtxq->list): - * queue is allocated and we can TX. - * mvmtxq->txq_id != INVALID_QUEUE && !list_empty(&mvmtxq->list): - * a race, should defer the frame. - * mvmtxq->txq_id == INVALID_QUEUE && list_empty(&mvmtxq->list): - * need to allocate the queue and defer the frame. - * mvmtxq->txq_id == INVALID_QUEUE && !list_empty(&mvmtxq->list): - * queue is already scheduled for allocation, no need to allocate, - * should defer the frame. - */ - - /* If the queue is allocated TX and return. */ - if (!txq->sta || mvmtxq->txq_id != IWL_MVM_INVALID_QUEUE) { - /* - * Check that list is empty to avoid a race where txq_id is - * already updated, but the queue allocation work wasn't - * finished - */ - if (unlikely(txq->sta && !list_empty(&mvmtxq->list))) - return; - + if (likely(test_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state)) || + !txq->sta) { iwl_mvm_mac_itxq_xmit(hw, txq); return; } - /* The list is being deleted only after the queue is fully allocated. */ - if (!list_empty(&mvmtxq->list)) - return; + /* iwl_mvm_mac_itxq_xmit() will later be called by the worker + * to handle any packets we leave on the txq now + */ - list_add_tail(&mvmtxq->list, &mvm->add_stream_txqs); - schedule_work(&mvm->add_stream_wk); + spin_lock_bh(&mvm->add_stream_lock); + /* The list is being deleted only after the queue is fully allocated. */ + if (list_empty(&mvmtxq->list) && + /* recheck under lock */ + !test_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state)) { + list_add_tail(&mvmtxq->list, &mvm->add_stream_txqs); + schedule_work(&mvm->add_stream_wk); + } + spin_unlock_bh(&mvm->add_stream_lock); } #define CHECK_BA_TRIGGER(_mvm, _trig, _tid_bm, _tid, _fmt...) \ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 421d2649b0f0c..f307c345dfa0b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -731,6 +731,7 @@ struct iwl_mvm_txq { atomic_t tx_request; #define IWL_MVM_TXQ_STATE_STOP_FULL 0 #define IWL_MVM_TXQ_STATE_STOP_REDIRECT 1 +#define IWL_MVM_TXQ_STATE_READY 2 unsigned long state; }; @@ -829,6 +830,7 @@ struct iwl_mvm { struct iwl_mvm_tvqm_txq_info tvqm_info[IWL_MAX_TVQM_QUEUES]; }; struct work_struct add_stream_wk; /* To add streams to queues */ + spinlock_t add_stream_lock; const char *nvm_file_name; struct iwl_nvm_data *nvm_data; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index efad8f92d1321..9711841bb4564 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1195,6 +1195,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, INIT_DELAYED_WORK(&mvm->scan_timeout_dwork, iwl_mvm_scan_timeout_wk); INIT_WORK(&mvm->add_stream_wk, iwl_mvm_add_new_dqa_stream_wk); INIT_LIST_HEAD(&mvm->add_stream_txqs); + spin_lock_init(&mvm->add_stream_lock); init_waitqueue_head(&mvm->rx_sync_waitq); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 21ad7b85c434c..9caae77995ca9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -384,8 +384,11 @@ static int iwl_mvm_disable_txq(struct iwl_mvm *mvm, struct ieee80211_sta *sta, struct iwl_mvm_txq *mvmtxq = iwl_mvm_txq_from_tid(sta, tid); - mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE; + spin_lock_bh(&mvm->add_stream_lock); list_del_init(&mvmtxq->list); + clear_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state); + mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE; + spin_unlock_bh(&mvm->add_stream_lock); } /* Regardless if this is a reserved TXQ for a STA - mark it as false */ @@ -479,8 +482,11 @@ static int iwl_mvm_remove_sta_queue_marking(struct iwl_mvm *mvm, int queue) disable_agg_tids |= BIT(tid); mvmsta->tid_data[tid].txq_id = IWL_MVM_INVALID_QUEUE; - mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE; + spin_lock_bh(&mvm->add_stream_lock); list_del_init(&mvmtxq->list); + clear_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state); + mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE; + spin_unlock_bh(&mvm->add_stream_lock); } mvmsta->tfd_queue_msk &= ~BIT(queue); /* Don't use this queue anymore */ @@ -1444,12 +1450,22 @@ void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk) * a queue in the function itself. */ if (iwl_mvm_sta_alloc_queue(mvm, txq->sta, txq->ac, tid)) { + spin_lock_bh(&mvm->add_stream_lock); list_del_init(&mvmtxq->list); + spin_unlock_bh(&mvm->add_stream_lock); continue; } - list_del_init(&mvmtxq->list); + /* now we're ready, any remaining races/concurrency will be + * handled in iwl_mvm_mac_itxq_xmit() + */ + set_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state); + local_bh_disable(); + spin_lock(&mvm->add_stream_lock); + list_del_init(&mvmtxq->list); + spin_unlock(&mvm->add_stream_lock); + iwl_mvm_mac_itxq_xmit(mvm->hw, txq); local_bh_enable(); } @@ -1864,8 +1880,11 @@ static void iwl_mvm_disable_sta_queues(struct iwl_mvm *mvm, struct iwl_mvm_txq *mvmtxq = iwl_mvm_txq_from_mac80211(sta->txq[i]); + spin_lock_bh(&mvm->add_stream_lock); mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE; list_del_init(&mvmtxq->list); + clear_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state); + spin_unlock_bh(&mvm->add_stream_lock); } } From 4e348c6c6e23491ae6eb5e077848a42d0562339c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 14 Mar 2023 10:59:50 +0100 Subject: [PATCH 304/898] wifi: mac80211: fix qos on mesh interfaces When ieee80211_select_queue is called for mesh, the sta pointer is usually NULL, since the nexthop is looked up much later in the tx path. Explicitly check for unicast address in that case in order to make qos work again. Cc: stable@vger.kernel.org Fixes: 50e2ab392919 ("wifi: mac80211: fix queue selection for mesh/OCB interfaces") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20230314095956.62085-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/wme.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index a12c636386801..1601be5764145 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -147,6 +147,7 @@ u16 ieee80211_select_queue_80211(struct ieee80211_sub_if_data *sdata, u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct sk_buff *skb) { + const struct ethhdr *eth = (void *)skb->data; struct mac80211_qos_map *qos_map; bool qos; @@ -154,8 +155,9 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, skb_get_hash(skb); /* all mesh/ocb stations are required to support WME */ - if (sta && (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || - sdata->vif.type == NL80211_IFTYPE_OCB)) + if ((sdata->vif.type == NL80211_IFTYPE_MESH_POINT && + !is_multicast_ether_addr(eth->h_dest)) || + (sdata->vif.type == NL80211_IFTYPE_OCB && sta)) qos = true; else if (sta) qos = sta->sta.wme; From f355f70145744518ca1d9799b42f4a8da9aa0d36 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 14 Mar 2023 10:59:52 +0100 Subject: [PATCH 305/898] wifi: mac80211: fix mesh path discovery based on unicast packets If a packet has reached its intended destination, it was bumped to the code that accepts it, without first checking if a mesh_path needs to be created based on the discovered source. Fix this by moving the destination address check further down. Cc: stable@vger.kernel.org Fixes: 986e43b19ae9 ("wifi: mac80211: fix receiving A-MSDU frames on mesh interfaces") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20230314095956.62085-3-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f7fdfe710951f..e8de500eb9f3c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2765,17 +2765,6 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta mesh_rmc_check(sdata, eth->h_source, mesh_hdr)) return RX_DROP_MONITOR; - /* Frame has reached destination. Don't forward */ - if (ether_addr_equal(sdata->vif.addr, eth->h_dest)) - goto rx_accept; - - if (!ifmsh->mshcfg.dot11MeshForwarding) { - if (is_multicast_ether_addr(eth->h_dest)) - goto rx_accept; - - return RX_DROP_MONITOR; - } - /* forward packet */ if (sdata->crypto_tx_tailroom_needed_cnt) tailroom = IEEE80211_ENCRYPT_TAILROOM; @@ -2814,6 +2803,17 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta rcu_read_unlock(); } + /* Frame has reached destination. Don't forward */ + if (ether_addr_equal(sdata->vif.addr, eth->h_dest)) + goto rx_accept; + + if (!ifmsh->mshcfg.dot11MeshForwarding) { + if (is_multicast_ether_addr(eth->h_dest)) + goto rx_accept; + + return RX_DROP_MONITOR; + } + skb_set_queue_mapping(skb, ieee802_1d_to_ac[skb->priority]); ieee80211_fill_mesh_addresses(&hdr, &hdr.frame_control, From e51f49512d98783b90799c9cc2002895ec3aa0eb Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Wed, 22 Mar 2023 10:55:38 +0200 Subject: [PATCH 306/898] ASoC: SOF: ipc4: Ensure DSP is in D0I0 during sof_ipc4_set_get_data() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The set_get_data() IPC op bypasses the check for the no_pm flag as done with the regular IPC tx_msg op. Since set_get_data should be performed when the DSP is in D0I0, set the DSP power state to D0I0 before sending the IPC's in sof_ipc4_set_get_data(). Fixes: ceb89acc4dc8 ("ASoC: SOF: ipc4: Add support for mandatory message handling functionality") Signed-off-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Pierre-Louis Bossart Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230322085538.10214-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/sof/ipc4.c b/sound/soc/sof/ipc4.c index 8ede4b9529978..246b56d24a6f1 100644 --- a/sound/soc/sof/ipc4.c +++ b/sound/soc/sof/ipc4.c @@ -405,6 +405,9 @@ static int sof_ipc4_tx_msg(struct snd_sof_dev *sdev, void *msg_data, size_t msg_ static int sof_ipc4_set_get_data(struct snd_sof_dev *sdev, void *data, size_t payload_bytes, bool set) { + const struct sof_dsp_power_state target_state = { + .state = SOF_DSP_PM_D0, + }; size_t payload_limit = sdev->ipc->max_payload_size; struct sof_ipc4_msg *ipc4_msg = data; struct sof_ipc4_msg tx = {{ 0 }}; @@ -435,6 +438,11 @@ static int sof_ipc4_set_get_data(struct snd_sof_dev *sdev, void *data, tx.extension |= SOF_IPC4_MOD_EXT_MSG_FIRST_BLOCK(1); + /* ensure the DSP is in D0i0 before sending IPC */ + ret = snd_sof_dsp_set_power_state(sdev, &target_state); + if (ret < 0) + return ret; + /* Serialise IPC TX */ mutex_lock(&sdev->ipc->tx_mutex); From d701cf6578e8447af4ac0fa08e7c5a0fad6df1ae Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 21 Feb 2023 17:10:43 +0100 Subject: [PATCH 307/898] MAINTAINERS: Update s390-iommu driver maintainer information The s390 DMA API conversion changes currently under review will extend the use of the s390-iommu driver to the DMA API. With s390's mandatory use of an IOMMU this means all DMA for PCI devices will then use the s390-iommu driver. With this in mind and considering my involvement in these changes it makes sense to reflect this increased interdependence in the maintainer structure. Thus add myself as first maintainer and move Gerald to reviewer status. Reviewed-by: Gerald Schaefer Reviewed-by: Matthew Rosato Acked-by: Peter Oberparleiter Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20230221161043.37065-1-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d8ebab595b2a9..33a61d516446c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18291,8 +18291,9 @@ F: drivers/s390/block/dasd* F: include/linux/dasd_mod.h S390 IOMMU (PCI) +M: Niklas Schnelle M: Matthew Rosato -M: Gerald Schaefer +R: Gerald Schaefer L: linux-s390@vger.kernel.org S: Supported F: drivers/iommu/s390-iommu.c From caa0708a81d6a2217c942959ef40d515ec1d3108 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 28 Feb 2023 10:01:42 +0900 Subject: [PATCH 308/898] bootconfig: Change message if no bootconfig with CONFIG_BOOT_CONFIG_FORCE=y Change no bootconfig data error message if user do not specify 'bootconfig' option but CONFIG_BOOT_CONFIG_FORCE=y. With CONFIG_BOOT_CONFIG_FORCE=y, the kernel proceeds bootconfig check even if user does not specify 'bootconfig' option. So the current error message is confusing. Let's show just an information message to notice skipping the bootconfig in that case. Link: https://lore.kernel.org/all/167754610254.318944.16848412476667893329.stgit@devnote2/ Fixes: b743852ccc1d ("Allow forcing unconditional bootconfig processing") Reported-by: Geert Uytterhoeven Link: https://lore.kernel.org/all/CAMuHMdV9jJvE2y8gY5V_CxidUikCf5515QMZHzTA3rRGEOj6=w@mail.gmail.com/ Suggested-by: Paul E. McKenney Signed-off-by: Masami Hiramatsu (Google) Tested-by: Paul E. McKenney Acked-by: Mukesh Ojha --- init/main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/init/main.c b/init/main.c index 4425d1783d5c2..bb87b789c5439 100644 --- a/init/main.c +++ b/init/main.c @@ -156,7 +156,7 @@ static char *extra_init_args; #ifdef CONFIG_BOOT_CONFIG /* Is bootconfig on command line? */ -static bool bootconfig_found = IS_ENABLED(CONFIG_BOOT_CONFIG_FORCE); +static bool bootconfig_found; static size_t initargs_offs; #else # define bootconfig_found false @@ -429,7 +429,7 @@ static void __init setup_boot_config(void) err = parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL, bootconfig_params); - if (IS_ERR(err) || !bootconfig_found) + if (IS_ERR(err) || !(bootconfig_found || IS_ENABLED(CONFIG_BOOT_CONFIG_FORCE))) return; /* parse_args() stops at the next param of '--' and returns an address */ @@ -437,7 +437,11 @@ static void __init setup_boot_config(void) initargs_offs = err - tmp_cmdline; if (!data) { - pr_err("'bootconfig' found on command line, but no bootconfig found\n"); + /* If user intended to use bootconfig, show an error level message */ + if (bootconfig_found) + pr_err("'bootconfig' found on command line, but no bootconfig found\n"); + else + pr_info("No bootconfig data provided, so skipping bootconfig"); return; } From 583329dcf22e568a328a944f20427ccfc95dce01 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Sun, 19 Mar 2023 20:32:18 -0400 Subject: [PATCH 309/898] platform/x86: think-lmi: add missing type attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver was missing the mandatory type attribute...oops. Add it in along with logic to determine whether the attribute is an enumeration type or a string by parsing the possible_values attribute. Upstream bug https://bugzilla.kernel.org/show_bug.cgi?id=216460 Fixes: a40cd7ef22fb ("platform/x86: think-lmi: Add WMI interface support on Lenovo platforms") Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20230320003221.561750-1-mpearson-lenovo@squebb.ca Reviewed-by: Thomas Weißschuh Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/think-lmi.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 86b33b74519be..5fa5451c48026 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -947,6 +947,20 @@ static ssize_t possible_values_show(struct kobject *kobj, struct kobj_attribute return sysfs_emit(buf, "%s\n", setting->possible_values); } +static ssize_t type_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj); + + if (setting->possible_values) { + /* Figure out what setting type is as BIOS does not return this */ + if (strchr(setting->possible_values, ',')) + return sysfs_emit(buf, "enumeration\n"); + } + /* Anything else is going to be a string */ + return sysfs_emit(buf, "string\n"); +} + static ssize_t current_value_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -1036,10 +1050,13 @@ static struct kobj_attribute attr_possible_values = __ATTR_RO(possible_values); static struct kobj_attribute attr_current_val = __ATTR_RW_MODE(current_value, 0600); +static struct kobj_attribute attr_type = __ATTR_RO(type); + static struct attribute *tlmi_attrs[] = { &attr_displ_name.attr, &attr_current_val.attr, &attr_possible_values.attr, + &attr_type.attr, NULL }; From 45e21289bfc6e257885514790a8a8887da822d40 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Sun, 19 Mar 2023 20:32:19 -0400 Subject: [PATCH 310/898] platform/x86: think-lmi: use correct possible_values delimiters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit firmware-attributes class requires that possible values are delimited using ';' but the Lenovo firmware uses ',' instead. Parse string and replace where appropriate. Suggested-by: Thomas Weißschuh Fixes: a40cd7ef22fb ("platform/x86: think-lmi: Add WMI interface support on Lenovo platforms") Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20230320003221.561750-2-mpearson-lenovo@squebb.ca Reviewed-by: Thomas Weißschuh Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/think-lmi.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 5fa5451c48026..e190fec260214 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -954,7 +954,7 @@ static ssize_t type_show(struct kobject *kobj, struct kobj_attribute *attr, if (setting->possible_values) { /* Figure out what setting type is as BIOS does not return this */ - if (strchr(setting->possible_values, ',')) + if (strchr(setting->possible_values, ';')) return sysfs_emit(buf, "enumeration\n"); } /* Anything else is going to be a string */ @@ -1441,6 +1441,13 @@ static int tlmi_analyze(void) pr_info("Error retrieving possible values for %d : %s\n", i, setting->display_name); } + /* + * firmware-attributes requires that possible_values are separated by ';' but + * Lenovo FW uses ','. Replace appropriately. + */ + if (setting->possible_values) + strreplace(setting->possible_values, ',', ';'); + kobject_init(&setting->kobj, &tlmi_attr_setting_ktype); tlmi_priv.setting[i] = setting; kfree(item); From c0e0421a60bf468e88cf569fbd727346b138ed04 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Mar 2023 17:52:33 +0100 Subject: [PATCH 311/898] ACPI: processor: Reorder acpi_processor_driver_init() The cpufreq policy notifier in the ACPI processor driver may as well be registered before the driver itself, which causes acpi_processor_cpufreq_init to be true (unless the notifier registration fails, which is unlikely at that point) when the ACPI CPU thermal cooling devices are registered, so the processor_get_max_state() result does not change while acpi_processor_driver_init() is running. Change the ordering in acpi_processor_driver_init() accordingly to prevent the max_state value from remaining 0 permanently for all ACPI CPU cooling devices due to setting acpi_processor_cpufreq_init too late. [Note that processor_get_max_state() may still return different values at different times after this change, depending on the cpufreq driver registration time, but that issue needs to be addressed separately.] Fixes: a365105c685c("thermal: sysfs: Reuse cdev->max_state") Reported-by: Wang, Quanxian Link: https://lore.kernel.org/linux-pm/53ec1f06f61c984100868926f282647e57ecfb2d.camel@intel.com Signed-off-by: Rafael J. Wysocki Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/acpi/processor_driver.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 1278969eec1f9..4bd16b3f07814 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -263,6 +263,12 @@ static int __init acpi_processor_driver_init(void) if (acpi_disabled) return 0; + if (!cpufreq_register_notifier(&acpi_processor_notifier_block, + CPUFREQ_POLICY_NOTIFIER)) { + acpi_processor_cpufreq_init = true; + acpi_processor_ignore_ppc_init(); + } + result = driver_register(&acpi_processor_driver); if (result < 0) return result; @@ -276,12 +282,6 @@ static int __init acpi_processor_driver_init(void) cpuhp_setup_state_nocalls(CPUHP_ACPI_CPUDRV_DEAD, "acpi/cpu-drv:dead", NULL, acpi_soft_cpu_dead); - if (!cpufreq_register_notifier(&acpi_processor_notifier_block, - CPUFREQ_POLICY_NOTIFIER)) { - acpi_processor_cpufreq_init = true; - acpi_processor_ignore_ppc_init(); - } - acpi_processor_throttling_init(); return 0; err: From c43198af05cffa5de8d4f356c40ce4bdca066272 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Mar 2023 17:54:34 +0100 Subject: [PATCH 312/898] thermal: core: Introduce thermal_cooling_device_present() Introduce a helper function, thermal_cooling_device_present(), for checking if the given cooling device is in the list of registered cooling devices to avoid some code duplication in a subsequent patch. No expected functional impact. Signed-off-by: Rafael J. Wysocki Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/thermal_core.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 55679fd86505d..8894342540f11 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1045,6 +1045,18 @@ devm_thermal_of_cooling_device_register(struct device *dev, } EXPORT_SYMBOL_GPL(devm_thermal_of_cooling_device_register); +static bool thermal_cooling_device_present(struct thermal_cooling_device *cdev) +{ + struct thermal_cooling_device *pos = NULL; + + list_for_each_entry(pos, &thermal_cdev_list, node) { + if (pos == cdev) + return true; + } + + return false; +} + static void __unbind(struct thermal_zone_device *tz, int mask, struct thermal_cooling_device *cdev) { @@ -1067,20 +1079,17 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev) int i; const struct thermal_zone_params *tzp; struct thermal_zone_device *tz; - struct thermal_cooling_device *pos = NULL; if (!cdev) return; mutex_lock(&thermal_list_lock); - list_for_each_entry(pos, &thermal_cdev_list, node) - if (pos == cdev) - break; - if (pos != cdev) { - /* thermal cooling device not found */ + + if (!thermal_cooling_device_present(cdev)) { mutex_unlock(&thermal_list_lock); return; } + list_del(&cdev->node); /* Unbind all thermal zones associated with 'this' cdev */ From 790930f44289c8209c57461b2db499fcc702e0b3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Mar 2023 18:01:26 +0100 Subject: [PATCH 313/898] thermal: core: Introduce thermal_cooling_device_update() Introduce a core thermal API function, thermal_cooling_device_update(), for updating the max_state value for a cooling device and rearranging its statistics in sysfs after a possible change of its ->get_max_state() callback return value. That callback is now invoked only once, during cooling device registration, to populate the max_state field in the cooling device object, so if its return value changes, it needs to be invoked again and the new return value needs to be stored as max_state. Moreover, the statistics presented in sysfs need to be rearranged in general, because there may not be enough room in them to store data for all of the possible states (in the case when max_state grows). The new function takes care of that (and some other minor things related to it), but some extra locking and lockdep annotations are added in several places too to protect against crashes in the cases when the statistics are not present or when a stale max_state value might be used by sysfs attributes. Note that the actual user of the new function will be added separately. Link: https://lore.kernel.org/linux-pm/53ec1f06f61c984100868926f282647e57ecfb2d.camel@intel.com/ Signed-off-by: Rafael J. Wysocki Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/thermal_core.c | 83 ++++++++++++++++++++++++++++++++- drivers/thermal/thermal_core.h | 2 + drivers/thermal/thermal_sysfs.c | 74 +++++++++++++++++++++++++---- include/linux/thermal.h | 1 + 4 files changed, 150 insertions(+), 10 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 8894342540f11..cfd4c1afeae77 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -613,6 +613,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, struct thermal_instance *pos; struct thermal_zone_device *pos1; struct thermal_cooling_device *pos2; + bool upper_no_limit; int result; if (trip >= tz->num_trips || trip < 0) @@ -632,7 +633,13 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, /* lower default 0, upper default max_state */ lower = lower == THERMAL_NO_LIMIT ? 0 : lower; - upper = upper == THERMAL_NO_LIMIT ? cdev->max_state : upper; + + if (upper == THERMAL_NO_LIMIT) { + upper = cdev->max_state; + upper_no_limit = true; + } else { + upper_no_limit = false; + } if (lower > upper || upper > cdev->max_state) return -EINVAL; @@ -644,6 +651,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, dev->cdev = cdev; dev->trip = trip; dev->upper = upper; + dev->upper_no_limit = upper_no_limit; dev->lower = lower; dev->target = THERMAL_NO_TARGET; dev->weight = weight; @@ -1057,6 +1065,79 @@ static bool thermal_cooling_device_present(struct thermal_cooling_device *cdev) return false; } +/** + * thermal_cooling_device_update - Update a cooling device object + * @cdev: Target cooling device. + * + * Update @cdev to reflect a change of the underlying hardware or platform. + * + * Must be called when the maximum cooling state of @cdev becomes invalid and so + * its .get_max_state() callback needs to be run to produce the new maximum + * cooling state value. + */ +void thermal_cooling_device_update(struct thermal_cooling_device *cdev) +{ + struct thermal_instance *ti; + unsigned long state; + + if (IS_ERR_OR_NULL(cdev)) + return; + + /* + * Hold thermal_list_lock throughout the update to prevent the device + * from going away while being updated. + */ + mutex_lock(&thermal_list_lock); + + if (!thermal_cooling_device_present(cdev)) + goto unlock_list; + + /* + * Update under the cdev lock to prevent the state from being set beyond + * the new limit concurrently. + */ + mutex_lock(&cdev->lock); + + if (cdev->ops->get_max_state(cdev, &cdev->max_state)) + goto unlock; + + thermal_cooling_device_stats_reinit(cdev); + + list_for_each_entry(ti, &cdev->thermal_instances, cdev_node) { + if (ti->upper == cdev->max_state) + continue; + + if (ti->upper < cdev->max_state) { + if (ti->upper_no_limit) + ti->upper = cdev->max_state; + + continue; + } + + ti->upper = cdev->max_state; + if (ti->lower > ti->upper) + ti->lower = ti->upper; + + if (ti->target == THERMAL_NO_TARGET) + continue; + + if (ti->target > ti->upper) + ti->target = ti->upper; + } + + if (cdev->ops->get_cur_state(cdev, &state) || state > cdev->max_state) + goto unlock; + + thermal_cooling_device_stats_update(cdev, state); + +unlock: + mutex_unlock(&cdev->lock); + +unlock_list: + mutex_unlock(&thermal_list_lock); +} +EXPORT_SYMBOL_GPL(thermal_cooling_device_update); + static void __unbind(struct thermal_zone_device *tz, int mask, struct thermal_cooling_device *cdev) { diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 7af54382e9151..3d4a787c6b28a 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -101,6 +101,7 @@ struct thermal_instance { struct list_head tz_node; /* node in tz->thermal_instances */ struct list_head cdev_node; /* node in cdev->thermal_instances */ unsigned int weight; /* The weight of the cooling device */ + bool upper_no_limit; }; #define to_thermal_zone(_dev) \ @@ -127,6 +128,7 @@ int thermal_zone_create_device_groups(struct thermal_zone_device *, int); void thermal_zone_destroy_device_groups(struct thermal_zone_device *); void thermal_cooling_device_setup_sysfs(struct thermal_cooling_device *); void thermal_cooling_device_destroy_sysfs(struct thermal_cooling_device *cdev); +void thermal_cooling_device_stats_reinit(struct thermal_cooling_device *cdev); /* used only at binding time */ ssize_t trip_point_show(struct device *, struct device_attribute *, char *); ssize_t weight_show(struct device *, struct device_attribute *, char *); diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index cef860deaf912..a4aba7b8bb8b9 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -685,6 +685,8 @@ void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev, { struct cooling_dev_stats *stats = cdev->stats; + lockdep_assert_held(&cdev->lock); + if (!stats) return; @@ -706,13 +708,22 @@ static ssize_t total_trans_show(struct device *dev, struct device_attribute *attr, char *buf) { struct thermal_cooling_device *cdev = to_cooling_device(dev); - struct cooling_dev_stats *stats = cdev->stats; - int ret; + struct cooling_dev_stats *stats; + int ret = 0; + + mutex_lock(&cdev->lock); + + stats = cdev->stats; + if (!stats) + goto unlock; spin_lock(&stats->lock); ret = sprintf(buf, "%u\n", stats->total_trans); spin_unlock(&stats->lock); +unlock: + mutex_unlock(&cdev->lock); + return ret; } @@ -721,11 +732,18 @@ time_in_state_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { struct thermal_cooling_device *cdev = to_cooling_device(dev); - struct cooling_dev_stats *stats = cdev->stats; + struct cooling_dev_stats *stats; ssize_t len = 0; int i; + mutex_lock(&cdev->lock); + + stats = cdev->stats; + if (!stats) + goto unlock; + spin_lock(&stats->lock); + update_time_in_state(stats); for (i = 0; i <= cdev->max_state; i++) { @@ -734,6 +752,9 @@ time_in_state_ms_show(struct device *dev, struct device_attribute *attr, } spin_unlock(&stats->lock); +unlock: + mutex_unlock(&cdev->lock); + return len; } @@ -742,8 +763,16 @@ reset_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct thermal_cooling_device *cdev = to_cooling_device(dev); - struct cooling_dev_stats *stats = cdev->stats; - int i, states = cdev->max_state + 1; + struct cooling_dev_stats *stats; + int i, states; + + mutex_lock(&cdev->lock); + + stats = cdev->stats; + if (!stats) + goto unlock; + + states = cdev->max_state + 1; spin_lock(&stats->lock); @@ -757,6 +786,9 @@ reset_store(struct device *dev, struct device_attribute *attr, const char *buf, spin_unlock(&stats->lock); +unlock: + mutex_unlock(&cdev->lock); + return count; } @@ -764,10 +796,18 @@ static ssize_t trans_table_show(struct device *dev, struct device_attribute *attr, char *buf) { struct thermal_cooling_device *cdev = to_cooling_device(dev); - struct cooling_dev_stats *stats = cdev->stats; + struct cooling_dev_stats *stats; ssize_t len = 0; int i, j; + mutex_lock(&cdev->lock); + + stats = cdev->stats; + if (!stats) { + len = -ENODATA; + goto unlock; + } + len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n"); len += snprintf(buf + len, PAGE_SIZE - len, " : "); for (i = 0; i <= cdev->max_state; i++) { @@ -775,8 +815,10 @@ static ssize_t trans_table_show(struct device *dev, break; len += snprintf(buf + len, PAGE_SIZE - len, "state%2u ", i); } - if (len >= PAGE_SIZE) - return PAGE_SIZE; + if (len >= PAGE_SIZE) { + len = PAGE_SIZE; + goto unlock; + } len += snprintf(buf + len, PAGE_SIZE - len, "\n"); @@ -799,8 +841,12 @@ static ssize_t trans_table_show(struct device *dev, if (len >= PAGE_SIZE) { pr_warn_once("Thermal transition table exceeds PAGE_SIZE. Disabling\n"); - return -EFBIG; + len = -EFBIG; } + +unlock: + mutex_unlock(&cdev->lock); + return len; } @@ -830,6 +876,8 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev) unsigned long states = cdev->max_state + 1; int var; + lockdep_assert_held(&cdev->lock); + var = sizeof(*stats); var += sizeof(*stats->time_in_state) * states; var += sizeof(*stats->trans_table) * states * states; @@ -855,6 +903,8 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev) static void cooling_device_stats_destroy(struct thermal_cooling_device *cdev) { + lockdep_assert_held(&cdev->lock); + kfree(cdev->stats); cdev->stats = NULL; } @@ -879,6 +929,12 @@ void thermal_cooling_device_destroy_sysfs(struct thermal_cooling_device *cdev) cooling_device_stats_destroy(cdev); } +void thermal_cooling_device_stats_reinit(struct thermal_cooling_device *cdev) +{ + cooling_device_stats_destroy(cdev); + cooling_device_stats_setup(cdev); +} + /* these helper will be used only at the time of bindig */ ssize_t trip_point_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 2bb4bf33f4f32..13c6aaed18df3 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -384,6 +384,7 @@ devm_thermal_of_cooling_device_register(struct device *dev, struct device_node *np, char *type, void *devdata, const struct thermal_cooling_device_ops *ops); +void thermal_cooling_device_update(struct thermal_cooling_device *); void thermal_cooling_device_unregister(struct thermal_cooling_device *); struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name); int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); From 22c52fa5155a2f48aedb0f675903b20457285a27 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Mar 2023 18:03:40 +0100 Subject: [PATCH 314/898] ACPI: processor: thermal: Update CPU cooling devices on cpufreq policy changes When a cpufreq policy appears or goes away, the CPU cooling devices for the CPUs covered by that policy need to be updated so that the new processor_get_max_state() value is stored as max_state and the statistics in sysfs are rearranged for each of them. Do that accordingly in acpi_thermal_cpufreq_init() and acpi_thermal_cpufreq_exit(). Fixes: a365105c685c("thermal: sysfs: Reuse cdev->max_state") Reported-by: Wang, Quanxian Link: https://lore.kernel.org/linux-pm/53ec1f06f61c984100868926f282647e57ecfb2d.camel@intel.com Signed-off-by: Rafael J. Wysocki Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/acpi/processor_thermal.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index e534fd49a67e5..b7c6287eccca2 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -140,9 +140,13 @@ void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy) ret = freq_qos_add_request(&policy->constraints, &pr->thermal_req, FREQ_QOS_MAX, INT_MAX); - if (ret < 0) + if (ret < 0) { pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu, ret); + continue; + } + + thermal_cooling_device_update(pr->cdev); } } @@ -153,8 +157,12 @@ void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy) for_each_cpu(cpu, policy->related_cpus) { struct acpi_processor *pr = per_cpu(processors, cpu); - if (pr) - freq_qos_remove_request(&pr->thermal_req); + if (!pr) + continue; + + freq_qos_remove_request(&pr->thermal_req); + + thermal_cooling_device_update(pr->cdev); } } #else /* ! CONFIG_CPU_FREQ */ From cf337f27f3bfc4aeab4954c468239fd6233c7638 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Sun, 19 Mar 2023 20:32:20 -0400 Subject: [PATCH 315/898] platform/x86: think-lmi: only display possible_values if available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some attributes don't have any values available. In those cases don't make the possible_values entry visible. Fixes: a40cd7ef22fb ("platform/x86: think-lmi: Add WMI interface support on Lenovo platforms") Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20230320003221.561750-3-mpearson-lenovo@squebb.ca Reviewed-by: Thomas Weißschuh Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/think-lmi.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index e190fec260214..3f0641360251f 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -941,9 +941,6 @@ static ssize_t possible_values_show(struct kobject *kobj, struct kobj_attribute { struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj); - if (!tlmi_priv.can_get_bios_selections) - return -EOPNOTSUPP; - return sysfs_emit(buf, "%s\n", setting->possible_values); } @@ -1052,6 +1049,18 @@ static struct kobj_attribute attr_current_val = __ATTR_RW_MODE(current_value, 06 static struct kobj_attribute attr_type = __ATTR_RO(type); +static umode_t attr_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj); + + /* We don't want to display possible_values attributes if not available */ + if ((attr == &attr_possible_values.attr) && (!setting->possible_values)) + return 0; + + return attr->mode; +} + static struct attribute *tlmi_attrs[] = { &attr_displ_name.attr, &attr_current_val.attr, @@ -1061,6 +1070,7 @@ static struct attribute *tlmi_attrs[] = { }; static const struct attribute_group tlmi_attr_group = { + .is_visible = attr_is_visible, .attrs = tlmi_attrs, }; From 8a02d70679fc1c434401863333c8ea7dbf201494 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Sun, 19 Mar 2023 20:32:21 -0400 Subject: [PATCH 316/898] platform/x86: think-lmi: Add possible_values for ThinkStation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ThinkStation platforms don't support the API to return possible_values but instead embed it in the settings string. Try and extract this information and set the possible_values attribute appropriately. Fixes: a40cd7ef22fb ("platform/x86: think-lmi: Add WMI interface support on Lenovo platforms") Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20230320003221.561750-4-mpearson-lenovo@squebb.ca Reviewed-by: Thomas Weißschuh Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/think-lmi.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 3f0641360251f..c816646eb6616 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -1450,6 +1450,26 @@ static int tlmi_analyze(void) if (ret || !setting->possible_values) pr_info("Error retrieving possible values for %d : %s\n", i, setting->display_name); + } else { + /* + * Older Thinkstations don't support the bios_selections API. + * Instead they store this as a [Optional:Option1,Option2] section of the + * name string. + * Try and pull that out if it's available. + */ + char *item, *optstart, *optend; + + if (!tlmi_setting(setting->index, &item, LENOVO_BIOS_SETTING_GUID)) { + optstart = strstr(item, "[Optional:"); + if (optstart) { + optstart += strlen("[Optional:"); + optend = strstr(optstart, "]"); + if (optend) + setting->possible_values = + kstrndup(optstart, optend - optstart, + GFP_KERNEL); + } + } } /* * firmware-attributes requires that possible_values are separated by ';' but From acd0acb802b90f88d19ad4337183e44fd0f77c50 Mon Sep 17 00:00:00 2001 From: Liang He Date: Wed, 22 Mar 2023 11:30:57 +0800 Subject: [PATCH 317/898] platform/surface: aggregator: Add missing fwnode_handle_put() In fwnode_for_each_child_node(), we should add fwnode_handle_put() when break out of the iteration fwnode_for_each_child_node() as it will automatically increase and decrease the refcounter. Fixes: fc622b3d36e6 ("platform/surface: Set up Surface Aggregator device registry") Signed-off-by: Liang He Reviewed-by: Maximilian Luz Link: https://lore.kernel.org/r/20230322033057.1855741-1-windhl@126.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/surface/aggregator/bus.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/surface/aggregator/bus.c b/drivers/platform/surface/aggregator/bus.c index aaad41294200d..42ccd7f1c9b9c 100644 --- a/drivers/platform/surface/aggregator/bus.c +++ b/drivers/platform/surface/aggregator/bus.c @@ -485,8 +485,10 @@ int __ssam_register_clients(struct device *parent, struct ssam_controller *ctrl, * device, so ignore it and continue with the next one. */ status = ssam_add_client_device(parent, ctrl, child); - if (status && status != -ENODEV) + if (status && status != -ENODEV) { + fwnode_handle_put(child); goto err; + } } return 0; From b8838e653034425cd26983c7d96535e2742a6212 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Mar 2023 12:33:13 +0100 Subject: [PATCH 318/898] arm64: dts: qcom: sc8280xp-x13s: mark s11b regulator as always-on The s11b supply is used by the wlan module (as well as some of the pmics) which are not yet fully described in the devicetree. Mark the regulator as always-on for now. Fixes: 123b30a75623 ("arm64: dts: qcom: sc8280xp-x13s: enable WiFi controller") Cc: stable@vger.kernel.org # 6.2 Signed-off-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230322113318.17908-2-johan+linaro@kernel.org --- arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index fa412bea8985b..532859363a226 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -377,6 +377,7 @@ regulator-min-microvolt = <1272000>; regulator-max-microvolt = <1272000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_s12b: smps12 { From f4472fd33e4751c54efb13d2536261e9273770a9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Mar 2023 12:33:14 +0100 Subject: [PATCH 319/898] arm64: dts: qcom: sc8280xp-x13s: mark s10b regulator as always-on The s10b supply is used by several components that are not (yet) described in devicetree (e.g. ram, charger, ec) and must not be disabled. Mark the regulator as always-on. Fixes: f29077d86652 ("arm64: dts: qcom: sc8280xp-x13s: Add soundcard support") Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230322113318.17908-3-johan+linaro@kernel.org --- arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index 532859363a226..d6f6feee635ec 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -370,6 +370,7 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_s11b: smps11 { From 291e6b6cd7145108d45aeffbac4bb1348fece6b6 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Mar 2023 12:33:15 +0100 Subject: [PATCH 320/898] arm64: dts: qcom: sc8280xp-x13s: mark s12b regulator as always-on The s12b supply is used by several pmic regulators as well as the wlan/bluetooth radio which are not yet fully described in the devicetree. Mark the regulator as always-on for now. Fixes: f29077d86652 ("arm64: dts: qcom: sc8280xp-x13s: Add soundcard support") Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230322113318.17908-4-johan+linaro@kernel.org --- arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index d6f6feee635ec..f327bc72463f4 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -386,6 +386,7 @@ regulator-min-microvolt = <984000>; regulator-max-microvolt = <984000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l3b: ldo3 { From 07b0883e1f09416d07d25a2158f8cd35b732b686 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Mar 2023 12:33:16 +0100 Subject: [PATCH 321/898] arm64: dts: qcom: sc8280xp-x13s: mark bob regulator as always-on The bob supply is used by several pmic regulators and components which are not (yet fully) described in the devicetree. Mark the regulator as always-on for now. Fixes: f29077d86652 ("arm64: dts: qcom: sc8280xp-x13s: Add soundcard support") Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230322113318.17908-5-johan+linaro@kernel.org --- arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index f327bc72463f4..99c6d6574559f 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -444,6 +444,7 @@ regulator-min-microvolt = <3008000>; regulator-max-microvolt = <3960000>; regulator-initial-mode = ; + regulator-always-on; }; }; From bb765a743377d46d8da8e7f7e5128022504741b9 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 21 Mar 2023 12:42:00 +0100 Subject: [PATCH 322/898] mlxsw: spectrum_fid: Fix incorrect local port type Local port is a 10-bit number, but it was mistakenly stored in a u8, resulting in firmware errors when using a netdev corresponding to a local port higher than 255. Fix by storing the local port in u16, as is done in the rest of the code. Fixes: bf73904f5fba ("mlxsw: Add support for 802.1Q FID family") Signed-off-by: Ido Schimmel Reviewed-by: Danielle Ratson Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/eace1f9d96545ab8a2775db857cb7e291a9b166b.1679398549.git.petrm@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index 045a24cacfa51..b6ee2d658b0c4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -1354,7 +1354,7 @@ static int mlxsw_sp_fid_8021q_port_vid_map(struct mlxsw_sp_fid *fid, u16 vid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u8 local_port = mlxsw_sp_port->local_port; + u16 local_port = mlxsw_sp_port->local_port; int err; /* In case there are no {Port, VID} => FID mappings on the port, @@ -1391,7 +1391,7 @@ mlxsw_sp_fid_8021q_port_vid_unmap(struct mlxsw_sp_fid *fid, struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u8 local_port = mlxsw_sp_port->local_port; + u16 local_port = mlxsw_sp_port->local_port; mlxsw_sp_fid_port_vid_list_del(fid, mlxsw_sp_port->local_port, vid); mlxsw_sp_fid_evid_map(fid, local_port, vid, false); From a56cde41340ac4049fa6edac9e6cfbcd2804074e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 14 Feb 2023 15:26:43 +0100 Subject: [PATCH 323/898] dt-bindings: mtd: jedec,spi-nor: Document CPOL/CPHA support SPI EEPROMs typically support both SPI Mode 0 (CPOL=CPHA=0) and Mode 3 (CPOL=CPHA=1). However, using the latter is currently flagged as an error by "make dtbs_check", e.g.: arch/arm/boot/dts/r8a7791-koelsch.dtb: flash@0: Unevaluated properties are not allowed ('spi-cpha', 'spi-cpol' were unexpected) From schema: Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml Fix this by documenting support for CPOL=CPHA=1. Fixes: 233363aba72ac638 ("spi/panel: dt-bindings: drop CPHA and CPOL from common properties") Cc: stable@vger.kernel.org Signed-off-by: Geert Uytterhoeven Reviewed-by: Miquel Raynal Reviewed-by: Krzysztof Kozlowski Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/afe470603028db9374930b0c57464b1f6d52bdd3.1676384304.git.geert+renesas@glider.be --- Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml index 3fe981b14e2cb..54736362378eb 100644 --- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml +++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml @@ -76,6 +76,13 @@ properties: If "broken-flash-reset" is present then having this property does not make any difference. + spi-cpol: true + spi-cpha: true + +dependencies: + spi-cpol: [ spi-cpha ] + spi-cpha: [ spi-cpol ] + unevaluatedProperties: false examples: From 9b043c649022ec55040aa9315cc72059c4ec254c Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 10 Mar 2023 09:54:52 +0100 Subject: [PATCH 324/898] mtd: rawnand: nandsim: Artificially prevent sequential page reads The continuous read support added recently makes nandsim unhappy. Indeed, all the supported commands should be re-encoded into internal commands, so of course there is currently no support for the commands and patterns needed for continuous reads to work. I tried to add support for them but nandsim (which is more a tool to develop/debug upper layers rather than the raw NAND core) suffers from a big limitation: it's internal parser needs to know what exact operation is happening when the address cycles are performed. The research is then sequential from the start up to the address cycles, but does not check what's coming next even though the information is available. This is a limitation which is related to the old API used by the core which kind of forced the controllers to guess what operation was being performed rather early. Today the core uses a more transparent API called ->exec_op() which no longer requires controller drivers to do any more guessing, but despite being updated to ->exec_op(), nandsim is still a bit constrained on this regard and thus cannot handle sequential page reads because the start sequence beginning is identical to a regular page read. If the internal algorithm is updated some day, it should be possible to make it support sequential page reads by adding something like: /* Large page devices continuous read page start */ {OPT_LARGEPAGE, {STATE_CMD_READ0, STATE_ADDR_PAGE, STATE_CMD_READSTART, STATE_CMD_READCACHESEQ | ACTION_CPY, STATE_DATAOUT, STATE_READY}}, /* Large page devices continuous read page continue */ {OPT_LARGEPAGE, {STATE_CMD_READCACHESEQ | ACTION_CPY_NEXT, STATE_DATAOUT, STATE_READY}}, /* Large page devices continuous read page end */ {OPT_LARGEPAGE, {STATE_CMD_READCACHEEND | ACTION_CPY_NEXT, STATE_DATAOUT, STATE_READY}}, For now, we just return -EOPNOTSUPP when the core asks controller drivers if they support the feature in order to prevent any further use of these opcodes. Note: This is a hack, ->exec_op() is not supposed to check against the COMMAND opcodes unless _really_ needed. Fixes: 003fe4b9545b ("mtd: rawnand: Support for sequential cache reads") Reported-by: Zhihao Cheng Link: https://lore.kernel.org/linux-mtd/fd34fe55-7f4a-030d-8653-9bb9cf08410d@huawei.com/ Signed-off-by: Miquel Raynal Tested-by: Zhihao Cheng Acked-by: Richard Weinberger Link: https://lore.kernel.org/linux-mtd/20230310085452.1368716-1-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/nandsim.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c index c21abf7489481..179b28459b4bd 100644 --- a/drivers/mtd/nand/raw/nandsim.c +++ b/drivers/mtd/nand/raw/nandsim.c @@ -2160,8 +2160,23 @@ static int ns_exec_op(struct nand_chip *chip, const struct nand_operation *op, const struct nand_op_instr *instr = NULL; struct nandsim *ns = nand_get_controller_data(chip); - if (check_only) + if (check_only) { + /* The current implementation of nandsim needs to know the + * ongoing operation when performing the address cycles. This + * means it cannot make the difference between a regular read + * and a continuous read. Hence, this hack to manually refuse + * supporting sequential cached operations. + */ + for (op_id = 0; op_id < op->ninstrs; op_id++) { + instr = &op->instrs[op_id]; + if (instr->type == NAND_OP_CMD_INSTR && + (instr->ctx.cmd.opcode == NAND_CMD_READCACHEEND || + instr->ctx.cmd.opcode == NAND_CMD_READCACHESEQ)) + return -EOPNOTSUPP; + } + return 0; + } ns->lines.ce = 1; From e732e39ed9929c05fd219035bc9653ba4100d4fa Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Mon, 13 Mar 2023 10:32:44 +0300 Subject: [PATCH 325/898] mtd: rawnand: meson: invalidate cache on polling ECC bit 'info_buf' memory is cached and driver polls ECC bit in it. This bit is set by the NAND controller. If 'usleep_range()' returns before device sets this bit, 'info_buf' will be cached and driver won't see update of this bit and will loop forever. Fixes: 8fae856c5350 ("mtd: rawnand: meson: add support for Amlogic NAND flash controller") Signed-off-by: Arseniy Krasnov Reviewed-by: Neil Armstrong Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/d4ef0bd6-816e-f6fa-9385-f05f775f0ae2@sberdevices.ru --- drivers/mtd/nand/raw/meson_nand.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index 30e326adabfc1..a28574c009003 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -176,6 +176,7 @@ struct meson_nfc { dma_addr_t daddr; dma_addr_t iaddr; + u32 info_bytes; unsigned long assigned_cs; }; @@ -503,6 +504,7 @@ static int meson_nfc_dma_buffer_setup(struct nand_chip *nand, void *databuf, nfc->daddr, datalen, dir); return ret; } + nfc->info_bytes = infolen; cmd = GENCMDIADDRL(NFC_CMD_AIL, nfc->iaddr); writel(cmd, nfc->reg_base + NFC_REG_CMD); @@ -520,8 +522,10 @@ static void meson_nfc_dma_buffer_release(struct nand_chip *nand, struct meson_nfc *nfc = nand_get_controller_data(nand); dma_unmap_single(nfc->dev, nfc->daddr, datalen, dir); - if (infolen) + if (infolen) { dma_unmap_single(nfc->dev, nfc->iaddr, infolen, dir); + nfc->info_bytes = 0; + } } static int meson_nfc_read_buf(struct nand_chip *nand, u8 *buf, int len) @@ -710,6 +714,8 @@ static void meson_nfc_check_ecc_pages_valid(struct meson_nfc *nfc, usleep_range(10, 15); /* info is updated by nfc dma engine*/ smp_rmb(); + dma_sync_single_for_cpu(nfc->dev, nfc->iaddr, nfc->info_bytes, + DMA_FROM_DEVICE); ret = *info & ECC_COMPLETE; } while (!ret); } From 6214894f49a967c749ee6c07cb00f9cede748df4 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 30 Nov 2022 16:09:11 +0100 Subject: [PATCH 326/898] hvc/xen: prevent concurrent accesses to the shared ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hvc machinery registers both a console and a tty device based on the hv ops provided by the specific implementation. Those two interfaces however have different locks, and there's no single locks that's shared between the tty and the console implementations, hence the driver needs to protect itself against concurrent accesses. Otherwise concurrent calls using the split interfaces are likely to corrupt the ring indexes, leaving the console unusable. Introduce a lock to xencons_info to serialize accesses to the shared ring. This is only required when using the shared memory console, concurrent accesses to the hypercall based console implementation are not an issue. Note the conditional logic in domU_read_console() is slightly modified so the notify_daemon() call can be done outside of the locked region: it's an hypercall and there's no need for it to be done with the lock held. Fixes: b536b4b96230 ('xen: use the hvc console infrastructure for Xen console') Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20221130150919.13935-1-roger.pau@citrix.com Signed-off-by: Juergen Gross --- drivers/tty/hvc/hvc_xen.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 5bddb2f5e9318..98764e740c078 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -43,6 +43,7 @@ struct xencons_info { int irq; int vtermno; grant_ref_t gntref; + spinlock_t ring_lock; }; static LIST_HEAD(xenconsoles); @@ -89,12 +90,15 @@ static int __write_console(struct xencons_info *xencons, XENCONS_RING_IDX cons, prod; struct xencons_interface *intf = xencons->intf; int sent = 0; + unsigned long flags; + spin_lock_irqsave(&xencons->ring_lock, flags); cons = intf->out_cons; prod = intf->out_prod; mb(); /* update queue values before going on */ if ((prod - cons) > sizeof(intf->out)) { + spin_unlock_irqrestore(&xencons->ring_lock, flags); pr_err_once("xencons: Illegal ring page indices"); return -EINVAL; } @@ -104,6 +108,7 @@ static int __write_console(struct xencons_info *xencons, wmb(); /* write ring before updating pointer */ intf->out_prod = prod; + spin_unlock_irqrestore(&xencons->ring_lock, flags); if (sent) notify_daemon(xencons); @@ -146,16 +151,19 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) int recv = 0; struct xencons_info *xencons = vtermno_to_xencons(vtermno); unsigned int eoiflag = 0; + unsigned long flags; if (xencons == NULL) return -EINVAL; intf = xencons->intf; + spin_lock_irqsave(&xencons->ring_lock, flags); cons = intf->in_cons; prod = intf->in_prod; mb(); /* get pointers before reading ring */ if ((prod - cons) > sizeof(intf->in)) { + spin_unlock_irqrestore(&xencons->ring_lock, flags); pr_err_once("xencons: Illegal ring page indices"); return -EINVAL; } @@ -179,10 +187,13 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) xencons->out_cons = intf->out_cons; xencons->out_cons_same = 0; } + if (!recv && xencons->out_cons_same++ > 1) { + eoiflag = XEN_EOI_FLAG_SPURIOUS; + } + spin_unlock_irqrestore(&xencons->ring_lock, flags); + if (recv) { notify_daemon(xencons); - } else if (xencons->out_cons_same++ > 1) { - eoiflag = XEN_EOI_FLAG_SPURIOUS; } xen_irq_lateeoi(xencons->irq, eoiflag); @@ -239,6 +250,7 @@ static int xen_hvm_console_init(void) info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL); if (!info) return -ENOMEM; + spin_lock_init(&info->ring_lock); } else if (info->intf != NULL) { /* already configured */ return 0; @@ -275,6 +287,7 @@ static int xen_hvm_console_init(void) static int xencons_info_pv_init(struct xencons_info *info, int vtermno) { + spin_lock_init(&info->ring_lock); info->evtchn = xen_start_info->console.domU.evtchn; /* GFN == MFN for PV guest */ info->intf = gfn_to_virt(xen_start_info->console.domU.mfn); @@ -325,6 +338,7 @@ static int xen_initial_domain_console_init(void) info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL); if (!info) return -ENOMEM; + spin_lock_init(&info->ring_lock); } info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0, false); @@ -482,6 +496,7 @@ static int xencons_probe(struct xenbus_device *dev, info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL); if (!info) return -ENOMEM; + spin_lock_init(&info->ring_lock); dev_set_drvdata(&dev->dev, info); info->xbdev = dev; info->vtermno = xenbus_devid_to_vtermno(devid); From aadbd07ff8a75ed342388846da78dfaddb8b106a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 21 Mar 2023 09:03:26 +0100 Subject: [PATCH 327/898] x86/PVH: avoid 32-bit build warning when obtaining VGA console info In the commit referenced below I failed to pay attention to this code also being buildable as 32-bit. Adjust the type of "ret" - there's no real need for it to be wider than 32 bits. Fixes: 934ef33ee75c ("x86/PVH: obtain VGA console info in Dom0") Reported-by: kernel test robot Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/2d2193ff-670b-0a27-e12d-2c5c4c121c79@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pvh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 1da44aca896c6..ada3868c02c23 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -48,7 +48,7 @@ void __init xen_pvh_init(struct boot_params *boot_params) struct xen_platform_op op = { .cmd = XENPF_get_dom0_console, }; - long ret = HYPERVISOR_platform_op(&op); + int ret = HYPERVISOR_platform_op(&op); if (ret > 0) xen_init_vga(&op.u.dom0_console, From 5f4efc9dfcfd8440113057290f624ba2c893afb7 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 22 Mar 2023 16:34:04 +0100 Subject: [PATCH 328/898] ALSA: hda/realtek: Fix support for Dell Precision 3260 Unfortunately, in commit 5911d78fabbb a wrong codec patch was selected. The model=alc283-dac-wcaps is equivalent to ALC283_FIXUP_CHROME_BOOK not ALC295_FIXUP_CHROME_BOOK. Fixes: 5911d78fabbb ("ALSA: hda/realtek: Improve support for Dell Precision 3260") Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20230322153404.386473-1-perex@perex.cz Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0ec2c59bb8d56..b501f9489fc1b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9262,7 +9262,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0a62, "Dell Precision 5560", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0a9d, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0a9e, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x0ac9, "Dell Precision 3260", ALC295_FIXUP_CHROME_BOOK), + SND_PCI_QUIRK(0x1028, 0x0ac9, "Dell Precision 3260", ALC283_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x1028, 0x0b19, "Dell XPS 15 9520", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0b1a, "Dell Precision 5570", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0b37, "Dell Inspiron 16 Plus 7620 2-in-1", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), From 915efd8a446b74442039d31689d5d863caf82517 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 21 Mar 2023 14:52:31 +0100 Subject: [PATCH 329/898] xdp: bpf_xdp_metadata use EOPNOTSUPP for no driver support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When driver doesn't implement a bpf_xdp_metadata kfunc the fallback implementation returns EOPNOTSUPP, which indicate device driver doesn't implement this kfunc. Currently many drivers also return EOPNOTSUPP when the hint isn't available, which is ambiguous from an API point of view. Instead change drivers to return ENODATA in these cases. There can be natural cases why a driver doesn't provide any hardware info for a specific hint, even on a frame to frame basis (e.g. PTP). Lets keep these cases as separate return codes. When describing the return values, adjust the function kernel-doc layout to get proper rendering for the return values. Fixes: ab46182d0dcb ("net/mlx4_en: Support RX XDP metadata") Fixes: bc8d405b1ba9 ("net/mlx5e: Support RX XDP metadata") Fixes: 306531f0249f ("veth: Support RX XDP metadata") Fixes: 3d76a4d3d4e5 ("bpf: XDP metadata RX kfuncs") Signed-off-by: Jesper Dangaard Brouer Acked-by: Stanislav Fomichev Acked-by: Toke Høiland-Jørgensen Acked-by: Tariq Toukan Link: https://lore.kernel.org/r/167940675120.2718408.8176058626864184420.stgit@firesoul Signed-off-by: Alexei Starovoitov --- Documentation/networking/xdp-rx-metadata.rst | 7 +++++-- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 ++-- drivers/net/veth.c | 4 ++-- net/core/xdp.c | 10 ++++++++-- 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst index aac63fc2d08bd..25ce72af81c21 100644 --- a/Documentation/networking/xdp-rx-metadata.rst +++ b/Documentation/networking/xdp-rx-metadata.rst @@ -23,10 +23,13 @@ metadata is supported, this set will grow: An XDP program can use these kfuncs to read the metadata into stack variables for its own consumption. Or, to pass the metadata on to other consumers, an XDP program can store it into the metadata area carried -ahead of the packet. +ahead of the packet. Not all packets will necessary have the requested +metadata available in which case the driver returns ``-ENODATA``. Not all kfuncs have to be implemented by the device driver; when not -implemented, the default ones that return ``-EOPNOTSUPP`` will be used. +implemented, the default ones that return ``-EOPNOTSUPP`` will be used +to indicate the device driver have not implemented this kfunc. + Within an XDP frame, the metadata layout (accessed via ``xdp_buff``) is as follows:: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 0869d4fff17b1..4b5e459b6d49f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -674,7 +674,7 @@ int mlx4_en_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) struct mlx4_en_xdp_buff *_ctx = (void *)ctx; if (unlikely(_ctx->ring->hwtstamp_rx_filter != HWTSTAMP_FILTER_ALL)) - return -EOPNOTSUPP; + return -ENODATA; *timestamp = mlx4_en_get_hwtstamp(_ctx->mdev, mlx4_en_get_cqe_ts(_ctx->cqe)); @@ -686,7 +686,7 @@ int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash) struct mlx4_en_xdp_buff *_ctx = (void *)ctx; if (unlikely(!(_ctx->dev->features & NETIF_F_RXHASH))) - return -EOPNOTSUPP; + return -ENODATA; *hash = be32_to_cpu(_ctx->cqe->immed_rss_invalid); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index bcd6370de440f..c5dae48b7932f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -162,7 +162,7 @@ static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) const struct mlx5e_xdp_buff *_ctx = (void *)ctx; if (unlikely(!mlx5e_rx_hw_stamp(_ctx->rq->tstamp))) - return -EOPNOTSUPP; + return -ENODATA; *timestamp = mlx5e_cqe_ts_to_ns(_ctx->rq->ptp_cyc2time, _ctx->rq->clock, get_cqe_ts(_ctx->cqe)); @@ -174,7 +174,7 @@ static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash) const struct mlx5e_xdp_buff *_ctx = (void *)ctx; if (unlikely(!(_ctx->xdp.rxq->dev->features & NETIF_F_RXHASH))) - return -EOPNOTSUPP; + return -ENODATA; *hash = be32_to_cpu(_ctx->cqe->rss_hash_result); return 0; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 1bb54de7124d9..046461ee42ead 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1610,7 +1610,7 @@ static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) struct veth_xdp_buff *_ctx = (void *)ctx; if (!_ctx->skb) - return -EOPNOTSUPP; + return -ENODATA; *timestamp = skb_hwtstamps(_ctx->skb)->hwtstamp; return 0; @@ -1621,7 +1621,7 @@ static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash) struct veth_xdp_buff *_ctx = (void *)ctx; if (!_ctx->skb) - return -EOPNOTSUPP; + return -ENODATA; *hash = skb_get_hash(_ctx->skb); return 0; diff --git a/net/core/xdp.c b/net/core/xdp.c index 8c92fc5533177..247797168579c 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -720,7 +720,10 @@ __diag_ignore_all("-Wmissing-prototypes", * @ctx: XDP context pointer. * @timestamp: Return value pointer. * - * Returns 0 on success or ``-errno`` on error. + * Return: + * * Returns 0 on success or ``-errno`` on error. + * * ``-EOPNOTSUPP`` : means device driver does not implement kfunc + * * ``-ENODATA`` : means no RX-timestamp available for this frame */ __bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) { @@ -732,7 +735,10 @@ __bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *tim * @ctx: XDP context pointer. * @hash: Return value pointer. * - * Returns 0 on success or ``-errno`` on error. + * Return: + * * Returns 0 on success or ``-errno`` on error. + * * ``-EOPNOTSUPP`` : means device driver doesn't implement kfunc + * * ``-ENODATA`` : means no RX-hash available for this frame */ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash) { From ba98413bf45edbf33672e2539e321b851b2cfbd1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 11:35:33 +0100 Subject: [PATCH 330/898] drm/meson: fix missing component unbind on bind errors Make sure to unbind all subcomponents when binding the aggregate device fails. Fixes: a41e82e6c457 ("drm/meson: Add support for components") Cc: stable@vger.kernel.org # 4.12 Cc: Neil Armstrong Signed-off-by: Johan Hovold Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230306103533.4915-1-johan+linaro@kernel.org --- drivers/gpu/drm/meson/meson_drv.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 79bfe3938d3c6..7caf937c3c90d 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -325,23 +325,23 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) ret = meson_encoder_hdmi_init(priv); if (ret) - goto exit_afbcd; + goto unbind_all; ret = meson_plane_create(priv); if (ret) - goto exit_afbcd; + goto unbind_all; ret = meson_overlay_create(priv); if (ret) - goto exit_afbcd; + goto unbind_all; ret = meson_crtc_create(priv); if (ret) - goto exit_afbcd; + goto unbind_all; ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, drm); if (ret) - goto exit_afbcd; + goto unbind_all; drm_mode_config_reset(drm); @@ -359,6 +359,9 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) uninstall_irq: free_irq(priv->vsync_irq, drm); +unbind_all: + if (has_components) + component_unbind_all(drm->dev, drm); exit_afbcd: if (priv->afbcd.ops) priv->afbcd.ops->exit(priv); From 1a70ca89d59c7c8af006d29b965a95ede0abb0da Mon Sep 17 00:00:00 2001 From: Matheus Castello Date: Wed, 22 Mar 2023 15:38:21 +0100 Subject: [PATCH 331/898] drm/bridge: lt8912b: return EPROBE_DEFER if bridge is not found Returns EPROBE_DEFER when of_drm_find_bridge() fails, this is consistent with what all the other DRM bridge drivers are doing and this is required since the bridge might not be there when the driver is probed and this should not be a fatal failure. Cc: Fixes: 30e2ae943c26 ("drm/bridge: Introduce LT8912B DSI to HDMI bridge") Signed-off-by: Matheus Castello Signed-off-by: Francesco Dolcini Reviewed-by: Laurent Pinchart Reviewed-by: Andrzej Hajda Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230322143821.109744-1-francesco@dolcini.it --- drivers/gpu/drm/bridge/lontium-lt8912b.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c index 2019a8167d693..b40baced13316 100644 --- a/drivers/gpu/drm/bridge/lontium-lt8912b.c +++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c @@ -676,8 +676,8 @@ static int lt8912_parse_dt(struct lt8912 *lt) lt->hdmi_port = of_drm_find_bridge(port_node); if (!lt->hdmi_port) { - dev_err(lt->dev, "%s: Failed to get hdmi port\n", __func__); - ret = -ENODEV; + ret = -EPROBE_DEFER; + dev_err_probe(lt->dev, ret, "%s: Failed to get hdmi port\n", __func__); goto err_free_host_node; } From 02a4d923e4400a36d340ea12d8058f69ebf3a383 Mon Sep 17 00:00:00 2001 From: Savino Dicanosa Date: Tue, 21 Mar 2023 19:44:02 +0000 Subject: [PATCH 332/898] io_uring/rsrc: fix null-ptr-deref in io_file_bitmap_get() When fixed files are unregistered, file_alloc_end and alloc_hint are not cleared. This can later cause a NULL pointer dereference in io_file_bitmap_get() if auto index selection is enabled via IORING_FILE_INDEX_ALLOC: [ 6.519129] BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] [ 6.541468] RIP: 0010:_find_next_zero_bit+0x1a/0x70 [...] [ 6.560906] Call Trace: [ 6.561322] [ 6.561672] io_file_bitmap_get+0x38/0x60 [ 6.562281] io_fixed_fd_install+0x63/0xb0 [ 6.562851] ? __pfx_io_socket+0x10/0x10 [ 6.563396] io_socket+0x93/0xf0 [ 6.563855] ? __pfx_io_socket+0x10/0x10 [ 6.564411] io_issue_sqe+0x5b/0x3d0 [ 6.564914] io_submit_sqes+0x1de/0x650 [ 6.565452] __do_sys_io_uring_enter+0x4fc/0xb20 [ 6.566083] ? __do_sys_io_uring_register+0x11e/0xd80 [ 6.566779] do_syscall_64+0x3c/0x90 [ 6.567247] entry_SYSCALL_64_after_hwframe+0x72/0xdc [...] To fix the issue, set file alloc range and alloc_hint to zero after file tables are freed. Cc: stable@vger.kernel.org Fixes: 4278a0deb1f6 ("io_uring: defer alloc_hint update to io_file_bitmap_set()") Signed-off-by: Savino Dicanosa [axboe: add explicit bitmap == NULL check as well] Signed-off-by: Jens Axboe --- io_uring/filetable.c | 3 +++ io_uring/rsrc.c | 1 + 2 files changed, 4 insertions(+) diff --git a/io_uring/filetable.c b/io_uring/filetable.c index 68dfc6936aa72..b80614e7d6051 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -19,6 +19,9 @@ static int io_file_bitmap_get(struct io_ring_ctx *ctx) unsigned long nr = ctx->file_alloc_end; int ret; + if (!table->bitmap) + return -ENFILE; + do { ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint); if (ret != nr) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index e2bac9f899029..7a43aed8e395c 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -794,6 +794,7 @@ void __io_sqe_files_unregister(struct io_ring_ctx *ctx) } #endif io_free_file_tables(&ctx->file_table); + io_file_table_set_alloc_range(ctx, 0, 0); io_rsrc_data_free(ctx->file_data); ctx->file_data = NULL; ctx->nr_user_files = 0; From a3f547addcaa10df5a226526bc9e2d9a94542344 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Mon, 6 Mar 2023 20:31:44 +0100 Subject: [PATCH 333/898] x86/mm: Do not shuffle CPU entry areas without KASLR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 97e3d26b5e5f ("x86/mm: Randomize per-cpu entry area") fixed an omission of KASLR on CPU entry areas. It doesn't take into account KASLR switches though, which may result in unintended non-determinism when a user wants to avoid it (e.g. debugging, benchmarking). Generate only a single combination of CPU entry areas offsets -- the linear array that existed prior randomization when KASLR is turned off. Since we have 3f148f331814 ("x86/kasan: Map shadow for percpu pages on demand") and followups, we can use the more relaxed guard kasrl_enabled() (in contrast to kaslr_memory_enabled()). Fixes: 97e3d26b5e5f ("x86/mm: Randomize per-cpu entry area") Signed-off-by: Michal Koutný Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20230306193144.24605-1-mkoutny%40suse.com --- arch/x86/mm/cpu_entry_area.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 7316a82242599..e91500a809639 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -10,6 +10,7 @@ #include #include #include +#include static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); @@ -29,6 +30,12 @@ static __init void init_cea_offsets(void) unsigned int max_cea; unsigned int i, j; + if (!kaslr_enabled()) { + for_each_possible_cpu(i) + per_cpu(_cea_offset, i) = i; + return; + } + max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE; /* O(sodding terrible) */ From b15888840207c2bfe678dd1f68a32db54315e71f Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Mon, 27 Feb 2023 13:05:03 -0800 Subject: [PATCH 334/898] x86/fpu/xstate: Prevent false-positive warning in __copy_xstate_uabi_buf() __copy_xstate_to_uabi_buf() copies either from the tasks XSAVE buffer or from init_fpstate into the ptrace buffer. Dynamic features, like XTILEDATA, have an all zeroes init state and are not saved in init_fpstate, which means the corresponding bit is not set in the xfeatures bitmap of the init_fpstate header. But __copy_xstate_to_uabi_buf() retrieves addresses for both the tasks xstate and init_fpstate unconditionally via __raw_xsave_addr(). So if the tasks XSAVE buffer has a dynamic feature set, then the address retrieval for init_fpstate triggers the warning in __raw_xsave_addr() which checks the feature bit in the init_fpstate header. Remove the address retrieval from init_fpstate for extended features. They have an all zeroes init state so init_fpstate has zeros for them. Then zeroing the user buffer for the init state is the same as copying them from init_fpstate. Fixes: 2308ee57d93d ("x86/fpu/amx: Enable the AMX feature in 64-bit mode") Reported-by: Mingwei Zhang Link: https://lore.kernel.org/kvm/20230221163655.920289-2-mizhang@google.com/ Signed-off-by: Chang S. Bae Signed-off-by: Dave Hansen Tested-by: Mingwei Zhang Link: https://lore.kernel.org/all/20230227210504.18520-2-chang.seok.bae%40intel.com Cc: stable@vger.kernel.org --- arch/x86/kernel/fpu/xstate.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 714166cc25f2f..0bab497c94369 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1118,21 +1118,20 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, zerofrom = offsetof(struct xregs_state, extended_state_area); /* - * The ptrace buffer is in non-compacted XSAVE format. In - * non-compacted format disabled features still occupy state space, - * but there is no state to copy from in the compacted - * init_fpstate. The gap tracking will zero these states. - */ - mask = fpstate->user_xfeatures; - - /* - * Dynamic features are not present in init_fpstate. When they are - * in an all zeros init state, remove those from 'mask' to zero - * those features in the user buffer instead of retrieving them - * from init_fpstate. + * This 'mask' indicates which states to copy from fpstate. + * Those extended states that are not present in fpstate are + * either disabled or initialized: + * + * In non-compacted format, disabled features still occupy + * state space but there is no state to copy from in the + * compacted init_fpstate. The gap tracking will zero these + * states. + * + * The extended features have an all zeroes init state. Thus, + * remove them from 'mask' to zero those features in the user + * buffer instead of retrieving them from init_fpstate. */ - if (fpu_state_size_dynamic()) - mask &= (header.xfeatures | xinit->header.xcomp_bv); + mask = header.xfeatures; for_each_extended_xfeature(i, mask) { /* @@ -1151,9 +1150,8 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, pkru.pkru = pkru_val; membuf_write(&to, &pkru, sizeof(pkru)); } else { - copy_feature(header.xfeatures & BIT_ULL(i), &to, + membuf_write(&to, __raw_xsave_addr(xsave, i), - __raw_xsave_addr(xinit, i), xstate_sizes[i]); } /* From 62faca1ca10cc84e99ae7f38aa28df2bc945369b Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Mon, 27 Feb 2023 13:05:04 -0800 Subject: [PATCH 335/898] selftests/x86/amx: Add a ptrace test Include a test case to validate the XTILEDATA injection to the target. Also, it ensures the kernel's ability to copy states between different XSAVE formats. Refactor the memcmp() code to be usable for the state validation. Signed-off-by: Chang S. Bae Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20230227210504.18520-3-chang.seok.bae%40intel.com --- tools/testing/selftests/x86/amx.c | 108 +++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c index 625e42901237c..d884fd69dd510 100644 --- a/tools/testing/selftests/x86/amx.c +++ b/tools/testing/selftests/x86/amx.c @@ -14,8 +14,10 @@ #include #include #include +#include #include #include +#include #include "../kselftest.h" /* For __cpuid_count() */ @@ -583,6 +585,13 @@ static void test_dynamic_state(void) _exit(0); } +static inline int __compare_tiledata_state(struct xsave_buffer *xbuf1, struct xsave_buffer *xbuf2) +{ + return memcmp(&xbuf1->bytes[xtiledata.xbuf_offset], + &xbuf2->bytes[xtiledata.xbuf_offset], + xtiledata.size); +} + /* * Save current register state and compare it to @xbuf1.' * @@ -599,9 +608,7 @@ static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1) fatal_error("failed to allocate XSAVE buffer\n"); xsave(xbuf2, XFEATURE_MASK_XTILEDATA); - ret = memcmp(&xbuf1->bytes[xtiledata.xbuf_offset], - &xbuf2->bytes[xtiledata.xbuf_offset], - xtiledata.size); + ret = __compare_tiledata_state(xbuf1, xbuf2); free(xbuf2); @@ -826,6 +833,99 @@ static void test_context_switch(void) free(finfo); } +/* Ptrace test */ + +/* + * Make sure the ptracee has the expanded kernel buffer on the first + * use. Then, initialize the state before performing the state + * injection from the ptracer. + */ +static inline void ptracee_firstuse_tiledata(void) +{ + load_rand_tiledata(stashed_xsave); + init_xtiledata(); +} + +/* + * Ptracer injects the randomized tile data state. It also reads + * before and after that, which will execute the kernel's state copy + * functions. So, the tester is advised to double-check any emitted + * kernel messages. + */ +static void ptracer_inject_tiledata(pid_t target) +{ + struct xsave_buffer *xbuf; + struct iovec iov; + + xbuf = alloc_xbuf(); + if (!xbuf) + fatal_error("unable to allocate XSAVE buffer"); + + printf("\tRead the init'ed tiledata via ptrace().\n"); + + iov.iov_base = xbuf; + iov.iov_len = xbuf_size; + + memset(stashed_xsave, 0, xbuf_size); + + if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) + fatal_error("PTRACE_GETREGSET"); + + if (!__compare_tiledata_state(stashed_xsave, xbuf)) + printf("[OK]\tThe init'ed tiledata was read from ptracee.\n"); + else + printf("[FAIL]\tThe init'ed tiledata was not read from ptracee.\n"); + + printf("\tInject tiledata via ptrace().\n"); + + load_rand_tiledata(xbuf); + + memcpy(&stashed_xsave->bytes[xtiledata.xbuf_offset], + &xbuf->bytes[xtiledata.xbuf_offset], + xtiledata.size); + + if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) + fatal_error("PTRACE_SETREGSET"); + + if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) + fatal_error("PTRACE_GETREGSET"); + + if (!__compare_tiledata_state(stashed_xsave, xbuf)) + printf("[OK]\tTiledata was correctly written to ptracee.\n"); + else + printf("[FAIL]\tTiledata was not correctly written to ptracee.\n"); +} + +static void test_ptrace(void) +{ + pid_t child; + int status; + + child = fork(); + if (child < 0) { + err(1, "fork"); + } else if (!child) { + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL)) + err(1, "PTRACE_TRACEME"); + + ptracee_firstuse_tiledata(); + + raise(SIGTRAP); + _exit(0); + } + + do { + wait(&status); + } while (WSTOPSIG(status) != SIGTRAP); + + ptracer_inject_tiledata(child); + + ptrace(PTRACE_DETACH, child, NULL, NULL); + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + err(1, "ptrace test"); +} + int main(void) { /* Check hardware availability at first */ @@ -846,6 +946,8 @@ int main(void) ctxtswtest_config.num_threads = 5; test_context_switch(); + test_ptrace(); + clearhandler(SIGILL); free_stashed_xsave(); From f1b80a3878b2d76ced46a275fdfd7fb80b4f083b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 14 Mar 2023 17:50:10 +0200 Subject: [PATCH 336/898] thermal: core: Restore behavior regarding invalid trip points Commit 7c3d5c20dc16 ("thermal/core: Add a generic thermal_zone_get_trip() function") stopped marking trip points with a zero temperature as disabled, behavior that was originally introduced in commit 81ad4276b505 ("Thermal: Ignore invalid trip points"). When using the mlxsw driver we see that when such trip points are not disabled, the thermal subsystem repeatedly tries to set the state of the associated cooling devices to the maximum state. Address this by restoring the original behavior and mark trip points with a zero temperature as disabled. Fixes: 7c3d5c20dc16 ("thermal/core: Add a generic thermal_zone_get_trip() function") Signed-off-by: Ido Schimmel Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 55679fd86505d..b50931f84aaa0 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1309,7 +1309,7 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t struct thermal_trip trip; result = thermal_zone_get_trip(tz, count, &trip); - if (result) + if (result || !trip.temperature) set_bit(count, &tz->trips_disabled); } From e38c5e80c3d293a883c6f1d553f2146ec0bda35e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 22 Mar 2023 15:53:32 +0100 Subject: [PATCH 337/898] ASoC: Intel: bytcr_rt5640: Add quirk for the Acer Iconia One 7 B1-750 The Acer Iconia One 7 B1-750 tablet mostly works fine with the defaults for an Bay Trail CR tablet. Except for the internal mic, instead of an analog mic on IN3 a digital mic on DMIC1 is uses. Add a quirk with these settings for this tablet. Acked-by: Pierre-Louis Bossart Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230322145332.131525-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 79e0039c79a38..5a12940ef9070 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -533,6 +533,18 @@ static int byt_rt5640_aif1_hw_params(struct snd_pcm_substream *substream, /* Please keep this list alphabetically sorted */ static const struct dmi_system_id byt_rt5640_quirk_table[] = { + { /* Acer Iconia One 7 B1-750 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "VESPA2"), + }, + .driver_data = (void *)(BYT_RT5640_DMIC1_MAP | + BYT_RT5640_JD_SRC_JD1_IN4P | + BYT_RT5640_OVCD_TH_1500UA | + BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { /* Acer Iconia Tab 8 W1-810 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"), From 6165a16a5ad9b237bb3131cff4d3c601ccb8f9a3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 21 Mar 2023 00:17:36 -0400 Subject: [PATCH 338/898] NFSv4: Fix hangs when recovering open state after a server reboot When we're using a cached open stateid or a delegation in order to avoid sending a CLAIM_PREVIOUS open RPC call to the server, we don't have a new open stateid to present to update_open_stateid(). Instead rely on nfs4_try_open_cached(), just as if we were doing a normal open. Fixes: d2bfda2e7aa0 ("NFSv4: don't reprocess cached open CLAIM_PREVIOUS") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 22a93ae46cd72..5607b1e2b8212 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1980,8 +1980,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) if (!data->rpc_done) { if (data->rpc_status) return ERR_PTR(data->rpc_status); - /* cached opens have already been processed */ - goto update; + return nfs4_try_open_cached(data); } ret = nfs_refresh_inode(inode, &data->f_attr); @@ -1990,7 +1989,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) if (data->o_res.delegation_type != 0) nfs4_opendata_check_deleg(data, state); -update: + if (!update_open_stateid(state, &data->o_res.stateid, NULL, data->o_arg.fmode)) return ERR_PTR(-EAGAIN); From 5f24a8725fe7bc2c6adf7ce00dd3e818387d8995 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 22 Mar 2023 15:01:34 -0400 Subject: [PATCH 339/898] SUNRPC: Fix a crash in gss_krb5_checksum() Anna says: > KASAN reports [...] a slab-out-of-bounds in gss_krb5_checksum(), > and it can cause my client to panic when running cthon basic > tests with krb5p. > Running faddr2line gives me: > > gss_krb5_checksum+0x4b6/0x630: > ahash_request_free at > /home/anna/Programs/linux-nfs.git/./include/crypto/hash.h:619 > (inlined by) gss_krb5_checksum at > /home/anna/Programs/linux-nfs.git/net/sunrpc/auth_gss/gss_krb5_crypto.c:358 My diagnosis is that the memcpy() at the end of gss_krb5_checksum() reads past the end of the buffer containing the checksum data because the callers have ignored gss_krb5_checksum()'s API contract: * Caller provides the truncation length of the output token (h) in * cksumout.len. Instead they provide the fixed length of the hmac buffer. This length happens to be larger than the value returned by crypto_ahash_digestsize(). Change these errant callers to work like krb5_etm_{en,de}crypt(). As a defensive measure, bound the length of the byte copy at the end of gss_krb5_checksum(). Kunit sez: Testing complete. Ran 68 tests: passed: 68 Elapsed time: 81.680s total, 5.875s configuring, 75.610s building, 0.103s running Reported-by: Anna Schumaker Fixes: 8270dbfcebea ("SUNRPC: Obscure Kerberos integrity keys") Signed-off-by: Chuck Lever --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 6c7c52eeed4f8..212c5d57465a1 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -353,7 +353,9 @@ gss_krb5_checksum(struct crypto_ahash *tfm, char *header, int hdrlen, err = crypto_ahash_final(req); if (err) goto out_free_ahash; - memcpy(cksumout->data, checksumdata, cksumout->len); + + memcpy(cksumout->data, checksumdata, + min_t(int, cksumout->len, crypto_ahash_digestsize(tfm))); out_free_ahash: ahash_request_free(req); @@ -809,8 +811,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN; buf->len += GSS_KRB5_TOK_HDR_LEN; - /* Do the HMAC */ - hmac.len = GSS_KRB5_MAX_CKSUM_LEN; + hmac.len = kctx->gk5e->cksumlength; hmac.data = buf->tail[0].iov_base + buf->tail[0].iov_len; /* @@ -873,8 +874,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, if (ret) goto out_err; - /* Calculate our hmac over the plaintext data */ - our_hmac_obj.len = sizeof(our_hmac); + our_hmac_obj.len = kctx->gk5e->cksumlength; our_hmac_obj.data = our_hmac; ret = gss_krb5_checksum(ahash, NULL, 0, &subbuf, 0, &our_hmac_obj); if (ret) From d18a04157fc171fd48075e3dc96471bd3b87f0dd Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Mon, 6 Mar 2023 12:27:43 +0000 Subject: [PATCH 340/898] rcu: Fix rcu_torture_read ftrace event Fix the rcutorturename field so that its size is correctly reported in the text format embedded in trace.dat files. As it stands, it is reported as being of size 1: field:char rcutorturename[8]; offset:8; size:1; signed:0; Signed-off-by: Douglas Raillard Reviewed-by: Mukesh Ojha Cc: stable@vger.kernel.org Fixes: 04ae87a52074e ("ftrace: Rework event_create_dir()") Reviewed-by: Steven Rostedt (Google) [ boqun: Add "Cc" and "Fixes" tags per Steven ] Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 90b2fb0292cb1..012fa0d171b27 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -768,7 +768,7 @@ TRACE_EVENT_RCU(rcu_torture_read, TP_ARGS(rcutorturename, rhp, secs, c_old, c), TP_STRUCT__entry( - __field(char, rcutorturename[RCUTORTURENAME_LEN]) + __array(char, rcutorturename, RCUTORTURENAME_LEN) __field(struct rcu_head *, rhp) __field(unsigned long, secs) __field(unsigned long, c_old) From 7a891d4b62d62566323676cb0e922ded4f37afe1 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 1 Mar 2023 00:01:21 +0900 Subject: [PATCH 341/898] ksmbd: fix wrong signingkey creation when encryption is AES256 MacOS and Win11 support AES256 encrytion and it is included in the cipher array of encryption context. Especially on macOS, The most preferred cipher is AES256. Connecting to ksmbd fails on newer MacOS clients that support AES256 encryption. MacOS send disconnect request after receiving final session setup response from ksmbd. Because final session setup is signed with signing key was generated incorrectly. For signging key, 'L' value should be initialized to 128 if key size is 16bytes. Cc: stable@vger.kernel.org Reported-by: Miao Lihua <441884205@qq.com> Tested-by: Miao Lihua <441884205@qq.com> Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/auth.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c index 6e61b5bc7d86e..cead696b656a8 100644 --- a/fs/ksmbd/auth.c +++ b/fs/ksmbd/auth.c @@ -727,8 +727,9 @@ static int generate_key(struct ksmbd_conn *conn, struct ksmbd_session *sess, goto smb3signkey_ret; } - if (conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM || - conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM) + if (key_size == SMB3_ENC_DEC_KEY_SIZE && + (conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM || + conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), L256, 4); else rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), L128, 4); From 728f14c72b71a19623df329c1c7c9d1452e56f1e Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 1 Mar 2023 00:02:30 +0900 Subject: [PATCH 342/898] ksmbd: set FILE_NAMED_STREAMS attribute in FS_ATTRIBUTE_INFORMATION If vfs objects = streams_xattr in ksmbd.conf FILE_NAMED_STREAMS should be set to Attributes in FS_ATTRIBUTE_INFORMATION. MacOS client show "Format: SMB (Unknown)" on faked NTFS and no streams support. Cc: stable@vger.kernel.org Reported-by: Miao Lihua <441884205@qq.com> Tested-by: Miao Lihua <441884205@qq.com> Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 0685c1c77b9fc..bc64d36c4dcfd 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -4934,6 +4934,10 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info->Attributes |= cpu_to_le32(server_conf.share_fake_fscaps); + if (test_share_config_flag(work->tcon->share_conf, + KSMBD_SHARE_FLAG_STREAMS)) + info->Attributes |= cpu_to_le32(FILE_NAMED_STREAMS); + info->MaxPathNameComponentLength = cpu_to_le32(stfs.f_namelen); len = smbConvertToUTF16((__le16 *)info->FileSystemName, "NTFS", PATH_MAX, conn->local_nls, 0); From 2d74ec97131b1179a373b6d521f195c84e894eb6 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 5 Mar 2023 21:04:00 +0900 Subject: [PATCH 343/898] ksmbd: add low bound validation to FSCTL_SET_ZERO_DATA Smatch static checker warning: fs/ksmbd/smb2pdu.c:7759 smb2_ioctl() warn: no lower bound on 'off' Fix unexpected result that could caused from negative off and bfz. Fixes: b5e5f9dfc915 ("ksmbd: check invalid FileOffset and BeyondFinalZero in FSCTL_ZERO_DATA") Reported-by: Dan Carpenter Signed-off-by: Namjae Jeon Reviewed-by: Sergey Senozhatsky Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index bc64d36c4dcfd..f09afbdde58ad 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -7755,7 +7755,7 @@ int smb2_ioctl(struct ksmbd_work *work) off = le64_to_cpu(zero_data->FileOffset); bfz = le64_to_cpu(zero_data->BeyondFinalZero); - if (off > bfz) { + if (off < 0 || bfz < 0 || off > bfz) { ret = -EINVAL; goto out; } From 342edb60dcda7a409430359b0cac2864bb9dfe44 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 7 Mar 2023 21:56:07 +0900 Subject: [PATCH 344/898] ksmbd: add low bound validation to FSCTL_QUERY_ALLOCATED_RANGES Smatch static checker warning: fs/ksmbd/vfs.c:1040 ksmbd_vfs_fqar_lseek() warn: no lower bound on 'length' fs/ksmbd/vfs.c:1041 ksmbd_vfs_fqar_lseek() warn: no lower bound on 'start' Fix unexpected result that could caused from negative start and length. Fixes: f44158485826 ("cifsd: add file operations") Reported-by: Dan Carpenter Signed-off-by: Namjae Jeon Reviewed-by: Sergey Senozhatsky Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index f09afbdde58ad..cb779d2172345 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -7448,13 +7448,16 @@ static int fsctl_query_allocated_ranges(struct ksmbd_work *work, u64 id, if (in_count == 0) return -EINVAL; + start = le64_to_cpu(qar_req->file_offset); + length = le64_to_cpu(qar_req->length); + + if (start < 0 || length < 0) + return -EINVAL; + fp = ksmbd_lookup_fd_fast(work, id); if (!fp) return -ENOENT; - start = le64_to_cpu(qar_req->file_offset); - length = le64_to_cpu(qar_req->length); - ret = ksmbd_vfs_fqar_lseek(fp, start, length, qar_rsp, in_count, out_count); if (ret && ret != -E2BIG) From 2624b445544ffc1472ccabfb6ec867c199d4c95c Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Thu, 2 Mar 2023 21:58:04 +0800 Subject: [PATCH 345/898] ksmbd: fix possible refcount leak in smb2_open() Reference count of acls will leak when memory allocation fails. Fix this by adding the missing posix_acl_release(). Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Signed-off-by: ChenXiaoSong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index cb779d2172345..97c9d1b5bcc0b 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -2977,8 +2977,11 @@ int smb2_open(struct ksmbd_work *work) sizeof(struct smb_acl) + sizeof(struct smb_ace) * ace_num * 2, GFP_KERNEL); - if (!pntsd) + if (!pntsd) { + posix_acl_release(fattr.cf_acls); + posix_acl_release(fattr.cf_dacls); goto err_out; + } rc = build_sec_desc(idmap, pntsd, NULL, 0, From be6f42fad5f5fd1fea9d562df82c38ad6ed3bfe9 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 21 Mar 2023 15:25:34 +0900 Subject: [PATCH 346/898] ksmbd: don't terminate inactive sessions after a few seconds Steve reported that inactive sessions are terminated after a few seconds. ksmbd terminate when receiving -EAGAIN error from kernel_recvmsg(). -EAGAIN means there is no data available in timeout. So ksmbd should keep connection with unlimited retries instead of terminating inactive sessions. Cc: stable@vger.kernel.org Reported-by: Steve French Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/connection.c | 4 ++-- fs/ksmbd/connection.h | 3 ++- fs/ksmbd/transport_rdma.c | 2 +- fs/ksmbd/transport_tcp.c | 35 +++++++++++++++++++++++------------ 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index 5b10b03800c16..5d914715605fb 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -298,7 +298,7 @@ int ksmbd_conn_handler_loop(void *p) kvfree(conn->request_buf); conn->request_buf = NULL; - size = t->ops->read(t, hdr_buf, sizeof(hdr_buf)); + size = t->ops->read(t, hdr_buf, sizeof(hdr_buf), -1); if (size != sizeof(hdr_buf)) break; @@ -344,7 +344,7 @@ int ksmbd_conn_handler_loop(void *p) * We already read 4 bytes to find out PDU size, now * read in PDU */ - size = t->ops->read(t, conn->request_buf + 4, pdu_size); + size = t->ops->read(t, conn->request_buf + 4, pdu_size, 2); if (size < 0) { pr_err("sock_read failed: %d\n", size); break; diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h index 3643354a3fa79..0e3a848defaf3 100644 --- a/fs/ksmbd/connection.h +++ b/fs/ksmbd/connection.h @@ -114,7 +114,8 @@ struct ksmbd_transport_ops { int (*prepare)(struct ksmbd_transport *t); void (*disconnect)(struct ksmbd_transport *t); void (*shutdown)(struct ksmbd_transport *t); - int (*read)(struct ksmbd_transport *t, char *buf, unsigned int size); + int (*read)(struct ksmbd_transport *t, char *buf, + unsigned int size, int max_retries); int (*writev)(struct ksmbd_transport *t, struct kvec *iovs, int niov, int size, bool need_invalidate_rkey, unsigned int remote_key); diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c index 096eda9ef873b..c06efc020bd95 100644 --- a/fs/ksmbd/transport_rdma.c +++ b/fs/ksmbd/transport_rdma.c @@ -670,7 +670,7 @@ static int smb_direct_post_recv(struct smb_direct_transport *t, } static int smb_direct_read(struct ksmbd_transport *t, char *buf, - unsigned int size) + unsigned int size, int unused) { struct smb_direct_recvmsg *recvmsg; struct smb_direct_data_transfer *data_transfer; diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index 603893fd87f57..20e85e2701f26 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -291,16 +291,18 @@ static int ksmbd_tcp_run_kthread(struct interface *iface) /** * ksmbd_tcp_readv() - read data from socket in given iovec - * @t: TCP transport instance - * @iov_orig: base IO vector - * @nr_segs: number of segments in base iov - * @to_read: number of bytes to read from socket + * @t: TCP transport instance + * @iov_orig: base IO vector + * @nr_segs: number of segments in base iov + * @to_read: number of bytes to read from socket + * @max_retries: maximum retry count * * Return: on success return number of bytes read from socket, * otherwise return error number */ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, - unsigned int nr_segs, unsigned int to_read) + unsigned int nr_segs, unsigned int to_read, + int max_retries) { int length = 0; int total_read; @@ -308,7 +310,6 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, struct msghdr ksmbd_msg; struct kvec *iov; struct ksmbd_conn *conn = KSMBD_TRANS(t)->conn; - int max_retry = 2; iov = get_conn_iovec(t, nr_segs); if (!iov) @@ -335,14 +336,23 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, } else if (conn->status == KSMBD_SESS_NEED_RECONNECT) { total_read = -EAGAIN; break; - } else if ((length == -ERESTARTSYS || length == -EAGAIN) && - max_retry) { + } else if (length == -ERESTARTSYS || length == -EAGAIN) { + /* + * If max_retries is negative, Allow unlimited + * retries to keep connection with inactive sessions. + */ + if (max_retries == 0) { + total_read = length; + break; + } else if (max_retries > 0) { + max_retries--; + } + usleep_range(1000, 2000); length = 0; - max_retry--; continue; } else if (length <= 0) { - total_read = -EAGAIN; + total_read = length; break; } } @@ -358,14 +368,15 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, * Return: on success return number of bytes read from socket, * otherwise return error number */ -static int ksmbd_tcp_read(struct ksmbd_transport *t, char *buf, unsigned int to_read) +static int ksmbd_tcp_read(struct ksmbd_transport *t, char *buf, + unsigned int to_read, int max_retries) { struct kvec iov; iov.iov_base = buf; iov.iov_len = to_read; - return ksmbd_tcp_readv(TCP_TRANS(t), &iov, 1, to_read); + return ksmbd_tcp_readv(TCP_TRANS(t), &iov, 1, to_read, max_retries); } static int ksmbd_tcp_writev(struct ksmbd_transport *t, struct kvec *iov, From b53e8cfec30b93c120623232ba27c041b1ef8f1a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 21 Mar 2023 15:36:40 +0900 Subject: [PATCH 347/898] ksmbd: return STATUS_NOT_SUPPORTED on unsupported smb2.0 dialect ksmbd returned "Input/output error" when mounting with vers=2.0 to ksmbd. It should return STATUS_NOT_SUPPORTED on unsupported smb2.0 dialect. Cc: stable@vger.kernel.org Reported-by: Steve French Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c index fa2b54df6ee6f..079c9e76818d8 100644 --- a/fs/ksmbd/smb_common.c +++ b/fs/ksmbd/smb_common.c @@ -434,7 +434,7 @@ int ksmbd_extract_shortname(struct ksmbd_conn *conn, const char *longname, static int __smb2_negotiate(struct ksmbd_conn *conn) { - return (conn->dialect >= SMB21_PROT_ID && + return (conn->dialect >= SMB20_PROT_ID && conn->dialect <= SMB311_PROT_ID); } @@ -465,7 +465,7 @@ int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command) } } - if (command == SMB2_NEGOTIATE_HE && __smb2_negotiate(conn)) { + if (command == SMB2_NEGOTIATE_HE) { ret = smb2_handle_negotiate(work); init_smb2_neg_rsp(work); return ret; From 3c44a431d62bf4a20d7b901f98266ae3f4676d48 Mon Sep 17 00:00:00 2001 From: Zhengping Jiang Date: Tue, 21 Feb 2023 16:17:56 -0800 Subject: [PATCH 348/898] Bluetooth: hci_sync: Resume adv with no RPA when active scan The address resolution should be disabled during the active scan, so all the advertisements can reach the host. The advertising has to be paused before disabling the address resolution, because the advertising will prevent any changes to the resolving list and the address resolution status. Skipping this will cause the hci error and the discovery failure. According to the bluetooth specification: "7.8.44 LE Set Address Resolution Enable command This command shall not be used when: - Advertising (other than periodic advertising) is enabled, - Scanning is enabled, or - an HCI_LE_Create_Connection, HCI_LE_Extended_Create_Connection, or HCI_LE_Periodic_Advertising_Create_Sync command is outstanding." If the host is using RPA, the controller needs to generate RPA for the advertising, so the advertising must remain paused during the active scan. If the host is not using RPA, the advertising can be resumed after disabling the address resolution. Fixes: 9afc675edeeb ("Bluetooth: hci_sync: allow advertise when scan without RPA") Signed-off-by: Zhengping Jiang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 64 +++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 117eedb6f7099..7e152e912e8c9 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2367,6 +2367,45 @@ static int hci_resume_advertising_sync(struct hci_dev *hdev) return err; } +static int hci_pause_addr_resolution(struct hci_dev *hdev) +{ + int err; + + if (!use_ll_privacy(hdev)) + return 0; + + if (!hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) + return 0; + + /* Cannot disable addr resolution if scanning is enabled or + * when initiating an LE connection. + */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN) || + hci_lookup_le_connect(hdev)) { + bt_dev_err(hdev, "Command not allowed when scan/LE connect"); + return -EPERM; + } + + /* Cannot disable addr resolution if advertising is enabled. */ + err = hci_pause_advertising_sync(hdev); + if (err) { + bt_dev_err(hdev, "Pause advertising failed: %d", err); + return err; + } + + err = hci_le_set_addr_resolution_enable_sync(hdev, 0x00); + if (err) + bt_dev_err(hdev, "Unable to disable Address Resolution: %d", + err); + + /* Return if address resolution is disabled and RPA is not used. */ + if (!err && scan_use_rpa(hdev)) + return err; + + hci_resume_advertising_sync(hdev); + return err; +} + struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, bool extended, struct sock *sk) { @@ -2402,7 +2441,7 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev) u8 filter_policy; int err; - /* Pause advertising if resolving list can be used as controllers are + /* Pause advertising if resolving list can be used as controllers * cannot accept resolving list modifications while advertising. */ if (use_ll_privacy(hdev)) { @@ -5394,27 +5433,12 @@ static int hci_active_scan_sync(struct hci_dev *hdev, uint16_t interval) cancel_interleave_scan(hdev); - /* Pause advertising since active scanning disables address resolution - * which advertising depend on in order to generate its RPAs. - */ - if (use_ll_privacy(hdev) && hci_dev_test_flag(hdev, HCI_PRIVACY)) { - err = hci_pause_advertising_sync(hdev); - if (err) { - bt_dev_err(hdev, "pause advertising failed: %d", err); - goto failed; - } - } - - /* Disable address resolution while doing active scanning since the - * accept list shall not be used and all reports shall reach the host - * anyway. + /* Pause address resolution for active scan and stop advertising if + * privacy is enabled. */ - err = hci_le_set_addr_resolution_enable_sync(hdev, 0x00); - if (err) { - bt_dev_err(hdev, "Unable to disable Address Resolution: %d", - err); + err = hci_pause_addr_resolution(hdev); + if (err) goto failed; - } /* All active scans will be done with either a resolvable private * address (when privacy feature has been enabled) or non-resolvable From 876e78104f23ce9267822757a63562a609b126c3 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 24 Feb 2023 15:43:31 -0800 Subject: [PATCH 349/898] Bluetooth: hci_core: Detect if an ACL packet is in fact an ISO packet Because some transports don't have a dedicated type for ISO packets (see 14202eff214e1e941fefa0366d4c3bc4b1a0d500) they may use ACL type when in fact they are ISO packets. In the past this was left for the driver to detect such thing but it creates a problem when using the likes of btproxy when used by a VM as the host would not be aware of the connection the guest is doing it won't be able to detect such behavior, so this make bt_recv_frame detect when it happens as it is the common interface to all drivers including guest VMs. Fixes: 14202eff214e ("Bluetooth: btusb: Detect if an ACL packet is in fact an ISO packet") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b65c3aabcd536..334e308451f53 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2871,10 +2871,25 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb) return -ENXIO; } - if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT && - hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && - hci_skb_pkt_type(skb) != HCI_SCODATA_PKT && - hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) { + switch (hci_skb_pkt_type(skb)) { + case HCI_EVENT_PKT: + break; + case HCI_ACLDATA_PKT: + /* Detect if ISO packet has been sent as ACL */ + if (hci_conn_num(hdev, ISO_LINK)) { + __u16 handle = __le16_to_cpu(hci_acl_hdr(skb)->handle); + __u8 type; + + type = hci_conn_lookup_type(hdev, hci_handle(handle)); + if (type == ISO_LINK) + hci_skb_pkt_type(skb) = HCI_ISODATA_PKT; + } + break; + case HCI_SCODATA_PKT: + break; + case HCI_ISODATA_PKT: + break; + default: kfree_skb(skb); return -EINVAL; } From efe375b716c1c1c9b52a816f5b933a95421020a2 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 24 Feb 2023 15:54:31 -0800 Subject: [PATCH 350/898] Bluetooth: btusb: Remove detection of ISO packets over bulk This removes the code introduced by 14202eff214e1e941fefa0366d4c3bc4b1a0d500 as hci_recv_frame is now able to detect ACL packets that are in fact ISO packets. Fixes: 14202eff214e ("Bluetooth: btusb: Detect if an ACL packet is in fact an ISO packet") Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 18bc947187115..5c536151ef836 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1050,21 +1050,11 @@ static int btusb_recv_bulk(struct btusb_data *data, void *buffer, int count) hci_skb_expect(skb) -= len; if (skb->len == HCI_ACL_HDR_SIZE) { - __u16 handle = __le16_to_cpu(hci_acl_hdr(skb)->handle); __le16 dlen = hci_acl_hdr(skb)->dlen; - __u8 type; /* Complete ACL header */ hci_skb_expect(skb) = __le16_to_cpu(dlen); - /* Detect if ISO packet has been sent over bulk */ - if (hci_conn_num(data->hdev, ISO_LINK)) { - type = hci_conn_lookup_type(data->hdev, - hci_handle(handle)); - if (type == ISO_LINK) - hci_skb_pkt_type(skb) = HCI_ISODATA_PKT; - } - if (skb_tailroom(skb) < hci_skb_expect(skb)) { kfree_skb(skb); skb = NULL; From 2f10e40a948e8a2abe7f983df3959a333ca8955f Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Mon, 20 Feb 2023 19:38:24 +0000 Subject: [PATCH 351/898] Bluetooth: ISO: fix timestamped HCI ISO data packet parsing Use correct HCI ISO data packet header struct when the packet has timestamp. The timestamp, when present, goes before the other fields (Core v5.3 4E 5.4.5), so the structs are not compatible. Fixes: ccf74f2390d6 ("Bluetooth: Add BTPROTO_ISO socket type") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 24444b502e586..8d136a7301630 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1620,7 +1620,6 @@ static void iso_disconn_cfm(struct hci_conn *hcon, __u8 reason) void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) { struct iso_conn *conn = hcon->iso_data; - struct hci_iso_data_hdr *hdr; __u16 pb, ts, len; if (!conn) @@ -1642,6 +1641,8 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) } if (ts) { + struct hci_iso_ts_data_hdr *hdr; + /* TODO: add timestamp to the packet? */ hdr = skb_pull_data(skb, HCI_ISO_TS_DATA_HDR_SIZE); if (!hdr) { @@ -1649,15 +1650,19 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) goto drop; } + len = __le16_to_cpu(hdr->slen); } else { + struct hci_iso_data_hdr *hdr; + hdr = skb_pull_data(skb, HCI_ISO_DATA_HDR_SIZE); if (!hdr) { BT_ERR("Frame is too short (len %d)", skb->len); goto drop; } + + len = __le16_to_cpu(hdr->slen); } - len = __le16_to_cpu(hdr->slen); flags = hci_iso_data_flags(len); len = hci_iso_data_len(len); From 294d749b5df5a22d17989833fb1a0a2cd1dfd243 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Tue, 28 Feb 2023 16:31:54 +0530 Subject: [PATCH 352/898] Bluetooth: btintel: Iterate only bluetooth device ACPI entries Current flow interates over entire ACPI table entries looking for Bluetooth Per Platform Antenna Gain(PPAG) entry. This patch iterates over ACPI entries relvant to Bluetooth device only. Fixes: c585a92b2f9c ("Bluetooth: btintel: Set Per Platform Antenna Gain(PPAG)") Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.c | 44 +++++++++++++++++++------------- drivers/bluetooth/btintel.h | 7 ----- include/net/bluetooth/hci_core.h | 1 + 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index bede8b0055940..e8d4b59e89c5b 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -26,7 +26,14 @@ #define ECDSA_HEADER_LEN 320 #define BTINTEL_PPAG_NAME "PPAG" -#define BTINTEL_PPAG_PREFIX "\\_SB_.PCI0.XHCI.RHUB" + +/* structure to store the PPAG data read from ACPI table */ +struct btintel_ppag { + u32 domain; + u32 mode; + acpi_status status; + struct hci_dev *hdev; +}; #define CMD_WRITE_BOOT_PARAMS 0xfc0e struct cmd_write_boot_params { @@ -1295,17 +1302,16 @@ static acpi_status btintel_ppag_callback(acpi_handle handle, u32 lvl, void *data status = acpi_get_name(handle, ACPI_FULL_PATHNAME, &string); if (ACPI_FAILURE(status)) { - bt_dev_warn(hdev, "ACPI Failure: %s", acpi_format_exception(status)); + bt_dev_warn(hdev, "PPAG-BT: ACPI Failure: %s", acpi_format_exception(status)); return status; } - if (strncmp(BTINTEL_PPAG_PREFIX, string.pointer, - strlen(BTINTEL_PPAG_PREFIX))) { + len = strlen(string.pointer); + if (len < strlen(BTINTEL_PPAG_NAME)) { kfree(string.pointer); return AE_OK; } - len = strlen(string.pointer); if (strncmp((char *)string.pointer + len - 4, BTINTEL_PPAG_NAME, 4)) { kfree(string.pointer); return AE_OK; @@ -1314,7 +1320,8 @@ static acpi_status btintel_ppag_callback(acpi_handle handle, u32 lvl, void *data status = acpi_evaluate_object(handle, NULL, NULL, &buffer); if (ACPI_FAILURE(status)) { - bt_dev_warn(hdev, "ACPI Failure: %s", acpi_format_exception(status)); + ppag->status = status; + bt_dev_warn(hdev, "PPAG-BT: ACPI Failure: %s", acpi_format_exception(status)); return status; } @@ -1323,8 +1330,9 @@ static acpi_status btintel_ppag_callback(acpi_handle handle, u32 lvl, void *data if (p->type != ACPI_TYPE_PACKAGE || p->package.count != 2) { kfree(buffer.pointer); - bt_dev_warn(hdev, "Invalid object type: %d or package count: %d", + bt_dev_warn(hdev, "PPAG-BT: Invalid object type: %d or package count: %d", p->type, p->package.count); + ppag->status = AE_ERROR; return AE_ERROR; } @@ -1335,6 +1343,7 @@ static acpi_status btintel_ppag_callback(acpi_handle handle, u32 lvl, void *data ppag->domain = (u32)p->package.elements[0].integer.value; ppag->mode = (u32)p->package.elements[1].integer.value; + ppag->status = AE_OK; kfree(buffer.pointer); return AE_CTRL_TERMINATE; } @@ -2314,12 +2323,11 @@ static int btintel_configure_offload(struct hci_dev *hdev) static void btintel_set_ppag(struct hci_dev *hdev, struct intel_version_tlv *ver) { - acpi_status status; struct btintel_ppag ppag; struct sk_buff *skb; struct btintel_loc_aware_reg ppag_cmd; - /* PPAG is not supported if CRF is HrP2, Jfp2, JfP1 */ + /* PPAG is not supported if CRF is HrP2, Jfp2, JfP1 */ switch (ver->cnvr_top & 0xFFF) { case 0x504: /* Hrp2 */ case 0x202: /* Jfp2 */ @@ -2330,26 +2338,26 @@ static void btintel_set_ppag(struct hci_dev *hdev, struct intel_version_tlv *ver memset(&ppag, 0, sizeof(ppag)); ppag.hdev = hdev; - status = acpi_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, NULL, - btintel_ppag_callback, &ppag, NULL); + ppag.status = AE_NOT_FOUND; + acpi_walk_namespace(ACPI_TYPE_PACKAGE, ACPI_HANDLE(GET_HCIDEV_DEV(hdev)), + 1, NULL, btintel_ppag_callback, &ppag, NULL); - if (ACPI_FAILURE(status)) { - /* Do not log warning message if ACPI entry is not found */ - if (status == AE_NOT_FOUND) + if (ACPI_FAILURE(ppag.status)) { + if (ppag.status == AE_NOT_FOUND) { + bt_dev_dbg(hdev, "PPAG-BT: ACPI entry not found"); return; - bt_dev_warn(hdev, "PPAG: ACPI Failure: %s", acpi_format_exception(status)); + } return; } if (ppag.domain != 0x12) { - bt_dev_warn(hdev, "PPAG-BT Domain disabled"); + bt_dev_warn(hdev, "PPAG-BT: domain is not bluetooth"); return; } /* PPAG mode, BIT0 = 0 Disabled, BIT0 = 1 Enabled */ if (!(ppag.mode & BIT(0))) { - bt_dev_dbg(hdev, "PPAG disabled"); + bt_dev_dbg(hdev, "PPAG-BT: disabled"); return; } diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h index 8e7da877efae6..8fdb65b66315a 100644 --- a/drivers/bluetooth/btintel.h +++ b/drivers/bluetooth/btintel.h @@ -137,13 +137,6 @@ struct intel_offload_use_cases { __u8 preset[8]; } __packed; -/* structure to store the PPAG data read from ACPI table */ -struct btintel_ppag { - u32 domain; - u32 mode; - struct hci_dev *hdev; -}; - struct btintel_loc_aware_reg { __le32 mcc; __le32 sel; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7254edfba4c9c..6ed9b4d546a7a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1613,6 +1613,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn); void hci_conn_del_sysfs(struct hci_conn *conn); #define SET_HCIDEV_DEV(hdev, pdev) ((hdev)->dev.parent = (pdev)) +#define GET_HCIDEV_DEV(hdev) ((hdev)->dev.parent) /* ----- LMP capabilities ----- */ #define lmp_encrypt_capable(dev) ((dev)->features[0][0] & LMP_ENCRYPT) From 1c66bee492a5fe00ae3fe890bb693bfc99f994c6 Mon Sep 17 00:00:00 2001 From: Min Li Date: Sat, 4 Mar 2023 21:50:35 +0800 Subject: [PATCH 353/898] Bluetooth: Fix race condition in hci_cmd_sync_clear There is a potential race condition in hci_cmd_sync_work and hci_cmd_sync_clear, and could lead to use-after-free. For instance, hci_cmd_sync_work is added to the 'req_workqueue' after cancel_work_sync The entry of 'cmd_sync_work_list' may be freed in hci_cmd_sync_clear, and causing kernel panic when it is used in 'hci_cmd_sync_work'. Here's the call trace: dump_stack_lvl+0x49/0x63 print_report.cold+0x5e/0x5d3 ? hci_cmd_sync_work+0x282/0x320 kasan_report+0xaa/0x120 ? hci_cmd_sync_work+0x282/0x320 __asan_report_load8_noabort+0x14/0x20 hci_cmd_sync_work+0x282/0x320 process_one_work+0x77b/0x11c0 ? _raw_spin_lock_irq+0x8e/0xf0 worker_thread+0x544/0x1180 ? poll_idle+0x1e0/0x1e0 kthread+0x285/0x320 ? process_one_work+0x11c0/0x11c0 ? kthread_complete_and_exit+0x30/0x30 ret_from_fork+0x22/0x30 Allocated by task 266: kasan_save_stack+0x26/0x50 __kasan_kmalloc+0xae/0xe0 kmem_cache_alloc_trace+0x191/0x350 hci_cmd_sync_queue+0x97/0x2b0 hci_update_passive_scan+0x176/0x1d0 le_conn_complete_evt+0x1b5/0x1a00 hci_le_conn_complete_evt+0x234/0x340 hci_le_meta_evt+0x231/0x4e0 hci_event_packet+0x4c5/0xf00 hci_rx_work+0x37d/0x880 process_one_work+0x77b/0x11c0 worker_thread+0x544/0x1180 kthread+0x285/0x320 ret_from_fork+0x22/0x30 Freed by task 269: kasan_save_stack+0x26/0x50 kasan_set_track+0x25/0x40 kasan_set_free_info+0x24/0x40 ____kasan_slab_free+0x176/0x1c0 __kasan_slab_free+0x12/0x20 slab_free_freelist_hook+0x95/0x1a0 kfree+0xba/0x2f0 hci_cmd_sync_clear+0x14c/0x210 hci_unregister_dev+0xff/0x440 vhci_release+0x7b/0xf0 __fput+0x1f3/0x970 ____fput+0xe/0x20 task_work_run+0xd4/0x160 do_exit+0x8b0/0x22a0 do_group_exit+0xba/0x2a0 get_signal+0x1e4a/0x25b0 arch_do_signal_or_restart+0x93/0x1f80 exit_to_user_mode_prepare+0xf5/0x1a0 syscall_exit_to_user_mode+0x26/0x50 ret_from_fork+0x15/0x30 Fixes: 6a98e3836fa2 ("Bluetooth: Add helper for serialized HCI command execution") Cc: stable@vger.kernel.org Signed-off-by: Min Li Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 7e152e912e8c9..5b8dc8fb2e27a 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -643,6 +643,7 @@ void hci_cmd_sync_clear(struct hci_dev *hdev) cancel_work_sync(&hdev->cmd_sync_work); cancel_work_sync(&hdev->reenable_adv_work); + mutex_lock(&hdev->cmd_sync_work_lock); list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) { if (entry->destroy) entry->destroy(hdev, entry->data, -ECANCELED); @@ -650,6 +651,7 @@ void hci_cmd_sync_clear(struct hci_dev *hdev) list_del(&entry->list); kfree(entry); } + mutex_unlock(&hdev->cmd_sync_work_lock); } void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err) From 52dd5e964a55c98c1b0bcf5fc737a5ddd00e7d4d Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Mon, 6 Mar 2023 14:32:21 -0800 Subject: [PATCH 354/898] Bluetooth: Remove "Power-on" check from Mesh feature The Bluetooth mesh experimental feature enable was requiring the controller to be powered off in order for the Enable to work. Mesh is supposed to be enablable regardless of the controller state, and created an unintended requirement that the mesh daemon be started before the classic bluetoothd daemon. Fixes: af6bcc1921ff ("Bluetooth: Add experimental wrapper for MGMT based mesh") Signed-off-by: Brian Gix Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7add66f30e4d1..39589f864ea76 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4639,12 +4639,6 @@ static int set_mgmt_mesh_func(struct sock *sk, struct hci_dev *hdev, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_INDEX); - /* Changes can only be made when controller is powered down */ - if (hdev_is_powered(hdev)) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_REJECTED); - /* Parameters are limited to a single octet */ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) return mgmt_cmd_status(sk, hdev->id, From c79493c3ccf06a3aeb72017a96ca3dfd166bc16b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 22 Mar 2023 01:28:31 +0200 Subject: [PATCH 355/898] net: enetc: fix aggregate RMON counters not showing the ranges When running "ethtool -S eno0 --groups rmon" without an explicit "--src emac|pmac" argument, the kernel will not report rx-rmon-etherStatsPkts64to64Octets, rx-rmon-etherStatsPkts65to127Octets, etc. This is because on ETHTOOL_MAC_STATS_SRC_AGGREGATE, we do not populate the "ranges" argument. ocelot_port_get_rmon_stats() does things differently and things work there. I had forgotten to make sure that the code is structured the same way in both drivers, so do that now. Fixes: cf52bd238b75 ("net: enetc: add support for MAC Merge statistics counters") Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230321232831.1200905-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_ethtool.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index bca68edfbe9cd..da9d4b310fcdd 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -370,8 +370,7 @@ static const struct ethtool_rmon_hist_range enetc_rmon_ranges[] = { }; static void enetc_rmon_stats(struct enetc_hw *hw, int mac, - struct ethtool_rmon_stats *s, - const struct ethtool_rmon_hist_range **ranges) + struct ethtool_rmon_stats *s) { s->undersize_pkts = enetc_port_rd(hw, ENETC_PM_RUND(mac)); s->oversize_pkts = enetc_port_rd(hw, ENETC_PM_ROVR(mac)); @@ -393,8 +392,6 @@ static void enetc_rmon_stats(struct enetc_hw *hw, int mac, s->hist_tx[4] = enetc_port_rd(hw, ENETC_PM_T1023(mac)); s->hist_tx[5] = enetc_port_rd(hw, ENETC_PM_T1522(mac)); s->hist_tx[6] = enetc_port_rd(hw, ENETC_PM_T1523X(mac)); - - *ranges = enetc_rmon_ranges; } static void enetc_get_eth_mac_stats(struct net_device *ndev, @@ -447,13 +444,15 @@ static void enetc_get_rmon_stats(struct net_device *ndev, struct enetc_hw *hw = &priv->si->hw; struct enetc_si *si = priv->si; + *ranges = enetc_rmon_ranges; + switch (rmon_stats->src) { case ETHTOOL_MAC_STATS_SRC_EMAC: - enetc_rmon_stats(hw, 0, rmon_stats, ranges); + enetc_rmon_stats(hw, 0, rmon_stats); break; case ETHTOOL_MAC_STATS_SRC_PMAC: if (si->hw_features & ENETC_SI_F_QBU) - enetc_rmon_stats(hw, 1, rmon_stats, ranges); + enetc_rmon_stats(hw, 1, rmon_stats); break; case ETHTOOL_MAC_STATS_SRC_AGGREGATE: ethtool_aggregate_rmon_stats(ndev, rmon_stats); From 758d29fb3a8b3c756b4e4e0aa9b32ca8cfaf3feb Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Sun, 19 Mar 2023 19:37:58 +0000 Subject: [PATCH 356/898] tools: ynl: Fix genlmsg header encoding formats The pack strings use 'b' signed char for cmd and version but struct genlmsghdr defines them as unsigned char. Use 'B' instead. Fixes: 4e4480e89c47 ("tools: ynl: move the cli and netlink code around") Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20230319193803.97453-1-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 90764a83c6461..32536e1f9064b 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -200,7 +200,7 @@ def _genl_msg(nl_type, nl_flags, genl_cmd, genl_version, seq=None): if seq is None: seq = random.randint(1, 1024) nlmsg = struct.pack("HHII", nl_type, nl_flags, seq, 0) - genlmsg = struct.pack("bbH", genl_cmd, genl_version, 0) + genlmsg = struct.pack("BBH", genl_cmd, genl_version, 0) return nlmsg + genlmsg @@ -264,7 +264,7 @@ def __init__(self, nl_msg): self.hdr = nl_msg.raw[0:4] self.raw = nl_msg.raw[4:] - self.genl_cmd, self.genl_version, _ = struct.unpack("bbH", self.hdr) + self.genl_cmd, self.genl_version, _ = struct.unpack("BBH", self.hdr) self.raw_attrs = NlAttrs(self.raw) @@ -358,7 +358,7 @@ def _decode_enum(self, rsp, attr_spec): raw >>= 1 i += 1 else: - value = enum['entries'][raw - i] + value = enum.entries_by_val[raw - i].name rsp[attr_spec['name']] = value def _decode(self, attrs, space): From 2f4e429c846972c8405951a9ff7a82aceeca7461 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 20 Feb 2023 13:02:11 +0000 Subject: [PATCH 357/898] cifs: lock chan_lock outside match_session Coverity had rightly indicated a possible deadlock due to chan_lock being done inside match_session. All callers of match_* functions should pick up the necessary locks and call them. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Fixes: 724244cdb382 ("cifs: protect session channel fields with chan_lock") Signed-off-by: Steve French --- fs/cifs/connect.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 49b37594e991f..f42cc70773122 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1721,7 +1721,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, return ERR_PTR(rc); } -/* this function must be called with ses_lock held */ +/* this function must be called with ses_lock and chan_lock held */ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx) { if (ctx->sectype != Unspecified && @@ -1732,12 +1732,8 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx) * If an existing session is limited to less channels than * requested, it should not be reused */ - spin_lock(&ses->chan_lock); - if (ses->chan_max < ctx->max_channels) { - spin_unlock(&ses->chan_lock); + if (ses->chan_max < ctx->max_channels) return 0; - } - spin_unlock(&ses->chan_lock); switch (ses->sectype) { case Kerberos: @@ -1865,10 +1861,13 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) spin_unlock(&ses->ses_lock); continue; } + spin_lock(&ses->chan_lock); if (!match_session(ses, ctx)) { + spin_unlock(&ses->chan_lock); spin_unlock(&ses->ses_lock); continue; } + spin_unlock(&ses->chan_lock); spin_unlock(&ses->ses_lock); ++ses->ses_count; @@ -2693,6 +2692,7 @@ cifs_match_super(struct super_block *sb, void *data) spin_lock(&tcp_srv->srv_lock); spin_lock(&ses->ses_lock); + spin_lock(&ses->chan_lock); spin_lock(&tcon->tc_lock); if (!match_server(tcp_srv, ctx, dfs_super_cmp) || !match_session(ses, ctx) || @@ -2705,6 +2705,7 @@ cifs_match_super(struct super_block *sb, void *data) rc = compare_mount_options(sb, mnt_data); out: spin_unlock(&tcon->tc_lock); + spin_unlock(&ses->chan_lock); spin_unlock(&ses->ses_lock); spin_unlock(&tcp_srv->srv_lock); From 68c3e4fc8628b1487c965aabb29207249657eb5f Mon Sep 17 00:00:00 2001 From: Joshua Washington Date: Tue, 21 Mar 2023 10:23:32 -0700 Subject: [PATCH 358/898] gve: Cache link_speed value from device The link speed is never changed for the uptime of a VM, and the current implementation sends an admin queue command for each call. Admin queue command invocations have nontrivial overhead (e.g., VM exits), which can be disruptive to users if triggered frequently. Our telemetry data shows that there are VMs that make frequent calls to this admin queue command. Caching the result of the original admin queue command would eliminate the need to send multiple admin queue commands on subsequent calls to retrieve link speed. Fixes: 7e074d5a76ca ("gve: Enable Link Speed Reporting in the driver.") Signed-off-by: Joshua Washington Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230321172332.91678-1-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_ethtool.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index ce574d097e280..5f81470843b49 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -537,7 +537,10 @@ static int gve_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { struct gve_priv *priv = netdev_priv(netdev); - int err = gve_adminq_report_link_speed(priv); + int err = 0; + + if (priv->link_speed == 0) + err = gve_adminq_report_link_speed(priv); cmd->base.speed = priv->link_speed; return err; From 8eac0095de355ee31e1b014f79f83d2cd62a2d04 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Tue, 21 Mar 2023 10:05:39 -0700 Subject: [PATCH 359/898] net: asix: fix modprobe "sysfs: cannot create duplicate filename" "modprobe asix ; rmmod asix ; modprobe asix" fails with: sysfs: cannot create duplicate filename \ '/devices/virtual/mdio_bus/usb-003:004' Issue was originally reported by Anton Lundin on 2022-06-22 (link below). Chrome OS team hit the same issue in Feb, 2023 when trying to find work arounds for other issues with AX88172 devices. The use of devm_mdiobus_register() with usbnet devices results in the MDIO data being associated with the USB device. When the asix driver is unloaded, the USB device continues to exist and the corresponding "mdiobus_unregister()" is NOT called until the USB device is unplugged or unauthorized. So the next "modprobe asix" will fail because the MDIO phy sysfs attributes still exist. The 'easy' (from a design PoV) fix is to use the non-devm variants of mdiobus_* functions and explicitly manage this use in the asix_bind and asix_unbind function calls. I've not explored trying to fix usbnet initialization so devm_* stuff will work. Fixes: e532a096be0e5 ("net: usb: asix: ax88772: add phylib support") Reported-by: Anton Lundin Link: https://lore.kernel.org/netdev/20220623063649.GD23685@pengutronix.de/T/ Tested-by: Eizan Miyamoto Signed-off-by: Grant Grundler Link: https://lore.kernel.org/r/20230321170539.732147-1-grundler@chromium.org Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix_devices.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 743cbf5d662c9..f7cff58fe0449 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -666,8 +666,9 @@ static int asix_resume(struct usb_interface *intf) static int ax88772_init_mdio(struct usbnet *dev) { struct asix_common_private *priv = dev->driver_priv; + int ret; - priv->mdio = devm_mdiobus_alloc(&dev->udev->dev); + priv->mdio = mdiobus_alloc(); if (!priv->mdio) return -ENOMEM; @@ -679,7 +680,20 @@ static int ax88772_init_mdio(struct usbnet *dev) snprintf(priv->mdio->id, MII_BUS_ID_SIZE, "usb-%03d:%03d", dev->udev->bus->busnum, dev->udev->devnum); - return devm_mdiobus_register(&dev->udev->dev, priv->mdio); + ret = mdiobus_register(priv->mdio); + if (ret) { + netdev_err(dev->net, "Could not register MDIO bus (err %d)\n", ret); + mdiobus_free(priv->mdio); + priv->mdio = NULL; + } + + return ret; +} + +static void ax88772_mdio_unregister(struct asix_common_private *priv) +{ + mdiobus_unregister(priv->mdio); + mdiobus_free(priv->mdio); } static int ax88772_init_phy(struct usbnet *dev) @@ -896,16 +910,23 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) ret = ax88772_init_mdio(dev); if (ret) - return ret; + goto mdio_err; ret = ax88772_phylink_setup(dev); if (ret) - return ret; + goto phylink_err; ret = ax88772_init_phy(dev); if (ret) - phylink_destroy(priv->phylink); + goto initphy_err; + return 0; + +initphy_err: + phylink_destroy(priv->phylink); +phylink_err: + ax88772_mdio_unregister(priv); +mdio_err: return ret; } @@ -926,6 +947,7 @@ static void ax88772_unbind(struct usbnet *dev, struct usb_interface *intf) phylink_disconnect_phy(priv->phylink); rtnl_unlock(); phylink_destroy(priv->phylink); + ax88772_mdio_unregister(priv); asix_rx_fixup_common_free(dev->driver_priv); } From 8f058a6ef99f0b88a177b58cc46a44ff5112e40a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Mon, 20 Mar 2023 22:05:18 +0300 Subject: [PATCH 360/898] net: dsa: mt7530: move enabling disabling core clock to mt7530_pll_setup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the code that enables and disables TRGMII clocks and core clock. Move enabling and disabling core clock to mt7530_pll_setup() as it's supposed to be run there. Add 20 ms delay before enabling the core clock as seen on the U-Boot MediaTek ethernet driver. Change the comment for enabling and disabling TRGMII clocks as the code seems to affect both TXC and RXC. Tested rgmii and trgmii modes of port 6 and rgmii mode of port 5 on MCM MT7530 on MT7621AT Unielec U7621-06 and standalone MT7530 on MT7623NI Bananapi BPI-R2. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Link: https://source.denx.de/u-boot/u-boot/-/blob/29a48bf9ccba45a5e560bb564bbe76e42629325f/drivers/net/mtk_eth.c#L589 Tested-by: Arınç ÜNAL Signed-off-by: Arınç ÜNAL Link: https://lore.kernel.org/r/20230320190520.124513-1-arinc.unal@arinc9.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index c2d81b7a429dc..d4a559007973c 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -396,6 +396,9 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid, /* Set up switch core clock for MT7530 */ static void mt7530_pll_setup(struct mt7530_priv *priv) { + /* Disable core clock */ + core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); + /* Disable PLL */ core_write(priv, CORE_GSWPLL_GRP1, 0); @@ -409,6 +412,11 @@ static void mt7530_pll_setup(struct mt7530_priv *priv) RG_GSWPLL_EN_PRE | RG_GSWPLL_POSDIV_200M(2) | RG_GSWPLL_FBKDIV_200M(32)); + + udelay(20); + + /* Enable core clock */ + core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); } /* Setup TX circuit including relevant PAD and driving */ @@ -466,9 +474,8 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) mt7530_write(priv, MT7530_TRGMII_TD_ODT(i), TD_DM_DRVP(8) | TD_DM_DRVN(8)); - /* Disable MT7530 core and TRGMII Tx clocks */ - core_clear(priv, CORE_TRGMII_GSW_CLK_CG, - REG_GSWCK_EN | REG_TRGMIICK_EN); + /* Disable the MT7530 TRGMII clocks */ + core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_TRGMIICK_EN); /* Setup the MT7530 TRGMII Tx Clock */ core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1)); @@ -485,9 +492,8 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) RG_LCDDS_PCW_NCPO_CHG | RG_LCCDS_C(3) | RG_LCDDS_PWDB | RG_LCDDS_ISO_EN); - /* Enable MT7530 core and TRGMII Tx clocks */ - core_set(priv, CORE_TRGMII_GSW_CLK_CG, - REG_GSWCK_EN | REG_TRGMIICK_EN); + /* Enable the MT7530 TRGMII clocks */ + core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_TRGMIICK_EN); } else { for (i = 0 ; i < NUM_TRGMII_CTRL; i++) mt7530_rmw(priv, MT7530_TRGMII_RD(i), From fdcc8ccd823740c18e803b886cec461bc0e64201 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Mon, 20 Mar 2023 22:05:19 +0300 Subject: [PATCH 361/898] net: dsa: mt7530: move lowering TRGMII driving to mt7530_setup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move lowering the TRGMII Tx clock driving to mt7530_setup(), after setting the core clock, as seen on the U-Boot MediaTek ethernet driver. Move the code which looks like it lowers the TRGMII Rx clock driving to after the TRGMII Tx clock driving is lowered. This is run after lowering the Tx clock driving on the U-Boot MediaTek ethernet driver as well. This way, the switch should consume less power regardless of port 6 being used. Update the comment explaining mt7530_pad_clk_setup(). Tested rgmii and trgmii modes of port 6 and rgmii mode of port 5 on MCM MT7530 on MT7621AT Unielec U7621-06 and standalone MT7530 on MT7623NI Bananapi BPI-R2. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Link: https://source.denx.de/u-boot/u-boot/-/blob/29a48bf9ccba45a5e560bb564bbe76e42629325f/drivers/net/mtk_eth.c#L682 Tested-by: Arınç ÜNAL Signed-off-by: Arınç ÜNAL Link: https://lore.kernel.org/r/20230320190520.124513-2-arinc.unal@arinc9.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index d4a559007973c..8831bd409a405 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -419,12 +419,12 @@ static void mt7530_pll_setup(struct mt7530_priv *priv) core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); } -/* Setup TX circuit including relevant PAD and driving */ +/* Setup port 6 interface mode and TRGMII TX circuit */ static int mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) { struct mt7530_priv *priv = ds->priv; - u32 ncpo1, ssc_delta, trgint, i, xtal; + u32 ncpo1, ssc_delta, trgint, xtal; xtal = mt7530_read(priv, MT7530_MHWTRAP) & HWTRAP_XTAL_MASK; @@ -469,11 +469,6 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) P6_INTF_MODE(trgint)); if (trgint) { - /* Lower Tx Driving for TRGMII path */ - for (i = 0 ; i < NUM_TRGMII_CTRL ; i++) - mt7530_write(priv, MT7530_TRGMII_TD_ODT(i), - TD_DM_DRVP(8) | TD_DM_DRVN(8)); - /* Disable the MT7530 TRGMII clocks */ core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_TRGMIICK_EN); @@ -494,10 +489,6 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) /* Enable the MT7530 TRGMII clocks */ core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_TRGMIICK_EN); - } else { - for (i = 0 ; i < NUM_TRGMII_CTRL; i++) - mt7530_rmw(priv, MT7530_TRGMII_RD(i), - RD_TAP_MASK, RD_TAP(16)); } return 0; @@ -2207,6 +2198,15 @@ mt7530_setup(struct dsa_switch *ds) mt7530_pll_setup(priv); + /* Lower Tx driving for TRGMII path */ + for (i = 0; i < NUM_TRGMII_CTRL; i++) + mt7530_write(priv, MT7530_TRGMII_TD_ODT(i), + TD_DM_DRVP(8) | TD_DM_DRVN(8)); + + for (i = 0; i < NUM_TRGMII_CTRL; i++) + mt7530_rmw(priv, MT7530_TRGMII_RD(i), + RD_TAP_MASK, RD_TAP(16)); + /* Enable port 6 */ val = mt7530_read(priv, MT7530_MHWTRAP); val &= ~MHWTRAP_P6_DIS & ~MHWTRAP_PHY_ACCESS; From 407b508bdd70b6848993843d96ed49ac4108fb52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Mon, 20 Mar 2023 22:05:20 +0300 Subject: [PATCH 362/898] net: dsa: mt7530: move setting ssc_delta to PHY_INTERFACE_MODE_TRGMII case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move setting the ssc_delta variable to under the PHY_INTERFACE_MODE_TRGMII case as it's only needed when trgmii is used. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Signed-off-by: Arınç ÜNAL Link: https://lore.kernel.org/r/20230320190520.124513-3-arinc.unal@arinc9.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 8831bd409a405..02410ac439b76 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -441,6 +441,10 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) break; case PHY_INTERFACE_MODE_TRGMII: trgint = 1; + if (xtal == HWTRAP_XTAL_25MHZ) + ssc_delta = 0x57; + else + ssc_delta = 0x87; if (priv->id == ID_MT7621) { /* PLL frequency: 150MHz: 1.2GBit */ if (xtal == HWTRAP_XTAL_40MHZ) @@ -460,11 +464,6 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) return -EINVAL; } - if (xtal == HWTRAP_XTAL_25MHZ) - ssc_delta = 0x57; - else - ssc_delta = 0x87; - mt7530_rmw(priv, MT7530_P6ECR, P6_INTF_MODE_MASK, P6_INTF_MODE(trgint)); From 1073c15fd39e804ad36ff26a7c7d53b0ab51b184 Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Wed, 22 Mar 2023 09:51:07 +0100 Subject: [PATCH 363/898] scripts: merge_config: Fix typo in variable name. ${WARNOVERRIDE} was misspelled as ${WARNOVVERIDE}, which caused a shell syntax error in certain paths of the script execution. Fixes: 46dff8d7e381 ("scripts: merge_config: Add option to suppress warning on overrides") Signed-off-by: Mirsad Goran Todorovac Reviewed-by: Mark Brown Acked-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- scripts/kconfig/merge_config.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh index 32620de473ad2..902eb429b9dbd 100755 --- a/scripts/kconfig/merge_config.sh +++ b/scripts/kconfig/merge_config.sh @@ -145,7 +145,7 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do NEW_VAL=$(grep -w $CFG $MERGE_FILE) BUILTIN_FLAG=false if [ "$BUILTIN" = "true" ] && [ "${NEW_VAL#CONFIG_*=}" = "m" ] && [ "${PREV_VAL#CONFIG_*=}" = "y" ]; then - ${WARNOVVERIDE} Previous value: $PREV_VAL + ${WARNOVERRIDE} Previous value: $PREV_VAL ${WARNOVERRIDE} New value: $NEW_VAL ${WARNOVERRIDE} -y passed, will not demote y to m ${WARNOVERRIDE} From fb27e70f6e408dee5d22b083e7a38a59e6118253 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 22 Mar 2023 19:11:45 +0100 Subject: [PATCH 364/898] modpost: Fix processing of CRCs on 32-bit build machines modpost now reads CRCs from .*.cmd files, parsing them using strtol(). This is inconsistent with its parsing of Module.symvers and with their definition as *unsigned* 32-bit values. strtol() clamps values to [LONG_MIN, LONG_MAX], and when building on a 32-bit system this changes all CRCs >= 0x80000000 to be 0x7fffffff. Change extract_crcs_for_object() to use strtoul() instead. Cc: stable@vger.kernel.org Fixes: f292d875d0dc ("modpost: extract symbol versions from *.cmd files") Signed-off-by: Ben Hutchings Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index efff8078e3958..9466b6a2abae4 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1733,7 +1733,7 @@ static void extract_crcs_for_object(const char *object, struct module *mod) if (!isdigit(*p)) continue; /* skip this line */ - crc = strtol(p, &p, 0); + crc = strtoul(p, &p, 0); if (*p != '\n') continue; /* skip this line */ From 58e84f6b3e84e46524b7e5a916b53c1ad798bc8f Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 20 Mar 2023 12:59:55 +0200 Subject: [PATCH 365/898] RDMA/cma: Allow UD qp_type to join multicast only As for multicast: - The SIDR is the only mode that makes sense; - Besides PS_UDP, other port spaces like PS_IB is also allowed, as it is UD compatible. In this case qkey also needs to be set [1]. This patch allows only UD qp_type to join multicast, and set qkey to default if it's not set, to fix an uninit-value error: the ib->rec.qkey field is accessed without being initialized. ===================================================== BUG: KMSAN: uninit-value in cma_set_qkey drivers/infiniband/core/cma.c:510 [inline] BUG: KMSAN: uninit-value in cma_make_mc_event+0xb73/0xe00 drivers/infiniband/core/cma.c:4570 cma_set_qkey drivers/infiniband/core/cma.c:510 [inline] cma_make_mc_event+0xb73/0xe00 drivers/infiniband/core/cma.c:4570 cma_iboe_join_multicast drivers/infiniband/core/cma.c:4782 [inline] rdma_join_multicast+0x2b83/0x30a0 drivers/infiniband/core/cma.c:4814 ucma_process_join+0xa76/0xf60 drivers/infiniband/core/ucma.c:1479 ucma_join_multicast+0x1e3/0x250 drivers/infiniband/core/ucma.c:1546 ucma_write+0x639/0x6d0 drivers/infiniband/core/ucma.c:1732 vfs_write+0x8ce/0x2030 fs/read_write.c:588 ksys_write+0x28c/0x520 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __ia32_sys_write+0xdb/0x120 fs/read_write.c:652 do_syscall_32_irqs_on arch/x86/entry/common.c:114 [inline] __do_fast_syscall_32+0x96/0xf0 arch/x86/entry/common.c:180 do_fast_syscall_32+0x34/0x70 arch/x86/entry/common.c:205 do_SYSENTER_32+0x1b/0x20 arch/x86/entry/common.c:248 entry_SYSENTER_compat_after_hwframe+0x4d/0x5c Local variable ib.i created at: cma_iboe_join_multicast drivers/infiniband/core/cma.c:4737 [inline] rdma_join_multicast+0x586/0x30a0 drivers/infiniband/core/cma.c:4814 ucma_process_join+0xa76/0xf60 drivers/infiniband/core/ucma.c:1479 CPU: 0 PID: 29874 Comm: syz-executor.3 Not tainted 5.16.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 ===================================================== [1] https://lore.kernel.org/linux-rdma/20220117183832.GD84788@nvidia.com/ Fixes: b5de0c60cc30 ("RDMA/cma: Fix use after free race in roce multicast join") Reported-by: syzbot+8fcbb77276d43cc8b693@syzkaller.appspotmail.com Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/58a4a98323b5e6b1282e83f6b76960d06e43b9fa.1679309909.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 60 ++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 3081559377133..6b9563d4f23c9 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -624,22 +624,11 @@ static inline unsigned short cma_family(struct rdma_id_private *id_priv) return id_priv->id.route.addr.src_addr.ss_family; } -static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) +static int cma_set_default_qkey(struct rdma_id_private *id_priv) { struct ib_sa_mcmember_rec rec; int ret = 0; - if (id_priv->qkey) { - if (qkey && id_priv->qkey != qkey) - return -EINVAL; - return 0; - } - - if (qkey) { - id_priv->qkey = qkey; - return 0; - } - switch (id_priv->id.ps) { case RDMA_PS_UDP: case RDMA_PS_IB: @@ -659,6 +648,16 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) return ret; } +static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) +{ + if (!qkey || + (id_priv->qkey && (id_priv->qkey != qkey))) + return -EINVAL; + + id_priv->qkey = qkey; + return 0; +} + static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) { dev_addr->dev_type = ARPHRD_INFINIBAND; @@ -1229,7 +1228,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; if (id_priv->id.qp_type == IB_QPT_UD) { - ret = cma_set_qkey(id_priv, 0); + ret = cma_set_default_qkey(id_priv); if (ret) return ret; @@ -4569,7 +4568,10 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, memset(&rep, 0, sizeof rep); rep.status = status; if (status == IB_SIDR_SUCCESS) { - ret = cma_set_qkey(id_priv, qkey); + if (qkey) + ret = cma_set_qkey(id_priv, qkey); + else + ret = cma_set_default_qkey(id_priv); if (ret) return ret; rep.qp_num = id_priv->qp_num; @@ -4774,9 +4776,7 @@ static void cma_make_mc_event(int status, struct rdma_id_private *id_priv, enum ib_gid_type gid_type; struct net_device *ndev; - if (!status) - status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); - else + if (status) pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n", status); @@ -4804,7 +4804,7 @@ static void cma_make_mc_event(int status, struct rdma_id_private *id_priv, } event->param.ud.qp_num = 0xFFFFFF; - event->param.ud.qkey = be32_to_cpu(multicast->rec.qkey); + event->param.ud.qkey = id_priv->qkey; out: if (ndev) @@ -4823,8 +4823,11 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING) goto out; - cma_make_mc_event(status, id_priv, multicast, &event, mc); - ret = cma_cm_event_handler(id_priv, &event); + ret = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); + if (!ret) { + cma_make_mc_event(status, id_priv, multicast, &event, mc); + ret = cma_cm_event_handler(id_priv, &event); + } rdma_destroy_ah_attr(&event.param.ud.ah_attr); WARN_ON(ret); @@ -4877,9 +4880,11 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, if (ret) return ret; - ret = cma_set_qkey(id_priv, 0); - if (ret) - return ret; + if (!id_priv->qkey) { + ret = cma_set_default_qkey(id_priv); + if (ret) + return ret; + } cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); rec.qkey = cpu_to_be32(id_priv->qkey); @@ -4956,9 +4961,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type); ib.rec.pkey = cpu_to_be16(0xffff); - if (id_priv->id.ps == RDMA_PS_UDP) - ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); - if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (!ndev) @@ -4984,6 +4986,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (err || !ib.rec.mtu) return err ?: -EINVAL; + if (!id_priv->qkey) + cma_set_default_qkey(id_priv); + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &ib.rec.port_gid); INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler); @@ -5009,6 +5014,9 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED)) return -EINVAL; + if (id_priv->id.qp_type != IB_QPT_UD) + return -EINVAL; + mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) return -ENOMEM; From b1de5c78ebe9858ccec9d49af2f76724f1d47e3e Mon Sep 17 00:00:00 2001 From: Liang He Date: Wed, 22 Mar 2023 14:20:57 +0800 Subject: [PATCH 366/898] net: mdio: thunder: Add missing fwnode_handle_put() In device_for_each_child_node(), we should add fwnode_handle_put() when break out of the iteration device_for_each_child_node() as it will automatically increase and decrease the refcounter. Fixes: 379d7ac7ca31 ("phy: mdio-thunder: Add driver for Cavium Thunder SoC MDIO buses.") Signed-off-by: Liang He Signed-off-by: David S. Miller --- drivers/net/mdio/mdio-thunder.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/mdio/mdio-thunder.c b/drivers/net/mdio/mdio-thunder.c index 3847ee92c1096..6067d96b2b7bf 100644 --- a/drivers/net/mdio/mdio-thunder.c +++ b/drivers/net/mdio/mdio-thunder.c @@ -106,6 +106,7 @@ static int thunder_mdiobus_pci_probe(struct pci_dev *pdev, if (i >= ARRAY_SIZE(nexus->buses)) break; } + fwnode_handle_put(fwn); return 0; err_release_regions: From fc3608aaa5751318837e4bbe0282b3836bca5080 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 22 Mar 2023 01:11:18 +0100 Subject: [PATCH 367/898] efi/libstub: Use relocated version of kernel's struct screen_info In some cases, we expose the kernel's struct screen_info to the EFI stub directly, so it gets populated before even entering the kernel. This means the early console is available as soon as the early param parsing happens, which is nice. It also means we need two different ways to pass this information, as this trick only works if the EFI stub is baked into the core kernel image, which is not always the case. Huacai reports that the preparatory refactoring that was needed to implement this alternative method for zboot resulted in a non-functional efifb earlycon for other cases as well, due to the reordering of the kernel image relocation with the population of the screen_info struct, and the latter now takes place after copying the image to its new location, which means we copy the old, uninitialized state. So let's ensure that the same-image version of alloc_screen_info() produces the correct screen_info pointer, by taking the displacement of the loaded image into account. Reported-by: Huacai Chen Tested-by: Huacai Chen Link: https://lore.kernel.org/linux-efi/20230310021749.921041-1-chenhuacai@loongson.cn/ Fixes: 42c8ea3dca094ab8 ("efi: libstub: Factor out EFI stub entrypoint into separate file") Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/arm64-stub.c | 4 +++- drivers/firmware/efi/libstub/efi-stub-entry.c | 11 +++++++++++ drivers/firmware/efi/libstub/efi-stub.c | 5 ----- drivers/firmware/efi/libstub/efistub.h | 1 + drivers/firmware/efi/libstub/screen_info.c | 9 +-------- drivers/firmware/efi/libstub/zboot.c | 5 +++++ 6 files changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index b996553cdb4c3..770b8ecb73984 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -85,8 +85,10 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } } - if (image->image_base != _text) + if (image->image_base != _text) { efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n"); + image->image_base = _text; + } if (!IS_ALIGNED((u64)_text, SEGMENT_ALIGN)) efi_err("FIRMWARE BUG: kernel image not aligned on %dk boundary\n", diff --git a/drivers/firmware/efi/libstub/efi-stub-entry.c b/drivers/firmware/efi/libstub/efi-stub-entry.c index 5245c4f031c0a..cc4dcaea67fa6 100644 --- a/drivers/firmware/efi/libstub/efi-stub-entry.c +++ b/drivers/firmware/efi/libstub/efi-stub-entry.c @@ -5,6 +5,15 @@ #include "efistub.h" +static unsigned long screen_info_offset; + +struct screen_info *alloc_screen_info(void) +{ + if (IS_ENABLED(CONFIG_ARM)) + return __alloc_screen_info(); + return (void *)&screen_info + screen_info_offset; +} + /* * EFI entry point for the generic EFI stub used by ARM, arm64, RISC-V and * LoongArch. This is the entrypoint that is described in the PE/COFF header @@ -56,6 +65,8 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, return status; } + screen_info_offset = image_addr - (unsigned long)image->image_base; + status = efi_stub_common(handle, image, image_addr, cmdline_ptr); efi_free(image_size, image_addr); diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c index 2955c1ac6a36e..f9c1e8a2bd1d3 100644 --- a/drivers/firmware/efi/libstub/efi-stub.c +++ b/drivers/firmware/efi/libstub/efi-stub.c @@ -47,11 +47,6 @@ static u64 virtmap_base = EFI_RT_VIRTUAL_BASE; static bool flat_va_mapping = (EFI_RT_VIRTUAL_OFFSET != 0); -struct screen_info * __weak alloc_screen_info(void) -{ - return &screen_info; -} - void __weak free_screen_info(struct screen_info *si) { } diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index bd9c38a93bbce..148013bcb5f89 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -1062,6 +1062,7 @@ efi_enable_reset_attack_mitigation(void) { } void efi_retrieve_tpm2_eventlog(void); struct screen_info *alloc_screen_info(void); +struct screen_info *__alloc_screen_info(void); void free_screen_info(struct screen_info *si); void efi_cache_sync_image(unsigned long image_base, diff --git a/drivers/firmware/efi/libstub/screen_info.c b/drivers/firmware/efi/libstub/screen_info.c index 8e76a8b384ba1..4be1c4d1f922b 100644 --- a/drivers/firmware/efi/libstub/screen_info.c +++ b/drivers/firmware/efi/libstub/screen_info.c @@ -15,18 +15,11 @@ * early, but it only works if the EFI stub is part of the core kernel image * itself. The zboot decompressor can only use the configuration table * approach. - * - * In order to support both methods from the same build of the EFI stub - * library, provide this dummy global definition of struct screen_info. If it - * is required to satisfy a link dependency, it means we need to override the - * __weak alloc and free methods with the ones below, and those will be pulled - * in as well. */ -struct screen_info screen_info; static efi_guid_t screen_info_guid = LINUX_EFI_SCREEN_INFO_TABLE_GUID; -struct screen_info *alloc_screen_info(void) +struct screen_info *__alloc_screen_info(void) { struct screen_info *si; efi_status_t status; diff --git a/drivers/firmware/efi/libstub/zboot.c b/drivers/firmware/efi/libstub/zboot.c index ba234e062a1a2..6105e5e2eda46 100644 --- a/drivers/firmware/efi/libstub/zboot.c +++ b/drivers/firmware/efi/libstub/zboot.c @@ -57,6 +57,11 @@ void __weak efi_cache_sync_image(unsigned long image_base, // executable code loaded into memory to be safe for execution. } +struct screen_info *alloc_screen_info(void) +{ + return __alloc_screen_info(); +} + asmlinkage efi_status_t __efiapi efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab) { From aaee0ce460b954e08b6e630d7e54b2abb672feb8 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Wed, 15 Mar 2023 15:52:09 +0800 Subject: [PATCH 368/898] drm/amdgpu: reposition the gpu reset checking for reuse Move the amdgpu_acpi_should_gpu_reset out of CONFIG_SUSPEND to share it with hibernate case. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 41 +++++++++++++----------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 164141bc8b4ad..a6b312da021e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1391,10 +1391,12 @@ int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_sta int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev); void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps); +bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev); void amdgpu_acpi_detect(void); #else static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; } static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { } +static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; } static inline void amdgpu_acpi_detect(void) { } static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; } static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, @@ -1405,11 +1407,9 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); -bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); #else static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } -static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; } static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index d4196fcb85a08..60b1857f469eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -971,6 +971,29 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev) return true; } + +/** + * amdgpu_acpi_should_gpu_reset + * + * @adev: amdgpu_device_pointer + * + * returns true if should reset GPU, false if not + */ +bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) +{ + if (adev->flags & AMD_IS_APU) + return false; + + if (amdgpu_sriov_vf(adev)) + return false; + +#if IS_ENABLED(CONFIG_SUSPEND) + return pm_suspend_target_state != PM_SUSPEND_TO_IDLE; +#else + return true; +#endif +} + /* * amdgpu_acpi_detect - detect ACPI ATIF/ATCS methods * @@ -1042,24 +1065,6 @@ bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) (pm_suspend_target_state == PM_SUSPEND_MEM); } -/** - * amdgpu_acpi_should_gpu_reset - * - * @adev: amdgpu_device_pointer - * - * returns true if should reset GPU, false if not - */ -bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) -{ - if (adev->flags & AMD_IS_APU) - return false; - - if (amdgpu_sriov_vf(adev)) - return false; - - return pm_suspend_target_state != PM_SUSPEND_TO_IDLE; -} - /** * amdgpu_acpi_is_s0ix_active * From b589626674de94d977e81c99bf7905872b991197 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 9 Mar 2023 16:27:51 +0800 Subject: [PATCH 369/898] drm/amdgpu: skip ASIC reset for APUs when go to S4 For GC IP v11.0.4/11, PSP TMR need to be reserved for ASIC mode2 reset. But for S4, when psp suspend, it will destroy the TMR that fails the ASIC reset. [ 96.006101] amdgpu 0000:62:00.0: amdgpu: MODE2 reset [ 100.409717] amdgpu 0000:62:00.0: amdgpu: SMU: I'm not done with your previous command: SMN_C2PMSG_66:0x00000011 SMN_C2PMSG_82:0x00000002 [ 100.411593] amdgpu 0000:62:00.0: amdgpu: Mode2 reset failed! [ 100.412470] amdgpu 0000:62:00.0: PM: pci_pm_freeze(): amdgpu_pmops_freeze+0x0/0x50 [amdgpu] returns -62 [ 100.414020] amdgpu 0000:62:00.0: PM: dpm_run_callback(): pci_pm_freeze+0x0/0xd0 returns -62 [ 100.415311] amdgpu 0000:62:00.0: PM: pci_pm_freeze+0x0/0xd0 returned -62 after 4623202 usecs [ 100.416608] amdgpu 0000:62:00.0: PM: failed to freeze async: error -62 We can skip the reset on APUs, assuming we can resume them properly. Verified on some GFX11, GFX10 and old GFX9 APUs. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f5ffca24def40..ba5def374368e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2467,7 +2467,10 @@ static int amdgpu_pmops_freeze(struct device *dev) adev->in_s4 = false; if (r) return r; - return amdgpu_asic_reset(adev); + + if (amdgpu_acpi_should_gpu_reset(adev)) + return amdgpu_asic_reset(adev); + return 0; } static int amdgpu_pmops_thaw(struct device *dev) From 4eb0b49a0ad3e004a6a65b84efe37bc7e66d560f Mon Sep 17 00:00:00 2001 From: Tong Liu01 Date: Wed, 15 Mar 2023 15:24:22 +0800 Subject: [PATCH 370/898] drm/amdgpu: add mes resume when do gfx post soft reset [why] when gfx do soft reset, mes will also do reset, if mes is not resumed when do recover from soft reset, mes is unable to respond in later sequence [how] resume mes when do gfx post soft reset Signed-off-by: Tong Liu01 Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 3bf697a80cf2f..08650f93f210a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4655,6 +4655,14 @@ static bool gfx_v11_0_check_soft_reset(void *handle) return false; } +static int gfx_v11_0_post_soft_reset(void *handle) +{ + /** + * GFX soft reset will impact MES, need resume MES when do GFX soft reset + */ + return amdgpu_mes_resume((struct amdgpu_device *)handle); +} + static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) { uint64_t clock; @@ -6166,6 +6174,7 @@ static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { .wait_for_idle = gfx_v11_0_wait_for_idle, .soft_reset = gfx_v11_0_soft_reset, .check_soft_reset = gfx_v11_0_check_soft_reset, + .post_soft_reset = gfx_v11_0_post_soft_reset, .set_clockgating_state = gfx_v11_0_set_clockgating_state, .set_powergating_state = gfx_v11_0_set_powergating_state, .get_clockgating_state = gfx_v11_0_get_clockgating_state, From 033c56474acf567a450f8bafca50e0b610f2b716 Mon Sep 17 00:00:00 2001 From: YuBiao Wang Date: Thu, 16 Mar 2023 11:30:32 +0800 Subject: [PATCH 371/898] drm/amdgpu: Force signal hw_fences that are embedded in non-sched jobs [Why] For engines not supporting soft reset, i.e. VCN, there will be a failed ib test before mode 1 reset during asic reset. The fences in this case are never signaled and next time when we try to free the sa_bo, kernel will hang. [How] During pre_asic_reset, driver will clear job fences and afterwards the fences' refcount will be reduced to 1. For drm_sched_jobs it will be released in job_free_cb, and for non-sched jobs like ib_test, it's meant to be released in sa_bo_free but only when the fences are signaled. So we have to force signal the non_sched bad job's fence during pre_asic_reset or the clear is not complete. Signed-off-by: YuBiao Wang Acked-by: Luben Tuikov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index faff4a3f96e6e..f52d0ba91a770 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -678,6 +678,15 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) ptr = &ring->fence_drv.fences[i]; old = rcu_dereference_protected(*ptr, 1); if (old && old->ops == &amdgpu_job_fence_ops) { + struct amdgpu_job *job; + + /* For non-scheduler bad job, i.e. failed ib test, we need to signal + * it right here or we won't be able to track them in fence_drv + * and they will remain unsignaled during sa_bo free. + */ + job = container_of(old, struct amdgpu_job, hw_fence); + if (!job->base.s_fence && !dma_fence_is_signaled(old)) + dma_fence_signal(old); RCU_INIT_POINTER(*ptr, NULL); dma_fence_put(old); } From e06bfcc1a1c41bcb8c31470d437e147ce9f0acfd Mon Sep 17 00:00:00 2001 From: Jane Jian Date: Wed, 15 Mar 2023 18:59:59 +0800 Subject: [PATCH 372/898] drm/amdgpu/gfx: set cg flags to enter/exit safe mode sriov needs to enter/exit safe mode in update umd p state add the cg flag to let it enter or exit while needed Signed-off-by: Jane Jian Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 08650f93f210a..ecf8ceb53311a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1287,6 +1287,11 @@ static int gfx_v11_0_sw_init(void *handle) break; } + /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3) && + amdgpu_sriov_is_pp_one_vf(adev)) + adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG; + /* EOP Event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, From 6d457ca162da98a6a1a381320e936d7448177de9 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Mon, 6 Mar 2023 11:39:51 +0800 Subject: [PATCH 373/898] drm/amd/display: remove outdated 8bpc comments [Why] The commit c76e483cd916 ("drm/amd/display: Don't restrict bpc to 8 bpc") removes the historical 8bpc dependency and sets max_bpc to 16. [How] The comment that states "8bpc for non-edp" needs to be removed as well. Reviewed-by: Harry Wentland Acked-by: Qingqing Zhuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 32abbafd43fa4..a01fd41643fc2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7244,7 +7244,6 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, if (!aconnector->mst_root) drm_connector_attach_max_bpc_property(&aconnector->base, 8, 16); - /* This defaults to the max in the range, but we want 8bpc for non-edp. */ aconnector->base.state->max_bpc = 16; aconnector->base.state->max_requested_bpc = aconnector->base.state->max_bpc; From 2b072442f4962231a8516485012bb2d2551ef2fe Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 15 Mar 2023 20:07:23 +0800 Subject: [PATCH 374/898] drm/amdgpu/nv: Apply ASPM quirk on Intel ADL + AMD Navi S2idle resume freeze can be observed on Intel ADL + AMD WX5500. This is caused by commit 0064b0ce85bb ("drm/amd/pm: enable ASPM by default"). The root cause is still not clear for now. So extend and apply the ASPM quirk from commit e02fe3bc7aba ("drm/amdgpu: vi: disable ASPM on Intel Alder Lake based systems"), to workaround the issue on Navi cards too. Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default") Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2458 Reviewed-by: Alex Deucher Signed-off-by: Kai-Heng Feng Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/nv.c | 2 +- drivers/gpu/drm/amd/amdgpu/vi.c | 17 +---------------- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a6b312da021e9..39018f784f9c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1272,6 +1272,7 @@ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); int amdgpu_device_pci_reset(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev); bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev); +bool amdgpu_device_aspm_support_quirk(void); void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, u64 num_vis_bytes); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index da5b0258a237b..3d98fc2ad36b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -80,6 +80,10 @@ #include +#if IS_ENABLED(CONFIG_X86) +#include +#endif + MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); @@ -1356,6 +1360,17 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) return pcie_aspm_enabled(adev->pdev); } +bool amdgpu_device_aspm_support_quirk(void) +{ +#if IS_ENABLED(CONFIG_X86) + struct cpuinfo_x86 *c = &cpu_data(0); + + return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE); +#else + return true; +#endif +} + /* if we get transitioned to only one device, take VGA back */ /** * amdgpu_device_vga_set_decode - enable/disable vga decode diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 22e25ca285f80..ebe0e2d7dbd1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -578,7 +578,7 @@ static void nv_pcie_gen3_enable(struct amdgpu_device *adev) static void nv_program_aspm(struct amdgpu_device *adev) { - if (!amdgpu_device_should_use_aspm(adev)) + if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk()) return; if (!(adev->flags & AMD_IS_APU) && diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 12ef782eb4785..ceab8783575ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -81,10 +81,6 @@ #include "mxgpu_vi.h" #include "amdgpu_dm.h" -#if IS_ENABLED(CONFIG_X86) -#include -#endif - #define ixPCIE_LC_L1_PM_SUBSTATE 0x100100C6 #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK 0x00000001L #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK 0x00000002L @@ -1138,24 +1134,13 @@ static void vi_enable_aspm(struct amdgpu_device *adev) WREG32_PCIE(ixPCIE_LC_CNTL, data); } -static bool aspm_support_quirk_check(void) -{ -#if IS_ENABLED(CONFIG_X86) - struct cpuinfo_x86 *c = &cpu_data(0); - - return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE); -#else - return true; -#endif -} - static void vi_program_aspm(struct amdgpu_device *adev) { u32 data, data1, orig; bool bL1SS = false; bool bClkReqSupport = true; - if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check()) + if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk()) return; if (adev->flags & AMD_IS_APU || From 4c94e57c258cb7800aa5f3a9d9597d91291407a9 Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Thu, 9 Mar 2023 16:14:08 -0500 Subject: [PATCH 375/898] drm/amd/display: fix wrong index used in dccg32_set_dpstreamclk [Why & How] When merging commit 9af611f29034 ("drm/amd/display: Fix DCN32 DPSTREAMCLK_CNTL programming"), index change was not picked up. Cc: stable@vger.kernel.org Cc: Mario Limonciello Fixes: 9af611f29034 ("drm/amd/display: Fix DCN32 DPSTREAMCLK_CNTL programming") Reviewed-by: Qingqing Zhuo Acked-by: Qingqing Zhuo Signed-off-by: Hersen Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c index e4472c6be6c32..3fb4bcc343531 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c @@ -271,8 +271,7 @@ static void dccg32_set_dpstreamclk( dccg32_set_dtbclk_p_src(dccg, src, otg_inst); /* enabled to select one of the DTBCLKs for pipe */ - switch (otg_inst) - { + switch (dp_hpo_inst) { case 0: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, From f9537b1fa7fb51c2162bc15ce469cbbf1ca0fbfe Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Thu, 9 Mar 2023 15:58:54 -0500 Subject: [PATCH 376/898] drm/amd/display: Set dcn32 caps.seamless_odm [Why & How] seamless_odm set was not picked up while merging commit 2d017189e2b3 ("drm/amd/display: Blank eDP on enable drv if odm enabled") Fixes: 2d017189e2b3 ("drm/amd/display: Blank eDP on enable drv if odm enabled") Reviewed-by: Qingqing Zhuo Acked-by: Qingqing Zhuo Signed-off-by: Hersen Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index d024007f0f650..4b7abb4af6235 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -2186,6 +2186,7 @@ static bool dcn32_resource_construct( dc->caps.edp_dsc_support = true; dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; + dc->caps.seamless_odm = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; From 6f57937980142715e927697a6ffd2050f38ed6f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 22 Mar 2023 22:45:40 +0100 Subject: [PATCH 377/898] pwm: hibvt: Explicitly set .polarity in .get_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver only both polarities. Complete the implementation of .get_state() by setting .polarity according to the configured hardware state. Fixes: d09f00810850 ("pwm: Add PWM driver for HiSilicon BVT SOCs") Link: https://lore.kernel.org/r/20230228135508.1798428-2-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-hibvt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c index 12c05c155cab0..1b9274c5ad872 100644 --- a/drivers/pwm/pwm-hibvt.c +++ b/drivers/pwm/pwm-hibvt.c @@ -146,6 +146,7 @@ static int hibvt_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, value = readl(base + PWM_CTRL_ADDR(pwm->hwpwm)); state->enabled = (PWM_ENABLE_MASK & value); + state->polarity = (PWM_POLARITY_MASK & value) ? PWM_POLARITY_INVERSED : PWM_POLARITY_NORMAL; return 0; } From 30006b77c7e130e01d1ab2148cc8abf73dfcc4bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 22 Mar 2023 22:45:41 +0100 Subject: [PATCH 378/898] pwm: cros-ec: Explicitly set .polarity in .get_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver only supports normal polarity. Complete the implementation of .get_state() by setting .polarity accordingly. Reviewed-by: Guenter Roeck Fixes: 1f0d3bb02785 ("pwm: Add ChromeOS EC PWM driver") Link: https://lore.kernel.org/r/20230228135508.1798428-3-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-cros-ec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c index 86df6702cb835..ad18b0ebe3f1e 100644 --- a/drivers/pwm/pwm-cros-ec.c +++ b/drivers/pwm/pwm-cros-ec.c @@ -198,6 +198,7 @@ static int cros_ec_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, state->enabled = (ret > 0); state->period = EC_PWM_MAX_DUTY; + state->polarity = PWM_POLARITY_NORMAL; /* * Note that "disabled" and "duty cycle == 0" are treated the same. If From b20b097128d9145fadcea1cbb45c4d186cb57466 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 22 Mar 2023 22:45:42 +0100 Subject: [PATCH 379/898] pwm: iqs620a: Explicitly set .polarity in .get_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver only supports normal polarity. Complete the implementation of .get_state() by setting .polarity accordingly. Fixes: 6f0841a8197b ("pwm: Add support for Azoteq IQS620A PWM generator") Reviewed-by: Jeff LaBundy Link: https://lore.kernel.org/r/20230228135508.1798428-4-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-iqs620a.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-iqs620a.c b/drivers/pwm/pwm-iqs620a.c index 8362b4870c66c..47b3141135f38 100644 --- a/drivers/pwm/pwm-iqs620a.c +++ b/drivers/pwm/pwm-iqs620a.c @@ -126,6 +126,7 @@ static int iqs620_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, mutex_unlock(&iqs620_pwm->lock); state->period = IQS620_PWM_PERIOD_NS; + state->polarity = PWM_POLARITY_NORMAL; return 0; } From 2be4dcf6627e1bcbbef8e6ba1811f5127d39202c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 22 Mar 2023 22:45:43 +0100 Subject: [PATCH 380/898] pwm: sprd: Explicitly set .polarity in .get_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver only supports normal polarity. Complete the implementation of .get_state() by setting .polarity accordingly. Fixes: 8aae4b02e8a6 ("pwm: sprd: Add Spreadtrum PWM support") Link: https://lore.kernel.org/r/20230228135508.1798428-5-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-sprd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c index d866ce345f977..bde579a338c27 100644 --- a/drivers/pwm/pwm-sprd.c +++ b/drivers/pwm/pwm-sprd.c @@ -109,6 +109,7 @@ static int sprd_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, duty = val & SPRD_PWM_DUTY_MSK; tmp = (prescale + 1) * NSEC_PER_SEC * duty; state->duty_cycle = DIV_ROUND_CLOSEST_ULL(tmp, chn->clk_rate); + state->polarity = PWM_POLARITY_NORMAL; /* Disable PWM clocks if the PWM channel is not in enable state. */ if (!state->enabled) From 8caa81eb950cb2e9d2d6959b37d853162d197f57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 22 Mar 2023 22:45:44 +0100 Subject: [PATCH 381/898] pwm: meson: Explicitly set .polarity in .get_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver only supports normal polarity. Complete the implementation of .get_state() by setting .polarity accordingly. This fixes a regression that was possible since commit c73a3107624d ("pwm: Handle .get_state() failures") which stopped to zero-initialize the state passed to the .get_state() callback. This was reported at https://forum.odroid.com/viewtopic.php?f=177&t=46360 . While this was an unintended side effect, the real issue is the driver's callback not setting the polarity. There is a complicating fact, that the .apply() callback fakes support for inversed polarity. This is not (and cannot) be matched by .get_state(). As fixing this isn't easy, only point it out in a comment to prevent authors of other drivers from copying that approach. Fixes: c375bcbaabdb ("pwm: meson: Read the full hardware state in meson_pwm_get_state()") Reported-by: Munehisa Kamata Acked-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20230310191405.2606296-1-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-meson.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 16d79ca5d8f53..5cd7b90872c62 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -162,6 +162,12 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, duty = state->duty_cycle; period = state->period; + /* + * Note this is wrong. The result is an output wave that isn't really + * inverted and so is wrongly identified by .get_state as normal. + * Fixing this needs some care however as some machines might rely on + * this. + */ if (state->polarity == PWM_POLARITY_INVERSED) duty = period - duty; @@ -358,6 +364,8 @@ static int meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, state->duty_cycle = 0; } + state->polarity = PWM_POLARITY_NORMAL; + return 0; } From 1271a7b98e7989ba6bb978e14403fc84efe16e13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 22 Mar 2023 22:45:45 +0100 Subject: [PATCH 382/898] pwm: Zero-initialize the pwm_state passed to driver's .get_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is just to ensure that .usage_power is properly initialized and doesn't contain random stack data. The other members of struct pwm_state should get a value assigned in a successful call to .get_state(). So in the absence of bugs in driver implementations, this is only a safe-guard and no fix. Reported-by: Munehisa Kamata Link: https://lore.kernel.org/r/20230310214004.2619480-1-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index e01147f66e15a..474725714a05b 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -115,7 +115,14 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label) } if (pwm->chip->ops->get_state) { - struct pwm_state state; + /* + * Zero-initialize state because most drivers are unaware of + * .usage_power. The other members of state are supposed to be + * set by lowlevel drivers. We still initialize the whole + * structure for simplicity even though this might paper over + * faulty implementations of .get_state(). + */ + struct pwm_state state = { 0, }; err = pwm->chip->ops->get_state(pwm->chip, pwm, &state); trace_pwm_get(pwm, &state, err); @@ -448,7 +455,7 @@ static void pwm_apply_state_debug(struct pwm_device *pwm, { struct pwm_state *last = &pwm->last; struct pwm_chip *chip = pwm->chip; - struct pwm_state s1, s2; + struct pwm_state s1 = { 0 }, s2 = { 0 }; int err; if (!IS_ENABLED(CONFIG_PWM_DEBUG)) @@ -530,6 +537,7 @@ static void pwm_apply_state_debug(struct pwm_device *pwm, return; } + *last = (struct pwm_state){ 0 }; err = chip->ops->get_state(chip, pwm, last); trace_pwm_get(pwm, last, err); if (err) From a4a3203426f4b67535d6442ddc5dca8878a0678f Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 23 Mar 2023 11:01:25 +0000 Subject: [PATCH 383/898] ASoC: codecs: lpass: fix the order or clks turn off during suspend The order in which clocks are stopped matters as some of the clock like NPL are derived from MCLK. Without this patch, Dragonboard RB5 DSP would crash with below error: qcom_q6v5_pas 17300000.remoteproc: fatal error received: ABT_dal.c:278:ABTimeout: AHB Bus hang is detected, Number of bus hang detected := 2 , addr0 = 0x3370000 , addr1 = 0x0!!! Turn off fsgen first, followed by npl and then finally mclk, which is exactly the opposite order of enable sequence. Fixes: 1dc3459009c3 ("ASoC: codecs: lpass: register mclk after runtime pm") Reported-by: Amit Pundir Signed-off-by: Srinivas Kandagatla Tested-by: Amit Pundir Link: https://lore.kernel.org/r/20230323110125.23790-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-rx-macro.c | 4 ++-- sound/soc/codecs/lpass-tx-macro.c | 4 ++-- sound/soc/codecs/lpass-wsa-macro.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c index a73a7d7a1c0a8..faba4237bd3da 100644 --- a/sound/soc/codecs/lpass-rx-macro.c +++ b/sound/soc/codecs/lpass-rx-macro.c @@ -3670,9 +3670,9 @@ static int __maybe_unused rx_macro_runtime_suspend(struct device *dev) regcache_cache_only(rx->regmap, true); regcache_mark_dirty(rx->regmap); - clk_disable_unprepare(rx->mclk); - clk_disable_unprepare(rx->npl); clk_disable_unprepare(rx->fsgen); + clk_disable_unprepare(rx->npl); + clk_disable_unprepare(rx->mclk); return 0; } diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c index 473d3cd395548..589c490a8c487 100644 --- a/sound/soc/codecs/lpass-tx-macro.c +++ b/sound/soc/codecs/lpass-tx-macro.c @@ -2098,9 +2098,9 @@ static int __maybe_unused tx_macro_runtime_suspend(struct device *dev) regcache_cache_only(tx->regmap, true); regcache_mark_dirty(tx->regmap); - clk_disable_unprepare(tx->mclk); - clk_disable_unprepare(tx->npl); clk_disable_unprepare(tx->fsgen); + clk_disable_unprepare(tx->npl); + clk_disable_unprepare(tx->mclk); return 0; } diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index ba7480f3831e2..3f6f1bdd4e030 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -2506,9 +2506,9 @@ static int __maybe_unused wsa_macro_runtime_suspend(struct device *dev) regcache_cache_only(wsa->regmap, true); regcache_mark_dirty(wsa->regmap); - clk_disable_unprepare(wsa->mclk); - clk_disable_unprepare(wsa->npl); clk_disable_unprepare(wsa->fsgen); + clk_disable_unprepare(wsa->npl); + clk_disable_unprepare(wsa->mclk); return 0; } From 0b1d9debe30304f35c1211e6dcdca1935ce67240 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 8 Mar 2023 00:21:34 +0100 Subject: [PATCH 384/898] efi/libstub: randomalloc: Return EFI_OUT_OF_RESOURCES on failure The logic in efi_random_alloc() will iterate over the memory map twice, once to count the number of candidate slots, and another time to locate the chosen slot after randomization. If there is insufficient memory to do the allocation, the second loop will run to completion without actually having located a slot, but we currently return EFI_SUCCESS in this case, as we fail to initialize status to the appropriate error value of EFI_OUT_OF_RESOURCES. Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/randomalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 1692d19ae80f0..32c7a54923b4c 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -101,6 +101,7 @@ efi_status_t efi_random_alloc(unsigned long size, * to calculate the randomly chosen address, and allocate it directly * using EFI_ALLOCATE_ADDRESS. */ + status = EFI_OUT_OF_RESOURCES; for (map_offset = 0; map_offset < map->map_size; map_offset += map->desc_size) { efi_memory_desc_t *md = (void *)map->map + map_offset; efi_physical_addr_t target; From 072a28c8907c841f7d4b56c78bce46d3ee211e73 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Wed, 8 Mar 2023 12:11:31 +0000 Subject: [PATCH 385/898] cifs: do not poll server interfaces too regularly We have the server interface list hanging off the tcon structure today for reasons unknown. So each tcon which is connected to a file server can query them separately, which is really unnecessary. To avoid this, in the query function, we will check the time of last update of the interface list, and avoid querying the server if it is within a certain range. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 6dfb865ee9d75..96ca09d885a2a 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -530,6 +530,14 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, p = buf; spin_lock(&ses->iface_lock); + /* do not query too frequently, this time with lock held */ + if (ses->iface_last_update && + time_before(jiffies, ses->iface_last_update + + (SMB_INTERFACE_POLL_INTERVAL * HZ))) { + spin_unlock(&ses->iface_lock); + return 0; + } + /* * Go through iface_list and do kref_put to remove * any unused ifaces. ifaces in use will be removed @@ -696,6 +704,12 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon, bool in_ struct network_interface_info_ioctl_rsp *out_buf = NULL; struct cifs_ses *ses = tcon->ses; + /* do not query too frequently */ + if (ses->iface_last_update && + time_before(jiffies, ses->iface_last_update + + (SMB_INTERFACE_POLL_INTERVAL * HZ))) + return 0; + rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_QUERY_NETWORK_INTERFACE_INFO, NULL /* no data input */, 0 /* no data input */, From 896cd316b841053f6df95ab77b5f1322c16a8e18 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 9 Mar 2023 13:23:29 +0000 Subject: [PATCH 386/898] cifs: empty interface list when server doesn't support query interfaces When querying server interfaces returns -EOPNOTSUPP, clear the list of interfaces. Assumption is that multichannel would be disabled too. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 96ca09d885a2a..f7e18ab7ee9cd 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -717,7 +717,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon, bool in_ if (rc == -EOPNOTSUPP) { cifs_dbg(FYI, "server does not support query network interfaces\n"); - goto out; + ret_data_len = 0; } else if (rc != 0) { cifs_tcon_dbg(VFS, "error %d on ioctl to get interface list\n", rc); goto out; From d12bc6d26f92c51b28e8f4a146ffcc630b688198 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 13 Mar 2023 11:09:12 +0000 Subject: [PATCH 387/898] cifs: dump pending mids for all channels in DebugData Currently, we only dump the pending mid information only on the primary channel in /proc/fs/cifs/DebugData. If multichannel is active, we do not print the pending MID list on secondary channels. This change will dump the pending mids for all the channels based on server->conn_id. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/cifs_debug.c | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 19a70a69c7603..38369c6cd062c 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -216,6 +216,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) { struct mid_q_entry *mid_entry; struct TCP_Server_Info *server; + struct TCP_Server_Info *chan_server; struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifs_server_iface *iface; @@ -474,23 +475,35 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_puts(m, "\t\t[CONNECTED]\n"); } spin_unlock(&ses->iface_lock); + + seq_puts(m, "\n\n\tMIDs: "); + spin_lock(&ses->chan_lock); + for (j = 0; j < ses->chan_count; j++) { + chan_server = ses->chans[j].server; + if (!chan_server) + continue; + + if (list_empty(&chan_server->pending_mid_q)) + continue; + + seq_printf(m, "\n\tServer ConnectionId: 0x%llx", + chan_server->conn_id); + spin_lock(&chan_server->mid_lock); + list_for_each_entry(mid_entry, &chan_server->pending_mid_q, qhead) { + seq_printf(m, "\n\t\tState: %d com: %d pid: %d cbdata: %p mid %llu", + mid_entry->mid_state, + le16_to_cpu(mid_entry->command), + mid_entry->pid, + mid_entry->callback_data, + mid_entry->mid); + } + spin_unlock(&chan_server->mid_lock); + } + spin_unlock(&ses->chan_lock); + seq_puts(m, "\n--\n"); } if (i == 0) seq_printf(m, "\n\t\t[NONE]"); - - seq_puts(m, "\n\n\tMIDs: "); - spin_lock(&server->mid_lock); - list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) { - seq_printf(m, "\n\tState: %d com: %d pid:" - " %d cbdata: %p mid %llu\n", - mid_entry->mid_state, - le16_to_cpu(mid_entry->command), - mid_entry->pid, - mid_entry->callback_data, - mid_entry->mid); - } - spin_unlock(&server->mid_lock); - seq_printf(m, "\n--\n"); } if (c == 0) seq_printf(m, "\n\t[NONE]"); From 175b54abc443b6965e9379b71ec05f7c73c192e9 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 13 Mar 2023 12:17:34 +0000 Subject: [PATCH 388/898] cifs: print session id while listing open files In the output of /proc/fs/cifs/open_files, we only print the tree id for the tcon of each open file. It becomes difficult to know which tcon these files belong to with just the tree id. This change dumps ses id in addition to all other data today. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/cifs_debug.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 38369c6cd062c..e9c8c088d948c 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -176,7 +176,7 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) seq_puts(m, "# Version:1\n"); seq_puts(m, "# Format:\n"); - seq_puts(m, "# "); + seq_puts(m, "# "); #ifdef CONFIG_CIFS_DEBUG2 seq_printf(m, " \n"); #else @@ -189,8 +189,9 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) spin_lock(&tcon->open_file_lock); list_for_each_entry(cfile, &tcon->openFileList, tlist) { seq_printf(m, - "0x%x 0x%llx 0x%x %d %d %d %pd", + "0x%x 0x%llx 0x%llx 0x%x %d %d %d %pd", tcon->tid, + ses->Suid, cfile->fid.persistent_fid, cfile->f_flags, cfile->count, From 3670de80678961eda7fa2220883fc77c16868951 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Fri, 17 Mar 2023 14:15:15 +0800 Subject: [PATCH 389/898] usb: chipdea: core: fix return -EINVAL if request role is the same with current role It should not return -EINVAL if the request role is the same with current role, return non-error and without do anything instead. Fixes: a932a8041ff9 ("usb: chipidea: core: add sysfs group") cc: Acked-by: Peter Chen Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20230317061516.2451728-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 27c601296130e..b6f2a41de20ec 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -984,9 +984,12 @@ static ssize_t role_store(struct device *dev, strlen(ci->roles[role]->name))) break; - if (role == CI_ROLE_END || role == ci->role) + if (role == CI_ROLE_END) return -EINVAL; + if (role == ci->role) + return n; + pm_runtime_get_sync(dev); disable_irq(ci->irq); ci_role_stop(ci); From 451b15ed138ec15bffbebb58a00ebdd884c3e659 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Fri, 17 Mar 2023 14:15:16 +0800 Subject: [PATCH 390/898] usb: chipidea: core: fix possible concurrent when switch role The user may call role_store() when driver is handling ci_handle_id_switch() which is triggerred by otg event or power lost event. Unfortunately, the controller may go into chaos in this case. Fix this by protecting it with mutex lock. Fixes: a932a8041ff9 ("usb: chipidea: core: add sysfs group") cc: Acked-by: Peter Chen Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20230317061516.2451728-2-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci.h | 2 ++ drivers/usb/chipidea/core.c | 8 +++++++- drivers/usb/chipidea/otg.c | 5 ++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/usb/chipidea/ci.h b/drivers/usb/chipidea/ci.h index 005c67cb3afb7..f210b7489fd5b 100644 --- a/drivers/usb/chipidea/ci.h +++ b/drivers/usb/chipidea/ci.h @@ -208,6 +208,7 @@ struct hw_bank { * @in_lpm: if the core in low power mode * @wakeup_int: if wakeup interrupt occur * @rev: The revision number for controller + * @mutex: protect code from concorrent running when doing role switch */ struct ci_hdrc { struct device *dev; @@ -260,6 +261,7 @@ struct ci_hdrc { bool in_lpm; bool wakeup_int; enum ci_revision rev; + struct mutex mutex; }; static inline struct ci_role_driver *ci_role(struct ci_hdrc *ci) diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index b6f2a41de20ec..281fc51720cea 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -987,8 +987,12 @@ static ssize_t role_store(struct device *dev, if (role == CI_ROLE_END) return -EINVAL; - if (role == ci->role) + mutex_lock(&ci->mutex); + + if (role == ci->role) { + mutex_unlock(&ci->mutex); return n; + } pm_runtime_get_sync(dev); disable_irq(ci->irq); @@ -998,6 +1002,7 @@ static ssize_t role_store(struct device *dev, ci_handle_vbus_change(ci); enable_irq(ci->irq); pm_runtime_put_sync(dev); + mutex_unlock(&ci->mutex); return (ret == 0) ? n : ret; } @@ -1033,6 +1038,7 @@ static int ci_hdrc_probe(struct platform_device *pdev) return -ENOMEM; spin_lock_init(&ci->lock); + mutex_init(&ci->mutex); ci->dev = dev; ci->platdata = dev_get_platdata(dev); ci->imx28_write_fix = !!(ci->platdata->flags & diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c index 622c3b68aa1e6..f5490f2a5b6bc 100644 --- a/drivers/usb/chipidea/otg.c +++ b/drivers/usb/chipidea/otg.c @@ -167,8 +167,10 @@ static int hw_wait_vbus_lower_bsv(struct ci_hdrc *ci) void ci_handle_id_switch(struct ci_hdrc *ci) { - enum ci_role role = ci_otg_role(ci); + enum ci_role role; + mutex_lock(&ci->mutex); + role = ci_otg_role(ci); if (role != ci->role) { dev_dbg(ci->dev, "switching from %s to %s\n", ci_role(ci)->name, ci->roles[role]->name); @@ -198,6 +200,7 @@ void ci_handle_id_switch(struct ci_hdrc *ci) if (role == CI_ROLE_GADGET) ci_handle_vbus_change(ci); } + mutex_unlock(&ci->mutex); } /** * ci_otg_work - perform otg (vbus/id) event handle From f747313249b74f323ddf841a9c8db14d989f296a Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 16 Mar 2023 09:41:27 +0100 Subject: [PATCH 391/898] usb: dwc2: fix a devres leak in hw_enable upon suspend resume Each time the platform goes to low power, PM suspend / resume routines call: __dwc2_lowlevel_hw_enable -> devm_add_action_or_reset(). This adds a new devres each time. This may also happen at runtime, as dwc2_lowlevel_hw_enable() can be called from udc_start(). This can be seen with tracing: - echo 1 > /sys/kernel/debug/tracing/events/dev/devres_log/enable - go to low power - cat /sys/kernel/debug/tracing/trace A new "ADD" entry is found upon each low power cycle: ... devres_log: 49000000.usb-otg ADD 82a13bba devm_action_release (8 bytes) ... devres_log: 49000000.usb-otg ADD 49889daf devm_action_release (8 bytes) ... A second issue is addressed here: - regulator_bulk_enable() is called upon each PM cycle (suspend/resume). - regulator_bulk_disable() never gets called. So the reference count for these regulators constantly increase, by one upon each low power cycle, due to missing regulator_bulk_disable() call in __dwc2_lowlevel_hw_disable(). The original fix that introduced the devm_add_action_or_reset() call, fixed an issue during probe, that happens due to other errors in dwc2_driver_probe() -> dwc2_core_reset(). Then the probe fails without disabling regulators, when dr_mode == USB_DR_MODE_PERIPHERAL. Rather fix the error path: disable all the low level hardware in the error path, by using the "hsotg->ll_hw_enabled" flag. Checking dr_mode has been introduced to avoid a dual call to dwc2_lowlevel_hw_disable(). "ll_hw_enabled" should achieve the same (and is used currently in the remove() routine). Fixes: 54c196060510 ("usb: dwc2: Always disable regulators on driver teardown") Fixes: 33a06f1300a7 ("usb: dwc2: Fix error path in gadget registration") Cc: stable Signed-off-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20230316084127.126084-1-fabrice.gasnier@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/platform.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 23ef759968231..e935067396649 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -91,13 +91,6 @@ static int dwc2_get_dr_mode(struct dwc2_hsotg *hsotg) return 0; } -static void __dwc2_disable_regulators(void *data) -{ - struct dwc2_hsotg *hsotg = data; - - regulator_bulk_disable(ARRAY_SIZE(hsotg->supplies), hsotg->supplies); -} - static int __dwc2_lowlevel_hw_enable(struct dwc2_hsotg *hsotg) { struct platform_device *pdev = to_platform_device(hsotg->dev); @@ -108,11 +101,6 @@ static int __dwc2_lowlevel_hw_enable(struct dwc2_hsotg *hsotg) if (ret) return ret; - ret = devm_add_action_or_reset(&pdev->dev, - __dwc2_disable_regulators, hsotg); - if (ret) - return ret; - if (hsotg->clk) { ret = clk_prepare_enable(hsotg->clk); if (ret) @@ -168,7 +156,7 @@ static int __dwc2_lowlevel_hw_disable(struct dwc2_hsotg *hsotg) if (hsotg->clk) clk_disable_unprepare(hsotg->clk); - return 0; + return regulator_bulk_disable(ARRAY_SIZE(hsotg->supplies), hsotg->supplies); } /** @@ -608,7 +596,7 @@ static int dwc2_driver_probe(struct platform_device *dev) if (hsotg->params.activate_stm_id_vb_detection) regulator_disable(hsotg->usb33d); error: - if (hsotg->dr_mode != USB_DR_MODE_PERIPHERAL) + if (hsotg->ll_hw_enabled) dwc2_lowlevel_hw_disable(hsotg); return retval; } From 5021383242ada277a38bd052a4c12ed4707faccb Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Wed, 15 Mar 2023 15:44:33 +0100 Subject: [PATCH 392/898] usb: dwc2: fix a race, don't power off/on phy for dual-role mode When in dual role mode (dr_mode == USB_DR_MODE_OTG), platform probe successively basically calls: - dwc2_gadget_init() - dwc2_hcd_init() - dwc2_lowlevel_hw_disable() since recent change [1] - usb_add_gadget_udc() The PHYs (and so the clocks it may provide) shouldn't be disabled for all SoCs, in OTG mode, as the HCD part has been initialized. On STM32 this creates some weird race condition upon boot, when: - initially attached as a device, to a HOST - and there is a gadget script invoked to setup the device part. Below issue becomes systematic, as long as the gadget script isn't started by userland: the hardware PHYs (and so the clocks provided by the PHYs) remains disabled. It ends up in having an endless interrupt storm, before the watchdog resets the platform. [ 16.924163] dwc2 49000000.usb-otg: EPs: 9, dedicated fifos, 952 entries in SPRAM [ 16.962704] dwc2 49000000.usb-otg: DWC OTG Controller [ 16.966488] dwc2 49000000.usb-otg: new USB bus registered, assigned bus number 2 [ 16.974051] dwc2 49000000.usb-otg: irq 77, io mem 0x49000000 [ 17.032170] hub 2-0:1.0: USB hub found [ 17.042299] hub 2-0:1.0: 1 port detected [ 17.175408] dwc2 49000000.usb-otg: Mode Mismatch Interrupt: currently in Host mode [ 17.181741] dwc2 49000000.usb-otg: Mode Mismatch Interrupt: currently in Host mode [ 17.189303] dwc2 49000000.usb-otg: Mode Mismatch Interrupt: currently in Host mode ... The host part is also not functional, until the gadget part is configured. The HW may only be disabled for peripheral mode (original init), e.g. dr_mode == USB_DR_MODE_PERIPHERAL, until the gadget driver initializes. But when in USB_DR_MODE_OTG, the HW should remain enabled, as the HCD part is able to run, while the gadget part isn't necessarily configured. I don't fully get the of purpose the original change, that claims disabling the hardware is missing. It creates conditions on SOCs using the PHY initialization to be completely non working in OTG mode. Original change [1] should be reworked to be platform specific. [1] https://lore.kernel.org/r/20221206-dwc2-gadget-dual-role-v1-2-36515e1092cd@theobroma-systems.com Fixes: ade23d7b7ec5 ("usb: dwc2: power on/off phy for peripheral mode in dual-role mode") Cc: stable Signed-off-by: Fabrice Gasnier Reviewed-by: Quentin Schulz Tested-by: Quentin Schulz Link: https://lore.kernel.org/r/20230315144433.3095859-1-fabrice.gasnier@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 6 ++---- drivers/usb/dwc2/platform.c | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 62fa6378d2d73..8b15742d9e8aa 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -4549,8 +4549,7 @@ static int dwc2_hsotg_udc_start(struct usb_gadget *gadget, hsotg->gadget.dev.of_node = hsotg->dev->of_node; hsotg->gadget.speed = USB_SPEED_UNKNOWN; - if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL || - (hsotg->dr_mode == USB_DR_MODE_OTG && dwc2_is_device_mode(hsotg))) { + if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL) { ret = dwc2_lowlevel_hw_enable(hsotg); if (ret) goto err; @@ -4612,8 +4611,7 @@ static int dwc2_hsotg_udc_stop(struct usb_gadget *gadget) if (!IS_ERR_OR_NULL(hsotg->uphy)) otg_set_peripheral(hsotg->uphy->otg, NULL); - if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL || - (hsotg->dr_mode == USB_DR_MODE_OTG && dwc2_is_device_mode(hsotg))) + if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL) dwc2_lowlevel_hw_disable(hsotg); return 0; diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index e935067396649..d1589ba7d322d 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -564,8 +564,7 @@ static int dwc2_driver_probe(struct platform_device *dev) dwc2_debugfs_init(hsotg); /* Gadget code manages lowlevel hw on its own */ - if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL || - (hsotg->dr_mode == USB_DR_MODE_OTG && dwc2_is_device_mode(hsotg))) + if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL) dwc2_lowlevel_hw_disable(hsotg); #if IS_ENABLED(CONFIG_USB_DWC2_PERIPHERAL) || \ From 260595b439776c473cc248f0de63fe78d964d849 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 23 Mar 2023 12:26:02 -0400 Subject: [PATCH 393/898] Reinstate "GFS2: free disk inode which is deleted by remote node -V2" It turns out that reverting commit 970343cd4904 ("GFS2: free disk inode which is deleted by remote node -V2") causes a regression related to evicting inodes that were unlinked on a different cluster node. We could also have simply added a call to d_mark_dontcache() to function gfs2_try_evict(), but the original pre-revert code is better tested and proven. This reverts commit 445cb1277e10d7e19b631ef8a64aa3f055df377d. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/dentry.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 6fe9ca253b709..2e215e8c3c88e 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -83,8 +83,26 @@ static int gfs2_dhash(const struct dentry *dentry, struct qstr *str) return 0; } +static int gfs2_dentry_delete(const struct dentry *dentry) +{ + struct gfs2_inode *ginode; + + if (d_really_is_negative(dentry)) + return 0; + + ginode = GFS2_I(d_inode(dentry)); + if (!gfs2_holder_initialized(&ginode->i_iopen_gh)) + return 0; + + if (test_bit(GLF_DEMOTE, &ginode->i_iopen_gh.gh_gl->gl_flags)) + return 1; + + return 0; +} + const struct dentry_operations gfs2_dops = { .d_revalidate = gfs2_drevalidate, .d_hash = gfs2_dhash, + .d_delete = gfs2_dentry_delete, }; From 943d045a6d796175e5d08f9973953b1d2c07d797 Mon Sep 17 00:00:00 2001 From: Siddharth Kawar Date: Mon, 20 Mar 2023 20:37:40 +0000 Subject: [PATCH 394/898] SUNRPC: fix shutdown of NFS TCP client socket NFS server Duplicate Request Cache (DRC) algorithms rely on NFS clients reconnecting using the same local TCP port. Unique NFS operations are identified by the per-TCP connection set of XIDs. This prevents file corruption when non-idempotent NFS operations are retried. Currently, NFS client TCP connections are using different local TCP ports when reconnecting to NFS servers. After an NFS server initiates shutdown of the TCP connection, the NFS client's TCP socket is set to NULL after the socket state has reached TCP_LAST_ACK(9). When reconnecting, the new socket attempts to reuse the same local port but fails with EADDRNOTAVAIL (99). This forces the socket to use a different local TCP port to reconnect to the remote NFS server. State Transition and Events: TCP_CLOSE_WAIT(8) TCP_LAST_ACK(9) connect(fail EADDRNOTAVAIL(99)) TCP_CLOSE(7) bind on new port connect success dmesg excerpts showing reconnect switching from TCP local port of 926 to 763 after commit 7c81e6a9d75b: [13354.947854] NFS call mkdir testW ... [13405.654781] RPC: xs_tcp_state_change client 00000000037d0f03... [13405.654813] RPC: state 8 conn 1 dead 0 zapped 1 sk_shutdown 1 [13405.654826] RPC: xs_data_ready... [13405.654892] RPC: xs_tcp_state_change client 00000000037d0f03... [13405.654895] RPC: state 9 conn 0 dead 0 zapped 1 sk_shutdown 3 [13405.654899] RPC: xs_tcp_state_change client 00000000037d0f03... [13405.654900] RPC: state 9 conn 0 dead 0 zapped 1 sk_shutdown 3 [13405.654950] RPC: xs_connect scheduled xprt 00000000037d0f03 [13405.654975] RPC: xs_bind 0.0.0.0:926: ok (0) [13405.654980] RPC: worker connecting xprt 00000000037d0f03 via tcp to 10.101.6.228 (port 2049) [13405.654991] RPC: 00000000037d0f03 connect status 99 connected 0 sock state 7 [13405.655001] RPC: xs_tcp_state_change client 00000000037d0f03... [13405.655002] RPC: state 7 conn 0 dead 0 zapped 1 sk_shutdown 3 [13405.655024] RPC: xs_connect scheduled xprt 00000000037d0f03 [13405.655038] RPC: xs_bind 0.0.0.0:763: ok (0) [13405.655041] RPC: worker connecting xprt 00000000037d0f03 via tcp to 10.101.6.228 (port 2049) [13405.655065] RPC: 00000000037d0f03 connect status 115 connected 0 sock state 2 State Transition and Events with patch applied: TCP_CLOSE_WAIT(8) TCP_LAST_ACK(9) TCP_CLOSE(7) connect(reuse of port succeeds) dmesg excerpts showing reconnect on same TCP local port of 936 with patch applied: [ 257.139935] NFS: mkdir(0:59/560857152), testQ [ 257.139937] NFS call mkdir testQ ... [ 307.822702] RPC: state 8 conn 1 dead 0 zapped 1 sk_shutdown 1 [ 307.822714] RPC: xs_data_ready... [ 307.822817] RPC: xs_tcp_state_change client 00000000ce702f14... [ 307.822821] RPC: state 9 conn 0 dead 0 zapped 1 sk_shutdown 3 [ 307.822825] RPC: xs_tcp_state_change client 00000000ce702f14... [ 307.822826] RPC: state 9 conn 0 dead 0 zapped 1 sk_shutdown 3 [ 307.823606] RPC: xs_tcp_state_change client 00000000ce702f14... [ 307.823609] RPC: state 7 conn 0 dead 0 zapped 1 sk_shutdown 3 [ 307.823629] RPC: xs_tcp_state_change client 00000000ce702f14... [ 307.823632] RPC: state 7 conn 0 dead 0 zapped 1 sk_shutdown 3 [ 307.823676] RPC: xs_connect scheduled xprt 00000000ce702f14 [ 307.823704] RPC: xs_bind 0.0.0.0:936: ok (0) [ 307.823709] RPC: worker connecting xprt 00000000ce702f14 via tcp to 10.101.1.30 (port 2049) [ 307.823748] RPC: 00000000ce702f14 connect status 115 connected 0 sock state 2 ... [ 314.916193] RPC: state 7 conn 0 dead 0 zapped 1 sk_shutdown 3 [ 314.916251] RPC: xs_connect scheduled xprt 00000000ce702f14 [ 314.916282] RPC: xs_bind 0.0.0.0:936: ok (0) [ 314.916292] RPC: worker connecting xprt 00000000ce702f14 via tcp to 10.101.1.30 (port 2049) [ 314.916342] RPC: 00000000ce702f14 connect status 115 connected 0 sock state 2 Fixes: 7c81e6a9d75b ("SUNRPC: Tweak TCP socket shutdown in the RPC client") Signed-off-by: Siddharth Rajendra Kawar Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index adcbedc244d63..6cacd70a15ffa 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2158,6 +2158,7 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) switch (skst) { case TCP_FIN_WAIT1: case TCP_FIN_WAIT2: + case TCP_LAST_ACK: break; case TCP_ESTABLISHED: case TCP_CLOSE_WAIT: From e89c2e815e76471cb507bd95728bf26da7976430 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 13 Mar 2023 16:00:23 -0700 Subject: [PATCH 395/898] riscv: Handle zicsr/zifencei issues between clang and binutils There are two related issues that appear in certain combinations with clang and GNU binutils. The first occurs when a version of clang that supports zicsr or zifencei via '-march=' [1] (i.e, >= 17.x) is used in combination with a version of GNU binutils that do not recognize zicsr and zifencei in the '-march=' value (i.e., < 2.36): riscv64-linux-gnu-ld: -march=rv64i2p0_m2p0_a2p0_c2p0_zicsr2p0_zifencei2p0: Invalid or unknown z ISA extension: 'zifencei' riscv64-linux-gnu-ld: failed to merge target specific data of file fs/efivarfs/file.o riscv64-linux-gnu-ld: -march=rv64i2p0_m2p0_a2p0_c2p0_zicsr2p0_zifencei2p0: Invalid or unknown z ISA extension: 'zifencei' riscv64-linux-gnu-ld: failed to merge target specific data of file fs/efivarfs/super.o The second occurs when a version of clang that does not support zicsr or zifencei via '-march=' (i.e., <= 16.x) is used in combination with a version of GNU as that defaults to a newer ISA base spec, which requires specifying zicsr and zifencei in the '-march=' value explicitly (i.e, >= 2.38): ../arch/riscv/kernel/kexec_relocate.S: Assembler messages: ../arch/riscv/kernel/kexec_relocate.S:147: Error: unrecognized opcode `fence.i', extension `zifencei' required clang-12: error: assembler command failed with exit code 1 (use -v to see invocation) This is the same issue addressed by commit 6df2a016c0c8 ("riscv: fix build with binutils 2.38") (see [2] for additional information) but older versions of clang miss out on it because the cc-option check fails: clang-12: error: invalid arch name 'rv64imac_zicsr_zifencei', unsupported standard user-level extension 'zicsr' clang-12: error: invalid arch name 'rv64imac_zicsr_zifencei', unsupported standard user-level extension 'zicsr' To resolve the first issue, only attempt to add zicsr and zifencei to the march string when using the GNU assembler 2.38 or newer, which is when the default ISA spec was updated, requiring these extensions to be specified explicitly. LLVM implements an older version of the base specification for all currently released versions, so these instructions are available as part of the 'i' extension. If LLVM's implementation is updated in the future, a CONFIG_AS_IS_LLVM condition can be added to CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI. To resolve the second issue, use version 2.2 of the base ISA spec when using an older version of clang that does not support zicsr or zifencei via '-march=', as that is the spec version most compatible with the one clang/LLVM implements and avoids the need to specify zicsr and zifencei explicitly due to still being a part of 'i'. [1]: https://github.com/llvm/llvm-project/commit/22e199e6afb1263c943c0c0d4498694e15bf8a16 [2]: https://lore.kernel.org/ZAxT7T9Xy1Fo3d5W@aurel32.net/ Cc: stable@vger.kernel.org Link: https://github.com/ClangBuiltLinux/linux/issues/1808 Co-developed-by: Conor Dooley Signed-off-by: Conor Dooley Signed-off-by: Nathan Chancellor Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20230313-riscv-zicsr-zifencei-fiasco-v1-1-dd1b7840a551@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 22 ++++++++++++++++++++++ arch/riscv/Makefile | 10 ++++++---- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c5e42cc376048..5b182d1c196ce 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -464,6 +464,28 @@ config TOOLCHAIN_HAS_ZIHINTPAUSE depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zihintpause) depends on LLD_VERSION >= 150000 || LD_VERSION >= 23600 +config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI + def_bool y + # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc + depends on AS_IS_GNU && AS_VERSION >= 23800 + help + Newer binutils versions default to ISA spec version 20191213 which + moves some instructions from the I extension to the Zicsr and Zifencei + extensions. + +config TOOLCHAIN_NEEDS_OLD_ISA_SPEC + def_bool y + depends on TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI + # https://github.com/llvm/llvm-project/commit/22e199e6afb1263c943c0c0d4498694e15bf8a16 + depends on CC_IS_CLANG && CLANG_VERSION < 170000 + help + Certain versions of clang do not support zicsr and zifencei via -march + but newer versions of binutils require it for the reasons noted in the + help text of CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI. This + option causes an older ISA spec compatible with these older versions + of clang to be passed to GAS, which has the same result as passing zicsr + and zifencei to -march. + config FPU bool "FPU support" default y diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 4de83b9b1772d..b05e833a022d1 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -57,10 +57,12 @@ riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c -# Newer binutils versions default to ISA spec version 20191213 which moves some -# instructions from the I extension to the Zicsr and Zifencei extensions. -toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei) -riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei +ifdef CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC +KBUILD_CFLAGS += -Wa,-misa-spec=2.2 +KBUILD_AFLAGS += -Wa,-misa-spec=2.2 +else +riscv-march-$(CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI) := $(riscv-march-y)_zicsr_zifencei +endif # Check if the toolchain supports Zihintpause extension riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause From 902160cdb2bf4d23cd75f43ed0597ddf0134bb89 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Wed, 8 Mar 2023 13:28:37 +0530 Subject: [PATCH 396/898] Bluetooth: btinel: Check ACPI handle for NULL before accessing Older platforms and Virtual platforms which doesn't have support for bluetooth device in ACPI firmware will not have valid ACPI handle. Check for validity of handle before accessing. dmesg log from simics environment (virtual platform): BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: acpi_ns_walk_namespace+0x5c/0x278 PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI Modules linked in: bnep intel_powerclamp coretemp kvm_intel kvm irqbypass intel_cstate input_leds joydev serio_raw mac_hid btusb(OE) btintel(OE) bluetooth(OE) lpc_ich compat(OE) ecdh_generic i7core_edac i5500_temp shpchp binfmt_misc sch_fq_codel parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid e1000e psmouse ahci pata_acpi libahci ptp pps_core floppy CPU: 0 PID: 35 Comm: kworker/u3:0 Tainted: G OE 4.15.0-140-generic #144-Ubuntu Hardware name: Simics Simics, BIOS Simics 01/01/2011 Workqueue: hci0 hci_power_on [bluetooth] RIP: 0010:acpi_ns_walk_namespace+0x5c/0x278 RSP: 0000:ffffaa9c0049bba8 EFLAGS: 00010246 RAX: 0000000000000001 RBX: 0000000000001001 RCX: 0000000000000010 RDX: ffffffff92ea7e27 RSI: ffffffff92ea7e10 RDI: 00000000000000c8 RBP: ffffaa9c0049bbf8 R08: 0000000000000000 R09: ffffffffc05b39d0 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000001 R13: 0000000000000000 R14: ffffffffc05b39d0 R15: ffffaa9c0049bc70 FS: 0000000000000000(0000) GS:ffff8be73fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000018 CR3: 0000000075f0e000 CR4: 00000000000006f0 Fixes: 294d749b5df5 ("Bluetooth: btintel: Iterate only bluetooth device ACPI entries") Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index e8d4b59e89c5b..af774688f1c0d 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -2326,6 +2326,7 @@ static void btintel_set_ppag(struct hci_dev *hdev, struct intel_version_tlv *ver struct btintel_ppag ppag; struct sk_buff *skb; struct btintel_loc_aware_reg ppag_cmd; + acpi_handle handle; /* PPAG is not supported if CRF is HrP2, Jfp2, JfP1 */ switch (ver->cnvr_top & 0xFFF) { @@ -2335,12 +2336,18 @@ static void btintel_set_ppag(struct hci_dev *hdev, struct intel_version_tlv *ver return; } + handle = ACPI_HANDLE(GET_HCIDEV_DEV(hdev)); + if (!handle) { + bt_dev_info(hdev, "No support for BT device in ACPI firmware"); + return; + } + memset(&ppag, 0, sizeof(ppag)); ppag.hdev = hdev; ppag.status = AE_NOT_FOUND; - acpi_walk_namespace(ACPI_TYPE_PACKAGE, ACPI_HANDLE(GET_HCIDEV_DEV(hdev)), - 1, NULL, btintel_ppag_callback, &ppag, NULL); + acpi_walk_namespace(ACPI_TYPE_PACKAGE, handle, 1, NULL, + btintel_ppag_callback, &ppag, NULL); if (ACPI_FAILURE(ppag.status)) { if (ppag.status == AE_NOT_FOUND) { From 5d44ab9e204200a78ad55cdf185aa2bb109b5950 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 8 Mar 2023 14:31:55 +0100 Subject: [PATCH 397/898] Bluetooth: btqcomsmd: Fix command timeout after setting BD address On most devices using the btqcomsmd driver (e.g. the DragonBoard 410c and other devices based on the Qualcomm MSM8916/MSM8909/... SoCs) the Bluetooth firmware seems to become unresponsive for a while after setting the BD address. On recent kernel versions (at least 5.17+) this often causes timeouts for subsequent commands, e.g. the HCI reset sent by the Bluetooth core during initialization: Bluetooth: hci0: Opcode 0x c03 failed: -110 Unfortunately this behavior does not seem to be documented anywhere. Experimentation suggests that the minimum necessary delay to avoid the problem is ~150us. However, to be sure add a sleep for > 1ms in case it is a bit longer on other firmware versions. Older kernel versions are likely also affected, although perhaps with slightly different errors or less probability. Side effects can easily hide the issue in most cases, e.g. unrelated incoming interrupts that cause the necessary delay. Fixes: 1511cc750c3d ("Bluetooth: Introduce Qualcomm WCNSS SMD based HCI driver") Signed-off-by: Stephan Gerhold Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btqcomsmd.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c index 2acb719e596f5..11c7e04bf3947 100644 --- a/drivers/bluetooth/btqcomsmd.c +++ b/drivers/bluetooth/btqcomsmd.c @@ -122,6 +122,21 @@ static int btqcomsmd_setup(struct hci_dev *hdev) return 0; } +static int btqcomsmd_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) +{ + int ret; + + ret = qca_set_bdaddr_rome(hdev, bdaddr); + if (ret) + return ret; + + /* The firmware stops responding for a while after setting the bdaddr, + * causing timeouts for subsequent commands. Sleep a bit to avoid this. + */ + usleep_range(1000, 10000); + return 0; +} + static int btqcomsmd_probe(struct platform_device *pdev) { struct btqcomsmd *btq; @@ -162,7 +177,7 @@ static int btqcomsmd_probe(struct platform_device *pdev) hdev->close = btqcomsmd_close; hdev->send = btqcomsmd_send; hdev->setup = btqcomsmd_setup; - hdev->set_bdaddr = qca_set_bdaddr_rome; + hdev->set_bdaddr = btqcomsmd_set_bdaddr; ret = hci_register_dev(hdev); if (ret < 0) From 9aa9d9473f1550d1936c31259720b3f1f4690576 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 8 Mar 2023 14:20:34 -0800 Subject: [PATCH 398/898] Bluetooth: L2CAP: Fix responding with wrong PDU type L2CAP_ECRED_CONN_REQ shall be responded with L2CAP_ECRED_CONN_RSP not L2CAP_LE_CONN_RSP: L2CAP LE EATT Server - Reject - run Listening for connections New client connection with handle 0x002a Sending L2CAP Request from client Client received response code 0x15 Unexpected L2CAP response code (expected 0x18) L2CAP LE EATT Server - Reject - test failed > ACL Data RX: Handle 42 flags 0x02 dlen 26 LE L2CAP: Enhanced Credit Connection Request (0x17) ident 1 len 18 PSM: 39 (0x0027) MTU: 64 MPS: 64 Credits: 5 Source CID: 65 Source CID: 66 Source CID: 67 Source CID: 68 Source CID: 69 < ACL Data TX: Handle 42 flags 0x00 dlen 16 LE L2CAP: LE Connection Response (0x15) ident 1 len 8 invalid size 00 00 00 00 00 00 06 00 L2CAP LE EATT Server - Reject - run Listening for connections New client connection with handle 0x002a Sending L2CAP Request from client Client received response code 0x18 L2CAP LE EATT Server - Reject - test passed Fixes: 15f02b910562 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 117 +++++++++++++++++++++++++------------ 1 file changed, 79 insertions(+), 38 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index adfc3ea06d088..49926f59cc123 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -708,6 +708,17 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) } EXPORT_SYMBOL_GPL(l2cap_chan_del); +static void __l2cap_chan_list_id(struct l2cap_conn *conn, u16 id, + l2cap_chan_func_t func, void *data) +{ + struct l2cap_chan *chan, *l; + + list_for_each_entry_safe(chan, l, &conn->chan_l, list) { + if (chan->ident == id) + func(chan, data); + } +} + static void __l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func, void *data) { @@ -775,23 +786,9 @@ static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan) static void l2cap_chan_ecred_connect_reject(struct l2cap_chan *chan) { - struct l2cap_conn *conn = chan->conn; - struct l2cap_ecred_conn_rsp rsp; - u16 result; - - if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) - result = L2CAP_CR_LE_AUTHORIZATION; - else - result = L2CAP_CR_LE_BAD_PSM; - l2cap_state_change(chan, BT_DISCONN); - memset(&rsp, 0, sizeof(rsp)); - - rsp.result = cpu_to_le16(result); - - l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), - &rsp); + __l2cap_ecred_conn_rsp_defer(chan); } static void l2cap_chan_connect_reject(struct l2cap_chan *chan) @@ -846,7 +843,7 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason) break; case L2CAP_MODE_EXT_FLOWCTL: l2cap_chan_ecred_connect_reject(chan); - break; + return; } } } @@ -3938,43 +3935,86 @@ void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan) &rsp); } -void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan) +static void l2cap_ecred_list_defer(struct l2cap_chan *chan, void *data) { + int *result = data; + + if (*result || test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) + return; + + switch (chan->state) { + case BT_CONNECT2: + /* If channel still pending accept add to result */ + (*result)++; + return; + case BT_CONNECTED: + return; + default: + /* If not connected or pending accept it has been refused */ + *result = -ECONNREFUSED; + return; + } +} + +struct l2cap_ecred_rsp_data { struct { struct l2cap_ecred_conn_rsp rsp; - __le16 dcid[5]; + __le16 scid[L2CAP_ECRED_MAX_CID]; } __packed pdu; + int count; +}; + +static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data) +{ + struct l2cap_ecred_rsp_data *rsp = data; + + if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) + return; + + /* Reset ident so only one response is sent */ + chan->ident = 0; + + /* Include all channels pending with the same ident */ + if (!rsp->pdu.rsp.result) + rsp->pdu.rsp.dcid[rsp->count++] = cpu_to_le16(chan->scid); + else + l2cap_chan_del(chan, ECONNRESET); +} + +void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan) +{ struct l2cap_conn *conn = chan->conn; - u16 ident = chan->ident; - int i = 0; + struct l2cap_ecred_rsp_data data; + u16 id = chan->ident; + int result = 0; - if (!ident) + if (!id) return; - BT_DBG("chan %p ident %d", chan, ident); + BT_DBG("chan %p id %d", chan, id); - pdu.rsp.mtu = cpu_to_le16(chan->imtu); - pdu.rsp.mps = cpu_to_le16(chan->mps); - pdu.rsp.credits = cpu_to_le16(chan->rx_credits); - pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS); + memset(&data, 0, sizeof(data)); - mutex_lock(&conn->chan_lock); + data.pdu.rsp.mtu = cpu_to_le16(chan->imtu); + data.pdu.rsp.mps = cpu_to_le16(chan->mps); + data.pdu.rsp.credits = cpu_to_le16(chan->rx_credits); + data.pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS); - list_for_each_entry(chan, &conn->chan_l, list) { - if (chan->ident != ident) - continue; + /* Verify that all channels are ready */ + __l2cap_chan_list_id(conn, id, l2cap_ecred_list_defer, &result); - /* Reset ident so only one response is sent */ - chan->ident = 0; + if (result > 0) + return; - /* Include all channels pending with the same ident */ - pdu.dcid[i++] = cpu_to_le16(chan->scid); - } + if (result < 0) + data.pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_AUTHORIZATION); - mutex_unlock(&conn->chan_lock); + /* Build response */ + __l2cap_chan_list_id(conn, id, l2cap_ecred_rsp_defer, &data); - l2cap_send_cmd(conn, ident, L2CAP_ECRED_CONN_RSP, - sizeof(pdu.rsp) + i * sizeof(__le16), &pdu); + l2cap_send_cmd(conn, id, L2CAP_ECRED_CONN_RSP, + sizeof(data.pdu.rsp) + (data.count * sizeof(__le16)), + &data.pdu); } void __l2cap_connect_rsp_defer(struct l2cap_chan *chan) @@ -6078,6 +6118,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); chan->ident = cmd->ident; + chan->mode = L2CAP_MODE_EXT_FLOWCTL; if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { l2cap_state_change(chan, BT_CONNECT2); From 1e9ac114c4428fdb7ff4635b45d4f46017e8916f Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Thu, 9 Mar 2023 16:07:39 +0800 Subject: [PATCH 399/898] Bluetooth: btsdio: fix use after free bug in btsdio_remove due to unfinished work In btsdio_probe, &data->work was bound with btsdio_work.In btsdio_send_frame, it was started by schedule_work. If we call btsdio_remove with an unfinished job, there may be a race condition and cause UAF bug on hdev. Fixes: ddbaf13e3609 ("[Bluetooth] Add generic driver for Bluetooth SDIO devices") Signed-off-by: Zheng Wang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btsdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index 795be33f2892d..02893600db390 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -354,6 +354,7 @@ static void btsdio_remove(struct sdio_func *func) BT_DBG("func %p", func); + cancel_work_sync(&data->work); if (!data) return; From 1a0291f81529e8044fb29845a0196ba47af894ce Mon Sep 17 00:00:00 2001 From: Howard Chung Date: Thu, 16 Mar 2023 18:11:38 +0800 Subject: [PATCH 400/898] Bluetooth: mgmt: Fix MGMT add advmon with RSSI command The MGMT command: MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI uses variable length argument. This causes host not able to register advmon with rssi. This patch has been locally tested by adding monitor with rssi via btmgmt on a kernel 6.1 machine. Reviewed-by: Archie Pusaka Fixes: b338d91703fa ("Bluetooth: Implement support for Mesh") Signed-off-by: Howard Chung Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 39589f864ea76..249dc6777fb4e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9357,7 +9357,8 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { add_ext_adv_data, MGMT_ADD_EXT_ADV_DATA_SIZE, HCI_MGMT_VAR_LEN }, { add_adv_patterns_monitor_rssi, - MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE }, + MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE, + HCI_MGMT_VAR_LEN }, { set_mesh, MGMT_SET_MESH_RECEIVER_SIZE, HCI_MGMT_VAR_LEN }, { mesh_features, MGMT_MESH_READ_FEATURES_SIZE }, From bce56405201111807cc8e4f47c6de3e10b17c1ac Mon Sep 17 00:00:00 2001 From: Sungwoo Kim Date: Mon, 20 Mar 2023 21:50:18 -0400 Subject: [PATCH 401/898] Bluetooth: HCI: Fix global-out-of-bounds To loop a variable-length array, hci_init_stage_sync(stage) considers that stage[i] is valid as long as stage[i-1].func is valid. Thus, the last element of stage[].func should be intentionally invalid as hci_init0[], le_init2[], and others did. However, amp_init1[] and amp_init2[] have no invalid element, letting hci_init_stage_sync() keep accessing amp_init1[] over its valid range. This patch fixes this by adding {} in the last of amp_init1[] and amp_init2[]. ================================================================== BUG: KASAN: global-out-of-bounds in hci_dev_open_sync ( /v6.2-bzimage/net/bluetooth/hci_sync.c:3154 /v6.2-bzimage/net/bluetooth/hci_sync.c:3343 /v6.2-bzimage/net/bluetooth/hci_sync.c:4418 /v6.2-bzimage/net/bluetooth/hci_sync.c:4609 /v6.2-bzimage/net/bluetooth/hci_sync.c:4689) Read of size 8 at addr ffffffffaed1ab70 by task kworker/u5:0/1032 CPU: 0 PID: 1032 Comm: kworker/u5:0 Not tainted 6.2.0 #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04 Workqueue: hci1 hci_power_on Call Trace: dump_stack_lvl (/v6.2-bzimage/lib/dump_stack.c:107 (discriminator 1)) print_report (/v6.2-bzimage/mm/kasan/report.c:307 /v6.2-bzimage/mm/kasan/report.c:417) ? hci_dev_open_sync (/v6.2-bzimage/net/bluetooth/hci_sync.c:3154 /v6.2-bzimage/net/bluetooth/hci_sync.c:3343 /v6.2-bzimage/net/bluetooth/hci_sync.c:4418 /v6.2-bzimage/net/bluetooth/hci_sync.c:4609 /v6.2-bzimage/net/bluetooth/hci_sync.c:4689) kasan_report (/v6.2-bzimage/mm/kasan/report.c:184 /v6.2-bzimage/mm/kasan/report.c:519) ? hci_dev_open_sync (/v6.2-bzimage/net/bluetooth/hci_sync.c:3154 /v6.2-bzimage/net/bluetooth/hci_sync.c:3343 /v6.2-bzimage/net/bluetooth/hci_sync.c:4418 /v6.2-bzimage/net/bluetooth/hci_sync.c:4609 /v6.2-bzimage/net/bluetooth/hci_sync.c:4689) hci_dev_open_sync (/v6.2-bzimage/net/bluetooth/hci_sync.c:3154 /v6.2-bzimage/net/bluetooth/hci_sync.c:3343 /v6.2-bzimage/net/bluetooth/hci_sync.c:4418 /v6.2-bzimage/net/bluetooth/hci_sync.c:4609 /v6.2-bzimage/net/bluetooth/hci_sync.c:4689) ? __pfx_hci_dev_open_sync (/v6.2-bzimage/net/bluetooth/hci_sync.c:4635) ? mutex_lock (/v6.2-bzimage/./arch/x86/include/asm/atomic64_64.h:190 /v6.2-bzimage/./include/linux/atomic/atomic-long.h:443 /v6.2-bzimage/./include/linux/atomic/atomic-instrumented.h:1781 /v6.2-bzimage/kernel/locking/mutex.c:171 /v6.2-bzimage/kernel/locking/mutex.c:285) ? __pfx_mutex_lock (/v6.2-bzimage/kernel/locking/mutex.c:282) hci_power_on (/v6.2-bzimage/net/bluetooth/hci_core.c:485 /v6.2-bzimage/net/bluetooth/hci_core.c:984) ? __pfx_hci_power_on (/v6.2-bzimage/net/bluetooth/hci_core.c:969) ? read_word_at_a_time (/v6.2-bzimage/./include/asm-generic/rwonce.h:85) ? strscpy (/v6.2-bzimage/./arch/x86/include/asm/word-at-a-time.h:62 /v6.2-bzimage/lib/string.c:161) process_one_work (/v6.2-bzimage/kernel/workqueue.c:2294) worker_thread (/v6.2-bzimage/./include/linux/list.h:292 /v6.2-bzimage/kernel/workqueue.c:2437) ? __pfx_worker_thread (/v6.2-bzimage/kernel/workqueue.c:2379) kthread (/v6.2-bzimage/kernel/kthread.c:376) ? __pfx_kthread (/v6.2-bzimage/kernel/kthread.c:331) ret_from_fork (/v6.2-bzimage/arch/x86/entry/entry_64.S:314) The buggy address belongs to the variable: amp_init1+0x30/0x60 The buggy address belongs to the physical page: page:000000003a157ec6 refcount:1 mapcount:0 mapping:0000000000000000 ia flags: 0x200000000001000(reserved|node=0|zone=2) raw: 0200000000001000 ffffea0005054688 ffffea0005054688 000000000000000 raw: 0000000000000000 0000000000000000 00000001ffffffff 000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffffffffaed1aa00: f9 f9 f9 f9 00 00 00 00 f9 f9 f9 f9 00 00 00 00 ffffffffaed1aa80: 00 00 00 00 f9 f9 f9 f9 00 00 00 00 00 00 00 00 >ffffffffaed1ab00: 00 f9 f9 f9 f9 f9 f9 f9 00 00 00 00 00 00 f9 f9 ^ ffffffffaed1ab80: f9 f9 f9 f9 00 00 00 00 f9 f9 f9 f9 00 00 00 f9 ffffffffaed1ac00: f9 f9 f9 f9 00 06 f9 f9 f9 f9 f9 f9 00 00 02 f9 This bug is found by FuzzBT, a modified version of Syzkaller. Other contributors for this bug are Ruoyu Wu and Peng Hui. Fixes: d0b137062b2d ("Bluetooth: hci_sync: Rework init stages") Signed-off-by: Sungwoo Kim Reviewed-by: Simon Horman Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 5b8dc8fb2e27a..5a6aa1627791b 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -3360,6 +3360,7 @@ static const struct hci_init_stage amp_init1[] = { HCI_INIT(hci_read_flow_control_mode_sync), /* HCI_OP_READ_LOCATION_DATA */ HCI_INIT(hci_read_location_data_sync), + {} }; static int hci_init1_sync(struct hci_dev *hdev) @@ -3394,6 +3395,7 @@ static int hci_init1_sync(struct hci_dev *hdev) static const struct hci_init_stage amp_init2[] = { /* HCI_OP_READ_LOCAL_FEATURES */ HCI_INIT(hci_read_local_features_sync), + {} }; /* Read Buffer Size (ACL mtu, max pkt, etc.) */ From e9c3cda4d86e56bf7fe403729f38c4f0f65d3860 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 6 Mar 2023 09:15:17 +0100 Subject: [PATCH 402/898] mm, vmalloc: fix high order __GFP_NOFAIL allocations Gao Xiang has reported that the page allocator complains about high order __GFP_NOFAIL request coming from the vmalloc core: __alloc_pages+0x1cb/0x5b0 mm/page_alloc.c:5549 alloc_pages+0x1aa/0x270 mm/mempolicy.c:2286 vm_area_alloc_pages mm/vmalloc.c:2989 [inline] __vmalloc_area_node mm/vmalloc.c:3057 [inline] __vmalloc_node_range+0x978/0x13c0 mm/vmalloc.c:3227 kvmalloc_node+0x156/0x1a0 mm/util.c:606 kvmalloc include/linux/slab.h:737 [inline] kvmalloc_array include/linux/slab.h:755 [inline] kvcalloc include/linux/slab.h:760 [inline] it seems that I have completely missed high order allocation backing vmalloc areas case when implementing __GFP_NOFAIL support. This means that [k]vmalloc at al. can allocate higher order allocations with __GFP_NOFAIL which can trigger OOM killer for non-costly orders easily or cause a lot of reclaim/compaction activity if those requests cannot be satisfied. Fix the issue by falling back to zero order allocations for __GFP_NOFAIL requests if the high order request fails. Link: https://lkml.kernel.org/r/ZAXynvdNqcI0f6Us@dhcp22.suse.cz Fixes: 9376130c390a ("mm/vmalloc: add support for __GFP_NOFAIL") Reported-by: Gao Xiang Link: https://lkml.kernel.org/r/20230305053035.1911-1-hsiangkao@linux.alibaba.com Signed-off-by: Michal Hocko Reviewed-by: Uladzislau Rezki (Sony) Acked-by: Vlastimil Babka Cc: Baoquan He Cc: Christoph Hellwig Cc: Mel Gorman Signed-off-by: Andrew Morton --- mm/vmalloc.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ef910bf349e13..bef6cf2b4d46d 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2883,6 +2883,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid, unsigned int order, unsigned int nr_pages, struct page **pages) { unsigned int nr_allocated = 0; + gfp_t alloc_gfp = gfp; + bool nofail = false; struct page *page; int i; @@ -2893,6 +2895,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid, * more permissive. */ if (!order) { + /* bulk allocator doesn't support nofail req. officially */ gfp_t bulk_gfp = gfp & ~__GFP_NOFAIL; while (nr_allocated < nr_pages) { @@ -2931,20 +2934,35 @@ vm_area_alloc_pages(gfp_t gfp, int nid, if (nr != nr_pages_request) break; } + } else if (gfp & __GFP_NOFAIL) { + /* + * Higher order nofail allocations are really expensive and + * potentially dangerous (pre-mature OOM, disruptive reclaim + * and compaction etc. + */ + alloc_gfp &= ~__GFP_NOFAIL; + nofail = true; } /* High-order pages or fallback path if "bulk" fails. */ - while (nr_allocated < nr_pages) { if (fatal_signal_pending(current)) break; if (nid == NUMA_NO_NODE) - page = alloc_pages(gfp, order); + page = alloc_pages(alloc_gfp, order); else - page = alloc_pages_node(nid, gfp, order); - if (unlikely(!page)) - break; + page = alloc_pages_node(nid, alloc_gfp, order); + if (unlikely(!page)) { + if (!nofail) + break; + + /* fall back to the zero order allocations */ + alloc_gfp |= __GFP_NOFAIL; + order = 0; + continue; + } + /* * Higher order allocations must be able to be treated as * indepdenent small pages by callers (as they can with From 0fa99fdfe1b38da396d0b2d1496a823bcd0ebea0 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Tue, 7 Mar 2023 13:02:46 -0500 Subject: [PATCH 403/898] maple_tree: fix mas_skip_node() end slot detection Patch series "Fix mas_skip_node() for mas_empty_area()", v2. mas_empty_area() was incorrectly returning an error when there was room. The issue was tracked down to mas_skip_node() using the incorrect end-of-slot count. Instead of using the nodes hard limit, the limit of data should be used. mas_skip_node() was also setting the min and max to that of the child node, which was unnecessary. Within these limits being set, there was also a bug that corrupted the maple state's max if the offset was set to the maximum node pivot. The bug was without consequence unless there was a sufficient gap in the next child node which would cause an error to be returned. This patch set fixes these errors by removing the limit setting from mas_skip_node() and uses the mas_data_end() for slot limits, and adds tests for all failures discovered. This patch (of 2): mas_skip_node() is used to move the maple state to the node with a higher limit. It does this by walking up the tree and increasing the slot count. Since slot count may not be able to be increased, it may need to walk up multiple times to find room to walk right to a higher limit node. The limit of slots that was being used was the node limit and not the last location of data in the node. This would cause the maple state to be shifted outside actual data and enter an error state, thus returning -EBUSY. The result of the incorrect error state means that mas_awalk() would return an error instead of finding the allocation space. The fix is to use mas_data_end() in mas_skip_node() to detect the nodes data end point and continue walking the tree up until it is safe to move to a node with a higher limit. The walk up the tree also sets the maple state limits so remove the buggy code from mas_skip_node(). Setting the limits had the unfortunate side effect of triggering another bug if the parent node was full and the there was no suitable gap in the second last child, but room in the next child. mas_skip_node() may also be passed a maple state in an error state from mas_anode_descend() when no allocations are available. Return on such an error state immediately. Link: https://lkml.kernel.org/r/20230307180247.2220303-1-Liam.Howlett@oracle.com Link: https://lkml.kernel.org/r/20230307180247.2220303-2-Liam.Howlett@oracle.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam R. Howlett Reported-by: Snild Dolkow Link: https://lore.kernel.org/linux-mm/cb8dc31a-fef2-1d09-f133-e9f7b9f9e77a@sony.com/ Tested-by: Snild Dolkow Cc: Peng Zhang Cc: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 646297cae5d16..9e2735cbc2b49 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -5099,35 +5099,21 @@ static inline bool mas_rewind_node(struct ma_state *mas) */ static inline bool mas_skip_node(struct ma_state *mas) { - unsigned char slot, slot_count; - unsigned long *pivots; - enum maple_type mt; + if (mas_is_err(mas)) + return false; - mt = mte_node_type(mas->node); - slot_count = mt_slots[mt] - 1; do { if (mte_is_root(mas->node)) { - slot = mas->offset; - if (slot > slot_count) { + if (mas->offset >= mas_data_end(mas)) { mas_set_err(mas, -EBUSY); return false; } } else { mas_ascend(mas); - slot = mas->offset; - mt = mte_node_type(mas->node); - slot_count = mt_slots[mt] - 1; } - } while (slot > slot_count); - - mas->offset = ++slot; - pivots = ma_pivots(mas_mn(mas), mt); - if (slot > 0) - mas->min = pivots[slot - 1] + 1; - - if (slot <= slot_count) - mas->max = pivots[slot]; + } while (mas->offset >= mas_data_end(mas)); + mas->offset++; return true; } From 4bd6dded6318dc8e2514d74868c1f8fb38b61a60 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Tue, 7 Mar 2023 13:02:47 -0500 Subject: [PATCH 404/898] test_maple_tree: add more testing for mas_empty_area() Test robust filling of an entire area of the tree, then test one beyond. This is to test the walking back up the tree at the end of nodes and error condition. Test inspired by the reproducer code provided by Snild Dolkow. The last test in the function tests for the case of a corrupted maple state caused by the incorrect limits set during mas_skip_node(). There needs to be a gap in the second last child and last child, but the search must rule out the second last child's gap. This would avoid correcting the maple state to the correct max limit and return an error. Link: https://lkml.kernel.org/r/20230307180247.2220303-3-Liam.Howlett@oracle.com Cc: Snild Dolkow Link: https://lore.kernel.org/linux-mm/cb8dc31a-fef2-1d09-f133-e9f7b9f9e77a@sony.com/ Fixes: e15e06a83923 ("lib/test_maple_tree: add testing for maple tree") Signed-off-by: Liam R. Howlett Cc: Peng Zhang Cc: Signed-off-by: Andrew Morton --- lib/test_maple_tree.c | 48 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 3d19b1f78d718..f1db333270e9f 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -2670,6 +2670,49 @@ static noinline void check_empty_area_window(struct maple_tree *mt) rcu_read_unlock(); } +static noinline void check_empty_area_fill(struct maple_tree *mt) +{ + const unsigned long max = 0x25D78000; + unsigned long size; + int loop, shift; + MA_STATE(mas, mt, 0, 0); + + mt_set_non_kernel(99999); + for (shift = 12; shift <= 16; shift++) { + loop = 5000; + size = 1 << shift; + while (loop--) { + mas_set(&mas, 0); + mas_lock(&mas); + MT_BUG_ON(mt, mas_empty_area(&mas, 0, max, size) != 0); + MT_BUG_ON(mt, mas.last != mas.index + size - 1); + mas_store_gfp(&mas, (void *)size, GFP_KERNEL); + mas_unlock(&mas); + mas_reset(&mas); + } + } + + /* No space left. */ + size = 0x1000; + rcu_read_lock(); + MT_BUG_ON(mt, mas_empty_area(&mas, 0, max, size) != -EBUSY); + rcu_read_unlock(); + + /* Fill a depth 3 node to the maximum */ + for (unsigned long i = 629440511; i <= 629440800; i += 6) + mtree_store_range(mt, i, i + 5, (void *)i, GFP_KERNEL); + /* Make space in the second-last depth 4 node */ + mtree_erase(mt, 631668735); + /* Make space in the last depth 4 node */ + mtree_erase(mt, 629506047); + mas_reset(&mas); + /* Search from just after the gap in the second-last depth 4 */ + rcu_read_lock(); + MT_BUG_ON(mt, mas_empty_area(&mas, 629506048, 690000000, 0x5000) != 0); + rcu_read_unlock(); + mt_set_non_kernel(0); +} + static DEFINE_MTREE(tree); static int maple_tree_seed(void) { @@ -2926,6 +2969,11 @@ static int maple_tree_seed(void) check_empty_area_window(&tree); mtree_destroy(&tree); + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_empty_area_fill(&tree); + mtree_destroy(&tree); + + #if defined(BENCH) skip: #endif From 003587000276f81d0114b5ce773d80c119d8cb30 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 7 Mar 2023 17:55:48 +0900 Subject: [PATCH 405/898] nilfs2: fix kernel-infoleak in nilfs_ioctl_wrap_copy() The ioctl helper function nilfs_ioctl_wrap_copy(), which exchanges a metadata array to/from user space, may copy uninitialized buffer regions to user space memory for read-only ioctl commands NILFS_IOCTL_GET_SUINFO and NILFS_IOCTL_GET_CPINFO. This can occur when the element size of the user space metadata given by the v_size member of the argument nilfs_argv structure is larger than the size of the metadata element (nilfs_suinfo structure or nilfs_cpinfo structure) on the file system side. KMSAN-enabled kernels detect this issue as follows: BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:121 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_user+0xc0/0x100 lib/usercopy.c:33 instrument_copy_to_user include/linux/instrumented.h:121 [inline] _copy_to_user+0xc0/0x100 lib/usercopy.c:33 copy_to_user include/linux/uaccess.h:169 [inline] nilfs_ioctl_wrap_copy+0x6fa/0xc10 fs/nilfs2/ioctl.c:99 nilfs_ioctl_get_info fs/nilfs2/ioctl.c:1173 [inline] nilfs_ioctl+0x2402/0x4450 fs/nilfs2/ioctl.c:1290 nilfs_compat_ioctl+0x1b8/0x200 fs/nilfs2/ioctl.c:1343 __do_compat_sys_ioctl fs/ioctl.c:968 [inline] __se_compat_sys_ioctl+0x7dd/0x1000 fs/ioctl.c:910 __ia32_compat_sys_ioctl+0x93/0xd0 fs/ioctl.c:910 do_syscall_32_irqs_on arch/x86/entry/common.c:112 [inline] __do_fast_syscall_32+0xa2/0x100 arch/x86/entry/common.c:178 do_fast_syscall_32+0x37/0x80 arch/x86/entry/common.c:203 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:246 entry_SYSENTER_compat_after_hwframe+0x70/0x82 Uninit was created at: __alloc_pages+0x9f6/0xe90 mm/page_alloc.c:5572 alloc_pages+0xab0/0xd80 mm/mempolicy.c:2287 __get_free_pages+0x34/0xc0 mm/page_alloc.c:5599 nilfs_ioctl_wrap_copy+0x223/0xc10 fs/nilfs2/ioctl.c:74 nilfs_ioctl_get_info fs/nilfs2/ioctl.c:1173 [inline] nilfs_ioctl+0x2402/0x4450 fs/nilfs2/ioctl.c:1290 nilfs_compat_ioctl+0x1b8/0x200 fs/nilfs2/ioctl.c:1343 __do_compat_sys_ioctl fs/ioctl.c:968 [inline] __se_compat_sys_ioctl+0x7dd/0x1000 fs/ioctl.c:910 __ia32_compat_sys_ioctl+0x93/0xd0 fs/ioctl.c:910 do_syscall_32_irqs_on arch/x86/entry/common.c:112 [inline] __do_fast_syscall_32+0xa2/0x100 arch/x86/entry/common.c:178 do_fast_syscall_32+0x37/0x80 arch/x86/entry/common.c:203 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:246 entry_SYSENTER_compat_after_hwframe+0x70/0x82 Bytes 16-127 of 3968 are uninitialized ... This eliminates the leak issue by initializing the page allocated as buffer using get_zeroed_page(). Link: https://lkml.kernel.org/r/20230307085548.6290-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: syzbot+132fdd2f1e1805fdc591@syzkaller.appspotmail.com Link: https://lkml.kernel.org/r/000000000000a5bd2d05f63f04ae@google.com Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 5ccc638ae92f7..1dfbc0c34513f 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -71,7 +71,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, if (argv->v_index > ~(__u64)0 - argv->v_nmembs) return -EINVAL; - buf = (void *)__get_free_pages(GFP_NOFS, 0); + buf = (void *)get_zeroed_page(GFP_NOFS); if (unlikely(!buf)) return -ENOMEM; maxmembs = PAGE_SIZE / argv->v_size; From 12871a154690c52e2ded718b392a3977c114f6c1 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 7 Mar 2023 15:59:00 +0800 Subject: [PATCH 406/898] checksyscalls: ignore fstat to silence build warning on LoongArch fstat is replaced by statx on the new architecture, so an exception is added to the checksyscalls script to silence the following build warning on LoongArch: CALL scripts/checksyscalls.sh :569:2: warning: #warning syscall fstat not implemented [-Wcpp] Link: https://lkml.kernel.org/r/1678175940-20872-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Tiezhu Yang Suggested-by: WANG Xuerui Suggested-by: Arnd Bergmann Reviewed-by: Arnd Bergmann Cc: Huacai Chen Cc: Masahiro Yamada Signed-off-by: Andrew Morton --- scripts/checksyscalls.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh index f33e61aca93d3..1e5d2eeb726df 100755 --- a/scripts/checksyscalls.sh +++ b/scripts/checksyscalls.sh @@ -114,7 +114,6 @@ cat << EOF #define __IGNORE_truncate #define __IGNORE_stat #define __IGNORE_lstat -#define __IGNORE_fstat #define __IGNORE_fcntl #define __IGNORE_fadvise64 #define __IGNORE_newfstatat @@ -255,6 +254,9 @@ cat << EOF /* 64-bit ports never needed these, and new 32-bit ports can use statx */ #define __IGNORE_fstat64 #define __IGNORE_fstatat64 + +/* Newer ports are not required to provide fstat in favor of statx */ +#define __IGNORE_fstat EOF } From 6bbf1090672673183a98cd6e19de91fa5a319df0 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 8 Mar 2023 19:04:20 +0000 Subject: [PATCH 407/898] mm: deduplicate error handling for map_deny_write_exec Patch series "Fixes for MDWE prctl" These are four small fixes for the recent memory-write-deny-execute prctl patches [1]. Two reported by Alexey about error handling and two tooling fixes by Peter. This patch (of 4): Commit cc8d1b097de7 ("mmap: clean up mmap_region() unrolling") deduplicated the error handling, do the same for the return value of `map_deny_write_exec`. Link: https://lkml.kernel.org/r/20230308190423.46491-1-joey.gouly@arm.com Link: https://lkml.kernel.org/r/20230308190423.46491-2-joey.gouly@arm.com Link: https://lore.kernel.org/linux-arm-kernel/20230119160344.54358-1-joey.gouly@arm.com/ [1] Fixes: b507808ebce2 ("mm: implement memory-deny-write-execute as a prctl") Signed-off-by: Joey Gouly Reported-by: Alexey Izbyshev Link: https://lore.kernel.org/linux-arm-kernel/8408d8901e9d7ee6b78db4c6cba04b78@ispras.ru/ Reviewed-by: Catalin Marinas Cc: Arnaldo Carvalho de Melo Cc: Kees Cook Cc: nd Cc: Peter Xu Cc: Shuah Khan Signed-off-by: Andrew Morton --- mm/mmap.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 740b54be3ed41..ad499f7b767fa 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2621,12 +2621,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, if (map_deny_write_exec(vma, vma->vm_flags)) { error = -EACCES; - if (file) - goto close_and_free_vma; - else if (vma->vm_file) - goto unmap_and_free_vma; - else - goto free_vma; + goto close_and_free_vma; } /* Allow architectures to sanity-check the vm_flags */ From 3d27a95b1d96757e48ce970baa3d419af299c2af Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 8 Mar 2023 19:04:21 +0000 Subject: [PATCH 408/898] mm: fix error handling for map_deny_write_exec Commit 4a18419f71cd ("mm/mprotect: use mmu_gather") changed 'goto out;' to 'break' in the loop. This wasn't noticed while rebasing the MDWE patches, so fix it now. Link: https://lkml.kernel.org/r/20230308190423.46491-3-joey.gouly@arm.com Fixes: b507808ebce2 ("mm: implement memory-deny-write-execute as a prctl") Signed-off-by: Joey Gouly Reported-by: Alexey Izbyshev Link: https://lore.kernel.org/linux-arm-kernel/8408d8901e9d7ee6b78db4c6cba04b78@ispras.ru/ Reviewed-by: Catalin Marinas Cc: Arnaldo Carvalho de Melo Cc: Kees Cook Cc: nd Cc: Peter Xu Cc: Shuah Khan Signed-off-by: Andrew Morton --- mm/mprotect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index 231929f119d95..13e84d8c0797a 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -805,7 +805,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len, if (map_deny_write_exec(vma, newflags)) { error = -EACCES; - goto out; + break; } /* Allow architectures to sanity-check the new flags */ From d035230ec9937a9138921d2a0eeb99496ea7eac0 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 8 Mar 2023 19:04:22 +0000 Subject: [PATCH 409/898] kselftest: vm: fix unused variable warning Remove unused variable from the MDWE test. [joey.gouly@arm.com: add commit message] Link: https://lkml.kernel.org/r/20230308190423.46491-4-joey.gouly@arm.com Fixes: 4cf1fe34fd18 ("kselftest: vm: add tests for memory-deny-write-execute") Signed-off-by: Peter Xu Signed-off-by: Joey Gouly Acked-by: Catalin Marinas Cc: Alexey Izbyshev Cc: Arnaldo Carvalho de Melo Cc: Kees Cook Cc: nd Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mdwe_test.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/selftests/mm/mdwe_test.c b/tools/testing/selftests/mm/mdwe_test.c index f466a099f1bf7..bc91bef5d254e 100644 --- a/tools/testing/selftests/mm/mdwe_test.c +++ b/tools/testing/selftests/mm/mdwe_test.c @@ -163,9 +163,8 @@ TEST_F(mdwe, mprotect_WRITE_EXEC) TEST_F(mdwe, mmap_FIXED) { - void *p, *p2; + void *p; - p2 = mmap(NULL, self->size, PROT_READ | PROT_EXEC, self->flags, 0, 0); self->p = mmap(NULL, self->size, PROT_READ, self->flags, 0, 0); ASSERT_NE(self->p, MAP_FAILED); From 6db504ce55bdbc575723938fc480713c9183f6a2 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 8 Mar 2023 17:03:10 -0500 Subject: [PATCH 410/898] mm/ksm: fix race with VMA iteration and mm_struct teardown exit_mmap() will tear down the VMAs and maple tree with the mmap_lock held in write mode. Ensure that the maple tree is still valid by checking ksm_test_exit() after taking the mmap_lock in read mode, but before the for_each_vma() iterator dereferences a destroyed maple tree. Since the maple tree is destroyed, the flags telling lockdep to check an external lock has been cleared. Skip the for_each_vma() iterator to avoid dereferencing a maple tree without the external lock flag, which would create a lockdep warning. Link: https://lkml.kernel.org/r/20230308220310.3119196-1-Liam.Howlett@oracle.com Fixes: a5f18ba07276 ("mm/ksm: use vma iterators instead of vma linked list") Signed-off-by: Liam R. Howlett Reported-by: Pengfei Xu Link: https://lore.kernel.org/lkml/ZAdUUhSbaa6fHS36@xpf.sh.intel.com/ Reported-by: syzbot+2ee18845e89ae76342c5@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=64a3e95957cd3deab99df7cd7b5a9475af92c93e Acked-by: David Hildenbrand Cc: Matthew Wilcox (Oracle) Cc: Cc: Signed-off-by: Andrew Morton --- mm/ksm.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index ad591b779d534..2b8d30068cbbd 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -988,9 +988,15 @@ static int unmerge_and_remove_all_rmap_items(void) mm = mm_slot->slot.mm; mmap_read_lock(mm); + + /* + * Exit right away if mm is exiting to avoid lockdep issue in + * the maple tree + */ + if (ksm_test_exit(mm)) + goto mm_exiting; + for_each_vma(vmi, vma) { - if (ksm_test_exit(mm)) - break; if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma) continue; err = unmerge_ksm_pages(vma, @@ -999,6 +1005,7 @@ static int unmerge_and_remove_all_rmap_items(void) goto error; } +mm_exiting: remove_trailing_rmap_items(&mm_slot->rmap_list); mmap_read_unlock(mm); From 90db9dbedd26ce029f3a0f8d2cbd3a142f452408 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 27 Feb 2023 10:47:27 +0100 Subject: [PATCH 411/898] kasan, powerpc: don't rename memintrinsics if compiler adds prefixes With appropriate compiler support [1], KASAN builds use __asan prefixed meminstrinsics, and KASAN no longer overrides memcpy/memset/memmove. If compiler support is detected (CC_HAS_KASAN_MEMINTRINSIC_PREFIX), define memintrinsics normally (do not prefix '__'). On powerpc, KASAN is the only user of __mem functions, which are used to define instrumented memintrinsics. Alias the normal versions for KASAN to use in its implementation. Link: https://lore.kernel.org/all/20230224085942.1791837-1-elver@google.com/ [1] Link: https://lore.kernel.org/oe-kbuild-all/202302271348.U5lvmo0S-lkp@intel.com/ Link: https://lkml.kernel.org/r/20230227094726.3833247-1-elver@google.com Signed-off-by: Marco Elver Reported-by: kernel test robot Acked-by: Michael Ellerman [powerpc] Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Christophe Leroy Cc: Daniel Axtens Cc: Dmitry Vyukov Cc: Liam R. Howlett Cc: Nicholas Piggin Cc: Vincenzo Frascino Signed-off-by: Andrew Morton --- arch/powerpc/include/asm/kasan.h | 2 +- arch/powerpc/include/asm/string.h | 15 +++++++++++---- arch/powerpc/kernel/prom_init_check.sh | 9 +++++++-- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index 92a968202ba7c..365d2720097cb 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -2,7 +2,7 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN) && !defined(CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX) #define _GLOBAL_KASAN(fn) _GLOBAL(__##fn) #define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(__##fn) #define EXPORT_SYMBOL_KASAN(fn) EXPORT_SYMBOL(__##fn) diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index 2aa0e31e68844..60ba22770f51c 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -30,11 +30,17 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); void memcpy_flushcache(void *dest, const void *src, size_t size); +#ifdef CONFIG_KASAN +/* __mem variants are used by KASAN to implement instrumented meminstrinsics. */ +#ifdef CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX +#define __memset memset +#define __memcpy memcpy +#define __memmove memmove +#else /* CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX */ void *__memset(void *s, int c, __kernel_size_t count); void *__memcpy(void *to, const void *from, __kernel_size_t n); void *__memmove(void *to, const void *from, __kernel_size_t n); - -#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) +#ifndef __SANITIZE_ADDRESS__ /* * For files that are not instrumented (e.g. mm/slub.c) we * should use not instrumented version of mem* functions. @@ -46,8 +52,9 @@ void *__memmove(void *to, const void *from, __kernel_size_t n); #ifndef __NO_FORTIFY #define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */ #endif - -#endif +#endif /* !__SANITIZE_ADDRESS__ */ +#endif /* CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX */ +#endif /* CONFIG_KASAN */ #ifdef CONFIG_PPC64 #ifndef CONFIG_KASAN diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 5a319863f2890..69623b9045d55 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -13,8 +13,13 @@ # If you really need to reference something from prom_init.o add # it to the list below: -grep "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} >/dev/null -if [ $? -eq 0 ] +has_renamed_memintrinsics() +{ + grep -q "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} && \ + ! grep -q "^CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX=y" ${KCONFIG_CONFIG} +} + +if has_renamed_memintrinsics then MEM_FUNCS="__memcpy __memset" else From cc2a978d28422ca957d14cb25caa72e25c45be1d Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 10 Mar 2023 13:35:08 +0100 Subject: [PATCH 412/898] mailmap: add entry for Tobias Klauser Map my old email addresses to the current address. Link: https://lkml.kernel.org/r/20230310123508.22079-1-tklauser@distanz.ch Signed-off-by: Tobias Klauser Signed-off-by: Andrew Morton --- .mailmap | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.mailmap b/.mailmap index 317e51a0065c8..96e9acaed89f5 100644 --- a/.mailmap +++ b/.mailmap @@ -437,6 +437,10 @@ Thomas Graf Thomas Körper Thomas Pedersen Tiezhu Yang +Tobias Klauser +Tobias Klauser +Tobias Klauser +Tobias Klauser Todor Tomov Tony Luck TripleX Chung From f446883d12b8bfa486f7c98d403054d61d38c989 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 9 Mar 2023 20:29:13 -0800 Subject: [PATCH 413/898] Revert "kasan: drop skip_kasan_poison variable in free_pages_prepare" This reverts commit 487a32ec24be819e747af8c2ab0d5c515508086a. should_skip_kasan_poison() reads the PG_skip_kasan_poison flag from page->flags. However, this line of code in free_pages_prepare(): page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; clears most of page->flags, including PG_skip_kasan_poison, before calling should_skip_kasan_poison(), which meant that it would never return true as a result of the page flag being set. Therefore, fix the code to call should_skip_kasan_poison() before clearing the flags, as we were doing before the reverted patch. This fixes a measurable performance regression introduced in the reverted commit, where munmap() takes longer than intended if HW tags KASAN is supported and enabled at runtime. Without this patch, we see a single-digit percentage performance regression in a particular mmap()-heavy benchmark when enabling HW tags KASAN, and with the patch, there is no statistically significant performance impact when enabling HW tags KASAN. Link: https://lkml.kernel.org/r/20230310042914.3805818-2-pcc@google.com Fixes: 487a32ec24be ("kasan: drop skip_kasan_poison variable in free_pages_prepare") Link: https://linux-review.googlesource.com/id/Ic4f13affeebd20548758438bb9ed9ca40e312b79 Signed-off-by: Peter Collingbourne Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Catalin Marinas [arm64] Cc: Evgenii Stepanov Cc: Vincenzo Frascino Cc: Will Deacon Cc: [6.1] Signed-off-by: Andrew Morton --- mm/page_alloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ac1fc986af44c..7136c36c5d01e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1398,6 +1398,7 @@ static __always_inline bool free_pages_prepare(struct page *page, unsigned int order, bool check_free, fpi_t fpi_flags) { int bad = 0; + bool skip_kasan_poison = should_skip_kasan_poison(page, fpi_flags); bool init = want_init_on_free(); VM_BUG_ON_PAGE(PageTail(page), page); @@ -1470,7 +1471,7 @@ static __always_inline bool free_pages_prepare(struct page *page, * With hardware tag-based KASAN, memory tags must be set before the * page becomes unavailable via debug_pagealloc or arch_free_page. */ - if (!should_skip_kasan_poison(page, fpi_flags)) { + if (!skip_kasan_poison) { kasan_poison_pages(page, order, init); /* Memory is already initialized if KASAN did it internally. */ From 83bd3eeb326bb0b5800573622d18e1c5e4355323 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 13 Mar 2023 10:03:43 +0100 Subject: [PATCH 414/898] mailmap: map Rajendra Nayak's old address to his current one Rajendra's old email is still picked up by the likes of get_maintainer.pl and keeps bouncing like all other @codeaurora.org addresses. Map it to his current one. Link: https://lkml.kernel.org/r/20230313090343.2148346-1-konrad.dybcio@linaro.org Signed-off-by: Konrad Dybcio Cc: Rajendra Nayak Cc: Andy Gross Cc: Baolin Wang Cc: Bjorn Andersson Cc: Colin Ian King Cc: Jakub Kicinski Cc: Kirill Tkhai Cc: Marijn Suijten Cc: Qais Yousef Cc: Stephen Hemminger Cc: Vasily Averin Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 96e9acaed89f5..bc213f9eaf6fb 100644 --- a/.mailmap +++ b/.mailmap @@ -379,6 +379,7 @@ Quentin Monnet Quentin Perret Rafael J. Wysocki Rajeev Nandan +Rajendra Nayak Rajesh Shah Ralf Baechle Ralf Wildenhues From 9e26240c3bc1cf4895abdd5330a35465b5050109 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 14 Mar 2023 13:56:03 +0100 Subject: [PATCH 415/898] mailmap: map Sai Prakash Ranjan's old address to his current one Sai's old email is still picked up by the likes of get_maintainer.pl and keeps bouncing like all other @codeaurora.org addresses. Map it to his current one. Link: https://lkml.kernel.org/r/20230314125604.2734146-1-konrad.dybcio@linaro.org Signed-off-by: Konrad Dybcio Cc: Sai Prakash Ranjan Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index bc213f9eaf6fb..3c7f004960723 100644 --- a/.mailmap +++ b/.mailmap @@ -398,6 +398,7 @@ Ross Zwisler Rudolf Marek Rui Saraiva Sachin P Sant +Sai Prakash Ranjan Sakari Ailus Sam Ravnborg Sankeerth Billakanti From d2e44a50ecebb72257e5deb7ce8d227d7e3f53b9 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Tue, 14 Mar 2023 12:54:55 +0100 Subject: [PATCH 416/898] mailmap: add entry for Enric Balletbo i Serra Map Enric's old corporate addresses to his kernel.org address. Link: https://lkml.kernel.org/r/20230314115455.188818-1-eballetbo@kernel.org Signed-off-by: Enric Balletbo i Serra Signed-off-by: Andrew Morton --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index 3c7f004960723..cebd939c3a91d 100644 --- a/.mailmap +++ b/.mailmap @@ -133,6 +133,8 @@ Dmitry Safonov <0x7f454c46@gmail.com> Domen Puncer Douglas Gilbert Ed L. Cashin +Enric Balletbo i Serra +Enric Balletbo i Serra Erik Kaneda Eugen Hristev Evgeniy Polyakov From 13684e966d46283e0e89b6a4941596dc52b18bf3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 15 Mar 2023 15:28:17 +0100 Subject: [PATCH 417/898] lib: dhry: fix unstable smp_processor_id(_) usage When running the in-kernel Dhrystone benchmark with CONFIG_DEBUG_PREEMPT=y: BUG: using smp_processor_id() in preemptible [00000000] code: bash/938 Fix this by not using smp_processor_id() directly, but instead wrapping the whole benchmark inside a get_cpu()/put_cpu() pair. This makes sure the whole benchmark is run on the same CPU core, and the reported values are consistent. Link: https://lkml.kernel.org/r/b0d29932bb24ad82cea7f821e295c898e9657be0.1678890070.git.geert+renesas@glider.be Fixes: d5528cc16893f1f6 ("lib: add Dhrystone benchmark test") Signed-off-by: Geert Uytterhoeven Reported-by: Tobias Klausmann Link: https://bugzilla.kernel.org/show_bug.cgi?id=217179 Signed-off-by: Andrew Morton --- lib/dhry_run.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/dhry_run.c b/lib/dhry_run.c index f9d33efa6d090..f15ac666e9d38 100644 --- a/lib/dhry_run.c +++ b/lib/dhry_run.c @@ -31,6 +31,7 @@ MODULE_PARM_DESC(iterations, static void dhry_benchmark(void) { + unsigned int cpu = get_cpu(); int i, n; if (iterations > 0) { @@ -45,9 +46,10 @@ static void dhry_benchmark(void) } report: + put_cpu(); if (n >= 0) - pr_info("CPU%u: Dhrystones per Second: %d (%d DMIPS)\n", - smp_processor_id(), n, n / DHRY_VAX); + pr_info("CPU%u: Dhrystones per Second: %d (%d DMIPS)\n", cpu, + n, n / DHRY_VAX); else if (n == -EAGAIN) pr_err("Please increase the number of iterations\n"); else From 1c86a188e03156223a34d09ce290b49bd4dd0403 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Wed, 15 Mar 2023 11:44:41 +0800 Subject: [PATCH 418/898] mm: kfence: fix using kfence_metadata without initialization in show_object() The variable kfence_metadata is initialized in kfence_init_pool(), then, it is not initialized if kfence is disabled after booting. In this case, kfence_metadata will be used (e.g. ->lock and ->state fields) without initialization when reading /sys/kernel/debug/kfence/objects. There will be a warning if you enable CONFIG_DEBUG_SPINLOCK. Fix it by creating debugfs files when necessary. Link: https://lkml.kernel.org/r/20230315034441.44321-1-songmuchun@bytedance.com Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Signed-off-by: Muchun Song Tested-by: Marco Elver Reviewed-by: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Jann Horn Cc: SeongJae Park Cc: Signed-off-by: Andrew Morton --- mm/kfence/core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 5349c37a5dac9..79c94ee55f97b 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -726,10 +726,14 @@ static const struct seq_operations objects_sops = { }; DEFINE_SEQ_ATTRIBUTE(objects); -static int __init kfence_debugfs_init(void) +static int kfence_debugfs_init(void) { - struct dentry *kfence_dir = debugfs_create_dir("kfence", NULL); + struct dentry *kfence_dir; + if (!READ_ONCE(kfence_enabled)) + return 0; + + kfence_dir = debugfs_create_dir("kfence", NULL); debugfs_create_file("stats", 0444, kfence_dir, NULL, &stats_fops); debugfs_create_file("objects", 0400, kfence_dir, NULL, &objects_fops); return 0; @@ -883,6 +887,8 @@ static int kfence_init_late(void) } kfence_init_enable(); + kfence_debugfs_init(); + return 0; } From 2e08ca1802441224f5b7cc6bffbb687f7406de95 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 16 Mar 2023 23:47:04 +0100 Subject: [PATCH 419/898] kfence: avoid passing -g for test Nathan reported that when building with GNU as and a version of clang that defaults to DWARF5: $ make -skj"$(nproc)" ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- \ LLVM=1 LLVM_IAS=0 O=build \ mrproper allmodconfig mm/kfence/kfence_test.o /tmp/kfence_test-08a0a0.s: Assembler messages: /tmp/kfence_test-08a0a0.s:14627: Error: non-constant .uleb128 is not supported /tmp/kfence_test-08a0a0.s:14628: Error: non-constant .uleb128 is not supported /tmp/kfence_test-08a0a0.s:14632: Error: non-constant .uleb128 is not supported /tmp/kfence_test-08a0a0.s:14633: Error: non-constant .uleb128 is not supported /tmp/kfence_test-08a0a0.s:14639: Error: non-constant .uleb128 is not supported ... This is because `-g` defaults to the compiler debug info default. If the assembler does not support some of the directives used, the above errors occur. To fix, remove the explicit passing of `-g`. All the test wants is that stack traces print valid function names, and debug info is not required for that. (I currently cannot recall why I added the explicit `-g`.) Link: https://lkml.kernel.org/r/20230316224705.709984-1-elver@google.com Fixes: bc8fbc5f305a ("kfence: add test suite") Signed-off-by: Marco Elver Reported-by: Nathan Chancellor Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Signed-off-by: Andrew Morton --- mm/kfence/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kfence/Makefile b/mm/kfence/Makefile index 0bb95728a7845..2de2a58d11a10 100644 --- a/mm/kfence/Makefile +++ b/mm/kfence/Makefile @@ -2,5 +2,5 @@ obj-y := core.o report.o -CFLAGS_kfence_test.o := -g -fno-omit-frame-pointer -fno-optimize-sibling-calls +CFLAGS_kfence_test.o := -fno-omit-frame-pointer -fno-optimize-sibling-calls obj-$(CONFIG_KFENCE_KUNIT_TEST) += kfence_test.o From 5eb39cde1e2487ba5ec1802dc5e58a77e700d99e Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 16 Mar 2023 23:47:05 +0100 Subject: [PATCH 420/898] kcsan: avoid passing -g for test Nathan reported that when building with GNU as and a version of clang that defaults to DWARF5, the assembler will complain with: Error: non-constant .uleb128 is not supported This is because `-g` defaults to the compiler debug info default. If the assembler does not support some of the directives used, the above errors occur. To fix, remove the explicit passing of `-g`. All the test wants is that stack traces print valid function names, and debug info is not required for that. (I currently cannot recall why I added the explicit `-g`.) Link: https://lkml.kernel.org/r/20230316224705.709984-2-elver@google.com Fixes: 1fe84fd4a402 ("kcsan: Add test suite") Signed-off-by: Marco Elver Reported-by: Nathan Chancellor Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Signed-off-by: Andrew Morton --- kernel/kcsan/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile index 8cf70f068d92d..a45f3dfc8d141 100644 --- a/kernel/kcsan/Makefile +++ b/kernel/kcsan/Makefile @@ -16,6 +16,6 @@ obj-y := core.o debugfs.o report.o KCSAN_INSTRUMENT_BARRIERS_selftest.o := y obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o -CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -g -fno-omit-frame-pointer +CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -fno-omit-frame-pointer CFLAGS_kcsan_test.o += $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_KCSAN_KUNIT_TEST) += kcsan_test.o From 5aa360971beaadf51f099fb7904fa4807b7d39cd Mon Sep 17 00:00:00 2001 From: Richard Leitner Date: Thu, 16 Mar 2023 11:25:25 +0100 Subject: [PATCH 421/898] mailmap: add entries for Richard Leitner Map all my old email addresses to my current address. Link: https://lkml.kernel.org/r/20230316-my-mailmap-v1-1-76bc3a36ba41@linux.dev Signed-off-by: Richard Leitner Signed-off-by: Andrew Morton --- .mailmap | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.mailmap b/.mailmap index cebd939c3a91d..e2af78f67f7cc 100644 --- a/.mailmap +++ b/.mailmap @@ -390,6 +390,9 @@ Rémi Denis-Courmont Ricardo Ribalda Ricardo Ribalda Ricardo Ribalda Delgado Ricardo Ribalda +Richard Leitner +Richard Leitner +Richard Leitner Robert Foss Roman Gushchin Roman Gushchin From d0072ca529674c36421023ffe90837a7de9387f3 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sat, 11 Mar 2023 08:18:00 +0900 Subject: [PATCH 422/898] mm: mmap: remove newline at the end of the trace We already have newline in TP_printk so remove the redundant newline character at the end of the mmap trace. <...>-345 [006] ..... 95.589290: exit_mmap: mt_mod ... <...>-345 [006] ..... 95.589413: vm_unmapped_area: addr=... <...>-345 [006] ..... 95.589571: vm_unmapped_area: addr=... <...>-345 [006] ..... 95.589606: vm_unmapped_area: addr=... to <...>-336 [006] ..... 44.762506: exit_mmap: mt_mod ... <...>-336 [006] ..... 44.762654: vm_unmapped_area: addr=... <...>-336 [006] ..... 44.762794: vm_unmapped_area: addr=... <...>-336 [006] ..... 44.762835: vm_unmapped_area: addr=... Link: https://lkml.kernel.org/r/ZAu6qDsNPmk82UjV@minwoo-desktop FIxes: df529cabb7a25 ("mm: mmap: add trace point of vm_unmapped_area") Signed-off-by: Minwoo Im Acked-by: Steven Rostedt (Google) Reviewed-by: Mukesh Ojha Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/trace/events/mmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/trace/events/mmap.h b/include/trace/events/mmap.h index 216de5f036210..f8d61485de16d 100644 --- a/include/trace/events/mmap.h +++ b/include/trace/events/mmap.h @@ -35,7 +35,7 @@ TRACE_EVENT(vm_unmapped_area, __entry->align_offset = info->align_offset; ), - TP_printk("addr=0x%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx\n", + TP_printk("addr=0x%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx", IS_ERR_VALUE(__entry->addr) ? 0 : __entry->addr, IS_ERR_VALUE(__entry->addr) ? __entry->addr : 0, __entry->total_vm, __entry->flags, __entry->length, @@ -110,7 +110,7 @@ TRACE_EVENT(exit_mmap, __entry->mt = &mm->mm_mt; ), - TP_printk("mt_mod %p, DESTROY\n", + TP_printk("mt_mod %p, DESTROY", __entry->mt ) ); From cbedf1a33970c9b825ae75b81fbd3e88e224a418 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Thu, 23 Mar 2023 18:13:11 -0700 Subject: [PATCH 423/898] Input: i8042 - add TUXEDO devices to i8042 quirk tables for partial fix A lot of modern Clevo barebones have touchpad and/or keyboard issues after suspend fixable with nomux + reset + noloop + nopnp. Luckily, none of them have an external PS/2 port so this can safely be set for all of them. I'm not entirely sure if every device listed really needs all four quirks, but after testing and production use, no negative effects could be observed when setting all four. Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS on the Clevo N150CU and the Clevo NHxxRZQ makes the keyboard very laggy for ~5 seconds after boot and sometimes also after resume. However both are required for the keyboard to not fail completely sometimes after boot or resume. Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230321191619.647911-1-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 28 +++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index fe7ffe30997c2..028e45bd050bf 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1124,6 +1124,20 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + /* + * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes + * the keyboard very laggy for ~5 seconds after boot and + * sometimes also after resume. + * However both are required for the keyboard to not fail + * completely sometimes after boot or resume. + */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "N150CU"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"), @@ -1131,6 +1145,20 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + /* + * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes + * the keyboard very laggy for ~5 seconds after boot and + * sometimes also after resume. + * However both are required for the keyboard to not fail + * completely sometimes after boot or resume. + */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "NHxxRZQ"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), From b20cf3f89c56b5f6a38b7f76a8128bf9f291bbd3 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Fri, 24 Mar 2023 09:06:58 +0800 Subject: [PATCH 424/898] platform/chrome: cros_ec_chardev: fix kernel data leak from ioctl It is possible to peep kernel page's data by providing larger `insize` in struct cros_ec_command[1] when invoking EC host commands. Fix it by using zeroed memory. [1]: https://elixir.bootlin.com/linux/v6.2/source/include/linux/platform_data/cros_ec_proto.h#L74 Fixes: eda2e30c6684 ("mfd / platform: cros_ec: Miscellaneous character device to talk with the EC") Signed-off-by: Tzung-Bi Shih Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20230324010658.1082361-1-tzungbi@kernel.org --- drivers/platform/chrome/cros_ec_chardev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/chrome/cros_ec_chardev.c b/drivers/platform/chrome/cros_ec_chardev.c index 0de7c255254e0..d6de5a2941282 100644 --- a/drivers/platform/chrome/cros_ec_chardev.c +++ b/drivers/platform/chrome/cros_ec_chardev.c @@ -284,7 +284,7 @@ static long cros_ec_chardev_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg) u_cmd.insize > EC_MAX_MSG_BYTES) return -EINVAL; - s_cmd = kmalloc(sizeof(*s_cmd) + max(u_cmd.outsize, u_cmd.insize), + s_cmd = kzalloc(sizeof(*s_cmd) + max(u_cmd.outsize, u_cmd.insize), GFP_KERNEL); if (!s_cmd) return -ENOMEM; From 39b291b86b5988bf8753c3874d5c773399d09b96 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 23 Mar 2023 21:15:52 +0900 Subject: [PATCH 425/898] ksmbd: return unsupported error on smb1 mount ksmbd disconnect connection when mounting with vers=smb1. ksmbd should send smb1 negotiate response to client for correct unsupported error return. This patch add needed SMB1 macros and fill NegProt part of the response for smb1 negotiate response. Cc: stable@vger.kernel.org Reported-by: Steve French Reviewed-by: Sergey Senozhatsky Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/connection.c | 7 ++----- fs/ksmbd/smb_common.c | 23 ++++++++++++++++++++--- fs/ksmbd/smb_common.h | 30 ++++++++---------------------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index 5d914715605fb..115a67d2cf785 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -319,13 +319,10 @@ int ksmbd_conn_handler_loop(void *p) } /* - * Check if pdu size is valid (min : smb header size, - * max : 0x00FFFFFF). + * Check maximum pdu size(0x00FFFFFF). */ - if (pdu_size < __SMB2_HEADER_STRUCTURE_SIZE || - pdu_size > MAX_STREAM_PROT_LEN) { + if (pdu_size > MAX_STREAM_PROT_LEN) break; - } /* 4 for rfc1002 length field */ size = pdu_size + 4; diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c index 079c9e76818d8..9c1ce6d199ce4 100644 --- a/fs/ksmbd/smb_common.c +++ b/fs/ksmbd/smb_common.c @@ -442,9 +442,26 @@ static int smb_handle_negotiate(struct ksmbd_work *work) { struct smb_negotiate_rsp *neg_rsp = work->response_buf; - ksmbd_debug(SMB, "Unsupported SMB protocol\n"); - neg_rsp->hdr.Status.CifsError = STATUS_INVALID_LOGON_TYPE; - return -EINVAL; + ksmbd_debug(SMB, "Unsupported SMB1 protocol\n"); + + /* + * Remove 4 byte direct TCP header, add 2 byte bcc and + * 2 byte DialectIndex. + */ + *(__be32 *)work->response_buf = + cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2 + 2); + neg_rsp->hdr.Status.CifsError = STATUS_SUCCESS; + + neg_rsp->hdr.Command = SMB_COM_NEGOTIATE; + *(__le32 *)neg_rsp->hdr.Protocol = SMB1_PROTO_NUMBER; + neg_rsp->hdr.Flags = SMBFLG_RESPONSE; + neg_rsp->hdr.Flags2 = SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS | + SMBFLG2_EXT_SEC | SMBFLG2_IS_LONG_NAME; + + neg_rsp->hdr.WordCount = 1; + neg_rsp->DialectIndex = cpu_to_le16(work->conn->dialect); + neg_rsp->ByteCount = 0; + return 0; } int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command) diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h index e663ab9ea7590..d30ce4c1a1517 100644 --- a/fs/ksmbd/smb_common.h +++ b/fs/ksmbd/smb_common.h @@ -158,8 +158,15 @@ #define SMB1_PROTO_NUMBER cpu_to_le32(0x424d53ff) #define SMB_COM_NEGOTIATE 0x72 - #define SMB1_CLIENT_GUID_SIZE (16) + +#define SMBFLG_RESPONSE 0x80 /* this PDU is a response from server */ + +#define SMBFLG2_IS_LONG_NAME cpu_to_le16(0x40) +#define SMBFLG2_EXT_SEC cpu_to_le16(0x800) +#define SMBFLG2_ERR_STATUS cpu_to_le16(0x4000) +#define SMBFLG2_UNICODE cpu_to_le16(0x8000) + struct smb_hdr { __be32 smb_buf_length; __u8 Protocol[4]; @@ -199,28 +206,7 @@ struct smb_negotiate_req { struct smb_negotiate_rsp { struct smb_hdr hdr; /* wct = 17 */ __le16 DialectIndex; /* 0xFFFF = no dialect acceptable */ - __u8 SecurityMode; - __le16 MaxMpxCount; - __le16 MaxNumberVcs; - __le32 MaxBufferSize; - __le32 MaxRawSize; - __le32 SessionKey; - __le32 Capabilities; /* see below */ - __le32 SystemTimeLow; - __le32 SystemTimeHigh; - __le16 ServerTimeZone; - __u8 EncryptionKeyLength; __le16 ByteCount; - union { - unsigned char EncryptionKey[8]; /* cap extended security off */ - /* followed by Domain name - if extended security is off */ - /* followed by 16 bytes of server GUID */ - /* then security blob if cap_extended_security negotiated */ - struct { - unsigned char GUID[SMB1_CLIENT_GUID_SIZE]; - unsigned char SecurityBlob[1]; - } __packed extended_response; - } __packed u; } __packed; struct filesystem_attribute_info { From fa4e7a6fa12b1132340785e14bd439cbe95b7a5a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 24 Mar 2023 08:50:05 +0100 Subject: [PATCH 426/898] ALSA: usb-audio: Fix regression on detection of Roland VS-100 It's been reported that the recent kernel can't probe the PCM devices on Roland VS-100 properly, and it turned out to be a regression by the recent addition of the bit shift range check for the format bits. In the old code, we just did bit-shift and it resulted in zero, which is then corrected to the standard PCM format, while the new code explicitly returns an error in such a case. For addressing the regression, relax the check and fallback to the standard PCM type (with the info output). Fixes: 43d5ca88dfcd ("ALSA: usb-audio: Fix potential out-of-bounds shift") Cc: Link: https://bugzilla.kernel.org/show_bug.cgi?id=217084 Link: https://lore.kernel.org/r/20230324075005.19403-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/format.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/usb/format.c b/sound/usb/format.c index 405dc0bf6678c..4b1c5ba121f39 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -39,8 +39,12 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip, case UAC_VERSION_1: default: { struct uac_format_type_i_discrete_descriptor *fmt = _fmt; - if (format >= 64) - return 0; /* invalid format */ + if (format >= 64) { + usb_audio_info(chip, + "%u:%d: invalid format type 0x%llx is detected, processed as PCM\n", + fp->iface, fp->altsetting, format); + format = UAC_FORMAT_TYPE_I_PCM; + } sample_width = fmt->bBitResolution; sample_bytes = fmt->bSubframeSize; format = 1ULL << format; From 08570b7c8db6d9185deccf5bcda773bd6f17172f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 27 Jan 2023 23:14:00 +0100 Subject: [PATCH 427/898] gpu: host1x: fix uninitialized variable use The error handling for platform_get_irq() failing no longer works after a recent change, clang now points this out with a warning: drivers/gpu/host1x/dev.c:520:6: error: variable 'syncpt_irq' is uninitialized when used here [-Werror,-Wuninitialized] if (syncpt_irq < 0) ^~~~~~~~~~ Fix this by removing the variable and checking the correct error status. Fixes: 625d4ffb438c ("gpu: host1x: Rewrite syncpoint interrupt handling") Reviewed-by: Nathan Chancellor Reviewed-by: Mikko Perttunen Reported-by: "kernelci.org bot" Reviewed-by: Nick Desaulniers Reviewed-by: Jon Hunter Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230127221418.2522612-1-arnd@kernel.org --- drivers/gpu/host1x/dev.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 4872d183d8604..aae2efeef5032 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -487,7 +487,6 @@ static int host1x_get_resets(struct host1x *host) static int host1x_probe(struct platform_device *pdev) { struct host1x *host; - int syncpt_irq; int err; host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL); @@ -517,8 +516,8 @@ static int host1x_probe(struct platform_device *pdev) } host->syncpt_irq = platform_get_irq(pdev, 0); - if (syncpt_irq < 0) - return syncpt_irq; + if (host->syncpt_irq < 0) + return host->syncpt_irq; mutex_init(&host->devices_lock); INIT_LIST_HEAD(&host->devices); From 4522ad764d7e99a79de817fcbc682ebcc75d01fe Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 23 Mar 2023 13:54:57 +0100 Subject: [PATCH 428/898] accel/ivpu: Do not access HW registers after unbind We should not access hardware after we unbind from the bus. Use drm_dev_enter() / drm_dev_exit() to mark code sections where hardware is accessed (and not already protected by other locks) and drm_dev_unplug() to mark device is gone. Fixes: 35b137630f08 ("accel/ivpu: Introduce a new DRM driver for Intel VPU") Signed-off-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20230323125504.2586442-2-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 8 ++++++-- drivers/accel/ivpu/ivpu_drv.h | 1 + drivers/accel/ivpu/ivpu_job.c | 11 +++++++++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 231f29bb50257..ac06bbfca9202 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -8,7 +8,6 @@ #include #include -#include #include #include #include @@ -118,6 +117,10 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f struct pci_dev *pdev = to_pci_dev(vdev->drm.dev); struct drm_ivpu_param *args = data; int ret = 0; + int idx; + + if (!drm_dev_enter(dev, &idx)) + return -ENODEV; switch (args->param) { case DRM_IVPU_PARAM_DEVICE_ID: @@ -171,6 +174,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f break; } + drm_dev_exit(idx); return ret; } @@ -622,7 +626,7 @@ static void ivpu_remove(struct pci_dev *pdev) { struct ivpu_device *vdev = pci_get_drvdata(pdev); - drm_dev_unregister(&vdev->drm); + drm_dev_unplug(&vdev->drm); ivpu_dev_fini(vdev); } diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index f47b4965db2e3..1b2aa05840add 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -7,6 +7,7 @@ #define __IVPU_DRV_H__ #include +#include #include #include #include diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 94068aedf97cf..910301fae4351 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -489,12 +489,12 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - int ret = 0; struct ivpu_file_priv *file_priv = file->driver_priv; struct ivpu_device *vdev = file_priv->vdev; struct drm_ivpu_submit *params = data; struct ivpu_job *job; u32 *buf_handles; + int idx, ret; if (params->engine > DRM_IVPU_ENGINE_COPY) return -EINVAL; @@ -523,6 +523,11 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto free_handles; } + if (!drm_dev_enter(&vdev->drm, &idx)) { + ret = -ENODEV; + goto free_handles; + } + ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", file_priv->ctx.id, params->buffer_count); @@ -530,7 +535,7 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (!job) { ivpu_err(vdev, "Failed to create job\n"); ret = -ENOMEM; - goto free_handles; + goto dev_exit; } ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count, @@ -548,6 +553,8 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) job_put: job_put(job); +dev_exit: + drm_dev_exit(idx); free_handles: kfree(buf_handles); From 6013aa84ee45fd0faa23fc6312a1af60b06d408b Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 23 Mar 2023 13:54:58 +0100 Subject: [PATCH 429/898] accel/ivpu: Cancel recovery work Prevent running recovery_work after device is removed. Fixes: 852be13f3bd3 ("accel/ivpu: Add PM support") Signed-off-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20230323125504.2586442-3-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 2 ++ drivers/accel/ivpu/ivpu_pm.c | 17 ++++++++++++++--- drivers/accel/ivpu/ivpu_pm.h | 1 + 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index ac06bbfca9202..d9e311b403480 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -580,6 +580,8 @@ static void ivpu_dev_fini(struct ivpu_device *vdev) ivpu_pm_disable(vdev); ivpu_shutdown(vdev); ivpu_job_done_thread_fini(vdev); + ivpu_pm_cancel_recovery(vdev); + ivpu_ipc_fini(vdev); ivpu_fw_fini(vdev); ivpu_mmu_global_context_fini(vdev); diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 553bcbd787b3c..7df72fa8100f9 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -98,12 +98,18 @@ static int ivpu_resume(struct ivpu_device *vdev) static void ivpu_pm_recovery_work(struct work_struct *work) { struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); - struct ivpu_device *vdev = pm->vdev; + struct ivpu_device *vdev = pm->vdev; char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; int ret; - ret = pci_reset_function(to_pci_dev(vdev->drm.dev)); - if (ret) +retry: + ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev)); + if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) { + cond_resched(); + goto retry; + } + + if (ret && ret != -EAGAIN) ivpu_err(vdev, "Failed to reset VPU: %d\n", ret); kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); @@ -306,6 +312,11 @@ int ivpu_pm_init(struct ivpu_device *vdev) return 0; } +void ivpu_pm_cancel_recovery(struct ivpu_device *vdev) +{ + cancel_work_sync(&vdev->pm->recovery_work); +} + void ivpu_pm_enable(struct ivpu_device *vdev) { struct device *dev = vdev->drm.dev; diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h index dc1b3758e13f4..baca981872551 100644 --- a/drivers/accel/ivpu/ivpu_pm.h +++ b/drivers/accel/ivpu/ivpu_pm.h @@ -21,6 +21,7 @@ struct ivpu_pm_info { int ivpu_pm_init(struct ivpu_device *vdev); void ivpu_pm_enable(struct ivpu_device *vdev); void ivpu_pm_disable(struct ivpu_device *vdev); +void ivpu_pm_cancel_recovery(struct ivpu_device *vdev); int ivpu_pm_suspend_cb(struct device *dev); int ivpu_pm_resume_cb(struct device *dev); From 3ff6edbc17dbab87f705b1da93854ffff3912673 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 23 Mar 2023 13:54:59 +0100 Subject: [PATCH 430/898] accel/ivpu: Do not use SSID 1 The SSID=1 is used by the firmware as default value in case SSID mapping is not initialized. This allows detecting use of miss-configured memory contexts. The future FW versions may not allow using SSID=1. SSID=65 is valid value, number of contexts are limited by number of available command queues, but SSID can be any u16 value. Fixes: 35b137630f08 ("accel/ivpu: Introduce a new DRM driver for Intel VPU") Co-developed-by: Andrzej Kacprowski Signed-off-by: Andrzej Kacprowski Signed-off-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20230323125504.2586442-4-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 4 ++-- drivers/accel/ivpu/ivpu_drv.h | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index d9e311b403480..70245cf845938 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -474,8 +474,8 @@ static int ivpu_dev_init(struct ivpu_device *vdev) vdev->hw->ops = &ivpu_hw_mtl_ops; vdev->platform = IVPU_PLATFORM_INVALID; - vdev->context_xa_limit.min = IVPU_GLOBAL_CONTEXT_MMU_SSID + 1; - vdev->context_xa_limit.max = IVPU_CONTEXT_LIMIT; + vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID; + vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID; atomic64_set(&vdev->unique_id_counter, 0); xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC); xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 1b2aa05840add..ef12a38e06e18 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -25,7 +25,10 @@ #define PCI_DEVICE_ID_MTL 0x7d1d #define IVPU_GLOBAL_CONTEXT_MMU_SSID 0 -#define IVPU_CONTEXT_LIMIT 64 +/* SSID 1 is used by the VPU to represent invalid context */ +#define IVPU_USER_CONTEXT_MIN_SSID 2 +#define IVPU_USER_CONTEXT_MAX_SSID (IVPU_USER_CONTEXT_MIN_SSID + 63) + #define IVPU_NUM_ENGINES 2 #define IVPU_PLATFORM_SILICON 0 From a8fed6d1e0b9bbea2cbe602831f84a2cff4653e3 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 23 Mar 2023 13:55:00 +0100 Subject: [PATCH 431/898] accel/ivpu: Fix power down sequence Remove FPGA workaround on power_down to skip checking for noc quiescent state. Put VPU in reset before powering it down and skip manipulating registers that are reset by the VPU reset. This fixes power down errors where VPU is powered down just after VPU is booted. Fixes: 35b137630f08 ("accel/ivpu: Introduce a new DRM driver for Intel VPU") Signed-off-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20230323125504.2586442-5-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_mtl.c | 37 ++------------------------------ 1 file changed, 2 insertions(+), 35 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c index 62bfaa9081c4f..70ca6de780601 100644 --- a/drivers/accel/ivpu/ivpu_hw_mtl.c +++ b/drivers/accel/ivpu/ivpu_hw_mtl.c @@ -403,11 +403,6 @@ static int ivpu_boot_host_ss_axi_enable(struct ivpu_device *vdev) return ivpu_boot_host_ss_axi_drive(vdev, true); } -static int ivpu_boot_host_ss_axi_disable(struct ivpu_device *vdev) -{ - return ivpu_boot_host_ss_axi_drive(vdev, false); -} - static int ivpu_boot_host_ss_top_noc_drive(struct ivpu_device *vdev, bool enable) { int ret; @@ -441,11 +436,6 @@ static int ivpu_boot_host_ss_top_noc_enable(struct ivpu_device *vdev) return ivpu_boot_host_ss_top_noc_drive(vdev, true); } -static int ivpu_boot_host_ss_top_noc_disable(struct ivpu_device *vdev) -{ - return ivpu_boot_host_ss_top_noc_drive(vdev, false); -} - static void ivpu_boot_pwr_island_trickle_drive(struct ivpu_device *vdev, bool enable) { u32 val = REGV_RD32(MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0); @@ -504,16 +494,6 @@ static void ivpu_boot_dpu_active_drive(struct ivpu_device *vdev, bool enable) REGV_WR32(MTL_VPU_HOST_SS_AON_DPU_ACTIVE, val); } -static int ivpu_boot_pwr_domain_disable(struct ivpu_device *vdev) -{ - ivpu_boot_dpu_active_drive(vdev, false); - ivpu_boot_pwr_island_isolation_drive(vdev, true); - ivpu_boot_pwr_island_trickle_drive(vdev, false); - ivpu_boot_pwr_island_drive(vdev, false); - - return ivpu_boot_wait_for_pwr_island_status(vdev, 0x0); -} - static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev) { int ret; @@ -797,21 +777,8 @@ static int ivpu_hw_mtl_power_down(struct ivpu_device *vdev) { int ret = 0; - /* FPGA requires manual clearing of IP_Reset bit by enabling quiescent state */ - if (ivpu_is_fpga(vdev)) { - if (ivpu_boot_host_ss_top_noc_disable(vdev)) { - ivpu_err(vdev, "Failed to disable TOP NOC\n"); - ret = -EIO; - } - - if (ivpu_boot_host_ss_axi_disable(vdev)) { - ivpu_err(vdev, "Failed to disable AXI\n"); - ret = -EIO; - } - } - - if (ivpu_boot_pwr_domain_disable(vdev)) { - ivpu_err(vdev, "Failed to disable power domain\n"); + if (ivpu_hw_mtl_reset(vdev)) { + ivpu_err(vdev, "Failed to reset the VPU\n"); ret = -EIO; } From 3d8b2727d8de4720c6865917dd05249b1f449f34 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 23 Mar 2023 13:55:01 +0100 Subject: [PATCH 432/898] accel/ivpu: Disable buttress on device removal Use pci_set_power_state() to disable buttress when device is removed. This is workaround of hardware bug that hangs the system. Additionally not disabling buttress prevents CPU enter deeper Pkg-C states when the driver is unloaded or fail to probe. Fixes: 35b137630f08 ("accel/ivpu: Introduce a new DRM driver for Intel VPU") Signed-off-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20230323125504.2586442-6-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 4 ++++ drivers/accel/ivpu/ivpu_drv.h | 1 + drivers/accel/ivpu/ivpu_hw_mtl.c | 1 + 3 files changed, 6 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 70245cf845938..6a320a73e3ccf 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -569,6 +569,8 @@ static int ivpu_dev_init(struct ivpu_device *vdev) ivpu_mmu_global_context_fini(vdev); err_power_down: ivpu_hw_power_down(vdev); + if (IVPU_WA(d3hot_after_power_off)) + pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); err_xa_destroy: xa_destroy(&vdev->submitted_jobs_xa); xa_destroy(&vdev->context_xa); @@ -579,6 +581,8 @@ static void ivpu_dev_fini(struct ivpu_device *vdev) { ivpu_pm_disable(vdev); ivpu_shutdown(vdev); + if (IVPU_WA(d3hot_after_power_off)) + pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); ivpu_job_done_thread_fini(vdev); ivpu_pm_cancel_recovery(vdev); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index ef12a38e06e18..d3013fbd13b32 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -74,6 +74,7 @@ struct ivpu_wa_table { bool punit_disabled; bool clear_runtime_mem; + bool d3hot_after_power_off; }; struct ivpu_hw_info; diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c index 70ca6de780601..133ba33d2866d 100644 --- a/drivers/accel/ivpu/ivpu_hw_mtl.c +++ b/drivers/accel/ivpu/ivpu_hw_mtl.c @@ -101,6 +101,7 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev) { vdev->wa.punit_disabled = ivpu_is_fpga(vdev); vdev->wa.clear_runtime_mem = false; + vdev->wa.d3hot_after_power_off = true; } static void ivpu_hw_timeouts_init(struct ivpu_device *vdev) From 392b35bf92605deb620a9b0c94c14cad1c69fd34 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 23 Mar 2023 13:55:02 +0100 Subject: [PATCH 433/898] accel/ivpu: Remove support for 1 tile SKUs The support for single tile SKUs was dropped from MTL. Note that we can still boot the VPU with 1-tile work point config - this is independent from number of tiles present in the VPU. Co-developed-by: Andrzej Kacprowski Signed-off-by: Andrzej Kacprowski Signed-off-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20230323125504.2586442-7-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_mtl.c | 59 ++++++++++---------------------- 1 file changed, 18 insertions(+), 41 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c index 133ba33d2866d..98c8a4aa25f03 100644 --- a/drivers/accel/ivpu/ivpu_hw_mtl.c +++ b/drivers/accel/ivpu/ivpu_hw_mtl.c @@ -12,20 +12,20 @@ #include "ivpu_mmu.h" #include "ivpu_pm.h" -#define TILE_FUSE_ENABLE_BOTH 0x0 -#define TILE_FUSE_ENABLE_UPPER 0x1 -#define TILE_FUSE_ENABLE_LOWER 0x2 - -#define TILE_SKU_BOTH_MTL 0x3630 -#define TILE_SKU_LOWER_MTL 0x3631 -#define TILE_SKU_UPPER_MTL 0x3632 +#define TILE_FUSE_ENABLE_BOTH 0x0 +#define TILE_SKU_BOTH_MTL 0x3630 /* Work point configuration values */ -#define WP_CONFIG_1_TILE_5_3_RATIO 0x0101 -#define WP_CONFIG_1_TILE_4_3_RATIO 0x0102 -#define WP_CONFIG_2_TILE_5_3_RATIO 0x0201 -#define WP_CONFIG_2_TILE_4_3_RATIO 0x0202 -#define WP_CONFIG_0_TILE_PLL_OFF 0x0000 +#define CONFIG_1_TILE 0x01 +#define CONFIG_2_TILE 0x02 +#define PLL_RATIO_5_3 0x01 +#define PLL_RATIO_4_3 0x02 +#define WP_CONFIG(tile, ratio) (((tile) << 8) | (ratio)) +#define WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(CONFIG_1_TILE, PLL_RATIO_5_3) +#define WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(CONFIG_1_TILE, PLL_RATIO_4_3) +#define WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(CONFIG_2_TILE, PLL_RATIO_5_3) +#define WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(CONFIG_2_TILE, PLL_RATIO_4_3) +#define WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0) #define PLL_REF_CLK_FREQ (50 * 1000000) #define PLL_SIMULATION_FREQ (10 * 1000000) @@ -219,7 +219,8 @@ static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable) config = 0; } - ivpu_dbg(vdev, PM, "PLL workpoint request: %d Hz\n", PLL_RATIO_TO_FREQ(target_ratio)); + ivpu_dbg(vdev, PM, "PLL workpoint request: config 0x%04x pll ratio 0x%x\n", + config, target_ratio); ret = ivpu_pll_cmd_send(vdev, hw->pll.min_ratio, hw->pll.max_ratio, target_ratio, config); if (ret) { @@ -610,34 +611,10 @@ static int ivpu_boot_d0i3_drive(struct ivpu_device *vdev, bool enable) static int ivpu_hw_mtl_info_init(struct ivpu_device *vdev) { struct ivpu_hw_info *hw = vdev->hw; - u32 tile_fuse; - - tile_fuse = REGB_RD32(MTL_BUTTRESS_TILE_FUSE); - if (!REG_TEST_FLD(MTL_BUTTRESS_TILE_FUSE, VALID, tile_fuse)) - ivpu_warn(vdev, "Tile Fuse: Invalid (0x%x)\n", tile_fuse); - - hw->tile_fuse = REG_GET_FLD(MTL_BUTTRESS_TILE_FUSE, SKU, tile_fuse); - switch (hw->tile_fuse) { - case TILE_FUSE_ENABLE_LOWER: - hw->sku = TILE_SKU_LOWER_MTL; - hw->config = WP_CONFIG_1_TILE_5_3_RATIO; - ivpu_dbg(vdev, MISC, "Tile Fuse: Enable Lower\n"); - break; - case TILE_FUSE_ENABLE_UPPER: - hw->sku = TILE_SKU_UPPER_MTL; - hw->config = WP_CONFIG_1_TILE_4_3_RATIO; - ivpu_dbg(vdev, MISC, "Tile Fuse: Enable Upper\n"); - break; - case TILE_FUSE_ENABLE_BOTH: - hw->sku = TILE_SKU_BOTH_MTL; - hw->config = WP_CONFIG_2_TILE_5_3_RATIO; - ivpu_dbg(vdev, MISC, "Tile Fuse: Enable Both\n"); - break; - default: - hw->config = WP_CONFIG_0_TILE_PLL_OFF; - ivpu_dbg(vdev, MISC, "Tile Fuse: Disable\n"); - break; - } + + hw->tile_fuse = TILE_FUSE_ENABLE_BOTH; + hw->sku = TILE_SKU_BOTH_MTL; + hw->config = WP_CONFIG_2_TILE_4_3_RATIO; ivpu_pll_init_frequency_ratios(vdev); From 252776b2973ea8c7e2e6db09993e6f33c81b776e Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 23 Mar 2023 13:55:03 +0100 Subject: [PATCH 434/898] accel/ivpu: Fix VPU clock calculation The driver calculates the wrong frequency because it ignores the workpoint config and this cause undesired power/performance characteristics. Fix this by using the workpoint config in the freq calculations. Fixes: 35b137630f08 ("accel/ivpu: Introduce a new DRM driver for Intel VPU") Co-developed-by: Andrzej Kacprowski Signed-off-by: Andrzej Kacprowski Signed-off-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20230323125504.2586442-8-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_mtl.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c index 98c8a4aa25f03..382ec127be8ea 100644 --- a/drivers/accel/ivpu/ivpu_hw_mtl.c +++ b/drivers/accel/ivpu/ivpu_hw_mtl.c @@ -29,7 +29,6 @@ #define PLL_REF_CLK_FREQ (50 * 1000000) #define PLL_SIMULATION_FREQ (10 * 1000000) -#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) #define PLL_DEFAULT_EPP_VALUE 0x80 #define TIM_SAFE_ENABLE 0xf1d0dead @@ -789,6 +788,19 @@ static void ivpu_hw_mtl_wdt_disable(struct ivpu_device *vdev) REGV_WR32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, val); } +static u32 ivpu_hw_mtl_pll_to_freq(u32 ratio, u32 config) +{ + u32 pll_clock = PLL_REF_CLK_FREQ * ratio; + u32 cpu_clock; + + if ((config & 0xff) == PLL_RATIO_4_3) + cpu_clock = pll_clock * 2 / 4; + else + cpu_clock = pll_clock * 2 / 5; + + return cpu_clock; +} + /* Register indirect accesses */ static u32 ivpu_hw_mtl_reg_pll_freq_get(struct ivpu_device *vdev) { @@ -800,7 +812,7 @@ static u32 ivpu_hw_mtl_reg_pll_freq_get(struct ivpu_device *vdev) if (!ivpu_is_silicon(vdev)) return PLL_SIMULATION_FREQ; - return PLL_RATIO_TO_FREQ(pll_curr_ratio); + return ivpu_hw_mtl_pll_to_freq(pll_curr_ratio, vdev->hw->config); } static u32 ivpu_hw_mtl_reg_telemetry_offset_get(struct ivpu_device *vdev) From 34224133458ca48caf2b57df335e870f1abb7af5 Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Thu, 23 Mar 2023 13:55:04 +0100 Subject: [PATCH 435/898] accel/ivpu: Fix IPC buffer header status field value IPC messages transmitted to the device must be marked as allocated - status field must be set to 1. The VPU driver has IVPU_IPC_HDR_ALLOCATED incorrectly defined. Future VPU firmware versions will reject all IPC messages with invalid status and will not work with a VPU driver that is missing this fix. Fixes: 5d7422cfb498 ("accel/ivpu: Add IPC driver and JSM messages") Signed-off-by: Andrzej Kacprowski Signed-off-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20230323125504.2586442-9-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_ipc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h index 9838202ecfadf..68f5b6668e00b 100644 --- a/drivers/accel/ivpu/ivpu_ipc.h +++ b/drivers/accel/ivpu/ivpu_ipc.h @@ -21,7 +21,7 @@ struct ivpu_bo; #define IVPU_IPC_ALIGNMENT 64 #define IVPU_IPC_HDR_FREE 0 -#define IVPU_IPC_HDR_ALLOCATED 0 +#define IVPU_IPC_HDR_ALLOCATED 1 /** * struct ivpu_ipc_hdr - The IPC message header structure, exchanged From e313de5b5b04176f28384b45ebebd552c0c7dae3 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 24 Mar 2023 05:30:35 -0700 Subject: [PATCH 436/898] MAINTAINERS: xtensa: drop linux-xtensa@linux-xtensa.org mailing list The linux-xtensa@linux-xtensa.org mailing list has been bouncing emails for a few months now. Drop it from the xtensa entries in the MAINTAINERS file. Signed-off-by: Max Filippov --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ec57c42ed5440..b915c364bcad1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20647,7 +20647,6 @@ F: sound/soc/codecs/tscs*.h TENSILICA XTENSA PORT (xtensa) M: Chris Zankel M: Max Filippov -L: linux-xtensa@linux-xtensa.org S: Maintained T: git https://github.com/jcmvbkbc/linux-xtensa.git F: arch/xtensa/ @@ -23039,7 +23038,6 @@ F: drivers/gpio/gpio-xra1403.c XTENSA XTFPGA PLATFORM SUPPORT M: Max Filippov -L: linux-xtensa@linux-xtensa.org S: Maintained F: drivers/spi/spi-xtensa-xtfpga.c F: sound/soc/xtensa/xtfpga-i2s.c From 7c3650a8007c43bc1a60d8bdb4cdbea3ad9a7dd4 Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Tue, 14 Mar 2023 11:12:56 -0700 Subject: [PATCH 437/898] HID: hid-sensor-custom: Fix buffer overrun in device name On some platforms there are some platform devices created with invalid names. For example: "HID-SENSOR-INT-020b?.39.auto" instead of "HID-SENSOR-INT-020b.39.auto" This string include some invalid characters, hence it will fail to properly load the driver which will handle this custom sensor. Also it is a problem for some user space tools, which parses the device names from ftrace and dmesg. This is because the string, real_usage, is not NULL terminated and printed with %s to form device name. To address this, initialize the real_usage string with 0s. Reported-and-tested-by: Todd Brandt Link: https://bugzilla.kernel.org/show_bug.cgi?id=217169 Fixes: 98c062e82451 ("HID: hid-sensor-custom: Allow more custom iio sensors") Cc: stable@vger.kernel.org Suggested-by: Philipp Jungkamp Signed-off-by: Philipp Jungkamp Signed-off-by: Todd Brandt Reviewed-by: Andi Shyti Reviewed-by: Jonathan Cameron Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/hid-sensor-custom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c index 3e3f89e01d819..d85398721659a 100644 --- a/drivers/hid/hid-sensor-custom.c +++ b/drivers/hid/hid-sensor-custom.c @@ -940,7 +940,7 @@ hid_sensor_register_platform_device(struct platform_device *pdev, struct hid_sensor_hub_device *hsdev, const struct hid_sensor_custom_match *match) { - char real_usage[HID_SENSOR_USAGE_LENGTH]; + char real_usage[HID_SENSOR_USAGE_LENGTH] = { 0 }; struct platform_device *custom_pdev; const char *dev_name; char *c; From fddc6ccc487e5de07b98df8d04118d5dcb5e0407 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 17 Mar 2023 12:51:17 +0000 Subject: [PATCH 438/898] cifs: append path to open_enter trace event We do not dump the file path for smb3_open_enter ftrace calls, which is a severe handicap while debugging using ftrace evens. This change adds that info. Unfortunately, we're not updating the path in open params in many places; which I had to do as a part of this change. SMB2_open gets path in utf16 format, but it's easier of path is supplied as char pointer in oparms. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/cached_dir.c | 1 + fs/cifs/link.c | 2 ++ fs/cifs/smb2inode.c | 1 + fs/cifs/smb2ops.c | 11 +++++++++++ fs/cifs/smb2pdu.c | 4 ++-- fs/cifs/trace.h | 12 ++++++++---- 6 files changed, 25 insertions(+), 6 deletions(-) diff --git a/fs/cifs/cached_dir.c b/fs/cifs/cached_dir.c index 75d5e06306ea5..71fabb4c09a4b 100644 --- a/fs/cifs/cached_dir.c +++ b/fs/cifs/cached_dir.c @@ -184,6 +184,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = path, .create_options = cifs_create_options(cifs_sb, CREATE_NOT_FILE), .desired_access = FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 7d97c10f24535..c66be4904e1fa 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -360,6 +360,7 @@ smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, + .path = path, .desired_access = GENERIC_READ, .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR), .disposition = FILE_OPEN, @@ -427,6 +428,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, + .path = path, .desired_access = GENERIC_WRITE, .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR), .disposition = FILE_CREATE, diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 8dd3791b5c538..163a03298430d 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -107,6 +107,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, vars->oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = full_path, .desired_access = desired_access, .disposition = create_disposition, .create_options = cifs_create_options(cifs_sb, create_options), diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f7e18ab7ee9cd..a81758225fcdc 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -745,6 +745,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = "", .desired_access = FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), @@ -788,6 +789,7 @@ smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = "", .desired_access = FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), @@ -835,6 +837,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = full_path, .desired_access = FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), @@ -1119,6 +1122,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = path, .desired_access = FILE_WRITE_EA, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), @@ -2110,6 +2114,7 @@ smb3_notify(const unsigned int xid, struct file *pfile, tcon = cifs_sb_master_tcon(cifs_sb); oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = path, .desired_access = FILE_READ_ATTRIBUTES | FILE_READ_DATA, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), @@ -2182,6 +2187,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = path, .desired_access = FILE_READ_ATTRIBUTES | FILE_READ_DATA, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), @@ -2514,6 +2520,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = path, .desired_access = desired_access, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), @@ -2648,6 +2655,7 @@ smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = "", .desired_access = FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), @@ -2942,6 +2950,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = full_path, .desired_access = FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, create_options), @@ -3082,6 +3091,7 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = full_path, .desired_access = FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, OPEN_REPARSE_POINT), @@ -3222,6 +3232,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = path, .desired_access = READ_CONTROL, .disposition = FILE_OPEN, /* diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 0e53265e1462a..caeb92a69b5f4 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2705,7 +2705,7 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, rqst.rq_nvec = n_iov; /* no need to inc num_remote_opens because we close it just below */ - trace_smb3_posix_mkdir_enter(xid, tcon->tid, ses->Suid, CREATE_NOT_FILE, + trace_smb3_posix_mkdir_enter(xid, tcon->tid, ses->Suid, full_path, CREATE_NOT_FILE, FILE_WRITE_ATTRIBUTES); /* resource #4: response buffer */ rc = cifs_send_recv(xid, ses, server, @@ -2973,7 +2973,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, if (rc) goto creat_exit; - trace_smb3_open_enter(xid, tcon->tid, tcon->ses->Suid, + trace_smb3_open_enter(xid, tcon->tid, tcon->ses->Suid, oparms->path, oparms->create_options, oparms->desired_access); rc = cifs_send_recv(xid, ses, server, diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index 110070ba8b04e..d3053bd8ae731 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -701,13 +701,15 @@ DECLARE_EVENT_CLASS(smb3_open_enter_class, TP_PROTO(unsigned int xid, __u32 tid, __u64 sesid, + const char *full_path, int create_options, int desired_access), - TP_ARGS(xid, tid, sesid, create_options, desired_access), + TP_ARGS(xid, tid, sesid, full_path, create_options, desired_access), TP_STRUCT__entry( __field(unsigned int, xid) __field(__u32, tid) __field(__u64, sesid) + __string(path, full_path) __field(int, create_options) __field(int, desired_access) ), @@ -715,11 +717,12 @@ DECLARE_EVENT_CLASS(smb3_open_enter_class, __entry->xid = xid; __entry->tid = tid; __entry->sesid = sesid; + __assign_str(path, full_path); __entry->create_options = create_options; __entry->desired_access = desired_access; ), - TP_printk("xid=%u sid=0x%llx tid=0x%x cr_opts=0x%x des_access=0x%x", - __entry->xid, __entry->sesid, __entry->tid, + TP_printk("xid=%u sid=0x%llx tid=0x%x path=%s cr_opts=0x%x des_access=0x%x", + __entry->xid, __entry->sesid, __entry->tid, __get_str(path), __entry->create_options, __entry->desired_access) ) @@ -728,9 +731,10 @@ DEFINE_EVENT(smb3_open_enter_class, smb3_##name, \ TP_PROTO(unsigned int xid, \ __u32 tid, \ __u64 sesid, \ + const char *full_path, \ int create_options, \ int desired_access), \ - TP_ARGS(xid, tid, sesid, create_options, desired_access)) + TP_ARGS(xid, tid, sesid, full_path, create_options, desired_access)) DEFINE_SMB3_OPEN_ENTER_EVENT(open_enter); DEFINE_SMB3_OPEN_ENTER_EVENT(posix_mkdir_enter); From bc962159e8e326af634a506508034a375bf2b858 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 20 Mar 2023 06:08:19 +0000 Subject: [PATCH 439/898] cifs: avoid race conditions with parallel reconnects When multiple processes/channels do reconnects in parallel we used to return success immediately negotiate/session-setup/tree-connect, causing race conditions between processes that enter the function in parallel. This caused several errors related to session not found to show up during parallel reconnects. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/connect.c | 48 ++++++++++++++++++++++++++++++----------- fs/cifs/smb2pdu.c | 44 +++++++++++++++++++++---------------- fs/cifs/smb2transport.c | 17 ++++++++++++--- 3 files changed, 76 insertions(+), 33 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f42cc70773122..c3162ef9c9e93 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -212,31 +212,42 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, cifs_chan_update_iface(ses, server); spin_lock(&ses->chan_lock); - if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server)) - goto next_session; + if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server)) { + spin_unlock(&ses->chan_lock); + continue; + } if (mark_smb_session) CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses); else cifs_chan_set_need_reconnect(ses, server); + cifs_dbg(FYI, "%s: channel connect bitmap: 0x%lx\n", + __func__, ses->chans_need_reconnect); + /* If all channels need reconnect, then tcon needs reconnect */ - if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) - goto next_session; + if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) { + spin_unlock(&ses->chan_lock); + continue; + } + spin_unlock(&ses->chan_lock); + spin_lock(&ses->ses_lock); ses->ses_status = SES_NEED_RECON; + spin_unlock(&ses->ses_lock); list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { tcon->need_reconnect = true; + spin_lock(&tcon->tc_lock); tcon->status = TID_NEED_RECON; + spin_unlock(&tcon->tc_lock); } if (ses->tcon_ipc) { ses->tcon_ipc->need_reconnect = true; + spin_lock(&ses->tcon_ipc->tc_lock); ses->tcon_ipc->status = TID_NEED_RECON; + spin_unlock(&ses->tcon_ipc->tc_lock); } - -next_session: - spin_unlock(&ses->chan_lock); } spin_unlock(&cifs_tcp_ses_lock); } @@ -3653,11 +3664,19 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses, /* only send once per connect */ spin_lock(&server->srv_lock); - if (!server->ops->need_neg(server) || + if (server->tcpStatus != CifsGood && + server->tcpStatus != CifsNew && server->tcpStatus != CifsNeedNegotiate) { + spin_unlock(&server->srv_lock); + return -EHOSTDOWN; + } + + if (!server->ops->need_neg(server) && + server->tcpStatus == CifsGood) { spin_unlock(&server->srv_lock); return 0; } + server->tcpStatus = CifsInNegotiate; spin_unlock(&server->srv_lock); @@ -3691,23 +3710,28 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, bool is_binding = false; spin_lock(&ses->ses_lock); + cifs_dbg(FYI, "%s: channel connect bitmap: 0x%lx\n", + __func__, ses->chans_need_reconnect); + if (ses->ses_status != SES_GOOD && ses->ses_status != SES_NEW && ses->ses_status != SES_NEED_RECON) { spin_unlock(&ses->ses_lock); - return 0; + return -EHOSTDOWN; } /* only send once per connect */ spin_lock(&ses->chan_lock); - if (CIFS_ALL_CHANS_GOOD(ses) || - cifs_chan_in_reconnect(ses, server)) { + if (CIFS_ALL_CHANS_GOOD(ses)) { + if (ses->ses_status == SES_NEED_RECON) + ses->ses_status = SES_GOOD; spin_unlock(&ses->chan_lock); spin_unlock(&ses->ses_lock); return 0; } - is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); + cifs_chan_set_in_reconnect(ses, server); + is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); spin_unlock(&ses->chan_lock); if (!is_binding) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index caeb92a69b5f4..a9fb95b7ef829 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -203,6 +203,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, } spin_unlock(&server->srv_lock); +again: rc = cifs_wait_for_server_reconnect(server, tcon->retry); if (rc) return rc; @@ -221,6 +222,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, nls_codepage = load_nls_default(); + mutex_lock(&ses->session_mutex); /* * Recheck after acquire mutex. If another thread is negotiating * and the server never sends an answer the socket will be closed @@ -229,6 +231,11 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, spin_lock(&server->srv_lock); if (server->tcpStatus == CifsNeedReconnect) { spin_unlock(&server->srv_lock); + mutex_unlock(&ses->session_mutex); + + if (tcon->retry) + goto again; + rc = -EHOSTDOWN; goto out; } @@ -238,19 +245,22 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, * need to prevent multiple threads trying to simultaneously * reconnect the same SMB session */ + spin_lock(&ses->ses_lock); spin_lock(&ses->chan_lock); - if (!cifs_chan_needs_reconnect(ses, server)) { + if (!cifs_chan_needs_reconnect(ses, server) && + ses->ses_status == SES_GOOD) { spin_unlock(&ses->chan_lock); - + spin_unlock(&ses->ses_lock); /* this means that we only need to tree connect */ if (tcon->need_reconnect) goto skip_sess_setup; + mutex_unlock(&ses->session_mutex); goto out; } spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); - mutex_lock(&ses->session_mutex); rc = cifs_negotiate_protocol(0, ses, server); if (!rc) { rc = cifs_setup_session(0, ses, server, nls_codepage); @@ -266,10 +276,8 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, mutex_unlock(&ses->session_mutex); goto out; } - mutex_unlock(&ses->session_mutex); skip_sess_setup: - mutex_lock(&ses->session_mutex); if (!tcon->need_reconnect) { mutex_unlock(&ses->session_mutex); goto out; @@ -284,7 +292,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); if (rc) { /* If sess reconnected but tcon didn't, something strange ... */ - pr_warn_once("reconnect tcon failed rc = %d\n", rc); + cifs_dbg(VFS, "reconnect tcon failed rc = %d\n", rc); goto out; } @@ -1256,9 +1264,9 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) if (rc) return rc; - spin_lock(&ses->chan_lock); - is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); - spin_unlock(&ses->chan_lock); + spin_lock(&ses->ses_lock); + is_binding = (ses->ses_status == SES_GOOD); + spin_unlock(&ses->ses_lock); if (is_binding) { req->hdr.SessionId = cpu_to_le64(ses->Suid); @@ -1416,9 +1424,9 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) goto out_put_spnego_key; } - spin_lock(&ses->chan_lock); - is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); - spin_unlock(&ses->chan_lock); + spin_lock(&ses->ses_lock); + is_binding = (ses->ses_status == SES_GOOD); + spin_unlock(&ses->ses_lock); /* keep session key if binding */ if (!is_binding) { @@ -1542,9 +1550,9 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data) cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n"); - spin_lock(&ses->chan_lock); - is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); - spin_unlock(&ses->chan_lock); + spin_lock(&ses->ses_lock); + is_binding = (ses->ses_status == SES_GOOD); + spin_unlock(&ses->ses_lock); /* keep existing ses id and flags if binding */ if (!is_binding) { @@ -1610,9 +1618,9 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data) rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base; - spin_lock(&ses->chan_lock); - is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); - spin_unlock(&ses->chan_lock); + spin_lock(&ses->ses_lock); + is_binding = (ses->ses_status == SES_GOOD); + spin_unlock(&ses->ses_lock); /* keep existing ses id and flags if binding */ if (!is_binding) { diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index d827b7547ffad..790acf65a0926 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -81,6 +81,7 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) struct cifs_ses *ses = NULL; int i; int rc = 0; + bool is_binding = false; spin_lock(&cifs_tcp_ses_lock); @@ -97,9 +98,12 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) goto out; found: + spin_lock(&ses->ses_lock); spin_lock(&ses->chan_lock); - if (cifs_chan_needs_reconnect(ses, server) && - !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) { + + is_binding = (cifs_chan_needs_reconnect(ses, server) && + ses->ses_status == SES_GOOD); + if (is_binding) { /* * If we are in the process of binding a new channel * to an existing session, use the master connection @@ -107,6 +111,7 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) */ memcpy(key, ses->smb3signingkey, SMB3_SIGN_KEY_SIZE); spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); goto out; } @@ -119,10 +124,12 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) if (chan->server == server) { memcpy(key, chan->signkey, SMB3_SIGN_KEY_SIZE); spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); goto out; } } spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); cifs_dbg(VFS, "%s: Could not find channel signing key for session 0x%llx\n", @@ -392,11 +399,15 @@ generate_smb3signingkey(struct cifs_ses *ses, bool is_binding = false; int chan_index = 0; + spin_lock(&ses->ses_lock); spin_lock(&ses->chan_lock); - is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); + is_binding = (cifs_chan_needs_reconnect(ses, server) && + ses->ses_status == SES_GOOD); + chan_index = cifs_ses_get_chan_index(ses, server); /* TODO: introduce ref counting for channels when the can be freed */ spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); /* * All channels use the same encryption/decryption keys but From fcde88af6a783d32e735dd2615528e2bf7a0f533 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 18 Mar 2023 20:58:40 -0700 Subject: [PATCH 440/898] xfs: pass the correct cursor to xfs_iomap_prealloc_size In xfs_buffered_write_iomap_begin, @icur is the iext cursor for the data fork and @ccur is the cursor for the cow fork. Pass in whichever cursor corresponds to allocfork, because otherwise the xfs_iext_prev_extent call can use the data fork cursor to walk off the end of the cow fork structure. Best case it returns the wrong results, worst case it does this: stack segment: 0000 [#1] PREEMPT SMP CPU: 2 PID: 3141909 Comm: fsstress Tainted: G W 6.3.0-rc2-xfsx #6.3.0-rc2 7bf5cc2e98997627cae5c930d890aba3aeec65dd Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20171121_152543-x86-ol7-builder-01.us.oracle.com-4.el7.1 04/01/2014 RIP: 0010:xfs_iext_prev+0x71/0x150 [xfs] RSP: 0018:ffffc90002233aa8 EFLAGS: 00010297 RAX: 000000000000000f RBX: 000000000000000e RCX: 000000000000000c RDX: 0000000000000002 RSI: 000000000000000e RDI: ffff8883d0019ba0 RBP: 989642409af8a7a7 R08: ffffea0000000001 R09: 0000000000000002 R10: 0000000000000000 R11: 000000000000000c R12: ffffc90002233b00 R13: ffff8883d0019ba0 R14: 989642409af8a6bf R15: 000ffffffffe0000 FS: 00007fdf8115f740(0000) GS:ffff88843fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fdf8115e000 CR3: 0000000357256000 CR4: 00000000003506e0 Call Trace: xfs_iomap_prealloc_size.constprop.0.isra.0+0x1a6/0x410 [xfs 619a268fb2406d68bd34e007a816b27e70abc22c] xfs_buffered_write_iomap_begin+0xa87/0xc60 [xfs 619a268fb2406d68bd34e007a816b27e70abc22c] iomap_iter+0x132/0x2f0 iomap_file_buffered_write+0x92/0x330 xfs_file_buffered_write+0xb1/0x330 [xfs 619a268fb2406d68bd34e007a816b27e70abc22c] vfs_write+0x2eb/0x410 ksys_write+0x65/0xe0 do_syscall_64+0x2b/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Found by xfs/538 in alwayscow mode, but this doesn't seem particular to that test. Fixes: 590b16516ef3 ("xfs: refactor xfs_iomap_prealloc_size") Actually-Fixes: 66ae56a53f0e ("xfs: introduce an always_cow mode") Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_iomap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 69dbe78141280..285885c308bd7 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1090,9 +1090,12 @@ xfs_buffered_write_iomap_begin( */ if (xfs_has_allocsize(mp)) prealloc_blocks = mp->m_allocsize_blocks; - else + else if (allocfork == XFS_DATA_FORK) prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork, offset, count, &icur); + else + prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork, + offset, count, &ccur); if (prealloc_blocks) { xfs_extlen_t align; xfs_off_t end_offset; From e2e63b071b2da53ad6a154e34c387bb064137f74 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 21 Mar 2023 16:33:20 -0700 Subject: [PATCH 441/898] xfs: clear incore AGFL_RESET state if it's not needed Prior to commit 7ac2ff8bb371, when we loaded the incore perag structure with information from the AGF header, we would set or clear the pagf_agfl_reset field based on whether or not the AGFL list was misaligned within the block. IOWs, it's an incore state bit that's supposed to cache something in the ondisk metadata. Therefore, the code still needs to support clearing the incore bit if (somehow) the AGFL were to correct itself. It turns out that xfs_repair does exactly this -- phase 4 loads the AGF to scan the rmapbt for corrupt records, which can set NEEDS_AGFL_RESET. The scan unsets AGF_INIT but doesn't unset NEEDS_AGFL_RESET. Phase 5 totally rewrites the AGFL and fixes the alignment problem, didn't clear NEEDS_AGFL_RESET historically, and reloads the perag state to fix the freelist. This results in the AGFL being reset based on stale data, which then causes the new AGFL blocks to be leaked. A subsequent xfs_repair -n then complains about the leaks. One could argue that phase 5 ought to clear this bit directly when it reloads the perag AGF data after rewriting the AGFL, but libxfs used to handle this for us, so it should go back to doing that. Found by fuzzing flfirst = ones in xfs/352. Fixes: 7ac2ff8bb371 ("xfs: perags need atomic operational state") Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_alloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 55ae08a6144c3..badc213384a4b 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3045,6 +3045,8 @@ xfs_alloc_read_agf( pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); if (xfs_agfl_needs_reset(pag->pag_mount, agf)) set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate); + else + clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate); /* * Update the in-core allocbt counter. Filter out the rmapbt From 2d0ab14634a26e54f8d6d231b47b7ef233e84599 Mon Sep 17 00:00:00 2001 From: Aymeric Wibo Date: Sun, 19 Mar 2023 03:12:05 +0100 Subject: [PATCH 442/898] ACPI: resource: Add Medion S17413 to IRQ override quirk Add DMI info of the Medion S17413 (board M1xA) to the IRQ override quirk table. This fixes the keyboard not working on these laptops. Link: https://bugzilla.kernel.org/show_bug.cgi?id=213031 Signed-off-by: Aymeric Wibo [ rjw: Fixed up white space ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 7c9125df5a651..7b4801ce62d6b 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -400,6 +400,13 @@ static const struct dmi_system_id medion_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "M17T"), }, }, + { + .ident = "MEDION S17413", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MEDION"), + DMI_MATCH(DMI_BOARD_NAME, "M1xA"), + }, + }, { } }; From 59513714f6659409adf717b0f85e0a6e35480a01 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Wed, 22 Mar 2023 18:36:32 +0800 Subject: [PATCH 443/898] net: wangxun: Fix vector length of interrupt cause There is 64-bit interrupt cause register for txgbe. Fix to clear upper 32 bits. Fixes: 3f703186113f ("net: libwx: Add irq flow functions") Signed-off-by: Jiawen Wu Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20230322103632.132011-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_type.h | 2 +- drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 2 +- drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 77d8d7f1707e9..97e2c1e13b806 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -222,7 +222,7 @@ #define WX_PX_INTA 0x110 #define WX_PX_GPIE 0x118 #define WX_PX_GPIE_MODEL BIT(0) -#define WX_PX_IC 0x120 +#define WX_PX_IC(_i) (0x120 + (_i) * 4) #define WX_PX_IMS(_i) (0x140 + (_i) * 4) #define WX_PX_IMC(_i) (0x150 + (_i) * 4) #define WX_PX_ISB_ADDR_L 0x160 diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index 5b564d348c091..17412e5282ded 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -352,7 +352,7 @@ static void ngbe_up(struct wx *wx) netif_tx_start_all_queues(wx->netdev); /* clear any pending interrupts, may auto mask */ - rd32(wx, WX_PX_IC); + rd32(wx, WX_PX_IC(0)); rd32(wx, WX_PX_MISC_IC); ngbe_irq_enable(wx, true); if (wx->gpio_ctrl) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 6c0a982305576..a58ce5463686a 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -229,7 +229,8 @@ static void txgbe_up_complete(struct wx *wx) wx_napi_enable_all(wx); /* clear any pending interrupts, may auto mask */ - rd32(wx, WX_PX_IC); + rd32(wx, WX_PX_IC(0)); + rd32(wx, WX_PX_IC(1)); rd32(wx, WX_PX_MISC_IC); txgbe_irq_enable(wx, true); From f6887a71bdd2f0dcba9b8180dd2223cfa8637e85 Mon Sep 17 00:00:00 2001 From: Jason Montleon Date: Fri, 24 Mar 2023 13:07:11 -0400 Subject: [PATCH 444/898] ASoC: hdac_hdmi: use set_stream() instead of set_tdm_slots() hdac_hdmi was not updated to use set_stream() instead of set_tdm_slots() in the original commit so HDMI no longer produces audio. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/regressions/CAJD_bPKQdtaExvVEKxhQ47G-ZXDA=k+gzhMJRHLBe=mysPnuKA@mail.gmail.com/ Fixes: 636110411ca7 ("ASoC: Intel/SOF: use set_stream() instead of set_tdm_slots() for HDAudio") Signed-off-by: Jason Montleon Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230324170711.2526-1-jmontleo@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hdmi.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index ed4f7cdda04ff..8b6b760296948 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -436,23 +436,28 @@ static int hdac_hdmi_setup_audio_infoframe(struct hdac_device *hdev, return 0; } -static int hdac_hdmi_set_tdm_slot(struct snd_soc_dai *dai, - unsigned int tx_mask, unsigned int rx_mask, - int slots, int slot_width) +static int hdac_hdmi_set_stream(struct snd_soc_dai *dai, + void *stream, int direction) { struct hdac_hdmi_priv *hdmi = snd_soc_dai_get_drvdata(dai); struct hdac_device *hdev = hdmi->hdev; struct hdac_hdmi_dai_port_map *dai_map; struct hdac_hdmi_pcm *pcm; + struct hdac_stream *hstream; - dev_dbg(&hdev->dev, "%s: strm_tag: %d\n", __func__, tx_mask); + if (!stream) + return -EINVAL; + + hstream = (struct hdac_stream *)stream; + + dev_dbg(&hdev->dev, "%s: strm_tag: %d\n", __func__, hstream->stream_tag); dai_map = &hdmi->dai_map[dai->id]; pcm = hdac_hdmi_get_pcm_from_cvt(hdmi, dai_map->cvt); if (pcm) - pcm->stream_tag = (tx_mask << 4); + pcm->stream_tag = (hstream->stream_tag << 4); return 0; } @@ -1544,7 +1549,7 @@ static const struct snd_soc_dai_ops hdmi_dai_ops = { .startup = hdac_hdmi_pcm_open, .shutdown = hdac_hdmi_pcm_close, .hw_params = hdac_hdmi_set_hw_params, - .set_tdm_slot = hdac_hdmi_set_tdm_slot, + .set_stream = hdac_hdmi_set_stream, }; /* From c24bb1a87dc3f2d77d410eaac2c6a295961bf50e Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 24 Mar 2023 16:05:19 -0300 Subject: [PATCH 445/898] cifs: fix missing unload_nls() in smb2_reconnect() Make sure to unload_nls() @nls_codepage if we no longer need it. Fixes: bc962159e8e3 ("cifs: avoid race conditions with parallel reconnects") Signed-off-by: Paulo Alcantara (SUSE) Cc: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index a9fb95b7ef829..20af1af34fa5d 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -144,7 +144,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, struct TCP_Server_Info *server) { int rc = 0; - struct nls_table *nls_codepage; + struct nls_table *nls_codepage = NULL; struct cifs_ses *ses; /* @@ -220,8 +220,6 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, tcon->ses->chans_need_reconnect, tcon->need_reconnect); - nls_codepage = load_nls_default(); - mutex_lock(&ses->session_mutex); /* * Recheck after acquire mutex. If another thread is negotiating @@ -241,6 +239,8 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, } spin_unlock(&server->srv_lock); + nls_codepage = load_nls_default(); + /* * need to prevent multiple threads trying to simultaneously * reconnect the same SMB session From 7e0e76d99079be13c9961dde7c93b2d1ee665af4 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 23 Mar 2023 15:10:26 -0500 Subject: [PATCH 446/898] smb3: lower default deferred close timeout to address perf regression Performance tests with large number of threads noted that the change of the default closetimeo (deferred close timeout between when close is done by application and when client has to send the close to the server), to 5 seconds from 1 second, significantly degraded perf in some cases like this (in the filebench example reported, the stats show close requests on the wire taking twice as long, and 50% regression in filebench perf). This is stil configurable via mount parm closetimeo, but to be safe, decrease default back to its previous value of 1 second. Reported-by: Yin Fengwei Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/997614df-10d4-af53-9571-edec36b0e2f3@intel.com/ Fixes: 5efdd9122eff ("smb3: allow deferred close timeout to be configurable") Cc: stable@vger.kernel.org # 6.0+ Tested-by: Yin Fengwei Reviewed-by: Paulo Alcantara (SUSE) Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/fs_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h index 1b8d4e27f831c..3de00e7127ec4 100644 --- a/fs/cifs/fs_context.h +++ b/fs/cifs/fs_context.h @@ -286,5 +286,5 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb); * max deferred close timeout (jiffies) - 2^30 */ #define SMB3_MAX_DCLOSETIMEO (1 << 30) -#define SMB3_DEF_DCLOSETIMEO (5 * HZ) /* Can increase later, other clients use larger */ +#define SMB3_DEF_DCLOSETIMEO (1 * HZ) /* even 1 sec enough to help eg open/write/close/open/read */ #endif From be4fde79812f02914e350bde0bc4cfeae8429378 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 24 Mar 2023 13:56:33 -0300 Subject: [PATCH 447/898] cifs: fix dentry lookups in directory handle cache Get rid of any prefix paths in @path before lookup_positive_unlocked() as it will call ->lookup() which already adds those prefix paths through build_path_from_dentry(). This has caused a performance regression when mounting shares with a prefix path where readdir(2) would end up retrying several times to open bad directory names that contained duplicate prefix paths. Fix this by skipping any prefix paths in @path before calling lookup_positive_unlocked(). Fixes: e4029e072673 ("cifs: find and use the dentry for cached non-root directories also") Cc: stable@vger.kernel.org # 6.1+ Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cached_dir.c | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cached_dir.c b/fs/cifs/cached_dir.c index 71fabb4c09a4b..bfc964b36c72e 100644 --- a/fs/cifs/cached_dir.c +++ b/fs/cifs/cached_dir.c @@ -99,6 +99,23 @@ path_to_dentry(struct cifs_sb_info *cifs_sb, const char *path) return dentry; } +static const char *path_no_prefix(struct cifs_sb_info *cifs_sb, + const char *path) +{ + size_t len = 0; + + if (!*path) + return path; + + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + cifs_sb->prepath) { + len = strlen(cifs_sb->prepath) + 1; + if (unlikely(len > strlen(path))) + return ERR_PTR(-EINVAL); + } + return path + len; +} + /* * Open the and cache a directory handle. * If error then *cfid is not initialized. @@ -125,6 +142,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, struct dentry *dentry = NULL; struct cached_fid *cfid; struct cached_fids *cfids; + const char *npath; if (tcon == NULL || tcon->cfids == NULL || tcon->nohandlecache || is_smb1_server(tcon->ses->server)) @@ -160,6 +178,20 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, return 0; } + /* + * Skip any prefix paths in @path as lookup_positive_unlocked() ends up + * calling ->lookup() which already adds those through + * build_path_from_dentry(). Also, do it earlier as we might reconnect + * below when trying to send compounded request and then potentially + * having a different prefix path (e.g. after DFS failover). + */ + npath = path_no_prefix(cifs_sb, path); + if (IS_ERR(npath)) { + rc = PTR_ERR(npath); + kfree(utf16_path); + return rc; + } + /* * We do not hold the lock for the open because in case * SMB2_open needs to reconnect. @@ -252,10 +284,10 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, (char *)&cfid->file_all_info)) cfid->file_all_info_is_valid = true; - if (!path[0]) + if (!npath[0]) dentry = dget(cifs_sb->root); else { - dentry = path_to_dentry(cifs_sb, path); + dentry = path_to_dentry(cifs_sb, npath); if (IS_ERR(dentry)) { rc = -ENOENT; goto oshr_free; From 491eafce1a51c457701351a4bf40733799745314 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 23 Mar 2023 16:20:02 -0500 Subject: [PATCH 448/898] smb3: fix unusable share after force unmount failure If user does forced unmount ("umount -f") while files are still open on the share (as was seen in a Kubernetes example running on SMB3.1.1 mount) then we were marking the share as "TID_EXITING" in umount_begin() which caused all subsequent operations (except write) to fail ... but unfortunately when umount_begin() is called we do not know yet that there are open files or active references on the share that would prevent unmount from succeeding. Kubernetes had example when they were doing umount -f when files were open which caused the share to become unusable until the files were closed (and the umount retried). Fix this so that TID_EXITING is not set until we are about to send the tree disconnect (not at the beginning of forced umounts in umount_begin) so that if "umount -f" fails (due to open files or references) the mount is still usable. Cc: stable@vger.kernel.org Reviewed-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 9 ++++++--- fs/cifs/cifssmb.c | 6 ++---- fs/cifs/connect.c | 1 + fs/cifs/smb2pdu.c | 8 ++------ 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index cbcf210d56e48..ac9034fce409d 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -731,13 +731,16 @@ static void cifs_umount_begin(struct super_block *sb) spin_lock(&tcon->tc_lock); if ((tcon->tc_count > 1) || (tcon->status == TID_EXITING)) { /* we have other mounts to same share or we have - already tried to force umount this and woken up + already tried to umount this and woken up all waiting network requests, nothing to do */ spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); return; - } else if (tcon->tc_count == 1) - tcon->status = TID_EXITING; + } + /* + * can not set tcon->status to TID_EXITING yet since we don't know if umount -f will + * fail later (e.g. due to open files). TID_EXITING will be set just before tdis req sent + */ spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a43c78396dd88..38a697eca3050 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -86,13 +86,11 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) /* * only tree disconnect, open, and write, (and ulogoff which does not - * have tcon) are allowed as we start force umount + * have tcon) are allowed as we start umount */ spin_lock(&tcon->tc_lock); if (tcon->status == TID_EXITING) { - if (smb_command != SMB_COM_WRITE_ANDX && - smb_command != SMB_COM_OPEN_ANDX && - smb_command != SMB_COM_TREE_DISCONNECT) { + if (smb_command != SMB_COM_TREE_DISCONNECT) { spin_unlock(&tcon->tc_lock); cifs_dbg(FYI, "can not send cmd %d while umounting\n", smb_command); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index c3162ef9c9e93..1cbb905879957 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2324,6 +2324,7 @@ cifs_put_tcon(struct cifs_tcon *tcon) WARN_ON(tcon->tc_count < 0); list_del_init(&tcon->tcon_list); + tcon->status = TID_EXITING; spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 20af1af34fa5d..6bd2aa6af18f3 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -165,13 +165,9 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, spin_lock(&tcon->tc_lock); if (tcon->status == TID_EXITING) { /* - * only tree disconnect, open, and write, - * (and ulogoff which does not have tcon) - * are allowed as we start force umount. + * only tree disconnect allowed when disconnecting ... */ - if ((smb2_command != SMB2_WRITE) && - (smb2_command != SMB2_CREATE) && - (smb2_command != SMB2_TREE_DISCONNECT)) { + if (smb2_command != SMB2_TREE_DISCONNECT) { spin_unlock(&tcon->tc_lock); cifs_dbg(FYI, "can not send cmd %d while umounting\n", smb2_command); From 4dfb02d5cae80289384c4d3c6ddfbd92d30aced9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 24 Mar 2023 13:14:48 -0700 Subject: [PATCH 449/898] xfs: fix mismerged tracepoints At some point in between sending this patch to the list and merging it into for-next, the tracepoints got all mixed up because I've over-reliant on automated tools not sucking. The end result is that the tracepoints are all wrong, so fix them. Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_alloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index badc213384a4b..203f16c48c199 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3413,7 +3413,7 @@ xfs_alloc_vextent_start_ag( args->agno = NULLAGNUMBER; args->agbno = NULLAGBLOCK; - trace_xfs_alloc_vextent_first_ag(args); + trace_xfs_alloc_vextent_start_ag(args); error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { @@ -3466,7 +3466,7 @@ xfs_alloc_vextent_first_ag( args->agno = NULLAGNUMBER; args->agbno = NULLAGBLOCK; - trace_xfs_alloc_vextent_start_ag(args); + trace_xfs_alloc_vextent_first_ag(args); error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { @@ -3500,7 +3500,7 @@ xfs_alloc_vextent_exact_bno( args->agno = XFS_FSB_TO_AGNO(mp, target); args->agbno = XFS_FSB_TO_AGBNO(mp, target); - trace_xfs_alloc_vextent_near_bno(args); + trace_xfs_alloc_vextent_exact_bno(args); error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { @@ -3538,7 +3538,7 @@ xfs_alloc_vextent_near_bno( args->agno = XFS_FSB_TO_AGNO(mp, target); args->agbno = XFS_FSB_TO_AGBNO(mp, target); - trace_xfs_alloc_vextent_exact_bno(args); + trace_xfs_alloc_vextent_near_bno(args); error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { From ca4a80e4bb7e87daf33b27d2ab9e4f5311018a89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Thu, 23 Mar 2023 09:34:17 +0100 Subject: [PATCH 450/898] sfc: ef10: don't overwrite offload features at NIC reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At NIC reset, some offload features related to encapsulated traffic might have changed (this mainly happens if the firmware-variant is changed with the sfboot userspace tool). Because of this, features are checked and set again at reset time. However, this was not done right, and some features were improperly overwritten at NIC reset: - Tunneled IPv6 segmentation was always disabled - Features disabled with ethtool were reenabled - Features that becomes unsupported after the reset were not disabled Also, checking if the device supports IPV6_CSUM to enable TSO6 is no longer necessary because all currently supported devices support it. Additionally, move the assignment of some other features to the EF10_OFFLOAD_FEATURES macro, like it is done in ef100, leaving the selection of features in efx_pci_probe_post_io a bit cleaner. Fixes: ffffd2454a7a ("sfc: correctly advertise tunneled IPv6 segmentation") Fixes: 24b2c3751aa3 ("sfc: advertise encapsulated offloads on EF10") Reported-by: Tianhao Zhao Suggested-by: Jonathan Cooper Tested-by: Jonathan Cooper Signed-off-by: Íñigo Huguet Acked-by: Edward Cree Link: https://lore.kernel.org/r/20230323083417.7345-1-ihuguet@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/ef10.c | 38 ++++++++++++++++++++++----------- drivers/net/ethernet/sfc/efx.c | 17 ++++++--------- 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 7022fb2005a2f..d30459dbfe8f8 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1304,7 +1304,8 @@ static void efx_ef10_fini_nic(struct efx_nic *efx) static int efx_ef10_init_nic(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; - netdev_features_t hw_enc_features = 0; + struct net_device *net_dev = efx->net_dev; + netdev_features_t tun_feats, tso_feats; int rc; if (nic_data->must_check_datapath_caps) { @@ -1349,20 +1350,30 @@ static int efx_ef10_init_nic(struct efx_nic *efx) nic_data->must_restore_piobufs = false; } - /* add encapsulated checksum offload features */ + /* encap features might change during reset if fw variant changed */ if (efx_has_cap(efx, VXLAN_NVGRE) && !efx_ef10_is_vf(efx)) - hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; - /* add encapsulated TSO features */ - if (efx_has_cap(efx, TX_TSO_V2_ENCAP)) { - netdev_features_t encap_tso_features; + net_dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + else + net_dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); - encap_tso_features = NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | - NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM; + tun_feats = NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM; + tso_feats = NETIF_F_TSO | NETIF_F_TSO6; - hw_enc_features |= encap_tso_features | NETIF_F_TSO; - efx->net_dev->features |= encap_tso_features; + if (efx_has_cap(efx, TX_TSO_V2_ENCAP)) { + /* If this is first nic_init, or if it is a reset and a new fw + * variant has added new features, enable them by default. + * If the features are not new, maintain their current value. + */ + if (!(net_dev->hw_features & tun_feats)) + net_dev->features |= tun_feats; + net_dev->hw_enc_features |= tun_feats | tso_feats; + net_dev->hw_features |= tun_feats; + } else { + net_dev->hw_enc_features &= ~(tun_feats | tso_feats); + net_dev->hw_features &= ~tun_feats; + net_dev->features &= ~tun_feats; } - efx->net_dev->hw_enc_features = hw_enc_features; /* don't fail init if RSS setup doesn't work */ rc = efx->type->rx_push_rss_config(efx, false, @@ -4021,7 +4032,10 @@ static unsigned int efx_ef10_recycle_ring_size(const struct efx_nic *efx) NETIF_F_HW_VLAN_CTAG_FILTER | \ NETIF_F_IPV6_CSUM | \ NETIF_F_RXHASH | \ - NETIF_F_NTUPLE) + NETIF_F_NTUPLE | \ + NETIF_F_SG | \ + NETIF_F_RXCSUM | \ + NETIF_F_RXALL) const struct efx_nic_type efx_hunt_a0_vf_nic_type = { .is_vf = true, diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 02c2adeb0a120..884d8d1688625 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1001,21 +1001,18 @@ static int efx_pci_probe_post_io(struct efx_nic *efx) } /* Determine netdevice features */ - net_dev->features |= (efx->type->offload_features | NETIF_F_SG | - NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL); - if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) { - net_dev->features |= NETIF_F_TSO6; - if (efx_has_cap(efx, TX_TSO_V2_ENCAP)) - net_dev->hw_enc_features |= NETIF_F_TSO6; - } - /* Check whether device supports TSO */ - if (!efx->type->tso_versions || !efx->type->tso_versions(efx)) - net_dev->features &= ~NETIF_F_ALL_TSO; + net_dev->features |= efx->type->offload_features; + + /* Add TSO features */ + if (efx->type->tso_versions && efx->type->tso_versions(efx)) + net_dev->features |= NETIF_F_TSO | NETIF_F_TSO6; + /* Mask for features that also apply to VLAN devices */ net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_ALL_TSO | NETIF_F_RXCSUM); + /* Determine user configurable features */ net_dev->hw_features |= net_dev->features & ~efx->fixed_features; /* Disable receiving frames with bad FCS, by default. */ From 82e2c39f9ef78896e9b634dfd82dc042e6956bb7 Mon Sep 17 00:00:00 2001 From: Josua Mayer Date: Thu, 23 Mar 2023 12:25:36 +0200 Subject: [PATCH 451/898] net: phy: dp83869: fix default value for tx-/rx-internal-delay dp83869 internally uses a look-up table for mapping supported delays in nanoseconds to register values. When specific delays are defined in device-tree, phy_get_internal_delay does the lookup automatically returning an index. The default case wrongly assigns the nanoseconds value from the lookup table, resulting in numeric value 2000 applied to delay configuration register, rather than the expected index values 0-7 (7 for 2000). Ultimately this issue broke RX for 1Gbps links. Fix default delay configuration by assigning the intended index value directly. Cc: stable@vger.kernel.org Fixes: 736b25afe284 ("net: dp83869: Add RGMII internal delay configuration") Co-developed-by: Yazan Shhady Signed-off-by: Yazan Shhady Signed-off-by: Josua Mayer Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230323102536.31988-1-josua@solid-run.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83869.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c index b4ff9c5073a3c..9ab5eff502b71 100644 --- a/drivers/net/phy/dp83869.c +++ b/drivers/net/phy/dp83869.c @@ -588,15 +588,13 @@ static int dp83869_of_init(struct phy_device *phydev) &dp83869_internal_delay[0], delay_size, true); if (dp83869->rx_int_delay < 0) - dp83869->rx_int_delay = - dp83869_internal_delay[DP83869_CLK_DELAY_DEF]; + dp83869->rx_int_delay = DP83869_CLK_DELAY_DEF; dp83869->tx_int_delay = phy_get_internal_delay(phydev, dev, &dp83869_internal_delay[0], delay_size, false); if (dp83869->tx_int_delay < 0) - dp83869->tx_int_delay = - dp83869_internal_delay[DP83869_CLK_DELAY_DEF]; + dp83869->tx_int_delay = DP83869_CLK_DELAY_DEF; return ret; } From 0808ed6ebbc292222ca069d339744870f6d801da Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Fri, 24 Mar 2023 14:52:49 +0100 Subject: [PATCH 452/898] scsi: megaraid_sas: Fix fw_crash_buffer_show() If crash_dump_buf is not allocated then crash dump can't be available. Replace logical 'and' with 'or'. Signed-off-by: Tomas Henzl Link: https://lore.kernel.org/r/20230324135249.9733-1-thenzl@redhat.com Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 3ceece9883383..c895189375e2b 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -3298,7 +3298,7 @@ fw_crash_buffer_show(struct device *cdev, spin_lock_irqsave(&instance->crashdump_lock, flags); buff_offset = instance->fw_crash_buffer_offset; - if (!instance->crash_dump_buf && + if (!instance->crash_dump_buf || !((instance->fw_crash_state == AVAILABLE) || (instance->fw_crash_state == COPYING))) { dev_err(&instance->pdev->dev, From 2309df27111a51734cb9240b4d3c25f2f3c6ab06 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Fri, 24 Mar 2023 16:01:34 +0100 Subject: [PATCH 453/898] scsi: megaraid_sas: Fix crash after a double completion When a physical disk is attached directly "without JBOD MAP support" (see megasas_get_tm_devhandle()) then there is no real error handling in the driver. Return FAILED instead of SUCCESS. Fixes: 18365b138508 ("megaraid_sas: Task management support") Signed-off-by: Tomas Henzl Link: https://lore.kernel.org/r/20230324150134.14696-1-thenzl@redhat.com Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 84c9a55a5794c..8a83f3fc2b865 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -4771,7 +4771,7 @@ int megasas_task_abort_fusion(struct scsi_cmnd *scmd) devhandle = megasas_get_tm_devhandle(scmd->device); if (devhandle == (u16)ULONG_MAX) { - ret = SUCCESS; + ret = FAILED; sdev_printk(KERN_INFO, scmd->device, "task abort issued for invalid devhandle\n"); mutex_unlock(&instance->reset_mutex); @@ -4841,7 +4841,7 @@ int megasas_reset_target_fusion(struct scsi_cmnd *scmd) devhandle = megasas_get_tm_devhandle(scmd->device); if (devhandle == (u16)ULONG_MAX) { - ret = SUCCESS; + ret = FAILED; sdev_printk(KERN_INFO, scmd->device, "target reset issued for invalid devhandle\n"); mutex_unlock(&instance->reset_mutex); From f0aa59a33d2ac2267d260fe21eaf92500df8e7b4 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 22 Mar 2023 11:22:11 +0900 Subject: [PATCH 454/898] scsi: core: Improve scsi_vpd_inquiry() checks Some USB-SATA adapters have broken behavior when an unsupported VPD page is probed: Depending on the VPD page number, a 4-byte header with a valid VPD page number but with a 0 length is returned. Currently, scsi_vpd_inquiry() only checks that the page number is valid to determine if the page is valid, which results in receiving only the 4-byte header for the non-existent page. This error manifests itself very often with page 0xb9 for the Concurrent Positioning Ranges detection done by sd_read_cpr(), resulting in the following error message: sd 0:0:0:0: [sda] Invalid Concurrent Positioning Ranges VPD page Prevent such misleading error message by adding a check in scsi_vpd_inquiry() to verify that the page length is not 0. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20230322022211.116327-1-damien.lemoal@opensource.wdc.com Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 5cce1ba70fc60..09ef0b31dfc09 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -314,11 +314,18 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer, if (result) return -EIO; - /* Sanity check that we got the page back that we asked for */ + /* + * Sanity check that we got the page back that we asked for and that + * the page size is not 0. + */ if (buffer[1] != page) return -EIO; - return get_unaligned_be16(&buffer[2]) + 4; + result = get_unaligned_be16(&buffer[2]); + if (!result) + return -EIO; + + return result + 4; } static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page) From d684a7a26f7d2c7122a4581ac966ed64e88fb29c Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Fri, 24 Mar 2023 12:32:04 -0700 Subject: [PATCH 455/898] scsi: mpt3sas: Don't print sense pool info twice _base_allocate_sense_dma_pool() already prints out the sense pool information, so don't print it a second time after calling it in _base_allocate_memory_pools(). In addition the version in _base_allocate_memory_pools() was using the wrong size value, sz, which was last assigned when doing some nvme calculations instead of sense_sz to determine the pool size in kilobytes. Cc: Sathya Prakash Cc: Sreekanth Reddy Cc: Suganath Prabu Subramani Cc: MPT-FusionLinux.pdl@broadcom.com Cc: "Martin K. Petersen" Cc: "James E.J. Bottomley" Fixes: 970ac2bb70e7 ("scsi: mpt3sas: Force sense buffer allocations to be within same 4 GB region") Signed-off-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20230324193204.567932-1-jsnitsel@redhat.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 2ee9ea57554d7..14ae0a9c5d3d8 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -6616,11 +6616,6 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) else if (rc == -EAGAIN) goto try_32bit_dma; total_sz += sense_sz; - ioc_info(ioc, - "sense pool(0x%p)- dma(0x%llx): depth(%d)," - "element_size(%d), pool_size(%d kB)\n", - ioc->sense, (unsigned long long)ioc->sense_dma, ioc->scsiio_depth, - SCSI_SENSE_BUFFERSIZE, sz / 1024); /* reply pool, 4 byte align */ sz = ioc->reply_free_queue_depth * ioc->reply_sz; rc = _base_allocate_reply_pool(ioc, sz); From b93eb564869321d0dffaf23fcc5c88112ed62466 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Thu, 23 Mar 2023 11:37:35 +0100 Subject: [PATCH 456/898] net: dsa: realtek: fix out-of-bounds access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function sets priv->chip_data to (void *)priv + sizeof(*priv) with the expectation that priv has enough trailing space. However, only realtek-smi actually allocated this chip_data space. Do likewise in realtek-mdio to fix out-of-bounds accesses. These accesses likely went unnoticed so far, because of an (unused) buf[4096] member in struct realtek_priv, which caused kmalloc to round up the allocated buffer to a big enough size, so nothing of value was overwritten. With a different allocator (like in the barebox bootloader port of the driver) or with KASAN, the memory corruption becomes quickly apparent. Fixes: aac94001067d ("net: dsa: realtek: add new mdio interface for drivers") Reviewed-by: Florian Fainelli Reviewed-by: Luiz Angelo Daros de Luca Reviewed-by: Alvin Šipraga Reviewed-by: Linus Walleij Signed-off-by: Ahmad Fatoum Link: https://lore.kernel.org/r/20230323103735.2331786-1-a.fatoum@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/dsa/realtek/realtek-mdio.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/realtek/realtek-mdio.c b/drivers/net/dsa/realtek/realtek-mdio.c index 3e54fac5f9027..5a8fe707ca25e 100644 --- a/drivers/net/dsa/realtek/realtek-mdio.c +++ b/drivers/net/dsa/realtek/realtek-mdio.c @@ -21,6 +21,7 @@ #include #include +#include #include #include "realtek.h" @@ -152,7 +153,9 @@ static int realtek_mdio_probe(struct mdio_device *mdiodev) if (!var) return -EINVAL; - priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL); + priv = devm_kzalloc(&mdiodev->dev, + size_add(sizeof(*priv), var->chip_data_sz), + GFP_KERNEL); if (!priv) return -ENOMEM; From 3bced313b9a5a237c347e0f079c8c2fe4b3935aa Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Thu, 23 Mar 2023 13:07:21 -0700 Subject: [PATCH 457/898] vmxnet3: use gro callback when UPT is enabled Currently, vmxnet3 uses GRO callback only if LRO is disabled. However, on smartNic based setups where UPT is supported, LRO can be enabled from guest VM but UPT devicve does not support LRO as of now. In such cases, there can be performance degradation as GRO is not being done. This patch fixes this issue by calling GRO API when UPT is enabled. We use updateRxProd to determine if UPT mode is active or not. To clarify few things discussed over the thread: The patch is not neglecting any feature bits nor disabling GRO. It uses GRO callback when UPT is active as LRO is not available in UPT. GRO callback cannot be used as default for all cases as it degrades performance for non-UPT cases or for cases when LRO is already done in ESXi. Cc: stable@vger.kernel.org Fixes: 6f91f4ba046e ("vmxnet3: add support for capability registers") Signed-off-by: Ronak Doshi Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230323200721.27622-1-doshir@vmware.com Signed-off-by: Jakub Kicinski --- drivers/net/vmxnet3/vmxnet3_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 682987040ea82..da488cbb05428 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1688,7 +1688,9 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, if (unlikely(rcd->ts)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci); - if (adapter->netdev->features & NETIF_F_LRO) + /* Use GRO callback if UPT is enabled */ + if ((adapter->netdev->features & NETIF_F_LRO) && + !rq->shared->updateRxProd) netif_receive_skb(skb); else napi_gro_receive(&rq->napi, skb); From f33642224e38d7e0d59336e10e7b4e370b1c4506 Mon Sep 17 00:00:00 2001 From: SongJingyi Date: Fri, 24 Mar 2023 11:14:06 +0800 Subject: [PATCH 458/898] ptp_qoriq: fix memory leak in probe() Smatch complains that: drivers/ptp/ptp_qoriq.c ptp_qoriq_probe() warn: 'base' from ioremap() not released. Fix this by revising the parameter from 'ptp_qoriq->base' to 'base'. This is only a bug if ptp_qoriq_init() returns on the first -ENODEV error path. For other error paths ptp_qoriq->base and base are the same. And this change makes the code more readable. Fixes: 7f4399ba405b ("ptp_qoriq: fix NULL access if ptp dt node missing") Signed-off-by: SongJingyi Reviewed-by: Dan Carpenter Reviewed-by: Dongliang Mu Link: https://lore.kernel.org/r/20230324031406.1895159-1-u201912584@hust.edu.cn Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_qoriq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index 61530167efe4a..350154e4c2b54 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -637,7 +637,7 @@ static int ptp_qoriq_probe(struct platform_device *dev) return 0; no_clock: - iounmap(ptp_qoriq->base); + iounmap(base); no_ioremap: release_resource(ptp_qoriq->rsrc); no_resource: From 88e943e83827a349f70c3464b3eba7260be7461d Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 24 Mar 2023 09:06:03 +0100 Subject: [PATCH 459/898] net: dsa: microchip: ksz8: fix ksz8_fdb_dump() Before this patch, the ksz8_fdb_dump() function had several issues, such as uninitialized variables and incorrect usage of source port as a bit mask. These problems caused inaccurate reporting of vid information and port assignment in the bridge fdb. Fixes: e587be759e6e ("net: dsa: microchip: update fdb add/del/dump in ksz_common") Signed-off-by: Oleksij Rempel Acked-by: Arun Ramadoss Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz8795.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 003b0ac2854c9..3fffd5da8d3b0 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -958,15 +958,14 @@ int ksz8_fdb_dump(struct ksz_device *dev, int port, u16 entries = 0; u8 timestamp = 0; u8 fid; - u8 member; - struct alu_struct alu; + u8 src_port; + u8 mac[ETH_ALEN]; do { - alu.is_static = false; - ret = ksz8_r_dyn_mac_table(dev, i, alu.mac, &fid, &member, + ret = ksz8_r_dyn_mac_table(dev, i, mac, &fid, &src_port, ×tamp, &entries); - if (!ret && (member & BIT(port))) { - ret = cb(alu.mac, alu.fid, alu.is_static, data); + if (!ret && port == src_port) { + ret = cb(mac, fid, false, data); if (ret) break; } From 5d90492dd4ff50ad65c582c76c345d0b90001728 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 24 Mar 2023 09:06:04 +0100 Subject: [PATCH 460/898] net: dsa: microchip: ksz8: fix ksz8_fdb_dump() to extract all 1024 entries Current ksz8_fdb_dump() is able to extract only max 249 entries on the ksz8863/ksz8873 series of switches. This happened due to wrong bit mask and offset calculation. This commit corrects the issue and allows for the complete extraction of all 1024 entries. Fixes: 4b20a07e103f ("net: dsa: microchip: ksz8795: add support for ksz88xx chips") Signed-off-by: Oleksij Rempel Acked-by: Arun Ramadoss Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz_common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 7fc2155d93d6e..3a1afc9f46216 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -407,10 +407,10 @@ static const u32 ksz8863_masks[] = { [STATIC_MAC_TABLE_FID] = GENMASK(29, 26), [STATIC_MAC_TABLE_OVERRIDE] = BIT(20), [STATIC_MAC_TABLE_FWD_PORTS] = GENMASK(18, 16), - [DYNAMIC_MAC_TABLE_ENTRIES_H] = GENMASK(5, 0), + [DYNAMIC_MAC_TABLE_ENTRIES_H] = GENMASK(1, 0), [DYNAMIC_MAC_TABLE_MAC_EMPTY] = BIT(7), [DYNAMIC_MAC_TABLE_NOT_READY] = BIT(7), - [DYNAMIC_MAC_TABLE_ENTRIES] = GENMASK(31, 28), + [DYNAMIC_MAC_TABLE_ENTRIES] = GENMASK(31, 24), [DYNAMIC_MAC_TABLE_FID] = GENMASK(19, 16), [DYNAMIC_MAC_TABLE_SRC_PORT] = GENMASK(21, 20), [DYNAMIC_MAC_TABLE_TIMESTAMP] = GENMASK(23, 22), @@ -420,7 +420,7 @@ static u8 ksz8863_shifts[] = { [VLAN_TABLE_MEMBERSHIP_S] = 16, [STATIC_MAC_FWD_PORTS] = 16, [STATIC_MAC_FID] = 22, - [DYNAMIC_MAC_ENTRIES_H] = 3, + [DYNAMIC_MAC_ENTRIES_H] = 8, [DYNAMIC_MAC_ENTRIES] = 24, [DYNAMIC_MAC_FID] = 16, [DYNAMIC_MAC_TIMESTAMP] = 24, From b3177aab89be540dc50d2328427b073361093e38 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 24 Mar 2023 09:06:05 +0100 Subject: [PATCH 461/898] net: dsa: microchip: ksz8: fix offset for the timestamp filed We are using wrong offset, so we will get not a timestamp. Fixes: 4b20a07e103f ("net: dsa: microchip: ksz8795: add support for ksz88xx chips") Signed-off-by: Oleksij Rempel Acked-by: Arun Ramadoss Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 3a1afc9f46216..c914449645caf 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -423,7 +423,7 @@ static u8 ksz8863_shifts[] = { [DYNAMIC_MAC_ENTRIES_H] = 8, [DYNAMIC_MAC_ENTRIES] = 24, [DYNAMIC_MAC_FID] = 16, - [DYNAMIC_MAC_TIMESTAMP] = 24, + [DYNAMIC_MAC_TIMESTAMP] = 22, [DYNAMIC_MAC_SRC_PORT] = 20, }; From 492606cdc74804d372ab1bdb8f3ef4a6fb6f9f59 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 24 Mar 2023 09:06:06 +0100 Subject: [PATCH 462/898] net: dsa: microchip: ksz8: ksz8_fdb_dump: avoid extracting ghost entry from empty dynamic MAC table. If the dynamic MAC table is empty, we will still extract one outdated entry. Fix it by using correct bit offset. Fixes: 4b20a07e103f ("net: dsa: microchip: ksz8795: add support for ksz88xx chips") Signed-off-by: Oleksij Rempel Acked-by: Arun Ramadoss Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index c914449645caf..4929fb29ed061 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -408,7 +408,7 @@ static const u32 ksz8863_masks[] = { [STATIC_MAC_TABLE_OVERRIDE] = BIT(20), [STATIC_MAC_TABLE_FWD_PORTS] = GENMASK(18, 16), [DYNAMIC_MAC_TABLE_ENTRIES_H] = GENMASK(1, 0), - [DYNAMIC_MAC_TABLE_MAC_EMPTY] = BIT(7), + [DYNAMIC_MAC_TABLE_MAC_EMPTY] = BIT(2), [DYNAMIC_MAC_TABLE_NOT_READY] = BIT(7), [DYNAMIC_MAC_TABLE_ENTRIES] = GENMASK(31, 24), [DYNAMIC_MAC_TABLE_FID] = GENMASK(19, 16), From 392ff7a84cbca34118ca286dfbfe8aee24605897 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 24 Mar 2023 09:06:07 +0100 Subject: [PATCH 463/898] net: dsa: microchip: ksz8863_smi: fix bulk access Current regmap bulk access is broken, resulting to wrong reads/writes if ksz_read64/ksz_write64 functions are used. Mostly this issue was visible by using ksz8_fdb_dump(), which returned corrupt MAC address. The reason is that regmap was configured to have max_raw_read/write, even if ksz8863_mdio_read/write functions are able to handle unlimited read/write accesses. On ksz_read64 function we are using multiple 32bit accesses by incrementing each access by 1 instead of 4. Resulting buffer had 01234567.12345678 instead of 01234567.89abcdef. We have multiple ways to fix it: - enable 4 byte alignment for 32bit accesses. Since the HW do not have this requirement. It will break driver. - disable max_raw_* limit. This patch is removing max_raw_* limit for regmap accesses in ksz8863_smi. Fixes: 60a364760002 ("net: dsa: microchip: Add Microchip KSZ8863 SMI based driver support") Signed-off-by: Oleksij Rempel Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz8863_smi.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz8863_smi.c b/drivers/net/dsa/microchip/ksz8863_smi.c index 2f4623f3bd852..3698112138b78 100644 --- a/drivers/net/dsa/microchip/ksz8863_smi.c +++ b/drivers/net/dsa/microchip/ksz8863_smi.c @@ -82,22 +82,16 @@ static const struct regmap_bus regmap_smi[] = { { .read = ksz8863_mdio_read, .write = ksz8863_mdio_write, - .max_raw_read = 1, - .max_raw_write = 1, }, { .read = ksz8863_mdio_read, .write = ksz8863_mdio_write, .val_format_endian_default = REGMAP_ENDIAN_BIG, - .max_raw_read = 2, - .max_raw_write = 2, }, { .read = ksz8863_mdio_read, .write = ksz8863_mdio_write, .val_format_endian_default = REGMAP_ENDIAN_BIG, - .max_raw_read = 4, - .max_raw_write = 4, } }; @@ -108,7 +102,6 @@ static const struct regmap_config ksz8863_regmap_config[] = { .pad_bits = 24, .val_bits = 8, .cache_type = REGCACHE_NONE, - .use_single_read = 1, .lock = ksz_regmap_lock, .unlock = ksz_regmap_unlock, }, @@ -118,7 +111,6 @@ static const struct regmap_config ksz8863_regmap_config[] = { .pad_bits = 24, .val_bits = 16, .cache_type = REGCACHE_NONE, - .use_single_read = 1, .lock = ksz_regmap_lock, .unlock = ksz_regmap_unlock, }, @@ -128,7 +120,6 @@ static const struct regmap_config ksz8863_regmap_config[] = { .pad_bits = 24, .val_bits = 32, .cache_type = REGCACHE_NONE, - .use_single_read = 1, .lock = ksz_regmap_lock, .unlock = ksz_regmap_unlock, } From 9aa5757e1f71d85facdc3c98028762cbab8d15c7 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 24 Mar 2023 09:06:08 +0100 Subject: [PATCH 464/898] net: dsa: microchip: ksz8: fix MDB configuration with non-zero VID FID is directly mapped to VID. However, configuring a MAC address with a VID != 0 resulted in incorrect configuration due to an incorrect bit mask. This kernel commit fixed the issue by correcting the bit mask and ensuring proper configuration of MAC addresses with non-zero VID. Fixes: 4b20a07e103f ("net: dsa: microchip: ksz8795: add support for ksz88xx chips") Signed-off-by: Oleksij Rempel Acked-by: Arun Ramadoss Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 4929fb29ed061..74c56d05ab0b9 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -404,7 +404,7 @@ static const u32 ksz8863_masks[] = { [VLAN_TABLE_VALID] = BIT(19), [STATIC_MAC_TABLE_VALID] = BIT(19), [STATIC_MAC_TABLE_USE_FID] = BIT(21), - [STATIC_MAC_TABLE_FID] = GENMASK(29, 26), + [STATIC_MAC_TABLE_FID] = GENMASK(25, 22), [STATIC_MAC_TABLE_OVERRIDE] = BIT(20), [STATIC_MAC_TABLE_FWD_PORTS] = GENMASK(18, 16), [DYNAMIC_MAC_TABLE_ENTRIES_H] = GENMASK(1, 0), From 33189f0a94b9639c058781fcf82e4ea3803b1682 Mon Sep 17 00:00:00 2001 From: ChunHao Lin Date: Thu, 23 Mar 2023 22:33:09 +0800 Subject: [PATCH 465/898] r8169: fix RTL8168H and RTL8107E rx crc error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When link speed is 10 Mbps and temperature is under -20°C, RTL8168H and RTL8107E may have rx crc error. Disable phy 10 Mbps pll off to fix this issue. Fixes: 6e1d0b898818 ("r8169:add support for RTL8168H and RTL8107E") Signed-off-by: ChunHao Lin Reviewed-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_phy_config.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c index 930496cd34ed0..b50f16786c246 100644 --- a/drivers/net/ethernet/realtek/r8169_phy_config.c +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -826,6 +826,9 @@ static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp, /* disable phy pfm mode */ phy_modify_paged(phydev, 0x0a44, 0x11, BIT(7), 0); + /* disable 10m pll off */ + phy_modify_paged(phydev, 0x0a43, 0x10, BIT(0), 0); + rtl8168g_disable_aldps(phydev); rtl8168g_config_eee_phy(phydev); } From e416ea62a9166e6075a07a970cc5bf79255d2700 Mon Sep 17 00:00:00 2001 From: Marios Makassikis Date: Thu, 16 Mar 2023 14:40:43 +0100 Subject: [PATCH 466/898] ksmbd: do not call kvmalloc() with __GFP_NORETRY | __GFP_NO_WARN Commit 83dcedd5540d ("ksmbd: fix infinite loop in ksmbd_conn_handler_loop()"), changes GFP modifiers passed to kvmalloc(). This cause xfstests generic/551 test to fail. We limit pdu length size according to connection status and maximum number of connections. In the rest, memory allocation of request is limited by credit management. so these flags are no longer needed. Fixes: 83dcedd5540d ("ksmbd: fix infinite loop in ksmbd_conn_handler_loop()") Cc: stable@vger.kernel.org Signed-off-by: Marios Makassikis Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/connection.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index 115a67d2cf785..3f5dfebaa0418 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -326,10 +326,7 @@ int ksmbd_conn_handler_loop(void *p) /* 4 for rfc1002 length field */ size = pdu_size + 4; - conn->request_buf = kvmalloc(size, - GFP_KERNEL | - __GFP_NOWARN | - __GFP_NORETRY); + conn->request_buf = kvmalloc(size, GFP_KERNEL); if (!conn->request_buf) break; From 2824861773eb512b37547516d81ef78108032cb2 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 24 Mar 2023 13:30:56 -0400 Subject: [PATCH 467/898] ksmbd: remove unused is_char_allowed function clang with W=1 reports fs/ksmbd/unicode.c:122:19: error: unused function 'is_char_allowed' [-Werror,-Wunused-function] static inline int is_char_allowed(char *ch) ^ This function is not used so remove it. Signed-off-by: Tom Rix Reviewed-by: Sergey Senozhatsky Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/unicode.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/fs/ksmbd/unicode.c b/fs/ksmbd/unicode.c index a0db699ddafda..9ae676906ed39 100644 --- a/fs/ksmbd/unicode.c +++ b/fs/ksmbd/unicode.c @@ -113,24 +113,6 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, goto out; } -/* - * is_char_allowed() - check for valid character - * @ch: input character to be checked - * - * Return: 1 if char is allowed, otherwise 0 - */ -static inline int is_char_allowed(char *ch) -{ - /* check for control chars, wildcards etc. */ - if (!(*ch & 0x80) && - (*ch <= 0x1f || - *ch == '?' || *ch == '"' || *ch == '<' || - *ch == '>' || *ch == '|')) - return 0; - - return 1; -} - /* * smb_from_utf16() - convert utf16le string to local charset * @to: destination buffer From 197b6b60ae7bc51dd0814953c562833143b292aa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Mar 2023 14:40:20 -0700 Subject: [PATCH 468/898] Linux 6.3-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a2c310df21459..da2586d4c7281 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From 707823e7f22f3864ddc7d85e8e9b614afe4f1b16 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Sat, 11 Mar 2023 16:50:25 +0400 Subject: [PATCH 469/898] 9P FS: Fix wild-memory-access write in v9fs_get_acl KASAN reported the following issue: [ 36.825817][ T5923] BUG: KASAN: wild-memory-access in v9fs_get_acl+0x1a4/0x390 [ 36.827479][ T5923] Write of size 4 at addr 9fffeb37f97f1c00 by task syz-executor798/5923 [ 36.829303][ T5923] [ 36.829846][ T5923] CPU: 0 PID: 5923 Comm: syz-executor798 Not tainted 6.2.0-syzkaller-18302-g596b6b709632 #0 [ 36.832110][ T5923] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023 [ 36.834464][ T5923] Call trace: [ 36.835196][ T5923] dump_backtrace+0x1c8/0x1f4 [ 36.836229][ T5923] show_stack+0x2c/0x3c [ 36.837100][ T5923] dump_stack_lvl+0xd0/0x124 [ 36.838103][ T5923] print_report+0xe4/0x4c0 [ 36.839068][ T5923] kasan_report+0xd4/0x130 [ 36.840052][ T5923] kasan_check_range+0x264/0x2a4 [ 36.841199][ T5923] __kasan_check_write+0x2c/0x3c [ 36.842216][ T5923] v9fs_get_acl+0x1a4/0x390 [ 36.843232][ T5923] v9fs_mount+0x77c/0xa5c [ 36.844163][ T5923] legacy_get_tree+0xd4/0x16c [ 36.845173][ T5923] vfs_get_tree+0x90/0x274 [ 36.846137][ T5923] do_new_mount+0x25c/0x8c8 [ 36.847066][ T5923] path_mount+0x590/0xe58 [ 36.848147][ T5923] __arm64_sys_mount+0x45c/0x594 [ 36.849273][ T5923] invoke_syscall+0x98/0x2c0 [ 36.850421][ T5923] el0_svc_common+0x138/0x258 [ 36.851397][ T5923] do_el0_svc+0x64/0x198 [ 36.852398][ T5923] el0_svc+0x58/0x168 [ 36.853224][ T5923] el0t_64_sync_handler+0x84/0xf0 [ 36.854293][ T5923] el0t_64_sync+0x190/0x194 Calling '__v9fs_get_acl' method in 'v9fs_get_acl' creates the following chain of function calls: __v9fs_get_acl v9fs_fid_get_acl v9fs_fid_xattr_get p9_client_xattrwalk Function p9_client_xattrwalk accepts a pointer to u64-typed variable attr_size and puts some u64 value into it. However, after the executing the p9_client_xattrwalk, in some circumstances we assign the value of u64-typed variable 'attr_size' to the variable 'retval', which we will return. However, the type of 'retval' is ssize_t, and if the value of attr_size is larger than SSIZE_MAX, we will face the signed type overflow. If the overflow occurs, the result of v9fs_fid_xattr_get may be negative, but not classified as an error. When we try to allocate an acl with 'broken' size we receive an error, but don't process it. When we try to free this acl, we face the 'wild-memory-access' error (because it wasn't allocated). This patch will add new condition to the 'v9fs_fid_xattr_get' function, so it will return an EOVERFLOW error if the 'attr_size' is larger than SSIZE_MAX. In this version of the patch I simplified the condition. In previous (v2) version of the patch I removed explicit type conversion and added separate condition to check the possible overflow and return an error (in v1 version I've just modified the existing condition). Tested via syzkaller. Suggested-by: Christian Schoenebeck Reported-by: syzbot+cb1d16facb3cc90de5fb@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=fbbef66d9e4d096242f3617de5d14d12705b4659 Signed-off-by: Ivan Orlov Reviewed-by: Christian Schoenebeck Signed-off-by: Eric Van Hensbergen --- fs/9p/xattr.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index 50f7f3f6b55e9..1974a38bce206 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -35,10 +35,12 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name, return retval; } if (attr_size > buffer_size) { - if (!buffer_size) /* request to get the attr_size */ - retval = attr_size; - else + if (buffer_size) retval = -ERANGE; + else if (attr_size > SSIZE_MAX) + retval = -EOVERFLOW; + else /* request to get the attr_size */ + retval = attr_size; } else { iov_iter_truncate(&to, attr_size); retval = p9_client_read(attr_fid, 0, &to, &err); From 02bcba0b9f9da706d5bd1e8cbeb83493863e17b5 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Mar 2023 10:29:33 +0200 Subject: [PATCH 470/898] regulator: Handle deferred clk devm_clk_get() can return -EPROBE_DEFER. So it is better to return the error code from devm_clk_get(), instead of a hard coded -ENOENT. This gives more opportunities to successfully probe the driver. Fixes: 8959e5324485 ("regulator: fixed: add possibility to enable by clock") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/18459fae3d017a66313699c7c8456b28158b2dd0.1679819354.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- drivers/regulator/fixed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c index 2a9867abba20c..e6724a229d237 100644 --- a/drivers/regulator/fixed.c +++ b/drivers/regulator/fixed.c @@ -215,7 +215,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev) drvdata->enable_clock = devm_clk_get(dev, NULL); if (IS_ERR(drvdata->enable_clock)) { dev_err(dev, "Can't get enable-clock from devicetree\n"); - return -ENOENT; + return PTR_ERR(drvdata->enable_clock); } } else if (drvtype && drvtype->has_performance_state) { drvdata->desc.ops = &fixed_voltage_domain_ops; From e3cbdcb0fbb61045ef3ce0e072927cc41737f787 Mon Sep 17 00:00:00 2001 From: Faicker Mo Date: Fri, 24 Mar 2023 17:19:54 +0800 Subject: [PATCH 471/898] net/net_failover: fix txq exceeding warning The failover txq is inited as 16 queues. when a packet is transmitted from the failover device firstly, the failover device will select the queue which is returned from the primary device if the primary device is UP and running. If the primary device txq is bigger than the default 16, it can lead to the following warning: eth0 selects TX queue 18, but real number of TX queues is 16 The warning backtrace is: [ 32.146376] CPU: 18 PID: 9134 Comm: chronyd Tainted: G E 6.2.8-1.el7.centos.x86_64 #1 [ 32.147175] Hardware name: Red Hat KVM, BIOS 1.10.2-3.el7_4.1 04/01/2014 [ 32.147730] Call Trace: [ 32.147971] [ 32.148183] dump_stack_lvl+0x48/0x70 [ 32.148514] dump_stack+0x10/0x20 [ 32.148820] netdev_core_pick_tx+0xb1/0xe0 [ 32.149180] __dev_queue_xmit+0x529/0xcf0 [ 32.149533] ? __check_object_size.part.0+0x21c/0x2c0 [ 32.149967] ip_finish_output2+0x278/0x560 [ 32.150327] __ip_finish_output+0x1fe/0x2f0 [ 32.150690] ip_finish_output+0x2a/0xd0 [ 32.151032] ip_output+0x7a/0x110 [ 32.151337] ? __pfx_ip_finish_output+0x10/0x10 [ 32.151733] ip_local_out+0x5e/0x70 [ 32.152054] ip_send_skb+0x19/0x50 [ 32.152366] udp_send_skb.isra.0+0x163/0x3a0 [ 32.152736] udp_sendmsg+0xba8/0xec0 [ 32.153060] ? __folio_memcg_unlock+0x25/0x60 [ 32.153445] ? __pfx_ip_generic_getfrag+0x10/0x10 [ 32.153854] ? sock_has_perm+0x85/0xa0 [ 32.154190] inet_sendmsg+0x6d/0x80 [ 32.154508] ? inet_sendmsg+0x6d/0x80 [ 32.154838] sock_sendmsg+0x62/0x70 [ 32.155152] ____sys_sendmsg+0x134/0x290 [ 32.155499] ___sys_sendmsg+0x81/0xc0 [ 32.155828] ? _get_random_bytes.part.0+0x79/0x1a0 [ 32.156240] ? ip4_datagram_release_cb+0x5f/0x1e0 [ 32.156649] ? get_random_u16+0x69/0xf0 [ 32.156989] ? __fget_light+0xcf/0x110 [ 32.157326] __sys_sendmmsg+0xc4/0x210 [ 32.157657] ? __sys_connect+0xb7/0xe0 [ 32.157995] ? __audit_syscall_entry+0xce/0x140 [ 32.158388] ? syscall_trace_enter.isra.0+0x12c/0x1a0 [ 32.158820] __x64_sys_sendmmsg+0x24/0x30 [ 32.159171] do_syscall_64+0x38/0x90 [ 32.159493] entry_SYSCALL_64_after_hwframe+0x72/0xdc Fix that by reducing txq number as the non-existent primary-dev does. Fixes: cfc80d9a1163 ("net: Introduce net_failover driver") Signed-off-by: Faicker Mo Signed-off-by: David S. Miller --- drivers/net/net_failover.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c index 7a28e082436e4..d0c916a53d7ce 100644 --- a/drivers/net/net_failover.c +++ b/drivers/net/net_failover.c @@ -130,14 +130,10 @@ static u16 net_failover_select_queue(struct net_device *dev, txq = ops->ndo_select_queue(primary_dev, skb, sb_dev); else txq = netdev_pick_tx(primary_dev, skb, NULL); - - qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; - - return txq; + } else { + txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0; } - txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0; - /* Save the original txq to restore before passing to the driver */ qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; From a90ac762d345890b40d88a1385a34a2449c2d75e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 24 Mar 2023 09:23:42 +0000 Subject: [PATCH 472/898] net: sfp: make sfp_bus_find_fwnode() take a const fwnode sfp_bus_find_fwnode() does not write to the fwnode, so let's make it const. Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/phy/sfp-bus.c | 2 +- include/linux/sfp.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index daac293e8edec..8284f29b36444 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -593,7 +593,7 @@ static void sfp_upstream_clear(struct sfp_bus *bus) * - %-ENOMEM if we failed to allocate the bus. * - an error from the upstream's connect_phy() method. */ -struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode) +struct sfp_bus *sfp_bus_find_fwnode(const struct fwnode_handle *fwnode) { struct fwnode_reference_args ref; struct sfp_bus *bus; diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 52b98f9666a2f..ef06a195b3c2b 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -557,7 +557,7 @@ int sfp_get_module_eeprom_by_page(struct sfp_bus *bus, void sfp_upstream_start(struct sfp_bus *bus); void sfp_upstream_stop(struct sfp_bus *bus); void sfp_bus_put(struct sfp_bus *bus); -struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode); +struct sfp_bus *sfp_bus_find_fwnode(const struct fwnode_handle *fwnode); int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, const struct sfp_upstream_ops *ops); void sfp_bus_del_upstream(struct sfp_bus *bus); @@ -619,7 +619,8 @@ static inline void sfp_bus_put(struct sfp_bus *bus) { } -static inline struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode) +static inline struct sfp_bus * +sfp_bus_find_fwnode(const struct fwnode_handle *fwnode) { return NULL; } From 850a8d2dc712abeea9a39b6cb53db6b78069ace0 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 24 Mar 2023 09:23:48 +0000 Subject: [PATCH 473/898] net: sfp: constify sfp-bus internal fwnode uses Constify sfp-bus internal fwnode uses, since we do not modify the fwnode structures. Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/phy/sfp-bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 8284f29b36444..9fc50fcc8fc99 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -17,7 +17,7 @@ struct sfp_bus { /* private: */ struct kref kref; struct list_head node; - struct fwnode_handle *fwnode; + const struct fwnode_handle *fwnode; const struct sfp_socket_ops *socket_ops; struct device *sfp_dev; @@ -390,7 +390,7 @@ static const struct sfp_upstream_ops *sfp_get_upstream_ops(struct sfp_bus *bus) return bus->registered ? bus->upstream_ops : NULL; } -static struct sfp_bus *sfp_bus_get(struct fwnode_handle *fwnode) +static struct sfp_bus *sfp_bus_get(const struct fwnode_handle *fwnode) { struct sfp_bus *sfp, *new, *found = NULL; From 4a0faa02d419a6728abef0f1d8a32d8c35ef95e6 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 24 Mar 2023 09:23:53 +0000 Subject: [PATCH 474/898] net: phy: constify fwnode_get_phy_node() fwnode argument fwnode_get_phy_node() does not motify the fwnode structure, so make the argument const, Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 2 +- include/linux/phy.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 1785f1cead971..1de3e339b31a5 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3057,7 +3057,7 @@ EXPORT_SYMBOL_GPL(device_phy_find_device); * and "phy-device" are not supported in ACPI. DT supports all the three * named references to the phy node. */ -struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode) +struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode) { struct fwnode_handle *phy_node; diff --git a/include/linux/phy.h b/include/linux/phy.h index 36bf0bbc8efa0..db7c0bd67559d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1547,7 +1547,7 @@ int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id); struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode); struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode); struct phy_device *device_phy_find_device(struct device *dev); -struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode); +struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode); struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45); int phy_device_register(struct phy_device *phy); void phy_device_free(struct phy_device *phydev); From b465518dc27da1ed74b8cbada4659708aac35adb Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 24 Mar 2023 12:54:50 +0100 Subject: [PATCH 475/898] vsock/loopback: use only sk_buff_head.lock to protect the packet queue pkt_list_lock was used before commit 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff") to protect the packet queue. After that commit we switched to sk_buff and we are using sk_buff_head.lock in almost every place to protect the packet queue except in vsock_loopback_work() when we call skb_queue_splice_init(). As reported by syzbot, this caused unlocked concurrent access to the packet queue between vsock_loopback_work() and vsock_loopback_cancel_pkt() since it is not holding pkt_list_lock. With the introduction of sk_buff_head, pkt_list_lock is redundant and can cause confusion, so let's remove it and use sk_buff_head.lock everywhere to protect the packet queue access. Fixes: 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff") Cc: bobby.eshleman@bytedance.com Reported-and-tested-by: syzbot+befff0a9536049e7902e@syzkaller.appspotmail.com Signed-off-by: Stefano Garzarella Reviewed-by: Bobby Eshleman Reviewed-by: Arseniy Krasnov Signed-off-by: David S. Miller --- net/vmw_vsock/vsock_loopback.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c index 671e03240fc52..89905c092645a 100644 --- a/net/vmw_vsock/vsock_loopback.c +++ b/net/vmw_vsock/vsock_loopback.c @@ -15,7 +15,6 @@ struct vsock_loopback { struct workqueue_struct *workqueue; - spinlock_t pkt_list_lock; /* protects pkt_list */ struct sk_buff_head pkt_queue; struct work_struct pkt_work; }; @@ -32,9 +31,7 @@ static int vsock_loopback_send_pkt(struct sk_buff *skb) struct vsock_loopback *vsock = &the_vsock_loopback; int len = skb->len; - spin_lock_bh(&vsock->pkt_list_lock); skb_queue_tail(&vsock->pkt_queue, skb); - spin_unlock_bh(&vsock->pkt_list_lock); queue_work(vsock->workqueue, &vsock->pkt_work); @@ -113,9 +110,9 @@ static void vsock_loopback_work(struct work_struct *work) skb_queue_head_init(&pkts); - spin_lock_bh(&vsock->pkt_list_lock); + spin_lock_bh(&vsock->pkt_queue.lock); skb_queue_splice_init(&vsock->pkt_queue, &pkts); - spin_unlock_bh(&vsock->pkt_list_lock); + spin_unlock_bh(&vsock->pkt_queue.lock); while ((skb = __skb_dequeue(&pkts))) { virtio_transport_deliver_tap_pkt(skb); @@ -132,7 +129,6 @@ static int __init vsock_loopback_init(void) if (!vsock->workqueue) return -ENOMEM; - spin_lock_init(&vsock->pkt_list_lock); skb_queue_head_init(&vsock->pkt_queue); INIT_WORK(&vsock->pkt_work, vsock_loopback_work); @@ -156,9 +152,7 @@ static void __exit vsock_loopback_exit(void) flush_work(&vsock->pkt_work); - spin_lock_bh(&vsock->pkt_list_lock); virtio_vsock_skb_queue_purge(&vsock->pkt_queue); - spin_unlock_bh(&vsock->pkt_list_lock); destroy_workqueue(vsock->workqueue); } From f2e9d083f768ec147da3e3e5209030d3c090c689 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 24 Mar 2023 14:39:08 +0100 Subject: [PATCH 476/898] net: phy: micrel: correct KSZ9131RNX EEE capabilities and advertisement The KSZ9131RNX incorrectly shows EEE capabilities in its registers. Although the "EEE control and capability 1" (Register 3.20) is set to 0, indicating no EEE support, the "EEE advertisement 1" (Register 7.60) is set to 0x6, advertising EEE support for 1000BaseT/Full and 100BaseT/Full. This inconsistency causes PHYlib to assume there is no EEE support, preventing control over EEE advertisement, which is enabled by default. This patch resolves the issue by utilizing the ksz9477_get_features() function to correctly set the EEE capabilities for the KSZ9131RNX. This adjustment allows proper control over EEE advertisement and ensures accurate representation of the device's capabilities. Fixes: 8b68710a3121 ("net: phy: start using genphy_c45_ethtool_get/set_eee()") Reported-by: Marek Vasut Tested-by: Marek Vasut Signed-off-by: Oleksij Rempel Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 2c84fccef4f64..4e884e4ba0ead 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -4151,6 +4151,7 @@ static struct phy_driver ksphy_driver[] = { .resume = kszphy_resume, .cable_test_start = ksz9x31_cable_test_start, .cable_test_get_status = ksz9x31_cable_test_get_status, + .get_features = ksz9477_get_features, }, { .phy_id = PHY_ID_KSZ8873MLL, .phy_id_mask = MICREL_PHY_ID_MASK, From 45977e58ce65ed0459edc9a0466d9dfea09463f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Thu, 23 Mar 2023 20:48:41 +0100 Subject: [PATCH 477/898] net: dsa: b53: mmap: add phy ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement phy_read16() and phy_write16() ops for B53 MMAP to avoid accessing B53_PORT_MII_PAGE registers which hangs the device. This access should be done through the MDIO Mux bus controller. Signed-off-by: Álvaro Fernández Rojas Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_mmap.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c index 70887e0aece33..d9434ed9450df 100644 --- a/drivers/net/dsa/b53/b53_mmap.c +++ b/drivers/net/dsa/b53/b53_mmap.c @@ -216,6 +216,18 @@ static int b53_mmap_write64(struct b53_device *dev, u8 page, u8 reg, return 0; } +static int b53_mmap_phy_read16(struct b53_device *dev, int addr, int reg, + u16 *value) +{ + return -EIO; +} + +static int b53_mmap_phy_write16(struct b53_device *dev, int addr, int reg, + u16 value) +{ + return -EIO; +} + static const struct b53_io_ops b53_mmap_ops = { .read8 = b53_mmap_read8, .read16 = b53_mmap_read16, @@ -227,6 +239,8 @@ static const struct b53_io_ops b53_mmap_ops = { .write32 = b53_mmap_write32, .write48 = b53_mmap_write48, .write64 = b53_mmap_write64, + .phy_read16 = b53_mmap_phy_read16, + .phy_write16 = b53_mmap_phy_write16, }; static int b53_mmap_probe_of(struct platform_device *pdev, From d1366b283d94ac4537a4b3a1e8668da4df7ce7e9 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 24 Mar 2023 14:01:41 +0100 Subject: [PATCH 478/898] can: j1939: prevent deadlock by moving j1939_sk_errqueue() This commit addresses a deadlock situation that can occur in certain scenarios, such as when running data TP/ETP transfer and subscribing to the error queue while receiving a net down event. The deadlock involves locks in the following order: 3 j1939_session_list_lock -> active_session_list_lock j1939_session_activate ... j1939_sk_queue_activate_next -> sk_session_queue_lock ... j1939_xtp_rx_eoma_one 2 j1939_sk_queue_drop_all -> sk_session_queue_lock ... j1939_sk_netdev_event_netdown -> j1939_socks_lock j1939_netdev_notify 1 j1939_sk_errqueue -> j1939_socks_lock __j1939_session_cancel -> active_session_list_lock j1939_tp_rxtimer CPU0 CPU1 ---- ---- lock(&priv->active_session_list_lock); lock(&jsk->sk_session_queue_lock); lock(&priv->active_session_list_lock); lock(&priv->j1939_socks_lock); The solution implemented in this commit is to move the j1939_sk_errqueue() call out of the active_session_list_lock context, thus preventing the deadlock situation. Reported-by: syzbot+ee1cd780f69483a8616b@syzkaller.appspotmail.com Fixes: 5b9272e93f2e ("can: j1939: extend UAPI to notify about RX status") Co-developed-by: Hillf Danton Signed-off-by: Hillf Danton Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/all/20230324130141.2132787-1-o.rempel@pengutronix.de Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index fce9b9ebf13f6..fb92c3609e172 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1124,8 +1124,6 @@ static void __j1939_session_cancel(struct j1939_session *session, if (session->sk) j1939_sk_send_loop_abort(session->sk, session->err); - else - j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); } static void j1939_session_cancel(struct j1939_session *session, @@ -1140,6 +1138,9 @@ static void j1939_session_cancel(struct j1939_session *session, } j1939_session_list_unlock(session->priv); + + if (!session->sk) + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); } static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer) @@ -1253,6 +1254,9 @@ static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer) __j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT); } j1939_session_list_unlock(session->priv); + + if (!session->sk) + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); } j1939_session_put(session); From a7602e7332b97cfbec7bacb0f1ade99a575fe104 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 25 Mar 2023 13:28:15 +0200 Subject: [PATCH 479/898] net: stmmac: don't reject VLANs when IFF_PROMISC is set The blamed commit has introduced the following tests to dwmac4_add_hw_vlan_rx_fltr(), called from stmmac_vlan_rx_add_vid(): if (hw->promisc) { netdev_err(dev, "Adding VLAN in promisc mode not supported\n"); return -EPERM; } "VLAN promiscuous" mode is keyed in this driver to IFF_PROMISC, and so, vlan_vid_add() and vlan_vid_del() calls cannot take place in IFF_PROMISC mode. I have the following 2 arguments that this restriction is.... hm, how shall I put it nicely... unproductive :) First, take the case of a Linux bridge. If the kernel is compiled with CONFIG_BRIDGE_VLAN_FILTERING=y, then this bridge shall have a VLAN database. The bridge shall try to call vlan_add_vid() on its bridge ports for each VLAN in the VLAN table. It will do this irrespectively of whether that port is *currently* VLAN-aware or not. So it will do this even when the bridge was created with vlan_filtering 0. But the Linux bridge, in VLAN-unaware mode, configures its ports in promiscuous (IFF_PROMISC) mode, so that they accept packets with any MAC DA (a switch must do this in order to forward those packets which are not directly targeted to its MAC address). As a result, the stmmac driver does not work as a bridge port, when the kernel is compiled with CONFIG_BRIDGE_VLAN_FILTERING=y. $ ip link add br0 type bridge && ip link set br0 up $ ip link set eth0 master br0 && ip link set eth0 up [ 2333.943296] br0: port 1(eth0) entered blocking state [ 2333.943381] br0: port 1(eth0) entered disabled state [ 2333.943782] device eth0 entered promiscuous mode [ 2333.944080] 4033c000.ethernet eth0: Adding VLAN in promisc mode not supported [ 2333.976509] 4033c000.ethernet eth0: failed to initialize vlan filtering on this port RTNETLINK answers: Operation not permitted Secondly, take the case of stmmac as DSA master. Some switch tagging protocols are based on 802.1Q VLANs (tag_sja1105.c), and as such, tag_8021q.c uses vlan_vid_add() to work with VLAN-filtering DSA masters. But also, when a DSA port becomes promiscuous (for example when it joins a bridge), the DSA framework also makes the DSA master promiscuous. Moreover, for every VLAN that a DSA switch sends to the CPU, DSA also programs a VLAN filter on the DSA master, because if the the DSA switch uses a tail tag, then the hardware frame parser of the DSA master will see VLAN as VLAN, and might filter them out, for being unknown. Due to the above 2 reasons, my belief is that the stmmac driver does not get to choose to not accept vlan_vid_add() calls while IFF_PROMISC is enabled, because the 2 are completely independent and there are code paths in the network stack which directly lead to this situation occurring, without the user's direct input. In fact, my belief is that "VLAN promiscuous" mode should have never been keyed on IFF_PROMISC in the first place, but rather, on the NETIF_F_HW_VLAN_CTAG_FILTER feature flag which can be toggled by the user through ethtool -k, when present in netdev->hw_features. In the stmmac driver, NETIF_F_HW_VLAN_CTAG_FILTER is only present in "features", making this feature "on [fixed]". I have this belief because I am unaware of any definition of promiscuity which implies having an effect on anything other than MAC DA (therefore not VLAN). However, I seem to be rather alone in having this opinion, looking back at the disagreements from this discussion: https://lore.kernel.org/netdev/20201110153958.ci5ekor3o2ekg3ky@ipetronik.com/ In any case, to remove the vlan_vid_add() dependency on !IFF_PROMISC, one would need to remove the check and see what fails. I guess the test was there because of the way in which dwmac4_vlan_promisc_enable() is implemented. For context, the dwmac4 supports Perfect Filtering for a limited number of VLANs - dwmac4_get_num_vlan(), priv->hw->num_vlan, with a fallback on Hash Filtering - priv->dma_cap.vlhash - see stmmac_vlan_update(), also visible in cat /sys/kernel/debug/stmmaceth/eth0/dma_cap | grep 'VLAN Hash Filtering'. The perfect filtering is based on MAC_VLAN_Tag_Filter/MAC_VLAN_Tag_Data registers, accessed in the driver through dwmac4_write_vlan_filter(). The hash filtering is based on the MAC_VLAN_Hash_Table register, named GMAC_VLAN_HASH_TABLE in the driver and accessed by dwmac4_update_vlan_hash(). The control bit for enabling hash filtering is GMAC_VLAN_VTHM (MAC_VLAN_Tag_Ctrl bit VTHM: VLAN Tag Hash Table Match Enable). Now, the description of dwmac4_vlan_promisc_enable() is that it iterates through the driver's cache of perfect filter entries (hw->vlan_filter[i], added by dwmac4_add_hw_vlan_rx_fltr()), and evicts them from hardware by unsetting their GMAC_VLAN_TAG_DATA_VEN (MAC_VLAN_Tag_Data bit VEN - VLAN Tag Enable) bit. Then it unsets the GMAC_VLAN_VTHM bit, which disables hash matching. This leaves the MAC, according to table "VLAN Match Status" from the documentation, to always enter these data paths: VID |VLAN Perfect Filter |VTHM Bit |VLAN Hash Filter |Final VLAN Match |Match Result | |Match Result |Status -------|--------------------|---------|-----------------|---------------- VID!=0 |Fail |0 |don't care |Pass So, dwmac4_vlan_promisc_enable() does its job, but by unsetting GMAC_VLAN_VTHM, it conflicts with the other code path which controls this bit: dwmac4_update_vlan_hash(), called through stmmac_update_vlan_hash() from stmmac_vlan_rx_add_vid() and from stmmac_vlan_rx_kill_vid(). This is, I guess, why dwmac4_add_hw_vlan_rx_fltr() is not allowed to run after dwmac4_vlan_promisc_enable() has unset GMAC_VLAN_VTHM: because if it did, then dwmac4_update_vlan_hash() would set GMAC_VLAN_VTHM again, breaking the "VLAN promiscuity". It turns out that dwmac4_vlan_promisc_enable() is way too complicated for what needs to be done. The MAC_Packet_Filter register also has the VTFE bit (VLAN Tag Filter Enable), which simply controls whether VLAN tagged packets which don't match the filtering tables (either perfect or hash) are dropped or not. At the moment, this driver unconditionally sets GMAC_PACKET_FILTER_VTFE if NETIF_F_HW_VLAN_CTAG_FILTER was detected through the priv->dma_cap.vlhash capability bits of the device, in stmmac_dvr_probe(). I would suggest deleting the unnecessarily complex logic from dwmac4_vlan_promisc_enable(), and simply unsetting GMAC_PACKET_FILTER_VTFE when becoming IFF_PROMISC, which has the same effect of allowing packets with any VLAN tags, but has the additional benefit of being able to run concurrently with stmmac_vlan_rx_add_vid() and stmmac_vlan_rx_kill_vid(). As much as I believe that the VTFE bit should have been exclusively controlled by NETIF_F_HW_VLAN_CTAG_FILTER through ethtool, and not by IFF_PROMISC, changing that is not a punctual fix to the problem, and it would probably break the VFFQ feature added by the later commit e0f9956a3862 ("net: stmmac: Add option for VLAN filter fail queue enable"). From the commit description, VFFQ needs IFF_PROMISC=on and VTFE=off in order to work (and this change respects that). But if VTFE was changed to be controlled through ethtool -k, then a user-visible change would have been introduced in Intel's scripts (a need to run "ethtool -k eth0 rx-vlan-filter off" which did not exist before). The patch was tested with this set of commands: ip link set eth0 up ip link add link eth0 name eth0.100 type vlan id 100 ip addr add 192.168.100.2/24 dev eth0.100 && ip link set eth0.100 up ip link set eth0 promisc on ip link add link eth0 name eth0.101 type vlan id 101 ip addr add 192.168.101.2/24 dev eth0.101 && ip link set eth0.101 up ip link set eth0 promisc off ping -c 5 192.168.100.1 ping -c 5 192.168.101.1 ip link set eth0 promisc on ping -c 5 192.168.100.1 ping -c 5 192.168.101.1 ip link del eth0.100 ip link del eth0.101 # Wait for VLAN-tagged pings from the other end... # Check with "tcpdump -i eth0 -e -n -p" and we should see them ip link set eth0 promisc off # Wait for VLAN-tagged pings from the other end... # Check with "tcpdump -i eth0 -e -n -p" and we shouldn't see them # anymore, but remove the "-p" argument from tcpdump and they're there. Fixes: c89f44ff10fd ("net: stmmac: Add support for VLAN promiscuous mode") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 1 - .../net/ethernet/stmicro/stmmac/dwmac4_core.c | 61 +------------------ 2 files changed, 3 insertions(+), 59 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index ec9c130276d89..54bb072aeb2d3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -532,7 +532,6 @@ struct mac_device_info { unsigned int xlgmac; unsigned int num_vlan; u32 vlan_filter[32]; - unsigned int promisc; bool vlan_fail_q_en; u8 vlan_fail_q; }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 8c7a0b7c99520..36251ec2589c9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -472,12 +472,6 @@ static int dwmac4_add_hw_vlan_rx_fltr(struct net_device *dev, if (vid > 4095) return -EINVAL; - if (hw->promisc) { - netdev_err(dev, - "Adding VLAN in promisc mode not supported\n"); - return -EPERM; - } - /* Single Rx VLAN Filter */ if (hw->num_vlan == 1) { /* For single VLAN filter, VID 0 means VLAN promiscuous */ @@ -527,12 +521,6 @@ static int dwmac4_del_hw_vlan_rx_fltr(struct net_device *dev, { int i, ret = 0; - if (hw->promisc) { - netdev_err(dev, - "Deleting VLAN in promisc mode not supported\n"); - return -EPERM; - } - /* Single Rx VLAN Filter */ if (hw->num_vlan == 1) { if ((hw->vlan_filter[0] & GMAC_VLAN_TAG_VID) == vid) { @@ -557,39 +545,6 @@ static int dwmac4_del_hw_vlan_rx_fltr(struct net_device *dev, return ret; } -static void dwmac4_vlan_promisc_enable(struct net_device *dev, - struct mac_device_info *hw) -{ - void __iomem *ioaddr = hw->pcsr; - u32 value; - u32 hash; - u32 val; - int i; - - /* Single Rx VLAN Filter */ - if (hw->num_vlan == 1) { - dwmac4_write_single_vlan(dev, 0); - return; - } - - /* Extended Rx VLAN Filter Enable */ - for (i = 0; i < hw->num_vlan; i++) { - if (hw->vlan_filter[i] & GMAC_VLAN_TAG_DATA_VEN) { - val = hw->vlan_filter[i] & ~GMAC_VLAN_TAG_DATA_VEN; - dwmac4_write_vlan_filter(dev, hw, i, val); - } - } - - hash = readl(ioaddr + GMAC_VLAN_HASH_TABLE); - if (hash & GMAC_VLAN_VLHT) { - value = readl(ioaddr + GMAC_VLAN_TAG); - if (value & GMAC_VLAN_VTHM) { - value &= ~GMAC_VLAN_VTHM; - writel(value, ioaddr + GMAC_VLAN_TAG); - } - } -} - static void dwmac4_restore_hw_vlan_rx_fltr(struct net_device *dev, struct mac_device_info *hw) { @@ -709,22 +664,12 @@ static void dwmac4_set_filter(struct mac_device_info *hw, } /* VLAN filtering */ - if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + if (dev->flags & IFF_PROMISC && !hw->vlan_fail_q_en) + value &= ~GMAC_PACKET_FILTER_VTFE; + else if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) value |= GMAC_PACKET_FILTER_VTFE; writel(value, ioaddr + GMAC_PACKET_FILTER); - - if (dev->flags & IFF_PROMISC && !hw->vlan_fail_q_en) { - if (!hw->promisc) { - hw->promisc = 1; - dwmac4_vlan_promisc_enable(dev, hw); - } - } else { - if (hw->promisc) { - hw->promisc = 0; - dwmac4_restore_hw_vlan_rx_fltr(dev, hw); - } - } } static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, From 12d4eb20d9d86fae5f84117ff047e966e470f7b9 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 15 Mar 2023 17:48:00 -0700 Subject: [PATCH 480/898] drm/i915/pmu: Use functions common with sysfs to read actual freq Expose intel_rps_read_actual_frequency_fw to read the actual freq without taking forcewake for use by PMU. The code is refactored to use a common set of functions across sysfs and PMU. Using common functions with sysfs in PMU solves the issues of missing support for MTL and missing support for older generations (prior to Gen6). It also future proofs the PMU where sometimes code has been updated for sysfs and PMU has been missed. v2: Remove runtime_pm_if_in_use from read_actual_frequency_fw (Tvrtko) v3: (Tvrtko) - Remove goto in __read_cagf - Unexport intel_rps_get_cagf and intel_rps_read_punit_req Fixes: 22009b6dad66 ("drm/i915/mtl: Modify CAGF functions for MTL") Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8280 Signed-off-by: Ashutosh Dixit Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20230316004800.2539753-1-ashutosh.dixit@intel.com (cherry picked from commit 44df42e66139b5fac8db49ee354be279210f9816) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_rps.c | 38 ++++++++++++++++------------- drivers/gpu/drm/i915/gt/intel_rps.h | 4 +-- drivers/gpu/drm/i915/i915_pmu.c | 10 +++----- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index f5d7b51264331..2c92fa9d19427 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2075,16 +2075,6 @@ void intel_rps_sanitize(struct intel_rps *rps) rps_disable_interrupts(rps); } -u32 intel_rps_read_rpstat_fw(struct intel_rps *rps) -{ - struct drm_i915_private *i915 = rps_to_i915(rps); - i915_reg_t rpstat; - - rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; - - return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat); -} - u32 intel_rps_read_rpstat(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); @@ -2095,7 +2085,7 @@ u32 intel_rps_read_rpstat(struct intel_rps *rps) return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat); } -u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) +static u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) { struct drm_i915_private *i915 = rps_to_i915(rps); u32 cagf; @@ -2118,10 +2108,11 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) return cagf; } -static u32 read_cagf(struct intel_rps *rps) +static u32 __read_cagf(struct intel_rps *rps, bool take_fw) { struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); + i915_reg_t r = INVALID_MMIO_REG; u32 freq; /* @@ -2129,22 +2120,30 @@ static u32 read_cagf(struct intel_rps *rps) * registers will return 0 freq when GT is in RC6 */ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { - freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); + r = MTL_MIRROR_TARGET_WP1; } else if (GRAPHICS_VER(i915) >= 12) { - freq = intel_uncore_read(uncore, GEN12_RPSTAT1); + r = GEN12_RPSTAT1; } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { vlv_punit_get(i915); freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); } else if (GRAPHICS_VER(i915) >= 6) { - freq = intel_uncore_read(uncore, GEN6_RPSTAT1); + r = GEN6_RPSTAT1; } else { - freq = intel_uncore_read(uncore, MEMSTAT_ILK); + r = MEMSTAT_ILK; } + if (i915_mmio_reg_valid(r)) + freq = take_fw ? intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r); + return intel_rps_get_cagf(rps, freq); } +static u32 read_cagf(struct intel_rps *rps) +{ + return __read_cagf(rps, true); +} + u32 intel_rps_read_actual_frequency(struct intel_rps *rps) { struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; @@ -2157,7 +2156,12 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps) return freq; } -u32 intel_rps_read_punit_req(struct intel_rps *rps) +u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps) +{ + return intel_gpu_freq(rps, __read_cagf(rps, false)); +} + +static u32 intel_rps_read_punit_req(struct intel_rps *rps) { struct intel_uncore *uncore = rps_to_uncore(rps); struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index c622962c6befb..a3fa987aa91f1 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -37,8 +37,8 @@ void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive); int intel_gpu_freq(struct intel_rps *rps, int val); int intel_freq_opcode(struct intel_rps *rps, int val); -u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1); u32 intel_rps_read_actual_frequency(struct intel_rps *rps); +u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps); u32 intel_rps_get_requested_frequency(struct intel_rps *rps); u32 intel_rps_get_min_frequency(struct intel_rps *rps); u32 intel_rps_get_min_raw_freq(struct intel_rps *rps); @@ -49,10 +49,8 @@ int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val); u32 intel_rps_get_rp0_frequency(struct intel_rps *rps); u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); -u32 intel_rps_read_punit_req(struct intel_rps *rps); u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); u32 intel_rps_read_rpstat(struct intel_rps *rps); -u32 intel_rps_read_rpstat_fw(struct intel_rps *rps); void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps); void intel_rps_raise_unslice(struct intel_rps *rps); void intel_rps_lower_unslice(struct intel_rps *rps); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 52531ab28c5f5..6d422b056f8a8 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -393,14 +393,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) * case we assume the system is running at the intended * frequency. Fortunately, the read should rarely fail! */ - val = intel_rps_read_rpstat_fw(rps); - if (val) - val = intel_rps_get_cagf(rps, val); - else - val = rps->cur_freq; + val = intel_rps_read_actual_frequency_fw(rps); + if (!val) + val = intel_gpu_freq(rps, rps->cur_freq); add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT], - intel_gpu_freq(rps, val), period_ns / 1000); + val, period_ns / 1000); } if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) { From 76b767d4d1cd052e455cf18e06929e8b2b70101d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 20 Mar 2023 11:54:33 +0200 Subject: [PATCH 481/898] drm/i915: Split icl_color_commit_noarm() from skl_color_commit_noarm() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're going to want different behavior for skl/glk vs. icl in .color_commit_noarm(), so split the hook into two. Arguably we already had slightly different behaviour since csc_enable/gamma_enable are never set on icl+, so the old code was perhaps a bit confusing as well. Cc: #v5.19+ Cc: Manasi Navare Cc: Drew Davenport Cc: Imre Deak Cc: Jouni Högander Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230320095438.17328-2-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit f161eb01f50ab31f2084975b43bce54b7b671e17) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_color.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 8d97c299e6577..ce2c3819146c1 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -677,6 +677,25 @@ static void skl_color_commit_arm(const struct intel_crtc_state *crtc_state) crtc_state->csc_mode); } +static void icl_color_commit_arm(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + + /* + * We don't (yet) allow userspace to control the pipe background color, + * so force it to black. + */ + intel_de_write(i915, SKL_BOTTOM_COLOR(pipe), 0); + + intel_de_write(i915, GAMMA_MODE(crtc->pipe), + crtc_state->gamma_mode); + + intel_de_write_fw(i915, PIPE_CSC_MODE(crtc->pipe), + crtc_state->csc_mode); +} + static struct drm_property_blob * create_linear_lut(struct drm_i915_private *i915, int lut_size) { @@ -3067,7 +3086,7 @@ static const struct intel_color_funcs i9xx_color_funcs = { static const struct intel_color_funcs icl_color_funcs = { .color_check = icl_color_check, .color_commit_noarm = icl_color_commit_noarm, - .color_commit_arm = skl_color_commit_arm, + .color_commit_arm = icl_color_commit_arm, .load_luts = icl_load_luts, .read_luts = icl_read_luts, .lut_equal = icl_lut_equal, From a8e03e00b62073b494886dbff32f8b5338066c8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 20 Mar 2023 11:54:34 +0200 Subject: [PATCH 482/898] drm/i915: Move CSC load back into .color_commit_arm() when PSR is enabled on skl/glk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SKL/GLK CSC unit suffers from a nasty issue where a CSC coeff/offset register read or write between DC5 exit and PSR exit will undo the CSC arming performed by DMC, and then during PSR exit the hardware will latch zeroes into the active CSC registers. This causes any plane going through the CSC to output all black. We can sidestep the issue by making sure the PSR exit has already actually happened before we touch the CSC coeff/offset registers. Easiest way to guarantee that is to just move the CSC programming back into the .color_commir_arm() as we force a PSR exit (and crucially wait for it to actually happen) prior to touching the arming registers. When PSR (and thus also DC states) are disabled we don't have anything to worry about, so we can keep using the more optional _noarm() hook for writing the CSC registers. Cc: #v5.19+ Cc: Manasi Navare Cc: Drew Davenport Cc: Imre Deak Cc: Jouni Högander Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8283 Fixes: d13dde449580 ("drm/i915: Split pipe+output CSC programming to noarm+arm pair") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230320095438.17328-3-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit 80a892a4c2428b65366721599fc5fe50eaed35fd) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_color.c | 23 ++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index ce2c3819146c1..aa702292779fd 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -617,6 +617,22 @@ static void icl_color_commit_noarm(const struct intel_crtc_state *crtc_state) icl_load_csc_matrix(crtc_state); } +static void skl_color_commit_noarm(const struct intel_crtc_state *crtc_state) +{ + /* + * Possibly related to display WA #1184, SKL CSC loses the latched + * CSC coeff/offset register values if the CSC registers are disarmed + * between DC5 exit and PSR exit. This will cause the plane(s) to + * output all black (until CSC_MODE is rearmed and properly latched). + * Once PSR exit (and proper register latching) has occurred the + * danger is over. Thus when PSR is enabled the CSC coeff/offset + * register programming will be peformed from skl_color_commit_arm() + * which is called after PSR exit. + */ + if (!crtc_state->has_psr) + ilk_load_csc_matrix(crtc_state); +} + static void ilk_color_commit_noarm(const struct intel_crtc_state *crtc_state) { ilk_load_csc_matrix(crtc_state); @@ -659,6 +675,9 @@ static void skl_color_commit_arm(const struct intel_crtc_state *crtc_state) enum pipe pipe = crtc->pipe; u32 val = 0; + if (crtc_state->has_psr) + ilk_load_csc_matrix(crtc_state); + /* * We don't (yet) allow userspace to control the pipe background color, * so force it to black, but apply pipe gamma and CSC appropriately @@ -3094,7 +3113,7 @@ static const struct intel_color_funcs icl_color_funcs = { static const struct intel_color_funcs glk_color_funcs = { .color_check = glk_color_check, - .color_commit_noarm = ilk_color_commit_noarm, + .color_commit_noarm = skl_color_commit_noarm, .color_commit_arm = skl_color_commit_arm, .load_luts = glk_load_luts, .read_luts = glk_read_luts, @@ -3103,7 +3122,7 @@ static const struct intel_color_funcs glk_color_funcs = { static const struct intel_color_funcs skl_color_funcs = { .color_check = ivb_color_check, - .color_commit_noarm = ilk_color_commit_noarm, + .color_commit_noarm = skl_color_commit_noarm, .color_commit_arm = skl_color_commit_arm, .load_luts = bdw_load_luts, .read_luts = bdw_read_luts, From c880f855d1e240a956dcfce884269bad92fc849c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 20 Mar 2023 11:54:35 +0200 Subject: [PATCH 483/898] drm/i915: Add a .color_post_update() hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're going to need stuff after the color management register latching has happened. Add a corresponding hook. Cc: #v5.19+ Cc: Manasi Navare Cc: Drew Davenport Cc: Imre Deak Cc: Jouni Högander Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230320095438.17328-4-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit 3962ca4e080a525fc9eae87aa6b2286f1fae351d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_color.c | 13 +++++++++++++ drivers/gpu/drm/i915/display/intel_color.h | 1 + drivers/gpu/drm/i915/display/intel_display.c | 3 +++ 3 files changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index aa702292779fd..b1d0b49fe8ef7 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -46,6 +46,11 @@ struct intel_color_funcs { * registers involved with the same commit. */ void (*color_commit_arm)(const struct intel_crtc_state *crtc_state); + /* + * Perform any extra tasks needed after all the + * double buffered registers have been latched. + */ + void (*color_post_update)(const struct intel_crtc_state *crtc_state); /* * Load LUTs (and other single buffered color management * registers). Will (hopefully) be called during the vblank @@ -1411,6 +1416,14 @@ void intel_color_commit_arm(const struct intel_crtc_state *crtc_state) i915->display.funcs.color->color_commit_arm(crtc_state); } +void intel_color_post_update(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + + if (i915->display.funcs.color->color_post_update) + i915->display.funcs.color->color_post_update(crtc_state); +} + void intel_color_prepare_commit(struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); diff --git a/drivers/gpu/drm/i915/display/intel_color.h b/drivers/gpu/drm/i915/display/intel_color.h index d620b5b1e2a6e..8002492be7090 100644 --- a/drivers/gpu/drm/i915/display/intel_color.h +++ b/drivers/gpu/drm/i915/display/intel_color.h @@ -21,6 +21,7 @@ void intel_color_prepare_commit(struct intel_crtc_state *crtc_state); void intel_color_cleanup_commit(struct intel_crtc_state *crtc_state); void intel_color_commit_noarm(const struct intel_crtc_state *crtc_state); void intel_color_commit_arm(const struct intel_crtc_state *crtc_state); +void intel_color_post_update(const struct intel_crtc_state *crtc_state); void intel_color_load_luts(const struct intel_crtc_state *crtc_state); void intel_color_get_config(struct intel_crtc_state *crtc_state); bool intel_color_lut_equal(const struct intel_crtc_state *crtc_state, diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 208b1b5b15dd4..1a5ffa9642e86 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1209,6 +1209,9 @@ static void intel_post_plane_update(struct intel_atomic_state *state, if (needs_cursorclk_wa(old_crtc_state) && !needs_cursorclk_wa(new_crtc_state)) icl_wa_cursorclkgating(dev_priv, pipe, false); + + if (intel_crtc_needs_color_update(new_crtc_state)) + intel_color_post_update(new_crtc_state); } static void intel_crtc_enable_flip_done(struct intel_atomic_state *state, From 4d4e766f8b7dbdefa7a78e91eb9c7a29d0d818b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 20 Mar 2023 11:54:36 +0200 Subject: [PATCH 484/898] drm/i915: Workaround ICL CSC_MODE sticky arming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unlike SKL/GLK the ICL CSC unit suffers from a new issue where CSC_MODE arming is sticky. That is, once armed it remains armed causing the CSC coeff/offset registers to become effectively self-arming. CSC coeff/offset registers writes no longer disarm the CSC, but fortunately register read still do. So we can use that to disarm the CSC unit once the registers for the current frame have been latched. This avoid s the self-arming behaviour from persisting into the next frame's .color_commit_noarm() call. Cc: #v5.19+ Cc: Manasi Navare Cc: Drew Davenport Cc: Imre Deak Cc: Jouni Högander Fixes: d13dde449580 ("drm/i915: Split pipe+output CSC programming to noarm+arm pair") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230320095438.17328-5-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit 92736f1b452bbb8a66bdb5b1d263ad00e04dd3b8) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_color.c | 44 +++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index b1d0b49fe8ef7..bd598a7f5047d 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -619,6 +619,14 @@ static void ilk_lut_12p4_pack(struct drm_color_lut *entry, u32 ldw, u32 udw) static void icl_color_commit_noarm(const struct intel_crtc_state *crtc_state) { + /* + * Despite Wa_1406463849, ICL no longer suffers from the SKL + * DC5/PSR CSC black screen issue (see skl_color_commit_noarm()). + * Possibly due to the extra sticky CSC arming + * (see icl_color_post_update()). + * + * On TGL+ all CSC arming issues have been properly fixed. + */ icl_load_csc_matrix(crtc_state); } @@ -720,6 +728,28 @@ static void icl_color_commit_arm(const struct intel_crtc_state *crtc_state) crtc_state->csc_mode); } +static void icl_color_post_update(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + + /* + * Despite Wa_1406463849, ICL CSC is no longer disarmed by + * coeff/offset register *writes*. Instead, once CSC_MODE + * is armed it stays armed, even after it has been latched. + * Afterwards the coeff/offset registers become effectively + * self-arming. That self-arming must be disabled before the + * next icl_color_commit_noarm() tries to write the next set + * of coeff/offset registers. Fortunately register *reads* + * do still disarm the CSC. Naturally this must not be done + * until the previously written CSC registers have actually + * been latched. + * + * TGL+ no longer need this workaround. + */ + intel_de_read_fw(i915, PIPE_CSC_PREOFF_HI(crtc->pipe)); +} + static struct drm_property_blob * create_linear_lut(struct drm_i915_private *i915, int lut_size) { @@ -3115,10 +3145,20 @@ static const struct intel_color_funcs i9xx_color_funcs = { .lut_equal = i9xx_lut_equal, }; +static const struct intel_color_funcs tgl_color_funcs = { + .color_check = icl_color_check, + .color_commit_noarm = icl_color_commit_noarm, + .color_commit_arm = icl_color_commit_arm, + .load_luts = icl_load_luts, + .read_luts = icl_read_luts, + .lut_equal = icl_lut_equal, +}; + static const struct intel_color_funcs icl_color_funcs = { .color_check = icl_color_check, .color_commit_noarm = icl_color_commit_noarm, .color_commit_arm = icl_color_commit_arm, + .color_post_update = icl_color_post_update, .load_luts = icl_load_luts, .read_luts = icl_read_luts, .lut_equal = icl_lut_equal, @@ -3231,7 +3271,9 @@ void intel_color_init_hooks(struct drm_i915_private *i915) else i915->display.funcs.color = &i9xx_color_funcs; } else { - if (DISPLAY_VER(i915) >= 11) + if (DISPLAY_VER(i915) >= 12) + i915->display.funcs.color = &tgl_color_funcs; + else if (DISPLAY_VER(i915) == 11) i915->display.funcs.color = &icl_color_funcs; else if (DISPLAY_VER(i915) == 10) i915->display.funcs.color = &glk_color_funcs; From a2b6e99d8a623544f3bdccd28ee35b9c1b00daa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 20 Mar 2023 20:35:32 +0200 Subject: [PATCH 485/898] drm/i915: Disable DC states for all commits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keeping DC states enabled is incompatible with the _noarm()/_arm() split we use for writing pipe/plane registers. When DC5 and PSR are enabled, all pipe/plane registers effectively become self-arming on account of DC5 exit arming the update, and PSR exit latching it. What probably saves us most of the time is that (with PIPE_MISC[21]=0) all pipe register writes themselves trigger PSR exit, and then we don't re-enter PSR until the idle frame count has elapsed. So it may be that the PSR exit happens already before we've updated the state too much. Also the PSR1 panel (at least on this KBL) seems to discard the first frame we trasmit, presumably still scanning out from its internal framebuffer at that point. So only the second frame we transmit is actually visible. But I suppose that could also be panel specific behaviour. I haven't checked out how other PSR panels behave, nor did I bother to check what the eDP spec has to say about this. And since this really is all about DC states, let's switch from the MODESET domain to the DC_OFF domain. Functionally they are 100% identical. We should probably remove the MODESET domain... And for good measure let's toss in an assert to the place where we do the _noarm() register writes to make sure DC states are in fact off. v2: Just use intel_display_power_is_enabled() (Imre) Cc: #v5.17+ Cc: Manasi Navare Cc: Drew Davenport Cc: Jouni Högander Reviewed-by: Imre Deak Fixes: d13dde449580 ("drm/i915: Split pipe+output CSC programming to noarm+arm pair") Fixes: f8a005eb8972 ("drm/i915: Optimize icl+ universal plane programming") Fixes: 890b6ec4a522 ("drm/i915: Split skl+ plane update into noarm+arm pair") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230320183532.17727-1-ville.syrjala@linux.intel.com (cherry picked from commit 41b4c7fe72b6105a4b49395eea9aa40cef94288d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 28 +++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 1a5ffa9642e86..63b4b73f47c6a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7094,6 +7094,8 @@ static void intel_update_crtc(struct intel_atomic_state *state, intel_fbc_update(state, crtc); + drm_WARN_ON(&i915->drm, !intel_display_power_is_enabled(i915, POWER_DOMAIN_DC_OFF)); + if (!modeset && intel_crtc_needs_color_update(new_crtc_state)) intel_color_commit_noarm(new_crtc_state); @@ -7461,8 +7463,28 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) drm_atomic_helper_wait_for_dependencies(&state->base); drm_dp_mst_atomic_wait_for_dependencies(&state->base); - if (state->modeset) - wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET); + /* + * During full modesets we write a lot of registers, wait + * for PLLs, etc. Doing that while DC states are enabled + * is not a good idea. + * + * During fastsets and other updates we also need to + * disable DC states due to the following scenario: + * 1. DC5 exit and PSR exit happen + * 2. Some or all _noarm() registers are written + * 3. Due to some long delay PSR is re-entered + * 4. DC5 entry -> DMC saves the already written new + * _noarm() registers and the old not yet written + * _arm() registers + * 5. DC5 exit -> DMC restores a mixture of old and + * new register values and arms the update + * 6. PSR exit -> hardware latches a mixture of old and + * new register values -> corrupted frame, or worse + * 7. New _arm() registers are finally written + * 8. Hardware finally latches a complete set of new + * register values, and subsequent frames will be OK again + */ + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_DC_OFF); intel_atomic_prepare_plane_clear_colors(state); @@ -7611,8 +7633,8 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) * the culprit. */ intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore); - intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET, wakeref); } + intel_display_power_put(dev_priv, POWER_DOMAIN_DC_OFF, wakeref); intel_runtime_pm_put(&dev_priv->runtime_pm, state->wakeref); /* From 38c583019484f190d5b33f59b8ae810e6b1763c6 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 16 Mar 2023 15:17:13 +0200 Subject: [PATCH 486/898] drm/i915/tc: Fix the ICL PHY ownership check in TC-cold state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit renaming icl_tc_phy_is_in_safe_mode() to icl_tc_phy_take_ownership() didn't flip the function's return value accordingly, fix this up. This didn't cause an actual problem besides state check errors, since the function is only used during HW readout. Cc: José Roberto de Souza Fixes: f53979d68a77 ("drm/i915/display/tc: Rename safe_mode functions ownership") Reviewed-by: José Roberto de Souza Reviewed-by: Ville Syrjälä Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20230316131724.359612-4-imre.deak@intel.com (cherry picked from commit f2c7959dda614d9b7c6a41510492de39d31705ec) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_tc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index f45328712bff1..be510b9c0d07e 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -418,9 +418,9 @@ static bool icl_tc_phy_is_owned(struct intel_digital_port *dig_port) val = intel_de_read(i915, PORT_TX_DFLEXDPCSSS(dig_port->tc_phy_fia)); if (val == 0xffffffff) { drm_dbg_kms(&i915->drm, - "Port %s: PHY in TCCOLD, assume safe mode\n", + "Port %s: PHY in TCCOLD, assume not owned\n", dig_port->tc_port_name); - return true; + return false; } return val & DP_PHY_MODE_STATUS_NOT_SAFE(dig_port->tc_phy_fia_idx); From d032ca43f2c80049ce5aabd3f208dc3849359497 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Mar 2023 17:59:18 +0100 Subject: [PATCH 487/898] drm/i915/gem: Flush lmem contents after construction i915_gem_object_create_lmem_from_data() lacks the flush of the data written to lmem to ensure the object is marked as dirty and the writes flushed to the backing store. Once created, we can immediately release the obj->mm.mapping caching of the vmap. Fixes: 7acbbc7cf485 ("drm/i915/guc: put all guc objects in lmem when available") Cc: Matthew Auld Cc: Daniele Ceraolo Spurio Cc: Andi Shyti Cc: Matthew Brost Cc: John Harrison Signed-off-by: Chris Wilson Cc: # v5.16+ Signed-off-by: Nirmoy Das Reviewed-by: Andi Shyti Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20230316165918.13074-1-nirmoy.das@intel.com (cherry picked from commit e2ee10474ce766686e7a7496585cdfaf79e3a1bf) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 8949fb0a944f6..3198b64ad7dbc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -127,7 +127,8 @@ i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915, memcpy(map, data, size); - i915_gem_object_unpin_map(obj); + i915_gem_object_flush_map(obj); + __i915_gem_object_release_map(obj); return obj; } From 3413881e1ecc3cba722a2e87ec099692eed5be28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 20 Mar 2023 11:05:17 +0200 Subject: [PATCH 488/898] drm/i915/dpt: Treat the DPT BO as a framebuffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently i915_gem_object_is_framebuffer() doesn't treat the BO containing the framebuffer's DPT as a framebuffer itself. This means eg. that the shrinker can evict the DPT BO while leaving the actual FB BO bound, when the DPT is allocated from regular shmem. That causes an immediate oops during hibernate as we try to rewrite the PTEs inside the already evicted DPT obj. TODO: presumably this might also be the reason for the DPT related display faults under heavy memory pressure, but I'm still not sure how that would happen as the object should be pinned by intel_dpt_pin() while in active use by the display engine... Cc: stable@vger.kernel.org Cc: Juha-Pekka Heikkila Cc: Matthew Auld Cc: Imre Deak Fixes: 0dc987b699ce ("drm/i915/display: Add smem fallback allocation for dpt") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230320090522.9909-2-ville.syrjala@linux.intel.com Reviewed-by: Juha-Pekka Heikkila (cherry picked from commit 779cb5ba64ec7df80675a956c9022929514f517a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dpt.c | 2 ++ drivers/gpu/drm/i915/gem/i915_gem_object.h | 2 +- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index ad1a37b515fb1..2a9f40a2b3ed0 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -301,6 +301,7 @@ intel_dpt_create(struct intel_framebuffer *fb) vm->pte_encode = gen8_ggtt_pte_encode; dpt->obj = dpt_obj; + dpt->obj->is_dpt = true; return &dpt->vm; } @@ -309,5 +310,6 @@ void intel_dpt_destroy(struct i915_address_space *vm) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); + dpt->obj->is_dpt = false; i915_vm_put(&dpt->vm); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index f9a8acbba715e..885ccde9dc3c0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -303,7 +303,7 @@ i915_gem_object_never_mmap(const struct drm_i915_gem_object *obj) static inline bool i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj) { - return READ_ONCE(obj->frontbuffer); + return READ_ONCE(obj->frontbuffer) || obj->is_dpt; } static inline unsigned int diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 19c9bdd8f905f..5dcbbef31d445 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -491,6 +491,9 @@ struct drm_i915_gem_object { */ unsigned int cache_dirty:1; + /* @is_dpt: Object houses a display page table (DPT) */ + unsigned int is_dpt:1; + /** * @read_domains: Read memory domains. * From 5c95b2d5d44fa250ce8aeee27bdb39b381d03857 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Mar 2023 15:58:51 -0700 Subject: [PATCH 489/898] drm/i915/perf: Drop wakeref on GuC RC error If we fail to adjust the GuC run-control on opening the perf stream, make sure we unwind the wakeref just taken. v2: Retain old goto label names (Ashutosh) v3: Drop bitfield boolean Fixes: 01e742746785 ("drm/i915/guc: Support OA when Wa_16011777198 is enabled") Signed-off-by: Chris Wilson Reviewed-by: Ashutosh Dixit Signed-off-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20230323225901.3743681-2-umesh.nerlige.ramappa@intel.com (cherry picked from commit 2810ac6c753d17ee2572ffb57fe2382a786a080a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_perf.c | 14 +++++++++----- drivers/gpu/drm/i915/i915_perf_types.h | 6 ++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 824a34ec0b838..283a4a3c6862f 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1592,9 +1592,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) /* * Wa_16011777198:dg2: Unset the override of GUCRC mode to enable rc6. */ - if (intel_uc_uses_guc_rc(>->uc) && - (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))) + if (stream->override_gucrc) drm_WARN_ON(>->i915->drm, intel_guc_slpc_unset_gucrc_mode(>->uc.guc.slpc)); @@ -3305,8 +3303,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, if (ret) { drm_dbg(&stream->perf->i915->drm, "Unable to override gucrc mode\n"); - goto err_config; + goto err_gucrc; } + + stream->override_gucrc = true; } ret = alloc_oa_buffer(stream); @@ -3345,11 +3345,15 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, free_oa_buffer(stream); err_oa_buf_alloc: - free_oa_configs(stream); + if (stream->override_gucrc) + intel_guc_slpc_unset_gucrc_mode(>->uc.guc.slpc); +err_gucrc: intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); intel_engine_pm_put(stream->engine); + free_oa_configs(stream); + err_config: free_noa_wait(stream); diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index ca150b7af3f29..4d5d8c365d9e2 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -316,6 +316,12 @@ struct i915_perf_stream { * buffer should be checked for available data. */ u64 poll_oa_period; + + /** + * @override_gucrc: GuC RC has been overridden for the perf stream, + * and we need to restore the default configuration on release. + */ + bool override_gucrc; }; /** From fb5755100a0a5aa5957bdb204fd1e249684557fc Mon Sep 17 00:00:00 2001 From: Rajvi Jingar Date: Mon, 20 Mar 2023 14:20:29 -0700 Subject: [PATCH 490/898] platform/x86/intel/pmc: Alder Lake PCH slp_s0_residency fix For platforms with Alder Lake PCH (Alder Lake S and Raptor Lake S) the slp_s0_residency attribute has been reporting the wrong value. Unlike other platforms, ADL PCH does not have a counter for the time that the SLP_S0 signal was asserted. Instead, firmware uses the aggregate of the Low Power Mode (LPM) substate counters as the S0ix value. Since the LPM counters run at a different frequency, this lead to misreporting of the S0ix time. Add a check for Alder Lake PCH and adjust the frequency accordingly when display slp_s0_residency. Fixes: bbab31101f44 ("platform/x86/intel: pmc/core: Add Alderlake support to pmc core driver") Signed-off-by: Rajvi Jingar Signed-off-by: David E. Box Reviewed-by: Rajneesh Bhardwaj Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230320212029.3154407-1-david.e.box@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/pmc/core.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 3a15d32d7644c..b9591969e0fa1 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -66,7 +66,18 @@ static inline void pmc_core_reg_write(struct pmc_dev *pmcdev, int reg_offset, static inline u64 pmc_core_adjust_slp_s0_step(struct pmc_dev *pmcdev, u32 value) { - return (u64)value * pmcdev->map->slp_s0_res_counter_step; + /* + * ADL PCH does not have the SLP_S0 counter and LPM Residency counters are + * used as a workaround which uses 30.5 usec tick. All other client + * programs have the legacy SLP_S0 residency counter that is using the 122 + * usec tick. + */ + const int lpm_adj_x2 = pmcdev->map->lpm_res_counter_step_x2; + + if (pmcdev->map == &adl_reg_map) + return (u64)value * GET_X2_COUNTER((u64)lpm_adj_x2); + else + return (u64)value * pmcdev->map->slp_s0_res_counter_step; } static int set_etr3(struct pmc_dev *pmcdev) From 2b4c99f7d9a57ecd644eda9b1fb0a1072414959f Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 14 Mar 2023 16:04:45 +0400 Subject: [PATCH 491/898] can: bcm: bcm_tx_setup(): fix KMSAN uninit-value in vfs_write Syzkaller reported the following issue: ===================================================== BUG: KMSAN: uninit-value in aio_rw_done fs/aio.c:1520 [inline] BUG: KMSAN: uninit-value in aio_write+0x899/0x950 fs/aio.c:1600 aio_rw_done fs/aio.c:1520 [inline] aio_write+0x899/0x950 fs/aio.c:1600 io_submit_one+0x1d1c/0x3bf0 fs/aio.c:2019 __do_sys_io_submit fs/aio.c:2078 [inline] __se_sys_io_submit+0x293/0x770 fs/aio.c:2048 __x64_sys_io_submit+0x92/0xd0 fs/aio.c:2048 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Uninit was created at: slab_post_alloc_hook mm/slab.h:766 [inline] slab_alloc_node mm/slub.c:3452 [inline] __kmem_cache_alloc_node+0x71f/0xce0 mm/slub.c:3491 __do_kmalloc_node mm/slab_common.c:967 [inline] __kmalloc+0x11d/0x3b0 mm/slab_common.c:981 kmalloc_array include/linux/slab.h:636 [inline] bcm_tx_setup+0x80e/0x29d0 net/can/bcm.c:930 bcm_sendmsg+0x3a2/0xce0 net/can/bcm.c:1351 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] sock_write_iter+0x495/0x5e0 net/socket.c:1108 call_write_iter include/linux/fs.h:2189 [inline] aio_write+0x63a/0x950 fs/aio.c:1600 io_submit_one+0x1d1c/0x3bf0 fs/aio.c:2019 __do_sys_io_submit fs/aio.c:2078 [inline] __se_sys_io_submit+0x293/0x770 fs/aio.c:2048 __x64_sys_io_submit+0x92/0xd0 fs/aio.c:2048 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd CPU: 1 PID: 5034 Comm: syz-executor350 Not tainted 6.2.0-rc6-syzkaller-80422-geda666ff2276 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/12/2023 ===================================================== We can follow the call chain and find that 'bcm_tx_setup' function calls 'memcpy_from_msg' to copy some content to the newly allocated frame of 'op->frames'. After that the 'len' field of copied structure being compared with some constant value (64 or 8). However, if 'memcpy_from_msg' returns an error, we will compare some uninitialized memory. This triggers 'uninit-value' issue. This patch will add 'memcpy_from_msg' possible errors processing to avoid uninit-value issue. Tested via syzkaller Reported-by: syzbot+c9bfd85eca611ebf5db1@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=47f897f8ad958bbde5790ebf389b5e7e0a345089 Signed-off-by: Ivan Orlov Fixes: 6f3b911d5f29b ("can: bcm: add support for CAN FD frames") Acked-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20230314120445.12407-1-ivan.orlov0322@gmail.com Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 27706f6ace34a..a962ec2b8ba5b 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -941,6 +941,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, cf = op->frames + op->cfsiz * i; err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz); + if (err < 0) + goto free_op; if (op->flags & CAN_FD_FRAME) { if (cf->len > 64) @@ -950,12 +952,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, err = -EINVAL; } - if (err < 0) { - if (op->frames != &op->sframe) - kfree(op->frames); - kfree(op); - return err; - } + if (err < 0) + goto free_op; if (msg_head->flags & TX_CP_CAN_ID) { /* copy can_id into frame */ @@ -1026,6 +1024,12 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, bcm_tx_start_timer(op); return msg_head->nframes * op->cfsiz + MHSIZ; + +free_op: + if (op->frames != &op->sframe) + kfree(op->frames); + kfree(op); + return err; } /* From 441d901fbf669f6360566a4437b1e563b854de4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 27 Mar 2023 13:05:02 +0000 Subject: [PATCH 492/898] platform/x86: gigabyte-wmi: add support for B650 AORUS ELITE AX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This has been reported as working. Suggested-by: got3nks Link: https://github.com/t-8ch/linux-gigabyte-wmi-driver/issues/15#issuecomment-1483942966 Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20230327-gigabyte-wmi-b650-elite-ax-v1-1-d4d645c21d0b@weissschuh.net Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/gigabyte-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index 4dd39ab6ecfa2..5e5b17c50eb67 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -151,6 +151,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550I AORUS PRO AX"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B650 AORUS ELITE AX"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660 GAMING X DDR4"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660I AORUS PRO DDR4"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"), From e5c972c1fadacc858b6a564d056f177275238040 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Fri, 24 Mar 2023 15:52:33 +0100 Subject: [PATCH 493/898] KVM: SVM: Flush Hyper-V TLB when required The Hyper-V "EnlightenedNptTlb" enlightenment is always enabled when KVM is running on top of Hyper-V and Hyper-V exposes support for it (which is always). On AMD CPUs this enlightenment results in ASID invalidations not flushing TLB entries derived from the NPT. To force the underlying (L0) hypervisor to rebuild its shadow page tables, an explicit hypercall is needed. The original KVM implementation of Hyper-V's "EnlightenedNptTlb" on SVM only added remote TLB flush hooks. This worked out fine for a while, as sufficient remote TLB flushes where being issued in KVM to mask the problem. Since v5.17, changes in the TDP code reduced the number of flushes and the out-of-sync TLB prevents guests from booting successfully. Split svm_flush_tlb_current() into separate callbacks for the 3 cases (guest/all/current), and issue the required Hyper-V hypercall when a Hyper-V TLB flush is needed. The most important case where the TLB flush was missing is when loading a new PGD, which is followed by what is now svm_flush_tlb_current(). Cc: stable@vger.kernel.org # v5.17+ Fixes: 1e0c7d40758b ("KVM: SVM: hyper-v: Remote TLB flush for SVM") Link: https://lore.kernel.org/lkml/43980946-7bbf-dcef-7e40-af904c456250@linux.microsoft.com/ Suggested-by: Sean Christopherson Signed-off-by: Jeremi Piotrowski Reviewed-by: Vitaly Kuznetsov Message-Id: <20230324145233.4585-1-jpiotrowski@linux.microsoft.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/kvm_onhyperv.h | 5 +++++ arch/x86/kvm/svm/svm.c | 37 ++++++++++++++++++++++++++++++--- arch/x86/kvm/svm/svm_onhyperv.h | 15 +++++++++++++ 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/kvm_onhyperv.h b/arch/x86/kvm/kvm_onhyperv.h index 287e98ef9df3d..6272dabec02da 100644 --- a/arch/x86/kvm/kvm_onhyperv.h +++ b/arch/x86/kvm/kvm_onhyperv.h @@ -12,6 +12,11 @@ int hv_remote_flush_tlb_with_range(struct kvm *kvm, int hv_remote_flush_tlb(struct kvm *kvm); void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp); #else /* !CONFIG_HYPERV */ +static inline int hv_remote_flush_tlb(struct kvm *kvm) +{ + return -EOPNOTSUPP; +} + static inline void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp) { } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 252e7f37e4e2e..f25bc3cbb2500 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3729,7 +3729,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu) svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); } -static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) +static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3753,6 +3753,37 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) svm->current_vmcb->asid_generation--; } +static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) +{ + hpa_t root_tdp = vcpu->arch.mmu->root.hpa; + + /* + * When running on Hyper-V with EnlightenedNptTlb enabled, explicitly + * flush the NPT mappings via hypercall as flushing the ASID only + * affects virtual to physical mappings, it does not invalidate guest + * physical to host physical mappings. + */ + if (svm_hv_is_enlightened_tlb_enabled(vcpu) && VALID_PAGE(root_tdp)) + hyperv_flush_guest_mapping(root_tdp); + + svm_flush_tlb_asid(vcpu); +} + +static void svm_flush_tlb_all(struct kvm_vcpu *vcpu) +{ + /* + * When running on Hyper-V with EnlightenedNptTlb enabled, remote TLB + * flushes should be routed to hv_remote_flush_tlb() without requesting + * a "regular" remote flush. Reaching this point means either there's + * a KVM bug or a prior hv_remote_flush_tlb() call failed, both of + * which might be fatal to the guest. Yell, but try to recover. + */ + if (WARN_ON_ONCE(svm_hv_is_enlightened_tlb_enabled(vcpu))) + hv_remote_flush_tlb(vcpu->kvm); + + svm_flush_tlb_asid(vcpu); +} + static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4745,10 +4776,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_rflags = svm_set_rflags, .get_if_flag = svm_get_if_flag, - .flush_tlb_all = svm_flush_tlb_current, + .flush_tlb_all = svm_flush_tlb_all, .flush_tlb_current = svm_flush_tlb_current, .flush_tlb_gva = svm_flush_tlb_gva, - .flush_tlb_guest = svm_flush_tlb_current, + .flush_tlb_guest = svm_flush_tlb_asid, .vcpu_pre_run = svm_vcpu_pre_run, .vcpu_run = svm_vcpu_run, diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index cff838f15db53..786d46d73a8e5 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -6,6 +6,8 @@ #ifndef __ARCH_X86_KVM_SVM_ONHYPERV_H__ #define __ARCH_X86_KVM_SVM_ONHYPERV_H__ +#include + #if IS_ENABLED(CONFIG_HYPERV) #include "kvm_onhyperv.h" @@ -15,6 +17,14 @@ static struct kvm_x86_ops svm_x86_ops; int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu); +static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu) +{ + struct hv_vmcb_enlightenments *hve = &to_svm(vcpu)->vmcb->control.hv_enlightenments; + + return ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB && + !!hve->hv_enlightenments_control.enlightened_npt_tlb; +} + static inline void svm_hv_init_vmcb(struct vmcb *vmcb) { struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments; @@ -80,6 +90,11 @@ static inline void svm_hv_update_vp_id(struct vmcb *vmcb, struct kvm_vcpu *vcpu) } #else +static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu) +{ + return false; +} + static inline void svm_hv_init_vmcb(struct vmcb *vmcb) { } From d583fbd7066a2dea43050521a95d9770f7d7593e Mon Sep 17 00:00:00 2001 From: Dmytro Maluka Date: Wed, 22 Mar 2023 21:43:43 +0100 Subject: [PATCH 494/898] KVM: irqfd: Make resampler_list an RCU list It is useful to be able to do read-only traversal of the list of all the registered irqfd resamplers without locking the resampler_lock mutex. In particular, we are going to traverse it to search for a resampler registered for the given irq of an irqchip, and that will be done with an irqchip spinlock (ioapic->lock) held, so it is undesirable to lock a mutex in this context. So turn this list into an RCU list. For protecting the read side, reuse kvm->irq_srcu which is already used for protecting a number of irq related things (kvm->irq_routing, irqfd->resampler->list, kvm->irq_ack_notifier_list, kvm->arch.mask_notifier_list). Signed-off-by: Dmytro Maluka Message-Id: <20230322204344.50138-2-dmy@semihalf.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + include/linux/kvm_irqfd.h | 2 +- virt/kvm/eventfd.c | 8 ++++++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8ada23756b0ec..9f508c8e66e12 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -755,6 +755,7 @@ struct kvm { struct { spinlock_t lock; struct list_head items; + /* resampler_list update side is protected by resampler_lock. */ struct list_head resampler_list; struct mutex resampler_lock; } irqfds; diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index dac047abdba7c..8ad43692e3bbb 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -31,7 +31,7 @@ struct kvm_kernel_irqfd_resampler { /* * Entry in list of kvm->irqfd.resampler_list. Use for sharing * resamplers among irqfds on the same gsi. - * Accessed and modified under kvm->irqfds.resampler_lock + * RCU list modified under kvm->irqfds.resampler_lock */ struct list_head link; }; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 2a3ed401ce465..61aea70dd888d 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -96,8 +96,12 @@ irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) synchronize_srcu(&kvm->irq_srcu); if (list_empty(&resampler->list)) { - list_del(&resampler->link); + list_del_rcu(&resampler->link); kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); + /* + * synchronize_srcu(&kvm->irq_srcu) already called + * in kvm_unregister_irq_ack_notifier(). + */ kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, resampler->notifier.gsi, 0, false); kfree(resampler); @@ -369,7 +373,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) resampler->notifier.irq_acked = irqfd_resampler_ack; INIT_LIST_HEAD(&resampler->link); - list_add(&resampler->link, &kvm->irqfds.resampler_list); + list_add_rcu(&resampler->link, &kvm->irqfds.resampler_list); kvm_register_irq_ack_notifier(kvm, &resampler->notifier); irqfd->resampler = resampler; From fef8f2b90edbd7089a4278021314f11f056b0cbb Mon Sep 17 00:00:00 2001 From: Dmytro Maluka Date: Wed, 22 Mar 2023 21:43:44 +0100 Subject: [PATCH 495/898] KVM: x86/ioapic: Resample the pending state of an IRQ when unmasking KVM irqfd based emulation of level-triggered interrupts doesn't work quite correctly in some cases, particularly in the case of interrupts that are handled in a Linux guest as oneshot interrupts (IRQF_ONESHOT). Such an interrupt is acked to the device in its threaded irq handler, i.e. later than it is acked to the interrupt controller (EOI at the end of hardirq), not earlier. Linux keeps such interrupt masked until its threaded handler finishes, to prevent the EOI from re-asserting an unacknowledged interrupt. However, with KVM + vfio (or whatever is listening on the resamplefd) we always notify resamplefd at the EOI, so vfio prematurely unmasks the host physical IRQ, thus a new physical interrupt is fired in the host. This extra interrupt in the host is not a problem per se. The problem is that it is unconditionally queued for injection into the guest, so the guest sees an extra bogus interrupt. [*] There are observed at least 2 user-visible issues caused by those extra erroneous interrupts for a oneshot irq in the guest: 1. System suspend aborted due to a pending wakeup interrupt from ChromeOS EC (drivers/platform/chrome/cros_ec.c). 2. Annoying "invalid report id data" errors from ELAN0000 touchpad (drivers/input/mouse/elan_i2c_core.c), flooding the guest dmesg every time the touchpad is touched. The core issue here is that by the time when the guest unmasks the IRQ, the physical IRQ line is no longer asserted (since the guest has acked the interrupt to the device in the meantime), yet we unconditionally inject the interrupt queued into the guest by the previous resampling. So to fix the issue, we need a way to detect that the IRQ is no longer pending, and cancel the queued interrupt in this case. With IOAPIC we are not able to probe the physical IRQ line state directly (at least not if the underlying physical interrupt controller is an IOAPIC too), so in this patch we use irqfd resampler for that. Namely, instead of injecting the queued interrupt, we just notify the resampler that this interrupt is done. If the IRQ line is actually already deasserted, we are done. If it is still asserted, a new interrupt will be shortly triggered through irqfd and injected into the guest. In the case if there is no irqfd resampler registered for this IRQ, we cannot fix the issue, so we keep the existing behavior: immediately unconditionally inject the queued interrupt. This patch fixes the issue for x86 IOAPIC only. In the long run, we can fix it for other irqchips and other architectures too, possibly taking advantage of reading the physical state of the IRQ line, which is possible with some other irqchips (e.g. with arm64 GIC, maybe even with the legacy x86 PIC). [*] In this description we assume that the interrupt is a physical host interrupt forwarded to the guest e.g. by vfio. Potentially the same issue may occur also with a purely virtual interrupt from an emulated device, e.g. if the guest handles this interrupt, again, as a oneshot interrupt. Signed-off-by: Dmytro Maluka Link: https://lore.kernel.org/kvm/31420943-8c5f-125c-a5ee-d2fde2700083@semihalf.com/ Link: https://lore.kernel.org/lkml/87o7wrug0w.wl-maz@kernel.org/ Message-Id: <20230322204344.50138-3-dmy@semihalf.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/ioapic.c | 36 ++++++++++++++++++++++++++++++++--- include/linux/kvm_host.h | 10 ++++++++++ virt/kvm/eventfd.c | 41 ++++++++++++++++++++++++++++++++++------ 3 files changed, 78 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 042dee5561258..995eb50543601 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -368,9 +368,39 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) mask_after = e->fields.mask; if (mask_before != mask_after) kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); - if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG - && ioapic->irr & (1 << index)) - ioapic_service(ioapic, index, false); + if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG && + ioapic->irr & (1 << index) && !e->fields.mask && !e->fields.remote_irr) { + /* + * Pending status in irr may be outdated: the IRQ line may have + * already been deasserted by a device while the IRQ was masked. + * This occurs, for instance, if the interrupt is handled in a + * Linux guest as a oneshot interrupt (IRQF_ONESHOT). In this + * case the guest acknowledges the interrupt to the device in + * its threaded irq handler, i.e. after the EOI but before + * unmasking, so at the time of unmasking the IRQ line is + * already down but our pending irr bit is still set. In such + * cases, injecting this pending interrupt to the guest is + * buggy: the guest will receive an extra unwanted interrupt. + * + * So we need to check here if the IRQ is actually still pending. + * As we are generally not able to probe the IRQ line status + * directly, we do it through irqfd resampler. Namely, we clear + * the pending status and notify the resampler that this interrupt + * is done, without actually injecting it into the guest. If the + * IRQ line is actually already deasserted, we are done. If it is + * still asserted, a new interrupt will be shortly triggered + * through irqfd and injected into the guest. + * + * If, however, it's not possible to resample (no irqfd resampler + * registered for this irq), then unconditionally inject this + * pending interrupt into the guest, so the guest will not miss + * an interrupt, although may get an extra unwanted interrupt. + */ + if (kvm_notify_irqfd_resampler(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index)) + ioapic->irr &= ~(1 << index); + else + ioapic_service(ioapic, index, false); + } if (e->fields.delivery_mode == APIC_DM_FIXED) { struct kvm_lapic_irq irq; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9f508c8e66e12..a9adf75344bee 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1987,6 +1987,9 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); #ifdef CONFIG_HAVE_KVM_IRQFD int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); +bool kvm_notify_irqfd_resampler(struct kvm *kvm, + unsigned int irqchip, + unsigned int pin); void kvm_irq_routing_update(struct kvm *); #else static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) @@ -1995,6 +1998,13 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) } static inline void kvm_irqfd_release(struct kvm *kvm) {} + +static inline bool kvm_notify_irqfd_resampler(struct kvm *kvm, + unsigned int irqchip, + unsigned int pin) +{ + return false; +} #endif #else diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 61aea70dd888d..b0af834ffa950 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -55,6 +55,15 @@ irqfd_inject(struct work_struct *work) irqfd->gsi, 1, false); } +static void irqfd_resampler_notify(struct kvm_kernel_irqfd_resampler *resampler) +{ + struct kvm_kernel_irqfd *irqfd; + + list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link, + srcu_read_lock_held(&resampler->kvm->irq_srcu)) + eventfd_signal(irqfd->resamplefd, 1); +} + /* * Since resampler irqfds share an IRQ source ID, we de-assert once * then notify all of the resampler irqfds using this GSI. We can't @@ -65,7 +74,6 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) { struct kvm_kernel_irqfd_resampler *resampler; struct kvm *kvm; - struct kvm_kernel_irqfd *irqfd; int idx; resampler = container_of(kian, @@ -76,11 +84,7 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) resampler->notifier.gsi, 0, false); idx = srcu_read_lock(&kvm->irq_srcu); - - list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link, - srcu_read_lock_held(&kvm->irq_srcu)) - eventfd_signal(irqfd->resamplefd, 1); - + irqfd_resampler_notify(resampler); srcu_read_unlock(&kvm->irq_srcu, idx); } @@ -648,6 +652,31 @@ void kvm_irq_routing_update(struct kvm *kvm) spin_unlock_irq(&kvm->irqfds.lock); } +bool kvm_notify_irqfd_resampler(struct kvm *kvm, + unsigned int irqchip, + unsigned int pin) +{ + struct kvm_kernel_irqfd_resampler *resampler; + int gsi, idx; + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) { + list_for_each_entry_srcu(resampler, + &kvm->irqfds.resampler_list, link, + srcu_read_lock_held(&kvm->irq_srcu)) { + if (resampler->notifier.gsi == gsi) { + irqfd_resampler_notify(resampler); + srcu_read_unlock(&kvm->irq_srcu, idx); + return true; + } + } + } + srcu_read_unlock(&kvm->irq_srcu, idx); + + return false; +} + /* * create a host-wide workqueue for issuing deferred shutdown requests * aggregated from all vm* instances. We need our own isolated From 0dc902267cb32ade1c29eed8208e566ad743518a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 22 Mar 2023 07:12:20 -0700 Subject: [PATCH 496/898] KVM: x86: Suppress pending MMIO write exits if emulator detects exception Clear vcpu->mmio_needed when injecting an exception from the emulator to squash a (legitimate) warning about vcpu->mmio_needed being true at the start of KVM_RUN without a callback being registered to complete the userspace MMIO exit. Suppressing the MMIO write exit is inarguably wrong from an architectural perspective, but it is the least awful hack-a-fix due to shortcomings in KVM's uAPI, not to mention that KVM already suppresses MMIO writes in this scenario. Outside of REP string instructions, KVM doesn't provide a way to resume an instruction at the exact point where it was "interrupted" if said instruction partially completed before encountering an MMIO access. For MMIO reads, KVM immediately exits to userspace upon detecting MMIO as userspace provides the to-be-read value in a buffer, and so KVM can safely (more or less) restart the instruction from the beginning. When the emulator re-encounters the MMIO read, KVM will service the MMIO by getting the value from the buffer instead of exiting to userspace, i.e. KVM won't put the vCPU into an infinite loop. On an emulated MMIO write, KVM finishes the instruction before exiting to userspace, as exiting immediately would ultimately hang the vCPU due to the aforementioned shortcoming of KVM not being able to resume emulation in the middle of an instruction. For the vast majority of _emulated_ instructions, deferring the userspace exit doesn't cause problems as very few x86 instructions (again ignoring string operations) generate multiple writes. But for instructions that generate multiple writes, e.g. PUSHA (multiple pushes onto the stack), deferring the exit effectively results in only the final write triggering an exit to userspace. KVM does support multiple MMIO "fragments", but only for page splits; if an instruction performs multiple distinct MMIO writes, the number of fragments gets reset when the next MMIO write comes along and any previous MMIO writes are dropped. Circling back to the warning, if a deferred MMIO write coincides with an exception, e.g. in this case a #SS due to PUSHA underflowing the stack after queueing a write to an MMIO page on a previous push, KVM injects the exceptions and leaves the deferred MMIO pending without registering a callback, thus triggering the splat. Sweep the problem under the proverbial rug as dropping MMIO writes is not unique to the exception scenario (see above), i.e. instructions like PUSHA are fundamentally broken with respect to MMIO, and have been since KVM's inception. Reported-by: zhangjianguo Reported-by: syzbot+760a73552f47a8cd0fd9@syzkaller.appspotmail.com Reported-by: syzbot+8accb43ddc6bd1f5713a@syzkaller.appspotmail.com Signed-off-by: Sean Christopherson Message-Id: <20230322141220.2206241-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7713420abab09..45017576ad5ef 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8903,6 +8903,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, } if (ctxt->have_exception) { + WARN_ON_ONCE(vcpu->mmio_needed && !vcpu->mmio_is_write); + vcpu->mmio_needed = false; r = 1; inject_emulated_exception(vcpu); } else if (vcpu->arch.pio.count) { From 6c41468c7c12d74843bb414fc00307ea8a6318c3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 22 Mar 2023 07:32:59 -0700 Subject: [PATCH 497/898] KVM: x86: Clear "has_error_code", not "error_code", for RM exception injection When injecting an exception into a vCPU in Real Mode, suppress the error code by clearing the flag that tracks whether the error code is valid, not by clearing the error code itself. The "typo" was introduced by recent fix for SVM's funky Paged Real Mode. Opportunistically hoist the logic above the tracepoint so that the trace is coherent with respect to what is actually injected (this was also the behavior prior to the buggy commit). Fixes: b97f07458373 ("KVM: x86: determine if an exception has an error code only when injecting it.") Cc: stable@vger.kernel.org Cc: Maxim Levitsky Signed-off-by: Sean Christopherson Message-Id: <20230322143300.2209476-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 45017576ad5ef..7d6f98b7635fc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9908,13 +9908,20 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu) static void kvm_inject_exception(struct kvm_vcpu *vcpu) { + /* + * Suppress the error code if the vCPU is in Real Mode, as Real Mode + * exceptions don't report error codes. The presence of an error code + * is carried with the exception and only stripped when the exception + * is injected as intercepted #PF VM-Exits for AMD's Paged Real Mode do + * report an error code despite the CPU being in Real Mode. + */ + vcpu->arch.exception.has_error_code &= is_protmode(vcpu); + trace_kvm_inj_exception(vcpu->arch.exception.vector, vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code, vcpu->arch.exception.injected); - if (vcpu->arch.exception.error_code && !is_protmode(vcpu)) - vcpu->arch.exception.error_code = false; static_call(kvm_x86_inject_exception)(vcpu); } From 80962ec912db56d323883154efc2297473e692cb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 22 Mar 2023 07:33:00 -0700 Subject: [PATCH 498/898] KVM: nVMX: Do not report error code when synthesizing VM-Exit from Real Mode Don't report an error code to L1 when synthesizing a nested VM-Exit and L2 is in Real Mode. Per Intel's SDM, regarding the error code valid bit: This bit is always 0 if the VM exit occurred while the logical processor was in real-address mode (CR0.PE=0). The bug was introduced by a recent fix for AMD's Paged Real Mode, which moved the error code suppression from the common "queue exception" path to the "inject exception" path, but missed VMX's "synthesize VM-Exit" path. Fixes: b97f07458373 ("KVM: x86: determine if an exception has an error code only when injecting it.") Cc: stable@vger.kernel.org Cc: Maxim Levitsky Signed-off-by: Sean Christopherson Message-Id: <20230322143300.2209476-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1bc2b80273c97..768487611db78 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3868,7 +3868,12 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu) exit_qual = 0; } - if (ex->has_error_code) { + /* + * Unlike AMD's Paged Real Mode, which reports an error code on #PF + * VM-Exits even if the CPU is in Real Mode, Intel VMX never sets the + * "has error code" flags on VM-Exit if the CPU is in Real Mode. + */ + if (ex->has_error_code && is_protmode(vcpu)) { /* * Intel CPUs do not generate error codes with bits 31:16 set, * and more importantly VMX disallows setting bits 31:16 in the From 89aba4c26fae4e459f755a18912845c348ee48f3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 23 Mar 2023 13:09:16 +0100 Subject: [PATCH 499/898] s390/uaccess: add missing earlyclobber annotations to __clear_user() Add missing earlyclobber annotation to size, to, and tmp2 operands of the __clear_user() inline assembly since they are modified or written to before the last usage of all input operands. This can lead to incorrect register allocation for the inline assembly. Fixes: 6c2a9e6df604 ("[S390] Use alternative user-copy operations for new hardware.") Reported-by: Mark Rutland Link: https://lore.kernel.org/all/20230321122514.1743889-3-mark.rutland@arm.com/ Cc: stable@vger.kernel.org Reviewed-by: Gerald Schaefer Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/lib/uaccess.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 720036fb19242..d44214072779e 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -172,7 +172,7 @@ unsigned long __clear_user(void __user *to, unsigned long size) "4: slgr %0,%0\n" "5:\n" EX_TABLE(0b,2b) EX_TABLE(6b,2b) EX_TABLE(3b,5b) EX_TABLE(7b,5b) - : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2) + : "+&a" (size), "+&a" (to), "+a" (tmp1), "=&a" (tmp2) : "a" (empty_zero_page), [spec] "d" (spec.val) : "cc", "memory", "0"); return size; From 8f8cf767589f2131ae5d40f3758429095c701c84 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Mon, 20 Mar 2023 11:04:47 -0400 Subject: [PATCH 500/898] s390/vfio-ap: fix memory leak in vfio_ap device driver The device release callback function invoked to release the matrix device uses the dev_get_drvdata(device *dev) function to retrieve the pointer to the vfio_matrix_dev object in order to free its storage. The problem is, this object is not stored as drvdata with the device; since the kfree function will accept a NULL pointer, the memory for the vfio_matrix_dev object is never freed. Since the device being released is contained within the vfio_matrix_dev object, the container_of macro will be used to retrieve its pointer. Fixes: 1fde573413b5 ("s390: vfio-ap: base implementation of VFIO AP device driver") Signed-off-by: Tony Krowiak Reviewed-by: Harald Freudenberger Link: https://lore.kernel.org/r/20230320150447.34557-1-akrowiak@linux.ibm.com Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/vfio_ap_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c index 997b524bdd2b5..a48c6938ae68f 100644 --- a/drivers/s390/crypto/vfio_ap_drv.c +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -54,8 +54,9 @@ static struct ap_driver vfio_ap_drv = { static void vfio_ap_matrix_dev_release(struct device *dev) { - struct ap_matrix_dev *matrix_dev = dev_get_drvdata(dev); + struct ap_matrix_dev *matrix_dev; + matrix_dev = container_of(dev, struct ap_matrix_dev, device); kfree(matrix_dev); } From 7bb2107e63d8a4a13bbb6fe0e1cbd68784a2e9ac Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 16 Mar 2023 12:28:09 +0100 Subject: [PATCH 501/898] s390: reintroduce expoline dependence to scripts Expolines depend on scripts/basic/fixdep. And build of expolines can now race with the fixdep build: make[1]: *** Deleting file 'arch/s390/lib/expoline/expoline.o' /bin/sh: line 1: scripts/basic/fixdep: Permission denied make[1]: *** [../scripts/Makefile.build:385: arch/s390/lib/expoline/expoline.o] Error 126 make: *** [../arch/s390/Makefile:166: expoline_prepare] Error 2 The dependence was removed in the below Fixes: commit. So reintroduce the dependence on scripts. Fixes: a0b0987a7811 ("s390/nospec: remove unneeded header includes") Cc: Joe Lawrence Cc: stable@vger.kernel.org Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Sven Schnelle Cc: linux-s390@vger.kernel.org Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20230316112809.7903-1-jirislaby@kernel.org Signed-off-by: Vasily Gorbik --- arch/s390/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index b3235ab0ace83..ed646c583e4fe 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -162,7 +162,7 @@ vdso_prepare: prepare0 ifdef CONFIG_EXPOLINE_EXTERN modules_prepare: expoline_prepare -expoline_prepare: +expoline_prepare: scripts $(Q)$(MAKE) $(build)=arch/s390/lib/expoline arch/s390/lib/expoline/expoline.o endif endif From f9bbf25e7b2b74b52b2f269216a92657774f239c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 6 Mar 2023 12:31:30 +0100 Subject: [PATCH 502/898] s390/ptrace: fix PTRACE_GET_LAST_BREAK error handling Return -EFAULT if put_user() for the PTRACE_GET_LAST_BREAK request fails, instead of silently ignoring it. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ptrace.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index cf9659e13f03d..ea244a73efad9 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -474,9 +474,7 @@ long arch_ptrace(struct task_struct *child, long request, } return 0; case PTRACE_GET_LAST_BREAK: - put_user(child->thread.last_break, - (unsigned long __user *) data); - return 0; + return put_user(child->thread.last_break, (unsigned long __user *)data); case PTRACE_ENABLE_TE: if (!MACHINE_HAS_TE) return -EIO; @@ -824,9 +822,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, } return 0; case PTRACE_GET_LAST_BREAK: - put_user(child->thread.last_break, - (unsigned int __user *) data); - return 0; + return put_user(child->thread.last_break, (unsigned int __user *)data); } return compat_ptrace_request(child, request, addr, data); } From c56610a869bce03490faf4f157076370c71b8ae3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 24 Mar 2023 14:33:42 +0100 Subject: [PATCH 503/898] ACPI: bus: Rework system-level device notification handling For ACPI drivers that provide a ->notify() callback and set ACPI_DRIVER_ALL_NOTIFY_EVENTS in their flags, that callback can be invoked while either the ->add() or the ->remove() callback is running without any synchronization at the bus type level which is counter to the common-sense expectation that notification handling should only be enabled when the driver is actually bound to the device. As a result, if the driver is not careful enough, it's ->notify() callback may crash when it is invoked too early or too late [1]. This issue has been amplified by commit d6fb6ee1820c ("ACPI: bus: Drop driver member of struct acpi_device") that made acpi_bus_notify() check for the presence of the driver and its ->notify() callback directly instead of using an extra driver pointer that was only set and cleared by the bus type code, but it was present before that commit although it was harder to reproduce then. It can be addressed by using the observation that acpi_device_install_notify_handler() can be modified to install the handler for all types of events when ACPI_DRIVER_ALL_NOTIFY_EVENTS is set in the driver flags, in which case acpi_bus_notify() will not need to invoke the driver's ->notify() callback any more and that callback will only be invoked after acpi_device_install_notify_handler() has run and before acpi_device_remove_notify_handler() runs, which implies the correct ordering with respect to the other ACPI driver callbacks. Modify the code accordingly and while at it, drop two redundant local variables from acpi_bus_notify() and turn its description comment into a proper kerneldoc one. Fixes: d6fb6ee1820c ("ACPI: bus: Drop driver member of struct acpi_device") Link: https://lore.kernel.org/linux-acpi/9f6cba7a8a57e5a687c934e8e406e28c.squirrel@mail.panix.com # [1] Reported-by: Pierre Asselin Signed-off-by: Rafael J. Wysocki Tested-by: Pierre Asselin --- drivers/acpi/bus.c | 83 +++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 46 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 9531dd0fef509..a96da65057b19 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -459,85 +459,67 @@ static void acpi_bus_osc_negotiate_usb_control(void) Notification Handling -------------------------------------------------------------------------- */ -/* - * acpi_bus_notify - * --------------- - * Callback for all 'system-level' device notifications (values 0x00-0x7F). +/** + * acpi_bus_notify - Global system-level (0x00-0x7F) notifications handler + * @handle: Target ACPI object. + * @type: Notification type. + * @data: Ignored. + * + * This only handles notifications related to device hotplug. */ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data) { struct acpi_device *adev; - u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; - bool hotplug_event = false; switch (type) { case ACPI_NOTIFY_BUS_CHECK: acpi_handle_debug(handle, "ACPI_NOTIFY_BUS_CHECK event\n"); - hotplug_event = true; break; case ACPI_NOTIFY_DEVICE_CHECK: acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK event\n"); - hotplug_event = true; break; case ACPI_NOTIFY_DEVICE_WAKE: acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_WAKE event\n"); - break; + return; case ACPI_NOTIFY_EJECT_REQUEST: acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n"); - hotplug_event = true; break; case ACPI_NOTIFY_DEVICE_CHECK_LIGHT: acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK_LIGHT event\n"); /* TBD: Exactly what does 'light' mean? */ - break; + return; case ACPI_NOTIFY_FREQUENCY_MISMATCH: acpi_handle_err(handle, "Device cannot be configured due " "to a frequency mismatch\n"); - break; + return; case ACPI_NOTIFY_BUS_MODE_MISMATCH: acpi_handle_err(handle, "Device cannot be configured due " "to a bus mode mismatch\n"); - break; + return; case ACPI_NOTIFY_POWER_FAULT: acpi_handle_err(handle, "Device has suffered a power fault\n"); - break; + return; default: acpi_handle_debug(handle, "Unknown event type 0x%x\n", type); - break; + return; } adev = acpi_get_acpi_dev(handle); - if (!adev) - goto err; - - if (adev->dev.driver) { - struct acpi_driver *driver = to_acpi_driver(adev->dev.driver); - - if (driver && driver->ops.notify && - (driver->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS)) - driver->ops.notify(adev, type); - } - - if (!hotplug_event) { - acpi_put_acpi_dev(adev); - return; - } - if (ACPI_SUCCESS(acpi_hotplug_schedule(adev, type))) + if (adev && ACPI_SUCCESS(acpi_hotplug_schedule(adev, type))) return; acpi_put_acpi_dev(adev); - err: - acpi_evaluate_ost(handle, type, ost_code, NULL); + acpi_evaluate_ost(handle, type, ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL); } static void acpi_notify_device(acpi_handle handle, u32 event, void *data) @@ -562,42 +544,51 @@ static u32 acpi_device_fixed_event(void *data) return ACPI_INTERRUPT_HANDLED; } -static int acpi_device_install_notify_handler(struct acpi_device *device) +static int acpi_device_install_notify_handler(struct acpi_device *device, + struct acpi_driver *acpi_drv) { acpi_status status; - if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) + if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) { status = acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, acpi_device_fixed_event, device); - else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) + } else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) { status = acpi_install_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, acpi_device_fixed_event, device); - else - status = acpi_install_notify_handler(device->handle, - ACPI_DEVICE_NOTIFY, + } else { + u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ? + ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY; + + status = acpi_install_notify_handler(device->handle, type, acpi_notify_device, device); + } if (ACPI_FAILURE(status)) return -EINVAL; return 0; } -static void acpi_device_remove_notify_handler(struct acpi_device *device) +static void acpi_device_remove_notify_handler(struct acpi_device *device, + struct acpi_driver *acpi_drv) { - if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) + if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) { acpi_remove_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, acpi_device_fixed_event); - else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) + } else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) { acpi_remove_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, acpi_device_fixed_event); - else - acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, + } else { + u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ? + ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY; + + acpi_remove_notify_handler(device->handle, type, acpi_notify_device); + } } /* Handle events targeting \_SB device (at present only graceful shutdown) */ @@ -1039,7 +1030,7 @@ static int acpi_device_probe(struct device *dev) acpi_drv->name, acpi_dev->pnp.bus_id); if (acpi_drv->ops.notify) { - ret = acpi_device_install_notify_handler(acpi_dev); + ret = acpi_device_install_notify_handler(acpi_dev, acpi_drv); if (ret) { if (acpi_drv->ops.remove) acpi_drv->ops.remove(acpi_dev); @@ -1062,7 +1053,7 @@ static void acpi_device_remove(struct device *dev) struct acpi_driver *acpi_drv = to_acpi_driver(dev->driver); if (acpi_drv->ops.notify) - acpi_device_remove_notify_handler(acpi_dev); + acpi_device_remove_notify_handler(acpi_dev, acpi_drv); if (acpi_drv->ops.remove) acpi_drv->ops.remove(acpi_dev); From bb430b69422640891b0b8db762885730579a4145 Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Mon, 20 Mar 2023 13:54:30 +0100 Subject: [PATCH 504/898] loop: LOOP_CONFIGURE: send uevents for partitions LOOP_CONFIGURE is, as far as I understand it, supposed to be a way to combine LOOP_SET_FD and LOOP_SET_STATUS64 into a single syscall. When using LOOP_SET_FD+LOOP_SET_STATUS64, a single uevent would be sent for each partition found on the loop device after the second ioctl(), but when using LOOP_CONFIGURE, no such uevent was being sent. In the old setup, uevents are disabled for LOOP_SET_FD, but not for LOOP_SET_STATUS64. This makes sense, as it prevents uevents being sent for a partially configured device during LOOP_SET_FD - they're only sent at the end of LOOP_SET_STATUS64. But for LOOP_CONFIGURE, uevents were disabled for the entire operation, so that final notification was never issued. To fix this, reduce the critical section to exclude the loop_reread_partitions() call, which causes the uevents to be issued, to after uevents are re-enabled, matching the behaviour of the LOOP_SET_FD+LOOP_SET_STATUS64 combination. I noticed this because Busybox's losetup program recently changed from using LOOP_SET_FD+LOOP_SET_STATUS64 to LOOP_CONFIGURE, and this broke my setup, for which I want a notification from the kernel any time a new partition becomes available. Signed-off-by: Alyssa Ross [hch: reduced the critical section] Signed-off-by: Christoph Hellwig Fixes: 3448914e8cc5 ("loop: Add LOOP_CONFIGURE ioctl") Link: https://lore.kernel.org/r/20230320125430.55367-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 28eb59fd71ca2..bc31bb7072a2c 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1010,9 +1010,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); - /* suppress uevents while reconfiguring the device */ - dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1); - /* * If we don't hold exclusive handle for the device, upgrade to it * here to avoid changing device under exclusive owner. @@ -1067,6 +1064,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, } } + /* suppress uevents while reconfiguring the device */ + dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1); + disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE); set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); @@ -1109,17 +1109,17 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); + /* enable and uncork uevent now that we are done */ + dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); + loop_global_unlock(lo, is_loop); if (partscan) loop_reread_partitions(lo); + if (!(mode & FMODE_EXCL)) bd_abort_claiming(bdev, loop_configure); - error = 0; -done: - /* enable and uncork uevent now that we are done */ - dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); - return error; + return 0; out_unlock: loop_global_unlock(lo, is_loop); @@ -1130,7 +1130,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, fput(file); /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); - goto done; + return error; } static void __loop_clr_fd(struct loop_device *lo, bool release) From 2f1a6be12ab6c8470d5776e68644726c94257c54 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 22 Mar 2023 10:33:28 +0000 Subject: [PATCH 505/898] btrfs: fix race between quota disable and quota assign ioctls The quota assign ioctl can currently run in parallel with a quota disable ioctl call. The assign ioctl uses the quota root, while the disable ioctl frees that root, and therefore we can have a use-after-free triggered in the assign ioctl, leading to a trace like the following when KASAN is enabled: [672.723][T736] BUG: KASAN: slab-use-after-free in btrfs_search_slot+0x2962/0x2db0 [672.723][T736] Read of size 8 at addr ffff888022ec0208 by task btrfs_search_sl/27736 [672.724][T736] [672.725][T736] CPU: 1 PID: 27736 Comm: btrfs_search_sl Not tainted 6.3.0-rc3 #37 [672.723][T736] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 [672.727][T736] Call Trace: [672.728][T736] [672.728][T736] dump_stack_lvl+0xd9/0x150 [672.725][T736] print_report+0xc1/0x5e0 [672.720][T736] ? __virt_addr_valid+0x61/0x2e0 [672.727][T736] ? __phys_addr+0xc9/0x150 [672.725][T736] ? btrfs_search_slot+0x2962/0x2db0 [672.722][T736] kasan_report+0xc0/0xf0 [672.729][T736] ? btrfs_search_slot+0x2962/0x2db0 [672.724][T736] btrfs_search_slot+0x2962/0x2db0 [672.723][T736] ? fs_reclaim_acquire+0xba/0x160 [672.722][T736] ? split_leaf+0x13d0/0x13d0 [672.726][T736] ? rcu_is_watching+0x12/0xb0 [672.723][T736] ? kmem_cache_alloc+0x338/0x3c0 [672.722][T736] update_qgroup_status_item+0xf7/0x320 [672.724][T736] ? add_qgroup_rb+0x3d0/0x3d0 [672.739][T736] ? do_raw_spin_lock+0x12d/0x2b0 [672.730][T736] ? spin_bug+0x1d0/0x1d0 [672.737][T736] btrfs_run_qgroups+0x5de/0x840 [672.730][T736] ? btrfs_qgroup_rescan_worker+0xa70/0xa70 [672.738][T736] ? __del_qgroup_relation+0x4ba/0xe00 [672.738][T736] btrfs_ioctl+0x3d58/0x5d80 [672.735][T736] ? tomoyo_path_number_perm+0x16a/0x550 [672.737][T736] ? tomoyo_execute_permission+0x4a0/0x4a0 [672.731][T736] ? btrfs_ioctl_get_supported_features+0x50/0x50 [672.737][T736] ? __sanitizer_cov_trace_switch+0x54/0x90 [672.734][T736] ? do_vfs_ioctl+0x132/0x1660 [672.730][T736] ? vfs_fileattr_set+0xc40/0xc40 [672.730][T736] ? _raw_spin_unlock_irq+0x2e/0x50 [672.732][T736] ? sigprocmask+0xf2/0x340 [672.737][T736] ? __fget_files+0x26a/0x480 [672.732][T736] ? bpf_lsm_file_ioctl+0x9/0x10 [672.738][T736] ? btrfs_ioctl_get_supported_features+0x50/0x50 [672.736][T736] __x64_sys_ioctl+0x198/0x210 [672.736][T736] do_syscall_64+0x39/0xb0 [672.731][T736] entry_SYSCALL_64_after_hwframe+0x63/0xcd [672.739][T736] RIP: 0033:0x4556ad [672.742][T736] [672.743][T736] [672.748][T736] Allocated by task 27677: [672.743][T736] kasan_save_stack+0x22/0x40 [672.741][T736] kasan_set_track+0x25/0x30 [672.741][T736] __kasan_kmalloc+0xa4/0xb0 [672.749][T736] btrfs_alloc_root+0x48/0x90 [672.746][T736] btrfs_create_tree+0x146/0xa20 [672.744][T736] btrfs_quota_enable+0x461/0x1d20 [672.743][T736] btrfs_ioctl+0x4a1c/0x5d80 [672.747][T736] __x64_sys_ioctl+0x198/0x210 [672.749][T736] do_syscall_64+0x39/0xb0 [672.744][T736] entry_SYSCALL_64_after_hwframe+0x63/0xcd [672.756][T736] [672.757][T736] Freed by task 27677: [672.759][T736] kasan_save_stack+0x22/0x40 [672.759][T736] kasan_set_track+0x25/0x30 [672.756][T736] kasan_save_free_info+0x2e/0x50 [672.751][T736] ____kasan_slab_free+0x162/0x1c0 [672.758][T736] slab_free_freelist_hook+0x89/0x1c0 [672.752][T736] __kmem_cache_free+0xaf/0x2e0 [672.752][T736] btrfs_put_root+0x1ff/0x2b0 [672.759][T736] btrfs_quota_disable+0x80a/0xbc0 [672.752][T736] btrfs_ioctl+0x3e5f/0x5d80 [672.756][T736] __x64_sys_ioctl+0x198/0x210 [672.753][T736] do_syscall_64+0x39/0xb0 [672.765][T736] entry_SYSCALL_64_after_hwframe+0x63/0xcd [672.769][T736] [672.768][T736] The buggy address belongs to the object at ffff888022ec0000 [672.768][T736] which belongs to the cache kmalloc-4k of size 4096 [672.769][T736] The buggy address is located 520 bytes inside of [672.769][T736] freed 4096-byte region [ffff888022ec0000, ffff888022ec1000) [672.760][T736] [672.764][T736] The buggy address belongs to the physical page: [672.761][T736] page:ffffea00008bb000 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x22ec0 [672.766][T736] head:ffffea00008bb000 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 [672.779][T736] flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff) [672.770][T736] raw: 00fff00000010200 ffff888012842140 ffffea000054ba00 dead000000000002 [672.770][T736] raw: 0000000000000000 0000000000040004 00000001ffffffff 0000000000000000 [672.771][T736] page dumped because: kasan: bad access detected [672.778][T736] page_owner tracks the page as allocated [672.777][T736] page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd2040(__GFP_IO|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 88 [672.779][T736] get_page_from_freelist+0x119c/0x2d50 [672.779][T736] __alloc_pages+0x1cb/0x4a0 [672.776][T736] alloc_pages+0x1aa/0x270 [672.773][T736] allocate_slab+0x260/0x390 [672.771][T736] ___slab_alloc+0xa9a/0x13e0 [672.778][T736] __slab_alloc.constprop.0+0x56/0xb0 [672.771][T736] __kmem_cache_alloc_node+0x136/0x320 [672.789][T736] __kmalloc+0x4e/0x1a0 [672.783][T736] tomoyo_realpath_from_path+0xc3/0x600 [672.781][T736] tomoyo_path_perm+0x22f/0x420 [672.782][T736] tomoyo_path_unlink+0x92/0xd0 [672.780][T736] security_path_unlink+0xdb/0x150 [672.788][T736] do_unlinkat+0x377/0x680 [672.788][T736] __x64_sys_unlink+0xca/0x110 [672.789][T736] do_syscall_64+0x39/0xb0 [672.783][T736] entry_SYSCALL_64_after_hwframe+0x63/0xcd [672.784][T736] page last free stack trace: [672.787][T736] free_pcp_prepare+0x4e5/0x920 [672.787][T736] free_unref_page+0x1d/0x4e0 [672.784][T736] __unfreeze_partials+0x17c/0x1a0 [672.797][T736] qlist_free_all+0x6a/0x180 [672.796][T736] kasan_quarantine_reduce+0x189/0x1d0 [672.797][T736] __kasan_slab_alloc+0x64/0x90 [672.793][T736] kmem_cache_alloc+0x17c/0x3c0 [672.799][T736] getname_flags.part.0+0x50/0x4e0 [672.799][T736] getname_flags+0x9e/0xe0 [672.792][T736] vfs_fstatat+0x77/0xb0 [672.791][T736] __do_sys_newlstat+0x84/0x100 [672.798][T736] do_syscall_64+0x39/0xb0 [672.796][T736] entry_SYSCALL_64_after_hwframe+0x63/0xcd [672.790][T736] [672.791][T736] Memory state around the buggy address: [672.799][T736] ffff888022ec0100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [672.805][T736] ffff888022ec0180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [672.802][T736] >ffff888022ec0200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [672.809][T736] ^ [672.809][T736] ffff888022ec0280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [672.809][T736] ffff888022ec0300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fix this by having the qgroup assign ioctl take the qgroup ioctl mutex before calling btrfs_run_qgroups(), which is what all qgroup ioctls should call. Reported-by: butt3rflyh4ck Link: https://lore.kernel.org/linux-btrfs/CAFcO6XN3VD8ogmHwqRk4kbiwtpUSNySu2VAxN8waEPciCHJvMA@mail.gmail.com/ CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 2 ++ fs/btrfs/qgroup.c | 11 ++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 439a5bf5ebc67..f9dbf41fc8039 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3732,7 +3732,9 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) } /* update qgroup status and info */ + mutex_lock(&fs_info->qgroup_ioctl_lock); err = btrfs_run_qgroups(trans); + mutex_unlock(&fs_info->qgroup_ioctl_lock); if (err < 0) btrfs_handle_fs_error(fs_info, err, "failed to update qgroup status and info"); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 52a7d2fa2284f..f41da7ac360d8 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2828,13 +2828,22 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) } /* - * called from commit_transaction. Writes all changed qgroups to disk. + * Writes all changed qgroups to disk. + * Called by the transaction commit path and the qgroup assign ioctl. */ int btrfs_run_qgroups(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; int ret = 0; + /* + * In case we are called from the qgroup assign ioctl, assert that we + * are holding the qgroup_ioctl_lock, otherwise we can race with a quota + * disable operation (ioctl) and access a freed quota root. + */ + if (trans->transaction->state != TRANS_STATE_COMMIT_DOING) + lockdep_assert_held(&fs_info->qgroup_ioctl_lock); + if (!fs_info->quota_root) return ret; From 50d281fc434cb8e2497f5e70a309ccca6b1a09f0 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 23 Mar 2023 15:56:48 +0800 Subject: [PATCH 506/898] btrfs: scan device in non-exclusive mode This fixes mkfs/mount/check failures due to race with systemd-udevd scan. During the device scan initiated by systemd-udevd, other user space EXCL operations such as mkfs, mount, or check may get blocked and result in a "Device or resource busy" error. This is because the device scan process opens the device with the EXCL flag in the kernel. Two reports were received: - btrfs/179 test case, where the fsck command failed with the -EBUSY error - LTP pwritev03 test case, where mkfs.vfs failed with the -EBUSY error, when mkfs.vfs tried to overwrite old btrfs filesystem on the device. In both cases, fsck and mkfs (respectively) were racing with a systemd-udevd device scan, and systemd-udevd won, resulting in the -EBUSY error for fsck and mkfs. Reproducing the problem has been difficult because there is a very small window during which these userspace threads can race to acquire the exclusive device open. Even on the system where the problem was observed, the problem occurrences were anywhere between 10 to 400 iterations and chances of reproducing decreases with debug printk()s. However, an exclusive device open is unnecessary for the scan process, as there are no write operations on the device during scan. Furthermore, during the mount process, the superblock is re-read in the below function call chain: btrfs_mount_root btrfs_open_devices open_fs_devices btrfs_open_one_device btrfs_get_bdev_and_sb So, to fix this issue, removes the FMODE_EXCL flag from the scan operation, and add a comment. The case where mkfs may still write to the device and a scan is running, the btrfs signature is not written at that time so scan will not recognize such device. Reported-by: Sherry Yang Reported-by: kernel test robot Link: https://lore.kernel.org/oe-lkp/202303170839.fdf23068-oliver.sang@intel.com CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6d0124b6e79e3..ac0e8fb92fc80 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1366,8 +1366,17 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, * So, we need to add a special mount option to scan for * later supers, using BTRFS_SUPER_MIRROR_MAX instead */ - flags |= FMODE_EXCL; + /* + * Avoid using flag |= FMODE_EXCL here, as the systemd-udev may + * initiate the device scan which may race with the user's mount + * or mkfs command, resulting in failure. + * Since the device scan is solely for reading purposes, there is + * no need for FMODE_EXCL. Additionally, the devices are read again + * during the mount process. It is ok to get some inconsistent + * values temporarily, as the device paths of the fsid are the only + * required information for assembling the volume. + */ bdev = blkdev_get_by_path(path, flags, holder); if (IS_ERR(bdev)) return ERR_CAST(bdev); From 2d82a40aa7d6fcae0250ec68b8566cdee7bfd44c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 22 Mar 2023 09:46:34 +0000 Subject: [PATCH 507/898] btrfs: fix deadlock when aborting transaction during relocation with scrub Before relocating a block group we pause scrub, then do the relocation and then unpause scrub. The relocation process requires starting and committing a transaction, and if we have a failure in the critical section of the transaction commit path (transaction state >= TRANS_STATE_COMMIT_START), we will deadlock if there is a paused scrub. That results in stack traces like the following: [42.479] BTRFS info (device sdc): relocating block group 53876686848 flags metadata|raid6 [42.936] BTRFS warning (device sdc): Skipping commit of aborted transaction. [42.936] ------------[ cut here ]------------ [42.936] BTRFS: Transaction aborted (error -28) [42.936] WARNING: CPU: 11 PID: 346822 at fs/btrfs/transaction.c:1977 btrfs_commit_transaction+0xcc8/0xeb0 [btrfs] [42.936] Modules linked in: dm_flakey dm_mod loop btrfs (...) [42.936] CPU: 11 PID: 346822 Comm: btrfs Tainted: G W 6.3.0-rc2-btrfs-next-127+ #1 [42.936] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [42.936] RIP: 0010:btrfs_commit_transaction+0xcc8/0xeb0 [btrfs] [42.936] Code: ff ff 45 8b (...) [42.936] RSP: 0018:ffffb58649633b48 EFLAGS: 00010282 [42.936] RAX: 0000000000000000 RBX: ffff8be6ef4d5bd8 RCX: 0000000000000000 [42.936] RDX: 0000000000000002 RSI: ffffffffb35e7782 RDI: 00000000ffffffff [42.936] RBP: ffff8be6ef4d5c98 R08: 0000000000000000 R09: ffffb586496339e8 [42.936] R10: 0000000000000001 R11: 0000000000000001 R12: ffff8be6d38c7c00 [42.936] R13: 00000000ffffffe4 R14: ffff8be6c268c000 R15: ffff8be6ef4d5cf0 [42.936] FS: 00007f381a82b340(0000) GS:ffff8beddfcc0000(0000) knlGS:0000000000000000 [42.936] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [42.936] CR2: 00007f1e35fb7638 CR3: 0000000117680006 CR4: 0000000000370ee0 [42.936] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [42.936] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [42.936] Call Trace: [42.936] [42.936] ? start_transaction+0xcb/0x610 [btrfs] [42.936] prepare_to_relocate+0x111/0x1a0 [btrfs] [42.936] relocate_block_group+0x57/0x5d0 [btrfs] [42.936] ? btrfs_wait_nocow_writers+0x25/0xb0 [btrfs] [42.936] btrfs_relocate_block_group+0x248/0x3c0 [btrfs] [42.936] ? __pfx_autoremove_wake_function+0x10/0x10 [42.936] btrfs_relocate_chunk+0x3b/0x150 [btrfs] [42.936] btrfs_balance+0x8ff/0x11d0 [btrfs] [42.936] ? __kmem_cache_alloc_node+0x14a/0x410 [42.936] btrfs_ioctl+0x2334/0x32c0 [btrfs] [42.937] ? mod_objcg_state+0xd2/0x360 [42.937] ? refill_obj_stock+0xb0/0x160 [42.937] ? seq_release+0x25/0x30 [42.937] ? __rseq_handle_notify_resume+0x3b5/0x4b0 [42.937] ? percpu_counter_add_batch+0x2e/0xa0 [42.937] ? __x64_sys_ioctl+0x88/0xc0 [42.937] __x64_sys_ioctl+0x88/0xc0 [42.937] do_syscall_64+0x38/0x90 [42.937] entry_SYSCALL_64_after_hwframe+0x72/0xdc [42.937] RIP: 0033:0x7f381a6ffe9b [42.937] Code: 00 48 89 44 24 (...) [42.937] RSP: 002b:00007ffd45ecf060 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [42.937] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f381a6ffe9b [42.937] RDX: 00007ffd45ecf150 RSI: 00000000c4009420 RDI: 0000000000000003 [42.937] RBP: 0000000000000003 R08: 0000000000000013 R09: 0000000000000000 [42.937] R10: 00007f381a60c878 R11: 0000000000000246 R12: 00007ffd45ed0423 [42.937] R13: 00007ffd45ecf150 R14: 0000000000000000 R15: 00007ffd45ecf148 [42.937] [42.937] ---[ end trace 0000000000000000 ]--- [42.937] BTRFS: error (device sdc: state A) in cleanup_transaction:1977: errno=-28 No space left [59.196] INFO: task btrfs:346772 blocked for more than 120 seconds. [59.196] Tainted: G W 6.3.0-rc2-btrfs-next-127+ #1 [59.196] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [59.196] task:btrfs state:D stack:0 pid:346772 ppid:1 flags:0x00004002 [59.196] Call Trace: [59.196] [59.196] __schedule+0x392/0xa70 [59.196] ? __pv_queued_spin_lock_slowpath+0x165/0x370 [59.196] schedule+0x5d/0xd0 [59.196] __scrub_blocked_if_needed+0x74/0xc0 [btrfs] [59.197] ? __pfx_autoremove_wake_function+0x10/0x10 [59.197] scrub_pause_off+0x21/0x50 [btrfs] [59.197] scrub_simple_mirror+0x1c7/0x950 [btrfs] [59.197] ? scrub_parity_put+0x1a5/0x1d0 [btrfs] [59.198] ? __pfx_autoremove_wake_function+0x10/0x10 [59.198] scrub_stripe+0x20d/0x740 [btrfs] [59.198] scrub_chunk+0xc4/0x130 [btrfs] [59.198] scrub_enumerate_chunks+0x3e4/0x7a0 [btrfs] [59.198] ? __pfx_autoremove_wake_function+0x10/0x10 [59.198] btrfs_scrub_dev+0x236/0x6a0 [btrfs] [59.199] ? btrfs_ioctl+0xd97/0x32c0 [btrfs] [59.199] ? _copy_from_user+0x7b/0x80 [59.199] btrfs_ioctl+0xde1/0x32c0 [btrfs] [59.199] ? refill_stock+0x33/0x50 [59.199] ? should_failslab+0xa/0x20 [59.199] ? kmem_cache_alloc_node+0x151/0x460 [59.199] ? alloc_io_context+0x1b/0x80 [59.199] ? preempt_count_add+0x70/0xa0 [59.199] ? __x64_sys_ioctl+0x88/0xc0 [59.199] __x64_sys_ioctl+0x88/0xc0 [59.199] do_syscall_64+0x38/0x90 [59.199] entry_SYSCALL_64_after_hwframe+0x72/0xdc [59.199] RIP: 0033:0x7f82ffaffe9b [59.199] RSP: 002b:00007f82ff9fcc50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [59.199] RAX: ffffffffffffffda RBX: 000055b191e36310 RCX: 00007f82ffaffe9b [59.199] RDX: 000055b191e36310 RSI: 00000000c400941b RDI: 0000000000000003 [59.199] RBP: 0000000000000000 R08: 00007fff1575016f R09: 0000000000000000 [59.199] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f82ff9fd640 [59.199] R13: 000000000000006b R14: 00007f82ffa87580 R15: 0000000000000000 [59.199] [59.199] INFO: task btrfs:346773 blocked for more than 120 seconds. [59.200] Tainted: G W 6.3.0-rc2-btrfs-next-127+ #1 [59.200] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [59.201] task:btrfs state:D stack:0 pid:346773 ppid:1 flags:0x00004002 [59.201] Call Trace: [59.201] [59.201] __schedule+0x392/0xa70 [59.201] ? __pv_queued_spin_lock_slowpath+0x165/0x370 [59.201] schedule+0x5d/0xd0 [59.201] __scrub_blocked_if_needed+0x74/0xc0 [btrfs] [59.201] ? __pfx_autoremove_wake_function+0x10/0x10 [59.201] scrub_pause_off+0x21/0x50 [btrfs] [59.202] scrub_simple_mirror+0x1c7/0x950 [btrfs] [59.202] ? scrub_parity_put+0x1a5/0x1d0 [btrfs] [59.202] ? __pfx_autoremove_wake_function+0x10/0x10 [59.202] scrub_stripe+0x20d/0x740 [btrfs] [59.202] scrub_chunk+0xc4/0x130 [btrfs] [59.203] scrub_enumerate_chunks+0x3e4/0x7a0 [btrfs] [59.203] ? __pfx_autoremove_wake_function+0x10/0x10 [59.203] btrfs_scrub_dev+0x236/0x6a0 [btrfs] [59.203] ? btrfs_ioctl+0xd97/0x32c0 [btrfs] [59.203] ? _copy_from_user+0x7b/0x80 [59.203] btrfs_ioctl+0xde1/0x32c0 [btrfs] [59.204] ? should_failslab+0xa/0x20 [59.204] ? kmem_cache_alloc_node+0x151/0x460 [59.204] ? alloc_io_context+0x1b/0x80 [59.204] ? preempt_count_add+0x70/0xa0 [59.204] ? __x64_sys_ioctl+0x88/0xc0 [59.204] __x64_sys_ioctl+0x88/0xc0 [59.204] do_syscall_64+0x38/0x90 [59.204] entry_SYSCALL_64_after_hwframe+0x72/0xdc [59.204] RIP: 0033:0x7f82ffaffe9b [59.204] RSP: 002b:00007f82ff1fbc50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [59.204] RAX: ffffffffffffffda RBX: 000055b191e36790 RCX: 00007f82ffaffe9b [59.204] RDX: 000055b191e36790 RSI: 00000000c400941b RDI: 0000000000000003 [59.204] RBP: 0000000000000000 R08: 00007fff1575016f R09: 0000000000000000 [59.204] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f82ff1fc640 [59.204] R13: 000000000000006b R14: 00007f82ffa87580 R15: 0000000000000000 [59.204] [59.204] INFO: task btrfs:346774 blocked for more than 120 seconds. [59.205] Tainted: G W 6.3.0-rc2-btrfs-next-127+ #1 [59.205] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [59.206] task:btrfs state:D stack:0 pid:346774 ppid:1 flags:0x00004002 [59.206] Call Trace: [59.206] [59.206] __schedule+0x392/0xa70 [59.206] schedule+0x5d/0xd0 [59.206] __scrub_blocked_if_needed+0x74/0xc0 [btrfs] [59.206] ? __pfx_autoremove_wake_function+0x10/0x10 [59.206] scrub_pause_off+0x21/0x50 [btrfs] [59.207] scrub_simple_mirror+0x1c7/0x950 [btrfs] [59.207] ? scrub_parity_put+0x1a5/0x1d0 [btrfs] [59.207] ? __pfx_autoremove_wake_function+0x10/0x10 [59.207] scrub_stripe+0x20d/0x740 [btrfs] [59.208] scrub_chunk+0xc4/0x130 [btrfs] [59.208] scrub_enumerate_chunks+0x3e4/0x7a0 [btrfs] [59.208] ? __mutex_unlock_slowpath.isra.0+0x9a/0x120 [59.208] btrfs_scrub_dev+0x236/0x6a0 [btrfs] [59.208] ? btrfs_ioctl+0xd97/0x32c0 [btrfs] [59.209] ? _copy_from_user+0x7b/0x80 [59.209] btrfs_ioctl+0xde1/0x32c0 [btrfs] [59.209] ? should_failslab+0xa/0x20 [59.209] ? kmem_cache_alloc_node+0x151/0x460 [59.209] ? alloc_io_context+0x1b/0x80 [59.209] ? preempt_count_add+0x70/0xa0 [59.209] ? __x64_sys_ioctl+0x88/0xc0 [59.209] __x64_sys_ioctl+0x88/0xc0 [59.209] do_syscall_64+0x38/0x90 [59.209] entry_SYSCALL_64_after_hwframe+0x72/0xdc [59.209] RIP: 0033:0x7f82ffaffe9b [59.209] RSP: 002b:00007f82fe9fac50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [59.209] RAX: ffffffffffffffda RBX: 000055b191e36c10 RCX: 00007f82ffaffe9b [59.209] RDX: 000055b191e36c10 RSI: 00000000c400941b RDI: 0000000000000003 [59.209] RBP: 0000000000000000 R08: 00007fff1575016f R09: 0000000000000000 [59.209] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f82fe9fb640 [59.209] R13: 000000000000006b R14: 00007f82ffa87580 R15: 0000000000000000 [59.209] [59.209] INFO: task btrfs:346775 blocked for more than 120 seconds. [59.210] Tainted: G W 6.3.0-rc2-btrfs-next-127+ #1 [59.210] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [59.211] task:btrfs state:D stack:0 pid:346775 ppid:1 flags:0x00004002 [59.211] Call Trace: [59.211] [59.211] __schedule+0x392/0xa70 [59.211] schedule+0x5d/0xd0 [59.211] __scrub_blocked_if_needed+0x74/0xc0 [btrfs] [59.211] ? __pfx_autoremove_wake_function+0x10/0x10 [59.211] scrub_pause_off+0x21/0x50 [btrfs] [59.212] scrub_simple_mirror+0x1c7/0x950 [btrfs] [59.212] ? scrub_parity_put+0x1a5/0x1d0 [btrfs] [59.212] ? __pfx_autoremove_wake_function+0x10/0x10 [59.212] scrub_stripe+0x20d/0x740 [btrfs] [59.213] scrub_chunk+0xc4/0x130 [btrfs] [59.213] scrub_enumerate_chunks+0x3e4/0x7a0 [btrfs] [59.213] ? __mutex_unlock_slowpath.isra.0+0x9a/0x120 [59.213] btrfs_scrub_dev+0x236/0x6a0 [btrfs] [59.213] ? btrfs_ioctl+0xd97/0x32c0 [btrfs] [59.214] ? _copy_from_user+0x7b/0x80 [59.214] btrfs_ioctl+0xde1/0x32c0 [btrfs] [59.214] ? should_failslab+0xa/0x20 [59.214] ? kmem_cache_alloc_node+0x151/0x460 [59.214] ? alloc_io_context+0x1b/0x80 [59.214] ? preempt_count_add+0x70/0xa0 [59.214] ? __x64_sys_ioctl+0x88/0xc0 [59.214] __x64_sys_ioctl+0x88/0xc0 [59.214] do_syscall_64+0x38/0x90 [59.214] entry_SYSCALL_64_after_hwframe+0x72/0xdc [59.214] RIP: 0033:0x7f82ffaffe9b [59.214] RSP: 002b:00007f82fe1f9c50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [59.214] RAX: ffffffffffffffda RBX: 000055b191e37090 RCX: 00007f82ffaffe9b [59.214] RDX: 000055b191e37090 RSI: 00000000c400941b RDI: 0000000000000003 [59.214] RBP: 0000000000000000 R08: 00007fff1575016f R09: 0000000000000000 [59.214] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f82fe1fa640 [59.214] R13: 000000000000006b R14: 00007f82ffa87580 R15: 0000000000000000 [59.214] [59.214] INFO: task btrfs:346776 blocked for more than 120 seconds. [59.215] Tainted: G W 6.3.0-rc2-btrfs-next-127+ #1 [59.216] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [59.217] task:btrfs state:D stack:0 pid:346776 ppid:1 flags:0x00004002 [59.217] Call Trace: [59.217] [59.217] __schedule+0x392/0xa70 [59.217] ? __pv_queued_spin_lock_slowpath+0x165/0x370 [59.217] schedule+0x5d/0xd0 [59.217] __scrub_blocked_if_needed+0x74/0xc0 [btrfs] [59.217] ? __pfx_autoremove_wake_function+0x10/0x10 [59.217] scrub_pause_off+0x21/0x50 [btrfs] [59.217] scrub_simple_mirror+0x1c7/0x950 [btrfs] [59.217] ? scrub_parity_put+0x1a5/0x1d0 [btrfs] [59.218] ? __pfx_autoremove_wake_function+0x10/0x10 [59.218] scrub_stripe+0x20d/0x740 [btrfs] [59.218] scrub_chunk+0xc4/0x130 [btrfs] [59.218] scrub_enumerate_chunks+0x3e4/0x7a0 [btrfs] [59.219] ? __pfx_autoremove_wake_function+0x10/0x10 [59.219] btrfs_scrub_dev+0x236/0x6a0 [btrfs] [59.219] ? btrfs_ioctl+0xd97/0x32c0 [btrfs] [59.219] ? _copy_from_user+0x7b/0x80 [59.219] btrfs_ioctl+0xde1/0x32c0 [btrfs] [59.219] ? should_failslab+0xa/0x20 [59.219] ? kmem_cache_alloc_node+0x151/0x460 [59.219] ? alloc_io_context+0x1b/0x80 [59.219] ? preempt_count_add+0x70/0xa0 [59.219] ? __x64_sys_ioctl+0x88/0xc0 [59.219] __x64_sys_ioctl+0x88/0xc0 [59.219] do_syscall_64+0x38/0x90 [59.219] entry_SYSCALL_64_after_hwframe+0x72/0xdc [59.219] RIP: 0033:0x7f82ffaffe9b [59.219] RSP: 002b:00007f82fd9f8c50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [59.219] RAX: ffffffffffffffda RBX: 000055b191e37510 RCX: 00007f82ffaffe9b [59.219] RDX: 000055b191e37510 RSI: 00000000c400941b RDI: 0000000000000003 [59.219] RBP: 0000000000000000 R08: 00007fff1575016f R09: 0000000000000000 [59.219] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f82fd9f9640 [59.219] R13: 000000000000006b R14: 00007f82ffa87580 R15: 0000000000000000 [59.219] [59.219] INFO: task btrfs:346822 blocked for more than 120 seconds. [59.220] Tainted: G W 6.3.0-rc2-btrfs-next-127+ #1 [59.221] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [59.222] task:btrfs state:D stack:0 pid:346822 ppid:1 flags:0x00004002 [59.222] Call Trace: [59.222] [59.222] __schedule+0x392/0xa70 [59.222] schedule+0x5d/0xd0 [59.222] btrfs_scrub_cancel+0x91/0x100 [btrfs] [59.222] ? __pfx_autoremove_wake_function+0x10/0x10 [59.222] btrfs_commit_transaction+0x572/0xeb0 [btrfs] [59.223] ? start_transaction+0xcb/0x610 [btrfs] [59.223] prepare_to_relocate+0x111/0x1a0 [btrfs] [59.223] relocate_block_group+0x57/0x5d0 [btrfs] [59.223] ? btrfs_wait_nocow_writers+0x25/0xb0 [btrfs] [59.223] btrfs_relocate_block_group+0x248/0x3c0 [btrfs] [59.224] ? __pfx_autoremove_wake_function+0x10/0x10 [59.224] btrfs_relocate_chunk+0x3b/0x150 [btrfs] [59.224] btrfs_balance+0x8ff/0x11d0 [btrfs] [59.224] ? __kmem_cache_alloc_node+0x14a/0x410 [59.224] btrfs_ioctl+0x2334/0x32c0 [btrfs] [59.225] ? mod_objcg_state+0xd2/0x360 [59.225] ? refill_obj_stock+0xb0/0x160 [59.225] ? seq_release+0x25/0x30 [59.225] ? __rseq_handle_notify_resume+0x3b5/0x4b0 [59.225] ? percpu_counter_add_batch+0x2e/0xa0 [59.225] ? __x64_sys_ioctl+0x88/0xc0 [59.225] __x64_sys_ioctl+0x88/0xc0 [59.225] do_syscall_64+0x38/0x90 [59.225] entry_SYSCALL_64_after_hwframe+0x72/0xdc [59.225] RIP: 0033:0x7f381a6ffe9b [59.225] RSP: 002b:00007ffd45ecf060 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [59.225] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f381a6ffe9b [59.225] RDX: 00007ffd45ecf150 RSI: 00000000c4009420 RDI: 0000000000000003 [59.225] RBP: 0000000000000003 R08: 0000000000000013 R09: 0000000000000000 [59.225] R10: 00007f381a60c878 R11: 0000000000000246 R12: 00007ffd45ed0423 [59.225] R13: 00007ffd45ecf150 R14: 0000000000000000 R15: 00007ffd45ecf148 [59.225] What happens is the following: 1) A scrub is running, so fs_info->scrubs_running is 1; 2) Task A starts block group relocation, and at btrfs_relocate_chunk() it pauses scrub by calling btrfs_scrub_pause(). That increments fs_info->scrub_pause_req from 0 to 1 and waits for the scrub task to pause (for fs_info->scrubs_paused to be == to fs_info->scrubs_running); 3) The scrub task pauses at scrub_pause_off(), waiting for fs_info->scrub_pause_req to decrease to 0; 4) Task A then enters btrfs_relocate_block_group(), and down that call chain we start a transaction and then attempt to commit it; 5) When task A calls btrfs_commit_transaction(), it either will do the commit itself or wait for some other task that already started the commit of the transaction - it doesn't matter which case; 6) The transaction commit enters state TRANS_STATE_COMMIT_START; 7) An error happens during the transaction commit, like -ENOSPC when running delayed refs or delayed items for example; 8) This results in calling transaction.c:cleanup_transaction(), where we call btrfs_scrub_cancel(), incrementing fs_info->scrub_cancel_req from 0 to 1, and blocking this task waiting for fs_info->scrubs_running to decrease to 0; 9) From this point on, both the transaction commit and the scrub task hang forever: 1) The transaction commit is waiting for fs_info->scrubs_running to be decreased to 0; 2) The scrub task is at scrub_pause_off() waiting for fs_info->scrub_pause_req to decrease to 0 - so it can not proceed to stop the scrub and decrement fs_info->scrubs_running from 0 to 1. Therefore resulting in a deadlock. Fix this by having cleanup_transaction(), called if a transaction commit fails, not call btrfs_scrub_cancel() if relocation is in progress, and having btrfs_relocate_block_group() call btrfs_scrub_cancel() instead if the relocation failed and a transaction abort happened. This was triggered with btrfs/061 from fstests. Fixes: 55e3a601c81c ("btrfs: Fix data checksum error cause by replace with io-load.") CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 15 ++++++++++++++- fs/btrfs/volumes.c | 9 ++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 18329ebcb1cbf..b8d5b1fa9a03b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2035,7 +2035,20 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err) if (current->journal_info == trans) current->journal_info = NULL; - btrfs_scrub_cancel(fs_info); + + /* + * If relocation is running, we can't cancel scrub because that will + * result in a deadlock. Before relocating a block group, relocation + * pauses scrub, then starts and commits a transaction before unpausing + * scrub. If the transaction commit is being done by the relocation + * task or triggered by another task and the relocation task is waiting + * for the commit, and we end up here due to an error in the commit + * path, then calling btrfs_scrub_cancel() will deadlock, as we are + * asking for scrub to stop while having it asked to be paused higher + * above in relocation code. + */ + if (!test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) + btrfs_scrub_cancel(fs_info); kmem_cache_free(btrfs_trans_handle_cachep, trans); } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ac0e8fb92fc80..c6d5928704001 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3275,8 +3275,15 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) btrfs_scrub_pause(fs_info); ret = btrfs_relocate_block_group(fs_info, chunk_offset); btrfs_scrub_continue(fs_info); - if (ret) + if (ret) { + /* + * If we had a transaction abort, stop all running scrubs. + * See transaction.c:cleanup_transaction() why we do it here. + */ + if (BTRFS_FS_ERROR(fs_info)) + btrfs_scrub_cancel(fs_info); return ret; + } block_group = btrfs_lookup_block_group(fs_info, chunk_offset); if (!block_group) From 44378cd113e5f15bb0a89f5ac5a0e687b52feb90 Mon Sep 17 00:00:00 2001 From: Duy Nguyen Date: Tue, 28 Mar 2023 00:03:03 +0000 Subject: [PATCH 508/898] ASoC: da7213.c: add missing pm_runtime_disable() da7213.c is missing pm_runtime_disable(), thus we will get below error when rmmod -> insmod. $ rmmod snd-soc-da7213.ko $ insmod snd-soc-da7213.ko da7213 0-001a: Unbalanced pm_runtime_enable!" [Kuninori adjusted to latest upstream] Signed-off-by: Duy Nguyen Signed-off-by: Kuninori Morimoto Tested-by: Khanh Le Link: https://lore.kernel.org/r/87mt3xg2tk.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/codecs/da7213.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c index 0068780fe0a74..1c1f211a8e2e7 100644 --- a/sound/soc/codecs/da7213.c +++ b/sound/soc/codecs/da7213.c @@ -2022,6 +2022,11 @@ static int da7213_i2c_probe(struct i2c_client *i2c) return ret; } +static void da7213_i2c_remove(struct i2c_client *i2c) +{ + pm_runtime_disable(&i2c->dev); +} + static int __maybe_unused da7213_runtime_suspend(struct device *dev) { struct da7213_priv *da7213 = dev_get_drvdata(dev); @@ -2065,6 +2070,7 @@ static struct i2c_driver da7213_i2c_driver = { .pm = &da7213_pm, }, .probe_new = da7213_i2c_probe, + .remove = da7213_i2c_remove, .id_table = da7213_i2c_id, }; From 1231363aec86704a6b0467a12e3ca7bdf890e01d Mon Sep 17 00:00:00 2001 From: Juraj Pecigos Date: Sun, 26 Mar 2023 11:29:49 +0200 Subject: [PATCH 509/898] nvme-pci: mark Lexar NM760 as IGNORE_DEV_SUBNQN A system with more than one of these SSDs will only have one usable. The kernel fails to detect more than one nvme device due to duplicate cntlids. before: [ 9.395229] nvme 0000:01:00.0: platform quirk: setting simple suspend [ 9.395262] nvme nvme0: pci function 0000:01:00.0 [ 9.395282] nvme 0000:03:00.0: platform quirk: setting simple suspend [ 9.395305] nvme nvme1: pci function 0000:03:00.0 [ 9.409873] nvme nvme0: Duplicate cntlid 1 with nvme1, subsys nqn.2022-07.com.siliconmotion:nvm-subsystem-sn- , rejecting [ 9.409982] nvme nvme0: Removing after probe failure status: -22 [ 9.427487] nvme nvme1: allocated 64 MiB host memory buffer. [ 9.445088] nvme nvme1: 16/0/0 default/read/poll queues [ 9.449898] nvme nvme1: Ignoring bogus Namespace Identifiers after: [ 1.161890] nvme 0000:01:00.0: platform quirk: setting simple suspend [ 1.162660] nvme nvme0: pci function 0000:01:00.0 [ 1.162684] nvme 0000:03:00.0: platform quirk: setting simple suspend [ 1.162707] nvme nvme1: pci function 0000:03:00.0 [ 1.191354] nvme nvme0: allocated 64 MiB host memory buffer. [ 1.193378] nvme nvme1: allocated 64 MiB host memory buffer. [ 1.211044] nvme nvme1: 16/0/0 default/read/poll queues [ 1.211080] nvme nvme0: 16/0/0 default/read/poll queues [ 1.216145] nvme nvme0: Ignoring bogus Namespace Identifiers [ 1.216261] nvme nvme1: Ignoring bogus Namespace Identifiers Adding the NVME_QUIRK_IGNORE_DEV_SUBNQN quirk to resolves the issue. Signed-off-by: Juraj Pecigos Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b615906263f37..282d808400c5b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3441,7 +3441,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x1d97, 0x1d97), /* Lexar NM620 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */ - .driver_data = NVME_QUIRK_BOGUS_NID, }, + .driver_data = NVME_QUIRK_BOGUS_NID | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061), .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065), From ac9bba3ff1ef18ef0038eb7a590f93529eb566de Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 23 Mar 2023 10:59:57 -0400 Subject: [PATCH 510/898] net: fman: Add myself as a reviewer I've read through or reworked a good portion of this driver. Add myself as a reviewer. Signed-off-by: Sean Anderson Reviewed-by: Simon Horman Acked-by: Madalin Bucur Link: https://lore.kernel.org/r/20230323145957.2999211-1-sean.anderson@seco.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index d8ebab595b2a9..883f9b88911b2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8216,6 +8216,7 @@ F: drivers/net/ethernet/freescale/dpaa FREESCALE QORIQ DPAA FMAN DRIVER M: Madalin Bucur +R: Sean Anderson L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/fsl-fman.txt From 4f7702ab6fc3c03cc10b0cf0bdbd0433146483e0 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 24 Mar 2023 09:16:13 +0100 Subject: [PATCH 511/898] MAINTAINERS: remove the linux-nfc@lists.01.org list Some MAINTAINERS sections mention to mail patches to the list linux-nfc@lists.01.org. Probably due to changes on Intel's 01.org website and servers, the list server lists.01.org/ml01.01.org is simply gone. Considering emails recorded on lore.kernel.org, only a handful of emails where sent to the linux-nfc@lists.01.org list, and they are usually also sent to the netdev mailing list as well, where they are then picked up. So, there is no big benefit in restoring the linux-nfc elsewhere. Remove all occurrences of the linux-nfc@lists.01.org list in MAINTAINERS. Suggested-by: Krzysztof Kozlowski Link: https://lore.kernel.org/all/CAKXUXMzggxQ43DUZZRkPMGdo5WkzgA=i14ySJUFw4kZfE5ZaZA@mail.gmail.com/ Signed-off-by: Lukas Bulwahn Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230324081613.32000-1-lukas.bulwahn@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 ------ 1 file changed, 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 883f9b88911b2..3bc692d653287 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14657,10 +14657,8 @@ F: net/ipv4/nexthop.c NFC SUBSYSTEM M: Krzysztof Kozlowski -L: linux-nfc@lists.01.org (subscribers-only) L: netdev@vger.kernel.org S: Maintained -B: mailto:linux-nfc@lists.01.org F: Documentation/devicetree/bindings/net/nfc/ F: drivers/nfc/ F: include/linux/platform_data/nfcmrvl.h @@ -14671,7 +14669,6 @@ F: net/nfc/ NFC VIRTUAL NCI DEVICE DRIVER M: Bongsu Jeon L: netdev@vger.kernel.org -L: linux-nfc@lists.01.org (subscribers-only) S: Supported F: drivers/nfc/virtual_ncidev.c F: tools/testing/selftests/nci/ @@ -15043,7 +15040,6 @@ F: Documentation/devicetree/bindings/sound/nxp,tfa989x.yaml F: sound/soc/codecs/tfa989x.c NXP-NCI NFC DRIVER -L: linux-nfc@lists.01.org (subscribers-only) S: Orphan F: Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml F: drivers/nfc/nxp-nci @@ -18488,7 +18484,6 @@ F: include/media/drv-intf/s3c_camif.h SAMSUNG S3FWRN5 NFC DRIVER M: Krzysztof Kozlowski -L: linux-nfc@lists.01.org (subscribers-only) S: Maintained F: Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml F: drivers/nfc/s3fwrn5 @@ -20982,7 +20977,6 @@ F: drivers/iio/magnetometer/tmag5273.c TI TRF7970A NFC DRIVER M: Mark Greer L: linux-wireless@vger.kernel.org -L: linux-nfc@lists.01.org (subscribers-only) S: Supported F: Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml F: drivers/nfc/trf7970a.c From 21f27df854008b86349a203bf97fef79bb11f53e Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Mon, 13 Feb 2023 09:55:20 +0100 Subject: [PATCH 512/898] KVM: s390: pv: fix external interruption loop not always detected To determine whether the guest has caused an external interruption loop upon code 20 (external interrupt) intercepts, the ext_new_psw needs to be inspected to see whether external interrupts are enabled. Under non-PV, ext_new_psw can simply be taken from guest lowcore. Under PV, KVM can only access the encrypted guest lowcore and hence the ext_new_psw must not be taken from guest lowcore. handle_external_interrupt() incorrectly did that and hence was not able to reliably tell whether an external interruption loop is happening or not. False negatives cause spurious failures of my kvm-unit-test for extint loops[1] under PV. Since code 20 is only caused under PV if and only if the guest's ext_new_psw is enabled for external interrupts, false positive detection of a external interruption loop can not happen. Fix this issue by instead looking at the guest PSW in the state description. Since the PSW swap for external interrupt is done by the ultravisor before the intercept is caused, this reliably tells whether the guest is enabled for external interrupts in the ext_new_psw. Also update the comments to explain better what is happening. [1] https://lore.kernel.org/kvm/20220812062151.1980937-4-nrb@linux.ibm.com/ Signed-off-by: Nico Boehr Reviewed-by: Janosch Frank Reviewed-by: Christian Borntraeger Fixes: 201ae986ead7 ("KVM: s390: protvirt: Implement interrupt injection") Link: https://lore.kernel.org/r/20230213085520.100756-2-nrb@linux.ibm.com Message-Id: <20230213085520.100756-2-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/intercept.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 0ee02dae14b2b..2cda8d9d7c6ef 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -271,10 +271,18 @@ static int handle_prog(struct kvm_vcpu *vcpu) * handle_external_interrupt - used for external interruption interceptions * @vcpu: virtual cpu * - * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if - * the new PSW does not have external interrupts disabled. In the first case, - * we've got to deliver the interrupt manually, and in the second case, we - * drop to userspace to handle the situation there. + * This interception occurs if: + * - the CPUSTAT_EXT_INT bit was already set when the external interrupt + * occurred. In this case, the interrupt needs to be injected manually to + * preserve interrupt priority. + * - the external new PSW has external interrupts enabled, which will cause an + * interruption loop. We drop to userspace in this case. + * + * The latter case can be detected by inspecting the external mask bit in the + * external new psw. + * + * Under PV, only the latter case can occur, since interrupt priorities are + * handled in the ultravisor. */ static int handle_external_interrupt(struct kvm_vcpu *vcpu) { @@ -285,10 +293,18 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) vcpu->stat.exit_external_interrupt++; - rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t)); - if (rc) - return rc; - /* We can not handle clock comparator or timer interrupt with bad PSW */ + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + newpsw = vcpu->arch.sie_block->gpsw; + } else { + rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t)); + if (rc) + return rc; + } + + /* + * Clock comparator or timer interrupt with external interrupt enabled + * will cause interrupt loop. Drop to userspace. + */ if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) && (newpsw.mask & PSW_MASK_EXT)) return -EOPNOTSUPP; From 580031ff9952b7dbf48dedba6b56a100ae002bef Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 27 Mar 2023 17:42:32 -0700 Subject: [PATCH 513/898] bpf: tcp: Use sock_gen_put instead of sock_put in bpf_iter_tcp While reviewing the udp-iter batching patches, noticed the bpf_iter_tcp calling sock_put() is incorrect. It should call sock_gen_put instead because bpf_iter_tcp is iterating the ehash table which has the req sk and tw sk. This patch replaces all sock_put with sock_gen_put in the bpf_iter_tcp codepath. Fixes: 04c7820b776f ("bpf: tcp: Bpf iter batching and lock_sock") Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230328004232.2134233-1-martin.lau@linux.dev --- net/ipv4/tcp_ipv4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ea370afa70ed9..b9d55277cb858 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2780,7 +2780,7 @@ static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter) { while (iter->cur_sk < iter->end_sk) - sock_put(iter->batch[iter->cur_sk++]); + sock_gen_put(iter->batch[iter->cur_sk++]); } static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter, @@ -2941,7 +2941,7 @@ static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) * st->bucket. See tcp_seek_last_pos(). */ st->offset++; - sock_put(iter->batch[iter->cur_sk++]); + sock_gen_put(iter->batch[iter->cur_sk++]); } if (iter->cur_sk < iter->end_sk) From a6f6a95f25803500079513780d11a911ce551d76 Mon Sep 17 00:00:00 2001 From: George Guo Date: Tue, 28 Mar 2023 15:13:35 +0800 Subject: [PATCH 514/898] LoongArch, bpf: Fix jit to skip speculation barrier opcode Just skip the opcode(BPF_ST | BPF_NOSPEC) in the BPF JIT instead of failing to JIT the entire program, given LoongArch currently has no couterpart of a speculation barrier instruction. To verify the issue, use the ltp testcase as shown below. Also, Wang says: I can confirm there's currently no speculation barrier equivalent on LonogArch. (Loongson says there are builtin mitigations for Spectre-V1 and V2 on their chips, and AFAIK efforts to port the exploits to mips/LoongArch have all failed a few years ago.) Without this patch: $ ./bpf_prog02 [...] bpf_common.c:123: TBROK: Failed verification: ??? (524) [...] Summary: passed 0 failed 0 broken 1 skipped 0 warnings 0 With this patch: $ ./bpf_prog02 [...] Summary: passed 0 failed 0 broken 0 skipped 0 warnings 0 Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support") Signed-off-by: George Guo Signed-off-by: Daniel Borkmann Acked-by: WANG Xuerui Cc: Tiezhu Yang Link: https://lore.kernel.org/bpf/20230328071335.2664966-1-guodongtai@kylinos.cn --- arch/loongarch/net/bpf_jit.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 288003a9f0cae..d586df48ecc64 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1022,6 +1022,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext emit_atomic(insn, ctx); break; + /* Speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + default: pr_err("bpf_jit: unknown opcode %02x\n", code); return -EINVAL; From 9a251cae51d57289908222e6c322ca61fccc25fd Mon Sep 17 00:00:00 2001 From: Sven Auhagen Date: Sat, 25 Mar 2023 17:40:29 +0100 Subject: [PATCH 515/898] net: mvpp2: classifier flow fix fragmentation flags Add missing IP Fragmentation Flag. Fixes: f9358e12a0af ("net: mvpp2: split ingress traffic into multiple flows") Signed-off-by: Sven Auhagen Reviewed-by: Marcin Wojtas Signed-off-by: Paolo Abeni --- .../net/ethernet/marvell/mvpp2/mvpp2_cls.c | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c index 41d935d1aaf6f..40aeaa7bd739f 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c @@ -62,35 +62,38 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = { MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_UNTAG, MVPP22_CLS_HEK_IP4_2T, MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 | - MVPP2_PRS_RI_L4_TCP, + MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP, MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK), MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_UNTAG, MVPP22_CLS_HEK_IP4_2T, MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT | - MVPP2_PRS_RI_L4_TCP, + MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP, MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK), MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_UNTAG, MVPP22_CLS_HEK_IP4_2T, MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER | - MVPP2_PRS_RI_L4_TCP, + MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP, MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK), /* TCP over IPv4 flows, fragmented, with vlan tag */ MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG, MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED, - MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_TCP, + MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_IP_FRAG_TRUE | + MVPP2_PRS_RI_L4_TCP, MVPP2_PRS_IP_MASK), MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG, MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED, - MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_TCP, + MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_IP_FRAG_TRUE | + MVPP2_PRS_RI_L4_TCP, MVPP2_PRS_IP_MASK), MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG, MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED, - MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_TCP, + MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_IP_FRAG_TRUE | + MVPP2_PRS_RI_L4_TCP, MVPP2_PRS_IP_MASK), /* UDP over IPv4 flows, Not fragmented, no vlan tag */ @@ -132,35 +135,38 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = { MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_UNTAG, MVPP22_CLS_HEK_IP4_2T, MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 | - MVPP2_PRS_RI_L4_UDP, + MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP, MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK), MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_UNTAG, MVPP22_CLS_HEK_IP4_2T, MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT | - MVPP2_PRS_RI_L4_UDP, + MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP, MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK), MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_UNTAG, MVPP22_CLS_HEK_IP4_2T, MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER | - MVPP2_PRS_RI_L4_UDP, + MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP, MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK), /* UDP over IPv4 flows, fragmented, with vlan tag */ MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG, MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED, - MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_UDP, + MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_IP_FRAG_TRUE | + MVPP2_PRS_RI_L4_UDP, MVPP2_PRS_IP_MASK), MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG, MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED, - MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_UDP, + MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_IP_FRAG_TRUE | + MVPP2_PRS_RI_L4_UDP, MVPP2_PRS_IP_MASK), MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG, MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED, - MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_UDP, + MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_IP_FRAG_TRUE | + MVPP2_PRS_RI_L4_UDP, MVPP2_PRS_IP_MASK), /* TCP over IPv6 flows, not fragmented, no vlan tag */ From a587a84813b90372cb0a7565e201a4075da67919 Mon Sep 17 00:00:00 2001 From: Sven Auhagen Date: Sat, 25 Mar 2023 17:40:53 +0100 Subject: [PATCH 516/898] net: mvpp2: parser fix QinQ The mvpp2 parser entry for QinQ has the inner and outer VLAN in the wrong order. Fix the problem by swapping them. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Sven Auhagen Reviewed-by: Marcin Wojtas Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index 75ba57bd1d46d..ed8be396428b9 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -1539,8 +1539,8 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv) if (!priv->prs_double_vlans) return -ENOMEM; - /* Double VLAN: 0x8100, 0x88A8 */ - err = mvpp2_prs_double_vlan_add(priv, ETH_P_8021Q, ETH_P_8021AD, + /* Double VLAN: 0x88A8, 0x8100 */ + err = mvpp2_prs_double_vlan_add(priv, ETH_P_8021AD, ETH_P_8021Q, MVPP2_PRS_PORT_MASK); if (err) return err; From 031a416c2170866be5132ae42e14453d669b0cb1 Mon Sep 17 00:00:00 2001 From: Sven Auhagen Date: Sat, 25 Mar 2023 17:41:05 +0100 Subject: [PATCH 517/898] net: mvpp2: parser fix PPPoE In PPPoE add all IPv4 header option length to the parser and adjust the L3 and L4 offset accordingly. Currently the L4 match does not work with PPPoE and all packets are matched as L3 IP4 OPT. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Sven Auhagen Signed-off-by: Paolo Abeni --- .../net/ethernet/marvell/mvpp2/mvpp2_prs.c | 82 ++++++++----------- 1 file changed, 34 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index ed8be396428b9..9af22f497a40f 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -1607,59 +1607,45 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv) static int mvpp2_prs_pppoe_init(struct mvpp2 *priv) { struct mvpp2_prs_entry pe; - int tid; - - /* IPv4 over PPPoE with options */ - tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, - MVPP2_PE_LAST_FREE_TID); - if (tid < 0) - return tid; - - memset(&pe, 0, sizeof(pe)); - mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_PPPOE); - pe.index = tid; - - mvpp2_prs_match_etype(&pe, 0, PPP_IP); - - mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); - mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4_OPT, - MVPP2_PRS_RI_L3_PROTO_MASK); - /* goto ipv4 dest-address (skip eth_type + IP-header-size - 4) */ - mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + - sizeof(struct iphdr) - 4, - MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); - /* Set L3 offset */ - mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, - MVPP2_ETH_TYPE_LEN, - MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); - - /* Update shadow table and hw entry */ - mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_PPPOE); - mvpp2_prs_hw_write(priv, &pe); + int tid, ihl; - /* IPv4 over PPPoE without options */ - tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, - MVPP2_PE_LAST_FREE_TID); - if (tid < 0) - return tid; + /* IPv4 over PPPoE with header length >= 5 */ + for (ihl = MVPP2_PRS_IPV4_IHL_MIN; ihl <= MVPP2_PRS_IPV4_IHL_MAX; ihl++) { + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; - pe.index = tid; + memset(&pe, 0, sizeof(pe)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_PPPOE); + pe.index = tid; - mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN, - MVPP2_PRS_IPV4_HEAD | - MVPP2_PRS_IPV4_IHL_MIN, - MVPP2_PRS_IPV4_HEAD_MASK | - MVPP2_PRS_IPV4_IHL_MASK); + mvpp2_prs_match_etype(&pe, 0, PPP_IP); + mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN, + MVPP2_PRS_IPV4_HEAD | ihl, + MVPP2_PRS_IPV4_HEAD_MASK | + MVPP2_PRS_IPV4_IHL_MASK); - /* Clear ri before updating */ - pe.sram[MVPP2_PRS_SRAM_RI_WORD] = 0x0; - pe.sram[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0; - mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4, - MVPP2_PRS_RI_L3_PROTO_MASK); + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4, + MVPP2_PRS_RI_L3_PROTO_MASK); + /* goto ipv4 dst-address (skip eth_type + IP-header-size - 4) */ + mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + + sizeof(struct iphdr) - 4, + MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + /* Set L3 offset */ + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, + MVPP2_ETH_TYPE_LEN, + MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); + /* Set L4 offset */ + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4, + MVPP2_ETH_TYPE_LEN + (ihl * 4), + MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); - /* Update shadow table and hw entry */ - mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_PPPOE); - mvpp2_prs_hw_write(priv, &pe); + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_PPPOE); + mvpp2_prs_hw_write(priv, &pe); + } /* IPv6 over PPPoE */ tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, From 1abce0580b89464546ae06abd5891ebec43c9470 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Fri, 3 Mar 2023 09:59:47 +1100 Subject: [PATCH 518/898] powerpc/64s: Fix __pte_needs_flush() false positive warning Userspace PROT_NONE ptes set _PAGE_PRIVILEGED, triggering a false positive debug assertion that __pte_flags_need_flush() is not called on a kernel mapping. Detect when it is a userspace PROT_NONE page by checking the required bits of PAGE_NONE are set, and none of the RWX bits are set. pte_protnone() is insufficient here because it always returns 0 when CONFIG_NUMA_BALANCING=n. Fixes: b11931e9adc1 ("powerpc/64s: add pte_needs_flush and huge_pmd_needs_flush") Cc: stable@vger.kernel.org # v6.1+ Reported-by: Russell Currey Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230302225947.81083-1-bgray@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/tlbflush.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 2bbc0fcce04a3..5e26c7f2c25ab 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -148,6 +148,11 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, */ } +static inline bool __pte_protnone(unsigned long pte) +{ + return (pte & (pgprot_val(PAGE_NONE) | _PAGE_RWX)) == pgprot_val(PAGE_NONE); +} + static inline bool __pte_flags_need_flush(unsigned long oldval, unsigned long newval) { @@ -164,8 +169,8 @@ static inline bool __pte_flags_need_flush(unsigned long oldval, /* * We do not expect kernel mappings or non-PTEs or not-present PTEs. */ - VM_WARN_ON_ONCE(oldval & _PAGE_PRIVILEGED); - VM_WARN_ON_ONCE(newval & _PAGE_PRIVILEGED); + VM_WARN_ON_ONCE(!__pte_protnone(oldval) && oldval & _PAGE_PRIVILEGED); + VM_WARN_ON_ONCE(!__pte_protnone(newval) && newval & _PAGE_PRIVILEGED); VM_WARN_ON_ONCE(!(oldval & _PAGE_PTE)); VM_WARN_ON_ONCE(!(newval & _PAGE_PTE)); VM_WARN_ON_ONCE(!(oldval & _PAGE_PRESENT)); From fd7276189450110ed835eb0a334e62d2f1c4e3be Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 26 Mar 2023 16:15:57 -0600 Subject: [PATCH 519/898] powerpc: Don't try to copy PPR for task with NULL pt_regs powerpc sets up PF_KTHREAD and PF_IO_WORKER with a NULL pt_regs, which from my (arguably very short) checking is not commonly done for other archs. This is fine, except when PF_IO_WORKER's have been created and the task does something that causes a coredump to be generated. Then we get this crash: Kernel attempted to read user page (160) - exploit attempt? (uid: 1000) BUG: Kernel NULL pointer dereference on read at 0x00000160 Faulting instruction address: 0xc0000000000c3a60 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=32 NUMA pSeries Modules linked in: bochs drm_vram_helper drm_kms_helper xts binfmt_misc ecb ctr syscopyarea sysfillrect cbc sysimgblt drm_ttm_helper aes_generic ttm sg libaes evdev joydev virtio_balloon vmx_crypto gf128mul drm dm_mod fuse loop configfs drm_panel_orientation_quirks ip_tables x_tables autofs4 hid_generic usbhid hid xhci_pci xhci_hcd usbcore usb_common sd_mod CPU: 1 PID: 1982 Comm: ppc-crash Not tainted 6.3.0-rc2+ #88 Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,HEAD hv:linux,kvm pSeries NIP: c0000000000c3a60 LR: c000000000039944 CTR: c0000000000398e0 REGS: c0000000041833b0 TRAP: 0300 Not tainted (6.3.0-rc2+) MSR: 800000000280b033 CR: 88082828 XER: 200400f8 ... NIP memcpy_power7+0x200/0x7d0 LR ppr_get+0x64/0xb0 Call Trace: ppr_get+0x40/0xb0 (unreliable) __regset_get+0x180/0x1f0 regset_get_alloc+0x64/0x90 elf_core_dump+0xb98/0x1b60 do_coredump+0x1c34/0x24a0 get_signal+0x71c/0x1410 do_notify_resume+0x140/0x6f0 interrupt_exit_user_prepare_main+0x29c/0x320 interrupt_exit_user_prepare+0x6c/0xa0 interrupt_return_srr_user+0x8/0x138 Because ppr_get() is trying to copy from a PF_IO_WORKER with a NULL pt_regs. Check for a valid pt_regs in both ppc_get/ppr_set, and return an error if not set. The actual error value doesn't seem to be important here, so just pick -EINVAL. Fixes: fa439810cc1b ("powerpc/ptrace: Enable support for NT_PPPC_TAR, NT_PPC_PPR, NT_PPC_DSCR") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Jens Axboe [mpe: Trim oops in change log, add Fixes & Cc stable] Signed-off-by: Michael Ellerman Link: https://msgid.link/d9f63344-fe7c-56ae-b420-4a1a04a2ae4c@kernel.dk --- arch/powerpc/kernel/ptrace/ptrace-view.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 2087a785f05f1..5fff0d04b23f7 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -290,6 +290,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, static int ppr_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { + if (!target->thread.regs) + return -EINVAL; + return membuf_write(&to, &target->thread.regs->ppr, sizeof(u64)); } @@ -297,6 +300,9 @@ static int ppr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { + if (!target->thread.regs) + return -EINVAL; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.regs->ppr, 0, sizeof(u64)); } From f22c993f31fa9615df46e49cd768b713d39a852f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 27 Mar 2023 10:31:38 +0200 Subject: [PATCH 520/898] smsc911x: avoid PHY being resumed when interface is not up SMSC911x doesn't need mdiobus suspend/resume, that's why it sets 'mac_managed_pm'. However, setting it needs to be moved from init to probe, so mdiobus PM functions will really never be called (e.g. when the interface is not up yet during suspend/resume). Fixes: 3ce9f2bef755 ("net: smsc911x: Stop and start PHY during suspend and resume") Suggested-by: Heiner Kallweit Signed-off-by: Wolfram Sang Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230327083138.6044-1-wsa+renesas@sang-engineering.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/smsc/smsc911x.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index a2e511912e6a9..a690d139e1770 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1037,8 +1037,6 @@ static int smsc911x_mii_probe(struct net_device *dev) return ret; } - /* Indicate that the MAC is responsible for managing PHY PM */ - phydev->mac_managed_pm = true; phy_attached_info(phydev); phy_set_max_speed(phydev, SPEED_100); @@ -1066,6 +1064,7 @@ static int smsc911x_mii_init(struct platform_device *pdev, struct net_device *dev) { struct smsc911x_data *pdata = netdev_priv(dev); + struct phy_device *phydev; int err = -ENXIO; pdata->mii_bus = mdiobus_alloc(); @@ -1108,6 +1107,10 @@ static int smsc911x_mii_init(struct platform_device *pdev, goto err_out_free_bus_2; } + phydev = phy_find_first(pdata->mii_bus); + if (phydev) + phydev->mac_managed_pm = true; + return 0; err_out_free_bus_2: From 38518593ec55e897abda4b4be77b2ec8ec4447d1 Mon Sep 17 00:00:00 2001 From: Tanu Malhotra Date: Mon, 27 Mar 2023 11:58:38 -0700 Subject: [PATCH 521/898] HID: intel-ish-hid: Fix kernel panic during warm reset During warm reset device->fw_client is set to NULL. If a bus driver is registered after this NULL setting and before new firmware clients are enumerated by ISHTP, kernel panic will result in the function ishtp_cl_bus_match(). This is because of reference to device->fw_client->props.protocol_name. ISH firmware after getting successfully loaded, sends a warm reset notification to remove all clients from the bus and sets device->fw_client to NULL. Until kernel v5.15, all enabled ISHTP kernel module drivers were loaded right after any of the first ISHTP device was registered, regardless of whether it was a matched or an unmatched device. This resulted in all drivers getting registered much before the warm reset notification from ISH. Starting kernel v5.16, this issue got exposed after the change was introduced to load only bus drivers for the respective matching devices. In this scenario, cros_ec_ishtp device and cros_ec_ishtp driver are registered after the warm reset device fw_client NULL setting. cros_ec_ishtp driver_register() triggers the callback to ishtp_cl_bus_match() to match ISHTP driver to the device and causes kernel panic in guid_equal() when dereferencing fw_client NULL pointer to get protocol_name. Fixes: f155dfeaa4ee ("platform/x86: isthp_eclite: only load for matching devices") Fixes: facfe0a4fdce ("platform/chrome: chros_ec_ishtp: only load for matching devices") Fixes: 0d0cccc0fd83 ("HID: intel-ish-hid: hid-client: only load for matching devices") Fixes: 44e2a58cb880 ("HID: intel-ish-hid: fw-loader: only load for matching devices") Cc: # 5.16+ Signed-off-by: Tanu Malhotra Tested-by: Shaunak Saha Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp/bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c index f68aba8794fe5..d4296681cf720 100644 --- a/drivers/hid/intel-ish-hid/ishtp/bus.c +++ b/drivers/hid/intel-ish-hid/ishtp/bus.c @@ -241,8 +241,8 @@ static int ishtp_cl_bus_match(struct device *dev, struct device_driver *drv) struct ishtp_cl_device *device = to_ishtp_cl_device(dev); struct ishtp_cl_driver *driver = to_ishtp_cl_driver(drv); - return guid_equal(&driver->id[0].guid, - &device->fw_client->props.protocol_name); + return(device->fw_client ? guid_equal(&driver->id[0].guid, + &device->fw_client->props.protocol_name) : 0); } /** From 05310f31ca74673a96567fb14637b7d5d6c82ea5 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 27 Mar 2023 10:36:45 +0200 Subject: [PATCH 522/898] xen/netback: don't do grant copy across page boundary Fix xenvif_get_requests() not to do grant copy operations across local page boundaries. This requires to double the maximum number of copy operations per queue, as each copy could now be split into 2. Make sure that struct xenvif_tx_cb doesn't grow too large. Cc: stable@vger.kernel.org Fixes: ad7f402ae4f4 ("xen/netback: Ensure protocol headers don't fall in the non-linear area") Signed-off-by: Juergen Gross Reviewed-by: Paul Durrant Signed-off-by: Paolo Abeni --- drivers/net/xen-netback/common.h | 2 +- drivers/net/xen-netback/netback.c | 25 +++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 3dbfc8a6924ed..1fcbd83f7ff2e 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -166,7 +166,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; - struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS]; + struct gnttab_copy tx_copy_ops[2 * MAX_PENDING_REQS]; struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS]; struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS]; /* passed to gnttab_[un]map_refs with pages under (un)mapping */ diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 1b42676ca1418..111c179f161b5 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -334,6 +334,7 @@ static int xenvif_count_requests(struct xenvif_queue *queue, struct xenvif_tx_cb { u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1]; u8 copy_count; + u32 split_mask; }; #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) @@ -361,6 +362,8 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size) struct sk_buff *skb = alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN, GFP_ATOMIC | __GFP_NOWARN); + + BUILD_BUG_ON(sizeof(*XENVIF_TX_CB(skb)) > sizeof(skb->cb)); if (unlikely(skb == NULL)) return NULL; @@ -396,11 +399,13 @@ static void xenvif_get_requests(struct xenvif_queue *queue, nr_slots = shinfo->nr_frags + 1; copy_count(skb) = 0; + XENVIF_TX_CB(skb)->split_mask = 0; /* Create copy ops for exactly data_len bytes into the skb head. */ __skb_put(skb, data_len); while (data_len > 0) { int amount = data_len > txp->size ? txp->size : data_len; + bool split = false; cop->source.u.ref = txp->gref; cop->source.domid = queue->vif->domid; @@ -413,6 +418,13 @@ static void xenvif_get_requests(struct xenvif_queue *queue, cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb) - data_len); + /* Don't cross local page boundary! */ + if (cop->dest.offset + amount > XEN_PAGE_SIZE) { + amount = XEN_PAGE_SIZE - cop->dest.offset; + XENVIF_TX_CB(skb)->split_mask |= 1U << copy_count(skb); + split = true; + } + cop->len = amount; cop->flags = GNTCOPY_source_gref; @@ -420,7 +432,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue, pending_idx = queue->pending_ring[index]; callback_param(queue, pending_idx).ctx = NULL; copy_pending_idx(skb, copy_count(skb)) = pending_idx; - copy_count(skb)++; + if (!split) + copy_count(skb)++; cop++; data_len -= amount; @@ -441,7 +454,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue, nr_slots--; } else { /* The copy op partially covered the tx_request. - * The remainder will be mapped. + * The remainder will be mapped or copied in the next + * iteration. */ txp->offset += amount; txp->size -= amount; @@ -539,6 +553,13 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, pending_idx = copy_pending_idx(skb, i); newerr = (*gopp_copy)->status; + + /* Split copies need to be handled together. */ + if (XENVIF_TX_CB(skb)->split_mask & (1U << i)) { + (*gopp_copy)++; + if (!newerr) + newerr = (*gopp_copy)->status; + } if (likely(!newerr)) { /* The first frag might still have this slot mapped */ if (i < copy_count(skb) - 1 || !sharedslot) From 8fb8ebf9487785184846d04e915bfe327bf4ccd5 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 27 Mar 2023 10:36:46 +0200 Subject: [PATCH 523/898] xen/netback: remove not needed test in xenvif_tx_build_gops() The tests for the number of grant mapping or copy operations reaching the array size of the operations buffer at the end of the main loop in xenvif_tx_build_gops() isn't needed. The loop can handle at maximum MAX_PENDING_REQS transfer requests, as XEN_RING_NR_UNCONSUMED_REQUESTS() is taking unsent responses into consideration, too. Remove the tests. Suggested-by: Jan Beulich Signed-off-by: Juergen Gross Reviewed-by: Paul Durrant Signed-off-by: Paolo Abeni --- drivers/net/xen-netback/netback.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 111c179f161b5..4943be4fd99d8 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1082,10 +1082,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, __skb_queue_tail(&queue->tx_queue, skb); queue->tx.req_cons = idx; - - if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) || - (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops))) - break; } return; From 005308f7bdacf5685ed1a431244a183dbbb9e0e8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Mar 2023 19:56:18 -0600 Subject: [PATCH 524/898] io_uring/poll: clear single/double poll flags on poll arming Unless we have at least one entry queued, then don't call into io_poll_remove_entries(). Normally this isn't possible, but if we retry poll then we can have ->nr_entries cleared again as we're setting it up. If this happens for a poll retry, then we'll still have at least REQ_F_SINGLE_POLL set. io_poll_remove_entries() then thinks it has entries to remove. Clear REQ_F_SINGLE_POLL and REQ_F_DOUBLE_POLL unconditionally when arming a poll request. Fixes: c16bda37594f ("io_uring/poll: allow some retries for poll triggering spuriously") Cc: stable@vger.kernel.org Reported-by: Pengfei Xu Signed-off-by: Jens Axboe --- io_uring/poll.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/poll.c b/io_uring/poll.c index 795facbd0e9f1..55306e8010813 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -726,6 +726,7 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags) apoll = io_req_alloc_apoll(req, issue_flags); if (!apoll) return IO_APOLL_ABORTED; + req->flags &= ~(REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL); req->flags |= REQ_F_POLLED; ipt.pt._qproc = io_async_queue_proc; From b26cd9325be4c1fcd331b77f10acb627c560d4d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kornel=20Dul=C4=99ba?= Date: Mon, 20 Mar 2023 09:32:59 +0000 Subject: [PATCH 525/898] pinctrl: amd: Disable and mask interrupts on resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a similar problem to the one observed in: commit 4e5a04be88fe ("pinctrl: amd: disable and mask interrupts on probe"). On some systems, during suspend/resume cycle firmware leaves an interrupt enabled on a pin that is not used by the kernel. This confuses the AMD pinctrl driver and causes spurious interrupts. The driver already has logic to detect if a pin is used by the kernel. Leverage it to re-initialize interrupt fields of a pin only if it's not used by us. Cc: stable@vger.kernel.org Fixes: dbad75dd1f25 ("pinctrl: add AMD GPIO driver support.") Signed-off-by: Kornel Dulęba Link: https://lore.kernel.org/r/20230320093259.845178-1-korneld@chromium.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 36 +++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 9236a132c7bab..609821b756c2f 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -872,32 +872,34 @@ static const struct pinconf_ops amd_pinconf_ops = { .pin_config_group_set = amd_pinconf_group_set, }; -static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) +static void amd_gpio_irq_init_pin(struct amd_gpio *gpio_dev, int pin) { - struct pinctrl_desc *desc = gpio_dev->pctrl->desc; + const struct pin_desc *pd; unsigned long flags; u32 pin_reg, mask; - int i; mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) | BIT(INTERRUPT_MASK_OFF) | BIT(INTERRUPT_ENABLE_OFF) | BIT(WAKE_CNTRL_OFF_S4); - for (i = 0; i < desc->npins; i++) { - int pin = desc->pins[i].number; - const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin); - - if (!pd) - continue; + pd = pin_desc_get(gpio_dev->pctrl, pin); + if (!pd) + return; - raw_spin_lock_irqsave(&gpio_dev->lock, flags); + raw_spin_lock_irqsave(&gpio_dev->lock, flags); + pin_reg = readl(gpio_dev->base + pin * 4); + pin_reg &= ~mask; + writel(pin_reg, gpio_dev->base + pin * 4); + raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); +} - pin_reg = readl(gpio_dev->base + i * 4); - pin_reg &= ~mask; - writel(pin_reg, gpio_dev->base + i * 4); +static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) +{ + struct pinctrl_desc *desc = gpio_dev->pctrl->desc; + int i; - raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); - } + for (i = 0; i < desc->npins; i++) + amd_gpio_irq_init_pin(gpio_dev, i); } #ifdef CONFIG_PM_SLEEP @@ -950,8 +952,10 @@ static int amd_gpio_resume(struct device *dev) for (i = 0; i < desc->npins; i++) { int pin = desc->pins[i].number; - if (!amd_gpio_should_save(gpio_dev, pin)) + if (!amd_gpio_should_save(gpio_dev, pin)) { + amd_gpio_irq_init_pin(gpio_dev, pin); continue; + } raw_spin_lock_irqsave(&gpio_dev->lock, flags); gpio_dev->saved_regs[i] |= readl(gpio_dev->base + pin * 4) & PIN_IRQ_PENDING; From f91bf3272a18356e8585f6bbba896d794632f2af Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 16 Mar 2023 00:25:14 +0100 Subject: [PATCH 526/898] iommu/exynos: Fix set_platform_dma_ops() callback There are some subtle differences between release_device() and set_platform_dma_ops() callbacks, so separate those two callbacks. Device links should be removed only in release_device(), because they were created in probe_device() on purpose and they are needed for proper Exynos IOMMU driver operation. While fixing this, remove the conditional code as it is not really needed. Reported-by: Jason Gunthorpe Fixes: 189d496b48b1 ("iommu/exynos: Add missing set_platform_dma_ops callback") Signed-off-by: Marek Szyprowski Reviewed-by: Sam Protsenko Link: https://lore.kernel.org/r/20230315232514.1046589-1-m.szyprowski@samsung.com Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 483aaaeb6daea..1abd187c6075e 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1415,23 +1415,26 @@ static struct iommu_device *exynos_iommu_probe_device(struct device *dev) return &data->iommu; } -static void exynos_iommu_release_device(struct device *dev) +static void exynos_iommu_set_platform_dma(struct device *dev) { struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); - struct sysmmu_drvdata *data; if (owner->domain) { struct iommu_group *group = iommu_group_get(dev); if (group) { -#ifndef CONFIG_ARM - WARN_ON(owner->domain != - iommu_group_default_domain(group)); -#endif exynos_iommu_detach_device(owner->domain, dev); iommu_group_put(group); } } +} + +static void exynos_iommu_release_device(struct device *dev) +{ + struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); + struct sysmmu_drvdata *data; + + exynos_iommu_set_platform_dma(dev); list_for_each_entry(data, &owner->controllers, owner_node) device_link_del(data->link); @@ -1479,7 +1482,7 @@ static const struct iommu_ops exynos_iommu_ops = { .domain_alloc = exynos_iommu_domain_alloc, .device_group = generic_device_group, #ifdef CONFIG_ARM - .set_platform_dma_ops = exynos_iommu_release_device, + .set_platform_dma_ops = exynos_iommu_set_platform_dma, #endif .probe_device = exynos_iommu_probe_device, .release_device = exynos_iommu_release_device, From 66ceaa4c4507f2b598d37b528796dd34158d31bf Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 13 Mar 2023 13:36:07 -0700 Subject: [PATCH 527/898] ice: fix W=1 headers mismatch make modules W=1 returns: .../ice/ice_txrx_lib.c:448: warning: Function parameter or member 'first_idx' not described in 'ice_finalize_xdp_rx' .../ice/ice_txrx.c:948: warning: Function parameter or member 'ntc' not described in 'ice_get_rx_buf' .../ice/ice_txrx.c:1038: warning: Excess function parameter 'rx_buf' description in 'ice_construct_skb' Fix these warnings by adding and deleting the deviant arguments. Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") Fixes: d7956d81f150 ("ice: Pull out next_to_clean bump out of ice_put_rx_buf()") CC: Maciej Fijalkowski Signed-off-by: Jesse Brandeburg Reviewed-by: Piotr Raczynski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index b61dd9f015405..4fcf2d07eb853 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -938,6 +938,7 @@ ice_reuse_rx_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *old_buf) * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use * @rx_ring: Rx descriptor ring to transact packets on * @size: size of buffer to add to skb + * @ntc: index of next to clean element * * This function will pull an Rx buffer from the ring and synchronize it * for use by the CPU. @@ -1026,7 +1027,6 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) /** * ice_construct_skb - Allocate skb and populate it * @rx_ring: Rx descriptor ring to transact packets on - * @rx_buf: Rx buffer to pull data from * @xdp: xdp_buff pointing to the data * * This function allocates an skb. It then populates it with the page diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 7bc5aa340c7df..c8322fb6f2b37 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -438,6 +438,7 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring, * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map * @xdp_ring: XDP ring * @xdp_res: Result of the receive batch + * @first_idx: index to write from caller * * This function bumps XDP Tx tail and/or flush redirect map, and * should be called when a batch of packets has been processed in the From d94dbdc4e0209b5e7d736ab696f8d635b034e3ee Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 13 Mar 2023 13:36:08 -0700 Subject: [PATCH 528/898] ice: Fix ice_cfg_rdma_fltr() to only update relevant fields The current implementation causes ice_vsi_update() to update all VSI fields based on the cached VSI context. This also assumes that the ICE_AQ_VSI_PROP_Q_OPT_VALID bit is set. This can cause problems if the VSI context is not correctly synced by the driver. Fix this by only updating the fields that correspond to ICE_AQ_VSI_PROP_Q_OPT_VALID. Also, make sure to save the updated result in the cached VSI context on success. Fixes: 348048e724a0 ("ice: Implement iidc operations") Co-developed-by: Robert Malz Signed-off-by: Robert Malz Signed-off-by: Brett Creeley Signed-off-by: Jesse Brandeburg Reviewed-by: Piotr Raczynski Tested-by: Jakub Andrysiak Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_switch.c | 26 +++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 61f844d225123..46b36851af460 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -1780,18 +1780,36 @@ ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx, int ice_cfg_rdma_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable) { - struct ice_vsi_ctx *ctx; + struct ice_vsi_ctx *ctx, *cached_ctx; + int status; + + cached_ctx = ice_get_vsi_ctx(hw, vsi_handle); + if (!cached_ctx) + return -ENOENT; - ctx = ice_get_vsi_ctx(hw, vsi_handle); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) - return -EIO; + return -ENOMEM; + + ctx->info.q_opt_rss = cached_ctx->info.q_opt_rss; + ctx->info.q_opt_tc = cached_ctx->info.q_opt_tc; + ctx->info.q_opt_flags = cached_ctx->info.q_opt_flags; + + ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID); if (enable) ctx->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN; else ctx->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN; - return ice_update_vsi(hw, vsi_handle, ctx, NULL); + status = ice_update_vsi(hw, vsi_handle, ctx, NULL); + if (!status) { + cached_ctx->info.q_opt_flags = ctx->info.q_opt_flags; + cached_ctx->info.valid_sections |= ctx->info.valid_sections; + } + + kfree(ctx); + return status; } /** From 29486b6df3e6a63b57d1ed1dce06051267282ff4 Mon Sep 17 00:00:00 2001 From: Junfeng Guo Date: Tue, 14 Mar 2023 10:03:15 +0800 Subject: [PATCH 529/898] ice: add profile conflict check for AVF FDIR Add profile conflict check while adding some FDIR rules to avoid unexpected flow behavior, rules may have conflict including: IPv4 <---> {IPv4_UDP, IPv4_TCP, IPv4_SCTP} IPv6 <---> {IPv6_UDP, IPv6_TCP, IPv6_SCTP} For example, when we create an FDIR rule for IPv4, this rule will work on packets including IPv4, IPv4_UDP, IPv4_TCP and IPv4_SCTP. But if we then create an FDIR rule for IPv4_UDP and then destroy it, the first FDIR rule for IPv4 cannot work on pkt IPv4_UDP then. To prevent this unexpected behavior, we add restriction in software when creating FDIR rules by adding necessary profile conflict check. Fixes: 1f7ea1cd6a37 ("ice: Enable FDIR Configure for AVF") Signed-off-by: Junfeng Guo Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- .../ethernet/intel/ice/ice_virtchnl_fdir.c | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index e6ef6b303222d..5fd75e75772ec 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -541,6 +541,72 @@ static void ice_vc_fdir_rem_prof_all(struct ice_vf *vf) } } +/** + * ice_vc_fdir_has_prof_conflict + * @vf: pointer to the VF structure + * @conf: FDIR configuration for each filter + * + * Check if @conf has conflicting profile with existing profiles + * + * Return: true on success, and false on error. + */ +static bool +ice_vc_fdir_has_prof_conflict(struct ice_vf *vf, + struct virtchnl_fdir_fltr_conf *conf) +{ + struct ice_fdir_fltr *desc; + + list_for_each_entry(desc, &vf->fdir.fdir_rule_list, fltr_node) { + struct virtchnl_fdir_fltr_conf *existing_conf; + enum ice_fltr_ptype flow_type_a, flow_type_b; + struct ice_fdir_fltr *a, *b; + + existing_conf = to_fltr_conf_from_desc(desc); + a = &existing_conf->input; + b = &conf->input; + flow_type_a = a->flow_type; + flow_type_b = b->flow_type; + + /* No need to compare two rules with different tunnel types or + * with the same protocol type. + */ + if (existing_conf->ttype != conf->ttype || + flow_type_a == flow_type_b) + continue; + + switch (flow_type_a) { + case ICE_FLTR_PTYPE_NONF_IPV4_UDP: + case ICE_FLTR_PTYPE_NONF_IPV4_TCP: + case ICE_FLTR_PTYPE_NONF_IPV4_SCTP: + if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_OTHER) + return true; + break; + case ICE_FLTR_PTYPE_NONF_IPV4_OTHER: + if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_UDP || + flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_TCP || + flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_SCTP) + return true; + break; + case ICE_FLTR_PTYPE_NONF_IPV6_UDP: + case ICE_FLTR_PTYPE_NONF_IPV6_TCP: + case ICE_FLTR_PTYPE_NONF_IPV6_SCTP: + if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_OTHER) + return true; + break; + case ICE_FLTR_PTYPE_NONF_IPV6_OTHER: + if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_UDP || + flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_TCP || + flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_SCTP) + return true; + break; + default: + break; + } + } + + return false; +} + /** * ice_vc_fdir_write_flow_prof * @vf: pointer to the VF structure @@ -677,6 +743,13 @@ ice_vc_fdir_config_input_set(struct ice_vf *vf, struct virtchnl_fdir_add *fltr, enum ice_fltr_ptype flow; int ret; + ret = ice_vc_fdir_has_prof_conflict(vf, conf); + if (ret) { + dev_dbg(dev, "Found flow profile conflict for VF %d\n", + vf->vf_id); + return ret; + } + flow = input->flow_type; ret = ice_vc_fdir_alloc_prof(vf, flow); if (ret) { From e9a1cc2e4c4ee7c7e60fb26345618c2522a2a10f Mon Sep 17 00:00:00 2001 From: Jakob Koschel Date: Mon, 20 Mar 2023 13:48:15 +0100 Subject: [PATCH 530/898] ice: fix invalid check for empty list in ice_sched_assoc_vsi_to_agg() The code implicitly assumes that the list iterator finds a correct handle. If 'vsi_handle' is not found the 'old_agg_vsi_info' was pointing to an bogus memory location. For safety a separate list iterator variable should be used to make the != NULL check on 'old_agg_vsi_info' correct under any circumstances. Additionally Linus proposed to avoid any use of the list iterator variable after the loop, in the attempt to move the list iterator variable declaration into the macro to avoid any potential misuse after the loop. Using it in a pointer comparison after the loop is undefined behavior and should be omitted if possible [1]. Fixes: 37c592062b16 ("ice: remove the VSI info from previous agg") Link: https://lore.kernel.org/all/CAHk-=wgRr_D8CB-D9Kg-c=EHreAsk5SqXPwr9Y7k9sA6cWXJ6w@mail.gmail.com/ [1] Signed-off-by: Jakob Koschel Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sched.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 4eca8d195ef04..b7682de0ae057 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -2788,7 +2788,7 @@ static int ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle, unsigned long *tc_bitmap) { - struct ice_sched_agg_vsi_info *agg_vsi_info, *old_agg_vsi_info = NULL; + struct ice_sched_agg_vsi_info *agg_vsi_info, *iter, *old_agg_vsi_info = NULL; struct ice_sched_agg_info *agg_info, *old_agg_info; struct ice_hw *hw = pi->hw; int status = 0; @@ -2806,11 +2806,13 @@ ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, if (old_agg_info && old_agg_info != agg_info) { struct ice_sched_agg_vsi_info *vtmp; - list_for_each_entry_safe(old_agg_vsi_info, vtmp, + list_for_each_entry_safe(iter, vtmp, &old_agg_info->agg_vsi_list, list_entry) - if (old_agg_vsi_info->vsi_handle == vsi_handle) + if (iter->vsi_handle == vsi_handle) { + old_agg_vsi_info = iter; break; + } } /* check if entry already exist */ From b57841fb0b564c61508222e885ac8f30a2811089 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 28 Mar 2023 20:43:26 +0200 Subject: [PATCH 531/898] thermal: core: Drop excessive lockdep_assert_held() calls The lockdep_assert_held() calls added to cooling_device_stats_setup() and cooling_device_stats_destroy() by commit 790930f44289 ("thermal: core: Introduce thermal_cooling_device_update()") trigger false-positive lockdep reports in code paths that are not subject to race conditions (before cooling device registration and after cooling device removal). For this reason, remove the lockdep_assert_held() calls from both cooling_device_stats_setup() and cooling_device_stats_destroy() and add one to thermal_cooling_device_stats_reinit() that has to be called under the cdev lock. Fixes: 790930f44289 ("thermal: core: Introduce thermal_cooling_device_update()") Link: https://lore.kernel.org/linux-acpi/ZCIDTLFt27Ei7+V6@ideak-desk.fi.intel.com Reported-by: Imre Deak Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_sysfs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index a4aba7b8bb8b9..6c20c9f90a05a 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -876,8 +876,6 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev) unsigned long states = cdev->max_state + 1; int var; - lockdep_assert_held(&cdev->lock); - var = sizeof(*stats); var += sizeof(*stats->time_in_state) * states; var += sizeof(*stats->trans_table) * states * states; @@ -903,8 +901,6 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev) static void cooling_device_stats_destroy(struct thermal_cooling_device *cdev) { - lockdep_assert_held(&cdev->lock); - kfree(cdev->stats); cdev->stats = NULL; } @@ -931,6 +927,8 @@ void thermal_cooling_device_destroy_sysfs(struct thermal_cooling_device *cdev) void thermal_cooling_device_stats_reinit(struct thermal_cooling_device *cdev) { + lockdep_assert_held(&cdev->lock); + cooling_device_stats_destroy(cdev); cooling_device_stats_setup(cdev); } From 1a4b52ce8548355f09170faa67070bc2b8e3ba53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leonard=20G=C3=B6hrs?= Date: Tue, 21 Mar 2023 15:55:25 +0100 Subject: [PATCH 532/898] =?UTF-8?q?.mailmap:=20add=20entry=20for=20Leonard?= =?UTF-8?q?=20G=C3=B6hrs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My very first kernel commit: e4e1d47c7906 ("ALSA: ppc: remove redundant checks in PS3 driver probe") was sent with the umlaut in my last name transcribed (Göhrs -> Goehrs). Add a mailmap entry so all my commits use the same name. Link: https://lkml.kernel.org/r/20230321145525.1317230-1-l.goehrs@pengutronix.de Signed-off-by: Leonard Göhrs Acked-by: Uwe Kleine-König Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index e2af78f67f7cc..e2fd5ae96a00d 100644 --- a/.mailmap +++ b/.mailmap @@ -266,6 +266,7 @@ Krzysztof Kozlowski Kuninori Morimoto Kuogee Hsieh Leonardo Bras +Leonard Göhrs Leonid I Ananiev Leon Romanovsky Leon Romanovsky From 35260cf545226c3b21d52a9d21083f7ff999969c Mon Sep 17 00:00:00 2001 From: ye xingchen Date: Sun, 29 Jan 2023 11:10:09 +0800 Subject: [PATCH 533/898] Kconfig.debug: fix SCHED_DEBUG dependency The path for SCHED_DEBUG is /sys/kernel/debug/sched. So, SCHED_DEBUG should depend on DEBUG_FS, not PROC_FS. Link: https://lkml.kernel.org/r/202301291110098787982@zte.com.cn Signed-off-by: ye xingchen Cc: Dan Williams Cc: Geert Uytterhoeven Cc: Josh Poimboeuf Cc: Kees Cook Cc: Miguel Ojeda Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Randy Dunlap Cc: Rasmus Villemoes Cc: Vlastimil Babka Cc: Zhaoyang Huang Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c8b379e2e9adc..3cc5d239964a6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1143,7 +1143,7 @@ menu "Scheduler Debugging" config SCHED_DEBUG bool "Collect scheduler debugging info" - depends on DEBUG_KERNEL && PROC_FS + depends on DEBUG_KERNEL && DEBUG_FS default y help If you say Y here, the /sys/kernel/debug/sched file will be provided From f478b9987cc8236b412d9f2afc958d3e15a7cf85 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 21 Mar 2023 14:35:08 +0800 Subject: [PATCH 534/898] lib/Kconfig.debug: correct help info of LOCKDEP_STACK_TRACE_HASH_BITS We can see the following definition in kernel/locking/lockdep_internals.h: #define STACK_TRACE_HASH_SIZE (1 << CONFIG_LOCKDEP_STACK_TRACE_HASH_BITS) CONFIG_LOCKDEP_STACK_TRACE_HASH_BITS is related with STACK_TRACE_HASH_SIZE instead of MAX_STACK_TRACE_ENTRIES, fix it. Link: https://lkml.kernel.org/r/1679380508-20830-1-git-send-email-yangtiezhu@loongson.cn Fixes: 5dc33592e955 ("lockdep: Allow tuning tracing capacity constants.") Signed-off-by: Tiezhu Yang Cc: Dmitry Vyukov Cc: Tetsuo Handa Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 3cc5d239964a6..39d1d93164bd0 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1392,7 +1392,7 @@ config LOCKDEP_STACK_TRACE_HASH_BITS range 10 30 default 14 help - Try increasing this value if you need large MAX_STACK_TRACE_ENTRIES. + Try increasing this value if you need large STACK_TRACE_HASH_SIZE. config LOCKDEP_CIRCULAR_QUEUE_BITS int "Bitsize for elements in circular_queue struct" From 13dd4e04625f600e5affb1b3f0b6c35268ab839b Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Wed, 22 Mar 2023 11:11:09 +0000 Subject: [PATCH 535/898] fsdax: unshare: zero destination if srcmap is HOLE or UNWRITTEN unshare copies data from source to destination. But if the source is HOLE or UNWRITTEN extents, we should zero the destination, otherwise the HOLE or UNWRITTEN part will be user-visible old data of the new allocated extent. Found by running generic/649 while mounting with -o dax=always on pmem. Link: https://lkml.kernel.org/r/1679483469-2-1-git-send-email-ruansy.fnst@fujitsu.com Fixes: d984648e428b ("fsdax,xfs: port unshare to fsdax") Signed-off-by: Shiyang Ruan Cc: Dan Williams Cc: Darrick J. Wong Cc: Jan Kara Cc: Matthew Wilcox (Oracle) Cc: Alistair Popple Cc: Jason Gunthorpe Cc: John Hubbard Cc: Signed-off-by: Andrew Morton --- fs/dax.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 3e457a16c7d18..e48d68902a8c1 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1258,15 +1258,20 @@ static s64 dax_unshare_iter(struct iomap_iter *iter) /* don't bother with blocks that are not shared to start with */ if (!(iomap->flags & IOMAP_F_SHARED)) return length; - /* don't bother with holes or unwritten extents */ - if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN) - return length; id = dax_read_lock(); ret = dax_iomap_direct_access(iomap, pos, length, &daddr, NULL); if (ret < 0) goto out_unlock; + /* zero the distance if srcmap is HOLE or UNWRITTEN */ + if (srcmap->flags & IOMAP_F_SHARED || srcmap->type == IOMAP_UNWRITTEN) { + memset(daddr, 0, length); + dax_flush(iomap->dax_dev, daddr, length); + ret = length; + goto out_unlock; + } + ret = dax_iomap_direct_access(srcmap, pos, length, &saddr, NULL); if (ret < 0) goto out_unlock; From e900ba10d15041a6236cc75778cc6e06c3590a58 Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Wed, 22 Mar 2023 07:25:58 +0000 Subject: [PATCH 536/898] fsdax: dedupe should compare the min of two iters' length In an dedupe comparison iter loop, the length of iomap_iter decreases because it implies the remaining length after each iteration. The dedupe command will fail with -EIO if the range is larger than one page size and not aligned to the page size. Also report warning in dmesg: [ 4338.498374] ------------[ cut here ]------------ [ 4338.498689] WARNING: CPU: 3 PID: 1415645 at fs/iomap/iter.c:16 ... The compare function should use the min length of the current iters, not the total length. Link: https://lkml.kernel.org/r/1679469958-2-1-git-send-email-ruansy.fnst@fujitsu.com Fixes: 0e79e3736d54 ("fsdax: dedupe: iter two files at the same time") Signed-off-by: Shiyang Ruan Reviewed-by: Darrick J. Wong Cc: Dan Williams Cc: Jan Kara Cc: Matthew Wilcox (Oracle) Cc: Signed-off-by: Andrew Morton --- fs/dax.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index e48d68902a8c1..5d2e9b10030e9 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -2027,8 +2027,8 @@ int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff, while ((ret = iomap_iter(&src_iter, ops)) > 0 && (ret = iomap_iter(&dst_iter, ops)) > 0) { - compared = dax_range_compare_iter(&src_iter, &dst_iter, len, - same); + compared = dax_range_compare_iter(&src_iter, &dst_iter, + min(src_iter.len, dst_iter.len), same); if (compared < 0) return ret; src_iter.processed = dst_iter.processed = compared; From 3ee2d7471fa4963a2ced0a84f0653ce88b43c5b2 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Mon, 20 Mar 2023 11:00:59 +0800 Subject: [PATCH 537/898] mm: kfence: fix PG_slab and memcg_data clearing It does not reset PG_slab and memcg_data when KFENCE fails to initialize kfence pool at runtime. It is reporting a "Bad page state" message when kfence pool is freed to buddy. The checking of whether it is a compound head page seems unnecessary since we already guarantee this when allocating kfence pool. Remove the check to simplify the code. Link: https://lkml.kernel.org/r/20230320030059.20189-1-songmuchun@bytedance.com Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Signed-off-by: Muchun Song Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Jann Horn Cc: Marco Elver Cc: Roman Gushchin Cc: SeongJae Park Cc: Signed-off-by: Andrew Morton --- mm/kfence/core.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 79c94ee55f97b..d66092dd187c8 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -561,10 +561,6 @@ static unsigned long kfence_init_pool(void) if (!i || (i % 2)) continue; - /* Verify we do not have a compound head page. */ - if (WARN_ON(compound_head(&pages[i]) != &pages[i])) - return addr; - __folio_set_slab(slab_folio(slab)); #ifdef CONFIG_MEMCG slab->memcg_data = (unsigned long)&kfence_metadata[i / 2 - 1].objcg | @@ -597,12 +593,26 @@ static unsigned long kfence_init_pool(void) /* Protect the right redzone. */ if (unlikely(!kfence_protect(addr + PAGE_SIZE))) - return addr; + goto reset_slab; addr += 2 * PAGE_SIZE; } return 0; + +reset_slab: + for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) { + struct slab *slab = page_slab(&pages[i]); + + if (!i || (i % 2)) + continue; +#ifdef CONFIG_MEMCG + slab->memcg_data = 0; +#endif + __folio_clear_slab(slab_folio(slab)); + } + + return addr; } static bool __init kfence_init_pool_early(void) @@ -632,16 +642,6 @@ static bool __init kfence_init_pool_early(void) * fails for the first page, and therefore expect addr==__kfence_pool in * most failure cases. */ - for (char *p = (char *)addr; p < __kfence_pool + KFENCE_POOL_SIZE; p += PAGE_SIZE) { - struct slab *slab = virt_to_slab(p); - - if (!slab) - continue; -#ifdef CONFIG_MEMCG - slab->memcg_data = 0; -#endif - __folio_clear_slab(slab_folio(slab)); - } memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool)); __kfence_pool = NULL; return false; From 1f2803b2660f4b04d48d065072c0ae0c9ca255fd Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 23 Mar 2023 10:50:03 +0800 Subject: [PATCH 538/898] mm: kfence: fix handling discontiguous page The struct pages could be discontiguous when the kfence pool is allocated via alloc_contig_pages() with CONFIG_SPARSEMEM and !CONFIG_SPARSEMEM_VMEMMAP. This may result in setting PG_slab and memcg_data to a arbitrary address (may be not used as a struct page), which in the worst case might corrupt the kernel. So the iteration should use nth_page(). Link: https://lkml.kernel.org/r/20230323025003.94447-1-songmuchun@bytedance.com Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Signed-off-by: Muchun Song Reviewed-by: Marco Elver Reviewed-by: Kefeng Wang Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Jann Horn Cc: SeongJae Park Cc: Signed-off-by: Andrew Morton --- mm/kfence/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index d66092dd187c8..1065e0568d05a 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -556,7 +556,7 @@ static unsigned long kfence_init_pool(void) * enters __slab_free() slow-path. */ for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) { - struct slab *slab = page_slab(&pages[i]); + struct slab *slab = page_slab(nth_page(pages, i)); if (!i || (i % 2)) continue; @@ -602,7 +602,7 @@ static unsigned long kfence_init_pool(void) reset_slab: for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) { - struct slab *slab = page_slab(&pages[i]); + struct slab *slab = page_slab(nth_page(pages, i)); if (!i || (i % 2)) continue; From bdd034de3a28ffdacab528aebad17f1df968180c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 24 Mar 2023 06:07:36 -0700 Subject: [PATCH 539/898] mailmap: add an entry for Leonard Crestez Link: https://lkml.kernel.org/r/20230324130737.3360169-1-f.fainelli@gmail.com Signed-off-by: Florian Fainelli Cc: Baolin Wang Cc: Colin Ian King Cc: Jakub Kicinski Cc: Kirill Tkhai Cc: Konrad Dybcio Cc: Leonard Crestez Cc: Qais Yousef Cc: Stephen Hemminger Cc: Vasily Averin Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index e2fd5ae96a00d..e42486317d18c 100644 --- a/.mailmap +++ b/.mailmap @@ -265,6 +265,7 @@ Krzysztof Kozlowski Krzysztof Kozlowski Kuninori Morimoto Kuogee Hsieh +Leonard Crestez Leonard Crestez Leonardo Bras Leonard Göhrs Leonid I Ananiev From 2280d425ba3599bdd85c41bd0ec8ba568f00c032 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 28 Mar 2023 10:45:20 +0100 Subject: [PATCH 540/898] btrfs: ignore fiemap path cache when there are multiple paths for a node During fiemap, when walking backreferences to determine if a b+tree node/leaf is shared, we may find a tree block (leaf or node) for which two parents were added to the references ulist. This happens if we get for example one direct ref (shared tree block ref) and one indirect ref (non-shared tree block ref) for the tree block at the current level, which can happen during relocation. In that case the fiemap path cache can not be used since it's meant for a single path, with one tree block at each possible level, so having multiple references for a tree block at any level may result in getting the level counter exceed BTRFS_MAX_LEVEL and eventually trigger the warning: WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL) at lookup_backref_shared_cache() and at store_backref_shared_cache(). This is harmless since the code ignores any level >= BTRFS_MAX_LEVEL, the warning is there just to catch any unexpected case like the one described above. However if a user finds this it may be scary and get reported. So just ignore the path cache once we find a tree block for which there are more than one reference, which is the less common case, and update the cache with the sharedness check result for all levels below the level for which we found multiple references. Reported-by: Jarno Pelkonen Link: https://lore.kernel.org/linux-btrfs/CAKv8qLmDNAGJGCtsevxx_VZ_YOvvs1L83iEJkTgyA4joJertng@mail.gmail.com/ Fixes: 12a824dc67a6 ("btrfs: speedup checking for extent sharedness during fiemap") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 85 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 63 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 90e40d5ceccd1..e54f0884802a0 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1921,8 +1921,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, level = -1; ULIST_ITER_INIT(&uiter); while (1) { - bool is_shared; - bool cached; + const unsigned long prev_ref_count = ctx->refs.nnodes; walk_ctx.bytenr = bytenr; ret = find_parent_nodes(&walk_ctx, &shared); @@ -1940,21 +1939,36 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, ret = 0; /* - * If our data extent was not directly shared (without multiple - * reference items), than it might have a single reference item - * with a count > 1 for the same offset, which means there are 2 - * (or more) file extent items that point to the data extent - - * this happens when a file extent item needs to be split and - * then one item gets moved to another leaf due to a b+tree leaf - * split when inserting some item. In this case the file extent - * items may be located in different leaves and therefore some - * of the leaves may be referenced through shared subtrees while - * others are not. Since our extent buffer cache only works for - * a single path (by far the most common case and simpler to - * deal with), we can not use it if we have multiple leaves - * (which implies multiple paths). + * More than one extent buffer (bytenr) may have been added to + * the ctx->refs ulist, in which case we have to check multiple + * tree paths in case the first one is not shared, so we can not + * use the path cache which is made for a single path. Multiple + * extent buffers at the current level happen when: + * + * 1) level -1, the data extent: If our data extent was not + * directly shared (without multiple reference items), then + * it might have a single reference item with a count > 1 for + * the same offset, which means there are 2 (or more) file + * extent items that point to the data extent - this happens + * when a file extent item needs to be split and then one + * item gets moved to another leaf due to a b+tree leaf split + * when inserting some item. In this case the file extent + * items may be located in different leaves and therefore + * some of the leaves may be referenced through shared + * subtrees while others are not. Since our extent buffer + * cache only works for a single path (by far the most common + * case and simpler to deal with), we can not use it if we + * have multiple leaves (which implies multiple paths). + * + * 2) level >= 0, a tree node/leaf: We can have a mix of direct + * and indirect references on a b+tree node/leaf, so we have + * to check multiple paths, and the extent buffer (the + * current bytenr) may be shared or not. One example is + * during relocation as we may get a shared tree block ref + * (direct ref) and a non-shared tree block ref (indirect + * ref) for the same node/leaf. */ - if (level == -1 && ctx->refs.nnodes > 1) + if ((ctx->refs.nnodes - prev_ref_count) > 1) ctx->use_path_cache = false; if (level >= 0) @@ -1964,18 +1978,45 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, if (!node) break; bytenr = node->val; - level++; - cached = lookup_backref_shared_cache(ctx, root, bytenr, level, - &is_shared); - if (cached) { - ret = (is_shared ? 1 : 0); - break; + if (ctx->use_path_cache) { + bool is_shared; + bool cached; + + level++; + cached = lookup_backref_shared_cache(ctx, root, bytenr, + level, &is_shared); + if (cached) { + ret = (is_shared ? 1 : 0); + break; + } } shared.share_count = 0; shared.have_delayed_delete_refs = false; cond_resched(); } + /* + * If the path cache is disabled, then it means at some tree level we + * got multiple parents due to a mix of direct and indirect backrefs or + * multiple leaves with file extent items pointing to the same data + * extent. We have to invalidate the cache and cache only the sharedness + * result for the levels where we got only one node/reference. + */ + if (!ctx->use_path_cache) { + int i = 0; + + level--; + if (ret >= 0 && level >= 0) { + bytenr = ctx->path_cache_entries[level].bytenr; + ctx->use_path_cache = true; + store_backref_shared_cache(ctx, root, bytenr, level, ret); + i = level + 1; + } + + for ( ; i < BTRFS_MAX_LEVEL; i++) + ctx->path_cache_entries[i].bytenr = 0; + } + /* * Cache the sharedness result for the data extent if we know our inode * has more than 1 file extent item that refers to the data extent. From 07b3af42d8d528374d4f42d688bae86eeb30831a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 24 Mar 2023 15:04:04 +0100 Subject: [PATCH 541/898] net: ethernet: mtk_eth_soc: fix tx throughput regression with direct 1G links Using the QDMA tx scheduler to throttle tx to line speed works fine for switch ports, but apparently caused a regression on non-switch ports. Based on a number of tests, it seems that this throttling can be safely dropped without re-introducing the issues on switch ports that the tx scheduling changes resolved. Link: https://lore.kernel.org/netdev/trinity-92c3826f-c2c8-40af-8339-bc6d0d3ffea4-1678213958520@3c-app-gmx-bs16/ Fixes: f63959c7eec3 ("net: ethernet: mtk_eth_soc: implement multi-queue support for per-port queues") Reported-by: Frank Wunderlich Reported-by: Daniel Golle Tested-by: Daniel Golle Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20230324140404.95745-1-nbd@nbd.name Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 3cb43623d3dbe..1835d92afe4b2 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -763,8 +763,6 @@ static void mtk_mac_link_up(struct phylink_config *config, break; } - mtk_set_queue_speed(mac->hw, mac->id, speed); - /* Configure duplex */ if (duplex == DUPLEX_FULL) mcr |= MAC_MCR_FORCE_DPX; From f33fc1576757741479452255132d6e3aaf558ffe Mon Sep 17 00:00:00 2001 From: Tasos Sahanidis Date: Wed, 29 Mar 2023 06:24:22 +0300 Subject: [PATCH 542/898] ALSA: ymfpci: Create card with device-managed snd_devm_card_new() snd_card_ymfpci_remove() was removed in commit c6e6bb5eab74 ("ALSA: ymfpci: Allocate resources with device-managed APIs"), but the call to snd_card_new() was not replaced with snd_devm_card_new(). Since there was no longer a call to snd_card_free, unloading the module would eventually result in Oops: [697561.532887] BUG: unable to handle page fault for address: ffffffffc0924480 [697561.532893] #PF: supervisor read access in kernel mode [697561.532896] #PF: error_code(0x0000) - not-present page [697561.532899] PGD ae1e15067 P4D ae1e15067 PUD ae1e17067 PMD 11a8f5067 PTE 0 [697561.532905] Oops: 0000 [#1] PREEMPT SMP NOPTI [697561.532909] CPU: 21 PID: 5080 Comm: wireplumber Tainted: G W OE 6.2.7 #1 [697561.532914] Hardware name: System manufacturer System Product Name/TUF GAMING X570-PLUS, BIOS 4408 10/28/2022 [697561.532916] RIP: 0010:try_module_get.part.0+0x1a/0xe0 [697561.532924] Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 49 89 fc bf 01 00 00 00 e8 56 3c f8 ff <41> 83 3c 24 02 0f 84 96 00 00 00 41 8b 84 24 30 03 00 00 85 c0 0f [697561.532927] RSP: 0018:ffffbe9b858c3bd8 EFLAGS: 00010246 [697561.532930] RAX: ffff9815d14f1900 RBX: ffff9815c14e6000 RCX: 0000000000000000 [697561.532933] RDX: 0000000000000000 RSI: ffffffffc055092c RDI: ffffffffb3778c1a [697561.532935] RBP: ffffbe9b858c3be8 R08: 0000000000000040 R09: ffff981a1a741380 [697561.532937] R10: ffffbe9b858c3c80 R11: 00000009d56533a6 R12: ffffffffc0924480 [697561.532939] R13: ffff9823439d8500 R14: 0000000000000025 R15: ffff9815cd109f80 [697561.532942] FS: 00007f13084f1f80(0000) GS:ffff9824aef40000(0000) knlGS:0000000000000000 [697561.532945] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [697561.532947] CR2: ffffffffc0924480 CR3: 0000000145344000 CR4: 0000000000350ee0 [697561.532949] Call Trace: [697561.532951] [697561.532955] try_module_get+0x13/0x30 [697561.532960] snd_ctl_open+0x61/0x1c0 [snd] [697561.532976] snd_open+0xb4/0x1e0 [snd] [697561.532989] chrdev_open+0xc7/0x240 [697561.532995] ? fsnotify_perm.part.0+0x6e/0x160 [697561.533000] ? __pfx_chrdev_open+0x10/0x10 [697561.533005] do_dentry_open+0x169/0x440 [697561.533009] vfs_open+0x2d/0x40 [697561.533012] path_openat+0xa9d/0x10d0 [697561.533017] ? debug_smp_processor_id+0x17/0x20 [697561.533022] ? trigger_load_balance+0x65/0x370 [697561.533026] do_filp_open+0xb2/0x160 [697561.533032] ? _raw_spin_unlock+0x19/0x40 [697561.533036] ? alloc_fd+0xa9/0x190 [697561.533040] do_sys_openat2+0x9f/0x160 [697561.533044] __x64_sys_openat+0x55/0x90 [697561.533048] do_syscall_64+0x3b/0x90 [697561.533052] entry_SYSCALL_64_after_hwframe+0x72/0xdc [697561.533056] RIP: 0033:0x7f1308a40db4 [697561.533059] Code: 24 20 eb 8f 66 90 44 89 54 24 0c e8 46 68 f8 ff 44 8b 54 24 0c 44 89 e2 48 89 ee 41 89 c0 bf 9c ff ff ff b8 01 01 00 00 0f 05 <48> 3d 00 f0 ff ff 77 32 44 89 c7 89 44 24 0c e8 78 68 f8 ff 8b 44 [697561.533062] RSP: 002b:00007ffcce664450 EFLAGS: 00000293 ORIG_RAX: 0000000000000101 [697561.533066] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f1308a40db4 [697561.533068] RDX: 0000000000080000 RSI: 00007ffcce664690 RDI: 00000000ffffff9c [697561.533070] RBP: 00007ffcce664690 R08: 0000000000000000 R09: 0000000000000012 [697561.533072] R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000080000 [697561.533074] R13: 00007f13054b069b R14: 0000565209f83200 R15: 0000000000000000 [697561.533078] Fixes: c6e6bb5eab74 ("ALSA: ymfpci: Allocate resources with device-managed APIs") Signed-off-by: Tasos Sahanidis Link: https://lore.kernel.org/r/20230329032422.170024-1-tasos@tasossah.com Signed-off-by: Takashi Iwai --- sound/pci/ymfpci/ymfpci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/ymfpci/ymfpci.c b/sound/pci/ymfpci/ymfpci.c index 1e198e4d57b8d..82d4e0fda91be 100644 --- a/sound/pci/ymfpci/ymfpci.c +++ b/sound/pci/ymfpci/ymfpci.c @@ -170,7 +170,7 @@ static int snd_card_ymfpci_probe(struct pci_dev *pci, return -ENOENT; } - err = snd_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE, + err = snd_devm_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE, sizeof(*chip), &card); if (err < 0) return err; From 6be2e7522eb529b41c16d459f33bbdbcddbf5c15 Mon Sep 17 00:00:00 2001 From: Tasos Sahanidis Date: Wed, 29 Mar 2023 06:28:08 +0300 Subject: [PATCH 543/898] ALSA: ymfpci: Fix BUG_ON in probe function The snd_dma_buffer.bytes field now contains the aligned size, which this snd_BUG_ON() did not account for, resulting in the following: [ 9.625915] ------------[ cut here ]------------ [ 9.633440] WARNING: CPU: 0 PID: 126 at sound/pci/ymfpci/ymfpci_main.c:2168 snd_ymfpci_create+0x681/0x698 [snd_ymfpci] [ 9.648926] Modules linked in: snd_ymfpci(+) snd_intel_dspcfg kvm(+) snd_intel_sdw_acpi snd_ac97_codec snd_mpu401_uart snd_opl3_lib irqbypass snd_hda_codec gameport snd_rawmidi crct10dif_pclmul crc32_pclmul cfg80211 snd_hda_core polyval_clmulni polyval_generic gf128mul snd_seq_device ghash_clmulni_intel snd_hwdep ac97_bus sha512_ssse3 rfkill snd_pcm aesni_intel tg3 snd_timer crypto_simd snd mxm_wmi libphy cryptd k10temp fam15h_power pcspkr soundcore sp5100_tco wmi acpi_cpufreq mac_hid dm_multipath sg loop fuse dm_mod bpf_preload ip_tables x_tables ext4 crc32c_generic crc16 mbcache jbd2 sr_mod cdrom ata_generic pata_acpi firewire_ohci crc32c_intel firewire_core xhci_pci crc_itu_t pata_via xhci_pci_renesas floppy [ 9.711849] CPU: 0 PID: 126 Comm: kworker/0:2 Not tainted 6.1.21-1-lts #1 08d2e5ece03136efa7c6aeea9a9c40916b1bd8da [ 9.722200] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./990FX Extreme4, BIOS P2.70 06/05/2014 [ 9.732204] Workqueue: events work_for_cpu_fn [ 9.736580] RIP: 0010:snd_ymfpci_create+0x681/0x698 [snd_ymfpci] [ 9.742594] Code: 8c c0 4c 89 e2 48 89 df 48 c7 c6 92 c6 8c c0 e8 15 d0 e9 ff 48 83 c4 08 44 89 e8 5b 5d 41 5c 41 5d 41 5e 41 5f e9 d3 7a 33 e3 <0f> 0b e9 cb fd ff ff 41 bd fb ff ff ff eb db 41 bd f4 ff ff ff eb [ 9.761358] RSP: 0018:ffffab64804e7da0 EFLAGS: 00010287 [ 9.766594] RAX: ffff8fa2df06c400 RBX: ffff8fa3073a8000 RCX: ffff8fa303fbc4a8 [ 9.773734] RDX: ffff8fa2df06d000 RSI: 0000000000000010 RDI: 0000000000000020 [ 9.780876] RBP: ffff8fa300b5d0d0 R08: ffff8fa3073a8e50 R09: 00000000df06bf00 [ 9.788018] R10: ffff8fa2df06bf00 R11: 00000000df068200 R12: ffff8fa3073a8918 [ 9.795159] R13: 0000000000000000 R14: 0000000000000080 R15: ffff8fa2df068200 [ 9.802317] FS: 0000000000000000(0000) GS:ffff8fa9fec00000(0000) knlGS:0000000000000000 [ 9.810414] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 9.816158] CR2: 000055febaf66500 CR3: 0000000101a2e000 CR4: 00000000000406f0 [ 9.823301] Call Trace: [ 9.825747] [ 9.827889] snd_card_ymfpci_probe+0x194/0x950 [snd_ymfpci b78a5fe64b5663a6390a909c67808567e3e73615] [ 9.837030] ? finish_task_switch.isra.0+0x90/0x2d0 [ 9.841918] local_pci_probe+0x45/0x80 [ 9.845680] work_for_cpu_fn+0x1a/0x30 [ 9.849431] process_one_work+0x1c7/0x380 [ 9.853464] worker_thread+0x1af/0x390 [ 9.857225] ? rescuer_thread+0x3b0/0x3b0 [ 9.861254] kthread+0xde/0x110 [ 9.864414] ? kthread_complete_and_exit+0x20/0x20 [ 9.869210] ret_from_fork+0x22/0x30 [ 9.872792] [ 9.874985] ---[ end trace 0000000000000000 ]--- Fixes: 5c1733e33c88 ("ALSA: memalloc: Align buffer allocations in page size") Signed-off-by: Tasos Sahanidis Link: https://lore.kernel.org/r/20230329032808.170403-1-tasos@tasossah.com Signed-off-by: Takashi Iwai --- sound/pci/ymfpci/ymfpci_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c index c80114c0ad7bf..b492c32ce0704 100644 --- a/sound/pci/ymfpci/ymfpci_main.c +++ b/sound/pci/ymfpci/ymfpci_main.c @@ -2165,7 +2165,7 @@ static int snd_ymfpci_memalloc(struct snd_ymfpci *chip) chip->work_base = ptr; chip->work_base_addr = ptr_addr; - snd_BUG_ON(ptr + chip->work_size != + snd_BUG_ON(ptr + PAGE_ALIGN(chip->work_size) != chip->work_ptr->area + chip->work_ptr->bytes); snd_ymfpci_writel(chip, YDSXGR_PLAYCTRLBASE, chip->bank_base_playback_addr); From 5f70bcbca469a087b54ad2d934185ed69a098576 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Tue, 28 Mar 2023 11:58:44 +0530 Subject: [PATCH 544/898] net: wwan: iosm: fixes 7560 modem crash ModemManger/Apps probing the wwan0xmmrpc0 port for 7560 Modem results in modem crash. 7560 Modem FW uses the MBIM interface for control command communication whereas 7360 uses Intel RPC interface so disable wwan0xmmrpc0 port for 7560. Fixes: d08b0f8f46e4 ("net: wwan: iosm: add rpc interface for xmm modems") Reported-and-tested-by: Martin Link: https://bugzilla.kernel.org/show_bug.cgi?id=217200 Signed-off-by: M Chetan Kumar Signed-off-by: Shane Parslow Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_imem.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c index 1e6a479766429..c066b0040a3fe 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c @@ -587,6 +587,13 @@ static void ipc_imem_run_state_worker(struct work_struct *instance) while (ctrl_chl_idx < IPC_MEM_MAX_CHANNELS) { if (!ipc_chnl_cfg_get(&chnl_cfg_port, ctrl_chl_idx)) { ipc_imem->ipc_port[ctrl_chl_idx] = NULL; + + if (ipc_imem->pcie->pci->device == INTEL_CP_DEVICE_7560_ID && + chnl_cfg_port.wwan_port_type == WWAN_PORT_XMMRPC) { + ctrl_chl_idx++; + continue; + } + if (ipc_imem->pcie->pci->device == INTEL_CP_DEVICE_7360_ID && chnl_cfg_port.wwan_port_type == WWAN_PORT_MBIM) { ctrl_chl_idx++; From 4c7f9d2e413dc06a157c4e5dccde84aaf4655eb3 Mon Sep 17 00:00:00 2001 From: Wayne Chang Date: Mon, 27 Mar 2023 17:55:48 +0800 Subject: [PATCH 545/898] usb: xhci: tegra: fix sleep in atomic call When we set the dual-role port to Host mode, we observed the following splat: [ 167.057718] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:229 [ 167.057872] Workqueue: events tegra_xusb_usb_phy_work [ 167.057954] Call trace: [ 167.057962] dump_backtrace+0x0/0x210 [ 167.057996] show_stack+0x30/0x50 [ 167.058020] dump_stack_lvl+0x64/0x84 [ 167.058065] dump_stack+0x14/0x34 [ 167.058100] __might_resched+0x144/0x180 [ 167.058140] __might_sleep+0x64/0xd0 [ 167.058171] slab_pre_alloc_hook.constprop.0+0xa8/0x110 [ 167.058202] __kmalloc_track_caller+0x74/0x2b0 [ 167.058233] kvasprintf+0xa4/0x190 [ 167.058261] kasprintf+0x58/0x90 [ 167.058285] tegra_xusb_find_port_node.isra.0+0x58/0xd0 [ 167.058334] tegra_xusb_find_port+0x38/0xa0 [ 167.058380] tegra_xusb_padctl_get_usb3_companion+0x38/0xd0 [ 167.058430] tegra_xhci_id_notify+0x8c/0x1e0 [ 167.058473] notifier_call_chain+0x88/0x100 [ 167.058506] atomic_notifier_call_chain+0x44/0x70 [ 167.058537] tegra_xusb_usb_phy_work+0x60/0xd0 [ 167.058581] process_one_work+0x1dc/0x4c0 [ 167.058618] worker_thread+0x54/0x410 [ 167.058650] kthread+0x188/0x1b0 [ 167.058672] ret_from_fork+0x10/0x20 The function tegra_xusb_padctl_get_usb3_companion eventually calls tegra_xusb_find_port and this in turn calls kasprintf which might sleep and so cannot be called from an atomic context. Fix this by moving the call to tegra_xusb_padctl_get_usb3_companion to the tegra_xhci_id_work function where it is really needed. Fixes: f836e7843036 ("usb: xhci-tegra: Add OTG support") Cc: stable@vger.kernel.org Signed-off-by: Wayne Chang Signed-off-by: Haotien Hsu Link: https://lore.kernel.org/r/20230327095548.1599470-1-haotienh@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-tegra.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index 1ff22f675930c..a88c39e525c23 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -1360,6 +1360,9 @@ static void tegra_xhci_id_work(struct work_struct *work) mutex_unlock(&tegra->lock); + tegra->otg_usb3_port = tegra_xusb_padctl_get_usb3_companion(tegra->padctl, + tegra->otg_usb2_port); + if (tegra->host_mode) { /* switch to host mode */ if (tegra->otg_usb3_port >= 0) { @@ -1474,9 +1477,6 @@ static int tegra_xhci_id_notify(struct notifier_block *nb, } tegra->otg_usb2_port = tegra_xusb_get_usb2_port(tegra, usbphy); - tegra->otg_usb3_port = tegra_xusb_padctl_get_usb3_companion( - tegra->padctl, - tegra->otg_usb2_port); tegra->host_mode = (usbphy->last_event == USB_EVENT_ID) ? true : false; From e4056e38ec87b4c21eb34bb8e38b1b0ca1221744 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 24 Mar 2023 09:41:27 +0100 Subject: [PATCH 546/898] dt-bindings: pinctrl: qcom,sm8550-lpass-lpi: allow input-enabled and bias-bus-hold Add missing common pin configuration properties: input-enabled and bias-bus-hold. Fixes: 268e97ccc311 ("dt-bindings: pinctrl: qcom,sm8550-lpass-lpi-pinctrl: add SM8550 LPASS") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230324084127.29362-1-krzysztof.kozlowski@linaro.org Signed-off-by: Linus Walleij --- .../bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml index 5e90051ed314a..8f60a9113e7a2 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml @@ -96,9 +96,11 @@ $defs: 2: Lower Slew rate (slower edges) 3: Reserved (No adjustments) + bias-bus-hold: true bias-pull-down: true bias-pull-up: true bias-disable: true + input-enable: true output-high: true output-low: true From b43a18647f03c87e77d50d6fe74904b61b96323e Mon Sep 17 00:00:00 2001 From: Biju Das Date: Fri, 17 Mar 2023 15:04:03 +0000 Subject: [PATCH 547/898] tty: serial: sh-sci: Fix transmit end interrupt handler The fourth interrupt on SCI port is transmit end interrupt compared to the break interrupt on other port types. So, shuffle the interrupts to fix the transmit end interrupt handler. Fixes: e1d0be616186 ("sh-sci: Add h8300 SCI") Cc: stable Suggested-by: Geert Uytterhoeven Signed-off-by: Biju Das Link: https://lore.kernel.org/r/20230317150403.154094-1-biju.das.jz@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 7bd0807209299..c07663fe80bf9 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -2864,6 +2865,13 @@ static int sci_init_single(struct platform_device *dev, sci_port->irqs[i] = platform_get_irq(dev, i); } + /* + * The fourth interrupt on SCI port is transmit end interrupt, so + * shuffle the interrupts. + */ + if (p->type == PORT_SCI) + swap(sci_port->irqs[SCIx_BRI_IRQ], sci_port->irqs[SCIx_TEI_IRQ]); + /* The SCI generates several interrupts. They can be muxed together or * connected to different interrupt lines. In the muxed case only one * interrupt resource is specified as there is only one interrupt ID. From 7b21f329ae0ab6361c0aebfc094db95821490cd1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 17 Mar 2023 10:46:57 +0100 Subject: [PATCH 548/898] dt-bindings: serial: renesas,scif: Fix 4th IRQ for 4-IRQ SCIFs The fourth interrupt on SCIF variants with four interrupts (RZ/A1) is the Break interrupt, not the Transmit End interrupt (like on SCI(g)). Update the description and interrupt name to fix this. Fixes: 384d00fae8e51f8f ("dt-bindings: serial: sh-sci: Convert to json-schema") Cc: stable Signed-off-by: Geert Uytterhoeven Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/719d1582e0ebbe3d674e3a48fc26295e1475a4c3.1679046394.git.geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/renesas,scif.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/serial/renesas,scif.yaml b/Documentation/devicetree/bindings/serial/renesas,scif.yaml index 1989bd67d04e8..54e4f41be9b42 100644 --- a/Documentation/devicetree/bindings/serial/renesas,scif.yaml +++ b/Documentation/devicetree/bindings/serial/renesas,scif.yaml @@ -92,7 +92,7 @@ properties: - description: Error interrupt - description: Receive buffer full interrupt - description: Transmit buffer empty interrupt - - description: Transmit End interrupt + - description: Break interrupt - items: - description: Error interrupt - description: Receive buffer full interrupt @@ -107,7 +107,7 @@ properties: - const: eri - const: rxi - const: txi - - const: tei + - const: bri - items: - const: eri - const: rxi From 90b8596ac46043e4a782d9111f5b285251b13756 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 17 Mar 2023 12:30:34 +0200 Subject: [PATCH 549/898] serial: 8250: Prevent starting up DMA Rx on THRI interrupt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hans de Goede reported Bluetooth adapters (HCIs) connected over an UART connection failed due corrupted Rx payload. The problem was narrowed down to DMA Rx starting on UART_IIR_THRI interrupt. The problem occurs despite LSR having DR bit set, which is precondition for attempting to start DMA Rx in the first place. From a debug patch: [x.807834] 8250irq: iir=cc lsr+saved=60 received=0/15 ier=0f dma_t/rx/err=0/0/0 [x.808676] 8250irq: iir=c2 lsr+saved=61 received=0/0 ier=0f dma_t/rx/err=0/0/0 [x.808776] 8250irq: iir=cc lsr+saved=60 received=1/12 ier=0d dma_t/rx/err=0/1/0 [x.808870] Bluetooth: hci0: Frame reassembly failed (-84) In the debug snippet, received field indicates 1 byte was transferred over DMA and 12 bytes after that with the non-DMA Rx. The sole byte DMA handled was corrupted (gets zeroed) which leads to the HCI failure. This problem became apparent after commit e8ffbb71f783 ("serial: 8250: use THRE & __stop_tx also with DMA") changed Tx stop behavior. Tx stop is now triggered from a THRI interrupt. Despite that this problem looks like a HW bug, this fix is not adding UART_BUG_xx flag to the driver beucase it seems useful in general to avoid starting DMA when there are only a few bytes to transfer. Skipping DMA for small transfers avoids the extra overhead DMA incurs. Thus, don't setup DMA Rx on UART_IIR_THRI but leave it to a subsequent interrupt which has Rx a related IIR value. By returning false from handle_rx_dma(), the DMA vs non-DMA decision is postponed until either UART_IIR_RDI (FIFO threshold worth of bytes awaiting) or UART_IIR_TIMEOUT (inter-character timeout) triggers at a later time which allows better to discern whether the number of bytes warrants starting DMA or not. Reported-by: Hans de Goede Tested-by: Hans de Goede Fixes: e8ffbb71f783 ("serial: 8250: use THRE & __stop_tx also with DMA") Cc: stable@vger.kernel.org Signed-off-by: Ilpo Järvinen Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230317103034.12881-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index fa43df05342bd..3ba9c8b93ae6c 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1903,6 +1903,17 @@ EXPORT_SYMBOL_GPL(serial8250_modem_status); static bool handle_rx_dma(struct uart_8250_port *up, unsigned int iir) { switch (iir & 0x3f) { + case UART_IIR_THRI: + /* + * Postpone DMA or not decision to IIR_RDI or IIR_RX_TIMEOUT + * because it's impossible to do an informed decision about + * that with IIR_THRI. + * + * This also fixes one known DMA Rx corruption issue where + * DR is asserted but DMA Rx only gets a corrupted zero byte + * (too early DR?). + */ + return false; case UART_IIR_RDI: if (!up->dma->rx_running) break; From 9425914f3de6febbd6250395f56c8279676d9c3c Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Thu, 23 Mar 2023 13:44:15 +0800 Subject: [PATCH 550/898] tty: serial: fsl_lpuart: avoid checking for transfer complete when UARTCTRL_SBK is asserted in lpuart32_tx_empty According to LPUART RM, Transmission Complete Flag becomes 0 if queuing a break character by writing 1 to CTRL[SBK], so here need to avoid checking for transmission complete when UARTCTRL_SBK is asserted, otherwise the lpuart32_tx_empty may never get TIOCSER_TEMT. Commit 2411fd94ceaa("tty: serial: fsl_lpuart: skip waiting for transmission complete when UARTCTRL_SBK is asserted") only fix it in lpuart32_set_termios(), here also fix it in lpuart32_tx_empty(). Fixes: 380c966c093e ("tty: serial: fsl_lpuart: add 32-bit register interface support") Cc: stable Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20230323054415.20363-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 56e6ba3250cd1..edc6e35b701a4 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -858,11 +858,17 @@ static unsigned int lpuart32_tx_empty(struct uart_port *port) struct lpuart_port, port); unsigned long stat = lpuart32_read(port, UARTSTAT); unsigned long sfifo = lpuart32_read(port, UARTFIFO); + unsigned long ctrl = lpuart32_read(port, UARTCTRL); if (sport->dma_tx_in_progress) return 0; - if (stat & UARTSTAT_TC && sfifo & UARTFIFO_TXEMPT) + /* + * LPUART Transmission Complete Flag may never be set while queuing a break + * character, so avoid checking for transmission complete when UARTCTRL_SBK + * is asserted. + */ + if ((stat & UARTSTAT_TC && sfifo & UARTFIFO_TXEMPT) || ctrl & UARTCTRL_SBK) return TIOCSER_TEMT; return 0; From 178e00f36f934a88682d96aa046c1f90cb6f83a7 Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Thu, 23 Mar 2023 19:09:23 +0800 Subject: [PATCH 551/898] tty: serial: fsl_lpuart: fix crash in lpuart_uport_is_active For serdev framework, tty->dev is a NULL pointer, lpuart_uport_is_active calling device_may_wakeup() may cause kernel NULL pointer crash, so here add the NULL pointer check before using it. Fixes: 4f5cb8c5e915 ("tty: serial: fsl_lpuart: enable wakeup source for lpuart") Cc: stable Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20230323110923.24581-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index edc6e35b701a4..074bfed57fc9e 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2948,7 +2948,7 @@ static bool lpuart_uport_is_active(struct lpuart_port *sport) tty = tty_port_tty_get(port); if (tty) { tty_dev = tty->dev; - may_wake = device_may_wakeup(tty_dev); + may_wake = tty_dev && device_may_wakeup(tty_dev); tty_kref_put(tty); } From f92ed0cd9328aed918ebb0ebb64d259eccbcc6e7 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 21 Mar 2023 11:47:50 +0000 Subject: [PATCH 552/898] tty: serial: sh-sci: Fix Rx on RZ/G2L SCI SCI IP on RZ/G2L alike SoCs do not need regshift compared to other SCI IPs on the SH platform. Currently, it does regshift and configuring Rx wrongly. Drop adding regshift for RZ/G2L alike SoCs. Fixes: dfc80387aefb ("serial: sh-sci: Compute the regshift value for SCI ports") Cc: stable@vger.kernel.org Signed-off-by: Biju Das Link: https://lore.kernel.org/r/20230321114753.75038-3-biju.das.jz@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index c07663fe80bf9..caa09a0c48f45 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2937,7 +2937,7 @@ static int sci_init_single(struct platform_device *dev, port->flags = UPF_FIXED_PORT | UPF_BOOT_AUTOCONF | p->flags; port->fifosize = sci_port->params->fifosize; - if (port->type == PORT_SCI) { + if (port->type == PORT_SCI && !dev->dev.of_node) { if (sci_port->reg_size >= 0x20) port->regshift = 2; else From 7708a3858e69db91a8b69487994f33b96d20192a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Tue, 28 Mar 2023 20:41:31 +0200 Subject: [PATCH 553/898] USB: serial: option: add Quectel RM500U-CN modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This modem supports several modes with a class network function and a number of serial functions, all using ff/00/00 The device ID is the same in all modes. RNDIS mode ---------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0900 Rev= 4.04 S: Manufacturer=Quectel S: Product=RM500U-CN S: SerialNumber=0123456789ABCDEF C:* #Ifs= 7 Cfg#= 1 Atr=c0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=e0(wlcon) Sub=01 Prot=03 I:* If#= 0 Alt= 0 #EPs= 1 Cls=e0(wlcon) Sub=01 Prot=03 Driver=rndis_host E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms ECM mode -------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0900 Rev= 4.04 S: Manufacturer=Quectel S: Product=RM500U-CN S: SerialNumber=0123456789ABCDEF C:* #Ifs= 7 Cfg#= 1 Atr=c0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=06 Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms NCM mode -------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0900 Rev= 4.04 S: Manufacturer=Quectel S: Product=RM500U-CN S: SerialNumber=0123456789ABCDEF C:* #Ifs= 7 Cfg#= 1 Atr=c0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0d Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0d Prot=00 Driver=cdc_ncm E: Ad=82(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=01 Driver=cdc_ncm I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=01 Driver=cdc_ncm E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Reported-by: Andrew Green Cc: stable@vger.kernel.org Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 1621f66db25f7..f31cc3c763299 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1198,6 +1198,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0900, 0xff, 0, 0), /* RM500U-CN */ + .driver_info = ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200U, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) }, From 30fb97ba4a8e082ba0a5432479d6995472edbd7b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 26 Mar 2023 22:54:33 +0200 Subject: [PATCH 554/898] drm/nouveau/kms: Fix backlight registration The nouveau code used to call drm_fb_helper_initial_config() from nouveau_fbcon_init() before calling drm_dev_register(). This would probe all connectors so that drm_connector->status could be used during backlight registration which runs from nouveau_connector_late_register(). After commit 4a16dd9d18a0 ("drm/nouveau/kms: switch to drm fbdev helpers") the fbdev emulation code, which now is a drm-client, can only run after drm_dev_register(). So during backlight registration the connectors are not probed yet and the drm_connector->status == connected check in nv50_backlight_init() would now always fail. Replace the drm_connector->status == connected check with a drm_helper_probe_detect() == connected check to fix nv_backlight no longer getting registered because of this. Fixes: 4a16dd9d18a0 ("drm/nouveau/kms: switch to drm fbdev helpers") Link: https://gitlab.freedesktop.org/drm/nouveau/-/issues/202 Link: https://bugzilla.redhat.com/show_bug.cgi?id=2181941 Signed-off-by: Hans de Goede Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20230326205433.36485-1-hdegoede@redhat.com --- drivers/gpu/drm/nouveau/nouveau_backlight.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index 40409a29f5b69..91b5ecc575380 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "nouveau_drv.h" #include "nouveau_reg.h" @@ -299,8 +300,12 @@ nv50_backlight_init(struct nouveau_backlight *bl, struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev); struct nvif_object *device = &drm->client.device.object; + /* + * Note when this runs the connectors have not been probed yet, + * so nv_conn->base.status is not set yet. + */ if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(ffs(nv_encoder->dcb->or) - 1)) || - nv_conn->base.status != connector_status_connected) + drm_helper_probe_detect(&nv_conn->base, NULL, false) != connector_status_connected) return -ENODEV; if (nv_conn->type == DCB_CONNECTOR_eDP) { From 5c2712387d4850e0b64121d5fd3e6c4e84ea3266 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 28 Mar 2023 19:49:15 +0800 Subject: [PATCH 555/898] cacheinfo: Fix LLC is not exported through sysfs After entering 6.3-rc1 the LLC cacheinfo is not exported on our ACPI based arm64 server. This is because the LLC cacheinfo is partly reset when secondary CPUs boot up. On arm64 the primary cpu will allocate and setup cacheinfo: init_cpu_topology() for_each_possible_cpu() fetch_cache_info() // Allocate cacheinfo and init levels detect_cache_attributes() cache_shared_cpu_map_setup() if (!last_level_cache_is_valid()) // not valid, setup LLC cache_setup_properties() // setup LLC On secondary CPU boot up: detect_cache_attributes() populate_cache_leaves() get_cache_type() // Get cache type from clidr_el1, // for LLC type=CACHE_TYPE_NOCACHE cache_shared_cpu_map_setup() if (!last_level_cache_is_valid()) // Valid and won't go to this branch, // leave LLC's type=CACHE_TYPE_NOCACHE The last_level_cache_is_valid() use cacheinfo->{attributes, fw_token} to test it's valid or not, but populate_cache_leaves() will only reset LLC's type, so we won't try to re-setup LLC's type and leave it CACHE_TYPE_NOCACHE and won't export it through sysfs. This patch tries to fix this by not re-populating the cache leaves if the LLC is valid. Fixes: 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU") Signed-off-by: Yicong Yang Reviewed-by: Pierre Gondois Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20230328114915.33340-1-yangyicong@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/cacheinfo.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index f6573c335f4c4..f3903d002819e 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -474,12 +474,18 @@ int detect_cache_attributes(unsigned int cpu) populate_leaves: /* - * populate_cache_leaves() may completely setup the cache leaves and - * shared_cpu_map or it may leave it partially setup. + * If LLC is valid the cache leaves were already populated so just go to + * update the cpu map. */ - ret = populate_cache_leaves(cpu); - if (ret) - goto free_ci; + if (!last_level_cache_is_valid(cpu)) { + /* + * populate_cache_leaves() may completely setup the cache leaves and + * shared_cpu_map or it may leave it partially setup. + */ + ret = populate_cache_leaves(cpu); + if (ret) + goto free_ci; + } /* * For systems using DT for cache hierarchy, fw_token From eca9f6e6f83b6725b84e1c76fdde19b003cff0eb Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Mon, 20 Mar 2023 19:50:08 -0700 Subject: [PATCH 556/898] powerpc/pseries/vas: Ignore VAS update for DLPAR if copy/paste is not enabled The hypervisor supports user-mode NX from Power10. pseries_vas_dlpar_cpu() is called from lparcfg_write() to update VAS windows for DLPAR event in shared processor mode and the kernel gets -ENOTSUPP for HCALLs if the user-mode NX is not supported. The current VAS implementation also supports only with Radix page tables. Whereas in dedicated processor mode, pseries_vas_notifier() is registered only if the copy/paste feature is enabled. So instead of displaying HCALL error messages, update VAS capabilities if the copy/paste feature is available. This patch ignores updating VAS capabilities in pseries_vas_dlpar_cpu() and returns success if the copy/paste feature is not enabled. Then lparcfg_write() completes the processor DLPAR operations without any failures. Fixes: 2147783d6bf0 ("powerpc/pseries: Use lparcfg to reconfig VAS windows for DLPAR CPU") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Haren Myneni Reviewed-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/1d0e727e7dbd9a28627ef08ca9df9c86a50175e2.camel@linux.ibm.com --- arch/powerpc/platforms/pseries/vas.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 5591123128107..513180467562b 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -856,6 +856,13 @@ int pseries_vas_dlpar_cpu(void) { int new_nr_creds, rc; + /* + * NX-GZIP is not enabled. Nothing to do for DLPAR event + */ + if (!copypaste_feat) + return 0; + + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat, (u64)virt_to_phys(&hv_cop_caps)); @@ -1012,6 +1019,7 @@ static int __init pseries_vas_init(void) * Linux supports user space COPY/PASTE only with Radix */ if (!radix_enabled()) { + copypaste_feat = false; pr_err("API is supported only with radix page tables\n"); return -ENOTSUPP; } From 52aad39385e1bfdb34a1b405f699a8ef302c58b0 Mon Sep 17 00:00:00 2001 From: huangwenhui Date: Tue, 28 Mar 2023 15:46:44 +0800 Subject: [PATCH 557/898] ALSA: hda/realtek: Add quirk for Lenovo ZhaoYang CF4620Z Fix headset microphone detection on Lenovo ZhaoYang CF4620Z. [ adjusted to be applicable to the latest tree -- tiwai ] Signed-off-by: huangwenhui Cc: Link: https://lore.kernel.org/r/20230328074644.30142-1-huangwenhuia@uniontech.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b501f9489fc1b..a2706fd87b141 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9713,6 +9713,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), + SND_PCI_QUIRK(0x17aa, 0x9e56, "Lenovo ZhaoYang CF4620Z", ALC286_FIXUP_SONY_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1849, 0x1233, "ASRock NUC Box 1100", ALC233_FIXUP_NO_AUDIO_JACK), SND_PCI_QUIRK(0x1849, 0xa233, "Positivo Master C6300", ALC269_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x19e5, 0x3204, "Huawei MACH-WX9", ALC256_FIXUP_HUAWEI_MACH_WX9_PINS), From e3720f92e0237921da537e47a0b24e27899203f8 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 29 Mar 2023 14:38:28 +0300 Subject: [PATCH 558/898] ASoC: SOF: avoid a NULL dereference with unsupported widgets If an IPC4 topology contains an unsupported widget, its .module_info field won't be set, then sof_ipc4_route_setup() will cause a kernel Oops trying to dereference it. Add a check for such cases. Cc: stable@vger.kernel.org # 6.2 Signed-off-by: Guennadi Liakhovetski Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230329113828.28562-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-topology.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index a623707c8ffc7..669b99a4f76e6 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -1805,6 +1805,14 @@ static int sof_ipc4_route_setup(struct snd_sof_dev *sdev, struct snd_sof_route * u32 header, extension; int ret; + if (!src_fw_module || !sink_fw_module) { + /* The NULL module will print as "(efault)" */ + dev_err(sdev->dev, "source %s or sink %s widget weren't set up properly\n", + src_fw_module->man4_module_entry.name, + sink_fw_module->man4_module_entry.name); + return -ENODEV; + } + sroute->src_queue_id = sof_ipc4_get_queue_id(src_widget, sink_widget, SOF_PIN_TYPE_SOURCE); if (sroute->src_queue_id < 0) { From 4453545b5b4c3eff941f69a5530f916d899db025 Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 29 Mar 2023 14:55:32 +0800 Subject: [PATCH 559/898] drm: buddy_allocator: Fix buddy allocator init on 32-bit systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The drm buddy allocator tests were broken on 32-bit systems, as rounddown_pow_of_two() takes a long, and the buddy allocator handles 64-bit sizes even on 32-bit systems. This can be reproduced with the drm_buddy_allocator KUnit tests on i386: ./tools/testing/kunit/kunit.py run --arch i386 \ --kunitconfig ./drivers/gpu/drm/tests drm_buddy (It results in kernel BUG_ON() when too many blocks are created, due to the block size being too small.) This was independently uncovered (and fixed) by Luís Mendes, whose patch added a new u64 variant of rounddown_pow_of_two(). This version instead recalculates the size based on the order. Reported-by: Luís Mendes Link: https://lore.kernel.org/lkml/CAEzXK1oghXAB_KpKpm=-CviDQbNaH0qfgYTSSjZgvvyj4U78AA@mail.gmail.com/T/ Signed-off-by: David Gow Acked-by: Christian König Reviewed-by: Arunpravin Paneer Selvam Link: https://patchwork.freedesktop.org/patch/msgid/20230329065532.2122295-1-davidgow@google.com Signed-off-by: Christian König --- drivers/gpu/drm/drm_buddy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 3d1f50f481cfd..7098f125b54a9 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -146,8 +146,8 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size) unsigned int order; u64 root_size; - root_size = rounddown_pow_of_two(size); - order = ilog2(root_size) - ilog2(chunk_size); + order = ilog2(size) - ilog2(chunk_size); + root_size = chunk_size << order; root = drm_block_alloc(mm, NULL, order, offset); if (!root) From 25bbe844ef5c4fb4d7d8dcaa0080f922b7cd3a16 Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 29 Mar 2023 14:55:34 +0800 Subject: [PATCH 560/898] drm: test: Fix 32-bit issue in drm_buddy_test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The drm_buddy_test KUnit tests verify that returned blocks have sizes which are powers of two using is_power_of_2(). However, is_power_of_2() operations on a 'long', but the block size is a u64. So on systems where long is 32-bit, this can sometimes fail even on correctly sized blocks. This only reproduces randomly, as the parameters passed to the buddy allocator in this test are random. The seed 0xb2e06022 reproduced it fine here. For now, just hardcode an is_power_of_2() implementation using x & (x - 1). Signed-off-by: David Gow Acked-by: Christian König Reviewed-by: Maíra Canal Reviewed-by: Arunpravin Paneer Selvam Link: https://patchwork.freedesktop.org/patch/msgid/20230329065532.2122295-2-davidgow@google.com Signed-off-by: Christian König --- drivers/gpu/drm/tests/drm_buddy_test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c index f8ee714df3967..09ee6f6af896b 100644 --- a/drivers/gpu/drm/tests/drm_buddy_test.c +++ b/drivers/gpu/drm/tests/drm_buddy_test.c @@ -89,7 +89,8 @@ static int check_block(struct kunit *test, struct drm_buddy *mm, err = -EINVAL; } - if (!is_power_of_2(block_size)) { + /* We can't use is_power_of_2() for a u64 on 32-bit systems. */ + if (block_size & (block_size - 1)) { kunit_err(test, "block size not power of two\n"); err = -EINVAL; } From 4ff0b50de8cabba055efe50bbcb7506c41a69835 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 29 Mar 2023 15:03:43 +0100 Subject: [PATCH 561/898] io_uring/rsrc: fix rogue rsrc node grabbing We should not be looking at ctx->rsrc_node and anyhow modifying the node without holding uring_lock, grabbing references in such a way is not safe either. Cc: stable@vger.kernel.org Fixes: 5106dd6e74ab6 ("io_uring: propagate issue_flags state down to file assignment") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/1202ede2d7bb90136e3482b2b84aad9ed483e5d6.1680098433.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/rsrc.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 2b8743645efcb..f27f4975217d9 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -144,15 +144,13 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req, unsigned int issue_flags) { if (!req->rsrc_node) { - req->rsrc_node = ctx->rsrc_node; + io_ring_submit_lock(ctx, issue_flags); - if (!(issue_flags & IO_URING_F_UNLOCKED)) { - lockdep_assert_held(&ctx->uring_lock); + lockdep_assert_held(&ctx->uring_lock); - io_charge_rsrc_node(ctx); - } else { - percpu_ref_get(&req->rsrc_node->refs); - } + req->rsrc_node = ctx->rsrc_node; + io_charge_rsrc_node(ctx); + io_ring_submit_unlock(ctx, issue_flags); } } From 433279beba1d4872da10b7b60a539e0cb828b32b Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 28 Mar 2023 17:44:00 +0800 Subject: [PATCH 562/898] md: fix regression for null-ptr-deference in __md_stop() Commit 3e453522593d ("md: Free resources in __md_stop") tried to fix null-ptr-deference for 'active_io' by moving percpu_ref_exit() to __md_stop(), however, the commit also moving 'writes_pending' to __md_stop(), and this will cause mdadm tests broken: BUG: kernel NULL pointer dereference, address: 0000000000000038 Oops: 0000 [#1] PREEMPT SMP CPU: 15 PID: 17830 Comm: mdadm Not tainted 6.3.0-rc3-next-20230324-00009-g520d37 RIP: 0010:free_percpu+0x465/0x670 Call Trace: __percpu_ref_exit+0x48/0x70 percpu_ref_exit+0x1a/0x90 __md_stop+0xe9/0x170 do_md_stop+0x1e1/0x7b0 md_ioctl+0x90c/0x1aa0 blkdev_ioctl+0x19b/0x400 vfs_ioctl+0x20/0x50 __x64_sys_ioctl+0xba/0xe0 do_syscall_64+0x6c/0xe0 entry_SYSCALL_64_after_hwframe+0x63/0xcd And the problem can be reporduced 100% by following test: mdadm -CR /dev/md0 -l1 -n1 /dev/sda --force echo inactive > /sys/block/md0/md/array_state echo read-auto > /sys/block/md0/md/array_state echo inactive > /sys/block/md0/md/array_state Root cause: // start raid raid1_run mddev_init_writes_pending percpu_ref_init // inactive raid array_state_store do_md_stop __md_stop percpu_ref_exit // start raid again array_state_store do_md_run raid1_run mddev_init_writes_pending if (mddev->writes_pending.percpu_count_ptr) // won't reinit // inactive raid again ... percpu_ref_exit -> null-ptr-deference Before the commit, 'writes_pending' is exited when mddev is freed, and it's safe to restart raid because mddev_init_writes_pending() already make sure that 'writes_pending' will only be initialized once. Fix the prblem by moving 'writes_pending' back, it's a litter hard to find the relationship between alloc memory and free memory, however, code changes is much less and we lived with this for a long time already. Fixes: 3e453522593d ("md: Free resources in __md_stop") Signed-off-by: Yu Kuai Reviewed-by: Xiao Ni Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230328094400.1448955-1-yukuai1@huaweicloud.com --- drivers/md/md.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 39e49e5d71823..13321dbb5fbcf 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6260,7 +6260,6 @@ static void __md_stop(struct mddev *mddev) module_put(pers->owner); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - percpu_ref_exit(&mddev->writes_pending); percpu_ref_exit(&mddev->active_io); bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); @@ -6273,6 +6272,7 @@ void md_stop(struct mddev *mddev) */ __md_stop_writes(mddev); __md_stop(mddev); + percpu_ref_exit(&mddev->writes_pending); } EXPORT_SYMBOL_GPL(md_stop); @@ -7843,6 +7843,7 @@ static void md_free_disk(struct gendisk *disk) { struct mddev *mddev = disk->private_data; + percpu_ref_exit(&mddev->writes_pending); mddev_free(mddev); } From a57cc2dbb3738930d9cb361b9b473f90c8ede0b8 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 29 Mar 2023 08:22:07 -0700 Subject: [PATCH 563/898] thermal: intel: int340x: processor_thermal: Fix additional deadlock Commit 52f04f10b900 ("thermal: intel: int340x: processor_thermal: Fix deadlock") addressed deadlock issue during user space trip update. But it missed a case when thermal zone device is disabled when user writes 0. Call to thermal_zone_device_disable() also causes deadlock as it also tries to lock tz->lock, which is already claimed by trip_point_temp_store() in the thermal core code. Remove call to thermal_zone_device_disable() in the function sys_set_trip_temp(), which is called from trip_point_temp_store(). Fixes: 52f04f10b900 ("thermal: intel: int340x: processor_thermal: Fix deadlock") Signed-off-by: Srinivas Pandruvada Cc: 6.2+ # 6.2+ Signed-off-by: Rafael J. Wysocki --- .../thermal/intel/int340x_thermal/processor_thermal_device_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c index 90526f46c9b11..d71ee50e7878f 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c @@ -153,7 +153,6 @@ static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp cancel_delayed_work_sync(&pci_info->work); proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0); proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_THRES_0, 0); - thermal_zone_device_disable(tzd); pci_info->stored_thres = 0; return 0; } From 292fd843de26c551856e66faf134512c52dd78b4 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 17 Mar 2023 11:15:05 -0400 Subject: [PATCH 564/898] cgroup/cpuset: Fix partition root's cpuset.cpus update bug It was found that commit 7a2127e66a00 ("cpuset: Call set_cpus_allowed_ptr() with appropriate mask for task") introduced a bug that corrupted "cpuset.cpus" of a partition root when it was updated. It is because the tmp->new_cpus field of the passed tmp parameter of update_parent_subparts_cpumask() should not be used at all as it contains important cpumask data that should not be overwritten. Fix it by using tmp->addmask instead. Also update update_cpumask() to make sure that trialcs->cpu_allowed will not be corrupted until it is no longer needed. Fixes: 7a2127e66a00 ("cpuset: Call set_cpus_allowed_ptr() with appropriate mask for task") Signed-off-by: Waiman Long Cc: stable@vger.kernel.org # v6.2+ Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 636f1c682ac07..f310915d12575 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1513,7 +1513,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, spin_unlock_irq(&callback_lock); if (adding || deleting) - update_tasks_cpumask(parent, tmp->new_cpus); + update_tasks_cpumask(parent, tmp->addmask); /* * Set or clear CS_SCHED_LOAD_BALANCE when partcmd_update, if necessary. @@ -1770,10 +1770,13 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, /* * Use the cpumasks in trialcs for tmpmasks when they are pointers * to allocated cpumasks. + * + * Note that update_parent_subparts_cpumask() uses only addmask & + * delmask, but not new_cpus. */ tmp.addmask = trialcs->subparts_cpus; tmp.delmask = trialcs->effective_cpus; - tmp.new_cpus = trialcs->cpus_allowed; + tmp.new_cpus = NULL; #endif retval = validate_change(cs, trialcs); @@ -1838,6 +1841,11 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, } spin_unlock_irq(&callback_lock); +#ifdef CONFIG_CPUMASK_OFFSTACK + /* Now trialcs->cpus_allowed is available */ + tmp.new_cpus = trialcs->cpus_allowed; +#endif + /* effective_cpus will be updated here */ update_cpumasks_hier(cs, &tmp, false); From 1aa866931b8026a0dd636e9ef7b5c5dfb4cc5ce8 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Fri, 24 Mar 2023 10:05:38 +0000 Subject: [PATCH 565/898] RISC-V: add non-alternative fallback for riscv_has_extension_[un]likely() The has_fpu() check, which in turn calls riscv_has_extension_likely(), relies on alternatives to figure out whether the system has an FPU. As a result, it will malfunction on XIP kernels, as they do not support the alternatives mechanism. When alternatives support is not present, fall back to using __riscv_isa_extension_available() in riscv_has_extension_[un]likely() instead stead, which handily takes the same argument, so that kernels that do not support alternatives can accurately report the presence of FPU support. Fixes: 702e64550b12 ("riscv: fpu: switch has_fpu() to riscv_has_extension_likely()") Link: https://lore.kernel.org/all/ad445951-3d13-4644-94d9-e0989cda39c3@spud/ Signed-off-by: Conor Dooley Reviewed-by: Andrew Jones Reviewed-by: Jason A. Donenfeld Link: https://lore.kernel.org/r/20230324100538.3514663-2-conor.dooley@microchip.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hwcap.h | 50 ++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index e3021b2590de0..6263a0de1c6a1 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -57,18 +57,31 @@ struct riscv_isa_ext_data { unsigned int isa_ext_id; }; +unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); + +#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext) + +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit); +#define riscv_isa_extension_available(isa_bitmap, ext) \ + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) + static __always_inline bool riscv_has_extension_likely(const unsigned long ext) { compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); - asm_volatile_goto( - ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_no); + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + asm_volatile_goto( + ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) + : + : [ext] "i" (ext) + : + : l_no); + } else { + if (!__riscv_isa_extension_available(NULL, ext)) + goto l_no; + } return true; l_no: @@ -81,26 +94,23 @@ riscv_has_extension_unlikely(const unsigned long ext) compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); - asm_volatile_goto( - ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_yes); + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + asm_volatile_goto( + ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) + : + : [ext] "i" (ext) + : + : l_yes); + } else { + if (__riscv_isa_extension_available(NULL, ext)) + goto l_yes; + } return false; l_yes: return true; } -unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); - -#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext) - -bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit); -#define riscv_isa_extension_available(isa_bitmap, ext) \ - __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) - #endif #endif /* _ASM_RISCV_HWCAP_H */ From 1ee7fc3f4d0a93831a20d5566f203d5ad6d44de8 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Fri, 24 Mar 2023 10:05:39 +0000 Subject: [PATCH 566/898] RISC-V: always select RISCV_ALTERNATIVE for non-xip kernels When moving switch_to's has_fpu() over to using riscv_has_extension_likely() rather than static branches, the FPU code gained a dependency on the alternatives framework. That dependency has now been removed, as riscv_has_extension_ikely() now contains a fallback path, using __riscv_isa_extension_available(), but if CONFIG_RISCV_ALTERNATIVE isn't selected when CONFIG_FPU is, has_fpu() checks will not benefit from the "fast path" that the alternatives framework provides. We want to ensure that alternatives are available whenever riscv_has_extension_[un]likely() is used, rather than silently falling back to the slow path, but rather than rely on selecting RISCV_ALTERNATIVE in the myriad of locations that may use riscv_has_extension_[un]likely(), select it (almost) always instead by adding it to the main RISCV config entry. xip kernels cannot make use of the alternatives framework, so it is not enabled for those configurations, although this is the status quo. All current sites that select RISCV_ALTERNATIVE are converted to dependencies on the option instead. The explicit dependencies on !XIP_KERNEL can be dropped, as RISCV_ALTERNATIVE is not user selectable. Fixes: 702e64550b12 ("riscv: fpu: switch has_fpu() to riscv_has_extension_likely()") Link: https://lore.kernel.org/all/ZBruFRwt3rUVngPu@zx2c4.com/ Reported-by: Jason A. Donenfeld Signed-off-by: Conor Dooley Reviewed-by: Andrew Jones Reviewed-by: Jason A. Donenfeld Link: https://lore.kernel.org/r/20230324100538.3514663-3-conor.dooley@microchip.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 12 ++++++------ arch/riscv/Kconfig.erratas | 6 ++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c5e42cc376048..2f6976418d0a3 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -126,6 +126,7 @@ config RISCV select OF_IRQ select PCI_DOMAINS_GENERIC if PCI select PCI_MSI if PCI + select RISCV_ALTERNATIVE if !XIP_KERNEL select RISCV_INTC select RISCV_TIMER if RISCV_SBI select SIFIVE_PLIC @@ -401,9 +402,8 @@ config RISCV_ISA_C config RISCV_ISA_SVPBMT bool "SVPBMT extension support" depends on 64BIT && MMU - depends on !XIP_KERNEL + depends on RISCV_ALTERNATIVE default y - select RISCV_ALTERNATIVE help Adds support to dynamically detect the presence of the SVPBMT ISA-extension (Supervisor-mode: page-based memory types) and @@ -428,8 +428,8 @@ config TOOLCHAIN_HAS_ZBB config RISCV_ISA_ZBB bool "Zbb extension support for bit manipulation instructions" depends on TOOLCHAIN_HAS_ZBB - depends on !XIP_KERNEL && MMU - select RISCV_ALTERNATIVE + depends on MMU + depends on RISCV_ALTERNATIVE default y help Adds support to dynamically detect the presence of the ZBB @@ -443,9 +443,9 @@ config RISCV_ISA_ZBB config RISCV_ISA_ZICBOM bool "Zicbom extension support for non-coherent DMA operation" - depends on !XIP_KERNEL && MMU + depends on MMU + depends on RISCV_ALTERNATIVE default y - select RISCV_ALTERNATIVE select RISCV_DMA_NONCOHERENT help Adds support to dynamically detect the presence of the ZICBOM diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas index 69621ae6d647a..0c8f4652cd828 100644 --- a/arch/riscv/Kconfig.erratas +++ b/arch/riscv/Kconfig.erratas @@ -2,8 +2,7 @@ menu "CPU errata selection" config ERRATA_SIFIVE bool "SiFive errata" - depends on !XIP_KERNEL - select RISCV_ALTERNATIVE + depends on RISCV_ALTERNATIVE help All SiFive errata Kconfig depend on this Kconfig. Disabling this Kconfig will disable all SiFive errata. Please say "Y" @@ -35,8 +34,7 @@ config ERRATA_SIFIVE_CIP_1200 config ERRATA_THEAD bool "T-HEAD errata" - depends on !XIP_KERNEL - select RISCV_ALTERNATIVE + depends on RISCV_ALTERNATIVE help All T-HEAD errata Kconfig depend on this Kconfig. Disabling this Kconfig will disable all T-HEAD errata. Please say "Y" From aa874cdfec07d4dd9c6f0c356d65c609ba31a26f Mon Sep 17 00:00:00 2001 From: Tharun Kumar P Date: Mon, 20 Mar 2023 19:52:37 +0530 Subject: [PATCH 567/898] i2c: mchp-pci1xxxx: Update Timing registers Update I2C timing registers based on latest hardware design. This fix does not break functionality of chips with older design and existing users will not be affected. Fixes: 361693697249 ("i2c: microchip: pci1xxxx: Add driver for I2C host controller in multifunction endpoint of pci1xxxx switch") Signed-off-by: Tharun Kumar P Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mchp-pci1xxxx.c | 60 +++++++++++++------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c index 09af759211478..b21ffd6df9276 100644 --- a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c +++ b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c @@ -48,9 +48,9 @@ * SR_HOLD_TIME_XK_TICKS field will indicate the number of ticks of the * baud clock required to program 'Hold Time' at X KHz. */ -#define SR_HOLD_TIME_100K_TICKS 133 -#define SR_HOLD_TIME_400K_TICKS 20 -#define SR_HOLD_TIME_1000K_TICKS 11 +#define SR_HOLD_TIME_100K_TICKS 150 +#define SR_HOLD_TIME_400K_TICKS 20 +#define SR_HOLD_TIME_1000K_TICKS 12 #define SMB_CORE_COMPLETION_REG_OFF3 (SMBUS_MAST_CORE_ADDR_BASE + 0x23) @@ -65,17 +65,17 @@ * the baud clock required to program 'fair idle delay' at X KHz. Fair idle * delay establishes the MCTP T(IDLE_DELAY) period. */ -#define FAIR_BUS_IDLE_MIN_100K_TICKS 969 -#define FAIR_BUS_IDLE_MIN_400K_TICKS 157 -#define FAIR_BUS_IDLE_MIN_1000K_TICKS 157 +#define FAIR_BUS_IDLE_MIN_100K_TICKS 992 +#define FAIR_BUS_IDLE_MIN_400K_TICKS 500 +#define FAIR_BUS_IDLE_MIN_1000K_TICKS 500 /* * FAIR_IDLE_DELAY_XK_TICKS field will indicate the number of ticks of the * baud clock required to satisfy the fairness protocol at X KHz. */ -#define FAIR_IDLE_DELAY_100K_TICKS 1000 -#define FAIR_IDLE_DELAY_400K_TICKS 500 -#define FAIR_IDLE_DELAY_1000K_TICKS 500 +#define FAIR_IDLE_DELAY_100K_TICKS 963 +#define FAIR_IDLE_DELAY_400K_TICKS 156 +#define FAIR_IDLE_DELAY_1000K_TICKS 156 #define SMB_IDLE_SCALING_100K \ ((FAIR_IDLE_DELAY_100K_TICKS << 16) | FAIR_BUS_IDLE_MIN_100K_TICKS) @@ -105,7 +105,7 @@ */ #define BUS_CLK_100K_LOW_PERIOD_TICKS 156 #define BUS_CLK_400K_LOW_PERIOD_TICKS 41 -#define BUS_CLK_1000K_LOW_PERIOD_TICKS 15 +#define BUS_CLK_1000K_LOW_PERIOD_TICKS 15 /* * BUS_CLK_XK_HIGH_PERIOD_TICKS field defines the number of I2C Baud Clock @@ -131,7 +131,7 @@ */ #define CLK_SYNC_100K 4 #define CLK_SYNC_400K 4 -#define CLK_SYNC_1000K 4 +#define CLK_SYNC_1000K 4 #define SMB_CORE_DATA_TIMING_REG_OFF (SMBUS_MAST_CORE_ADDR_BASE + 0x40) @@ -142,25 +142,25 @@ * determines the SCLK hold time following SDAT driven low during the first * START bit in a transfer. */ -#define FIRST_START_HOLD_100K_TICKS 22 -#define FIRST_START_HOLD_400K_TICKS 16 -#define FIRST_START_HOLD_1000K_TICKS 6 +#define FIRST_START_HOLD_100K_TICKS 23 +#define FIRST_START_HOLD_400K_TICKS 8 +#define FIRST_START_HOLD_1000K_TICKS 12 /* * STOP_SETUP_XK_TICKS will indicate the number of ticks of the baud clock * required to program 'STOP_SETUP' timer at X KHz. This timer determines the * SDAT setup time from the rising edge of SCLK for a STOP condition. */ -#define STOP_SETUP_100K_TICKS 157 +#define STOP_SETUP_100K_TICKS 150 #define STOP_SETUP_400K_TICKS 20 -#define STOP_SETUP_1000K_TICKS 12 +#define STOP_SETUP_1000K_TICKS 12 /* * RESTART_SETUP_XK_TICKS will indicate the number of ticks of the baud clock * required to program 'RESTART_SETUP' timer at X KHz. This timer determines the * SDAT setup time from the rising edge of SCLK for a repeated START condition. */ -#define RESTART_SETUP_100K_TICKS 157 +#define RESTART_SETUP_100K_TICKS 156 #define RESTART_SETUP_400K_TICKS 20 #define RESTART_SETUP_1000K_TICKS 12 @@ -169,7 +169,7 @@ * required to program 'DATA_HOLD' timer at X KHz. This timer determines the * SDAT hold time following SCLK driven low. */ -#define DATA_HOLD_100K_TICKS 2 +#define DATA_HOLD_100K_TICKS 12 #define DATA_HOLD_400K_TICKS 2 #define DATA_HOLD_1000K_TICKS 2 @@ -190,35 +190,35 @@ * Bus Idle Minimum time = BUS_IDLE_MIN[7:0] x Baud_Clock_Period x * (BUS_IDLE_MIN_XK_TICKS[7] ? 4,1) */ -#define BUS_IDLE_MIN_100K_TICKS 167UL -#define BUS_IDLE_MIN_400K_TICKS 139UL -#define BUS_IDLE_MIN_1000K_TICKS 133UL +#define BUS_IDLE_MIN_100K_TICKS 36UL +#define BUS_IDLE_MIN_400K_TICKS 10UL +#define BUS_IDLE_MIN_1000K_TICKS 4UL /* * CTRL_CUM_TIME_OUT_XK_TICKS defines SMBus Controller Cumulative Time-Out. * SMBus Controller Cumulative Time-Out duration = * CTRL_CUM_TIME_OUT_XK_TICKS[7:0] x Baud_Clock_Period x 2048 */ -#define CTRL_CUM_TIME_OUT_100K_TICKS 159 -#define CTRL_CUM_TIME_OUT_400K_TICKS 159 -#define CTRL_CUM_TIME_OUT_1000K_TICKS 159 +#define CTRL_CUM_TIME_OUT_100K_TICKS 76 +#define CTRL_CUM_TIME_OUT_400K_TICKS 76 +#define CTRL_CUM_TIME_OUT_1000K_TICKS 76 /* * TARGET_CUM_TIME_OUT_XK_TICKS defines SMBus Target Cumulative Time-Out duration. * SMBus Target Cumulative Time-Out duration = TARGET_CUM_TIME_OUT_XK_TICKS[7:0] x * Baud_Clock_Period x 4096 */ -#define TARGET_CUM_TIME_OUT_100K_TICKS 199 -#define TARGET_CUM_TIME_OUT_400K_TICKS 199 -#define TARGET_CUM_TIME_OUT_1000K_TICKS 199 +#define TARGET_CUM_TIME_OUT_100K_TICKS 95 +#define TARGET_CUM_TIME_OUT_400K_TICKS 95 +#define TARGET_CUM_TIME_OUT_1000K_TICKS 95 /* * CLOCK_HIGH_TIME_OUT_XK defines Clock High time out period. * Clock High time out period = CLOCK_HIGH_TIME_OUT_XK[7:0] x Baud_Clock_Period x 8 */ -#define CLOCK_HIGH_TIME_OUT_100K_TICKS 204 -#define CLOCK_HIGH_TIME_OUT_400K_TICKS 204 -#define CLOCK_HIGH_TIME_OUT_1000K_TICKS 204 +#define CLOCK_HIGH_TIME_OUT_100K_TICKS 97 +#define CLOCK_HIGH_TIME_OUT_400K_TICKS 97 +#define CLOCK_HIGH_TIME_OUT_1000K_TICKS 97 #define TO_SCALING_100K \ ((BUS_IDLE_MIN_100K_TICKS << 24) | (CTRL_CUM_TIME_OUT_100K_TICKS << 16) | \ From f4f3b7dedbe849e780c779ba67365bb1db0d8637 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Fri, 24 Feb 2023 13:45:21 -0500 Subject: [PATCH 568/898] drm/amd/display: Add DSC Support for Synaptics Cascaded MST Hub Traditional synaptics hub has one MST branch device without virtual dpcd. Synaptics cascaded hub has two chained MST branch devices. DSC decoding is performed via root MST branch device, instead of the second MST branch device. Reviewed-by: Hersen Wu Acked-by: Qingqing Zhuo Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 19 +++++++++++++++++++ .../display/amdgpu_dm/amdgpu_dm_mst_types.h | 12 ++++++++++++ 2 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index e25e1b2bf1949..30de0aa084d88 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -212,6 +212,21 @@ bool needs_dsc_aux_workaround(struct dc_link *link) return false; } +bool is_synaptics_cascaded_panamera(struct dc_link *link, struct drm_dp_mst_port *port) +{ + u8 branch_vendor_data[4] = { 0 }; // Vendor data 0x50C ~ 0x50F + + if (drm_dp_dpcd_read(port->mgr->aux, DP_BRANCH_VENDOR_SPECIFIC_START, &branch_vendor_data, 4) == 4) { + if (link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 && + IS_SYNAPTICS_CASCADED_PANAMERA(link->dpcd_caps.branch_dev_name, branch_vendor_data)) { + DRM_INFO("Synaptics Cascaded MST hub\n"); + return true; + } + } + + return false; +} + static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnector) { struct dc_sink *dc_sink = aconnector->dc_sink; @@ -235,6 +250,10 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto needs_dsc_aux_workaround(aconnector->dc_link)) aconnector->dsc_aux = &aconnector->mst_root->dm_dp_aux.aux; + /* synaptics cascaded MST hub case */ + if (!aconnector->dsc_aux && is_synaptics_cascaded_panamera(aconnector->dc_link, port)) + aconnector->dsc_aux = port->mgr->aux; + if (!aconnector->dsc_aux) return false; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index 97fd70df531bf..0b5750202e732 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -34,6 +34,18 @@ #define SYNAPTICS_RC_OFFSET 0x4BC #define SYNAPTICS_RC_DATA 0x4C0 +#define DP_BRANCH_VENDOR_SPECIFIC_START 0x50C + +/** + * Panamera MST Hub detection + * Offset DPCD 050Eh == 0x5A indicates cascaded MST hub case + * Check from beginning of branch device vendor specific field (050Ch) + */ +#define IS_SYNAPTICS_PANAMERA(branchDevName) (((int)branchDevName[4] & 0xF0) == 0x50 ? 1 : 0) +#define BRANCH_HW_REVISION_PANAMERA_A2 0x10 +#define SYNAPTICS_CASCADED_HUB_ID 0x5A +#define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0) + struct amdgpu_display_manager; struct amdgpu_dm_connector; From 68dc1846c3a44d5e633be145c169ce2fd5420695 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Tue, 28 Feb 2023 21:34:58 -0500 Subject: [PATCH 569/898] drm/amd/display: Take FEC Overhead into Timeslot Calculation 8b/10b encoding needs to add 3% fec overhead into the pbn. In the Synapcis Cascaded MST hub, the first stage MST branch device needs the information to determine the timeslot count for the second stage MST branch device. Missing this overhead will leads to insufficient timeslot allocation. Cc: stable@vger.kernel.org Cc: Mario Limonciello Reviewed-by: Hersen Wu Acked-by: Qingqing Zhuo Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 32 ++++++++++++++----- .../display/amdgpu_dm/amdgpu_dm_mst_types.h | 3 ++ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 30de0aa084d88..8dc442f90eafa 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -681,12 +681,25 @@ struct dsc_mst_fairness_params { struct amdgpu_dm_connector *aconnector; }; -static int kbps_to_peak_pbn(int kbps) +static uint16_t get_fec_overhead_multiplier(struct dc_link *dc_link) +{ + u8 link_coding_cap; + uint16_t fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B; + + link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(dc_link); + if (link_coding_cap == DP_128b_132b_ENCODING) + fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B; + + return fec_overhead_multiplier_x1000; +} + +static int kbps_to_peak_pbn(int kbps, uint16_t fec_overhead_multiplier_x1000) { u64 peak_kbps = kbps; peak_kbps *= 1006; - peak_kbps = div_u64(peak_kbps, 1000); + peak_kbps *= fec_overhead_multiplier_x1000; + peak_kbps = div_u64(peak_kbps, 1000 * 1000); return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); } @@ -780,11 +793,12 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, int link_timeslots_used; int fair_pbn_alloc; int ret = 0; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled) { initial_slack[i] = - kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i + k].pbn; + kbps_to_peak_pbn(params[i].bw_range.max_kbps, fec_overhead_multiplier_x1000) - vars[i + k].pbn; bpp_increased[i] = false; remaining_to_increase += 1; } else { @@ -880,6 +894,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, int next_index; int remaining_to_try = 0; int ret; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -909,7 +924,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, if (next_index == -1) break; - vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -922,7 +937,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { - vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -951,6 +966,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, int count = 0; int i, k, ret; bool debugfs_overwrite = false; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); memset(params, 0, sizeof(params)); @@ -1012,7 +1028,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try no compression */ for (i = 0; i < count; i++) { vars[i + k].aconnector = params[i].aconnector; - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, @@ -1031,7 +1047,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try max compression */ for (i = 0; i < count; i++) { if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = true; vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, @@ -1039,7 +1055,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (ret < 0) return ret; } else { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index 0b5750202e732..1e4ede1e57abd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -46,6 +46,9 @@ #define SYNAPTICS_CASCADED_HUB_ID 0x5A #define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0) +#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031 +#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000 + struct amdgpu_display_manager; struct amdgpu_dm_connector; From 1e020e1b96afdecd20680b5b5be2a6ffc3d27628 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 6 Mar 2023 09:33:08 +0800 Subject: [PATCH 570/898] ubi: Fix failure attaching when vid_hdr offset equals to (sub)page size Following process will make ubi attaching failed since commit 1b42b1a36fc946 ("ubi: ensure that VID header offset ... size"): ID="0xec,0xa1,0x00,0x15" # 128M 128KB 2KB modprobe nandsim id_bytes=$ID flash_eraseall /dev/mtd0 modprobe ubi mtd="0,2048" # set vid_hdr offset as 2048 (one page) (dmesg): ubi0 error: ubi_attach_mtd_dev [ubi]: VID header offset 2048 too large. UBI error: cannot attach mtd0 UBI error: cannot initialize UBI, error -22 Rework original solution, the key point is making sure 'vid_hdr_shift + UBI_VID_HDR_SIZE < ubi->vid_hdr_alsize', so we should check vid_hdr_shift rather not vid_hdr_offset. Then, ubi still support (sub)page aligined VID header offset. Fixes: 1b42b1a36fc946 ("ubi: ensure that VID header offset ... size") Signed-off-by: Zhihao Cheng Tested-by: Nicolas Schichan Tested-by: Miquel Raynal # v5.10, v4.19 Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/build.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 0904eb40c95fa..ad025b2ee4177 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -666,12 +666,6 @@ static int io_init(struct ubi_device *ubi, int max_beb_per1024) ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size); ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size); - if (ubi->vid_hdr_offset && ((ubi->vid_hdr_offset + UBI_VID_HDR_SIZE) > - ubi->vid_hdr_alsize)) { - ubi_err(ubi, "VID header offset %d too large.", ubi->vid_hdr_offset); - return -EINVAL; - } - dbg_gen("min_io_size %d", ubi->min_io_size); dbg_gen("max_write_size %d", ubi->max_write_size); dbg_gen("hdrs_min_io_size %d", ubi->hdrs_min_io_size); @@ -689,6 +683,21 @@ static int io_init(struct ubi_device *ubi, int max_beb_per1024) ubi->vid_hdr_aloffset; } + /* + * Memory allocation for VID header is ubi->vid_hdr_alsize + * which is described in comments in io.c. + * Make sure VID header shift + UBI_VID_HDR_SIZE not exceeds + * ubi->vid_hdr_alsize, so that all vid header operations + * won't access memory out of bounds. + */ + if ((ubi->vid_hdr_shift + UBI_VID_HDR_SIZE) > ubi->vid_hdr_alsize) { + ubi_err(ubi, "Invalid VID header offset %d, VID header shift(%d)" + " + VID header size(%zu) > VID header aligned size(%d).", + ubi->vid_hdr_offset, ubi->vid_hdr_shift, + UBI_VID_HDR_SIZE, ubi->vid_hdr_alsize); + return -EINVAL; + } + /* Similar for the data offset */ ubi->leb_start = ubi->vid_hdr_offset + UBI_VID_HDR_SIZE; ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size); From 88eaba80328b31ef81813a1207b4056efd7006a6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 20 Mar 2023 15:33:34 +0200 Subject: [PATCH 571/898] nvme-tcp: fix a possible UAF when failing to allocate an io queue When we allocate a nvme-tcp queue, we set the data_ready callback before we actually need to use it. This creates the potential that if a stray controller sends us data on the socket before we connect, we can trigger the io_work and start consuming the socket. In this case reported: we failed to allocate one of the io queues, and as we start releasing the queues that we already allocated, we get a UAF [1] from the io_work which is running before it should really. Fix this by setting the socket ops callbacks only before we start the queue, so that we can't accidentally schedule the io_work in the initialization phase before the queue started. While we are at it, rename nvme_tcp_restore_sock_calls to pair with nvme_tcp_setup_sock_ops. [1]: [16802.107284] nvme nvme4: starting error recovery [16802.109166] nvme nvme4: Reconnecting in 10 seconds... [16812.173535] nvme nvme4: failed to connect socket: -111 [16812.173745] nvme nvme4: Failed reconnect attempt 1 [16812.173747] nvme nvme4: Reconnecting in 10 seconds... [16822.413555] nvme nvme4: failed to connect socket: -111 [16822.413762] nvme nvme4: Failed reconnect attempt 2 [16822.413765] nvme nvme4: Reconnecting in 10 seconds... [16832.661274] nvme nvme4: creating 32 I/O queues. [16833.919887] BUG: kernel NULL pointer dereference, address: 0000000000000088 [16833.920068] nvme nvme4: Failed reconnect attempt 3 [16833.920094] #PF: supervisor write access in kernel mode [16833.920261] nvme nvme4: Reconnecting in 10 seconds... [16833.920368] #PF: error_code(0x0002) - not-present page [16833.921086] Workqueue: nvme_tcp_wq nvme_tcp_io_work [nvme_tcp] [16833.921191] RIP: 0010:_raw_spin_lock_bh+0x17/0x30 ... [16833.923138] Call Trace: [16833.923271] [16833.923402] lock_sock_nested+0x1e/0x50 [16833.923545] nvme_tcp_try_recv+0x40/0xa0 [nvme_tcp] [16833.923685] nvme_tcp_io_work+0x68/0xa0 [nvme_tcp] [16833.923824] process_one_work+0x1e8/0x390 [16833.923969] worker_thread+0x53/0x3d0 [16833.924104] ? process_one_work+0x390/0x390 [16833.924240] kthread+0x124/0x150 [16833.924376] ? set_kthread_struct+0x50/0x50 [16833.924518] ret_from_fork+0x1f/0x30 [16833.924655] Reported-by: Yanjun Zhang Signed-off-by: Sagi Grimberg Tested-by: Yanjun Zhang Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 46 +++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 42c0598c31f2b..49c9e7bc9116b 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1620,22 +1620,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid) if (ret) goto err_init_connect; - queue->rd_enabled = true; set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags); - nvme_tcp_init_recv_ctx(queue); - - write_lock_bh(&queue->sock->sk->sk_callback_lock); - queue->sock->sk->sk_user_data = queue; - queue->state_change = queue->sock->sk->sk_state_change; - queue->data_ready = queue->sock->sk->sk_data_ready; - queue->write_space = queue->sock->sk->sk_write_space; - queue->sock->sk->sk_data_ready = nvme_tcp_data_ready; - queue->sock->sk->sk_state_change = nvme_tcp_state_change; - queue->sock->sk->sk_write_space = nvme_tcp_write_space; -#ifdef CONFIG_NET_RX_BUSY_POLL - queue->sock->sk->sk_ll_usec = 1; -#endif - write_unlock_bh(&queue->sock->sk->sk_callback_lock); return 0; @@ -1655,7 +1640,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid) return ret; } -static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue) +static void nvme_tcp_restore_sock_ops(struct nvme_tcp_queue *queue) { struct socket *sock = queue->sock; @@ -1670,7 +1655,7 @@ static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue) static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) { kernel_sock_shutdown(queue->sock, SHUT_RDWR); - nvme_tcp_restore_sock_calls(queue); + nvme_tcp_restore_sock_ops(queue); cancel_work_sync(&queue->io_work); } @@ -1688,21 +1673,42 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) mutex_unlock(&queue->queue_lock); } +static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue) +{ + write_lock_bh(&queue->sock->sk->sk_callback_lock); + queue->sock->sk->sk_user_data = queue; + queue->state_change = queue->sock->sk->sk_state_change; + queue->data_ready = queue->sock->sk->sk_data_ready; + queue->write_space = queue->sock->sk->sk_write_space; + queue->sock->sk->sk_data_ready = nvme_tcp_data_ready; + queue->sock->sk->sk_state_change = nvme_tcp_state_change; + queue->sock->sk->sk_write_space = nvme_tcp_write_space; +#ifdef CONFIG_NET_RX_BUSY_POLL + queue->sock->sk->sk_ll_usec = 1; +#endif + write_unlock_bh(&queue->sock->sk->sk_callback_lock); +} + static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + struct nvme_tcp_queue *queue = &ctrl->queues[idx]; int ret; + queue->rd_enabled = true; + nvme_tcp_init_recv_ctx(queue); + nvme_tcp_setup_sock_ops(queue); + if (idx) ret = nvmf_connect_io_queue(nctrl, idx); else ret = nvmf_connect_admin_queue(nctrl); if (!ret) { - set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags); + set_bit(NVME_TCP_Q_LIVE, &queue->flags); } else { - if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags)) - __nvme_tcp_stop_queue(&ctrl->queues[idx]); + if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) + __nvme_tcp_stop_queue(queue); dev_err(nctrl->device, "failed to connect queue: %d ret=%d\n", idx, ret); } From 6c75dc94f2b27fff57b305af9236eea181a00b6c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 28 Mar 2023 11:27:51 -0500 Subject: [PATCH 572/898] net: ipa: compute DMA pool size properly In gsi_trans_pool_init_dma(), the total size of a pool of memory used for DMA transactions is calculated. However the calculation is done incorrectly. For 4KB pages, this total size is currently always more than one page, and as a result, the calculation produces a positive (though incorrect) total size. The code still works in this case; we just end up with fewer DMA pool entries than we intended. Bjorn Andersson tested booting a kernel with 16KB pages, and hit a null pointer derereference in sg_alloc_append_table_from_pages(), descending from gsi_trans_pool_init_dma(). The cause of this was that a 16KB total size was going to be allocated, and with 16KB pages the order of that allocation is 0. The total_size calculation yielded 0, which eventually led to the crash. Correcting the total_size calculation fixes the problem. Reported-by: Bjorn Andersson Tested-by: Bjorn Andersson Fixes: 9dd441e4ed57 ("soc: qcom: ipa: GSI transactions") Reviewed-by: Mark Bloch Signed-off-by: Alex Elder Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20230328162751.2861791-1-elder@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi_trans.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c index 0f52c068c46d6..ee6fb00b71eb6 100644 --- a/drivers/net/ipa/gsi_trans.c +++ b/drivers/net/ipa/gsi_trans.c @@ -156,7 +156,7 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool, * gsi_trans_pool_exit_dma() can assume the total allocated * size is exactly (count * size). */ - total_size = get_order(total_size) << PAGE_SHIFT; + total_size = PAGE_SIZE << get_order(total_size); virt = dma_alloc_coherent(dev, total_size, &addr, GFP_KERNEL); if (!virt) From 8c495270845d6b4854607e946baef3637a8259ed Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 28 Mar 2023 17:00:13 -0700 Subject: [PATCH 573/898] bnx2x: use the right build_skb() helper build_skb() no longer accepts slab buffers. Since slab use is fairly uncommon we prefer the drivers to call a separate slab_build_skb() function appropriately. bnx2x uses the old semantics where size of 0 meant buffer from slab. It sets the fp->rx_frag_size to 0 for MTUs which don't fit in a page. It needs to call slab_build_skb(). This fixes the WARN_ONCE() of incorrect API use seen with bnx2x. Reported-by: Thomas Voegtle Link: https://lore.kernel.org/all/b8f295e4-ba57-8bfb-7d9c-9d62a498a727@lio96.de/ Fixes: ce098da1497c ("skbuff: Introduce slab_build_skb()") Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20230329000013.2734957-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 16c490692f422..12083b9679b54 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -672,6 +672,18 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp, return 0; } +static struct sk_buff * +bnx2x_build_skb(const struct bnx2x_fastpath *fp, void *data) +{ + struct sk_buff *skb; + + if (fp->rx_frag_size) + skb = build_skb(data, fp->rx_frag_size); + else + skb = slab_build_skb(data); + return skb; +} + static void bnx2x_frag_free(const struct bnx2x_fastpath *fp, void *data) { if (fp->rx_frag_size) @@ -779,7 +791,7 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp, dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(rx_buf, mapping), fp->rx_buf_size, DMA_FROM_DEVICE); if (likely(new_data)) - skb = build_skb(data, fp->rx_frag_size); + skb = bnx2x_build_skb(fp, data); if (likely(skb)) { #ifdef BNX2X_STOP_ON_ERROR @@ -1046,7 +1058,7 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget) dma_unmap_addr(rx_buf, mapping), fp->rx_buf_size, DMA_FROM_DEVICE); - skb = build_skb(data, fp->rx_frag_size); + skb = bnx2x_build_skb(fp, data); if (unlikely(!skb)) { bnx2x_frag_free(fp, data); bnx2x_fp_qstats(bp, fp)-> From c5cff16f461a4a434a9915a7be7ac9ced861a8a4 Mon Sep 17 00:00:00 2001 From: Radoslaw Tyl Date: Tue, 28 Mar 2023 10:26:59 -0700 Subject: [PATCH 574/898] i40e: fix registers dump after run ethtool adapter self test Fix invalid registers dump from ethtool -d ethX after adapter self test by ethtool -t ethY. It causes invalid data display. The problem was caused by overwriting i40e_reg_list[].elements which is common for ethtool self test and dump. Fixes: 22dd9ae8afcc ("i40e: Rework register diagnostic") Signed-off-by: Radoslaw Tyl Reviewed-by: Michal Swiatkowski Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20230328172659.3906413-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_diag.c | 11 ++++++----- drivers/net/ethernet/intel/i40e/i40e_diag.h | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.c b/drivers/net/ethernet/intel/i40e/i40e_diag.c index 5b3519c6e362e..97fe1787a8f4a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_diag.c +++ b/drivers/net/ethernet/intel/i40e/i40e_diag.c @@ -44,7 +44,7 @@ static int i40e_diag_reg_pattern_test(struct i40e_hw *hw, return 0; } -struct i40e_diag_reg_test_info i40e_reg_list[] = { +const struct i40e_diag_reg_test_info i40e_reg_list[] = { /* offset mask elements stride */ {I40E_QTX_CTL(0), 0x0000FFBF, 1, I40E_QTX_CTL(1) - I40E_QTX_CTL(0)}, @@ -78,27 +78,28 @@ int i40e_diag_reg_test(struct i40e_hw *hw) { int ret_code = 0; u32 reg, mask; + u32 elements; u32 i, j; for (i = 0; i40e_reg_list[i].offset != 0 && !ret_code; i++) { + elements = i40e_reg_list[i].elements; /* set actual reg range for dynamically allocated resources */ if (i40e_reg_list[i].offset == I40E_QTX_CTL(0) && hw->func_caps.num_tx_qp != 0) - i40e_reg_list[i].elements = hw->func_caps.num_tx_qp; + elements = hw->func_caps.num_tx_qp; if ((i40e_reg_list[i].offset == I40E_PFINT_ITRN(0, 0) || i40e_reg_list[i].offset == I40E_PFINT_ITRN(1, 0) || i40e_reg_list[i].offset == I40E_PFINT_ITRN(2, 0) || i40e_reg_list[i].offset == I40E_QINT_TQCTL(0) || i40e_reg_list[i].offset == I40E_QINT_RQCTL(0)) && hw->func_caps.num_msix_vectors != 0) - i40e_reg_list[i].elements = - hw->func_caps.num_msix_vectors - 1; + elements = hw->func_caps.num_msix_vectors - 1; /* test register access */ mask = i40e_reg_list[i].mask; - for (j = 0; j < i40e_reg_list[i].elements && !ret_code; j++) { + for (j = 0; j < elements && !ret_code; j++) { reg = i40e_reg_list[i].offset + (j * i40e_reg_list[i].stride); ret_code = i40e_diag_reg_pattern_test(hw, reg, mask); diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.h b/drivers/net/ethernet/intel/i40e/i40e_diag.h index e641035c7297d..c3ce5f35211f0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_diag.h +++ b/drivers/net/ethernet/intel/i40e/i40e_diag.h @@ -20,7 +20,7 @@ struct i40e_diag_reg_test_info { u32 stride; /* bytes between each element */ }; -extern struct i40e_diag_reg_test_info i40e_reg_list[]; +extern const struct i40e_diag_reg_test_info i40e_reg_list[]; int i40e_diag_reg_test(struct i40e_hw *hw); int i40e_diag_eeprom_test(struct i40e_hw *hw); From 83714dc3db0e4a088673601bc8099b079bc1a077 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 28 Mar 2023 18:30:19 -0700 Subject: [PATCH 575/898] bnxt_en: Fix reporting of test result in ethtool selftest When the selftest command fails, driver is not reporting the failure by updating the "test->flags" when bnxt_close_nic() fails. Fixes: eb51365846bc ("bnxt_en: Add basic ethtool -t selftest support.") Reviewed-by: Pavan Chebbi Reviewed-by: Somnath Kotur Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index ec573127b7076..7658a06b8d059 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -3738,6 +3738,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, bnxt_ulp_stop(bp); rc = bnxt_close_nic(bp, true, false); if (rc) { + etest->flags |= ETH_TEST_FL_FAILED; bnxt_ulp_start(bp, rc); return; } From 62aad36ed31abc80f35db11e187e690448a79f7d Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 28 Mar 2023 18:30:20 -0700 Subject: [PATCH 576/898] bnxt_en: Fix typo in PCI id to device description string mapping Fix 57502 and 57508 NPAR description string entries. The typos caused these devices to not match up with lspci output. Fixes: 49c98421e6ab ("bnxt_en: Add PCI IDs for 57500 series NPAR devices.") Reviewed-by: Pavan Chebbi Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e2e2c986c82b7..c23e3b397bcff 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -175,12 +175,12 @@ static const struct pci_device_id bnxt_pci_tbl[] = { { PCI_VDEVICE(BROADCOM, 0x1750), .driver_data = BCM57508 }, { PCI_VDEVICE(BROADCOM, 0x1751), .driver_data = BCM57504 }, { PCI_VDEVICE(BROADCOM, 0x1752), .driver_data = BCM57502 }, - { PCI_VDEVICE(BROADCOM, 0x1800), .driver_data = BCM57508_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x1800), .driver_data = BCM57502_NPAR }, { PCI_VDEVICE(BROADCOM, 0x1801), .driver_data = BCM57504_NPAR }, - { PCI_VDEVICE(BROADCOM, 0x1802), .driver_data = BCM57502_NPAR }, - { PCI_VDEVICE(BROADCOM, 0x1803), .driver_data = BCM57508_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x1802), .driver_data = BCM57508_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x1803), .driver_data = BCM57502_NPAR }, { PCI_VDEVICE(BROADCOM, 0x1804), .driver_data = BCM57504_NPAR }, - { PCI_VDEVICE(BROADCOM, 0x1805), .driver_data = BCM57502_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x1805), .driver_data = BCM57508_NPAR }, { PCI_VDEVICE(BROADCOM, 0xd802), .driver_data = BCM58802 }, { PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 }, #ifdef CONFIG_BNXT_SRIOV From 581bce7bcb7e7f100908728e7b292e266c76895b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 28 Mar 2023 18:30:21 -0700 Subject: [PATCH 577/898] bnxt_en: Add missing 200G link speed reporting bnxt_fw_to_ethtool_speed() is missing the case statement for 200G link speed reported by firmware. As a result, ethtool will report unknown speed when the firmware reports 200G link speed. Fixes: 532262ba3b84 ("bnxt_en: ethtool: support PAM4 link speeds up to 200G") Signed-off-by: Michael Chan Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index c0628ac1b798a..5928430f6f518 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1226,6 +1226,7 @@ struct bnxt_link_info { #define BNXT_LINK_SPEED_40GB PORT_PHY_QCFG_RESP_LINK_SPEED_40GB #define BNXT_LINK_SPEED_50GB PORT_PHY_QCFG_RESP_LINK_SPEED_50GB #define BNXT_LINK_SPEED_100GB PORT_PHY_QCFG_RESP_LINK_SPEED_100GB +#define BNXT_LINK_SPEED_200GB PORT_PHY_QCFG_RESP_LINK_SPEED_200GB u16 support_speeds; u16 support_pam4_speeds; u16 auto_link_speeds; /* fw adv setting */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 7658a06b8d059..6bd18eb5137f4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1714,6 +1714,8 @@ u32 bnxt_fw_to_ethtool_speed(u16 fw_link_speed) return SPEED_50000; case BNXT_LINK_SPEED_100GB: return SPEED_100000; + case BNXT_LINK_SPEED_200GB: + return SPEED_200000; default: return SPEED_UNKNOWN; } From f9ea835e99bc8d049bf2a3ec8fa5a7cb4fcade23 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Tue, 28 Mar 2023 19:39:44 -0700 Subject: [PATCH 578/898] KVM: arm64: PMU: Restore the guest's EL0 event counting after migration Currently, with VHE, KVM enables the EL0 event counting for the guest on vcpu_load() or KVM enables it as a part of the PMU register emulation process, when needed. However, in the migration case (with VHE), the same handling is lacking, as vPMU register values that were restored by userspace haven't been propagated yet (the PMU events haven't been created) at the vcpu load-time on the first KVM_RUN (kvm_vcpu_pmu_restore_guest() called from vcpu_load() on the first KVM_RUN won't do anything as events_{guest,host} of kvm_pmu_events are still zero). So, with VHE, enable the guest's EL0 event counting on the first KVM_RUN (after the migration) when needed. More specifically, have kvm_pmu_handle_pmcr() call kvm_vcpu_pmu_restore_guest() so that kvm_pmu_handle_pmcr() on the first KVM_RUN can take care of it. Fixes: d0c94c49792c ("KVM: arm64: Restore PMU configuration on first run") Cc: stable@vger.kernel.org Reviewed-by: Marc Zyngier Signed-off-by: Reiji Watanabe Link: https://lore.kernel.org/r/20230329023944.2488484-1-reijiw@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/pmu-emul.c | 1 + arch/arm64/kvm/sys_regs.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index c243b10f3e150..5eca0cdd961df 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -558,6 +558,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) for_each_set_bit(i, &mask, 32) kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true); } + kvm_vcpu_pmu_restore_guest(vcpu); } static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1b2c161120bea..34688918c8113 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -794,7 +794,6 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; kvm_pmu_handle_pmcr(vcpu, val); - kvm_vcpu_pmu_restore_guest(vcpu); } else { /* PMCR.P & PMCR.C are RAZ */ val = __vcpu_sys_reg(vcpu, PMCR_EL0) From f7154d967bc4ee25ea1572937550e711b2525474 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 28 Mar 2023 14:31:28 +0300 Subject: [PATCH 579/898] virtio/vsock: fix header length on skb merging This fixes appending newly arrived skbuff to the last skbuff of the socket's queue. Problem fires when we are trying to append data to skbuff which was already processed in dequeue callback at least once. Dequeue callback calls function 'skb_pull()' which changes 'skb->len'. In current implementation 'skb->len' is used to update length in header of the last skbuff after new data was copied to it. This is bug, because value in header is used to calculate 'rx_bytes'/'fwd_cnt' and thus must be not be changed during skbuff's lifetime. Bug starts to fire since: commit 077706165717 ("virtio/vsock: don't use skbuff state to account credit") It presents before, but didn't triggered due to a little bit buggy implementation of credit calculation logic. So use Fixes tag for it. Fixes: 077706165717 ("virtio/vsock: don't use skbuff state to account credit") Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- net/vmw_vsock/virtio_transport_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 6564192e7f201..11ca9b3362aa1 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1068,7 +1068,7 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, memcpy(skb_put(last_skb, skb->len), skb->data, skb->len); free_pkt = true; last_hdr->flags |= hdr->flags; - last_hdr->len = cpu_to_le32(last_skb->len); + le32_add_cpu(&last_hdr->len, len); goto out; } } From b8d2f61fdf2a566f7872158f35e65599aceb90fb Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 28 Mar 2023 14:32:12 +0300 Subject: [PATCH 580/898] virtio/vsock: WARN_ONCE() for invalid state of socket This adds WARN_ONCE() and return from stream dequeue callback when socket's queue is empty, but 'rx_bytes' still non-zero. This allows the detection of potential bugs due to packet merging (see previous patch). Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- net/vmw_vsock/virtio_transport_common.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 11ca9b3362aa1..37934dfe72f45 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -363,6 +363,13 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, u32 free_space; spin_lock_bh(&vvs->rx_lock); + + if (WARN_ONCE(skb_queue_empty(&vvs->rx_queue) && vvs->rx_bytes, + "rx_queue is empty, but rx_bytes is non-zero\n")) { + spin_unlock_bh(&vvs->rx_lock); + return err; + } + while (total < len && !skb_queue_empty(&vvs->rx_queue)) { skb = skb_peek(&vvs->rx_queue); From 25209a3209ecc44f93300b7ee5287f451be1d6ff Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 28 Mar 2023 14:33:07 +0300 Subject: [PATCH 581/898] test/vsock: new skbuff appending test This adds test which checks case when data of newly received skbuff is appended to the last skbuff in the socket's queue. It looks like simple test with 'send()' and 'recv()', but internally it triggers logic which appends one received skbuff to another. Test checks that this feature works correctly. This test is actual only for virtio transport. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- tools/testing/vsock/vsock_test.c | 90 ++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 3de10dbb50f52..12b97c92fbb20 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -968,6 +968,91 @@ static void test_seqpacket_inv_buf_server(const struct test_opts *opts) test_inv_buf_server(opts, false); } +#define HELLO_STR "HELLO" +#define WORLD_STR "WORLD" + +static void test_stream_virtio_skb_merge_client(const struct test_opts *opts) +{ + ssize_t res; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + /* Send first skbuff. */ + res = send(fd, HELLO_STR, strlen(HELLO_STR), 0); + if (res != strlen(HELLO_STR)) { + fprintf(stderr, "unexpected send(2) result %zi\n", res); + exit(EXIT_FAILURE); + } + + control_writeln("SEND0"); + /* Peer reads part of first skbuff. */ + control_expectln("REPLY0"); + + /* Send second skbuff, it will be appended to the first. */ + res = send(fd, WORLD_STR, strlen(WORLD_STR), 0); + if (res != strlen(WORLD_STR)) { + fprintf(stderr, "unexpected send(2) result %zi\n", res); + exit(EXIT_FAILURE); + } + + control_writeln("SEND1"); + /* Peer reads merged skbuff packet. */ + control_expectln("REPLY1"); + + close(fd); +} + +static void test_stream_virtio_skb_merge_server(const struct test_opts *opts) +{ + unsigned char buf[64]; + ssize_t res; + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("SEND0"); + + /* Read skbuff partially. */ + res = recv(fd, buf, 2, 0); + if (res != 2) { + fprintf(stderr, "expected recv(2) returns 2 bytes, got %zi\n", res); + exit(EXIT_FAILURE); + } + + control_writeln("REPLY0"); + control_expectln("SEND1"); + + res = recv(fd, buf + 2, sizeof(buf) - 2, 0); + if (res != 8) { + fprintf(stderr, "expected recv(2) returns 8 bytes, got %zi\n", res); + exit(EXIT_FAILURE); + } + + res = recv(fd, buf, sizeof(buf) - 8 - 2, MSG_DONTWAIT); + if (res != -1) { + fprintf(stderr, "expected recv(2) failure, got %zi\n", res); + exit(EXIT_FAILURE); + } + + if (memcmp(buf, HELLO_STR WORLD_STR, strlen(HELLO_STR WORLD_STR))) { + fprintf(stderr, "pattern mismatch\n"); + exit(EXIT_FAILURE); + } + + control_writeln("REPLY1"); + + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1038,6 +1123,11 @@ static struct test_case test_cases[] = { .run_client = test_seqpacket_inv_buf_client, .run_server = test_seqpacket_inv_buf_server, }, + { + .name = "SOCK_STREAM virtio skb merge", + .run_client = test_stream_virtio_skb_merge_client, + .run_server = test_stream_virtio_skb_merge_server, + }, {}, }; From a74fabfbd1b7013045afc8cc541e6cab3360ccb5 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 29 Mar 2023 12:45:35 -0500 Subject: [PATCH 582/898] x86/ACPI/boot: Use FADT version to check support for online capable ACPI 6.3 introduced the online capable bit, and also introduced MADT version 5. Latter was used to distinguish whether the offset storing online capable could be used. However ACPI 6.2b has MADT version "45" which is for an errata version of the ACPI 6.2 spec. This means that the Linux code for detecting availability of MADT will mistakenly flag ACPI 6.2b as supporting online capable which is inaccurate as it's an ACPI 6.3 feature. Instead use the FADT major and minor revision fields to distinguish this. [ bp: Massage. ] Fixes: aa06e20f1be6 ("x86/ACPI: Don't add CPUs that are not online capable") Reported-by: Eric DeVolder Reported-by: Borislav Petkov Signed-off-by: Mario Limonciello Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/943d2445-84df-d939-f578-5d8240d342cc@unsolicited.net --- arch/x86/kernel/acpi/boot.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 1c38174b5f019..729218462404b 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -146,7 +146,11 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) pr_debug("Local APIC address 0x%08x\n", madt->address); } - if (madt->header.revision >= 5) + + /* ACPI 6.3 and newer support the online capable bit. */ + if (acpi_gbl_FADT.header.revision > 6 || + (acpi_gbl_FADT.header.revision == 6 && + acpi_gbl_FADT.minor_revision >= 3)) acpi_support_online_capable = true; default_acpi_madt_oem_check(madt->header.oem_id, From fed8d8773b8ea68ad99d9eee8c8343bef9da2c2c Mon Sep 17 00:00:00 2001 From: Eric DeVolder Date: Mon, 27 Mar 2023 15:10:26 -0400 Subject: [PATCH 583/898] x86/acpi/boot: Correct acpi_is_processor_usable() check The logic in acpi_is_processor_usable() requires the online capable bit be set for hotpluggable CPUs. The online capable bit has been introduced in ACPI 6.3. However, for ACPI revisions < 6.3 which do not support that bit, CPUs should be reported as usable, not the other way around. Reverse the check. [ bp: Rewrite commit message. ] Fixes: e2869bd7af60 ("x86/acpi/boot: Do not register processors that cannot be onlined for x2APIC") Suggested-by: Miguel Luis Suggested-by: Boris Ostrovsky Signed-off-by: Eric DeVolder Signed-off-by: Borislav Petkov (AMD) Tested-by: David R Cc: Link: https://lore.kernel.org/r/20230327191026.3454-2-eric.devolder@oracle.com --- arch/x86/kernel/acpi/boot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 729218462404b..0dac4ab5b55ba 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -197,7 +197,8 @@ static bool __init acpi_is_processor_usable(u32 lapic_flags) if (lapic_flags & ACPI_MADT_ENABLED) return true; - if (acpi_support_online_capable && (lapic_flags & ACPI_MADT_ONLINE_CAPABLE)) + if (!acpi_support_online_capable || + (lapic_flags & ACPI_MADT_ONLINE_CAPABLE)) return true; return false; From 4d78e032fee5d532e189cdb2c3c76112094e9751 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 30 Mar 2023 11:00:00 +0200 Subject: [PATCH 584/898] wifi: mac80211: drop bogus static keywords in A-MSDU rx These were unintentional copy&paste mistakes. Cc: stable@vger.kernel.org Fixes: 986e43b19ae9 ("wifi: mac80211: fix receiving A-MSDU frames on mesh interfaces") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20230330090001.60750-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e8de500eb9f3c..9353616b528f3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2896,7 +2896,7 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; __le16 fc = hdr->frame_control; struct sk_buff_head frame_list; - static ieee80211_rx_result res; + ieee80211_rx_result res; struct ethhdr ethhdr; const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source; @@ -3037,7 +3037,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) struct net_device *dev = sdata->dev; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; __le16 fc = hdr->frame_control; - static ieee80211_rx_result res; + ieee80211_rx_result res; bool port_control; int err; From a16fc38315f2c69c520ee769976ecb9c706b8560 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 30 Mar 2023 11:00:01 +0200 Subject: [PATCH 585/898] wifi: mac80211: fix potential null pointer dereference rx->sta->amsdu_mesh_control is being passed to ieee80211_amsdu_to_8023s without checking rx->sta. Since it doesn't make sense to accept A-MSDU packets without a sta, simply add a check earlier. Fixes: 6e4c0d0460bd ("wifi: mac80211: add a workaround for receiving non-standard mesh A-MSDU") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20230330090001.60750-2-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9353616b528f3..78351895c3c69 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2930,7 +2930,7 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset) data_offset, true)) return RX_DROP_UNUSABLE; - if (rx->sta && rx->sta->amsdu_mesh_control < 0) { + if (rx->sta->amsdu_mesh_control < 0) { bool valid_std = ieee80211_is_valid_amsdu(skb, true); bool valid_nonstd = ieee80211_is_valid_amsdu(skb, false); @@ -3006,7 +3006,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) } } - if (is_multicast_ether_addr(hdr->addr1)) + if (is_multicast_ether_addr(hdr->addr1) || !rx->sta) return RX_DROP_UNUSABLE; if (rx->key) { From dd01579e5ed922dcfcb8fec53fa03b81c7649a04 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Tue, 28 Mar 2023 01:07:41 +0800 Subject: [PATCH 586/898] wifi: mac80211: fix the size calculation of ieee80211_ie_len_eht_cap() Here should return the size of ieee80211_eht_cap_elem_fixed, so fix it. Fixes: 820acc810fb6 ("mac80211: Add EHT capabilities to association/probe request") Signed-off-by: Ryder Lee Link: https://lore.kernel.org/r/06c13635fc03bcff58a647b8e03e9f01a74294bd.1679935259.git.ryder.lee@mediatek.com Signed-off-by: Johannes Berg --- net/mac80211/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 3aceb3b731bf4..8c397650b96f6 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -4906,7 +4906,7 @@ u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata, u8 iftype) &eht_cap->eht_cap_elem, is_ap); return 2 + 1 + - sizeof(he_cap->he_cap_elem) + n + + sizeof(eht_cap->eht_cap_elem) + n + ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0], eht_cap->eht_cap_elem.phy_cap_info); return 0; From e26c0946a5c1aa4d27f8dfe78f2a72b4550df91f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 26 Mar 2023 17:17:09 +0200 Subject: [PATCH 587/898] wifi: mac80211: fix receiving mesh packets in forwarding=0 networks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When forwarding is set to 0, frames are typically sent with ttl=1. Move the ttl decrement check below the check for local receive in order to fix packet drops. Reported-by: Thomas Hühn Reported-by: Nick Hainke Fixes: 986e43b19ae9 ("wifi: mac80211: fix receiving A-MSDU frames on mesh interfaces") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20230326151709.17743-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 78351895c3c69..7bd5aa2d5a958 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2769,14 +2769,6 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta if (sdata->crypto_tx_tailroom_needed_cnt) tailroom = IEEE80211_ENCRYPT_TAILROOM; - if (!--mesh_hdr->ttl) { - if (multicast) - goto rx_accept; - - IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_ttl); - return RX_DROP_MONITOR; - } - if (mesh_hdr->flags & MESH_FLAGS_AE) { struct mesh_path *mppath; char *proxied_addr; @@ -2807,6 +2799,14 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta if (ether_addr_equal(sdata->vif.addr, eth->h_dest)) goto rx_accept; + if (!--mesh_hdr->ttl) { + if (multicast) + goto rx_accept; + + IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_ttl); + return RX_DROP_MONITOR; + } + if (!ifmsh->mshcfg.dot11MeshForwarding) { if (is_multicast_ether_addr(eth->h_dest)) goto rx_accept; From 8f0149a8ac59c12cd47271ac625c27dac5621d3a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 24 Mar 2023 13:09:22 +0100 Subject: [PATCH 588/898] wifi: mac80211: fix mesh forwarding Linearize packets (needed for forwarding A-MSDU subframes). Fixes: 986e43b19ae9 ("wifi: mac80211: fix receiving A-MSDU frames on mesh interfaces") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20230324120924.38412-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7bd5aa2d5a958..10efa429e9464 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2833,6 +2833,9 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta if (skb_cow_head(fwd_skb, hdrlen - sizeof(struct ethhdr))) return RX_DROP_UNUSABLE; + + if (skb_linearize(fwd_skb)) + return RX_DROP_UNUSABLE; } fwd_hdr = skb_push(fwd_skb, hdrlen - sizeof(struct ethhdr)); From 899c2c11810cfe38cb01c847d0df98e181ea5728 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 24 Mar 2023 13:09:23 +0100 Subject: [PATCH 589/898] wifi: mac80211: fix flow dissection for forwarded packets Adjust the network header to point at the correct payload offset Fixes: 986e43b19ae9 ("wifi: mac80211: fix receiving A-MSDU frames on mesh interfaces") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20230324120924.38412-2-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 10efa429e9464..af57616d2f1d9 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2850,7 +2850,7 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta hdrlen += ETH_ALEN; else fwd_skb->protocol = htons(fwd_skb->len - hdrlen); - skb_set_network_header(fwd_skb, hdrlen); + skb_set_network_header(fwd_skb, hdrlen + 2); info = IEEE80211_SKB_CB(fwd_skb); memset(info, 0, sizeof(*info)); From 12b220a6171faf10638ab683a975cadcf1a352d6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 24 Mar 2023 13:09:24 +0100 Subject: [PATCH 590/898] wifi: mac80211: fix invalid drv_sta_pre_rcu_remove calls for non-uploaded sta Avoid potential data corruption issues caused by uninitialized driver private data structures. Reported-by: Brian Coverstone Fixes: 6a9d1b91f34d ("mac80211: add pre-RCU-sync sta removal driver operation") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20230324120924.38412-3-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 7d68dbc872d77..941bda9141faa 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1264,7 +1264,8 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta) list_del_rcu(&sta->list); sta->removed = true; - drv_sta_pre_rcu_remove(local, sta->sdata, sta); + if (sta->uploaded) + drv_sta_pre_rcu_remove(local, sta->sdata, sta); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && rcu_access_pointer(sdata->u.vlan.sta) == sta) From c1976bd8f23016d8706973908f2bb0ac0d852a8f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 29 Mar 2023 13:16:01 +0900 Subject: [PATCH 591/898] zonefs: Always invalidate last cached page on append write When a direct append write is executed, the append offset may correspond to the last page of a sequential file inode which might have been cached already by buffered reads, page faults with mmap-read or non-direct readahead. To ensure that the on-disk and cached data is consistant for such last cached page, make sure to always invalidate it in zonefs_file_dio_append(). If the invalidation fails, return -EBUSY to userspace to differentiate from IO errors. This invalidation will always be a no-op when the FS block size (device zone write granularity) is equal to the page size (e.g. 4K). Reported-by: Hans Holmberg Fixes: 02ef12a663c7 ("zonefs: use REQ_OP_ZONE_APPEND for sync DIO") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Tested-by: Hans Holmberg --- fs/zonefs/file.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index 617e4f9db42ea..c6ab2732955e2 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -382,6 +382,7 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) struct zonefs_zone *z = zonefs_inode_zone(inode); struct block_device *bdev = inode->i_sb->s_bdev; unsigned int max = bdev_max_zone_append_sectors(bdev); + pgoff_t start, end; struct bio *bio; ssize_t size = 0; int nr_pages; @@ -390,6 +391,19 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize); iov_iter_truncate(from, max); + /* + * If the inode block size (zone write granularity) is smaller than the + * page size, we may be appending data belonging to the last page of the + * inode straddling inode->i_size, with that page already cached due to + * a buffered read or readahead. So make sure to invalidate that page. + * This will always be a no-op for the case where the block size is + * equal to the page size. + */ + start = iocb->ki_pos >> PAGE_SHIFT; + end = (iocb->ki_pos + iov_iter_count(from) - 1) >> PAGE_SHIFT; + if (invalidate_inode_pages2_range(inode->i_mapping, start, end)) + return -EBUSY; + nr_pages = iov_iter_npages(from, BIO_MAX_VECS); if (!nr_pages) return 0; From 77af13ba3c7f91d91c377c7e2d122849bbc17128 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 30 Mar 2023 09:47:58 +0900 Subject: [PATCH 592/898] zonefs: Do not propagate iomap_dio_rw() ENOTBLK error to user space The call to invalidate_inode_pages2_range() in __iomap_dio_rw() may fail, in which case -ENOTBLK is returned and this error code is propagated back to user space trhough iomap_dio_rw() -> zonefs_file_dio_write() return chain. This error code is fairly obscure and may confuse the user. Avoid this and be consistent with the behavior of zonefs_file_dio_append() for similar invalidate_inode_pages2_range() errors by returning -EBUSY to user space when iomap_dio_rw() returns -ENOTBLK. Suggested-by: Christoph Hellwig Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Tested-by: Hans Holmberg --- fs/zonefs/file.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index c6ab2732955e2..132f01d3461f1 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -581,11 +581,21 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) append = sync; } - if (append) + if (append) { ret = zonefs_file_dio_append(iocb, from); - else + } else { + /* + * iomap_dio_rw() may return ENOTBLK if there was an issue with + * page invalidation. Overwrite that error code with EBUSY to + * be consistent with zonefs_file_dio_append() return value for + * similar issues. + */ ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops, &zonefs_write_dio_ops, 0, NULL, 0); + if (ret == -ENOTBLK) + ret = -EBUSY; + } + if (zonefs_zone_is_seq(z) && (ret > 0 || ret == -EIOCBQUEUED)) { if (ret > 0) From fd30d1cdcc4ff405fc54765edf2e11b03f2ed4f3 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 30 Mar 2023 06:52:38 -0600 Subject: [PATCH 593/898] io_uring: fix poll/netmsg alloc caches We increase cache->nr_cached when we free into the cache but don't decrease when we take from it, so in some time we'll get an empty cache with cache->nr_cached larger than IO_ALLOC_CACHE_MAX, that fails io_alloc_cache_put() and effectively disables caching. Fixes: 9b797a37c4bd8 ("io_uring: add abstraction around apoll cache") Cc: stable@vger.kernel.org Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- io_uring/alloc_cache.h | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h index 729793ae97127..c2cde88aeed53 100644 --- a/io_uring/alloc_cache.h +++ b/io_uring/alloc_cache.h @@ -27,6 +27,7 @@ static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *c struct hlist_node *node = cache->list.first; hlist_del(node); + cache->nr_cached--; return container_of(node, struct io_cache_entry, node); } From 2eca98e5b24d01c02b46c67be05a5f98cc9789b1 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 29 Mar 2023 10:02:59 +0200 Subject: [PATCH 594/898] xen/netback: use same error messages for same errors Issue the same error message in case an illegal page boundary crossing has been detected in both cases where this is tested. Suggested-by: Jan Beulich Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Link: https://lore.kernel.org/r/20230329080259.14823-1-jgross@suse.com Signed-off-by: Paolo Abeni --- drivers/net/xen-netback/netback.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 4943be4fd99d8..c1501f41e2d82 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -994,10 +994,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, /* No crossing a page as the payload mustn't fragment. */ if (unlikely((txreq.offset + txreq.size) > XEN_PAGE_SIZE)) { - netdev_err(queue->vif->dev, - "txreq.offset: %u, size: %u, end: %lu\n", - txreq.offset, txreq.size, - (unsigned long)(txreq.offset&~XEN_PAGE_MASK) + txreq.size); + netdev_err(queue->vif->dev, "Cross page boundary, txreq.offset: %u, size: %u\n", + txreq.offset, txreq.size); xenvif_fatal_tx_err(queue->vif); break; } From ecaa4902439298f6b0e29f47424a86b310a9ff4f Mon Sep 17 00:00:00 2001 From: D Scott Phillips Date: Thu, 30 Mar 2023 17:30:54 +0300 Subject: [PATCH 595/898] xhci: also avoid the XHCI_ZERO_64B_REGS quirk with a passthrough iommu Previously the quirk was skipped when no iommu was present. The same rationale for skipping the quirk also applies in the iommu.passthrough=1 case. Skip applying the XHCI_ZERO_64B_REGS quirk if the device's iommu domain is passthrough. Fixes: 12de0a35c996 ("xhci: Add quirk to zero 64bit registers on Renesas PCIe controllers") Cc: stable Signed-off-by: D Scott Phillips Acked-by: Marc Zyngier Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230330143056.1390020-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 6183ce8574b1a..bdb6dd819a3ba 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -228,6 +229,7 @@ int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us) static void xhci_zero_64b_regs(struct xhci_hcd *xhci) { struct device *dev = xhci_to_hcd(xhci)->self.sysdev; + struct iommu_domain *domain; int err, i; u64 val; u32 intrs; @@ -246,7 +248,9 @@ static void xhci_zero_64b_regs(struct xhci_hcd *xhci) * an iommu. Doing anything when there is no iommu is definitely * unsafe... */ - if (!(xhci->quirks & XHCI_ZERO_64B_REGS) || !device_iommu_mapped(dev)) + domain = iommu_get_domain_for_dev(dev); + if (!(xhci->quirks & XHCI_ZERO_64B_REGS) || !domain || + domain->type == IOMMU_DOMAIN_IDENTITY) return; xhci_info(xhci, "Zeroing 64bit base registers, expecting fault\n"); From 8e77d3d59d7b5da13deda1d832c51b8bbdbe2037 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 30 Mar 2023 17:30:55 +0300 Subject: [PATCH 596/898] Revert "usb: xhci-pci: Set PROBE_PREFER_ASYNCHRONOUS" This reverts commit 4c2604a9a6899bab195edbee35fc8d64ce1444aa. Asynch probe caused regression in a setup with both Renesas and Intel xHC controllers. Devices connected to the Renesas disconnected shortly after boot. With Asynch probe the busnumbers got interleaved. xhci_hcd 0000:00:14.0: new USB bus registered, assigned bus number 1 xhci_hcd 0000:04:00.0: new USB bus registered, assigned bus number 2 xhci_hcd 0000:00:14.0: new USB bus registered, assigned bus number 3 xhci_hcd 0000:04:00.0: new USB bus registered, assigned bus number 4 Reason why this commit causes regression is still unknown, but revert it while debugging the issue. Fixes: 4c2604a9a689 ("usb: xhci-pci: Set PROBE_PREFER_ASYNCHRONOUS") Cc: stable Link: https://lore.kernel.org/linux-usb/20230307132120.5897c5af@deangelis.fenrir.org.uk Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230330143056.1390020-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index fb988e4ea9244..6db07ca419c31 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -771,12 +771,11 @@ static struct pci_driver xhci_pci_driver = { /* suspend and resume implemented later */ .shutdown = usb_hcd_pci_shutdown, - .driver = { #ifdef CONFIG_PM - .pm = &usb_hcd_pci_pm_ops, -#endif - .probe_type = PROBE_PREFER_ASYNCHRONOUS, + .driver = { + .pm = &usb_hcd_pci_pm_ops }, +#endif }; static int __init xhci_pci_init(void) From f6caea4855553a8b99ba3ec23ecdb5ed8262f26c Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 30 Mar 2023 17:30:56 +0300 Subject: [PATCH 597/898] xhci: Free the command allocated for setting LPM if we return early The command allocated to set exit latency LPM values need to be freed in case the command is never queued. This would be the case if there is no change in exit latency values, or device is missing. Reported-by: Mirsad Goran Todorovac Link: https://lore.kernel.org/linux-usb/24263902-c9b3-ce29-237b-1c3d6918f4fe@alu.unizg.hr Tested-by: Mirsad Goran Todorovac Fixes: 5c2a380a5aa8 ("xhci: Allocate separate command structures for each LPM command") Cc: Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230330143056.1390020-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index bdb6dd819a3ba..6307bae9cddff 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4442,6 +4442,7 @@ static int __maybe_unused xhci_change_max_exit_latency(struct xhci_hcd *xhci, if (!virt_dev || max_exit_latency == virt_dev->current_mel) { spin_unlock_irqrestore(&xhci->lock, flags); + xhci_free_command(xhci, command); return 0; } From 2fec9dc8e0acc3dfb56d1389151bcf405f087b10 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 30 Mar 2023 10:33:02 +0800 Subject: [PATCH 598/898] drm/amdgpu: allow more APUs to do mode2 reset when go to S4 Skip mode2 reset only for IMU enabled APUs when do S4. This patch is to fix the regression issue https://gitlab.freedesktop.org/drm/amd/-/issues/2483 It is generated by commit b589626674de ("drm/amdgpu: skip ASIC reset for APUs when go to S4"). Fixes: b589626674de ("drm/amdgpu: skip ASIC reset for APUs when go to S4") Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2483 Tested-by: Yuan Perry Signed-off-by: Tim Huang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 60b1857f469eb..aeeec211861c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -981,7 +981,12 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev) */ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { - if (adev->flags & AMD_IS_APU) + if ((adev->flags & AMD_IS_APU) && + adev->gfx.imu.funcs) /* Not need to do mode2 reset for IMU enabled APUs */ + return false; + + if ((adev->flags & AMD_IS_APU) && + amdgpu_acpi_is_s3_active(adev)) return false; if (amdgpu_sriov_vf(adev)) From 963b2e8c428f79489ceeb058e8314554ec9cbe6f Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 24 Feb 2023 18:21:54 +0100 Subject: [PATCH 599/898] drm/etnaviv: fix reference leak when mmaping imported buffer drm_gem_prime_mmap() takes a reference on the GEM object, but before that drm_gem_mmap_obj() already takes a reference, which will be leaked as only one reference is dropped when the mapping is closed. Drop the extra reference when dma_buf_mmap() succeeds. Cc: stable@vger.kernel.org Signed-off-by: Lucas Stach Reviewed-by: Christian Gmeiner --- drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c index 7031db145a77a..3524b5811682a 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c @@ -91,7 +91,15 @@ static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj) static int etnaviv_gem_prime_mmap_obj(struct etnaviv_gem_object *etnaviv_obj, struct vm_area_struct *vma) { - return dma_buf_mmap(etnaviv_obj->base.dma_buf, vma, 0); + int ret; + + ret = dma_buf_mmap(etnaviv_obj->base.dma_buf, vma, 0); + if (!ret) { + /* Drop the reference acquired by drm_gem_mmap_obj(). */ + drm_gem_object_put(&etnaviv_obj->base); + } + + return ret; } static const struct etnaviv_gem_ops etnaviv_gem_prime_ops = { From 2658d0d0355a3470a96d0e4268cbad174fadf0ed Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 30 Mar 2023 17:33:27 +0200 Subject: [PATCH 600/898] Revert "drm/etnaviv: export client GPU usage statistics via fdinfo" This reverts commit 97804a133c68, as it builds on top of df622729ddbf ("drm/scheduler: track GPU active time per entity") which needs to be reverted, as it introduces a use-after-free. Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_drv.c | 43 +-------------------------- 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 44ca803237a5f..31a7f59ccb49e 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -22,7 +22,6 @@ #include "etnaviv_gem.h" #include "etnaviv_mmu.h" #include "etnaviv_perfmon.h" -#include "common.xml.h" /* * DRM operations: @@ -476,47 +475,7 @@ static const struct drm_ioctl_desc etnaviv_ioctls[] = { ETNA_IOCTL(PM_QUERY_SIG, pm_query_sig, DRM_RENDER_ALLOW), }; -static void etnaviv_fop_show_fdinfo(struct seq_file *m, struct file *f) -{ - struct drm_file *file = f->private_data; - struct drm_device *dev = file->minor->dev; - struct etnaviv_drm_private *priv = dev->dev_private; - struct etnaviv_file_private *ctx = file->driver_priv; - - /* - * For a description of the text output format used here, see - * Documentation/gpu/drm-usage-stats.rst. - */ - seq_printf(m, "drm-driver:\t%s\n", dev->driver->name); - seq_printf(m, "drm-client-id:\t%u\n", ctx->id); - - for (int i = 0; i < ETNA_MAX_PIPES; i++) { - struct etnaviv_gpu *gpu = priv->gpu[i]; - char engine[10] = "UNK"; - int cur = 0; - - if (!gpu) - continue; - - if (gpu->identity.features & chipFeatures_PIPE_2D) - cur = snprintf(engine, sizeof(engine), "2D"); - if (gpu->identity.features & chipFeatures_PIPE_3D) - cur = snprintf(engine + cur, sizeof(engine) - cur, - "%s3D", cur ? "/" : ""); - if (gpu->identity.nn_core_count > 0) - cur = snprintf(engine + cur, sizeof(engine) - cur, - "%sNN", cur ? "/" : ""); - - seq_printf(m, "drm-engine-%s:\t%llu ns\n", engine, - ctx->sched_entity[i].elapsed_ns); - } -} - -static const struct file_operations fops = { - .owner = THIS_MODULE, - DRM_GEM_FOPS, - .show_fdinfo = etnaviv_fop_show_fdinfo, -}; +DEFINE_DRM_GEM_FOPS(fops); static const struct drm_driver etnaviv_drm_driver = { .driver_features = DRIVER_GEM | DRIVER_RENDER, From baad10973fdb442912af676de3348e80bd8fe602 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 30 Mar 2023 17:35:13 +0200 Subject: [PATCH 601/898] Revert "drm/scheduler: track GPU active time per entity" This reverts commit df622729ddbf as it introduces a use-after-free, which isn't easy to fix without going back to the design drawing board. Reported-by: Danilo Krummrich Signed-off-by: Lucas Stach --- drivers/gpu/drm/scheduler/sched_main.c | 6 ------ include/drm/gpu_scheduler.h | 7 ------- 2 files changed, 13 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 4e6ad6e122bc4..0e43784202718 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -906,12 +906,6 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) spin_unlock(&sched->job_list_lock); - if (job) { - job->entity->elapsed_ns += ktime_to_ns( - ktime_sub(job->s_fence->finished.timestamp, - job->s_fence->scheduled.timestamp)); - } - return job; } diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 9db9e5e504eeb..9935d1e2ff69a 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -228,13 +228,6 @@ struct drm_sched_entity { */ struct rb_node rb_tree_node; - /** - * @elapsed_ns: - * - * Records the amount of time where jobs from this entity were active - * on the GPU. - */ - uint64_t elapsed_ns; }; /** From ae817e618d4b5d221daae34d32a39476e4bdcb36 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Thu, 30 Mar 2023 09:42:18 -0400 Subject: [PATCH 602/898] thermal: intel: powerclamp: Fix cpumask and max_idle module parameters When cpumask is specified as a module parameter the value is overwritten by the module init routine. This can easily be fixed by checking to see if the mask has already been allocated in the init routine. When max_idle is specified as a module parameter a panic will occur. The problem is that the idle_injection_cpu_mask is not allocated until the module init routine executes. This can easily be fixed by allocating the cpumask if it's not already allocated. Fixes: ebf519710218 ("thermal: intel: powerclamp: Add two module parameters") Signed-off-by: David Arcari Reviewed-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_powerclamp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index c7ba5680cd483..91fc7e2394971 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -235,6 +235,12 @@ static int max_idle_set(const char *arg, const struct kernel_param *kp) goto skip_limit_set; } + if (!cpumask_available(idle_injection_cpu_mask)) { + ret = allocate_copy_idle_injection_mask(cpu_present_mask); + if (ret) + goto skip_limit_set; + } + if (check_invalid(idle_injection_cpu_mask, new_max_idle)) { ret = -EINVAL; goto skip_limit_set; @@ -791,7 +797,8 @@ static int __init powerclamp_init(void) return retval; mutex_lock(&powerclamp_lock); - retval = allocate_copy_idle_injection_mask(cpu_present_mask); + if (!cpumask_available(idle_injection_cpu_mask)) + retval = allocate_copy_idle_injection_mask(cpu_present_mask); mutex_unlock(&powerclamp_lock); if (retval) From 7bcad0f0e6fbc1d613e49e0ee35c8e5f2e685bb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Steffen=20B=C3=A4tz?= Date: Wed, 29 Mar 2023 12:01:40 -0300 Subject: [PATCH 603/898] net: dsa: mv88e6xxx: Enable IGMP snooping on user ports only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not set the MV88E6XXX_PORT_CTL0_IGMP_MLD_SNOOP bit on CPU or DSA ports. This allows the host CPU port to be a regular IGMP listener by sending out IGMP Membership Reports, which would otherwise not be forwarded by the mv88exxx chip, but directly looped back to the CPU port itself. Fixes: 54d792f257c6 ("net: dsa: Centralise global and port setup code into mv88e6xxx.") Signed-off-by: Steffen Bätz Signed-off-by: Fabio Estevam Reviewed-by: Andrew Lunn Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20230329150140.701559-1-festevam@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 30383c4f8fd0e..0de7b3611202c 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3354,9 +3354,14 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * If this is the upstream port for this switch, enable * forwarding of unknown unicasts and multicasts. */ - reg = MV88E6XXX_PORT_CTL0_IGMP_MLD_SNOOP | - MV88E6185_PORT_CTL0_USE_TAG | MV88E6185_PORT_CTL0_USE_IP | + reg = MV88E6185_PORT_CTL0_USE_TAG | MV88E6185_PORT_CTL0_USE_IP | MV88E6XXX_PORT_CTL0_STATE_FORWARDING; + /* Forward any IPv4 IGMP or IPv6 MLD frames received + * by a USER port to the CPU port to allow snooping. + */ + if (dsa_is_user_port(ds, port)) + reg |= MV88E6XXX_PORT_CTL0_IGMP_MLD_SNOOP; + err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL0, reg); if (err) return err; From 64fdc5f341db01200e33105265d4b8450122a82e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 29 Mar 2023 18:18:21 +0300 Subject: [PATCH 604/898] net: dsa: sync unicast and multicast addresses for VLAN filters too If certain conditions are met, DSA can install all necessary MAC addresses on the CPU ports as FDB entries and disable flooding towards the CPU (we call this RX filtering). There is one corner case where this does not work. ip link add br0 type bridge vlan_filtering 1 && ip link set br0 up ip link set swp0 master br0 && ip link set swp0 up ip link add link swp0 name swp0.100 type vlan id 100 ip link set swp0.100 up && ip addr add 192.168.100.1/24 dev swp0.100 Traffic through swp0.100 is broken, because the bridge turns on VLAN filtering in the swp0 port (causing RX packets to be classified to the FDB database corresponding to the VID from their 802.1Q header), and although the 8021q module does call dev_uc_add() towards the real device, that API is VLAN-unaware, so it only contains the MAC address, not the VID; and DSA's current implementation of ndo_set_rx_mode() is only for VID 0 (corresponding to FDB entries which are installed in an FDB database which is only hit when the port is VLAN-unaware). It's interesting to understand why the bridge does not turn on IFF_PROMISC for its swp0 bridge port, and it may appear at first glance that this is a regression caused by the logic in commit 2796d0c648c9 ("bridge: Automatically manage port promiscuous mode."). After all, a bridge port needs to have IFF_PROMISC by its very nature - it needs to receive and forward frames with a MAC DA different from the bridge ports' MAC addresses. While that may be true, when the bridge is VLAN-aware *and* it has a single port, there is no real reason to enable promiscuity even if that is an automatic port, with flooding and learning (there is nowhere for packets to go except to the BR_FDB_LOCAL entries), and this is how the corner case appears. Adding a second automatic interface to the bridge would make swp0 promisc as well, and would mask the corner case. Given the dev_uc_add() / ndo_set_rx_mode() API is what it is (it doesn't pass a VLAN ID), the only way to address that problem is to install host FDB entries for the cartesian product of RX filtering MAC addresses and VLAN RX filters. Fixes: 7569459a52c9 ("net: dsa: manage flooding on the CPU ports") Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20230329151821.745752-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/slave.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 116 insertions(+), 5 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index cac17183589fe..165bb2cb84316 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -57,6 +57,12 @@ struct dsa_standalone_event_work { u16 vid; }; +struct dsa_host_vlan_rx_filtering_ctx { + struct net_device *dev; + const unsigned char *addr; + enum dsa_standalone_event event; +}; + static bool dsa_switch_supports_uc_filtering(struct dsa_switch *ds) { return ds->ops->port_fdb_add && ds->ops->port_fdb_del && @@ -155,18 +161,37 @@ static int dsa_slave_schedule_standalone_work(struct net_device *dev, return 0; } +static int dsa_slave_host_vlan_rx_filtering(struct net_device *vdev, int vid, + void *arg) +{ + struct dsa_host_vlan_rx_filtering_ctx *ctx = arg; + + return dsa_slave_schedule_standalone_work(ctx->dev, ctx->event, + ctx->addr, vid); +} + static int dsa_slave_sync_uc(struct net_device *dev, const unsigned char *addr) { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_host_vlan_rx_filtering_ctx ctx = { + .dev = dev, + .addr = addr, + .event = DSA_UC_ADD, + }; + int err; dev_uc_add(master, addr); if (!dsa_switch_supports_uc_filtering(dp->ds)) return 0; - return dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0); + err = dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0); + if (err) + return err; + + return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); } static int dsa_slave_unsync_uc(struct net_device *dev, @@ -174,13 +199,23 @@ static int dsa_slave_unsync_uc(struct net_device *dev, { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_host_vlan_rx_filtering_ctx ctx = { + .dev = dev, + .addr = addr, + .event = DSA_UC_DEL, + }; + int err; dev_uc_del(master, addr); if (!dsa_switch_supports_uc_filtering(dp->ds)) return 0; - return dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, addr, 0); + err = dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, addr, 0); + if (err) + return err; + + return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); } static int dsa_slave_sync_mc(struct net_device *dev, @@ -188,13 +223,23 @@ static int dsa_slave_sync_mc(struct net_device *dev, { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_host_vlan_rx_filtering_ctx ctx = { + .dev = dev, + .addr = addr, + .event = DSA_MC_ADD, + }; + int err; dev_mc_add(master, addr); if (!dsa_switch_supports_mc_filtering(dp->ds)) return 0; - return dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, addr, 0); + err = dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, addr, 0); + if (err) + return err; + + return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); } static int dsa_slave_unsync_mc(struct net_device *dev, @@ -202,13 +247,23 @@ static int dsa_slave_unsync_mc(struct net_device *dev, { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_host_vlan_rx_filtering_ctx ctx = { + .dev = dev, + .addr = addr, + .event = DSA_MC_DEL, + }; + int err; dev_mc_del(master, addr); if (!dsa_switch_supports_mc_filtering(dp->ds)) return 0; - return dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0); + err = dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0); + if (err) + return err; + + return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); } void dsa_slave_sync_ha(struct net_device *dev) @@ -1702,6 +1757,8 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, .flags = 0, }; struct netlink_ext_ack extack = {0}; + struct dsa_switch *ds = dp->ds; + struct netdev_hw_addr *ha; int ret; /* User port... */ @@ -1721,6 +1778,30 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, return ret; } + if (!dsa_switch_supports_uc_filtering(ds) && + !dsa_switch_supports_mc_filtering(ds)) + return 0; + + netif_addr_lock_bh(dev); + + if (dsa_switch_supports_mc_filtering(ds)) { + netdev_for_each_synced_mc_addr(ha, dev) { + dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, + ha->addr, vid); + } + } + + if (dsa_switch_supports_uc_filtering(ds)) { + netdev_for_each_synced_uc_addr(ha, dev) { + dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, + ha->addr, vid); + } + } + + netif_addr_unlock_bh(dev); + + dsa_flush_workqueue(); + return 0; } @@ -1733,13 +1814,43 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, /* This API only allows programming tagged, non-PVID VIDs */ .flags = 0, }; + struct dsa_switch *ds = dp->ds; + struct netdev_hw_addr *ha; int err; err = dsa_port_vlan_del(dp, &vlan); if (err) return err; - return dsa_port_host_vlan_del(dp, &vlan); + err = dsa_port_host_vlan_del(dp, &vlan); + if (err) + return err; + + if (!dsa_switch_supports_uc_filtering(ds) && + !dsa_switch_supports_mc_filtering(ds)) + return 0; + + netif_addr_lock_bh(dev); + + if (dsa_switch_supports_mc_filtering(ds)) { + netdev_for_each_synced_mc_addr(ha, dev) { + dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, + ha->addr, vid); + } + } + + if (dsa_switch_supports_uc_filtering(ds)) { + netdev_for_each_synced_uc_addr(ha, dev) { + dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, + ha->addr, vid); + } + } + + netif_addr_unlock_bh(dev); + + dsa_flush_workqueue(); + + return 0; } static int dsa_slave_restore_vlan(struct net_device *vdev, int vid, void *arg) From 2960a2d33b02345c6d710251206053678f92246b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 29 Mar 2023 13:11:17 +0100 Subject: [PATCH 605/898] net: mvneta: fix potential double-frees in mvneta_txq_sw_deinit() Reported on the Turris forum, mvneta provokes kernel warnings in the architecture DMA mapping code when mvneta_setup_txqs() fails to allocate memory. This happens because when mvneta_cleanup_txqs() is called in the mvneta_stop() path, we leave pointers in the structure that have been freed. Then on mvneta_open(), we call mvneta_setup_txqs(), which starts allocating memory. On memory allocation failure, mvneta_cleanup_txqs() will walk all the queues freeing any non-NULL pointers - which includes pointers that were previously freed in mvneta_stop(). Fix this by setting these pointers to NULL to prevent double-freeing of the same memory. Fixes: 2adb719d74f6 ("net: mvneta: Implement software TSO") Link: https://forum.turris.cz/t/random-kernel-exceptions-on-hbl-tos-7-0/18865/8 Signed-off-by: Russell King (Oracle) Link: https://lore.kernel.org/r/E1phUe5-00EieL-7q@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvneta.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 0e39d199ff061..2cad76d0a50ef 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3549,6 +3549,8 @@ static void mvneta_txq_sw_deinit(struct mvneta_port *pp, netdev_tx_reset_queue(nq); + txq->buf = NULL; + txq->tso_hdrs = NULL; txq->descs = NULL; txq->last_desc = 0; txq->next_desc_to_proc = 0; From 8c1cb87c2a5c29da416848451a687473f379611c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 30 Mar 2023 14:08:38 +0200 Subject: [PATCH 606/898] net: ethernet: mtk_eth_soc: fix flow block refcounting logic Since we call flow_block_cb_decref on FLOW_BLOCK_UNBIND, we also need to call flow_block_cb_incref for a newly allocated cb. Also fix the accidentally inverted refcount check on unbind. Fixes: 502e84e2382d ("net: ethernet: mtk_eth_soc: add flow offloading support") Reviewed-by: Simon Horman Signed-off-by: Felix Fietkau Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20230330120840.52079-1-nbd@nbd.name Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index 81afd5ee3fbf1..161751bb36c9c 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -576,6 +576,7 @@ mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f) if (IS_ERR(block_cb)) return PTR_ERR(block_cb); + flow_block_cb_incref(block_cb); flow_block_cb_add(block_cb, f); list_add_tail(&block_cb->driver_list, &block_cb_list); return 0; @@ -584,7 +585,7 @@ mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f) if (!block_cb) return -ENOENT; - if (flow_block_cb_decref(block_cb)) { + if (!flow_block_cb_decref(block_cb)) { flow_block_cb_remove(block_cb, f); list_del(&block_cb->driver_list); } From 5f36ca1b841fb17a20249fd9fedafc7dc7fdd940 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 30 Mar 2023 14:08:39 +0200 Subject: [PATCH 607/898] net: ethernet: mtk_eth_soc: fix L2 offloading with DSA untag offload Check for skb metadata in order to detect the case where the DSA header is not present. Fixes: 2d7605a72906 ("net: ethernet: mtk_eth_soc: enable hardware DSA untagging") Reviewed-by: Simon Horman Signed-off-by: Felix Fietkau Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20230330120840.52079-2-nbd@nbd.name Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 6 +++--- drivers/net/ethernet/mediatek/mtk_ppe.c | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 1835d92afe4b2..282f9435d5ff1 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2057,9 +2057,6 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, netdev); - if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED) - mtk_ppe_check_skb(eth->ppe[0], skb, hash); - if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) { if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) { if (trxd.rxd3 & RX_DMA_VTAG_V2) { @@ -2087,6 +2084,9 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vlan_tci); } + if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED) + mtk_ppe_check_skb(eth->ppe[0], skb, hash); + skb_record_rx_queue(skb, 0); napi_gro_receive(napi, skb); diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index 6883eb34cd8b4..a038b99ecbda7 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "mtk_eth_soc.h" #include "mtk_ppe.h" @@ -699,7 +700,9 @@ void __mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash) skb->dev->dsa_ptr->tag_ops->proto != DSA_TAG_PROTO_MTK) goto out; - tag += 4; + if (!skb_metadata_dst(skb)) + tag += 4; + if (get_unaligned_be16(tag) != ETH_P_8021Q) break; From 924531326e2dd4ceabe7240f2b55a88e7d894ec2 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 30 Mar 2023 14:08:40 +0200 Subject: [PATCH 608/898] net: ethernet: mtk_eth_soc: add missing ppe cache flush when deleting a flow The cache needs to be flushed to ensure that the hardware stops offloading the flow immediately. Fixes: 33fc42de3327 ("net: ethernet: mtk_eth_soc: support creating mac address based offload entries") Reviewed-by: Simon Horman Signed-off-by: Felix Fietkau Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20230330120840.52079-3-nbd@nbd.name Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_ppe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index a038b99ecbda7..fd07d6e142733 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -459,6 +459,7 @@ __mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry) hwe->ib1 &= ~MTK_FOE_IB1_STATE; hwe->ib1 |= FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_INVALID); dma_wmb(); + mtk_ppe_cache_clear(ppe); } entry->hash = 0xffff; From f7b58a69fad9d2c4c90cab0247811155dd0d48e7 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 30 Mar 2023 14:56:38 -0400 Subject: [PATCH 609/898] dm: fix improper splitting for abnormal bios "Abnormal" bios include discards, write zeroes and secure erase. By no longer passing the calculated 'len' pointer, commit 7dd06a2548b2 ("dm: allow dm_accept_partial_bio() for dm_io without duplicate bios") took a senseless approach to disallowing dm_accept_partial_bio() from working for duplicate bios processed using __send_duplicate_bios(). It inadvertently and incorrectly stopped the use of 'len' when initializing a target's io (in alloc_tio). As such the resulting tio could address more area of a device than it should. For example, when discarding an entire DM striped device with the following DM table: vg-lvol0: 0 159744 striped 2 128 7:0 2048 7:1 2048 vg-lvol0: 159744 45056 striped 2 128 7:2 2048 7:3 2048 Before this fix: device-mapper: striped: target_stripe=0, bdev=7:0, start=2048 len=102400 blkdiscard: attempt to access beyond end of device loop0: rw=2051, sector=2048, nr_sectors = 102400 limit=81920 device-mapper: striped: target_stripe=1, bdev=7:1, start=2048 len=102400 blkdiscard: attempt to access beyond end of device loop1: rw=2051, sector=2048, nr_sectors = 102400 limit=81920 After this fix; device-mapper: striped: target_stripe=0, bdev=7:0, start=2048 len=79872 device-mapper: striped: target_stripe=1, bdev=7:1, start=2048 len=79872 Fixes: 7dd06a2548b2 ("dm: allow dm_accept_partial_bio() for dm_io without duplicate bios") Cc: stable@vger.kernel.org Reported-by: Orange Kao Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 2d0f934ba6e6a..e67a2757c53e9 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1467,7 +1467,8 @@ static void setup_split_accounting(struct clone_info *ci, unsigned int len) } static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, - struct dm_target *ti, unsigned int num_bios) + struct dm_target *ti, unsigned int num_bios, + unsigned *len) { struct bio *bio; int try; @@ -1478,7 +1479,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, if (try) mutex_lock(&ci->io->md->table_devices_lock); for (bio_nr = 0; bio_nr < num_bios; bio_nr++) { - bio = alloc_tio(ci, ti, bio_nr, NULL, + bio = alloc_tio(ci, ti, bio_nr, len, try ? GFP_NOIO : GFP_NOWAIT); if (!bio) break; @@ -1514,7 +1515,7 @@ static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, break; default: /* dm_accept_partial_bio() is not supported with shared tio->len_ptr */ - alloc_multiple_bios(&blist, ci, ti, num_bios); + alloc_multiple_bios(&blist, ci, ti, num_bios, len); while ((clone = bio_list_pop(&blist))) { dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO); __map_bio(clone); From 666eed46769d929c3e13636134ecfc67d75ef548 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 30 Mar 2023 15:09:29 -0400 Subject: [PATCH 610/898] dm: fix __send_duplicate_bios() to always allow for splitting IO Commit 7dd76d1feec70 ("dm: improve bio splitting and associated IO accounting") only called setup_split_accounting() from __send_duplicate_bios() if a single bio were being issued. But the case where duplicate bios are issued must call it too. Otherwise the bio won't be split and resubmitted (via recursion through block core back to DM) to submit the later portions of a bio (which may map to an entirely different target). For example, when discarding an entire DM striped device with the following DM table: vg-lvol0: 0 159744 striped 2 128 7:0 2048 7:1 2048 vg-lvol0: 159744 45056 striped 2 128 7:2 2048 7:3 2048 Before (broken, discards the first striped target's devices twice): device-mapper: striped: target_stripe=0, bdev=7:0, start=2048 len=79872 device-mapper: striped: target_stripe=1, bdev=7:1, start=2048 len=79872 device-mapper: striped: target_stripe=0, bdev=7:0, start=2049 len=22528 device-mapper: striped: target_stripe=1, bdev=7:1, start=2048 len=22528 After (works as expected): device-mapper: striped: target_stripe=0, bdev=7:0, start=2048 len=79872 device-mapper: striped: target_stripe=1, bdev=7:1, start=2048 len=79872 device-mapper: striped: target_stripe=0, bdev=7:2, start=2048 len=22528 device-mapper: striped: target_stripe=1, bdev=7:3, start=2048 len=22528 Fixes: 7dd76d1feec70 ("dm: improve bio splitting and associated IO accounting") Cc: stable@vger.kernel.org Reported-by: Orange Kao Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e67a2757c53e9..dfde0088147a1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1514,6 +1514,8 @@ static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, ret = 1; break; default: + if (len) + setup_split_accounting(ci, *len); /* dm_accept_partial_bio() is not supported with shared tio->len_ptr */ alloc_multiple_bios(&blist, ci, ti, num_bios, len); while ((clone = bio_list_pop(&blist))) { From 179a88a8558bbf42991d361595281f3e45d7edfc Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Wed, 29 Mar 2023 22:24:06 +0200 Subject: [PATCH 611/898] cifs: fix DFS traversal oops without CONFIG_CIFS_DFS_UPCALL When compiled with CONFIG_CIFS_DFS_UPCALL disabled, cifs_dfs_d_automount is NULL. cifs.ko logic for mapping CIFS_FATTR_DFS_REFERRAL attributes to S_AUTOMOUNT and corresponding dentry flags is retained regardless of CONFIG_CIFS_DFS_UPCALL, leading to a NULL pointer dereference in VFS follow_automount() when traversing a DFS referral link: BUG: kernel NULL pointer dereference, address: 0000000000000000 ... Call Trace: __traverse_mounts+0xb5/0x220 ? cifs_revalidate_mapping+0x65/0xc0 [cifs] step_into+0x195/0x610 ? lookup_fast+0xe2/0xf0 path_lookupat+0x64/0x140 filename_lookup+0xc2/0x140 ? __create_object+0x299/0x380 ? kmem_cache_alloc+0x119/0x220 ? user_path_at_empty+0x31/0x50 user_path_at_empty+0x31/0x50 __x64_sys_chdir+0x2a/0xd0 ? exit_to_user_mode_prepare+0xca/0x100 do_syscall_64+0x42/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc This fix adds an inline cifs_dfs_d_automount() {return -EREMOTE} handler when CONFIG_CIFS_DFS_UPCALL is disabled. An alternative would be to avoid flagging S_AUTOMOUNT, etc. without CONFIG_CIFS_DFS_UPCALL. This approach was chosen as it provides more control over the error path. Signed-off-by: David Disseldorp Cc: stable@vger.kernel.org Reviewed-by: Paulo Alcantara (SUSE) Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 71fe0a0a79926..415176b2cf321 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -124,7 +124,10 @@ extern const struct dentry_operations cifs_ci_dentry_ops; #ifdef CONFIG_CIFS_DFS_UPCALL extern struct vfsmount *cifs_dfs_d_automount(struct path *path); #else -#define cifs_dfs_d_automount NULL +static inline struct vfsmount *cifs_dfs_d_automount(struct path *path) +{ + return ERR_PTR(-EREMOTE); +} #endif /* Functions related to symlinks */ From 6cc041e90c178955219dcee4030bd5423f800f10 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 29 Mar 2023 17:14:21 -0300 Subject: [PATCH 612/898] cifs: avoid races in parallel reconnects in smb1 Prevent multiple threads of doing negotiate, session setup and tree connect by holding @ses->session_mutex in cifs_reconnect_tcon() while reconnecting session and tcon. Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 38a697eca3050..c9d57ba84be41 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -71,7 +71,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) int rc; struct cifs_ses *ses; struct TCP_Server_Info *server; - struct nls_table *nls_codepage; + struct nls_table *nls_codepage = NULL; /* * SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for @@ -99,6 +99,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) } spin_unlock(&tcon->tc_lock); +again: rc = cifs_wait_for_server_reconnect(server, tcon->retry); if (rc) return rc; @@ -110,8 +111,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) } spin_unlock(&ses->chan_lock); - nls_codepage = load_nls_default(); - + mutex_lock(&ses->session_mutex); /* * Recheck after acquire mutex. If another thread is negotiating * and the server never sends an answer the socket will be closed @@ -120,29 +120,38 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) spin_lock(&server->srv_lock); if (server->tcpStatus == CifsNeedReconnect) { spin_unlock(&server->srv_lock); + mutex_lock(&ses->session_mutex); + + if (tcon->retry) + goto again; rc = -EHOSTDOWN; goto out; } spin_unlock(&server->srv_lock); + nls_codepage = load_nls_default(); + /* * need to prevent multiple threads trying to simultaneously * reconnect the same SMB session */ + spin_lock(&ses->ses_lock); spin_lock(&ses->chan_lock); - if (!cifs_chan_needs_reconnect(ses, server)) { + if (!cifs_chan_needs_reconnect(ses, server) && + ses->ses_status == SES_GOOD) { spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); /* this means that we only need to tree connect */ if (tcon->need_reconnect) goto skip_sess_setup; - rc = -EHOSTDOWN; + mutex_unlock(&ses->session_mutex); goto out; } spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); - mutex_lock(&ses->session_mutex); rc = cifs_negotiate_protocol(0, ses, server); if (!rc) rc = cifs_setup_session(0, ses, server, nls_codepage); From 09ba47b44d26b475bbdf9c80db9e0193d2b58956 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 29 Mar 2023 17:14:22 -0300 Subject: [PATCH 613/898] cifs: prevent infinite recursion in CIFSGetDFSRefer() We can't call smb_init() in CIFSGetDFSRefer() as cifs_reconnect_tcon() may end up calling CIFSGetDFSRefer() again to get new DFS referrals and thus causing an infinite recursion. Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Ronnie Sahlberg Cc: stable@vger.kernel.org # 6.2 Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index c9d57ba84be41..0d30b17494e4a 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4382,8 +4382,13 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, return -ENODEV; getDFSRetry: - rc = smb_init(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, (void **) &pSMB, - (void **) &pSMBr); + /* + * Use smb_init_no_reconnect() instead of smb_init() as + * CIFSGetDFSRefer() may be called from cifs_reconnect_tcon() and thus + * causing an infinite recursion. + */ + rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, + (void **)&pSMB, (void **)&pSMBr); if (rc) return rc; From e03677100707f849f01d8faf07ee58b4e56cdbf1 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 29 Mar 2023 17:14:23 -0300 Subject: [PATCH 614/898] cifs: get rid of dead check in smb2_reconnect() The SMB2_IOCTL check in the switch statement will never be true as we return earlier from smb2_reconnect() if @smb2_command == SMB2_IOCTL. Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 6bd2aa6af18f3..2b92132097dc8 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -310,7 +310,6 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, case SMB2_READ: case SMB2_WRITE: case SMB2_LOCK: - case SMB2_IOCTL: case SMB2_QUERY_DIRECTORY: case SMB2_CHANGE_NOTIFY: case SMB2_QUERY_INFO: From f9d2b1e146e0f82f3d04629afd92698522058361 Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Wed, 29 Mar 2023 16:51:58 +0000 Subject: [PATCH 615/898] virtio/vsock: fix leaks due to missing skb owner This patch sets the skb owner in the recv and send path for virtio. For the send path, this solves the leak caused when virtio_transport_purge_skbs() finds skb->sk is always NULL and therefore never matches it with the current socket. Setting the owner upon allocation fixes this. For the recv path, this ensures correctness of accounting and also correct transfer of ownership in vsock_loopback (when skbs are sent from one socket and received by another). Fixes: 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff") Signed-off-by: Bobby Eshleman Reported-by: Cong Wang Link: https://lore.kernel.org/all/ZCCbATwov4U+GBUv@pop-os.localdomain/ Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport_common.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 37934dfe72f45..ee78b4082ef95 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -94,6 +94,11 @@ virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, info->op, info->flags); + if (info->vsk && !skb_set_owner_sk_safe(skb, sk_vsock(info->vsk))) { + WARN_ONCE(1, "failed to allocate skb on vsock socket with sk_refcnt == 0\n"); + goto out; + } + return skb; out: @@ -1303,6 +1308,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, goto free_pkt; } + if (!skb_set_owner_sk_safe(skb, sk)) { + WARN_ONCE(1, "receiving vsock socket has sk_refcnt == 0\n"); + goto free_pkt; + } + vsk = vsock_sk(sk); lock_sock(sk); From c7d624520c1bd4e42d8ceb8283d6505fc90acccb Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 29 Mar 2023 21:47:19 +0800 Subject: [PATCH 616/898] iommu/vt-d: Remove unnecessary locking in intel_irq_remapping_alloc() The global rwsem dmar_global_lock was introduced by commit 3a5670e8ac932 ("iommu/vt-d: Introduce a rwsem to protect global data structures"). It is used to protect DMAR related global data from DMAR hotplug operations. Using dmar_global_lock in intel_irq_remapping_alloc() is unnecessary as the DMAR global data structures are not touched there. Remove it to avoid below lockdep warning. ====================================================== WARNING: possible circular locking dependency detected 6.3.0-rc2 #468 Not tainted ------------------------------------------------------ swapper/0/1 is trying to acquire lock: ff1db4cb40178698 (&domain->mutex){+.+.}-{3:3}, at: __irq_domain_alloc_irqs+0x3b/0xa0 but task is already holding lock: ffffffffa0c1cdf0 (dmar_global_lock){++++}-{3:3}, at: intel_iommu_init+0x58e/0x880 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (dmar_global_lock){++++}-{3:3}: lock_acquire+0xd6/0x320 down_read+0x42/0x180 intel_irq_remapping_alloc+0xad/0x750 mp_irqdomain_alloc+0xb8/0x2b0 irq_domain_alloc_irqs_locked+0x12f/0x2d0 __irq_domain_alloc_irqs+0x56/0xa0 alloc_isa_irq_from_domain.isra.7+0xa0/0xe0 mp_map_pin_to_irq+0x1dc/0x330 setup_IO_APIC+0x128/0x210 apic_intr_mode_init+0x67/0x110 x86_late_time_init+0x24/0x40 start_kernel+0x41e/0x7e0 secondary_startup_64_no_verify+0xe0/0xeb -> #0 (&domain->mutex){+.+.}-{3:3}: check_prevs_add+0x160/0xef0 __lock_acquire+0x147d/0x1950 lock_acquire+0xd6/0x320 __mutex_lock+0x9c/0xfc0 __irq_domain_alloc_irqs+0x3b/0xa0 dmar_alloc_hwirq+0x9e/0x120 iommu_pmu_register+0x11d/0x200 intel_iommu_init+0x5de/0x880 pci_iommu_init+0x12/0x40 do_one_initcall+0x65/0x350 kernel_init_freeable+0x3ca/0x610 kernel_init+0x1a/0x140 ret_from_fork+0x29/0x50 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(dmar_global_lock); lock(&domain->mutex); lock(dmar_global_lock); lock(&domain->mutex); *** DEADLOCK *** Fixes: 9dbb8e3452ab ("irqdomain: Switch to per-domain locking") Reviewed-by: Jacob Pan Tested-by: Jason Gunthorpe Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230314051836.23817-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20230329134721.469447-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/irq_remapping.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 6d01fa078c36f..df9e261af0b56 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -311,14 +311,12 @@ static int set_ioapic_sid(struct irte *irte, int apic) if (!irte) return -1; - down_read(&dmar_global_lock); for (i = 0; i < MAX_IO_APICS; i++) { if (ir_ioapic[i].iommu && ir_ioapic[i].id == apic) { sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn; break; } } - up_read(&dmar_global_lock); if (sid == 0) { pr_warn("Failed to set source-id of IOAPIC (%d)\n", apic); @@ -338,14 +336,12 @@ static int set_hpet_sid(struct irte *irte, u8 id) if (!irte) return -1; - down_read(&dmar_global_lock); for (i = 0; i < MAX_HPET_TBS; i++) { if (ir_hpet[i].iommu && ir_hpet[i].id == id) { sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn; break; } } - up_read(&dmar_global_lock); if (sid == 0) { pr_warn("Failed to set source-id of HPET block (%d)\n", id); @@ -1339,9 +1335,7 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain, if (!data) goto out_free_parent; - down_read(&dmar_global_lock); index = alloc_irte(iommu, &data->irq_2_iommu, nr_irqs); - up_read(&dmar_global_lock); if (index < 0) { pr_warn("Failed to allocate IRTE\n"); kfree(data); From bfd3c6b9fa4a1dc78139dd1621d5bea321ffa69d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 29 Mar 2023 21:47:20 +0800 Subject: [PATCH 617/898] iommu/vt-d: Allow zero SAGAW if second-stage not supported The VT-d spec states (in section 11.4.2) that hardware implementations reporting second-stage translation support (SSTS) field as Clear also report the SAGAW field as 0. Fix an inappropriate check in alloc_iommu(). Fixes: 792fb43ce2c9 ("iommu/vt-d: Enable Intel IOMMU scalable mode by default") Suggested-by: Raghunathan Srinivasan Reviewed-by: Kevin Tian Signed-off-by: Jacob Pan Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230318024824.124542-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20230329134721.469447-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 6acfe879589cb..23828d189c2a6 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1071,7 +1071,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) } err = -EINVAL; - if (cap_sagaw(iommu->cap) == 0) { + if (!cap_sagaw(iommu->cap) && + (!ecap_smts(iommu->ecap) || ecap_slts(iommu->ecap))) { pr_info("%s: No supported address widths. Not attempting DMA translation.\n", iommu->name); drhd->ignored = 1; From 16812c96550c30a8d5743167ef4e462d6fbe7472 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 29 Mar 2023 21:47:21 +0800 Subject: [PATCH 618/898] iommu/vt-d: Fix an IOMMU perfmon warning when CPU hotplug A warning can be triggered when hotplug CPU 0. $ echo 0 > /sys/devices/system/cpu/cpu0/online ------------[ cut here ]------------ Voluntary context switch within RCU read-side critical section! WARNING: CPU: 0 PID: 19 at kernel/rcu/tree_plugin.h:318 rcu_note_context_switch+0x4f4/0x580 RIP: 0010:rcu_note_context_switch+0x4f4/0x580 Call Trace: ? perf_event_update_userpage+0x104/0x150 __schedule+0x8d/0x960 ? perf_event_set_state.part.82+0x11/0x50 schedule+0x44/0xb0 schedule_timeout+0x226/0x310 ? __perf_event_disable+0x64/0x1a0 ? _raw_spin_unlock+0x14/0x30 wait_for_completion+0x94/0x130 __wait_rcu_gp+0x108/0x130 synchronize_rcu+0x67/0x70 ? invoke_rcu_core+0xb0/0xb0 ? __bpf_trace_rcu_stall_warning+0x10/0x10 perf_pmu_migrate_context+0x121/0x370 iommu_pmu_cpu_offline+0x6a/0xa0 ? iommu_pmu_del+0x1e0/0x1e0 cpuhp_invoke_callback+0x129/0x510 cpuhp_thread_fun+0x94/0x150 smpboot_thread_fn+0x183/0x220 ? sort_range+0x20/0x20 kthread+0xe6/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 ---[ end trace 0000000000000000 ]--- The synchronize_rcu() will be invoked in the perf_pmu_migrate_context(), when migrating a PMU to a new CPU. However, the current for_each_iommu() is within RCU read-side critical section. Two methods were considered to fix the issue. - Use the dmar_global_lock to replace the RCU read lock when going through the drhd list. But it triggers a lockdep warning. - Use the cpuhp_setup_state_multi() to set up a dedicated state for each IOMMU PMU. The lock can be avoided. The latter method is implemented in this patch. Since each IOMMU PMU has a dedicated state, add cpuhp_node and cpu in struct iommu_pmu to track the state. The state can be dynamically allocated now. Remove the CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE. Fixes: 46284c6ceb5e ("iommu/vt-d: Support cpumask for IOMMU perfmon") Reported-by: Ammy Yi Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230328182028.1366416-1-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230329134721.469447-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.h | 2 ++ drivers/iommu/intel/perfmon.c | 68 ++++++++++++++++++++++------------- include/linux/cpuhotplug.h | 1 - 3 files changed, 46 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index d6df3b8658129..694ab9b7d3e95 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -641,6 +641,8 @@ struct iommu_pmu { DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX); struct perf_event *event_list[IOMMU_PMU_IDX_MAX]; unsigned char irq_name[16]; + struct hlist_node cpuhp_node; + int cpu; }; #define IOMMU_IRQ_ID_OFFSET_PRQ (DMAR_UNITS_SUPPORTED) diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c index e17d9743a0d8c..cf43e798eca49 100644 --- a/drivers/iommu/intel/perfmon.c +++ b/drivers/iommu/intel/perfmon.c @@ -773,19 +773,34 @@ static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu) iommu->perf_irq = 0; } -static int iommu_pmu_cpu_online(unsigned int cpu) +static int iommu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) { + struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node); + if (cpumask_empty(&iommu_pmu_cpu_mask)) cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask); + if (cpumask_test_cpu(cpu, &iommu_pmu_cpu_mask)) + iommu_pmu->cpu = cpu; + return 0; } -static int iommu_pmu_cpu_offline(unsigned int cpu) +static int iommu_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) { - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - int target; + struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node); + int target = cpumask_first(&iommu_pmu_cpu_mask); + + /* + * The iommu_pmu_cpu_mask has been updated when offline the CPU + * for the first iommu_pmu. Migrate the other iommu_pmu to the + * new target. + */ + if (target < nr_cpu_ids && target != iommu_pmu->cpu) { + perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target); + iommu_pmu->cpu = target; + return 0; + } if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask)) return 0; @@ -795,45 +810,50 @@ static int iommu_pmu_cpu_offline(unsigned int cpu) if (target < nr_cpu_ids) cpumask_set_cpu(target, &iommu_pmu_cpu_mask); else - target = -1; + return 0; - rcu_read_lock(); - - for_each_iommu(iommu, drhd) { - if (!iommu->pmu) - continue; - perf_pmu_migrate_context(&iommu->pmu->pmu, cpu, target); - } - rcu_read_unlock(); + perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target); + iommu_pmu->cpu = target; return 0; } static int nr_iommu_pmu; +static enum cpuhp_state iommu_cpuhp_slot; static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu) { int ret; - if (nr_iommu_pmu++) - return 0; + if (!nr_iommu_pmu) { + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "driver/iommu/intel/perfmon:online", + iommu_pmu_cpu_online, + iommu_pmu_cpu_offline); + if (ret < 0) + return ret; + iommu_cpuhp_slot = ret; + } - ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE, - "driver/iommu/intel/perfmon:online", - iommu_pmu_cpu_online, - iommu_pmu_cpu_offline); - if (ret) - nr_iommu_pmu = 0; + ret = cpuhp_state_add_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node); + if (ret) { + if (!nr_iommu_pmu) + cpuhp_remove_multi_state(iommu_cpuhp_slot); + return ret; + } + nr_iommu_pmu++; - return ret; + return 0; } static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu) { + cpuhp_state_remove_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node); + if (--nr_iommu_pmu) return; - cpuhp_remove_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE); + cpuhp_remove_multi_state(iommu_cpuhp_slot); } void iommu_pmu_register(struct intel_iommu *iommu) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index c6fab004104a8..5b2f8147d1ae3 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -218,7 +218,6 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_CQM_ONLINE, CPUHP_AP_PERF_X86_CSTATE_ONLINE, CPUHP_AP_PERF_X86_IDXD_ONLINE, - CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, From 44d807320000db0d0013372ad39b53e12d52f758 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Thu, 30 Mar 2023 09:25:32 +0800 Subject: [PATCH 619/898] net: qrtr: Fix a refcount bug in qrtr_recvmsg() Syzbot reported a bug as following: refcount_t: addition on 0; use-after-free. ... RIP: 0010:refcount_warn_saturate+0x17c/0x1f0 lib/refcount.c:25 ... Call Trace: __refcount_add include/linux/refcount.h:199 [inline] __refcount_inc include/linux/refcount.h:250 [inline] refcount_inc include/linux/refcount.h:267 [inline] kref_get include/linux/kref.h:45 [inline] qrtr_node_acquire net/qrtr/af_qrtr.c:202 [inline] qrtr_node_lookup net/qrtr/af_qrtr.c:398 [inline] qrtr_send_resume_tx net/qrtr/af_qrtr.c:1003 [inline] qrtr_recvmsg+0x85f/0x990 net/qrtr/af_qrtr.c:1070 sock_recvmsg_nosec net/socket.c:1017 [inline] sock_recvmsg+0xe2/0x160 net/socket.c:1038 qrtr_ns_worker+0x170/0x1700 net/qrtr/ns.c:688 process_one_work+0x991/0x15c0 kernel/workqueue.c:2390 worker_thread+0x669/0x1090 kernel/workqueue.c:2537 It occurs in the concurrent scenario of qrtr_recvmsg() and qrtr_endpoint_unregister() as following: cpu0 cpu1 qrtr_recvmsg qrtr_endpoint_unregister qrtr_send_resume_tx qrtr_node_release qrtr_node_lookup mutex_lock(&qrtr_node_lock) spin_lock_irqsave(&qrtr_nodes_lock, ) refcount_dec_and_test(&node->ref) [node->ref == 0] radix_tree_lookup [node != NULL] __qrtr_node_release qrtr_node_acquire spin_lock_irqsave(&qrtr_nodes_lock, ) kref_get(&node->ref) [WARNING] ... mutex_unlock(&qrtr_node_lock) Use qrtr_node_lock to protect qrtr_node_lookup() implementation, this is actually improving the protection of node reference. Fixes: 0a7e0d0ef054 ("net: qrtr: Migrate node lookup tree to spinlock") Reported-by: syzbot+a7492efaa5d61b51db23@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?extid=a7492efaa5d61b51db23 Signed-off-by: Ziyang Xuan Signed-off-by: David S. Miller --- net/qrtr/af_qrtr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index 5c2fb992803b7..3a70255c8d02f 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -393,10 +393,12 @@ static struct qrtr_node *qrtr_node_lookup(unsigned int nid) struct qrtr_node *node; unsigned long flags; + mutex_lock(&qrtr_node_lock); spin_lock_irqsave(&qrtr_nodes_lock, flags); node = radix_tree_lookup(&qrtr_nodes, nid); node = qrtr_node_acquire(node); spin_unlock_irqrestore(&qrtr_nodes_lock, flags); + mutex_unlock(&qrtr_node_lock); return node; } From 653a180957a85c3fc30320cc7e84f5dc913a64f8 Mon Sep 17 00:00:00 2001 From: Michael Sit Wei Hong Date: Thu, 30 Mar 2023 17:14:02 +0800 Subject: [PATCH 620/898] net: phylink: add phylink_expects_phy() method Provide phylink_expects_phy() to allow MAC drivers to check if it is expecting a PHY to attach to. Since fixed-linked setups do not need to attach to a PHY. Provides a boolean value as to if the MAC should expect a PHY. Returns true if a PHY is expected. Reviewed-by: Russell King (Oracle) Signed-off-by: Michael Sit Wei Hong Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 19 +++++++++++++++++++ include/linux/phylink.h | 1 + 2 files changed, 20 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 1a2f074685fa9..30c166b334686 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1586,6 +1586,25 @@ void phylink_destroy(struct phylink *pl) } EXPORT_SYMBOL_GPL(phylink_destroy); +/** + * phylink_expects_phy() - Determine if phylink expects a phy to be attached + * @pl: a pointer to a &struct phylink returned from phylink_create() + * + * When using fixed-link mode, or in-band mode with 1000base-X or 2500base-X, + * no PHY is needed. + * + * Returns true if phylink will be expecting a PHY. + */ +bool phylink_expects_phy(struct phylink *pl) +{ + if (pl->cfg_link_an_mode == MLO_AN_FIXED || + (pl->cfg_link_an_mode == MLO_AN_INBAND && + phy_interface_mode_is_8023z(pl->link_config.interface))) + return false; + return true; +} +EXPORT_SYMBOL_GPL(phylink_expects_phy); + static void phylink_phy_change(struct phy_device *phydev, bool up) { struct phylink *pl = phydev->phylink; diff --git a/include/linux/phylink.h b/include/linux/phylink.h index c492c26202b5b..637698ed5cb6c 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -574,6 +574,7 @@ struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, phy_interface_t iface, const struct phylink_mac_ops *mac_ops); void phylink_destroy(struct phylink *); +bool phylink_expects_phy(struct phylink *pl); int phylink_connect_phy(struct phylink *, struct phy_device *); int phylink_of_phy_connect(struct phylink *, struct device_node *, u32 flags); From fe2cfbc9680356a3d9f8adde8a38e715831e32f5 Mon Sep 17 00:00:00 2001 From: Michael Sit Wei Hong Date: Thu, 30 Mar 2023 17:14:03 +0800 Subject: [PATCH 621/898] net: stmmac: check if MAC needs to attach to a PHY After the introduction of the fixed-link support, the MAC driver no longer attempt to scan for a PHY to attach to. This causes the non fixed-link setups to stop working. Using the phylink_expects_phy() to check and determine if the MAC should expect and attach a PHY. Fixes: ab21cf920928 ("net: stmmac: make mdio register skips PHY scanning for fixed-link") Signed-off-by: Michael Sit Wei Hong Signed-off-by: Lai Peter Jun Ann Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 17310ade88dda..d41a5f92aee7a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1135,6 +1135,7 @@ static int stmmac_init_phy(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); struct fwnode_handle *fwnode; + bool phy_needed; int ret; fwnode = of_fwnode_handle(priv->plat->phylink_node); @@ -1144,10 +1145,11 @@ static int stmmac_init_phy(struct net_device *dev) if (fwnode) ret = phylink_fwnode_phy_connect(priv->phylink, fwnode, 0); + phy_needed = phylink_expects_phy(priv->phylink); /* Some DT bindings do not set-up the PHY handle. Let's try to * manually parse it */ - if (!fwnode || ret) { + if (!fwnode || phy_needed || ret) { int addr = priv->plat->phy_addr; struct phy_device *phydev; From 6fc21a6ed5953b1dd3a41ce7be1ea57f5ef8c081 Mon Sep 17 00:00:00 2001 From: Michael Sit Wei Hong Date: Thu, 30 Mar 2023 17:14:04 +0800 Subject: [PATCH 622/898] net: stmmac: remove redundant fixup to support fixed-link mode Currently, intel_speed_mode_2500() will fix-up xpcs_an_inband to 1 if the underlying controller has a max speed of 1000Mbps. The value has been initialized and modified if it is a fixed-linked setup earlier. This patch removes the fix-up to allow for fixed-linked setup support. In stmmac_phy_setup(), ovr_an_inband is set based on the value of xpcs_an_inband. Which in turn will return an error in phylink_parse_mode() where MLO_AN_FIXED and ovr_an_inband are both set. Fixes: c82386310d95 ("stmmac: intel: prepare to support 1000BASE-X phy interface setting") Signed-off-by: Michael Sit Wei Hong Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 13aa919633b47..ab9f876b6df7e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -251,7 +251,6 @@ static void intel_speed_mode_2500(struct net_device *ndev, void *intel_data) priv->plat->mdio_bus_data->xpcs_an_inband = false; } else { priv->plat->max_speed = 1000; - priv->plat->mdio_bus_data->xpcs_an_inband = true; } } From 154e07c164859fc90bf4e8143f2f6c1af9f3a35e Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 30 Mar 2023 11:54:42 +0200 Subject: [PATCH 623/898] l2tp: generate correct module alias strings Commit 65b32f801bfb ("uapi: move IPPROTO_L2TP to in.h") moved the definition of IPPROTO_L2TP from a define to an enum, but since __stringify doesn't work properly with enums, we ended up breaking the modalias strings for the l2tp modules: $ modinfo l2tp_ip l2tp_ip6 | grep alias alias: net-pf-2-proto-IPPROTO_L2TP alias: net-pf-2-proto-2-type-IPPROTO_L2TP alias: net-pf-10-proto-IPPROTO_L2TP alias: net-pf-10-proto-2-type-IPPROTO_L2TP Use the resolved number directly in MODULE_ALIAS_*() macros (as we already do with SOCK_DGRAM) to fix the alias strings: $ modinfo l2tp_ip l2tp_ip6 | grep alias alias: net-pf-2-proto-115 alias: net-pf-2-proto-115-type-2 alias: net-pf-10-proto-115 alias: net-pf-10-proto-115-type-2 Moreover, fix the ordering of the parameters passed to MODULE_ALIAS_NET_PF_PROTO_TYPE() by switching proto and type. Fixes: 65b32f801bfb ("uapi: move IPPROTO_L2TP to in.h") Link: https://lore.kernel.org/lkml/ZCQt7hmodtUaBlCP@righiandr-XPS-13-7390 Signed-off-by: Guillaume Nault Signed-off-by: Andrea Righi Reviewed-by: Wojciech Drewek Tested-by: Wojciech Drewek Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 8 ++++---- net/l2tp/l2tp_ip6.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 4db5a554bdbd9..41a74fc84ca13 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -677,8 +677,8 @@ MODULE_AUTHOR("James Chapman "); MODULE_DESCRIPTION("L2TP over IP"); MODULE_VERSION("1.0"); -/* Use the value of SOCK_DGRAM (2) directory, because __stringify doesn't like - * enums +/* Use the values of SOCK_DGRAM (2) as type and IPPROTO_L2TP (115) as protocol, + * because __stringify doesn't like enums */ -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP); -MODULE_ALIAS_NET_PF_PROTO(PF_INET, IPPROTO_L2TP); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 115, 2); +MODULE_ALIAS_NET_PF_PROTO(PF_INET, 115); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 2478aa60145fb..5137ea1861ce2 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -806,8 +806,8 @@ MODULE_AUTHOR("Chris Elston "); MODULE_DESCRIPTION("L2TP IP encapsulation for IPv6"); MODULE_VERSION("1.0"); -/* Use the value of SOCK_DGRAM (2) directory, because __stringify doesn't like - * enums +/* Use the values of SOCK_DGRAM (2) as type and IPPROTO_L2TP (115) as protocol, + * because __stringify doesn't like enums */ -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 2, IPPROTO_L2TP); -MODULE_ALIAS_NET_PF_PROTO(PF_INET6, IPPROTO_L2TP); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 115, 2); +MODULE_ALIAS_NET_PF_PROTO(PF_INET6, 115); From c5b959eeb7f9e40673b97c08c71cbfff5f5923f2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 31 Mar 2023 09:48:56 +0200 Subject: [PATCH 624/898] net: netcp: MAX_SKB_FRAGS is now 'int' The type of MAX_SKB_FRAGS has changed recently, so the debug printk needs to be updated: drivers/net/ethernet/ti/netcp_core.c: In function 'netcp_create_interface': drivers/net/ethernet/ti/netcp_core.c:2084:30: error: format '%ld' expects argument of type 'long int', but argument 3 has type 'int' [-Werror=format=] 2084 | dev_err(dev, "tx-pool size too small, must be at least %ld\n", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 3948b05950fd ("net: introduce a config option to tweak MAX_SKB_FRAGS") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 1bb596a9d8a2b..dfdbcdeb991fd 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -2081,7 +2081,7 @@ static int netcp_create_interface(struct netcp_device *netcp_device, netcp->tx_pool_region_id = temp[1]; if (netcp->tx_pool_size < MAX_SKB_FRAGS) { - dev_err(dev, "tx-pool size too small, must be at least %ld\n", + dev_err(dev, "tx-pool size too small, must be at least %d\n", MAX_SKB_FRAGS); ret = -ENODEV; goto quit; From 362f0b6678ad1377c322a7dd237ea6785efc7342 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Fri, 31 Mar 2023 08:35:15 +0200 Subject: [PATCH 625/898] net: wwan: t7xx: do not compile with -Werror When playing with various compilers or their versions, some choke on the t7xx code. For example (with gcc 13): In file included from ./arch/s390/include/generated/asm/rwonce.h:1, from ../include/linux/compiler.h:247, from ../include/linux/build_bug.h:5, from ../include/linux/bits.h:22, from ../drivers/net/wwan/t7xx/t7xx_state_monitor.c:17: In function 'preempt_count', inlined from 't7xx_fsm_append_event' at ../drivers/net/wwan/t7xx/t7xx_state_monitor.c:439:43: ../include/asm-generic/rwonce.h:44:26: error: array subscript 0 is outside array bounds of 'const volatile int[0]' [-Werror=array-bounds=] There is no reason for any code in the kernel to be built with -Werror by default. Note that we have generic CONFIG_WERROR. So if anyone wants -Werror, they can enable that. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/all/20230330232717.1f8bf5ea@kernel.org/ Cc: Chandrashekar Devegowda Cc: Intel Corporation Cc: Chiranjeevi Rapolu Cc: Liu Haijun Cc: M Chetan Kumar Cc: Ricardo Martinez Cc: Loic Poulain Cc: Sergey Ryazanov Cc: Johannes Berg Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/wwan/t7xx/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wwan/t7xx/Makefile b/drivers/net/wwan/t7xx/Makefile index 268ff9e87e5b3..2652cd00504e6 100644 --- a/drivers/net/wwan/t7xx/Makefile +++ b/drivers/net/wwan/t7xx/Makefile @@ -1,7 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -ccflags-y += -Werror - obj-${CONFIG_MTK_T7XX} := mtk_t7xx.o mtk_t7xx-y:= t7xx_pci.o \ t7xx_pcie_mac.o \ From ffa5395a7901e83a68d88207c4592962906641bd Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Fri, 31 Mar 2023 10:56:41 +0300 Subject: [PATCH 626/898] vsock/vmci: convert VMCI error code to -ENOMEM on send This adds conversion of VMCI specific error code to general -ENOMEM. It is needed, because af_vsock.c passes error value returned from transport to the user, which does not expect to get VMCI_ERROR_* values. Fixes: c43170b7e157 ("vsock: return errors other than -ENOMEM to socket") Signed-off-by: Arseniy Krasnov Reviewed-by: Vishnu Dasa Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 36eb16a40745d..95cc4d79ba296 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1842,7 +1842,13 @@ static ssize_t vmci_transport_stream_enqueue( struct msghdr *msg, size_t len) { - return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0); + ssize_t err; + + err = vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0); + if (err < 0) + err = -ENOMEM; + + return err; } static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) From cb2239c198ad9fbd5aced22cf93e45562da781eb Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 30 Mar 2023 09:13:16 +0200 Subject: [PATCH 627/898] fs: drop peer group ids under namespace lock When cleaning up peer group ids in the failure path we need to make sure to hold on to the namespace lock. Otherwise another thread might just turn the mount from a shared into a non-shared mount concurrently. Link: https://lore.kernel.org/lkml/00000000000088694505f8132d77@google.com Fixes: 2a1867219c7b ("fs: add mount_setattr()") Reported-by: syzbot+8ac3859139c685c4f597@syzkaller.appspotmail.com Cc: stable@vger.kernel.org # 5.12+ Message-Id: <20230330-vfs-mount_setattr-propagation-fix-v1-1-37548d91533b@kernel.org> Signed-off-by: Christian Brauner --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index bc0f15257b49c..6836e937ee613 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4183,9 +4183,9 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr) unlock_mount_hash(); if (kattr->propagation) { - namespace_unlock(); if (err) cleanup_group_ids(mnt, NULL); + namespace_unlock(); } return err; From a288fd158fbf85c06a9ac01cecabf97ac5d962e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Fri, 24 Feb 2023 16:22:20 +0100 Subject: [PATCH 628/898] dmaengine: apple-admac: Handle 'global' interrupt flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In addition to TX channel and RX channel interrupt flags there's another class of 'global' interrupt flags with unknown semantics. Those weren't being handled up to now, and they are the suspected cause of stuck IRQ states that have been sporadically occurring. Check the global flags and clear them if raised. Fixes: b127315d9a78 ("dmaengine: apple-admac: Add Apple ADMAC driver") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20230224152222.26732-1-povik+lin@cutebit.org Signed-off-by: Vinod Koul --- drivers/dma/apple-admac.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c index 90f28bda29c8b..00cbfafe0ed9d 100644 --- a/drivers/dma/apple-admac.c +++ b/drivers/dma/apple-admac.c @@ -75,6 +75,7 @@ #define REG_TX_INTSTATE(idx) (0x0030 + (idx) * 4) #define REG_RX_INTSTATE(idx) (0x0040 + (idx) * 4) +#define REG_GLOBAL_INTSTATE(idx) (0x0050 + (idx) * 4) #define REG_CHAN_INTSTATUS(ch, idx) (0x8010 + (ch) * 0x200 + (idx) * 4) #define REG_CHAN_INTMASK(ch, idx) (0x8020 + (ch) * 0x200 + (idx) * 4) @@ -672,13 +673,14 @@ static void admac_handle_chan_int(struct admac_data *ad, int no) static irqreturn_t admac_interrupt(int irq, void *devid) { struct admac_data *ad = devid; - u32 rx_intstate, tx_intstate; + u32 rx_intstate, tx_intstate, global_intstate; int i; rx_intstate = readl_relaxed(ad->base + REG_RX_INTSTATE(ad->irq_index)); tx_intstate = readl_relaxed(ad->base + REG_TX_INTSTATE(ad->irq_index)); + global_intstate = readl_relaxed(ad->base + REG_GLOBAL_INTSTATE(ad->irq_index)); - if (!tx_intstate && !rx_intstate) + if (!tx_intstate && !rx_intstate && !global_intstate) return IRQ_NONE; for (i = 0; i < ad->nchannels; i += 2) { @@ -693,6 +695,12 @@ static irqreturn_t admac_interrupt(int irq, void *devid) rx_intstate >>= 1; } + if (global_intstate) { + dev_warn(ad->dev, "clearing unknown global interrupt flag: %x\n", + global_intstate); + writel_relaxed(~(u32) 0, ad->base + REG_GLOBAL_INTSTATE(ad->irq_index)); + } + return IRQ_HANDLED; } From 6e96adcaa7a29827ac8ee8df290a44957a4823ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Fri, 24 Feb 2023 16:22:22 +0100 Subject: [PATCH 629/898] dmaengine: apple-admac: Set src_addr_widths capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing setting of 'src_addr_widths', which is the same as for the other direction. Fixes: b127315d9a78 ("dmaengine: apple-admac: Add Apple ADMAC driver") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20230224152222.26732-3-povik+lin@cutebit.org Signed-off-by: Vinod Koul --- drivers/dma/apple-admac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c index 00cbfafe0ed9d..b9132b495d181 100644 --- a/drivers/dma/apple-admac.c +++ b/drivers/dma/apple-admac.c @@ -858,6 +858,9 @@ static int admac_probe(struct platform_device *pdev) dma->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); dma->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + dma->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); dma->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); From d9503be5a100c553731c0e8a82c7b4201e8a970c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Fri, 24 Feb 2023 16:22:21 +0100 Subject: [PATCH 630/898] dmaengine: apple-admac: Fix 'current_tx' not getting freed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In terminate_all we should queue up all submitted descriptors to be freed. We do that for the content of the 'issued' and 'submitted' lists, but the 'current_tx' descriptor falls through the cracks as it's removed from the 'issued' list once it gets assigned to be the current descriptor. Explicitly queue up freeing of the 'current_tx' descriptor to address a memory leak that is otherwise present. Fixes: b127315d9a78 ("dmaengine: apple-admac: Add Apple ADMAC driver") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20230224152222.26732-2-povik+lin@cutebit.org Signed-off-by: Vinod Koul --- drivers/dma/apple-admac.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c index b9132b495d181..4cf8da77bdd91 100644 --- a/drivers/dma/apple-admac.c +++ b/drivers/dma/apple-admac.c @@ -512,7 +512,10 @@ static int admac_terminate_all(struct dma_chan *chan) admac_stop_chan(adchan); admac_reset_rings(adchan); - adchan->current_tx = NULL; + if (adchan->current_tx) { + list_add_tail(&adchan->current_tx->node, &adchan->to_free); + adchan->current_tx = NULL; + } /* * Descriptors can only be freed after the tasklet * has been killed (in admac_synchronize). From 9fdc1605c504204e0fdec7892b29c916579e06f3 Mon Sep 17 00:00:00 2001 From: Andy Chi Date: Fri, 31 Mar 2023 16:32:41 +0800 Subject: [PATCH 631/898] ALSA: hda/realtek: fix mute/micmute LEDs for a HP ProBook There is a HP ProBook which using ALC236 codec and need the ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk to make mute LED and micmute LED work. Signed-off-by: Andy Chi Cc: Link: https://lore.kernel.org/r/20230331083242.58416-1-andy.chi@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a2706fd87b141..bd4e1a3a55b47 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9443,6 +9443,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8b47, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8b66, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b7a, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b7d, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b87, "HP", ALC236_FIXUP_HP_GPIO_LED), From e4efa515d58f1363d8a27e548f9c5769d3121e03 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Mar 2023 13:22:52 +0100 Subject: [PATCH 632/898] wifi: brcmfmac: Fix SDIO suspend/resume regression After commit 92cadedd9d5f ("brcmfmac: Avoid keeping power to SDIO card unless WOWL is used"), the wifi adapter by default is turned off on suspend and then re-probed on resume. In at least 2 model x86/acpi tablets with brcmfmac43430a1 wifi adapters, the newly added re-probe on resume fails like this: brcmfmac: brcmf_sdio_bus_rxctl: resumed on timeout ieee80211 phy1: brcmf_bus_started: failed: -110 ieee80211 phy1: brcmf_attach: dongle is not responding: err=-110 brcmfmac: brcmf_sdio_firmware_callback: brcmf_attach failed It seems this specific brcmfmac model does not like being reprobed without it actually being turned off first. And the adapter is not being turned off during suspend because of commit f0992ace680c ("brcmfmac: prohibit ACPI power management for brcmfmac driver"). Now that the driver is being reprobed on resume, the disabling of ACPI pm is no longer necessary, except when WOWL is used (in which case there is no-reprobe). Move the dis-/en-abling of ACPI pm to brcmf_sdio_wowl_config(), this fixes the brcmfmac43430a1 suspend/resume regression and should help save some power when suspended. This change means that the code now also may re-enable ACPI pm when WOWL gets disabled. ACPI pm should only be re-enabled if it was enabled by the ACPI core originally. Add a brcmf_sdiod_acpi_save_power_manageable() to save the original state for this. This has been tested on the following devices: Asus T100TA brcmfmac43241b4-sdio Acer Iconia One 7 B1-750 brcmfmac43340-sdio Chuwi Hi8 brcmfmac43430a0-sdio Chuwi Hi8 brcmfmac43430a1-sdio (the Asus T100TA is the device for which the prohibiting of ACPI pm was originally added) Fixes: 92cadedd9d5f ("brcmfmac: Avoid keeping power to SDIO card unless WOWL is used") Cc: Ulf Hansson Signed-off-by: Hans de Goede Reviewed-by: Ulf Hansson Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230320122252.240070-1-hdegoede@redhat.com --- .../broadcom/brcm80211/brcmfmac/bcmsdh.c | 36 +++++++++++++------ .../broadcom/brcm80211/brcmfmac/sdio.h | 2 ++ 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index b7c918f241c91..65d4799a56584 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -994,15 +994,34 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = { MODULE_DEVICE_TABLE(sdio, brcmf_sdmmc_ids); -static void brcmf_sdiod_acpi_set_power_manageable(struct device *dev, - int val) +static void brcmf_sdiod_acpi_save_power_manageable(struct brcmf_sdio_dev *sdiodev) { #if IS_ENABLED(CONFIG_ACPI) struct acpi_device *adev; - adev = ACPI_COMPANION(dev); + adev = ACPI_COMPANION(&sdiodev->func1->dev); if (adev) - adev->flags.power_manageable = 0; + sdiodev->func1_power_manageable = adev->flags.power_manageable; + + adev = ACPI_COMPANION(&sdiodev->func2->dev); + if (adev) + sdiodev->func2_power_manageable = adev->flags.power_manageable; +#endif +} + +static void brcmf_sdiod_acpi_set_power_manageable(struct brcmf_sdio_dev *sdiodev, + int enable) +{ +#if IS_ENABLED(CONFIG_ACPI) + struct acpi_device *adev; + + adev = ACPI_COMPANION(&sdiodev->func1->dev); + if (adev) + adev->flags.power_manageable = enable ? sdiodev->func1_power_manageable : 0; + + adev = ACPI_COMPANION(&sdiodev->func2->dev); + if (adev) + adev->flags.power_manageable = enable ? sdiodev->func2_power_manageable : 0; #endif } @@ -1012,7 +1031,6 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func, int err; struct brcmf_sdio_dev *sdiodev; struct brcmf_bus *bus_if; - struct device *dev; brcmf_dbg(SDIO, "Enter\n"); brcmf_dbg(SDIO, "Class=%x\n", func->class); @@ -1020,14 +1038,9 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func, brcmf_dbg(SDIO, "sdio device ID: 0x%04x\n", func->device); brcmf_dbg(SDIO, "Function#: %d\n", func->num); - dev = &func->dev; - /* Set MMC_QUIRK_LENIENT_FN0 for this card */ func->card->quirks |= MMC_QUIRK_LENIENT_FN0; - /* prohibit ACPI power management for this device */ - brcmf_sdiod_acpi_set_power_manageable(dev, 0); - /* Consume func num 1 but dont do anything with it. */ if (func->num == 1) return 0; @@ -1059,6 +1072,7 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func, dev_set_drvdata(&sdiodev->func1->dev, bus_if); sdiodev->dev = &sdiodev->func1->dev; + brcmf_sdiod_acpi_save_power_manageable(sdiodev); brcmf_sdiod_change_state(sdiodev, BRCMF_SDIOD_DOWN); brcmf_dbg(SDIO, "F2 found, calling brcmf_sdiod_probe...\n"); @@ -1124,6 +1138,8 @@ void brcmf_sdio_wowl_config(struct device *dev, bool enabled) if (sdiodev->settings->bus.sdio.oob_irq_supported || pm_caps & MMC_PM_WAKE_SDIO_IRQ) { + /* Stop ACPI from turning off the device when wowl is enabled */ + brcmf_sdiod_acpi_set_power_manageable(sdiodev, !enabled); sdiodev->wowl_enabled = enabled; brcmf_dbg(SDIO, "Configuring WOWL, enabled=%d\n", enabled); return; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h index b76d34d36bde6..0d18ed15b4032 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h @@ -188,6 +188,8 @@ struct brcmf_sdio_dev { char nvram_name[BRCMF_FW_NAME_LEN]; char clm_name[BRCMF_FW_NAME_LEN]; bool wowl_enabled; + bool func1_power_manageable; + bool func2_power_manageable; enum brcmf_sdiod_state state; struct brcmf_sdiod_freezer *freezer; const struct firmware *clm_fw; From 2ceb76f734e37833824b7fab6af17c999eb48d2b Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 22 Mar 2023 17:37:17 +0100 Subject: [PATCH 633/898] wifi: mt76: mt7921: Fix use-after-free in fw features query. Stop referencing 'features' memory after release_firmware is called. Fixes this crash: RIP: 0010:mt7921_check_offload_capability+0x17d mt7921_pci_probe+0xca/0x4b0 ... Signed-off-by: Ben Greear Signed-off-by: Lorenzo Bianconi Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/51fd8f76494348aa9ecbf0abc471ebe47a983dfd.1679502607.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/mt7921/init.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c index 80c71acfe1591..cc94531185dac 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c @@ -171,12 +171,12 @@ mt7921_mac_init_band(struct mt7921_dev *dev, u8 band) u8 mt7921_check_offload_capability(struct device *dev, const char *fw_wm) { - struct mt7921_fw_features *features = NULL; const struct mt76_connac2_fw_trailer *hdr; struct mt7921_realease_info *rel_info; const struct firmware *fw; int ret, i, offset = 0; const u8 *data, *end; + u8 offload_caps = 0; ret = request_firmware(&fw, fw_wm, dev); if (ret) @@ -208,7 +208,10 @@ u8 mt7921_check_offload_capability(struct device *dev, const char *fw_wm) data += sizeof(*rel_info); if (rel_info->tag == MT7921_FW_TAG_FEATURE) { + struct mt7921_fw_features *features; + features = (struct mt7921_fw_features *)data; + offload_caps = features->data; break; } @@ -218,7 +221,7 @@ u8 mt7921_check_offload_capability(struct device *dev, const char *fw_wm) out: release_firmware(fw); - return features ? features->data : 0; + return offload_caps; } EXPORT_SYMBOL_GPL(mt7921_check_offload_capability); From eb85df0a5643612285f61f38122564498d0c49f7 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 28 Mar 2023 12:01:17 +0200 Subject: [PATCH 634/898] wifi: mt76: mt7921: fix fw used for offload check for mt7922 Fix the firmware version used for offload capability check used by 0x0616 devices. This path enables offload capabilities for 0x0616 devices. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217245 Fixes: 034ae28b56f1 ("wifi: mt76: mt7921: introduce remain_on_channel support") Cc: stable@vger.kernel.org Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/632d8f0c9781c9902d7160e2c080aa7e9232d50d.1679997487.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index cb72ded372564..5c23c827abe47 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -20,7 +20,7 @@ static const struct pci_device_id mt7921_pci_device_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x0608), .driver_data = (kernel_ulong_t)MT7921_FIRMWARE_WM }, { PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x0616), - .driver_data = (kernel_ulong_t)MT7921_FIRMWARE_WM }, + .driver_data = (kernel_ulong_t)MT7922_FIRMWARE_WM }, { }, }; From f1594bc676579133a3cd906d7d27733289edfb86 Mon Sep 17 00:00:00 2001 From: Anh Tuan Phan Date: Fri, 24 Mar 2023 09:14:15 +0700 Subject: [PATCH 635/898] selftests mount: Fix mount_setattr_test builds failed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling selftests with target mount_setattr I encountered some errors with the below messages: mount_setattr_test.c: In function ‘mount_setattr_thread’: mount_setattr_test.c:343:16: error: variable ‘attr’ has initializer but incomplete type 343 | struct mount_attr attr = { | ^~~~~~~~~~ These errors might be because of linux/mount.h is not included. This patch resolves that issue. Signed-off-by: Anh Tuan Phan Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/mount_setattr/mount_setattr_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index 582669ca38e9d..c6a8c732b8021 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "../kselftest_harness.h" From 52882b9c7a761b2b4e44717d6fbd1ed94c601b7f Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 4 May 2022 17:48:07 +1000 Subject: [PATCH 636/898] KVM: PPC: Make KVM_CAP_IRQFD_RESAMPLE platform dependent When introduced, IRQFD resampling worked on POWER8 with XICS. However KVM on POWER9 has never implemented it - the compatibility mode code ("XICS-on-XIVE") misses the kvm_notify_acked_irq() call and the native XIVE mode does not handle INTx in KVM at all. This moved the capability support advertising to platforms and stops advertising it on XIVE, i.e. POWER9 and later. Signed-off-by: Alexey Kardashevskiy Acked-by: Anup Patel Acked-by: Nicholas Piggin Message-Id: <20220504074807.3616813-1-aik@ozlabs.ru> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/arm.c | 1 + arch/powerpc/kvm/powerpc.c | 6 ++++++ arch/s390/kvm/kvm-s390.c | 1 + arch/x86/kvm/x86.c | 1 + virt/kvm/kvm_main.c | 1 - 5 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3bd732eaf0872..3f6a5efdbcf0d 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -220,6 +220,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_VCPU_ATTRIBUTES: case KVM_CAP_PTP_KVM: case KVM_CAP_ARM_SYSTEM_SUSPEND: + case KVM_CAP_IRQFD_RESAMPLE: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4c5405fc55387..d23e25e8432d3 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -576,6 +576,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; #endif +#ifdef CONFIG_HAVE_KVM_IRQFD + case KVM_CAP_IRQFD_RESAMPLE: + r = !xive_enabled(); + break; +#endif + case KVM_CAP_PPC_ALLOC_HTAB: r = hv_enabled; break; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 39b36562c043f..1eeb9ae57879c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -573,6 +573,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_VCPU_RESETS: case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_S390_DIAG318: + case KVM_CAP_IRQFD_RESAMPLE: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7d6f98b7635fc..3d852ce849206 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4432,6 +4432,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_VAPIC: case KVM_CAP_ENABLE_CAP: case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES: + case KVM_CAP_IRQFD_RESAMPLE: r = 1; break; case KVM_CAP_EXIT_HYPERCALL: diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d255964ec331e..b1679d08a2160 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4479,7 +4479,6 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #endif #ifdef CONFIG_HAVE_KVM_IRQFD case KVM_CAP_IRQFD: - case KVM_CAP_IRQFD_RESAMPLE: #endif case KVM_CAP_IOEVENTFD_ANY_LENGTH: case KVM_CAP_CHECK_EXTENSION_VM: From 52f91e51944808d83dfe2d5582601b5e84e472cc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 31 Mar 2023 19:31:48 +0200 Subject: [PATCH 637/898] platform/x86: gigabyte-wmi: add support for X570S AORUS ELITE Add "X570S AORUS ELITE" to known working boards Reported-by: Brandon Nielsen Link: https://lore.kernel.org/r/20230331014902.7864-1-nielsenb@jetfuse.net Signed-off-by: Hans de Goede --- drivers/platform/x86/gigabyte-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index 5e5b17c50eb67..2a426040f749e 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -161,6 +161,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 UD"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570S AORUS ELITE"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z690M AORUS ELITE AX DDR4"), { } }; From e352d685fde427a8fc9beb2ba30888f5d6f2e5e6 Mon Sep 17 00:00:00 2001 From: weiliang1503 Date: Thu, 30 Mar 2023 19:49:43 +0800 Subject: [PATCH 638/898] platform/x86: asus-nb-wmi: Add quirk_asus_tablet_mode to other ROG Flow X13 models Make quirk_asus_tablet_mode apply on other ROG Flow X13 devices, which only affects the GV301Q model before. Signed-off-by: weiliang1503 Link: https://lore.kernel.org/r/20230330114943.15057-1-weiliang1503@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/asus-nb-wmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index cb15acdf14a30..e2c9a68d12df9 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -464,7 +464,8 @@ static const struct dmi_system_id asus_quirks[] = { .ident = "ASUS ROG FLOW X13", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "GV301Q"), + /* Match GV301** */ + DMI_MATCH(DMI_PRODUCT_NAME, "GV301"), }, .driver_data = &quirk_asus_tablet_mode, }, From e3271a5917d1501089b1a224d702aa053e2877f4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 30 Mar 2023 21:46:44 +0200 Subject: [PATCH 639/898] platform/x86: ideapad-laptop: Stop sending KEY_TOUCHPAD_TOGGLE Commit 5829f8a897e4 ("platform/x86: ideapad-laptop: Send KEY_TOUCHPAD_TOGGLE on some models") made ideapad-laptop send KEY_TOUCHPAD_TOGGLE when we receive an ACPI notify with VPC event bit 5 set and the touchpad-state has not been changed by the EC itself already. This was done under the assumption that this would be good to do to make the touchpad-toggle hotkey work on newer models where the EC does not toggle the touchpad on/off itself (because it is not routed through the PS/2 controller, but uses I2C). But it turns out that at least some models, e.g. the Yoga 7-15ITL5 the EC triggers an ACPI notify with VPC event bit 5 set on resume, which would now cause a spurious KEY_TOUCHPAD_TOGGLE on resume to which the desktop environment responds by disabling the touchpad in software, breaking the touchpad (until manually re-enabled) on resume. It was never confirmed that sending KEY_TOUCHPAD_TOGGLE actually improves things on new models and at least some new models like the Yoga 7-15ITL5 don't have a touchpad on/off toggle hotkey at all, while still sending ACPI notify events with VPC event bit 5 set. So it seems best to revert the change to send KEY_TOUCHPAD_TOGGLE when receiving an ACPI notify events with VPC event bit 5 and the touchpad state as reported by the EC has not changed. Note this is not a full revert the code to cache the last EC touchpad state is kept to avoid sending spurious KEY_TOUCHPAD_ON / _OFF events on resume. Fixes: 5829f8a897e4 ("platform/x86: ideapad-laptop: Send KEY_TOUCHPAD_TOGGLE on some models") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217234 Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230330194644.64628-1-hdegoede@redhat.com --- drivers/platform/x86/ideapad-laptop.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 0eb5bfdd823a1..959ec3c5f376e 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1170,7 +1170,6 @@ static const struct key_entry ideapad_keymap[] = { { KE_KEY, 65, { KEY_PROG4 } }, { KE_KEY, 66, { KEY_TOUCHPAD_OFF } }, { KE_KEY, 67, { KEY_TOUCHPAD_ON } }, - { KE_KEY, 68, { KEY_TOUCHPAD_TOGGLE } }, { KE_KEY, 128, { KEY_ESC } }, /* @@ -1526,18 +1525,16 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_ if (priv->features.ctrl_ps2_aux_port) i8042_command(¶m, value ? I8042_CMD_AUX_ENABLE : I8042_CMD_AUX_DISABLE); - if (send_events) { - /* - * On older models the EC controls the touchpad and toggles it - * on/off itself, in this case we report KEY_TOUCHPAD_ON/_OFF. - * If the EC did not toggle, report KEY_TOUCHPAD_TOGGLE. - */ - if (value != priv->r_touchpad_val) { - ideapad_input_report(priv, value ? 67 : 66); - sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad"); - } else { - ideapad_input_report(priv, 68); - } + /* + * On older models the EC controls the touchpad and toggles it on/off + * itself, in this case we report KEY_TOUCHPAD_ON/_OFF. Some models do + * an acpi-notify with VPC bit 5 set on resume, so this function get + * called with send_events=true on every resume. Therefor if the EC did + * not toggle, do nothing to avoid sending spurious KEY_TOUCHPAD_TOGGLE. + */ + if (send_events && value != priv->r_touchpad_val) { + ideapad_input_report(priv, value ? 67 : 66); + sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad"); } priv->r_touchpad_val = value; From 36d4d213c6d4fffae2645a601e8ae996de4c3645 Mon Sep 17 00:00:00 2001 From: Jeremy Soller Date: Fri, 31 Mar 2023 10:23:17 -0600 Subject: [PATCH 640/898] ALSA: hda/realtek: Add quirk for Clevo X370SNW Fixes speaker output and headset detection on Clevo X370SNW. Signed-off-by: Jeremy Soller Signed-off-by: Tim Crawford Cc: Link: https://lore.kernel.org/r/20230331162317.14992-1-tcrawford@system76.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bd4e1a3a55b47..26187f5d56b59 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2624,6 +2624,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), + SND_PCI_QUIRK(0x1558, 0x3702, "Clevo X370SN[VW]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x50d3, "Clevo PC50[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), From 804d8e0a6e54427268790472781e03bc243f4ee3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 31 Mar 2023 16:31:19 -0400 Subject: [PATCH 641/898] NFSD: Avoid calling OPDESC() with ops->opnum == OP_ILLEGAL OPDESC() simply indexes into nfsd4_ops[] by the op's operation number, without range checking that value. It assumes callers are careful to avoid calling it with an out-of-bounds opnum value. nfsd4_decode_compound() is not so careful, and can invoke OPDESC() with opnum set to OP_ILLEGAL, which is 10044 -- well beyond the end of nfsd4_ops[]. Reported-by: Jeff Layton Fixes: f4f9ef4a1b0a ("nfsd4: opdesc will be useful outside nfs4proc.c") Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 97edb32be77f1..67bbd2d6334c4 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2476,10 +2476,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) for (i = 0; i < argp->opcnt; i++) { op = &argp->ops[i]; op->replay = NULL; + op->opdesc = NULL; if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0) return false; if (nfsd4_opnum_in_range(argp, op)) { + op->opdesc = OPDESC(op); op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); if (op->status != nfs_ok) trace_nfsd_compound_decode_err(argp->rqstp, @@ -2490,7 +2492,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op->opnum = OP_ILLEGAL; op->status = nfserr_op_illegal; } - op->opdesc = OPDESC(op); + /* * We'll try to cache the result in the DRC if any one * op in the compound wants to be cached: From 15a8b55dbb1ba154d82627547c5761cac884d810 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 27 Mar 2023 06:21:37 -0400 Subject: [PATCH 642/898] nfsd: call op_release, even when op_func returns an error For ops with "trivial" replies, nfsd4_encode_operation will shortcut most of the encoding work and skip to just marshalling up the status. One of the things it skips is calling op_release. This could cause a memory leak in the layoutget codepath if there is an error at an inopportune time. Have the compound processing engine always call op_release, even when op_func sets an error in op->status. With this change, we also need nfsd4_block_get_device_info_scsi to set the gd_device pointer to NULL on error to avoid a double free. Reported-by: Zhi Li Link: https://bugzilla.redhat.com/show_bug.cgi?id=2181403 Fixes: 34b1744c91cc ("nfsd4: define ->op_release for compound ops") Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/blocklayout.c | 1 + fs/nfsd/nfs4xdr.c | 11 +++++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 04697f8dc37d6..01d7fd108cf3d 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -297,6 +297,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb, out_free_dev: kfree(dev); + gdp->gd_device = NULL; return ret; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 67bbd2d6334c4..7799835c2196e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -5400,10 +5400,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) __be32 *p; p = xdr_reserve_space(xdr, 8); - if (!p) { - WARN_ON_ONCE(1); - return; - } + if (!p) + goto release; *p++ = cpu_to_be32(op->opnum); post_err_offset = xdr->buf->len; @@ -5418,8 +5416,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) op->status = encoder(resp, op->status, &op->u); if (op->status) trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status); - if (opdesc && opdesc->op_release) - opdesc->op_release(&op->u); xdr_commit_encode(xdr); /* nfsd4_check_resp_size guarantees enough room for error status */ @@ -5460,6 +5456,9 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) } status: *p = op->status; +release: + if (opdesc && opdesc->op_release) + opdesc->op_release(&op->u); } /* From 7b50567bdcad8925ca1e075feb7171c12015afd1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 7 Feb 2023 17:13:12 +0100 Subject: [PATCH 643/898] media: i2c: imx290: fix conditional function defintions The runtime suspend/resume functions are only referenced from the dev_pm_ops, but they use the old SET_RUNTIME_PM_OPS() helper that requires a __maybe_unused annotation to avoid a warning: drivers/media/i2c/imx290.c:1082:12: error: unused function 'imx290_runtime_resume' [-Werror,-Wunused-function] static int imx290_runtime_resume(struct device *dev) ^ drivers/media/i2c/imx290.c:1090:12: error: unused function 'imx290_runtime_suspend' [-Werror,-Wunused-function] static int imx290_runtime_suspend(struct device *dev) ^ Convert this to the new RUNTIME_PM_OPS() helper that so this is not required. To improve this further, also use the pm_ptr() helper that lets the dev_pm_ops get dropped entirely when CONFIG_PM is disabled. A related mistake happened in the of_match_ptr() macro here, which like SET_RUNTIME_PM_OPS() requires the match table to be marked as __maybe_unused, though I could not reproduce building this without CONFIG_OF. Remove the of_match_ptr() here as there is no point in dropping the match table in configurations without CONFIG_OF. Fixes: 02852c01f654 ("media: i2c: imx290: Initialize runtime PM before subdev") Signed-off-by: Arnd Bergmann Reported-by: Guenter Roeck Reported-by: Sudip Mukherjee Reviewed-by: Manivannan Sadhasivam Reviewed-by: Laurent Pinchart Signed-off-by: Linus Torvalds --- drivers/media/i2c/imx290.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/i2c/imx290.c b/drivers/media/i2c/imx290.c index 49d6c8bdec412..48ae2e0adf9ea 100644 --- a/drivers/media/i2c/imx290.c +++ b/drivers/media/i2c/imx290.c @@ -1098,7 +1098,7 @@ static int imx290_runtime_suspend(struct device *dev) } static const struct dev_pm_ops imx290_pm_ops = { - SET_RUNTIME_PM_OPS(imx290_runtime_suspend, imx290_runtime_resume, NULL) + RUNTIME_PM_OPS(imx290_runtime_suspend, imx290_runtime_resume, NULL) }; /* ---------------------------------------------------------------------------- @@ -1362,8 +1362,8 @@ static struct i2c_driver imx290_i2c_driver = { .remove = imx290_remove, .driver = { .name = "imx290", - .pm = &imx290_pm_ops, - .of_match_table = of_match_ptr(imx290_of_match), + .pm = pm_ptr(&imx290_pm_ops), + .of_match_table = imx290_of_match, }, }; From 7f67aa097e875c87fba024e850cf405342300059 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Fri, 31 Mar 2023 00:39:38 +0200 Subject: [PATCH 644/898] drm/nouveau/disp: Support more modes by checking with lower bpc This allows us to advertise more modes especially on HDR displays. Fixes using 4K@60 modes on my TV and main display both using a HDMI to DP adapter. Also fixes similar issues for users running into this. Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: Karol Herbst Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20230330223938.4025569-1-kherbst@redhat.com --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 32 +++++++++++++++++++++++++ drivers/gpu/drm/nouveau/nouveau_dp.c | 8 ++++--- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index ed9d374147b8d..5bb777ff13130 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -363,6 +363,35 @@ nv50_outp_atomic_check_view(struct drm_encoder *encoder, return 0; } +static void +nv50_outp_atomic_fix_depth(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state) +{ + struct nv50_head_atom *asyh = nv50_head_atom(crtc_state); + struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); + struct drm_display_mode *mode = &asyh->state.adjusted_mode; + unsigned int max_rate, mode_rate; + + switch (nv_encoder->dcb->type) { + case DCB_OUTPUT_DP: + max_rate = nv_encoder->dp.link_nr * nv_encoder->dp.link_bw; + + /* we don't support more than 10 anyway */ + asyh->or.bpc = min_t(u8, asyh->or.bpc, 10); + + /* reduce the bpc until it works out */ + while (asyh->or.bpc > 6) { + mode_rate = DIV_ROUND_UP(mode->clock * asyh->or.bpc * 3, 8); + if (mode_rate <= max_rate) + break; + + asyh->or.bpc -= 2; + } + break; + default: + break; + } +} + static int nv50_outp_atomic_check(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state, @@ -381,6 +410,9 @@ nv50_outp_atomic_check(struct drm_encoder *encoder, if (crtc_state->mode_changed || crtc_state->connectors_changed) asyh->or.bpc = connector->display_info.bpc; + /* We might have to reduce the bpc */ + nv50_outp_atomic_fix_depth(encoder, crtc_state); + return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c index e00876f92aeea..d49b4875fc3c9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dp.c +++ b/drivers/gpu/drm/nouveau/nouveau_dp.c @@ -263,8 +263,6 @@ nouveau_dp_irq(struct work_struct *work) } /* TODO: - * - Use the minimum possible BPC here, once we add support for the max bpc - * property. * - Validate against the DP caps advertised by the GPU (we don't check these * yet) */ @@ -276,7 +274,11 @@ nv50_dp_mode_valid(struct drm_connector *connector, { const unsigned int min_clock = 25000; unsigned int max_rate, mode_rate, ds_max_dotclock, clock = mode->clock; - const u8 bpp = connector->display_info.bpc * 3; + /* Check with the minmum bpc always, so we can advertise better modes. + * In particlar not doing this causes modes to be dropped on HDR + * displays as we might check with a bpc of 16 even. + */ + const u8 bpp = 6 * 3; if (mode->flags & DRM_MODE_FLAG_INTERLACE && !outp->caps.dp_interlace) return MODE_NO_INTERLACE; From adef41b03b35839e5677aace628d02597f04a616 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 31 Mar 2023 21:16:22 -0700 Subject: [PATCH 645/898] Revert "net: netcp: MAX_SKB_FRAGS is now 'int'" This reverts commit c5b959eeb7f9e40673b97c08c71cbfff5f5923f2. Reverted change is required after commit 3948b05950fd ("net: introduce a config option to tweak MAX_SKB_FRAGS") which does not exist in this tree, yet. It's only present in -next trees at the time of writing. Reported-by: Nathan Chancellor Link: https://lore.kernel.org/all/20230331214444.GA1426512@dev-arch.thelio-3990X/ Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/netcp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index dfdbcdeb991fd..1bb596a9d8a2b 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -2081,7 +2081,7 @@ static int netcp_create_interface(struct netcp_device *netcp_device, netcp->tx_pool_region_id = temp[1]; if (netcp->tx_pool_size < MAX_SKB_FRAGS) { - dev_err(dev, "tx-pool size too small, must be at least %d\n", + dev_err(dev, "tx-pool size too small, must be at least %ld\n", MAX_SKB_FRAGS); ret = -ENODEV; goto quit; From 7d63b67125382ff0ffdfca434acbc94a38bd092b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Mar 2023 17:45:02 +0000 Subject: [PATCH 646/898] icmp: guard against too small mtu syzbot was able to trigger a panic [1] in icmp_glue_bits(), or more exactly in skb_copy_and_csum_bits() There is no repro yet, but I think the issue is that syzbot manages to lower device mtu to a small value, fooling __icmp_send() __icmp_send() must make sure there is enough room for the packet to include at least the headers. We might in the future refactor skb_copy_and_csum_bits() and its callers to no longer crash when something bad happens. [1] kernel BUG at net/core/skbuff.c:3343 ! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 15766 Comm: syz-executor.0 Not tainted 6.3.0-rc4-syzkaller-00039-gffe78bbd5121 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-2 04/01/2014 RIP: 0010:skb_copy_and_csum_bits+0x798/0x860 net/core/skbuff.c:3343 Code: f0 c1 c8 08 41 89 c6 e9 73 ff ff ff e8 61 48 d4 f9 e9 41 fd ff ff 48 8b 7c 24 48 e8 52 48 d4 f9 e9 c3 fc ff ff e8 c8 27 84 f9 <0f> 0b 48 89 44 24 28 e8 3c 48 d4 f9 48 8b 44 24 28 e9 9d fb ff ff RSP: 0018:ffffc90000007620 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00000000000001e8 RCX: 0000000000000100 RDX: ffff8880276f6280 RSI: ffffffff87fdd138 RDI: 0000000000000005 RBP: 0000000000000000 R08: 0000000000000005 R09: 0000000000000000 R10: 00000000000001e8 R11: 0000000000000001 R12: 000000000000003c R13: 0000000000000000 R14: ffff888028244868 R15: 0000000000000b0e FS: 00007fbc81f1c700(0000) GS:ffff88802ca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2df43000 CR3: 00000000744db000 CR4: 0000000000150ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: icmp_glue_bits+0x7b/0x210 net/ipv4/icmp.c:353 __ip_append_data+0x1d1b/0x39f0 net/ipv4/ip_output.c:1161 ip_append_data net/ipv4/ip_output.c:1343 [inline] ip_append_data+0x115/0x1a0 net/ipv4/ip_output.c:1322 icmp_push_reply+0xa8/0x440 net/ipv4/icmp.c:370 __icmp_send+0xb80/0x1430 net/ipv4/icmp.c:765 ipv4_send_dest_unreach net/ipv4/route.c:1239 [inline] ipv4_link_failure+0x5a9/0x9e0 net/ipv4/route.c:1246 dst_link_failure include/net/dst.h:423 [inline] arp_error_report+0xcb/0x1c0 net/ipv4/arp.c:296 neigh_invalidate+0x20d/0x560 net/core/neighbour.c:1079 neigh_timer_handler+0xc77/0xff0 net/core/neighbour.c:1166 call_timer_fn+0x1a0/0x580 kernel/time/timer.c:1700 expire_timers+0x29b/0x4b0 kernel/time/timer.c:1751 __run_timers kernel/time/timer.c:2022 [inline] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+d373d60fddbdc915e666@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230330174502.1915328-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/icmp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 8cebb476b3ab1..b8607763d113a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -749,6 +749,11 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, room = 576; room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen; room -= sizeof(struct icmphdr); + /* Guard against tiny mtu. We need to include at least one + * IP network header for this message to make any sense. + */ + if (room <= (int)sizeof(struct iphdr)) + goto ende; icmp_param.data_len = skb_in->len - icmp_param.offset; if (icmp_param.data_len > room) From f785f5ee968f7045268b8be6b0abc850c4a4277c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 31 Mar 2023 16:22:17 +0200 Subject: [PATCH 647/898] ALSA: hda/hdmi: Preserve the previous PCM device upon re-enablement When a DRM driver turns on or off the screen with the audio capability, it notifies the ELD to HD-audio HDMI codec driver via component ops. HDMI codec driver, in turn, attaches or detaches the PCM stream for the given port on the fly. The problem is that, since the recent code change, the HDMI driver always treats the PCM stream assignment dynamically; this ended up the confusion of the PCM device appearance. e.g. when a screen goes once off and on again, it may appear on a different PCM device before the screen-off. Although the application should treat such a change, it doesn't seem working gracefully with the current pipewire (maybe PulseAudio, too). As a workaround, this patch changes the HDMI codec driver behavior slightly to be more consistent. Now it remembers the previous PCM slot for the given port and try to assign to it. That is, if a port is re-enabled, the driver tries to use the same PCM slot that was assigned to that port previously. If it conflicts, a new slot is searched and used like before, instead. Note that multiple monitor connections are the only typical case where the PCM slot preservation is effective. As long as only a single monitor is connected, the behavior isn't changed, and the first PCM slot is still assigned always. Fixes: ef6f5494faf6 ("ALSA: hda/hdmi: Use only dynamic PCM device allocation") Reviewed-by: Jaroslav Kysela Link: https://bugzilla.kernel.org/show_bug.cgi?id=217259 Link: https://lore.kernel.org/r/20230331142217.19791-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 9ea633fe93393..4ffa3a59f419f 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -81,6 +81,7 @@ struct hdmi_spec_per_pin { struct delayed_work work; struct hdmi_pcm *pcm; /* pointer to spec->pcm_rec[n] dynamically*/ int pcm_idx; /* which pcm is attached. -1 means no pcm is attached */ + int prev_pcm_idx; /* previously assigned pcm index */ int repoll_count; bool setup; /* the stream has been set up by prepare callback */ bool silent_stream; @@ -1380,9 +1381,17 @@ static void hdmi_attach_hda_pcm(struct hdmi_spec *spec, /* pcm already be attached to the pin */ if (per_pin->pcm) return; + /* try the previously used slot at first */ + idx = per_pin->prev_pcm_idx; + if (idx >= 0) { + if (!test_bit(idx, &spec->pcm_bitmap)) + goto found; + per_pin->prev_pcm_idx = -1; /* no longer valid, clear it */ + } idx = hdmi_find_pcm_slot(spec, per_pin); if (idx == -EBUSY) return; + found: per_pin->pcm_idx = idx; per_pin->pcm = get_hdmi_pcm(spec, idx); set_bit(idx, &spec->pcm_bitmap); @@ -1398,6 +1407,7 @@ static void hdmi_detach_hda_pcm(struct hdmi_spec *spec, return; idx = per_pin->pcm_idx; per_pin->pcm_idx = -1; + per_pin->prev_pcm_idx = idx; /* remember the previous index */ per_pin->pcm = NULL; if (idx >= 0 && idx < spec->pcm_used) clear_bit(idx, &spec->pcm_bitmap); @@ -1924,6 +1934,7 @@ static int hdmi_add_pin(struct hda_codec *codec, hda_nid_t pin_nid) per_pin->pcm = NULL; per_pin->pcm_idx = -1; + per_pin->prev_pcm_idx = -1; per_pin->pin_nid = pin_nid; per_pin->pin_nid_idx = spec->num_nids; per_pin->dev_id = i; From c0921e51dab767ef5adf6175c4a0ba3c6e1074a3 Mon Sep 17 00:00:00 2001 From: Wojciech Lukowicz Date: Sat, 1 Apr 2023 20:50:38 +0100 Subject: [PATCH 648/898] io_uring: fix return value when removing provided buffers When a request to remove buffers is submitted, and the given number to be removed is larger than available in the specified buffer group, the resulting CQE result will be the number of removed buffers + 1, which is 1 more than it should be. Previously, the head was part of the list and it got removed after the loop, so the increment was needed. Now, the head is not an element of the list, so the increment shouldn't be there anymore. Fixes: dbc7d452e7cf ("io_uring: manage provided buffers strictly ordered") Signed-off-by: Wojciech Lukowicz Link: https://lore.kernel.org/r/20230401195039.404909-2-wlukowicz01@gmail.com Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 3002dc8271959..0fdcc0adbdbcc 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -228,7 +228,6 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, return i; } - /* the head kbuf is the list itself */ while (!list_empty(&bl->buf_list)) { struct io_buffer *nxt; @@ -238,7 +237,6 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, return i; cond_resched(); } - i++; return i; } From b4a72c0589fdea6259720375426179888969d6a2 Mon Sep 17 00:00:00 2001 From: Wojciech Lukowicz Date: Sat, 1 Apr 2023 20:50:39 +0100 Subject: [PATCH 649/898] io_uring: fix memory leak when removing provided buffers When removing provided buffers, io_buffer structs are not being disposed of, leading to a memory leak. They can't be freed individually, because they are allocated in page-sized groups. They need to be added to some free list instead, such as io_buffers_cache. All callers already hold the lock protecting it, apart from when destroying buffers, so had to extend the lock there. Fixes: cc3cec8367cb ("io_uring: speedup provided buffer handling") Signed-off-by: Wojciech Lukowicz Link: https://lore.kernel.org/r/20230401195039.404909-2-wlukowicz01@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 2 +- io_uring/kbuf.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 722624b6d0dca..2a8b8c304d2ab 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2789,8 +2789,8 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) io_eventfd_unregister(ctx); io_alloc_cache_free(&ctx->apoll_cache, io_apoll_cache_free); io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free); - mutex_unlock(&ctx->uring_lock); io_destroy_buffers(ctx); + mutex_unlock(&ctx->uring_lock); if (ctx->sq_creds) put_cred(ctx->sq_creds); if (ctx->submitter_task) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 0fdcc0adbdbcc..a90c820ce99e1 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -228,11 +228,14 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, return i; } + /* protects io_buffers_cache */ + lockdep_assert_held(&ctx->uring_lock); + while (!list_empty(&bl->buf_list)) { struct io_buffer *nxt; nxt = list_first_entry(&bl->buf_list, struct io_buffer, list); - list_del(&nxt->list); + list_move(&nxt->list, &ctx->io_buffers_cache); if (++i == nbufs) return i; cond_resched(); From ea4f1009408efb4989a0f139b70fb338e7f687d0 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Mon, 13 Mar 2023 22:43:25 +0800 Subject: [PATCH 650/898] 9p/xen : Fix use after free bug in xen_9pfs_front_remove due to race condition In xen_9pfs_front_probe, it calls xen_9pfs_front_alloc_dataring to init priv->rings and bound &ring->work with p9_xen_response. When it calls xen_9pfs_front_event_handler to handle IRQ requests, it will finally call schedule_work to start the work. When we call xen_9pfs_front_remove to remove the driver, there may be a sequence as follows: Fix it by finishing the work before cleanup in xen_9pfs_front_free. Note that, this bug is found by static analysis, which might be false positive. CPU0 CPU1 |p9_xen_response xen_9pfs_front_remove| xen_9pfs_front_free| kfree(priv) | //free priv | |p9_tag_lookup |//use priv->client Fixes: 71ebd71921e4 ("xen/9pfs: connect to the backend") Signed-off-by: Zheng Wang Reviewed-by: Michal Swiatkowski Signed-off-by: Eric Van Hensbergen --- net/9p/trans_xen.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index c64050e839ac6..1fffe2bed5b02 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -280,6 +280,10 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) write_unlock(&xen_9pfs_lock); for (i = 0; i < priv->num_rings; i++) { + struct xen_9pfs_dataring *ring = &priv->rings[i]; + + cancel_work_sync(&ring->work); + if (!priv->rings[i].intf) break; if (priv->rings[i].irq > 0) From 275b471e3d2daf1472ae8fa70dc1b50c9e0b9e75 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 30 Mar 2023 19:21:44 -0700 Subject: [PATCH 651/898] net: don't let netpoll invoke NAPI if in xmit context Commit 0db3dc73f7a3 ("[NETPOLL]: tx lock deadlock fix") narrowed down the region under netif_tx_trylock() inside netpoll_send_skb(). (At that point in time netif_tx_trylock() would lock all queues of the device.) Taking the tx lock was problematic because driver's cleanup method may take the same lock. So the change made us hold the xmit lock only around xmit, and expected the driver to take care of locking within ->ndo_poll_controller(). Unfortunately this only works if netpoll isn't itself called with the xmit lock already held. Netpoll code is careful and uses trylock(). The drivers, however, may be using plain lock(). Printing while holding the xmit lock is going to result in rare deadlocks. Luckily we record the xmit lock owners, so we can scan all the queues, the same way we scan NAPI owners. If any of the xmit locks is held by the local CPU we better not attempt any polling. It would be nice if we could narrow down the check to only the NAPIs and the queue we're trying to use. I don't see a way to do that now. Reported-by: Roman Gushchin Fixes: 0db3dc73f7a3 ("[NETPOLL]: tx lock deadlock fix") Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/netpoll.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a089b704b986d..e6a739b1afa9e 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -137,6 +137,20 @@ static void queue_process(struct work_struct *work) } } +static int netif_local_xmit_active(struct net_device *dev) +{ + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + + if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id()) + return 1; + } + + return 0; +} + static void poll_one_napi(struct napi_struct *napi) { int work; @@ -183,7 +197,10 @@ void netpoll_poll_dev(struct net_device *dev) if (!ni || down_trylock(&ni->dev_lock)) return; - if (!netif_running(dev)) { + /* Some drivers will take the same locks in poll and xmit, + * we can't poll if local CPU is already in xmit. + */ + if (!netif_running(dev) || netif_local_xmit_active(dev)) { up(&ni->dev_lock); return; } From 089b91a0155c4de1209a07ff2a7dd299ff3ece47 Mon Sep 17 00:00:00 2001 From: Gustav Ekelund Date: Fri, 31 Mar 2023 10:40:13 +0200 Subject: [PATCH 652/898] net: dsa: mv88e6xxx: Reset mv88e6393x force WD event bit The force watchdog event bit is not cleared during SW reset in the mv88e6393x switch. This is a different behavior compared to mv886390 which clears the force WD event bit as advertised. This causes a force WD event to be handled over and over again as the SW reset following the event never clears the force WD event bit. Explicitly clear the watchdog event register to 0 in irq_action when handling an event to prevent the switch from sending continuous interrupts. Marvell aren't aware of any other stuck bits apart from the force WD bit. Fixes: de776d0d316f ("net: dsa: mv88e6xxx: add support for mv88e6393x family" Signed-off-by: Gustav Ekelund Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- drivers/net/dsa/mv88e6xxx/global2.c | 20 ++++++++++++++++++++ drivers/net/dsa/mv88e6xxx/global2.h | 1 + 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 0de7b3611202c..7108f745fbf01 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -5601,7 +5601,7 @@ static const struct mv88e6xxx_ops mv88e6393x_ops = { * .port_set_upstream_port method. */ .set_egress_port = mv88e6393x_set_egress_port, - .watchdog_ops = &mv88e6390_watchdog_ops, + .watchdog_ops = &mv88e6393x_watchdog_ops, .mgmt_rsvd2cpu = mv88e6393x_port_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index ed3b2f88e7835..a7af3cebae97c 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -943,6 +943,26 @@ const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = { .irq_free = mv88e6390_watchdog_free, }; +static int mv88e6393x_watchdog_action(struct mv88e6xxx_chip *chip, int irq) +{ + mv88e6390_watchdog_action(chip, irq); + + /* Fix for clearing the force WD event bit. + * Unreleased erratum on mv88e6393x. + */ + mv88e6xxx_g2_write(chip, MV88E6390_G2_WDOG_CTL, + MV88E6390_G2_WDOG_CTL_UPDATE | + MV88E6390_G2_WDOG_CTL_PTR_EVENT); + + return IRQ_HANDLED; +} + +const struct mv88e6xxx_irq_ops mv88e6393x_watchdog_ops = { + .irq_action = mv88e6393x_watchdog_action, + .irq_setup = mv88e6390_watchdog_setup, + .irq_free = mv88e6390_watchdog_free, +}; + static irqreturn_t mv88e6xxx_g2_watchdog_thread_fn(int irq, void *dev_id) { struct mv88e6xxx_chip *chip = dev_id; diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index e973114d68906..7e091965582b7 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -369,6 +369,7 @@ int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target, extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops; extern const struct mv88e6xxx_irq_ops mv88e6250_watchdog_ops; extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops; +extern const struct mv88e6xxx_irq_ops mv88e6393x_watchdog_ops; extern const struct mv88e6xxx_avb_ops mv88e6165_avb_ops; extern const struct mv88e6xxx_avb_ops mv88e6352_avb_ops; From e669ce46740a9815953bb4452a6bc5a7fdc21a50 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 31 Mar 2023 14:49:59 +0200 Subject: [PATCH 653/898] net: ethernet: mtk_eth_soc: fix remaining throughput regression Based on further tests, it seems that the QDMA shaper is not able to perform shaping close to the MAC link rate without throughput loss. This cannot be compensated by increasing the shaping rate, so it seems to be an internal limit. Fix the remaining throughput regression by detecting that condition and limiting shaping to ports with lower link speed. This patch intentionally ignores link speed gain from TRGMII, because even on such links, shaping to 1000 Mbit/s incurs some throughput degradation. Fixes: f63959c7eec3 ("net: ethernet: mtk_eth_soc: implement multi-queue support for per-port queues") Tested-By: Frank Wunderlich Reported-by: Frank Wunderlich Signed-off-by: Felix Fietkau Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 282f9435d5ff1..e14050e178624 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -753,6 +753,7 @@ static void mtk_mac_link_up(struct phylink_config *config, MAC_MCR_FORCE_RX_FC); /* Configure speed */ + mac->speed = speed; switch (speed) { case SPEED_2500: case SPEED_1000: @@ -3235,6 +3236,9 @@ static int mtk_device_event(struct notifier_block *n, unsigned long event, void if (dp->index >= MTK_QDMA_NUM_QUEUES) return NOTIFY_DONE; + if (mac->speed > 0 && mac->speed <= s.base.speed) + s.base.speed = 0; + mtk_set_queue_speed(eth, dp->index + 3, s.base.speed); return NOTIFY_DONE; From 2584024b23552c00d95b50255e47bd18d306d31a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 1 Apr 2023 19:09:57 -0400 Subject: [PATCH 654/898] sctp: check send stream number after wait_for_sndbuf This patch fixes a corner case where the asoc out stream count may change after wait_for_sndbuf. When the main thread in the client starts a connection, if its out stream count is set to N while the in stream count in the server is set to N - 2, another thread in the client keeps sending the msgs with stream number N - 1, and waits for sndbuf before processing INIT_ACK. However, after processing INIT_ACK, the out stream count in the client is shrunk to N - 2, the same to the in stream count in the server. The crash occurs when the thread waiting for sndbuf is awake and sends the msg in a non-existing stream(N - 1), the call trace is as below: KASAN: null-ptr-deref in range [0x0000000000000038-0x000000000000003f] Call Trace: sctp_cmd_send_msg net/sctp/sm_sideeffect.c:1114 [inline] sctp_cmd_interpreter net/sctp/sm_sideeffect.c:1777 [inline] sctp_side_effects net/sctp/sm_sideeffect.c:1199 [inline] sctp_do_sm+0x197d/0x5310 net/sctp/sm_sideeffect.c:1170 sctp_primitive_SEND+0x9f/0xc0 net/sctp/primitive.c:163 sctp_sendmsg_to_asoc+0x10eb/0x1a30 net/sctp/socket.c:1868 sctp_sendmsg+0x8d4/0x1d90 net/sctp/socket.c:2026 inet_sendmsg+0x9d/0xe0 net/ipv4/af_inet.c:825 sock_sendmsg_nosec net/socket.c:722 [inline] sock_sendmsg+0xde/0x190 net/socket.c:745 The fix is to add an unlikely check for the send stream number after the thread wakes up from the wait_for_sndbuf. Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") Reported-by: syzbot+47c24ca20a2fa01f082e@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b91616f819def..218e0982c3707 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1830,6 +1830,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); if (err) goto err; + if (unlikely(sinfo->sinfo_stream >= asoc->stream.outcnt)) { + err = -EINVAL; + goto err; + } } if (sctp_state(asoc, CLOSED)) { From f95b8ea79c47c0ad3d18f45ad538f9970e414d1f Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 7 Feb 2023 11:22:54 +0100 Subject: [PATCH 655/898] Revert "venus: firmware: Correct non-pix start and end addresses" This reverts commit a837e5161cff, which broke probing of the venus driver, at least on the SC7180 SoC HP X2 Chromebook: qcom-venus aa00000.video-codec: Adding to iommu group 11 qcom-venus aa00000.video-codec: non legacy binding qcom-venus aa00000.video-codec: failed to reset venus core qcom-venus: probe of aa00000.video-codec failed with error -110 Matthias Kaehlcke also reported that the same change caused a regression in SC7180 and sc7280, that prevents AOSS from entering sleep mode during system suspend. So let's revert this commit for now to fix both issues. Fixes: a837e5161cff ("venus: firmware: Correct non-pix start and end addresses") Reported-by: Matthias Kaehlcke Signed-off-by: Javier Martinez Canillas Signed-off-by: Linus Torvalds --- drivers/media/platform/qcom/venus/firmware.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/qcom/venus/firmware.c b/drivers/media/platform/qcom/venus/firmware.c index 61ff20a7e935e..cfb11c551167c 100644 --- a/drivers/media/platform/qcom/venus/firmware.c +++ b/drivers/media/platform/qcom/venus/firmware.c @@ -38,8 +38,8 @@ static void venus_reset_cpu(struct venus_core *core) writel(fw_size, wrapper_base + WRAPPER_FW_END_ADDR); writel(0, wrapper_base + WRAPPER_CPA_START_ADDR); writel(fw_size, wrapper_base + WRAPPER_CPA_END_ADDR); - writel(0, wrapper_base + WRAPPER_NONPIX_START_ADDR); - writel(0, wrapper_base + WRAPPER_NONPIX_END_ADDR); + writel(fw_size, wrapper_base + WRAPPER_NONPIX_START_ADDR); + writel(fw_size, wrapper_base + WRAPPER_NONPIX_END_ADDR); if (IS_V6(core)) { /* Bring XTSS out of reset */ From 7e364e56293bb98cae1b55fd835f5991c4e96e7d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Apr 2023 14:29:29 -0700 Subject: [PATCH 656/898] Linux 6.3-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index da2586d4c7281..ef4e96b9cd5b0 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From 347dca97f75d461ac2ac256b68e495dfc6f14d61 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Wed, 29 Mar 2023 00:07:30 +0000 Subject: [PATCH 657/898] Update email address and mailing list for v9fs We've recently moved the mailing list to lists.linux.dev to move away from the sourceforge infrastructure. This also updates the website from the (no longer v9fs relevant?) swik.net address to the github group which contains pointers to test cases, the protocol, servers, etc. This also changes my email from my gmail to my kernel.org address. Signed-off-by: Eric Van Hensbergen Acked-by: Dominique Martinet Acked-by: Christian Schoenebeck --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d5bc223f3053..8799a222048be 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -224,13 +224,13 @@ S: Orphan / Obsolete F: drivers/net/ethernet/8390/ 9P FILE SYSTEM -M: Eric Van Hensbergen +M: Eric Van Hensbergen M: Latchesar Ionkov M: Dominique Martinet R: Christian Schoenebeck -L: v9fs-developer@lists.sourceforge.net +L: v9fs@lists.linux.dev S: Maintained -W: http://swik.net/v9fs +W: http://github.com/v9fs Q: http://patchwork.kernel.org/project/v9fs-devel/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs.git T: git git://github.com/martinetd/linux.git From 86eb94bf8006a85738f0ccf49e3ce894e03922a6 Mon Sep 17 00:00:00 2001 From: Adrien Thierry Date: Wed, 29 Mar 2023 16:54:25 -0400 Subject: [PATCH 658/898] scsi: Revert "scsi: ufs: core: Initialize devfreq synchronously" This reverts commit 7dafc3e007918384c8693ff8d70381b5c1e9c247. This patch introduced a regression [1] where hba->pwr_info is used before being initialized, which could create issues in ufshcd_scale_gear(). Revert it until a better solution is found. [1] https://lore.kernel.org/all/CAGaU9a_PMZhqv+YJ0r3w-hJMsR922oxW6Kg59vw+oen-NZ6Otw@mail.gmail.com Signed-off-by: Adrien Thierry Link: https://lore.kernel.org/r/20230329205426.46393-1-athierry@redhat.com Reviewed-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 47 +++++++++++++-------------------------- include/ufs/ufshcd.h | 1 - 2 files changed, 16 insertions(+), 32 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 37e178a9ac475..70b112038792a 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -1409,13 +1409,6 @@ static int ufshcd_devfreq_target(struct device *dev, struct ufs_clk_info *clki; unsigned long irq_flags; - /* - * Skip devfreq if UFS initialization is not finished. - * Otherwise ufs could be in a inconsistent state. - */ - if (!smp_load_acquire(&hba->logical_unit_scan_finished)) - return 0; - if (!ufshcd_is_clkscaling_supported(hba)) return -EINVAL; @@ -8399,6 +8392,22 @@ static int ufshcd_add_lus(struct ufs_hba *hba) if (ret) goto out; + /* Initialize devfreq after UFS device is detected */ + if (ufshcd_is_clkscaling_supported(hba)) { + memcpy(&hba->clk_scaling.saved_pwr_info.info, + &hba->pwr_info, + sizeof(struct ufs_pa_layer_attr)); + hba->clk_scaling.saved_pwr_info.is_valid = true; + hba->clk_scaling.is_allowed = true; + + ret = ufshcd_devfreq_init(hba); + if (ret) + goto out; + + hba->clk_scaling.is_enabled = true; + ufshcd_init_clk_scaling_sysfs(hba); + } + ufs_bsg_probe(hba); ufshpb_init(hba); scsi_scan_host(hba->host); @@ -8670,12 +8679,6 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie) if (ret) { pm_runtime_put_sync(hba->dev); ufshcd_hba_exit(hba); - } else { - /* - * Make sure that when reader code sees UFS initialization has finished, - * all initialization steps have really been executed. - */ - smp_store_release(&hba->logical_unit_scan_finished, true); } } @@ -10316,30 +10319,12 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) */ ufshcd_set_ufs_dev_active(hba); - /* Initialize devfreq */ - if (ufshcd_is_clkscaling_supported(hba)) { - memcpy(&hba->clk_scaling.saved_pwr_info.info, - &hba->pwr_info, - sizeof(struct ufs_pa_layer_attr)); - hba->clk_scaling.saved_pwr_info.is_valid = true; - hba->clk_scaling.is_allowed = true; - - err = ufshcd_devfreq_init(hba); - if (err) - goto rpm_put_sync; - - hba->clk_scaling.is_enabled = true; - ufshcd_init_clk_scaling_sysfs(hba); - } - async_schedule(ufshcd_async_scan, hba); ufs_sysfs_add_nodes(hba->dev); device_enable_async_suspend(dev); return 0; -rpm_put_sync: - pm_runtime_put_sync(dev); free_tmf_queue: blk_mq_destroy_queue(hba->tmf_queue); blk_put_queue(hba->tmf_queue); diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 25aab8ec4f86b..431c3afb2ce0f 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -979,7 +979,6 @@ struct ufs_hba { struct completion *uic_async_done; enum ufshcd_state ufshcd_state; - bool logical_unit_scan_finished; u32 eh_flags; u32 intr_mask; u16 ee_ctrl_mask; From a3d27dfdcfc27ac3f46de5391bb6d24f04af7941 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Fri, 31 Mar 2023 17:53:17 +0530 Subject: [PATCH 659/898] scsi: mpi3mr: Handle soft reset in progress fault code (0xF002) The driver is exiting from the fault watchdog thread if it sees the 0xF002 (Soft reset in progress) fault code. If the driver initiates the soft reset, then the driver restarts the watchdog at the end of the soft reset completion. However, if the soft reset is initiated by the firmware asynchronously, then the driver will never restart the watchdog and never re-initialize the controller after the asynchronous soft reset completion. Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20230331122317.11391-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index a565817aa56d4..d109a4ceb72b1 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -2526,7 +2526,7 @@ static void mpi3mr_watchdog_work(struct work_struct *work) mrioc->unrecoverable = 1; goto schedule_work; case MPI3_SYSIF_FAULT_CODE_SOFT_RESET_IN_PROGRESS: - return; + goto schedule_work; case MPI3_SYSIF_FAULT_CODE_CI_ACTIVATION_RESET: reset_reason = MPI3MR_RESET_FROM_CIACTIV_FAULT; break; From 85ade4010e13ef152ea925c74d94253db92e5428 Mon Sep 17 00:00:00 2001 From: Li Zetao Date: Sat, 25 Mar 2023 11:00:04 +0000 Subject: [PATCH 660/898] scsi: qla2xxx: Fix memory leak in qla2x00_probe_one() There is a memory leak reported by kmemleak: unreferenced object 0xffffc900003f0000 (size 12288): comm "modprobe", pid 19117, jiffies 4299751452 (age 42490.264s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000629261a8>] __vmalloc_node_range+0xe56/0x1110 [<0000000001906886>] __vmalloc_node+0xbd/0x150 [<000000005bb4dc34>] vmalloc+0x25/0x30 [<00000000a2dc1194>] qla2x00_create_host+0x7a0/0xe30 [qla2xxx] [<0000000062b14b47>] qla2x00_probe_one+0x2eb8/0xd160 [qla2xxx] [<00000000641ccc04>] local_pci_probe+0xeb/0x1a0 The root cause is traced to an error-handling path in qla2x00_probe_one() when the adapter "base_vha" initialize failed. The fab_scan_rp "scan.l" is used to record the port information and it is allocated in qla2x00_create_host(). However, it is not released in the error handling path "probe_failed". Fix this by freeing the memory of "scan.l" when an error occurs in the adapter initialization process. Fixes: a4239945b8ad ("scsi: qla2xxx: Add switch command to simplify fabric discovery") Signed-off-by: Li Zetao Link: https://lore.kernel.org/r/20230325110004.363898-1-lizetao1@huawei.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index bee1b8a820207..d0cdbfe771a9c 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3617,6 +3617,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) probe_failed: qla_enode_stop(base_vha); qla_edb_stop(base_vha); + vfree(base_vha->scan.l); if (base_vha->gnl.l) { dma_free_coherent(&ha->pdev->dev, base_vha->gnl.size, base_vha->gnl.l, base_vha->gnl.ldma); From 48b19b79cfa37b1e50da3b5a8af529f994c08901 Mon Sep 17 00:00:00 2001 From: Zhong Jinghua Date: Wed, 29 Mar 2023 15:17:39 +0800 Subject: [PATCH 661/898] scsi: iscsi_tcp: Check that sock is valid before iscsi_set_param() The validity of sock should be checked before assignment to avoid incorrect values. Commit 57569c37f0ad ("scsi: iscsi: iscsi_tcp: Fix null-ptr-deref while calling getpeername()") introduced this change which may lead to inconsistent values of tcp_sw_conn->sendpage and conn->datadgst_en. Fix the issue by moving the position of the assignment. Fixes: 57569c37f0ad ("scsi: iscsi: iscsi_tcp: Fix null-ptr-deref while calling getpeername()") Signed-off-by: Zhong Jinghua Link: https://lore.kernel.org/r/20230329071739.2175268-1-zhongjinghua@huaweicloud.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/iscsi_tcp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index c76f82fb8b636..15f4529089262 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -771,13 +771,12 @@ static int iscsi_sw_tcp_conn_set_param(struct iscsi_cls_conn *cls_conn, iscsi_set_param(cls_conn, param, buf, buflen); break; case ISCSI_PARAM_DATADGST_EN: - iscsi_set_param(cls_conn, param, buf, buflen); - mutex_lock(&tcp_sw_conn->sock_lock); if (!tcp_sw_conn->sock) { mutex_unlock(&tcp_sw_conn->sock_lock); return -ENOTCONN; } + iscsi_set_param(cls_conn, param, buf, buflen); tcp_sw_conn->sendpage = conn->datadgst_en ? sock_no_sendpage : tcp_sw_conn->sock->ops->sendpage; mutex_unlock(&tcp_sw_conn->sock_lock); From 3a9b557f44ea8f216aab515a7db20e23f0eb51b9 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 31 Mar 2023 08:42:12 +0900 Subject: [PATCH 662/898] ksmbd: delete asynchronous work from list When smb2_lock request is canceled by smb2_cancel or smb2_close(), ksmbd is missing deleting async_request_entry async_requests list. Because calling init_smb2_rsp_hdr() in smb2_lock() mark ->synchronous as true and then it will not be deleted in ksmbd_conn_try_dequeue_request(). This patch add release_async_work() to release the ones allocated for async work. Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/connection.c | 12 +++++------- fs/ksmbd/ksmbd_work.h | 2 +- fs/ksmbd/smb2pdu.c | 33 +++++++++++++++++++++------------ fs/ksmbd/smb2pdu.h | 1 + 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index 3f5dfebaa0418..365ac32af5058 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -112,10 +112,8 @@ void ksmbd_conn_enqueue_request(struct ksmbd_work *work) struct ksmbd_conn *conn = work->conn; struct list_head *requests_queue = NULL; - if (conn->ops->get_cmd_val(work) != SMB2_CANCEL_HE) { + if (conn->ops->get_cmd_val(work) != SMB2_CANCEL_HE) requests_queue = &conn->requests; - work->synchronous = true; - } if (requests_queue) { atomic_inc(&conn->req_running); @@ -136,14 +134,14 @@ int ksmbd_conn_try_dequeue_request(struct ksmbd_work *work) if (!work->multiRsp) atomic_dec(&conn->req_running); - spin_lock(&conn->request_lock); if (!work->multiRsp) { + spin_lock(&conn->request_lock); list_del_init(&work->request_entry); - if (!work->synchronous) - list_del_init(&work->async_request_entry); + spin_unlock(&conn->request_lock); + if (work->asynchronous) + release_async_work(work); ret = 0; } - spin_unlock(&conn->request_lock); wake_up_all(&conn->req_running_q); return ret; diff --git a/fs/ksmbd/ksmbd_work.h b/fs/ksmbd/ksmbd_work.h index 3234f2cf6327c..f8ae6144c0aea 100644 --- a/fs/ksmbd/ksmbd_work.h +++ b/fs/ksmbd/ksmbd_work.h @@ -68,7 +68,7 @@ struct ksmbd_work { /* Request is encrypted */ bool encrypted:1; /* Is this SYNC or ASYNC ksmbd_work */ - bool synchronous:1; + bool asynchronous:1; bool need_invalidate_rkey:1; unsigned int remote_key; diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 97c9d1b5bcc0b..3656ccac06e33 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -498,12 +498,6 @@ int init_smb2_rsp_hdr(struct ksmbd_work *work) rsp_hdr->SessionId = rcv_hdr->SessionId; memcpy(rsp_hdr->Signature, rcv_hdr->Signature, 16); - work->synchronous = true; - if (work->async_id) { - ksmbd_release_id(&conn->async_ida, work->async_id); - work->async_id = 0; - } - return 0; } @@ -644,7 +638,7 @@ int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg) pr_err("Failed to alloc async message id\n"); return id; } - work->synchronous = false; + work->asynchronous = true; work->async_id = id; rsp_hdr->Id.AsyncId = cpu_to_le64(id); @@ -664,6 +658,24 @@ int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg) return 0; } +void release_async_work(struct ksmbd_work *work) +{ + struct ksmbd_conn *conn = work->conn; + + spin_lock(&conn->request_lock); + list_del_init(&work->async_request_entry); + spin_unlock(&conn->request_lock); + + work->asynchronous = 0; + work->cancel_fn = NULL; + kfree(work->cancel_argv); + work->cancel_argv = NULL; + if (work->async_id) { + ksmbd_release_id(&conn->async_ida, work->async_id); + work->async_id = 0; + } +} + void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status) { struct smb2_hdr *rsp_hdr; @@ -7045,13 +7057,9 @@ int smb2_lock(struct ksmbd_work *work) ksmbd_vfs_posix_lock_wait(flock); - spin_lock(&work->conn->request_lock); spin_lock(&fp->f_lock); list_del(&work->fp_entry); - work->cancel_fn = NULL; - kfree(argv); spin_unlock(&fp->f_lock); - spin_unlock(&work->conn->request_lock); if (work->state != KSMBD_WORK_ACTIVE) { list_del(&smb_lock->llist); @@ -7069,6 +7077,7 @@ int smb2_lock(struct ksmbd_work *work) work->send_no_response = 1; goto out; } + init_smb2_rsp_hdr(work); smb2_set_err_rsp(work); rsp->hdr.Status = @@ -7081,7 +7090,7 @@ int smb2_lock(struct ksmbd_work *work) spin_lock(&work->conn->llist_lock); list_del(&smb_lock->clist); spin_unlock(&work->conn->llist_lock); - + release_async_work(work); goto retry; } else if (!rc) { spin_lock(&work->conn->llist_lock); diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h index 0c8a770fe3189..9420dd2813fb7 100644 --- a/fs/ksmbd/smb2pdu.h +++ b/fs/ksmbd/smb2pdu.h @@ -486,6 +486,7 @@ int find_matching_smb2_dialect(int start_index, __le16 *cli_dialects, struct file_lock *smb_flock_init(struct file *f); int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg); +void release_async_work(struct ksmbd_work *work); void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status); struct channel *lookup_chann_list(struct ksmbd_session *sess, struct ksmbd_conn *conn); From dc8289f912387c3bcfbc5d2db29c8947fa207c11 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 1 Apr 2023 13:21:04 +0900 Subject: [PATCH 663/898] ksmbd: fix slab-out-of-bounds in init_smb2_rsp_hdr When smb1 mount fails, KASAN detect slab-out-of-bounds in init_smb2_rsp_hdr like the following one. For smb1 negotiate(56bytes) , init_smb2_rsp_hdr() for smb2 is called. The issue occurs while handling smb1 negotiate as smb2 server operations. Add smb server operations for smb1 (get_cmd_val, init_rsp_hdr, allocate_rsp_buf, check_user_session) to handle smb1 negotiate so that smb2 server operation does not handle it. [ 411.400423] CIFS: VFS: Use of the less secure dialect vers=1.0 is not recommended unless required for access to very old servers [ 411.400452] CIFS: Attempting to mount \\192.168.45.139\homes [ 411.479312] ksmbd: init_smb2_rsp_hdr : 492 [ 411.479323] ================================================================== [ 411.479327] BUG: KASAN: slab-out-of-bounds in init_smb2_rsp_hdr+0x1e2/0x1f4 [ksmbd] [ 411.479369] Read of size 16 at addr ffff888488ed0734 by task kworker/14:1/199 [ 411.479379] CPU: 14 PID: 199 Comm: kworker/14:1 Tainted: G OE 6.1.21 #3 [ 411.479386] Hardware name: ASUSTeK COMPUTER INC. Z10PA-D8 Series/Z10PA-D8 Series, BIOS 3801 08/23/2019 [ 411.479390] Workqueue: ksmbd-io handle_ksmbd_work [ksmbd] [ 411.479425] Call Trace: [ 411.479428] [ 411.479432] dump_stack_lvl+0x49/0x63 [ 411.479444] print_report+0x171/0x4a8 [ 411.479452] ? kasan_complete_mode_report_info+0x3c/0x200 [ 411.479463] ? init_smb2_rsp_hdr+0x1e2/0x1f4 [ksmbd] [ 411.479497] kasan_report+0xb4/0x130 [ 411.479503] ? init_smb2_rsp_hdr+0x1e2/0x1f4 [ksmbd] [ 411.479537] kasan_check_range+0x149/0x1e0 [ 411.479543] memcpy+0x24/0x70 [ 411.479550] init_smb2_rsp_hdr+0x1e2/0x1f4 [ksmbd] [ 411.479585] handle_ksmbd_work+0x109/0x760 [ksmbd] [ 411.479616] ? _raw_spin_unlock_irqrestore+0x50/0x50 [ 411.479624] ? smb3_encrypt_resp+0x340/0x340 [ksmbd] [ 411.479656] process_one_work+0x49c/0x790 [ 411.479667] worker_thread+0x2b1/0x6e0 [ 411.479674] ? process_one_work+0x790/0x790 [ 411.479680] kthread+0x177/0x1b0 [ 411.479686] ? kthread_complete_and_exit+0x30/0x30 [ 411.479692] ret_from_fork+0x22/0x30 [ 411.479702] Fixes: 39b291b86b59 ("ksmbd: return unsupported error on smb1 mount") Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/server.c | 5 +- fs/ksmbd/smb2pdu.c | 3 - fs/ksmbd/smb_common.c | 138 +++++++++++++++++++++++++++++++++--------- fs/ksmbd/smb_common.h | 2 +- 4 files changed, 111 insertions(+), 37 deletions(-) diff --git a/fs/ksmbd/server.c b/fs/ksmbd/server.c index 394b6ceac4312..0d8242789dc8f 100644 --- a/fs/ksmbd/server.c +++ b/fs/ksmbd/server.c @@ -289,10 +289,7 @@ static int queue_ksmbd_work(struct ksmbd_conn *conn) work->request_buf = conn->request_buf; conn->request_buf = NULL; - if (ksmbd_init_smb_server(work)) { - ksmbd_free_work_struct(work); - return -EINVAL; - } + ksmbd_init_smb_server(work); ksmbd_conn_enqueue_request(work); atomic_inc(&conn->r_count); diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 3656ccac06e33..8af939a181be5 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -229,9 +229,6 @@ int init_smb2_neg_rsp(struct ksmbd_work *work) struct smb2_negotiate_rsp *rsp; struct ksmbd_conn *conn = work->conn; - if (conn->need_neg == false) - return -EINVAL; - *(__be32 *)work->response_buf = cpu_to_be32(conn->vals->header_size); diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c index 9c1ce6d199ce4..af0c2a9b85290 100644 --- a/fs/ksmbd/smb_common.c +++ b/fs/ksmbd/smb_common.c @@ -283,20 +283,121 @@ static int ksmbd_negotiate_smb_dialect(void *buf) return BAD_PROT_ID; } -int ksmbd_init_smb_server(struct ksmbd_work *work) +#define SMB_COM_NEGOTIATE_EX 0x0 + +/** + * get_smb1_cmd_val() - get smb command value from smb header + * @work: smb work containing smb header + * + * Return: smb command value + */ +static u16 get_smb1_cmd_val(struct ksmbd_work *work) { - struct ksmbd_conn *conn = work->conn; + return SMB_COM_NEGOTIATE_EX; +} - if (conn->need_neg == false) +/** + * init_smb1_rsp_hdr() - initialize smb negotiate response header + * @work: smb work containing smb request + * + * Return: 0 on success, otherwise -EINVAL + */ +static int init_smb1_rsp_hdr(struct ksmbd_work *work) +{ + struct smb_hdr *rsp_hdr = (struct smb_hdr *)work->response_buf; + struct smb_hdr *rcv_hdr = (struct smb_hdr *)work->request_buf; + + /* + * Remove 4 byte direct TCP header. + */ + *(__be32 *)work->response_buf = + cpu_to_be32(sizeof(struct smb_hdr) - 4); + + rsp_hdr->Command = SMB_COM_NEGOTIATE; + *(__le32 *)rsp_hdr->Protocol = SMB1_PROTO_NUMBER; + rsp_hdr->Flags = SMBFLG_RESPONSE; + rsp_hdr->Flags2 = SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS | + SMBFLG2_EXT_SEC | SMBFLG2_IS_LONG_NAME; + rsp_hdr->Pid = rcv_hdr->Pid; + rsp_hdr->Mid = rcv_hdr->Mid; + return 0; +} + +/** + * smb1_check_user_session() - check for valid session for a user + * @work: smb work containing smb request buffer + * + * Return: 0 on success, otherwise error + */ +static int smb1_check_user_session(struct ksmbd_work *work) +{ + unsigned int cmd = work->conn->ops->get_cmd_val(work); + + if (cmd == SMB_COM_NEGOTIATE_EX) return 0; - init_smb3_11_server(conn); + return -EINVAL; +} + +/** + * smb1_allocate_rsp_buf() - allocate response buffer for a command + * @work: smb work containing smb request + * + * Return: 0 on success, otherwise -ENOMEM + */ +static int smb1_allocate_rsp_buf(struct ksmbd_work *work) +{ + work->response_buf = kmalloc(MAX_CIFS_SMALL_BUFFER_SIZE, + GFP_KERNEL | __GFP_ZERO); + work->response_sz = MAX_CIFS_SMALL_BUFFER_SIZE; + + if (!work->response_buf) { + pr_err("Failed to allocate %u bytes buffer\n", + MAX_CIFS_SMALL_BUFFER_SIZE); + return -ENOMEM; + } - if (conn->ops->get_cmd_val(work) != SMB_COM_NEGOTIATE) - conn->need_neg = false; return 0; } +static struct smb_version_ops smb1_server_ops = { + .get_cmd_val = get_smb1_cmd_val, + .init_rsp_hdr = init_smb1_rsp_hdr, + .allocate_rsp_buf = smb1_allocate_rsp_buf, + .check_user_session = smb1_check_user_session, +}; + +static int smb1_negotiate(struct ksmbd_work *work) +{ + return ksmbd_smb_negotiate_common(work, SMB_COM_NEGOTIATE); +} + +static struct smb_version_cmds smb1_server_cmds[1] = { + [SMB_COM_NEGOTIATE_EX] = { .proc = smb1_negotiate, }, +}; + +static void init_smb1_server(struct ksmbd_conn *conn) +{ + conn->ops = &smb1_server_ops; + conn->cmds = smb1_server_cmds; + conn->max_cmds = ARRAY_SIZE(smb1_server_cmds); +} + +void ksmbd_init_smb_server(struct ksmbd_work *work) +{ + struct ksmbd_conn *conn = work->conn; + __le32 proto; + + if (conn->need_neg == false) + return; + + proto = *(__le32 *)((struct smb_hdr *)work->request_buf)->Protocol; + if (proto == SMB1_PROTO_NUMBER) + init_smb1_server(conn); + else + init_smb3_11_server(conn); +} + int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level, struct ksmbd_file *dir, struct ksmbd_dir_info *d_info, @@ -444,20 +545,10 @@ static int smb_handle_negotiate(struct ksmbd_work *work) ksmbd_debug(SMB, "Unsupported SMB1 protocol\n"); - /* - * Remove 4 byte direct TCP header, add 2 byte bcc and - * 2 byte DialectIndex. - */ - *(__be32 *)work->response_buf = - cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2 + 2); + /* Add 2 byte bcc and 2 byte DialectIndex. */ + inc_rfc1001_len(work->response_buf, 4); neg_rsp->hdr.Status.CifsError = STATUS_SUCCESS; - neg_rsp->hdr.Command = SMB_COM_NEGOTIATE; - *(__le32 *)neg_rsp->hdr.Protocol = SMB1_PROTO_NUMBER; - neg_rsp->hdr.Flags = SMBFLG_RESPONSE; - neg_rsp->hdr.Flags2 = SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS | - SMBFLG2_EXT_SEC | SMBFLG2_IS_LONG_NAME; - neg_rsp->hdr.WordCount = 1; neg_rsp->DialectIndex = cpu_to_le16(work->conn->dialect); neg_rsp->ByteCount = 0; @@ -473,24 +564,13 @@ int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command) ksmbd_negotiate_smb_dialect(work->request_buf); ksmbd_debug(SMB, "conn->dialect 0x%x\n", conn->dialect); - if (command == SMB2_NEGOTIATE_HE) { - struct smb2_hdr *smb2_hdr = smb2_get_msg(work->request_buf); - - if (smb2_hdr->ProtocolId != SMB2_PROTO_NUMBER) { - ksmbd_debug(SMB, "Downgrade to SMB1 negotiation\n"); - command = SMB_COM_NEGOTIATE; - } - } - if (command == SMB2_NEGOTIATE_HE) { ret = smb2_handle_negotiate(work); - init_smb2_neg_rsp(work); return ret; } if (command == SMB_COM_NEGOTIATE) { if (__smb2_negotiate(conn)) { - conn->need_neg = true; init_smb3_11_server(conn); init_smb2_neg_rsp(work); ksmbd_debug(SMB, "Upgrade to SMB2 negotiation\n"); diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h index d30ce4c1a1517..9130d2e3cd78c 100644 --- a/fs/ksmbd/smb_common.h +++ b/fs/ksmbd/smb_common.h @@ -427,7 +427,7 @@ bool ksmbd_smb_request(struct ksmbd_conn *conn); int ksmbd_lookup_dialect_by_id(__le16 *cli_dialects, __le16 dialects_count); -int ksmbd_init_smb_server(struct ksmbd_work *work); +void ksmbd_init_smb_server(struct ksmbd_work *work); struct ksmbd_kstat; int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, From 764a2ab9eb56e1200083e771aab16186836edf1d Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 21 May 2021 11:38:11 +0200 Subject: [PATCH 664/898] drm/panfrost: Fix the panfrost_mmu_map_fault_addr() error path Make sure all bo->base.pages entries are either NULL or pointing to a valid page before calling drm_gem_shmem_put_pages(). Reported-by: Tomeu Vizoso Cc: Fixes: 187d2929206e ("drm/panfrost: Add support for GPU heap allocations") Signed-off-by: Boris Brezillon Reviewed-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20210521093811.1018992-1-boris.brezillon@collabora.com --- drivers/gpu/drm/panfrost/panfrost_mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 666a5e53fe193..e961fa27702ce 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -504,6 +504,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, if (IS_ERR(pages[i])) { mutex_unlock(&bo->base.pages_lock); ret = PTR_ERR(pages[i]); + pages[i] = NULL; goto err_pages; } } From f1b17f429f066f920a6a1056332e66f8a5b92256 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 30 Jan 2023 12:06:31 +0000 Subject: [PATCH 665/898] drm/i915/ttm: fix sparse warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sparse complains with: drivers/gpu/drm/i915/gem/i915_gem_ttm.c:1066:21: sparse: expected restricted vm_fault_t [assigned] [usertype] ret drivers/gpu/drm/i915/gem/i915_gem_ttm.c:1066:21: sparse: got int Fixes: 516198d317d8 ("drm/i915: audit bo->resource usage v3") Reported-by: kernel test robot Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20230130101230.25347-1-matthew.auld@intel.com Reviewed-by: Nirmoy Das Acked-by: Christian König Signed-off-by: Christian König (cherry picked from commit fde789e8339c60c8c58e5a71fa819fcfe52d839e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 7420276827a5b..4758f21c91e15 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1067,11 +1067,12 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) .interruptible = true, .no_wait_gpu = true, /* should be idle already */ }; + int err; GEM_BUG_ON(!bo->ttm || !(bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED)); - ret = ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx); - if (ret) { + err = ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx); + if (err) { dma_resv_unlock(bo->base.resv); return VM_FAULT_SIGBUS; } From c74237496fbc799257b091179dd01a3200f7314d Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 13 Mar 2023 13:55:56 -0700 Subject: [PATCH 666/898] drm/i915/huc: Cancel HuC delayed load timer on reset. In the rare case where we do a full GT reset after starting the HuC load and before it completes (which basically boils down to i915 hanging during init), we need to cancel the delayed load fence, as it will be re-initialized in the post-reset recovery. Fixes: 27536e03271d ("drm/i915/huc: track delayed HuC load with a fence") Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20230313205556.1174503-1-daniele.ceraolospurio@intel.com (cherry picked from commit cdf7911f7dbcb37228409a63bf75630776c45a15) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 7 +++++++ drivers/gpu/drm/i915/gt/uc/intel_huc.h | 7 +------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 410905da8e974..0c103ca160d10 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -235,6 +235,13 @@ static void delayed_huc_load_fini(struct intel_huc *huc) i915_sw_fence_fini(&huc->delayed_load.fence); } +int intel_huc_sanitize(struct intel_huc *huc) +{ + delayed_huc_load_complete(huc); + intel_uc_fw_sanitize(&huc->fw); + return 0; +} + static bool vcs_supported(struct intel_gt *gt) { intel_engine_mask_t mask = gt->info.engine_mask; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index 52db03620c609..db555b3c1f562 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -41,6 +41,7 @@ struct intel_huc { } delayed_load; }; +int intel_huc_sanitize(struct intel_huc *huc); void intel_huc_init_early(struct intel_huc *huc); int intel_huc_init(struct intel_huc *huc); void intel_huc_fini(struct intel_huc *huc); @@ -54,12 +55,6 @@ bool intel_huc_is_authenticated(struct intel_huc *huc); void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus); void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus); -static inline int intel_huc_sanitize(struct intel_huc *huc) -{ - intel_uc_fw_sanitize(&huc->fw); - return 0; -} - static inline bool intel_huc_is_supported(struct intel_huc *huc) { return intel_uc_fw_is_supported(&huc->fw); From 95d939bb97ff9be101ae4ceeb322535589da2190 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Mon, 27 Mar 2023 09:42:17 +0300 Subject: [PATCH 667/898] drm/i915: Use compressed bpp when calculating m/n value for DP MST DSC For obvious reasons, we use compressed bpp instead of pipe bpp for DSC DP SST case. Lets be consistent and use compressed bpp instead of pipe bpp, also in DP MST DSC case. Signed-off-by: Stanislav Lisovskiy Reviewed-by: Vinod Govindapillai Fixes: d51f25eb479a ("drm/i915: Add DSC support to MST path") Link: https://patchwork.freedesktop.org/patch/msgid/20230327064217.24033-1-stanislav.lisovskiy@intel.com (cherry picked from commit ea1deabc6f11575eb3375b454457eaa3c9837abc) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 2106b3de225a0..7c9b328bc2d73 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -232,7 +232,7 @@ static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder *encoder, return slots; } - intel_link_compute_m_n(crtc_state->pipe_bpp, + intel_link_compute_m_n(crtc_state->dsc.compressed_bpp, crtc_state->lane_count, adjusted_mode->crtc_clock, crtc_state->port_clock, From dc30c011469165d57af9adac5baff7d767d20e5c Mon Sep 17 00:00:00 2001 From: Min Li Date: Tue, 28 Mar 2023 17:36:27 +0800 Subject: [PATCH 668/898] drm/i915: fix race condition UAF in i915_perf_add_config_ioctl Userspace can guess the id value and try to race oa_config object creation with config remove, resulting in a use-after-free if we dereference the object after unlocking the metrics_lock. For that reason, unlocking the metrics_lock must be done after we are done dereferencing the object. Signed-off-by: Min Li Fixes: f89823c21224 ("drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface") Cc: # v4.14+ Reviewed-by: Andi Shyti Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20230328093627.5067-1-lm0963hack@gmail.com [tursulin: Manually added stable tag.] (cherry picked from commit 49f6f6483b652108bcb73accd0204a464b922395) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_perf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 283a4a3c6862f..0040749363001 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4638,13 +4638,13 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, err = oa_config->id; goto sysfs_err; } - - mutex_unlock(&perf->metrics_lock); + id = oa_config->id; drm_dbg(&perf->i915->drm, "Added config %s id=%i\n", oa_config->uuid, oa_config->id); + mutex_unlock(&perf->metrics_lock); - return oa_config->id; + return id; sysfs_err: mutex_unlock(&perf->metrics_lock); From dc3421560a67361442f33ec962fc6dd48895a0df Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 20 Mar 2023 15:14:23 +0000 Subject: [PATCH 669/898] drm/i915: Fix context runtime accounting When considering whether to mark one context as stopped and another as started we need to look at whether the previous and new _contexts_ are different and not just requests. Otherwise the software tracked context start time was incorrectly updated to the most recent lite-restore time- stamp, which was in some cases resulting in active time going backward, until the context switch (typically the heartbeat pulse) would synchronise with the hardware tracked context runtime. Easiest use case to observe this behaviour was with a full screen clients with close to 100% engine load. Signed-off-by: Tvrtko Ursulin Fixes: bb6287cb1886 ("drm/i915: Track context current active time") Cc: # v5.19+ Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20230320151423.1708436-1-tvrtko.ursulin@linux.intel.com [tursulin: Fix spelling in commit msg.] (cherry picked from commit b3e70051879c665acdd3a1ab50d0ed58d6a8001f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 1bbe6708d0a7f..750326434677f 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2018,6 +2018,8 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) * inspecting the queue to see if we need to resumbit. */ if (*prev != *execlists->active) { /* elide lite-restores */ + struct intel_context *prev_ce = NULL, *active_ce = NULL; + /* * Note the inherent discrepancy between the HW runtime, * recorded as part of the context switch, and the CPU @@ -2029,9 +2031,15 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) * and correct overselves later when updating from HW. */ if (*prev) - lrc_runtime_stop((*prev)->context); + prev_ce = (*prev)->context; if (*execlists->active) - lrc_runtime_start((*execlists->active)->context); + active_ce = (*execlists->active)->context; + if (prev_ce != active_ce) { + if (prev_ce) + lrc_runtime_stop(prev_ce); + if (active_ce) + lrc_runtime_start(active_ce); + } new_timeslice(execlists); } From ad651d68cee75e9ac20002254c4e5d09ee67a84b Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sun, 2 Apr 2023 12:44:37 +0100 Subject: [PATCH 670/898] net: sfp: add quirk enabling 2500Base-x for HG MXPD-483II The HG MXPD-483II 1310nm SFP module is meant to operate with 2500Base-X, however, in their EEPROM they incorrectly specify: Transceiver type : Ethernet: 1000BASE-LX ... BR, Nominal : 2600MBd Use sfp_quirk_2500basex for this module to allow 2500Base-X mode anyway. https://forum.banana-pi.org/t/bpi-r3-sfp-module-compatibility/14573/60 Reported-by: chowtom Tested-by: chowtom Signed-off-by: Daniel Golle Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index fb98db61e06cb..8af10bb53e57b 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -387,6 +387,10 @@ static const struct sfp_quirk sfp_quirks[] = { SFP_QUIRK_F("HALNy", "HL-GSFP", sfp_fixup_halny_gsfp), + // HG MXPD-483II-F 2.5G supports 2500Base-X, but incorrectly reports + // 2600MBd in their EERPOM + SFP_QUIRK_M("HG GENUINE", "MXPD-483II", sfp_quirk_2500basex), + // Huawei MA5671A can operate at 2500base-X, but report 1.2GBd NRZ in // their EEPROM SFP_QUIRK("HUAWEI", "MA5671A", sfp_quirk_2500basex, From 839349d13905927d8a567ca4d21d88c82028e31d Mon Sep 17 00:00:00 2001 From: Sricharan Ramabadhran Date: Mon, 3 Apr 2023 12:28:51 +0530 Subject: [PATCH 671/898] net: qrtr: Do not do DEL_SERVER broadcast after DEL_CLIENT On the remote side, when QRTR socket is removed, af_qrtr will call qrtr_port_remove() which broadcasts the DEL_CLIENT packet to all neighbours including local NS. NS upon receiving the DEL_CLIENT packet, will remove the lookups associated with the node:port and broadcasts the DEL_SERVER packet. But on the host side, due to the arrival of the DEL_CLIENT packet, the NS would've already deleted the server belonging to that port. So when the remote's NS again broadcasts the DEL_SERVER for that port, it throws below error message on the host: "failed while handling packet from 2:-2" So fix this error by not broadcasting the DEL_SERVER packet when the DEL_CLIENT packet gets processed." Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Reviewed-by: Manivannan Sadhasivam Signed-off-by: Ram Kumar Dharuman Signed-off-by: Sricharan Ramabadhran Signed-off-by: David S. Miller --- net/qrtr/ns.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 722936f7dd988..0f25a386138c9 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -274,7 +274,7 @@ static struct qrtr_server *server_add(unsigned int service, return NULL; } -static int server_del(struct qrtr_node *node, unsigned int port) +static int server_del(struct qrtr_node *node, unsigned int port, bool bcast) { struct qrtr_lookup *lookup; struct qrtr_server *srv; @@ -287,7 +287,7 @@ static int server_del(struct qrtr_node *node, unsigned int port) radix_tree_delete(&node->servers, port); /* Broadcast the removal of local servers */ - if (srv->node == qrtr_ns.local_node) + if (srv->node == qrtr_ns.local_node && bcast) service_announce_del(&qrtr_ns.bcast_sq, srv); /* Announce the service's disappearance to observers */ @@ -373,7 +373,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) } slot = radix_tree_iter_resume(slot, &iter); rcu_read_unlock(); - server_del(node, srv->port); + server_del(node, srv->port, true); rcu_read_lock(); } rcu_read_unlock(); @@ -459,10 +459,13 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, kfree(lookup); } - /* Remove the server belonging to this port */ + /* Remove the server belonging to this port but don't broadcast + * DEL_SERVER. Neighbours would've already removed the server belonging + * to this port due to the DEL_CLIENT broadcast from qrtr_port_remove(). + */ node = node_get(node_id); if (node) - server_del(node, port); + server_del(node, port, false); /* Advertise the removal of this client to all local servers */ local_node = node_get(qrtr_ns.local_node); @@ -567,7 +570,7 @@ static int ctrl_cmd_del_server(struct sockaddr_qrtr *from, if (!node) return -ENOENT; - return server_del(node, port); + return server_del(node, port, true); } static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, From ea30388baebcce37fd594d425a65037ca35e59e8 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Mon, 3 Apr 2023 15:34:17 +0800 Subject: [PATCH 672/898] ipv6: Fix an uninit variable access bug in __ip6_make_skb() Syzbot reported a bug as following: ===================================================== BUG: KMSAN: uninit-value in arch_atomic64_inc arch/x86/include/asm/atomic64_64.h:88 [inline] BUG: KMSAN: uninit-value in arch_atomic_long_inc include/linux/atomic/atomic-long.h:161 [inline] BUG: KMSAN: uninit-value in atomic_long_inc include/linux/atomic/atomic-instrumented.h:1429 [inline] BUG: KMSAN: uninit-value in __ip6_make_skb+0x2f37/0x30f0 net/ipv6/ip6_output.c:1956 arch_atomic64_inc arch/x86/include/asm/atomic64_64.h:88 [inline] arch_atomic_long_inc include/linux/atomic/atomic-long.h:161 [inline] atomic_long_inc include/linux/atomic/atomic-instrumented.h:1429 [inline] __ip6_make_skb+0x2f37/0x30f0 net/ipv6/ip6_output.c:1956 ip6_finish_skb include/net/ipv6.h:1122 [inline] ip6_push_pending_frames+0x10e/0x550 net/ipv6/ip6_output.c:1987 rawv6_push_pending_frames+0xb12/0xb90 net/ipv6/raw.c:579 rawv6_sendmsg+0x297e/0x2e60 net/ipv6/raw.c:922 inet_sendmsg+0x101/0x180 net/ipv4/af_inet.c:827 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0xa8e/0xe70 net/socket.c:2476 ___sys_sendmsg+0x2a1/0x3f0 net/socket.c:2530 __sys_sendmsg net/socket.c:2559 [inline] __do_sys_sendmsg net/socket.c:2568 [inline] __se_sys_sendmsg net/socket.c:2566 [inline] __x64_sys_sendmsg+0x367/0x540 net/socket.c:2566 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Uninit was created at: slab_post_alloc_hook mm/slab.h:766 [inline] slab_alloc_node mm/slub.c:3452 [inline] __kmem_cache_alloc_node+0x71f/0xce0 mm/slub.c:3491 __do_kmalloc_node mm/slab_common.c:967 [inline] __kmalloc_node_track_caller+0x114/0x3b0 mm/slab_common.c:988 kmalloc_reserve net/core/skbuff.c:492 [inline] __alloc_skb+0x3af/0x8f0 net/core/skbuff.c:565 alloc_skb include/linux/skbuff.h:1270 [inline] __ip6_append_data+0x51c1/0x6bb0 net/ipv6/ip6_output.c:1684 ip6_append_data+0x411/0x580 net/ipv6/ip6_output.c:1854 rawv6_sendmsg+0x2882/0x2e60 net/ipv6/raw.c:915 inet_sendmsg+0x101/0x180 net/ipv4/af_inet.c:827 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0xa8e/0xe70 net/socket.c:2476 ___sys_sendmsg+0x2a1/0x3f0 net/socket.c:2530 __sys_sendmsg net/socket.c:2559 [inline] __do_sys_sendmsg net/socket.c:2568 [inline] __se_sys_sendmsg net/socket.c:2566 [inline] __x64_sys_sendmsg+0x367/0x540 net/socket.c:2566 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd It is because icmp6hdr does not in skb linear region under the scenario of SOCK_RAW socket. Access icmp6_hdr(skb)->icmp6_type directly will trigger the uninit variable access bug. Use a local variable icmp6_type to carry the correct value in different scenarios. Fixes: 14878f75abd5 ("[IPV6]: Add ICMPMsgStats MIB (RFC 4293) [rev 2]") Reported-by: syzbot+8257f4dcef79de670baf@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=3d605ec1d0a7f2a269a1a6936ac7f2b85975ee9c Signed-off-by: Ziyang Xuan Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c314fdde0097c..95a55c6630add 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1965,8 +1965,13 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); + u8 icmp6_type; - ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); + if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl) + icmp6_type = fl6->fl6_icmp_type; + else + icmp6_type = icmp6_hdr(skb)->icmp6_type; + ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } From a3c4c053014585dcf20f4df954791b74d8a8afcd Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Fri, 31 Mar 2023 23:33:19 +0200 Subject: [PATCH 673/898] platform/x86: think-lmi: Fix memory leak when showing current settings When retriving a item string with tlmi_setting(), the result has to be freed using kfree(). In current_value_show() however, malformed item strings are not freed, causing a memory leak. Fix this by eliminating the early return responsible for this. Reported-by: Mirsad Goran Todorovac Link: https://lore.kernel.org/platform-driver-x86/01e920bc-5882-ba0c-dd15-868bf0eca0b8@alu.unizg.hr/T/#t Tested-by: Mirsad Goran Todorovac Fixes: 0fdf10e5fc96 ("platform/x86: think-lmi: Split current_value to reflect only the value") Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20230331213319.41040-1-W_Armin@gmx.de Tested-by: Mario Limonciello Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/think-lmi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index c816646eb6616..6034df6d577d6 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -930,10 +930,12 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a /* validate and split from `item,value` -> `value` */ value = strpbrk(item, ","); if (!value || value == item || !strlen(value + 1)) - return -EINVAL; + ret = -EINVAL; + else + ret = sysfs_emit(buf, "%s\n", value + 1); - ret = sysfs_emit(buf, "%s\n", value + 1); kfree(item); + return ret; } From e7d796fccdc8d17c2d21817ebe4c7bf5bbfe5433 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Sun, 2 Apr 2023 21:31:19 -0400 Subject: [PATCH 674/898] platform/x86: think-lmi: Fix memory leaks when parsing ThinkStation WMI strings My previous commit introduced a memory leak where the item allocated from tlmi_setting was not freed. This commit also renames it to avoid confusion with the similarly name variable in the same function. Fixes: 8a02d70679fc ("platform/x86: think-lmi: Add possible_values for ThinkStation") Reported-by: Mirsad Todorovac Link: https://lore.kernel.org/lkml/df26ff45-8933-f2b3-25f4-6ee51ccda7d8@gmx.de/T/ Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20230403013120.2105-1-mpearson-lenovo@squebb.ca Tested-by: Mario Limonciello Tested-by: Mirsad Goran Todorovac Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/think-lmi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 6034df6d577d6..87f832142d8d6 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -1459,10 +1459,10 @@ static int tlmi_analyze(void) * name string. * Try and pull that out if it's available. */ - char *item, *optstart, *optend; + char *optitem, *optstart, *optend; - if (!tlmi_setting(setting->index, &item, LENOVO_BIOS_SETTING_GUID)) { - optstart = strstr(item, "[Optional:"); + if (!tlmi_setting(setting->index, &optitem, LENOVO_BIOS_SETTING_GUID)) { + optstart = strstr(optitem, "[Optional:"); if (optstart) { optstart += strlen("[Optional:"); optend = strstr(optstart, "]"); @@ -1471,6 +1471,7 @@ static int tlmi_analyze(void) kstrndup(optstart, optend - optstart, GFP_KERNEL); } + kfree(optitem); } } /* From 7065655216d4d034d71164641f3bec0b189ad6fa Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Sun, 2 Apr 2023 21:31:20 -0400 Subject: [PATCH 675/898] platform/x86: think-lmi: Clean up display of current_value on Thinkstation On ThinkStations on retrieving the attribute value the BIOS appends the possible values to the string. Clean up the display in the current_value_show function so the options part is not displayed. Fixes: a40cd7ef22fb ("platform/x86: think-lmi: Add WMI interface support on Lenovo platforms") Reported by Mario Limoncello Link: https://github.com/fwupd/fwupd/issues/5077#issuecomment-1488730526 Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20230403013120.2105-2-mpearson-lenovo@squebb.ca Tested-by: Mario Limonciello Tested-by: Mirsad Goran Todorovac Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/think-lmi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 87f832142d8d6..78dc82bda4dde 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -920,7 +920,7 @@ static ssize_t display_name_show(struct kobject *kobj, struct kobj_attribute *at static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj); - char *item, *value; + char *item, *value, *p; int ret; ret = tlmi_setting(setting->index, &item, LENOVO_BIOS_SETTING_GUID); @@ -931,9 +931,12 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a value = strpbrk(item, ","); if (!value || value == item || !strlen(value + 1)) ret = -EINVAL; - else + else { + /* On Workstations remove the Options part after the value */ + p = strchrnul(value, ';'); + *p = '\0'; ret = sysfs_emit(buf, "%s\n", value + 1); - + } kfree(item); return ret; From 9a469c6dfab38326f99f105386db84230be09ee3 Mon Sep 17 00:00:00 2001 From: Benjamin Asbach Date: Sat, 1 Apr 2023 01:24:47 +0200 Subject: [PATCH 676/898] platform/x86: thinkpad_acpi: Add missing T14s Gen1 type to s2idle quirk list From the commit message adding the first s2idle quirks: > Lenovo laptops that contain NVME SSDs across a variety of generations have > trouble resuming from suspend to idle when the IOMMU translation layer is > active for the NVME storage device. > > This generally manifests as a large resume delay or page faults. These > delays and page faults occur as a result of a Lenovo BIOS specific SMI > that runs during the D3->D0 transition on NVME devices. Add the DMI ids for another variant of the T14s Gen1, which also needs the s2idle quirk. Link: https://lore.kernel.org/all/20220503183420.348-1-mario.limonciello@amd.com/ Link: https://bbs.archlinux.org/viewtopic.php?pid=2084655#p2084655 Signed-off-by: Benjamin Asbach Tested-by: Benjamin Asbach Link: https://lore.kernel.org/r/20230331232447.37204-1-asbachb.kernel@impl.it Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 32c10457399e4..7191ff2625b1e 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -4478,6 +4478,14 @@ static const struct dmi_system_id fwbug_list[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "20UH"), } }, + { + .ident = "T14s Gen1 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20UJ"), + } + }, { .ident = "P14s Gen1 AMD", .driver_data = &quirk_s2idle_bug, From cf5fa3ca0552f1b7ba8490de40700bbfb6979b17 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 29 Mar 2023 19:20:38 +0300 Subject: [PATCH 677/898] wifi: ath11k: reduce the MHI timeout to 20s Currently ath11k breaks after hibernation, the reason being that ath11k expects that the wireless device will have power during suspend and the firmware will continue running. But of course during hibernation the power from the device is cut off and firmware is not running when resuming, so ath11k will fail. (The reason why ath11k needs the firmware running is the interaction between mac80211 and MHI stack, it's a long story and more info in the bugzilla report.) In SUSE kernels the watchdog timeout is reduced from the default 120 to 60 seconds: CONFIG_DPM_WATCHDOG_TIMEOUT=60 But as the ath11k MHI timeout is 90 seconds the kernel will crash before will ath11k will recover in resume callback. To avoid the crash reduce the MHI timeout to just 20 seconds. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.9 Link: https://bugzilla.kernel.org/show_bug.cgi?id=214649 Signed-off-by: Kalle Valo Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230329162038.8637-1-kvalo@kernel.org --- drivers/net/wireless/ath/ath11k/mhi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c index 86995e8dc9135..a62ee05c54097 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.c +++ b/drivers/net/wireless/ath/ath11k/mhi.c @@ -16,7 +16,7 @@ #include "pci.h" #include "pcic.h" -#define MHI_TIMEOUT_DEFAULT_MS 90000 +#define MHI_TIMEOUT_DEFAULT_MS 20000 #define RDDM_DUMP_SIZE 0x420000 static struct mhi_channel_config ath11k_mhi_channels_qca6390[] = { From e6db67fa871dee37d22701daba806bfcd4d9df49 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 30 Mar 2023 11:12:59 +0200 Subject: [PATCH 678/898] wifi: mt76: ignore key disable commands This helps avoid cleartext leakage of already queued or powersave buffered packets, when a reassoc triggers the key deletion. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230330091259.61378-1-nbd@nbd.name --- .../net/wireless/mediatek/mt76/mt7603/main.c | 10 +-- .../net/wireless/mediatek/mt76/mt7615/mac.c | 70 ++++++------------- .../net/wireless/mediatek/mt76/mt7615/main.c | 15 ++-- .../wireless/mediatek/mt76/mt7615/mt7615.h | 6 +- .../net/wireless/mediatek/mt76/mt76x02_util.c | 18 ++--- .../net/wireless/mediatek/mt76/mt7915/main.c | 13 ++-- .../net/wireless/mediatek/mt76/mt7921/main.c | 13 ++-- .../net/wireless/mediatek/mt76/mt7996/main.c | 13 ++-- 8 files changed, 62 insertions(+), 96 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index ca50feb0b3a9d..1b1358c6bb464 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -512,15 +512,15 @@ mt7603_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, !(key->flags & IEEE80211_KEY_FLAG_PAIRWISE)) return -EOPNOTSUPP; - if (cmd == SET_KEY) { - key->hw_key_idx = wcid->idx; - wcid->hw_key_idx = idx; - } else { + if (cmd != SET_KEY) { if (idx == wcid->hw_key_idx) wcid->hw_key_idx = -1; - key = NULL; + return 0; } + + key->hw_key_idx = wcid->idx; + wcid->hw_key_idx = idx; mt76_wcid_key_setup(&dev->mt76, wcid, key); return mt7603_wtbl_set_key(dev, wcid->idx, key); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c index a95602473359e..51a968a6afdc9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c @@ -1193,8 +1193,7 @@ EXPORT_SYMBOL_GPL(mt7615_mac_enable_rtscts); static int mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid, struct ieee80211_key_conf *key, - enum mt76_cipher_type cipher, u16 cipher_mask, - enum set_key_cmd cmd) + enum mt76_cipher_type cipher, u16 cipher_mask) { u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx) + 30 * 4; u8 data[32] = {}; @@ -1203,27 +1202,18 @@ mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid, return -EINVAL; mt76_rr_copy(dev, addr, data, sizeof(data)); - if (cmd == SET_KEY) { - if (cipher == MT_CIPHER_TKIP) { - /* Rx/Tx MIC keys are swapped */ - memcpy(data, key->key, 16); - memcpy(data + 16, key->key + 24, 8); - memcpy(data + 24, key->key + 16, 8); - } else { - if (cipher_mask == BIT(cipher)) - memcpy(data, key->key, key->keylen); - else if (cipher != MT_CIPHER_BIP_CMAC_128) - memcpy(data, key->key, 16); - if (cipher == MT_CIPHER_BIP_CMAC_128) - memcpy(data + 16, key->key, 16); - } + if (cipher == MT_CIPHER_TKIP) { + /* Rx/Tx MIC keys are swapped */ + memcpy(data, key->key, 16); + memcpy(data + 16, key->key + 24, 8); + memcpy(data + 24, key->key + 16, 8); } else { + if (cipher_mask == BIT(cipher)) + memcpy(data, key->key, key->keylen); + else if (cipher != MT_CIPHER_BIP_CMAC_128) + memcpy(data, key->key, 16); if (cipher == MT_CIPHER_BIP_CMAC_128) - memset(data + 16, 0, 16); - else if (cipher_mask) - memset(data, 0, 16); - if (!cipher_mask) - memset(data, 0, sizeof(data)); + memcpy(data + 16, key->key, 16); } mt76_wr_copy(dev, addr, data, sizeof(data)); @@ -1234,7 +1224,7 @@ mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid, static int mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid, enum mt76_cipher_type cipher, u16 cipher_mask, - int keyidx, enum set_key_cmd cmd) + int keyidx) { u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx), w0, w1; @@ -1253,9 +1243,7 @@ mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid, else w0 &= ~MT_WTBL_W0_RX_IK_VALID; - if (cmd == SET_KEY && - (cipher != MT_CIPHER_BIP_CMAC_128 || - cipher_mask == BIT(cipher))) { + if (cipher != MT_CIPHER_BIP_CMAC_128 || cipher_mask == BIT(cipher)) { w0 &= ~MT_WTBL_W0_KEY_IDX; w0 |= FIELD_PREP(MT_WTBL_W0_KEY_IDX, keyidx); } @@ -1272,19 +1260,10 @@ mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid, static void mt7615_mac_wtbl_update_cipher(struct mt7615_dev *dev, struct mt76_wcid *wcid, - enum mt76_cipher_type cipher, u16 cipher_mask, - enum set_key_cmd cmd) + enum mt76_cipher_type cipher, u16 cipher_mask) { u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx); - if (!cipher_mask) { - mt76_clear(dev, addr + 2 * 4, MT_WTBL_W2_KEY_TYPE); - return; - } - - if (cmd != SET_KEY) - return; - if (cipher == MT_CIPHER_BIP_CMAC_128 && cipher_mask & ~BIT(MT_CIPHER_BIP_CMAC_128)) return; @@ -1295,8 +1274,7 @@ mt7615_mac_wtbl_update_cipher(struct mt7615_dev *dev, struct mt76_wcid *wcid, int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev, struct mt76_wcid *wcid, - struct ieee80211_key_conf *key, - enum set_key_cmd cmd) + struct ieee80211_key_conf *key) { enum mt76_cipher_type cipher; u16 cipher_mask = wcid->cipher; @@ -1306,19 +1284,14 @@ int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev, if (cipher == MT_CIPHER_NONE) return -EOPNOTSUPP; - if (cmd == SET_KEY) - cipher_mask |= BIT(cipher); - else - cipher_mask &= ~BIT(cipher); - - mt7615_mac_wtbl_update_cipher(dev, wcid, cipher, cipher_mask, cmd); - err = mt7615_mac_wtbl_update_key(dev, wcid, key, cipher, cipher_mask, - cmd); + cipher_mask |= BIT(cipher); + mt7615_mac_wtbl_update_cipher(dev, wcid, cipher, cipher_mask); + err = mt7615_mac_wtbl_update_key(dev, wcid, key, cipher, cipher_mask); if (err < 0) return err; err = mt7615_mac_wtbl_update_pk(dev, wcid, cipher, cipher_mask, - key->keyidx, cmd); + key->keyidx); if (err < 0) return err; @@ -1329,13 +1302,12 @@ int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev, int mt7615_mac_wtbl_set_key(struct mt7615_dev *dev, struct mt76_wcid *wcid, - struct ieee80211_key_conf *key, - enum set_key_cmd cmd) + struct ieee80211_key_conf *key) { int err; spin_lock_bh(&dev->mt76.lock); - err = __mt7615_mac_wtbl_set_key(dev, wcid, key, cmd); + err = __mt7615_mac_wtbl_set_key(dev, wcid, key); spin_unlock_bh(&dev->mt76.lock); return err; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c index ab4c1b4478aa9..dadb13f2ca095 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -391,18 +391,17 @@ static int mt7615_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (cmd == SET_KEY) *wcid_keyidx = idx; - else if (idx == *wcid_keyidx) - *wcid_keyidx = -1; - else + else { + if (idx == *wcid_keyidx) + *wcid_keyidx = -1; goto out; + } - mt76_wcid_key_setup(&dev->mt76, wcid, - cmd == SET_KEY ? key : NULL); - + mt76_wcid_key_setup(&dev->mt76, wcid, key); if (mt76_is_mmio(&dev->mt76)) - err = mt7615_mac_wtbl_set_key(dev, wcid, key, cmd); + err = mt7615_mac_wtbl_set_key(dev, wcid, key); else - err = __mt7615_mac_wtbl_set_key(dev, wcid, key, cmd); + err = __mt7615_mac_wtbl_set_key(dev, wcid, key); out: mt7615_mutex_release(dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h index 43591b4c1d9af..9e58f6924493f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h @@ -490,11 +490,9 @@ int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi, void mt7615_mac_set_timing(struct mt7615_phy *phy); int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev, struct mt76_wcid *wcid, - struct ieee80211_key_conf *key, - enum set_key_cmd cmd); + struct ieee80211_key_conf *key); int mt7615_mac_wtbl_set_key(struct mt7615_dev *dev, struct mt76_wcid *wcid, - struct ieee80211_key_conf *key, - enum set_key_cmd cmd); + struct ieee80211_key_conf *key); void mt7615_mac_reset_work(struct work_struct *work); u32 mt7615_mac_get_sta_tid_sn(struct mt7615_dev *dev, int wcid, u8 tid); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index 7451a63206a5f..dcbb5c605dfe6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -454,20 +454,20 @@ int mt76x02_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, msta = sta ? (struct mt76x02_sta *)sta->drv_priv : NULL; wcid = msta ? &msta->wcid : &mvif->group_wcid; - if (cmd == SET_KEY) { - key->hw_key_idx = wcid->idx; - wcid->hw_key_idx = idx; - if (key->flags & IEEE80211_KEY_FLAG_RX_MGMT) { - key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX; - wcid->sw_iv = true; - } - } else { + if (cmd != SET_KEY) { if (idx == wcid->hw_key_idx) { wcid->hw_key_idx = -1; wcid->sw_iv = false; } - key = NULL; + return 0; + } + + key->hw_key_idx = wcid->idx; + wcid->hw_key_idx = idx; + if (key->flags & IEEE80211_KEY_FLAG_RX_MGMT) { + key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX; + wcid->sw_iv = true; } mt76_wcid_key_setup(&dev->mt76, wcid, key); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c index 3bbccbdfc5eb0..784191ec48029 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c @@ -410,16 +410,15 @@ static int mt7915_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, mt7915_mcu_add_bss_info(phy, vif, true); } - if (cmd == SET_KEY) + if (cmd == SET_KEY) { *wcid_keyidx = idx; - else if (idx == *wcid_keyidx) - *wcid_keyidx = -1; - else + } else { + if (idx == *wcid_keyidx) + *wcid_keyidx = -1; goto out; + } - mt76_wcid_key_setup(&dev->mt76, wcid, - cmd == SET_KEY ? key : NULL); - + mt76_wcid_key_setup(&dev->mt76, wcid, key); err = mt76_connac_mcu_add_key(&dev->mt76, vif, &msta->bip, key, MCU_EXT_CMD(STA_REC_UPDATE), &msta->wcid, cmd); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 75eaf86c6a78e..42933a6b7334b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -569,16 +569,15 @@ static int mt7921_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, mt7921_mutex_acquire(dev); - if (cmd == SET_KEY) + if (cmd == SET_KEY) { *wcid_keyidx = idx; - else if (idx == *wcid_keyidx) - *wcid_keyidx = -1; - else + } else { + if (idx == *wcid_keyidx) + *wcid_keyidx = -1; goto out; + } - mt76_wcid_key_setup(&dev->mt76, wcid, - cmd == SET_KEY ? key : NULL); - + mt76_wcid_key_setup(&dev->mt76, wcid, key); err = mt76_connac_mcu_add_key(&dev->mt76, vif, &msta->bip, key, MCU_UNI_CMD(STA_REC_UPDATE), &msta->wcid, cmd); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 3e4da0350d965..1ba22d147949b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -351,16 +351,15 @@ static int mt7996_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, mt7996_mcu_add_bss_info(phy, vif, true); } - if (cmd == SET_KEY) + if (cmd == SET_KEY) { *wcid_keyidx = idx; - else if (idx == *wcid_keyidx) - *wcid_keyidx = -1; - else + } else { + if (idx == *wcid_keyidx) + *wcid_keyidx = -1; goto out; + } - mt76_wcid_key_setup(&dev->mt76, wcid, - cmd == SET_KEY ? key : NULL); - + mt76_wcid_key_setup(&dev->mt76, wcid, key); err = mt7996_mcu_add_key(&dev->mt76, vif, &msta->bip, key, MCU_WMWA_UNI_CMD(STA_REC_UPDATE), &msta->wcid, cmd); From fe092498cb9638418c96675be320c74a16306b48 Mon Sep 17 00:00:00 2001 From: Dhruva Gole Date: Mon, 3 Apr 2023 12:54:42 +0530 Subject: [PATCH 679/898] gpio: davinci: Do not clear the bank intr enable bit in save_context The interrupt enable bits might be set if we want to use the GPIO as wakeup source. Clearing this will mean disabling of interrupts in the GPIO banks that we may want to wakeup from. Thus remove the line that was clearing this bit from the driver's save context function. Cc: Devarsh Thakkar Fixes: 0651a730924b ("gpio: davinci: Add support for system suspend/resume PM") Signed-off-by: Dhruva Gole Reviewed-by: Linus Walleij Acked-by: Keerthy Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-davinci.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c index 26b1f7465e091..636c4df43fee7 100644 --- a/drivers/gpio/gpio-davinci.c +++ b/drivers/gpio/gpio-davinci.c @@ -641,9 +641,6 @@ static void davinci_gpio_save_context(struct davinci_gpio_controller *chips, context->set_falling = readl_relaxed(&g->set_falling); } - /* Clear Bank interrupt enable bit */ - writel_relaxed(0, base + BINTEN); - /* Clear all interrupt status registers */ writel_relaxed(GENMASK(31, 0), &g->intstat); } From 7b75c4703609a3ebaf67271813521bc0281e1ec1 Mon Sep 17 00:00:00 2001 From: Dhruva Gole Date: Mon, 3 Apr 2023 12:54:43 +0530 Subject: [PATCH 680/898] gpio: davinci: Add irq chip flag to skip set wake Add the IRQCHIP_SKIP_SET_WAKE flag since there are no special IRQ Wake bits that can be set to enable wakeup IRQ. Fixes: 3d9edf09d452 ("[ARM] 4457/2: davinci: GPIO support") Signed-off-by: Dhruva Gole Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-davinci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c index 636c4df43fee7..43b2dc8821e6b 100644 --- a/drivers/gpio/gpio-davinci.c +++ b/drivers/gpio/gpio-davinci.c @@ -324,7 +324,7 @@ static struct irq_chip gpio_irqchip = { .irq_enable = gpio_irq_enable, .irq_disable = gpio_irq_disable, .irq_set_type = gpio_irq_type, - .flags = IRQCHIP_SET_TYPE_MASKED, + .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_SKIP_SET_WAKE, }; static void gpio_irq_handler(struct irq_desc *desc) From 738a96c4a8c36950803fdd27e7c30aca92dccefd Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Sat, 1 Apr 2023 19:41:44 -0400 Subject: [PATCH 681/898] bpf, arm64: Fixed a BTI error on returning to patched function When BPF_TRAMP_F_CALL_ORIG is set, BPF trampoline uses BLR to jump back to the instruction next to call site to call the patched function. For BTI-enabled kernel, the instruction next to call site is usually PACIASP, in this case, it's safe to jump back with BLR. But when the call site is not followed by a PACIASP or bti, a BTI exception is triggered. Here is a fault log: Unhandled 64-bit el1h sync exception on CPU0, ESR 0x0000000034000002 -- BTI CPU: 0 PID: 263 Comm: test_progs Tainted: GF Hardware name: linux,dummy-virt (DT) pstate: 40400805 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=-c) pc : bpf_fentry_test1+0xc/0x30 lr : bpf_trampoline_6442573892_0+0x48/0x1000 sp : ffff80000c0c3a50 x29: ffff80000c0c3a90 x28: ffff0000c2e6c080 x27: 0000000000000000 x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000050 x23: 0000000000000000 x22: 0000ffffcfd2a7f0 x21: 000000000000000a x20: 0000ffffcfd2a7f0 x19: 0000000000000000 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffffcfd2a7f0 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: ffff80000914f5e4 x9 : ffff8000082a1528 x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0101010101010101 x5 : 0000000000000000 x4 : 00000000fffffff2 x3 : 0000000000000001 x2 : ffff8001f4b82000 x1 : 0000000000000000 x0 : 0000000000000001 Kernel panic - not syncing: Unhandled exception CPU: 0 PID: 263 Comm: test_progs Tainted: GF Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0xec/0x144 show_stack+0x24/0x7c dump_stack_lvl+0x8c/0xb8 dump_stack+0x18/0x34 panic+0x1cc/0x3ec __el0_error_handler_common+0x0/0x130 el1h_64_sync_handler+0x60/0xd0 el1h_64_sync+0x78/0x7c bpf_fentry_test1+0xc/0x30 bpf_fentry_test1+0xc/0x30 bpf_prog_test_run_tracing+0xdc/0x2a0 __sys_bpf+0x438/0x22a0 __arm64_sys_bpf+0x30/0x54 invoke_syscall+0x78/0x110 el0_svc_common.constprop.0+0x6c/0x1d0 do_el0_svc+0x38/0xe0 el0_svc+0x30/0xd0 el0t_64_sync_handler+0x1ac/0x1b0 el0t_64_sync+0x1a0/0x1a4 Kernel Offset: disabled CPU features: 0x0000,00034c24,f994fdab Memory Limit: none And the instruction next to call site of bpf_fentry_test1 is ADD, not PACIASP: : bti c nop nop add w0, w0, #0x1 paciasp For BPF prog, JIT always puts a PACIASP after call site for BTI-enabled kernel, so there is no problem. To fix it, replace BLR with RET to bypass the branch target check. Fixes: efc9909fdce0 ("bpf, arm64: Add bpf trampoline for arm64") Reported-by: Florent Revest Signed-off-by: Xu Kuohai Signed-off-by: Daniel Borkmann Tested-by: Florent Revest Acked-by: Florent Revest Link: https://lore.kernel.org/bpf/20230401234144.3719742-1-xukuohai@huaweicloud.com --- arch/arm64/net/bpf_jit.h | 4 ++++ arch/arm64/net/bpf_jit_comp.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index a6acb94ea3d63..c2edadb8ec6a3 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -281,4 +281,8 @@ /* DMB */ #define A64_DMB_ISH aarch64_insn_gen_dmb(AARCH64_INSN_MB_ISH) +/* ADR */ +#define A64_ADR(Rd, offset) \ + aarch64_insn_gen_adr(0, offset, Rd, AARCH64_INSN_ADR_TYPE_ADR) + #endif /* _BPF_JIT_H */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 62f805f427b79..b26da8efa616e 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1900,7 +1900,8 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, restore_args(ctx, args_off, nargs); /* call original func */ emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx); - emit(A64_BLR(A64_R(10)), ctx); + emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx); + emit(A64_RET(A64_R(10)), ctx); /* store return value */ emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); /* reserve a nop for bpf_tramp_image_put */ From 4ccf11c4e8a8e051499d53a12f502196c97a758e Mon Sep 17 00:00:00 2001 From: Tze-nan Wu Date: Tue, 21 Mar 2023 19:04:43 +0800 Subject: [PATCH 682/898] tracing/synthetic: Fix races on freeing last_cmd Currently, the "last_cmd" variable can be accessed by multiple processes asynchronously when multiple users manipulate synthetic_events node at the same time, it could lead to use-after-free or double-free. This patch add "lastcmd_mutex" to prevent "last_cmd" from being accessed asynchronously. ================================================================ It's easy to reproduce in the KASAN environment by running the two scripts below in different shells. script 1: while : do echo -n -e '\x88' > /sys/kernel/tracing/synthetic_events done script 2: while : do echo -n -e '\xb0' > /sys/kernel/tracing/synthetic_events done ================================================================ double-free scenario: process A process B ------------------- --------------- 1.kstrdup last_cmd 2.free last_cmd 3.free last_cmd(double-free) ================================================================ use-after-free scenario: process A process B ------------------- --------------- 1.kstrdup last_cmd 2.free last_cmd 3.tracing_log_err(use-after-free) ================================================================ Appendix 1. KASAN report double-free: BUG: KASAN: double-free in kfree+0xdc/0x1d4 Free of addr ***** by task sh/4879 Call trace: ... kfree+0xdc/0x1d4 create_or_delete_synth_event+0x60/0x1e8 trace_parse_run_command+0x2bc/0x4b8 synth_events_write+0x20/0x30 vfs_write+0x200/0x830 ... Allocated by task 4879: ... kstrdup+0x5c/0x98 create_or_delete_synth_event+0x6c/0x1e8 trace_parse_run_command+0x2bc/0x4b8 synth_events_write+0x20/0x30 vfs_write+0x200/0x830 ... Freed by task 5464: ... kfree+0xdc/0x1d4 create_or_delete_synth_event+0x60/0x1e8 trace_parse_run_command+0x2bc/0x4b8 synth_events_write+0x20/0x30 vfs_write+0x200/0x830 ... ================================================================ Appendix 2. KASAN report use-after-free: BUG: KASAN: use-after-free in strlen+0x5c/0x7c Read of size 1 at addr ***** by task sh/5483 sh: CPU: 7 PID: 5483 Comm: sh ... __asan_report_load1_noabort+0x34/0x44 strlen+0x5c/0x7c tracing_log_err+0x60/0x444 create_or_delete_synth_event+0xc4/0x204 trace_parse_run_command+0x2bc/0x4b8 synth_events_write+0x20/0x30 vfs_write+0x200/0x830 ... Allocated by task 5483: ... kstrdup+0x5c/0x98 create_or_delete_synth_event+0x80/0x204 trace_parse_run_command+0x2bc/0x4b8 synth_events_write+0x20/0x30 vfs_write+0x200/0x830 ... Freed by task 5480: ... kfree+0xdc/0x1d4 create_or_delete_synth_event+0x74/0x204 trace_parse_run_command+0x2bc/0x4b8 synth_events_write+0x20/0x30 vfs_write+0x200/0x830 ... Link: https://lore.kernel.org/linux-trace-kernel/20230321110444.1587-1-Tze-nan.Wu@mediatek.com Fixes: 27c888da9867 ("tracing: Remove size restriction on synthetic event cmd error logging") Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Matthias Brugger Cc: AngeloGioacchino Del Regno Cc: "Tom Zanussi" Signed-off-by: Tze-nan Wu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_synth.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 46d0abb32d0fa..f0ff730125bf2 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -44,14 +44,21 @@ enum { ERRORS }; static const char *err_text[] = { ERRORS }; +DEFINE_MUTEX(lastcmd_mutex); static char *last_cmd; static int errpos(const char *str) { + int ret = 0; + + mutex_lock(&lastcmd_mutex); if (!str || !last_cmd) - return 0; + goto out; - return err_pos(last_cmd, str); + ret = err_pos(last_cmd, str); + out: + mutex_unlock(&lastcmd_mutex); + return ret; } static void last_cmd_set(const char *str) @@ -59,18 +66,22 @@ static void last_cmd_set(const char *str) if (!str) return; + mutex_lock(&lastcmd_mutex); kfree(last_cmd); - last_cmd = kstrdup(str, GFP_KERNEL); + mutex_unlock(&lastcmd_mutex); } static void synth_err(u8 err_type, u16 err_pos) { + mutex_lock(&lastcmd_mutex); if (!last_cmd) - return; + goto out; tracing_log_err(NULL, "synthetic_events", last_cmd, err_text, err_type, err_pos); + out: + mutex_unlock(&lastcmd_mutex); } static int create_synth_event(const char *raw_command); From 6455b6163d8c680366663cdb8c679514d55fc30c Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Sat, 25 Mar 2023 10:12:47 +0800 Subject: [PATCH 683/898] ring-buffer: Fix race while reader and writer are on the same page When user reads file 'trace_pipe', kernel keeps printing following logs that warn at "cpu_buffer->reader_page->read > rb_page_size(reader)" in rb_get_reader_page(). It just looks like there's an infinite loop in tracing_read_pipe(). This problem occurs several times on arm64 platform when testing v5.10 and below. Call trace: rb_get_reader_page+0x248/0x1300 rb_buffer_peek+0x34/0x160 ring_buffer_peek+0xbc/0x224 peek_next_entry+0x98/0xbc __find_next_entry+0xc4/0x1c0 trace_find_next_entry_inc+0x30/0x94 tracing_read_pipe+0x198/0x304 vfs_read+0xb4/0x1e0 ksys_read+0x74/0x100 __arm64_sys_read+0x24/0x30 el0_svc_common.constprop.0+0x7c/0x1bc do_el0_svc+0x2c/0x94 el0_svc+0x20/0x30 el0_sync_handler+0xb0/0xb4 el0_sync+0x160/0x180 Then I dump the vmcore and look into the problematic per_cpu ring_buffer, I found that tail_page/commit_page/reader_page are on the same page while reader_page->read is obviously abnormal: tail_page == commit_page == reader_page == { .write = 0x100d20, .read = 0x8f9f4805, // Far greater than 0xd20, obviously abnormal!!! .entries = 0x10004c, .real_end = 0x0, .page = { .time_stamp = 0x857257416af0, .commit = 0xd20, // This page hasn't been full filled. // .data[0...0xd20] seems normal. } } The root cause is most likely the race that reader and writer are on the same page while reader saw an event that not fully committed by writer. To fix this, add memory barriers to make sure the reader can see the content of what is committed. Since commit a0fcaaed0c46 ("ring-buffer: Fix race between reset page and reading page") has added the read barrier in rb_get_reader_page(), here we just need to add the write barrier. Link: https://lore.kernel.org/linux-trace-kernel/20230325021247.2923907-1-zhengyejian1@huawei.com Cc: stable@vger.kernel.org Fixes: 77ae365eca89 ("ring-buffer: make lockless") Suggested-by: Steven Rostedt (Google) Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c6f47b6cfd5f1..76a2d91eecad9 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3098,6 +3098,10 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) if (RB_WARN_ON(cpu_buffer, rb_is_reader_page(cpu_buffer->tail_page))) return; + /* + * No need for a memory barrier here, as the update + * of the tail_page did it for this page. + */ local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); rb_inc_page(&cpu_buffer->commit_page); @@ -3107,6 +3111,8 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) while (rb_commit_index(cpu_buffer) != rb_page_write(cpu_buffer->commit_page)) { + /* Make sure the readers see the content of what is committed. */ + smp_wmb(); local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); RB_WARN_ON(cpu_buffer, @@ -4684,7 +4690,12 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) /* * Make sure we see any padding after the write update - * (see rb_reset_tail()) + * (see rb_reset_tail()). + * + * In addition, a writer may be writing on the reader page + * if the page has not been fully filled, so the read barrier + * is also needed to make sure we see the content of what is + * committed by the writer (see rb_set_commit_to_write()). */ smp_rmb(); From ea65b41807a26495ff2a73dd8b1bab2751940887 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Mon, 27 Mar 2023 18:36:46 +0100 Subject: [PATCH 684/898] ftrace: Mark get_lock_parent_ip() __always_inline If the compiler decides not to inline this function then preemption tracing will always show an IP inside the preemption disabling path and never the function actually calling preempt_{enable,disable}. Link: https://lore.kernel.org/linux-trace-kernel/20230327173647.1690849-1-john@metanate.com Cc: Masami Hiramatsu Cc: Mark Rutland Cc: stable@vger.kernel.org Fixes: f904f58263e1d ("sched/debug: Fix preempt_disable_ip recording for preempt_disable()") Signed-off-by: John Keeping Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 366c730beaa3e..402fc061de75c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -980,7 +980,7 @@ static inline void __ftrace_enabled_restore(int enabled) #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5)) #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6)) -static inline unsigned long get_lock_parent_ip(void) +static __always_inline unsigned long get_lock_parent_ip(void) { unsigned long addr = CALLER_ADDR0; From b9f451a9029a16eb7913ace09b92493d00f2e564 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Wed, 29 Mar 2023 17:50:15 +0200 Subject: [PATCH 685/898] tracing/timerlat: Notify new max thread latency timerlat is not reporting a new tracing_max_latency for the thread latency. The reason is that it is not calling notify_new_max_latency() function after the new thread latency is sampled. Call notify_new_max_latency() after computing the thread latency. Link: https://lkml.kernel.org/r/16e18d61d69073d0192ace07bf61e405cca96e9c.1680104184.git.bristot@kernel.org Cc: stable@vger.kernel.org Fixes: dae181349f1e ("tracing/osnoise: Support a list of trace_array *tr") Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 9176bb7a9bb45..e8116094bed86 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -1738,6 +1738,8 @@ static int timerlat_main(void *data) trace_timerlat_sample(&s); + notify_new_max_latency(diff); + timerlat_dump_stack(time_to_us(diff)); tlat->tracing_thread = false; From d3cba7f02cd82118c32651c73374d8a5a459d9a6 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Wed, 29 Mar 2023 17:50:16 +0200 Subject: [PATCH 686/898] tracing/osnoise: Fix notify new tracing_max_latency osnoise/timerlat tracers are reporting new max latency on instances where the tracing is off, creating inconsistencies between the max reported values in the trace and in the tracing_max_latency. Thus only report new tracing_max_latency on active tracing instances. Link: https://lkml.kernel.org/r/ecd109fde4a0c24ab0f00ba1e9a144ac19a91322.1680104184.git.bristot@kernel.org Cc: stable@vger.kernel.org Fixes: dae181349f1e ("tracing/osnoise: Support a list of trace_array *tr") Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index e8116094bed86..4496975f2029f 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -1296,7 +1296,7 @@ static void notify_new_max_latency(u64 latency) rcu_read_lock(); list_for_each_entry_rcu(inst, &osnoise_instances, list) { tr = inst->tr; - if (tr->max_latency < latency) { + if (tracer_tracing_is_on(tr) && tr->max_latency < latency) { tr->max_latency = latency; latency_fsnotify(tr); } From f82e7ca019dfad3b006fd3b772f7ac569672db55 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 9 Mar 2023 22:13:02 -0500 Subject: [PATCH 687/898] tracing: Error if a trace event has an array for a __field() A __field() in the TRACE_EVENT() macro is used to set up the fields of the trace event data. It is for single storage units (word, char, int, pointer, etc) and not for complex structures or arrays. Unfortunately, there's nothing preventing the build from accepting: __field(int, arr[5]); from building. It will turn into a array value. This use to work fine, as the offset and size use to be determined by the macro using the field name, but things have changed and the offset and size are now determined by the type. So the above would only be size 4, and the next field will be located 4 bytes from it (instead of 20). The proper way to declare static arrays is to use the __array() macro. Instead of __field(int, arr[5]) it should be __array(int, arr, 5). Add some macro tricks to the building of a trace event from the TRACE_EVENT() macro such that __field(int, arr[5]) will fail to build. A comment by the failure will explain why the build failed. Link: https://lore.kernel.org/lkml/20230306122549.236561-1-douglas.raillard@arm.com/ Link: https://lore.kernel.org/linux-trace-kernel/20230309221302.642e82d9@gandalf.local.home Reported-by: Douglas RAILLARD Signed-off-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) --- include/trace/stages/stage5_get_offsets.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h index ac5c24d3beeb2..e30a13be46ba5 100644 --- a/include/trace/stages/stage5_get_offsets.h +++ b/include/trace/stages/stage5_get_offsets.h @@ -9,17 +9,30 @@ #undef __entry #define __entry entry +/* + * Fields should never declare an array: i.e. __field(int, arr[5]) + * If they do, it will cause issues in parsing and possibly corrupt the + * events. To prevent that from happening, test the sizeof() a fictitious + * type called "struct _test_no_array_##item" which will fail if "item" + * contains array elements (like "arr[5]"). + * + * If you hit this, use __array(int, arr, 5) instead. + */ #undef __field -#define __field(type, item) +#define __field(type, item) \ + { (void)sizeof(struct _test_no_array_##item *); } #undef __field_ext -#define __field_ext(type, item, filter_type) +#define __field_ext(type, item, filter_type) \ + { (void)sizeof(struct _test_no_array_##item *); } #undef __field_struct -#define __field_struct(type, item) +#define __field_struct(type, item) \ + { (void)sizeof(struct _test_no_array_##item *); } #undef __field_struct_ext -#define __field_struct_ext(type, item, filter_type) +#define __field_struct_ext(type, item, filter_type) \ + { (void)sizeof(struct _test_no_array_##item *); } #undef __array #define __array(type, item, len) From f71e0e329c152c7f11ddfd97ffc62aba152fad3f Mon Sep 17 00:00:00 2001 From: Christophe Kerello Date: Tue, 28 Mar 2023 17:58:18 +0200 Subject: [PATCH 688/898] mtd: rawnand: stm32_fmc2: remove unsupported EDO mode Remove the EDO mode support from as the FMC2 controller does not support the feature. Signed-off-by: Christophe Kerello Fixes: 2cd457f328c1 ("mtd: rawnand: stm32_fmc2: add STM32 FMC2 NAND flash controller driver") Cc: stable@vger.kernel.org #v5.4+ Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230328155819.225521-2-christophe.kerello@foss.st.com --- drivers/mtd/nand/raw/stm32_fmc2_nand.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mtd/nand/raw/stm32_fmc2_nand.c b/drivers/mtd/nand/raw/stm32_fmc2_nand.c index 5d627048c420d..3abb63d00a0bb 100644 --- a/drivers/mtd/nand/raw/stm32_fmc2_nand.c +++ b/drivers/mtd/nand/raw/stm32_fmc2_nand.c @@ -1531,6 +1531,9 @@ static int stm32_fmc2_nfc_setup_interface(struct nand_chip *chip, int chipnr, if (IS_ERR(sdrt)) return PTR_ERR(sdrt); + if (sdrt->tRC_min < 30000) + return -EOPNOTSUPP; + if (chipnr == NAND_DATA_IFACE_CHECK_ONLY) return 0; From ddbb664b6ab8de7dffa388ae0c88cd18616494e5 Mon Sep 17 00:00:00 2001 From: Christophe Kerello Date: Tue, 28 Mar 2023 17:58:19 +0200 Subject: [PATCH 689/898] mtd: rawnand: stm32_fmc2: use timings.mode instead of checking tRC_min Use timings.mode value instead of checking tRC_min timing for EDO mode support. Signed-off-by: Christophe Kerello Fixes: 2cd457f328c1 ("mtd: rawnand: stm32_fmc2: add STM32 FMC2 NAND flash controller driver") Cc: stable@vger.kernel.org #v5.10+ Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230328155819.225521-3-christophe.kerello@foss.st.com --- drivers/mtd/nand/raw/stm32_fmc2_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/stm32_fmc2_nand.c b/drivers/mtd/nand/raw/stm32_fmc2_nand.c index 3abb63d00a0bb..9e74bcd90aaa2 100644 --- a/drivers/mtd/nand/raw/stm32_fmc2_nand.c +++ b/drivers/mtd/nand/raw/stm32_fmc2_nand.c @@ -1531,7 +1531,7 @@ static int stm32_fmc2_nfc_setup_interface(struct nand_chip *chip, int chipnr, if (IS_ERR(sdrt)) return PTR_ERR(sdrt); - if (sdrt->tRC_min < 30000) + if (conf->timings.mode > 3) return -EOPNOTSUPP; if (chipnr == NAND_DATA_IFACE_CHECK_ONLY) From 0c3089601f064d80b3838eceb711fcac04bceaad Mon Sep 17 00:00:00 2001 From: Bang Li Date: Wed, 29 Mar 2023 00:30:12 +0800 Subject: [PATCH 690/898] mtdblock: tolerate corrected bit-flips mtd_read() may return -EUCLEAN in case of corrected bit-flips.This particular condition should not be treated like an error. Signed-off-by: Bang Li Fixes: e47f68587b82 ("mtd: check for max_bitflips in mtd_read_oob()") Cc: # v3.7 Acked-by: Richard Weinberger Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230328163012.4264-1-libang.linuxer@gmail.com --- drivers/mtd/mtdblock.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c index 1e94e7d10b8be..a0a1194dc1d90 100644 --- a/drivers/mtd/mtdblock.c +++ b/drivers/mtd/mtdblock.c @@ -153,7 +153,7 @@ static int do_cached_write (struct mtdblk_dev *mtdblk, unsigned long pos, mtdblk->cache_state = STATE_EMPTY; ret = mtd_read(mtd, sect_start, sect_size, &retlen, mtdblk->cache_data); - if (ret) + if (ret && !mtd_is_bitflip(ret)) return ret; if (retlen != sect_size) return -EIO; @@ -188,8 +188,12 @@ static int do_cached_read (struct mtdblk_dev *mtdblk, unsigned long pos, pr_debug("mtdblock: read on \"%s\" at 0x%lx, size 0x%x\n", mtd->name, pos, len); - if (!sect_size) - return mtd_read(mtd, pos, len, &retlen, buf); + if (!sect_size) { + ret = mtd_read(mtd, pos, len, &retlen, buf); + if (ret && !mtd_is_bitflip(ret)) + return ret; + return 0; + } while (len > 0) { unsigned long sect_start = (pos/sect_size)*sect_size; @@ -209,7 +213,7 @@ static int do_cached_read (struct mtdblk_dev *mtdblk, unsigned long pos, memcpy (buf, mtdblk->cache_data + offset, size); } else { ret = mtd_read(mtd, pos, size, &retlen, buf); - if (ret) + if (ret && !mtd_is_bitflip(ret)) return ret; if (retlen != size) return -EIO; From 93942b70461574ca7fc3d91494ca89b16a4c64c7 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Wed, 29 Mar 2023 10:47:26 +0300 Subject: [PATCH 691/898] mtd: rawnand: meson: fix bitmask for length in command word Valid mask is 0x3FFF, without this patch the following problems were found: 1) [ 0.938914] Could not find a valid ONFI parameter page, trying bit-wise majority to recover it [ 0.947384] ONFI parameter recovery failed, aborting 2) Read with disabled ECC mode was broken. Fixes: 8fae856c5350 ("mtd: rawnand: meson: add support for Amlogic NAND flash controller") Cc: Signed-off-by: Arseniy Krasnov Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/3794ffbf-dfea-e96f-1f97-fe235b005e19@sberdevices.ru --- drivers/mtd/nand/raw/meson_nand.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index a28574c009003..074e14225c06a 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -280,7 +280,7 @@ static void meson_nfc_cmd_access(struct nand_chip *nand, int raw, bool dir, if (raw) { len = mtd->writesize + mtd->oobsize; - cmd = (len & GENMASK(5, 0)) | scrambler | DMA_DIR(dir); + cmd = (len & GENMASK(13, 0)) | scrambler | DMA_DIR(dir); writel(cmd, nfc->reg_base + NFC_REG_CMD); return; } @@ -544,7 +544,7 @@ static int meson_nfc_read_buf(struct nand_chip *nand, u8 *buf, int len) if (ret) goto out; - cmd = NFC_CMD_N2M | (len & GENMASK(5, 0)); + cmd = NFC_CMD_N2M | (len & GENMASK(13, 0)); writel(cmd, nfc->reg_base + NFC_REG_CMD); meson_nfc_drain_cmd(nfc); @@ -568,7 +568,7 @@ static int meson_nfc_write_buf(struct nand_chip *nand, u8 *buf, int len) if (ret) return ret; - cmd = NFC_CMD_M2N | (len & GENMASK(5, 0)); + cmd = NFC_CMD_M2N | (len & GENMASK(13, 0)); writel(cmd, nfc->reg_base + NFC_REG_CMD); meson_nfc_drain_cmd(nfc); From 34bafc747c54fb58c1908ec3116fa6137393e596 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 11 Mar 2023 15:40:02 +0100 Subject: [PATCH 692/898] cxl/pci: Handle truncated CDAT header cxl_cdat_get_length() only checks whether the DOE response size is sufficient for the Table Access response header (1 dword), but not the succeeding CDAT header (1 dword length plus other fields). It thus returns whatever uninitialized memory happens to be on the stack if a truncated DOE response with only 1 dword was received. Fix it. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Reported-by: Ming Li Tested-by: Ira Weiny Signed-off-by: Lukas Wunner Reviewed-by: Ming Li Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Kuppuswamy Sathyanarayanan Link: https://lore.kernel.org/r/000e69cd163461c8b1bc2cf4155b6e25402c29c7.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 49a99a84b6aa6..87da8c935185a 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -510,7 +510,7 @@ static int cxl_cdat_get_length(struct device *dev, return rc; } wait_for_completion(&t.c); - if (t.task.rv < sizeof(__le32)) + if (t.task.rv < 2 * sizeof(__le32)) return -EIO; *length = le32_to_cpu(t.response_pl[1]); From b56faef2312057db20479b240eb71bd2e51fb51c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 11 Mar 2023 15:40:03 +0100 Subject: [PATCH 693/898] cxl/pci: Handle truncated CDAT entries If truncated CDAT entries are received from a device, the concatenation of those entries constitutes a corrupt CDAT, yet is happily exposed to user space. Avoid by verifying response lengths and erroring out if truncation is detected. The last CDAT entry may still be truncated despite the checks introduced herein if the length in the CDAT header is too small. However, that is easily detectable by user space because it reaches EOF prematurely. A subsequent commit which rightsizes the CDAT response allocation closes that remaining loophole. The two lines introduced here which exceed 80 chars are shortened to less than 80 chars by a subsequent commit which migrates to a synchronous DOE API and replaces "t.task.rv" by "rc". The existing acpi_cdat_header and acpi_table_cdat struct definitions provided by ACPICA cannot be used because they do not employ __le16 or __le32 types. I believe that cannot be changed because those types are Linux-specific and ACPI is specified for little endian platforms only, hence doesn't care about endianness. So duplicate the structs. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny Signed-off-by: Lukas Wunner Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Cc: stable@vger.kernel.org # v6.0+ Link: https://lore.kernel.org/r/bce3aebc0e8e18a1173425a7a865b232c3912963.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 13 +++++++++---- drivers/cxl/cxlpci.h | 14 ++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 87da8c935185a..fb600dfbf5a6e 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -529,8 +529,8 @@ static int cxl_cdat_read_table(struct device *dev, do { DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t); + struct cdat_entry_header *entry; size_t entry_dw; - __le32 *entry; int rc; rc = pci_doe_submit_task(cdat_doe, &t.task); @@ -539,14 +539,19 @@ static int cxl_cdat_read_table(struct device *dev, return rc; } wait_for_completion(&t.c); - /* 1 DW header + 1 DW data min */ - if (t.task.rv < (2 * sizeof(__le32))) + + /* 1 DW Table Access Response Header + CDAT entry */ + entry = (struct cdat_entry_header *)(t.response_pl + 1); + if ((entry_handle == 0 && + t.task.rv != sizeof(__le32) + sizeof(struct cdat_header)) || + (entry_handle > 0 && + (t.task.rv < sizeof(__le32) + sizeof(*entry) || + t.task.rv != sizeof(__le32) + le16_to_cpu(entry->length)))) return -EIO; /* Get the CXL table access header entry handle */ entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, le32_to_cpu(t.response_pl[0])); - entry = t.response_pl + 1; entry_dw = t.task.rv / sizeof(__le32); /* Skip Header */ entry_dw -= 1; diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index be6a2ef3cce37..0465ef963cd6a 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -68,6 +68,20 @@ enum cxl_regloc_type { CXL_REGLOC_RBI_TYPES }; +struct cdat_header { + __le32 length; + u8 revision; + u8 checksum; + u8 reserved[6]; + __le32 sequence; +} __packed; + +struct cdat_entry_header { + u8 type; + u8 reserved; + __le16 length; +} __packed; + int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, From 4fe2c13d59d849be3b45371e3913ec5dc77fc0fb Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 11 Mar 2023 15:40:04 +0100 Subject: [PATCH 694/898] cxl/pci: Handle excessive CDAT length If the length in the CDAT header is larger than the concatenation of the header and all table entries, then the CDAT exposed to user space contains trailing null bytes. Not every consumer may be able to handle that. Per Postel's robustness principle, "be liberal in what you accept" and silently reduce the cached length to avoid exposing those null bytes. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny Signed-off-by: Lukas Wunner Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Cc: stable@vger.kernel.org # v6.0+ Link: https://lore.kernel.org/r/6d98b3c7da5343172bd3ccabfabbc1f31c079d74.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index fb600dfbf5a6e..523d5b9fd7fcf 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -564,6 +564,9 @@ static int cxl_cdat_read_table(struct device *dev, } } while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY); + /* Length in CDAT header may exceed concatenation of CDAT entries */ + cdat->length -= length; + return 0; } From 92dc899c3b4927f3cfa23f55bf759171234b5802 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 11 Mar 2023 15:40:05 +0100 Subject: [PATCH 695/898] PCI/DOE: Silence WARN splat with CONFIG_DEBUG_OBJECTS=y Gregory Price reports a WARN splat with CONFIG_DEBUG_OBJECTS=y upon CXL probing because pci_doe_submit_task() invokes INIT_WORK() instead of INIT_WORK_ONSTACK() for a work_struct that was allocated on the stack. All callers of pci_doe_submit_task() allocate the work_struct on the stack, so replace INIT_WORK() with INIT_WORK_ONSTACK() as a backportable short-term fix. The long-term fix implemented by a subsequent commit is to move to a synchronous API which allocates the work_struct internally in the DOE library. Stacktrace for posterity: WARNING: CPU: 0 PID: 23 at lib/debugobjects.c:545 __debug_object_init.cold+0x18/0x183 CPU: 0 PID: 23 Comm: kworker/u2:1 Not tainted 6.1.0-0.rc1.20221019gitaae703b02f92.17.fc38.x86_64 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: pci_doe_submit_task+0x5d/0xd0 pci_doe_discovery+0xb4/0x100 pcim_doe_create_mb+0x219/0x290 cxl_pci_probe+0x192/0x430 local_pci_probe+0x41/0x80 pci_device_probe+0xb3/0x220 really_probe+0xde/0x380 __driver_probe_device+0x78/0x170 driver_probe_device+0x1f/0x90 __driver_attach_async_helper+0x5c/0xe0 async_run_entry_fn+0x30/0x130 process_one_work+0x294/0x5b0 Fixes: 9d24322e887b ("PCI/DOE: Add DOE mailbox support functions") Link: https://lore.kernel.org/linux-cxl/Y1bOniJliOFszvIK@memverge.com/ Reported-by: Gregory Price Tested-by: Ira Weiny Tested-by: Gregory Price Signed-off-by: Lukas Wunner Reviewed-by: Ira Weiny Reviewed-by: Dan Williams Reviewed-by: Gregory Price Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/67a9117f463ecdb38a2dbca6a20391ce2f1e7a06.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams --- drivers/pci/doe.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c index 6f097932ccbf9..c14ffdf23f870 100644 --- a/drivers/pci/doe.c +++ b/drivers/pci/doe.c @@ -523,6 +523,8 @@ EXPORT_SYMBOL_GPL(pci_doe_supports_prot); * task->complete will be called when the state machine is done processing this * task. * + * @task must be allocated on the stack. + * * Excess data will be discarded. * * RETURNS: 0 when task has been successfully queued, -ERRNO on error @@ -544,7 +546,7 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task) return -EIO; task->doe_mb = doe_mb; - INIT_WORK(&task->work, doe_statemachine_work); + INIT_WORK_ONSTACK(&task->work, doe_statemachine_work); queue_work(doe_mb->work_queue, &task->work); return 0; } From abf04be0e7071f2bcd39bf97ba407e7d4439785e Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 11 Mar 2023 15:40:06 +0100 Subject: [PATCH 696/898] PCI/DOE: Fix memory leak with CONFIG_DEBUG_OBJECTS=y After a pci_doe_task completes, its work_struct needs to be destroyed to avoid a memory leak with CONFIG_DEBUG_OBJECTS=y. Fixes: 9d24322e887b ("PCI/DOE: Add DOE mailbox support functions") Tested-by: Ira Weiny Signed-off-by: Lukas Wunner Reviewed-by: Ira Weiny Reviewed-by: Davidlohr Bueso Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Cc: stable@vger.kernel.org # v6.0+ Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/775768b4912531c3b887d405fc51a50e465e1bf9.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams --- drivers/pci/doe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c index c14ffdf23f870..e5e9b287b9766 100644 --- a/drivers/pci/doe.c +++ b/drivers/pci/doe.c @@ -224,6 +224,7 @@ static void signal_task_complete(struct pci_doe_task *task, int rv) { task->rv = rv; task->complete(task); + destroy_work_on_stack(&task->work); } static void signal_task_abort(struct pci_doe_task *task, int rv) From a3046a618a284579d1189af8711765f553eed707 Mon Sep 17 00:00:00 2001 From: David Gow Date: Sat, 18 Mar 2023 12:15:54 +0800 Subject: [PATCH 697/898] um: Only disable SSE on clang to work around old GCC bugs As part of the Rust support for UML, we disable SSE (and similar flags) to match the normal x86 builds. This both makes sense (we ideally want a similar configuration to x86), and works around a crash bug with SSE generation under Rust with LLVM. However, this breaks compiling stdlib.h under gcc < 11, as the x86_64 ABI requires floating-point return values be stored in an SSE register. gcc 11 fixes this by only doing register allocation when a function is actually used, and since we never use atof(), it shouldn't be a problem: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99652 Nevertheless, only disable SSE on clang setups, as that's a simple way of working around everyone's bugs. Fixes: 884981867947 ("rust: arch/um: Disable FP/SIMD instruction to match x86") Reported-by: Roberto Sassu Link: https://lore.kernel.org/linux-um/6df2ecef9011d85654a82acd607fdcbc93ad593c.camel@huaweicloud.com/ Tested-by: Roberto Sassu Tested-by: SeongJae Park Signed-off-by: David Gow Reviewed-by: Vincenzo Palazzo Tested-by: Arthur Grillo Signed-off-by: Richard Weinberger --- arch/x86/Makefile.um | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um index b70559b821df8..2106a2bd152bf 100644 --- a/arch/x86/Makefile.um +++ b/arch/x86/Makefile.um @@ -3,9 +3,14 @@ core-y += arch/x86/crypto/ # # Disable SSE and other FP/SIMD instructions to match normal x86 +# This is required to work around issues in older LLVM versions, but breaks +# GCC versions < 11. See: +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99652 # +ifeq ($(CONFIG_CC_IS_CLANG),y) KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 +endif ifeq ($(CONFIG_X86_32),y) START := 0x8048000 From c6b486fb33680ad5a3a6390ce693c835caaae3f7 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Mon, 3 Apr 2023 14:33:21 +0530 Subject: [PATCH 698/898] net: ethernet: ti: am65-cpsw: Fix mdio cleanup in probe In the am65_cpsw_nuss_probe() function's cleanup path, the call to of_platform_device_destroy() for the common->mdio_dev device is invoked unconditionally. It is possible that either the MDIO node is not present in the device-tree, or the MDIO node is disabled in the device-tree. In both these cases, the MDIO device is not created, resulting in a NULL pointer dereference when the of_platform_device_destroy() function is invoked on the common->mdio_dev device on the cleanup path. Fix this by ensuring that the common->mdio_dev device exists, before attempting to invoke of_platform_device_destroy(). Fixes: a45cfcc69a25 ("net: ethernet: ti: am65-cpsw-nuss: use of_platform_device_create() for mdio") Signed-off-by: Siddharth Vadapalli Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20230403090321.835877-1-s-vadapalli@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 4e3861c47708c..bcea87b7151c0 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2926,7 +2926,8 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) am65_cpsw_nuss_phylink_cleanup(common); am65_cpts_release(common->cpts); err_of_clear: - of_platform_device_destroy(common->mdio_dev, NULL); + if (common->mdio_dev) + of_platform_device_destroy(common->mdio_dev, NULL); err_pm_clear: pm_runtime_put_sync(dev); pm_runtime_disable(dev); @@ -2956,7 +2957,8 @@ static int am65_cpsw_nuss_remove(struct platform_device *pdev) am65_cpts_release(common->cpts); am65_cpsw_disable_serdes_phy(common); - of_platform_device_destroy(common->mdio_dev, NULL); + if (common->mdio_dev) + of_platform_device_destroy(common->mdio_dev, NULL); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); From b277fc793daf258877b4c0744b52f69d6e6ba22e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 4 Apr 2023 09:44:33 +0530 Subject: [PATCH 699/898] powerpc/papr_scm: Update the NUMA distance table for the target node Platform device helper routines won't update the NUMA distance table while creating a platform device, even if the device is present on a NUMA node that doesn't have memory or CPU. This is especially true for pmem devices. If the target node of the pmem device is not online, we find the nearest online node to the device and associate the pmem device with that online node. To find the nearest online node, we should have the numa distance table updated correctly. Update the distance information during the device probe. For a papr scm device on NUMA node 3 distance_lookup_table value for distance_ref_points_depth = 2 before and after fix is below: Before fix: node 3 distance depth 0 - 0 node 3 distance depth 1 - 0 node 4 distance depth 0 - 4 node 4 distance depth 1 - 2 node 5 distance depth 0 - 5 node 5 distance depth 1 - 1 After fix node 3 distance depth 0 - 3 node 3 distance depth 1 - 1 node 4 distance depth 0 - 4 node 4 distance depth 1 - 2 node 5 distance depth 0 - 5 node 5 distance depth 1 - 1 Without the fix, the nearest numa node to the pmem device (NUMA node 3) will be picked as 4. After the fix, we get the correct numa node which is 5. Fixes: da1115fdbd6e ("powerpc/nvdimm: Pick nearby online node if the device node is not online") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://msgid.link/20230404041433.1781804-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/numa.c | 1 + arch/powerpc/platforms/pseries/papr_scm.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b44ce71917d75..16cfe56be05bb 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -366,6 +366,7 @@ void update_numa_distance(struct device_node *node) WARN(numa_distance_table[nid][nid] == -1, "NUMA distance details for node %d not provided\n", nid); } +EXPORT_SYMBOL_GPL(update_numa_distance); /* * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN} diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 2f8385523a132..1a53e048ceb76 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -1428,6 +1428,13 @@ static int papr_scm_probe(struct platform_device *pdev) return -ENODEV; } + /* + * open firmware platform device create won't update the NUMA + * distance table. For PAPR SCM devices we use numa_map_to_online_node() + * to find the nearest online NUMA node and that requires correct + * distance table information. + */ + update_numa_distance(dn); p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) From e4395701330fc4aee530905039516fe770b81417 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 31 Mar 2023 12:32:24 -0300 Subject: [PATCH 700/898] iommufd: Check for uptr overflow syzkaller found that setting up a map with a user VA that wraps past zero can trigger WARN_ONs, particularly from pin_user_pages weirdly returning 0 due to invalid arguments. Prevent creating a pages with a uptr and size that would math overflow. WARNING: CPU: 0 PID: 518 at drivers/iommu/iommufd/pages.c:793 pfn_reader_user_pin+0x2e6/0x390 Modules linked in: CPU: 0 PID: 518 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:pfn_reader_user_pin+0x2e6/0x390 Code: b1 11 e9 25 fe ff ff e8 28 e4 0f ff 31 ff 48 89 de e8 2e e6 0f ff 48 85 db 74 0a e8 14 e4 0f ff e9 4d ff ff ff e8 0a e4 0f ff <0f> 0b bb f2 ff ff ff e9 3c ff ff ff e8 f9 e3 0f ff ba 01 00 00 00 RSP: 0018:ffffc90000f9fa30 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff821e2b72 RDX: 0000000000000000 RSI: ffff888014184680 RDI: 0000000000000002 RBP: ffffc90000f9fa78 R08: 00000000000000ff R09: 0000000079de6f4e R10: ffffc90000f9f790 R11: ffff888014185418 R12: ffffc90000f9fc60 R13: 0000000000000002 R14: ffff888007879800 R15: 0000000000000000 FS: 00007f4227555740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000043 CR3: 000000000e748005 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace: pfn_reader_next+0x14a/0x7b0 ? interval_tree_double_span_iter_update+0x11a/0x140 pfn_reader_first+0x140/0x1b0 iopt_pages_rw_slow+0x71/0x280 ? __this_cpu_preempt_check+0x20/0x30 iopt_pages_rw_access+0x2b2/0x5b0 iommufd_access_rw+0x19f/0x2f0 iommufd_test+0xd11/0x16f0 ? write_comp_data+0x2f/0x90 iommufd_fops_ioctl+0x206/0x330 __x64_sys_ioctl+0x10e/0x160 ? __pfx_iommufd_fops_ioctl+0x10/0x10 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc Cc: Fixes: 8d160cd4d506 ("iommufd: Algorithms for PFN storage") Link: https://lore.kernel.org/r/1-v1-ceab6a4d7d7a+94-iommufd_syz_jgg@nvidia.com Reviewed-by: Kevin Tian Reported-by: Pengfei Xu Tested-by: Pengfei Xu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/pages.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index f8d92c9bb65b6..400ec7c91ed7e 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -1142,6 +1142,7 @@ struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length, bool writable) { struct iopt_pages *pages; + unsigned long end; /* * The iommu API uses size_t as the length, and protect the DIV_ROUND_UP @@ -1150,6 +1151,9 @@ struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length, if (length > SIZE_MAX - PAGE_SIZE || length == 0) return ERR_PTR(-EINVAL); + if (check_add_overflow((unsigned long)uptr, length, &end)) + return ERR_PTR(-EOVERFLOW); + pages = kzalloc(sizeof(*pages), GFP_KERNEL_ACCOUNT); if (!pages) return ERR_PTR(-ENOMEM); From 727c28c1cef2bc013d2c8bb6c50e410a3882a04e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 31 Mar 2023 12:32:25 -0300 Subject: [PATCH 701/898] iommufd: Fix unpinning of pages when an access is present syzkaller found that the calculation of batch_last_index should use 'start_index' since at input to this function the batch is either empty or it has already been adjusted to cross any accesses so it will start at the point we are unmapping from. Getting this wrong causes the unmap to run over the end of the pages which corrupts pages that were never mapped. In most cases this triggers the num pinned debugging: WARNING: CPU: 0 PID: 557 at drivers/iommu/iommufd/pages.c:294 __iopt_area_unfill_domain+0x152/0x560 Modules linked in: CPU: 0 PID: 557 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:__iopt_area_unfill_domain+0x152/0x560 Code: d2 0f ff 44 8b 64 24 54 48 8b 44 24 48 31 ff 44 89 e6 48 89 44 24 38 e8 fc d3 0f ff 45 85 e4 0f 85 eb 01 00 00 e8 0e d2 0f ff <0f> 0b e8 07 d2 0f ff 48 8b 44 24 38 89 5c 24 58 89 18 8b 44 24 54 RSP: 0018:ffffc9000108baf0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00000000ffffffff RCX: ffffffff821e3f85 RDX: 0000000000000000 RSI: ffff88800faf0000 RDI: 0000000000000002 RBP: ffffc9000108bd18 R08: 000000000003ca25 R09: 0000000000000014 R10: 000000000003ca00 R11: 0000000000000024 R12: 0000000000000004 R13: 0000000000000801 R14: 00000000000007ff R15: 0000000000000800 FS: 00007f3499ce1740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000243 CR3: 00000000179c2001 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace: iopt_area_unfill_domain+0x32/0x40 iopt_table_remove_domain+0x23f/0x4c0 iommufd_device_selftest_detach+0x3a/0x90 iommufd_selftest_destroy+0x55/0x70 iommufd_object_destroy_user+0xce/0x130 iommufd_destroy+0xa2/0xc0 iommufd_fops_ioctl+0x206/0x330 __x64_sys_ioctl+0x10e/0x160 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc Also add some useful WARN_ON sanity checks. Cc: Fixes: 8d160cd4d506 ("iommufd: Algorithms for PFN storage") Link: https://lore.kernel.org/r/2-v1-ceab6a4d7d7a+94-iommufd_syz_jgg@nvidia.com Reviewed-by: Kevin Tian Reported-by: Pengfei Xu Tested-by: Pengfei Xu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/pages.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 400ec7c91ed7e..b11aace836542 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -1207,13 +1207,21 @@ iopt_area_unpin_domain(struct pfn_batch *batch, struct iopt_area *area, unsigned long start = max(start_index, *unmapped_end_index); + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + batch->total_pfns) + WARN_ON(*unmapped_end_index - + batch->total_pfns != + start_index); batch_from_domain(batch, domain, area, start, last_index); - batch_last_index = start + batch->total_pfns - 1; + batch_last_index = start_index + batch->total_pfns - 1; } else { batch_last_index = last_index; } + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(batch_last_index > real_last_index); + /* * unmaps must always 'cut' at a place where the pfns are not * contiguous to pair with the maps that always install From 13a0d1ae7ee6b438f5537711a8c60cba00554943 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 31 Mar 2023 12:32:26 -0300 Subject: [PATCH 702/898] iommufd: Do not corrupt the pfn list when doing batch carry If batch->end is 0 then setting npfns[0] before computing the new value of pfns will fail to adjust the pfn and result in various page accounting corruptions. It should be ordered after. This seems to result in various kinds of page meta-data corruption related failures: WARNING: CPU: 1 PID: 527 at mm/gup.c:75 try_grab_folio+0x503/0x740 Modules linked in: CPU: 1 PID: 527 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:try_grab_folio+0x503/0x740 Code: e3 01 48 89 de e8 6d c1 dd ff 48 85 db 0f 84 7c fe ff ff e8 4f bf dd ff 49 8d 47 ff 48 89 45 d0 e9 73 fe ff ff e8 3d bf dd ff <0f> 0b 31 db e9 d0 fc ff ff e8 2f bf dd ff 48 8b 5d c8 31 ff 48 89 RSP: 0018:ffffc90000f37908 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 00000000fffffc02 RCX: ffffffff81504c26 RDX: 0000000000000000 RSI: ffff88800d030000 RDI: 0000000000000002 RBP: ffffc90000f37948 R08: 000000000003ca24 R09: 0000000000000008 R10: 000000000003ca00 R11: 0000000000000023 R12: ffffea000035d540 R13: 0000000000000001 R14: 0000000000000000 R15: ffffea000035d540 FS: 00007fecbf659740(0000) GS:ffff88807dd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000200011c3 CR3: 000000000ef66006 CR4: 0000000000770ee0 PKRU: 55555554 Call Trace: internal_get_user_pages_fast+0xd32/0x2200 pin_user_pages_fast+0x65/0x90 pfn_reader_user_pin+0x376/0x390 pfn_reader_next+0x14a/0x7b0 pfn_reader_first+0x140/0x1b0 iopt_area_fill_domain+0x74/0x210 iopt_table_add_domain+0x30e/0x6e0 iommufd_device_selftest_attach+0x7f/0x140 iommufd_test+0x10ff/0x16f0 iommufd_fops_ioctl+0x206/0x330 __x64_sys_ioctl+0x10e/0x160 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc Cc: Fixes: f394576eb11d ("iommufd: PFN handling for iopt_pages") Link: https://lore.kernel.org/r/3-v1-ceab6a4d7d7a+94-iommufd_syz_jgg@nvidia.com Reviewed-by: Kevin Tian Reported-by: Pengfei Xu Tested-by: Pengfei Xu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/pages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index b11aace836542..3c47846cc5efe 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -294,9 +294,9 @@ static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns) batch->npfns[batch->end - 1] < keep_pfns); batch->total_pfns = keep_pfns; - batch->npfns[0] = keep_pfns; batch->pfns[0] = batch->pfns[batch->end - 1] + (batch->npfns[batch->end - 1] - keep_pfns); + batch->npfns[0] = keep_pfns; batch->end = 0; } From 218c597325f4faf7b7a6049233a30d7842b5b2dc Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Mon, 3 Apr 2023 14:11:20 +0200 Subject: [PATCH 703/898] net: stmmac: fix up RX flow hash indirection table when setting channels stmmac_reinit_queues() fails to fix up the RX hash. Even if the number of channels gets restricted, the output of `ethtool -x' indicates that all RX queues are used: $ ethtool -l enp0s29f2 Channel parameters for enp0s29f2: Pre-set maximums: RX: 8 TX: 8 Other: n/a Combined: n/a Current hardware settings: RX: 8 TX: 8 Other: n/a Combined: n/a $ ethtool -x enp0s29f2 RX flow hash indirection table for enp0s29f2 with 8 RX ring(s): 0: 0 1 2 3 4 5 6 7 8: 0 1 2 3 4 5 6 7 [...] $ ethtool -L enp0s29f2 rx 3 $ ethtool -x enp0s29f2 RX flow hash indirection table for enp0s29f2 with 3 RX ring(s): 0: 0 1 2 3 4 5 6 7 8: 0 1 2 3 4 5 6 7 [...] Fix this by setting the indirection table according to the number of specified queues. The result is now as expected: $ ethtool -L enp0s29f2 rx 3 $ ethtool -x enp0s29f2 RX flow hash indirection table for enp0s29f2 with 3 RX ring(s): 0: 0 1 2 0 1 2 0 1 8: 2 0 1 2 0 1 2 0 [...] Tested on Intel Elkhart Lake. Fixes: 0366f7e06a6b ("net: stmmac: add ethtool support for get/set channels") Signed-off-by: Corinna Vinschen Link: https://lore.kernel.org/r/20230403121120.489138-1-vinschen@redhat.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d41a5f92aee7a..59cbf3597eb42 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6950,7 +6950,7 @@ static void stmmac_napi_del(struct net_device *dev) int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) { struct stmmac_priv *priv = netdev_priv(dev); - int ret = 0; + int ret = 0, i; if (netif_running(dev)) stmmac_release(dev); @@ -6959,6 +6959,10 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) priv->plat->rx_queues_to_use = rx_cnt; priv->plat->tx_queues_to_use = tx_cnt; + if (!netif_is_rxfh_configured(dev)) + for (i = 0; i < ARRAY_SIZE(priv->rss.table); i++) + priv->rss.table[i] = ethtool_rxfh_indir_default(i, + rx_cnt); stmmac_napi_add(dev); From 5085e41f9e83a1bec51da1f20b54f2ec3a13a3fe Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Mar 2023 14:24:27 -0400 Subject: [PATCH 704/898] sunrpc: only free unix grouplist after RCU settles While the unix_gid object is rcu-freed, the group_info list that it contains is not. Ensure that we only put the group list reference once we are really freeing the unix_gid object. Reported-by: Zhi Li Link: https://bugzilla.redhat.com/show_bug.cgi?id=2183056 Signed-off-by: Jeff Layton Fixes: fd5d2f78261b ("SUNRPC: Make server side AUTH_UNIX use lockless lookups") Signed-off-by: Chuck Lever --- net/sunrpc/svcauth_unix.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 983c5891cb565..4246363cb0951 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -416,14 +416,23 @@ static int unix_gid_hash(kuid_t uid) return hash_long(from_kuid(&init_user_ns, uid), GID_HASHBITS); } -static void unix_gid_put(struct kref *kref) +static void unix_gid_free(struct rcu_head *rcu) { - struct cache_head *item = container_of(kref, struct cache_head, ref); - struct unix_gid *ug = container_of(item, struct unix_gid, h); + struct unix_gid *ug = container_of(rcu, struct unix_gid, rcu); + struct cache_head *item = &ug->h; + if (test_bit(CACHE_VALID, &item->flags) && !test_bit(CACHE_NEGATIVE, &item->flags)) put_group_info(ug->gi); - kfree_rcu(ug, rcu); + kfree(ug); +} + +static void unix_gid_put(struct kref *kref) +{ + struct cache_head *item = container_of(kref, struct cache_head, ref); + struct unix_gid *ug = container_of(item, struct unix_gid, h); + + call_rcu(&ug->rcu, unix_gid_free); } static int unix_gid_match(struct cache_head *corig, struct cache_head *cnew) From 8be8f170e8383fd1421e8b87950e90d7dd45be07 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 28 Mar 2023 13:47:58 -0400 Subject: [PATCH 705/898] NFS: Remove "select RPCSEC_GSS_KRB5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_CRYPTO=n (e.g. arm/shmobile_defconfig): WARNING: unmet direct dependencies detected for RPCSEC_GSS_KRB5 Depends on [n]: NETWORK_FILESYSTEMS [=y] && SUNRPC [=y] && CRYPTO [=n] Selected by [y]: - NFS_V4 [=y] && NETWORK_FILESYSTEMS [=y] && NFS_FS [=y] As NFSv4 can work without crypto enabled, remove the RPCSEC_GSS_KRB5 dependency altogether. Trond says: > It is possible to use the NFSv4.1 client with just AUTH_SYS, and > in fact there are plenty of people out there using only that. The > fact that RFC5661 gets its knickers in a twist about RPCSEC_GSS > support is largely irrelevant to those people. > > The other issue is that ’select’ enforces the strict dependency > that if the NFS client is compiled into the kernel, then the > RPCSEC_GSS and kerberos code needs to be compiled in as well: they > cannot exist as modules. Fixes: e57d06527738 ("NFS & NFSD: Update GSS dependencies") Reported-by: kernel test robot Reported-by: Niklas Söderlund Suggested-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/nfs/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 450d6c3bc05e2..c1c7ed2fd860e 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -75,7 +75,6 @@ config NFS_V3_ACL config NFS_V4 tristate "NFS client support for NFS version 4" depends on NFS_FS - select RPCSEC_GSS_KRB5 select KEYS help This option enables support for version 4 of the NFS protocol From 7de82c2f36fb26aa78440bbf0efcf360b691d98b Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Sat, 1 Apr 2023 13:22:08 -0700 Subject: [PATCH 706/898] NFSD: callback request does not use correct credential for AUTH_SYS Currently callback request does not use the credential specified in CREATE_SESSION if the security flavor for the back channel is AUTH_SYS. Problem was discovered by pynfs 4.1 DELEG5 and DELEG7 test with error: DELEG5 st_delegation.testCBSecParms : FAILURE expected callback with uid, gid == 17, 19, got 0, 0 Signed-off-by: Dai Ngo Reviewed-by: Jeff Layton Fixes: 8276c902bbe9 ("SUNRPC: remove uid and gid from struct auth_cred") Signed-off-by: Chuck Lever --- fs/nfsd/nfs4callback.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 2a815f5a52c4b..4039ffcf90ba5 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -946,8 +946,8 @@ static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct r if (!kcred) return NULL; - kcred->uid = ses->se_cb_sec.uid; - kcred->gid = ses->se_cb_sec.gid; + kcred->fsuid = ses->se_cb_sec.uid; + kcred->fsgid = ses->se_cb_sec.gid; return kcred; } } From f1ba4e674febf5c0e9f725a75ca43b7722b4e963 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Thu, 30 Mar 2023 17:49:52 -0400 Subject: [PATCH 707/898] virtio-blk: fix to match virtio spec The merged patch series to support zoned block devices in virtio-blk is not the most up to date version. The merged patch can be found at https://lore.kernel.org/linux-block/20221016034127.330942-3-dmitry.fomichev@wdc.com/ but the latest and reviewed version is https://lore.kernel.org/linux-block/20221110053952.3378990-3-dmitry.fomichev@wdc.com/ The reason is apparently that the correct mailing lists and maintainers were not copied. The differences between the two are mostly cleanups, but there is one change that is very important in terms of compatibility with the approved virtio-zbd specification. Before it was approved, the OASIS virtio spec had a change in VIRTIO_BLK_T_ZONE_APPEND request layout that is not reflected in the current virtio-blk driver code. In the running code, the status is the first byte of the in-header that is followed by some pad bytes and the u64 that carries the sector at which the data has been written to the zone back to the driver, aka the append sector. This layout turned out to be problematic for implementing in QEMU and the request status byte has been eventually made the last byte of the in-header. The current code doesn't expect that and this causes the append sector value always come as zero to the block layer. This needs to be fixed ASAP. Fixes: 95bfec41bd3d ("virtio-blk: add support for zoned block devices") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev Reviewed-by: Stefan Hajnoczi Reviewed-by: Damien Le Moal Message-Id: <20230330214953.1088216-2-dmitry.fomichev@wdc.com> Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 238 +++++++++++++++++++++----------- include/uapi/linux/virtio_blk.h | 18 +-- 2 files changed, 166 insertions(+), 90 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 2723eede6f217..4f0dbbb3d4a53 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -96,16 +96,14 @@ struct virtblk_req { /* * The zone append command has an extended in header. - * The status field in zone_append_in_hdr must have - * the same offset in virtblk_req as the non-zoned - * status field above. + * The status field in zone_append_in_hdr must always + * be the last byte. */ struct { + __virtio64 sector; u8 status; - u8 reserved[7]; - __le64 append_sector; - } zone_append_in_hdr; - }; + } zone_append; + } in_hdr; size_t in_hdr_len; @@ -154,7 +152,7 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr) sgs[num_out + num_in++] = vbr->sg_table.sgl; } - sg_init_one(&in_hdr, &vbr->status, vbr->in_hdr_len); + sg_init_one(&in_hdr, &vbr->in_hdr.status, vbr->in_hdr_len); sgs[num_out + num_in++] = &in_hdr; return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); @@ -242,11 +240,14 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, struct request *req, struct virtblk_req *vbr) { - size_t in_hdr_len = sizeof(vbr->status); + size_t in_hdr_len = sizeof(vbr->in_hdr.status); bool unmap = false; u32 type; u64 sector = 0; + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) && op_is_zone_mgmt(req_op(req))) + return BLK_STS_NOTSUPP; + /* Set fields for all request types */ vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); @@ -287,7 +288,7 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, case REQ_OP_ZONE_APPEND: type = VIRTIO_BLK_T_ZONE_APPEND; sector = blk_rq_pos(req); - in_hdr_len = sizeof(vbr->zone_append_in_hdr); + in_hdr_len = sizeof(vbr->in_hdr.zone_append); break; case REQ_OP_ZONE_RESET: type = VIRTIO_BLK_T_ZONE_RESET; @@ -297,7 +298,10 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, type = VIRTIO_BLK_T_ZONE_RESET_ALL; break; case REQ_OP_DRV_IN: - /* Out header already filled in, nothing to do */ + /* + * Out header has already been prepared by the caller (virtblk_get_id() + * or virtblk_submit_zone_report()), nothing to do here. + */ return 0; default: WARN_ON_ONCE(1); @@ -318,16 +322,28 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, return 0; } +/* + * The status byte is always the last byte of the virtblk request + * in-header. This helper fetches its value for all in-header formats + * that are currently defined. + */ +static inline u8 virtblk_vbr_status(struct virtblk_req *vbr) +{ + return *((u8 *)&vbr->in_hdr + vbr->in_hdr_len - 1); +} + static inline void virtblk_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); - blk_status_t status = virtblk_result(vbr->status); + blk_status_t status = virtblk_result(virtblk_vbr_status(vbr)); + struct virtio_blk *vblk = req->mq_hctx->queue->queuedata; virtblk_unmap_data(req, vbr); virtblk_cleanup_cmd(req); if (req_op(req) == REQ_OP_ZONE_APPEND) - req->__sector = le64_to_cpu(vbr->zone_append_in_hdr.append_sector); + req->__sector = virtio64_to_cpu(vblk->vdev, + vbr->in_hdr.zone_append.sector); blk_mq_end_request(req, status); } @@ -355,7 +371,7 @@ static int virtblk_handle_req(struct virtio_blk_vq *vq, if (likely(!blk_should_fake_timeout(req->q)) && !blk_mq_complete_request_remote(req) && - !blk_mq_add_to_batch(req, iob, vbr->status, + !blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr), virtblk_complete_batch)) virtblk_request_done(req); req_done++; @@ -550,7 +566,6 @@ static void virtio_queue_rqs(struct request **rqlist) #ifdef CONFIG_BLK_DEV_ZONED static void *virtblk_alloc_report_buffer(struct virtio_blk *vblk, unsigned int nr_zones, - unsigned int zone_sectors, size_t *buflen) { struct request_queue *q = vblk->disk->queue; @@ -558,7 +573,7 @@ static void *virtblk_alloc_report_buffer(struct virtio_blk *vblk, void *buf; nr_zones = min_t(unsigned int, nr_zones, - get_capacity(vblk->disk) >> ilog2(zone_sectors)); + get_capacity(vblk->disk) >> ilog2(vblk->zone_sectors)); bufsize = sizeof(struct virtio_blk_zone_report) + nr_zones * sizeof(struct virtio_blk_zone_descriptor); @@ -592,7 +607,7 @@ static int virtblk_submit_zone_report(struct virtio_blk *vblk, return PTR_ERR(req); vbr = blk_mq_rq_to_pdu(req); - vbr->in_hdr_len = sizeof(vbr->status); + vbr->in_hdr_len = sizeof(vbr->in_hdr.status); vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_ZONE_REPORT); vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, sector); @@ -601,7 +616,7 @@ static int virtblk_submit_zone_report(struct virtio_blk *vblk, goto out; blk_execute_rq(req, false); - err = blk_status_to_errno(virtblk_result(vbr->status)); + err = blk_status_to_errno(virtblk_result(vbr->in_hdr.status)); out: blk_mq_free_request(req); return err; @@ -609,29 +624,72 @@ static int virtblk_submit_zone_report(struct virtio_blk *vblk, static int virtblk_parse_zone(struct virtio_blk *vblk, struct virtio_blk_zone_descriptor *entry, - unsigned int idx, unsigned int zone_sectors, - report_zones_cb cb, void *data) + unsigned int idx, report_zones_cb cb, void *data) { struct blk_zone zone = { }; - if (entry->z_type != VIRTIO_BLK_ZT_SWR && - entry->z_type != VIRTIO_BLK_ZT_SWP && - entry->z_type != VIRTIO_BLK_ZT_CONV) { - dev_err(&vblk->vdev->dev, "invalid zone type %#x\n", - entry->z_type); - return -EINVAL; + zone.start = virtio64_to_cpu(vblk->vdev, entry->z_start); + if (zone.start + vblk->zone_sectors <= get_capacity(vblk->disk)) + zone.len = vblk->zone_sectors; + else + zone.len = get_capacity(vblk->disk) - zone.start; + zone.capacity = virtio64_to_cpu(vblk->vdev, entry->z_cap); + zone.wp = virtio64_to_cpu(vblk->vdev, entry->z_wp); + + switch (entry->z_type) { + case VIRTIO_BLK_ZT_SWR: + zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ; + break; + case VIRTIO_BLK_ZT_SWP: + zone.type = BLK_ZONE_TYPE_SEQWRITE_PREF; + break; + case VIRTIO_BLK_ZT_CONV: + zone.type = BLK_ZONE_TYPE_CONVENTIONAL; + break; + default: + dev_err(&vblk->vdev->dev, "zone %llu: invalid type %#x\n", + zone.start, entry->z_type); + return -EIO; } - zone.type = entry->z_type; - zone.cond = entry->z_state; - zone.len = zone_sectors; - zone.capacity = le64_to_cpu(entry->z_cap); - zone.start = le64_to_cpu(entry->z_start); - if (zone.cond == BLK_ZONE_COND_FULL) + switch (entry->z_state) { + case VIRTIO_BLK_ZS_EMPTY: + zone.cond = BLK_ZONE_COND_EMPTY; + break; + case VIRTIO_BLK_ZS_CLOSED: + zone.cond = BLK_ZONE_COND_CLOSED; + break; + case VIRTIO_BLK_ZS_FULL: + zone.cond = BLK_ZONE_COND_FULL; zone.wp = zone.start + zone.len; - else - zone.wp = le64_to_cpu(entry->z_wp); + break; + case VIRTIO_BLK_ZS_EOPEN: + zone.cond = BLK_ZONE_COND_EXP_OPEN; + break; + case VIRTIO_BLK_ZS_IOPEN: + zone.cond = BLK_ZONE_COND_IMP_OPEN; + break; + case VIRTIO_BLK_ZS_NOT_WP: + zone.cond = BLK_ZONE_COND_NOT_WP; + break; + case VIRTIO_BLK_ZS_RDONLY: + zone.cond = BLK_ZONE_COND_READONLY; + zone.wp = ULONG_MAX; + break; + case VIRTIO_BLK_ZS_OFFLINE: + zone.cond = BLK_ZONE_COND_OFFLINE; + zone.wp = ULONG_MAX; + break; + default: + dev_err(&vblk->vdev->dev, "zone %llu: invalid condition %#x\n", + zone.start, entry->z_state); + return -EIO; + } + /* + * The callback below checks the validity of the reported + * entry data, no need to further validate it here. + */ return cb(&zone, idx, data); } @@ -641,39 +699,47 @@ static int virtblk_report_zones(struct gendisk *disk, sector_t sector, { struct virtio_blk *vblk = disk->private_data; struct virtio_blk_zone_report *report; - unsigned int zone_sectors = vblk->zone_sectors; - unsigned int nz, i; - int ret, zone_idx = 0; + unsigned long long nz, i; size_t buflen; + unsigned int zone_idx = 0; + int ret; if (WARN_ON_ONCE(!vblk->zone_sectors)) return -EOPNOTSUPP; - report = virtblk_alloc_report_buffer(vblk, nr_zones, - zone_sectors, &buflen); + report = virtblk_alloc_report_buffer(vblk, nr_zones, &buflen); if (!report) return -ENOMEM; + mutex_lock(&vblk->vdev_mutex); + + if (!vblk->vdev) { + ret = -ENXIO; + goto fail_report; + } + while (zone_idx < nr_zones && sector < get_capacity(vblk->disk)) { memset(report, 0, buflen); ret = virtblk_submit_zone_report(vblk, (char *)report, buflen, sector); - if (ret) { - if (ret > 0) - ret = -EIO; - goto out_free; - } - nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones); + if (ret) + goto fail_report; + + nz = min_t(u64, virtio64_to_cpu(vblk->vdev, report->nr_zones), + nr_zones); if (!nz) break; for (i = 0; i < nz && zone_idx < nr_zones; i++) { ret = virtblk_parse_zone(vblk, &report->zones[i], - zone_idx, zone_sectors, cb, data); + zone_idx, cb, data); if (ret) - goto out_free; - sector = le64_to_cpu(report->zones[i].z_start) + zone_sectors; + goto fail_report; + + sector = virtio64_to_cpu(vblk->vdev, + report->zones[i].z_start) + + vblk->zone_sectors; zone_idx++; } } @@ -682,7 +748,8 @@ static int virtblk_report_zones(struct gendisk *disk, sector_t sector, ret = zone_idx; else ret = -EINVAL; -out_free: +fail_report: + mutex_unlock(&vblk->vdev_mutex); kvfree(report); return ret; } @@ -691,20 +758,28 @@ static void virtblk_revalidate_zones(struct virtio_blk *vblk) { u8 model; - if (!vblk->zone_sectors) - return; - virtio_cread(vblk->vdev, struct virtio_blk_config, zoned.model, &model); - if (!blk_revalidate_disk_zones(vblk->disk, NULL)) - set_capacity_and_notify(vblk->disk, 0); + switch (model) { + default: + dev_err(&vblk->vdev->dev, "unknown zone model %d\n", model); + fallthrough; + case VIRTIO_BLK_Z_NONE: + case VIRTIO_BLK_Z_HA: + disk_set_zoned(vblk->disk, BLK_ZONED_NONE); + return; + case VIRTIO_BLK_Z_HM: + WARN_ON_ONCE(!vblk->zone_sectors); + if (!blk_revalidate_disk_zones(vblk->disk, NULL)) + set_capacity_and_notify(vblk->disk, 0); + } } static int virtblk_probe_zoned_device(struct virtio_device *vdev, struct virtio_blk *vblk, struct request_queue *q) { - u32 v; + u32 v, wg; u8 model; int ret; @@ -713,16 +788,11 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, switch (model) { case VIRTIO_BLK_Z_NONE: + case VIRTIO_BLK_Z_HA: + /* Present the host-aware device as non-zoned */ return 0; case VIRTIO_BLK_Z_HM: break; - case VIRTIO_BLK_Z_HA: - /* - * Present the host-aware device as a regular drive. - * TODO It is possible to add an option to make it appear - * in the system as a zoned drive. - */ - return 0; default: dev_err(&vdev->dev, "unsupported zone model %d\n", model); return -EINVAL; @@ -735,32 +805,31 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, virtio_cread(vdev, struct virtio_blk_config, zoned.max_open_zones, &v); - disk_set_max_open_zones(vblk->disk, le32_to_cpu(v)); - - dev_dbg(&vdev->dev, "max open zones = %u\n", le32_to_cpu(v)); + disk_set_max_open_zones(vblk->disk, v); + dev_dbg(&vdev->dev, "max open zones = %u\n", v); virtio_cread(vdev, struct virtio_blk_config, zoned.max_active_zones, &v); - disk_set_max_active_zones(vblk->disk, le32_to_cpu(v)); - dev_dbg(&vdev->dev, "max active zones = %u\n", le32_to_cpu(v)); + disk_set_max_active_zones(vblk->disk, v); + dev_dbg(&vdev->dev, "max active zones = %u\n", v); virtio_cread(vdev, struct virtio_blk_config, - zoned.write_granularity, &v); - if (!v) { + zoned.write_granularity, &wg); + if (!wg) { dev_warn(&vdev->dev, "zero write granularity reported\n"); return -ENODEV; } - blk_queue_physical_block_size(q, le32_to_cpu(v)); - blk_queue_io_min(q, le32_to_cpu(v)); + blk_queue_physical_block_size(q, wg); + blk_queue_io_min(q, wg); - dev_dbg(&vdev->dev, "write granularity = %u\n", le32_to_cpu(v)); + dev_dbg(&vdev->dev, "write granularity = %u\n", wg); /* * virtio ZBD specification doesn't require zones to be a power of * two sectors in size, but the code in this driver expects that. */ - virtio_cread(vdev, struct virtio_blk_config, zoned.zone_sectors, &v); - vblk->zone_sectors = le32_to_cpu(v); + virtio_cread(vdev, struct virtio_blk_config, zoned.zone_sectors, + &vblk->zone_sectors); if (vblk->zone_sectors == 0 || !is_power_of_2(vblk->zone_sectors)) { dev_err(&vdev->dev, "zoned device with non power of two zone size %u\n", @@ -783,8 +852,15 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, dev_warn(&vdev->dev, "zero max_append_sectors reported\n"); return -ENODEV; } - blk_queue_max_zone_append_sectors(q, le32_to_cpu(v)); - dev_dbg(&vdev->dev, "max append sectors = %u\n", le32_to_cpu(v)); + if ((v << SECTOR_SHIFT) < wg) { + dev_err(&vdev->dev, + "write granularity %u exceeds max_append_sectors %u limit\n", + wg, v); + return -ENODEV; + } + + blk_queue_max_zone_append_sectors(q, v); + dev_dbg(&vdev->dev, "max append sectors = %u\n", v); } return ret; @@ -794,6 +870,7 @@ static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev) { return virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED); } + #else /* @@ -801,9 +878,11 @@ static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev) * We only need to define a few symbols to avoid compilation errors. */ #define virtblk_report_zones NULL + static inline void virtblk_revalidate_zones(struct virtio_blk *vblk) { } + static inline int virtblk_probe_zoned_device(struct virtio_device *vdev, struct virtio_blk *vblk, struct request_queue *q) { @@ -831,7 +910,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) return PTR_ERR(req); vbr = blk_mq_rq_to_pdu(req); - vbr->in_hdr_len = sizeof(vbr->status); + vbr->in_hdr_len = sizeof(vbr->in_hdr.status); vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID); vbr->out_hdr.sector = 0; @@ -840,7 +919,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) goto out; blk_execute_rq(req, false); - err = blk_status_to_errno(virtblk_result(vbr->status)); + err = blk_status_to_errno(virtblk_result(vbr->in_hdr.status)); out: blk_mq_free_request(req); return err; @@ -1504,9 +1583,6 @@ static int virtblk_probe(struct virtio_device *vdev) goto out_cleanup_disk; } - dev_info(&vdev->dev, "blk config size: %zu\n", - sizeof(struct virtio_blk_config)); - err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); if (err) goto out_cleanup_disk; diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index 5af2a0300bb9d..3744e4da1b2a7 100644 --- a/include/uapi/linux/virtio_blk.h +++ b/include/uapi/linux/virtio_blk.h @@ -140,11 +140,11 @@ struct virtio_blk_config { /* Zoned block device characteristics (if VIRTIO_BLK_F_ZONED) */ struct virtio_blk_zoned_characteristics { - __le32 zone_sectors; - __le32 max_open_zones; - __le32 max_active_zones; - __le32 max_append_sectors; - __le32 write_granularity; + __virtio32 zone_sectors; + __virtio32 max_open_zones; + __virtio32 max_active_zones; + __virtio32 max_append_sectors; + __virtio32 write_granularity; __u8 model; __u8 unused2[3]; } zoned; @@ -241,11 +241,11 @@ struct virtio_blk_outhdr { */ struct virtio_blk_zone_descriptor { /* Zone capacity */ - __le64 z_cap; + __virtio64 z_cap; /* The starting sector of the zone */ - __le64 z_start; + __virtio64 z_start; /* Zone write pointer position in sectors */ - __le64 z_wp; + __virtio64 z_wp; /* Zone type */ __u8 z_type; /* Zone state */ @@ -254,7 +254,7 @@ struct virtio_blk_zone_descriptor { }; struct virtio_blk_zone_report { - __le64 nr_zones; + __virtio64 nr_zones; __u8 reserved[56]; struct virtio_blk_zone_descriptor zones[]; }; From 10805eb5d6d15fd4f61b05cc7aa269e12ab99848 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Thu, 30 Mar 2023 17:49:53 -0400 Subject: [PATCH 708/898] virtio-blk: fix ZBD probe in kernels without ZBD support When the kernel is built without support for zoned block devices, virtio-blk probe needs to error out any host-managed device scans to prevent such devices from appearing in the system as non-zoned. The current virtio-blk code simply bypasses all ZBD checks if CONFIG_BLK_DEV_ZONED is not defined and this leads to host-managed block devices being presented as non-zoned in the OS. This is one of the main problems this patch series is aimed to fix. In this patch, make VIRTIO_BLK_F_ZONED feature defined even when CONFIG_BLK_DEV_ZONED is not. This change makes the code compliant with the voted revision of virtio-blk ZBD spec. Modify the probe code to look at the situation when VIRTIO_BLK_F_ZONED is negotiated in a kernel that is built without ZBD support. In this case, the code checks the zoned model of the device and fails the probe is the device is host-managed. The patch also adds the comment to clarify that the call to perform the zoned device probe is correctly placed after virtio_device ready(). Fixes: 95bfec41bd3d ("virtio-blk: add support for zoned block devices") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev Reviewed-by: Stefan Hajnoczi Reviewed-by: Damien Le Moal Message-Id: <20230330214953.1088216-3-dmitry.fomichev@wdc.com> Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 4f0dbbb3d4a53..2b918e28acaac 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -866,16 +866,12 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, return ret; } -static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev) -{ - return virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED); -} - #else /* * Zoned block device support is not configured in this kernel. - * We only need to define a few symbols to avoid compilation errors. + * Host-managed zoned devices can't be supported, but others are + * good to go as regular block devices. */ #define virtblk_report_zones NULL @@ -886,12 +882,16 @@ static inline void virtblk_revalidate_zones(struct virtio_blk *vblk) static inline int virtblk_probe_zoned_device(struct virtio_device *vdev, struct virtio_blk *vblk, struct request_queue *q) { - return -EOPNOTSUPP; -} + u8 model; -static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev) -{ - return false; + virtio_cread(vdev, struct virtio_blk_config, zoned.model, &model); + if (model == VIRTIO_BLK_Z_HM) { + dev_err(&vdev->dev, + "virtio_blk: zoned devices are not supported"); + return -EOPNOTSUPP; + } + + return 0; } #endif /* CONFIG_BLK_DEV_ZONED */ @@ -1577,7 +1577,11 @@ static int virtblk_probe(struct virtio_device *vdev) virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); - if (virtblk_has_zoned_feature(vdev)) { + /* + * All steps that follow use the VQs therefore they need to be + * placed after the virtio_device_ready() call above. + */ + if (virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED)) { err = virtblk_probe_zoned_device(vdev, vblk, q); if (err) goto out_cleanup_disk; @@ -1683,10 +1687,7 @@ static unsigned int features[] = { VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, - VIRTIO_BLK_F_SECURE_ERASE, -#ifdef CONFIG_BLK_DEV_ZONED - VIRTIO_BLK_F_ZONED, -#endif /* CONFIG_BLK_DEV_ZONED */ + VIRTIO_BLK_F_SECURE_ERASE, VIRTIO_BLK_F_ZONED, }; static struct virtio_driver virtio_blk = { From e508efc3ae7e44eb3caf595a086bfd3824da5b9a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 20 Mar 2023 21:06:18 -0500 Subject: [PATCH 709/898] vhost-scsi: Fix vhost_scsi struct use after free If vhost_scsi_setup_vq_cmds fails we leave the tpg->vhost_scsi pointer set. If the device is freed and then the user unmaps the LUN, the call to vhost_scsi_port_unlink -> vhost_scsi_hotunplug will see the that tpg->vhost_scsi is still set and try to use it. This has us clear the vhost_scsi pointer in the failure path. It also has us take tv_tpg_mutex in this failure path, because tv_tpg_vhost_count is accessed under this mutex in vhost_scsi_drop_nexus and in the future we will want to serialize access to tpg->vhost_scsi with that mutex instead of the vhost_scsi_mutex. Signed-off-by: Mike Christie Message-Id: <20230321020624.13323-2-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index b244e7c0f514c..5875241e1654f 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1658,7 +1658,10 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) { tpg = vs_tpg[i]; if (tpg) { + mutex_lock(&tpg->tv_tpg_mutex); + tpg->vhost_scsi = NULL; tpg->tv_tpg_vhost_count--; + mutex_unlock(&tpg->tv_tpg_mutex); target_undepend_item(&tpg->se_tpg.tpg_group.cg_item); } } From 4c363c81f66c77e0a2394b9a4efa707d122dc544 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 20 Mar 2023 21:06:19 -0500 Subject: [PATCH 710/898] vhost-scsi: Fix crash during LUN unmapping We normally clear the endpoint then unmap LUNs so the devices are fully shutdown when the LUN is unmapped, but it's legal to unmap before clearing. If the user does that while TMFs are running then we can end up crashing. vhost_scsi_port_unlink assumes that the LUN's tmf struct will always be on the tmf_queue list. However, if a TMF is running then it will have been removed while it's executing. If we do a LUN unmap at this time, then we assume the entry is on the list and just start accessing it and free it. This fixes the bug by just allocating the vhost_scsi_tmf struct when it's needed like is done with the se_tmr struct that's needed when we submit the TMF. In this path perf is not an issue and we can use GFP_KERNEL since it won't swing directly back on us, so we don't need to preallocate the struct. Signed-off-by: Mike Christie Message-Id: <20230321020624.13323-3-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 36 ++++-------------------------------- 1 file changed, 4 insertions(+), 32 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 5875241e1654f..32d0be9681035 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -125,7 +125,6 @@ struct vhost_scsi_tpg { struct se_portal_group se_tpg; /* Pointer back to vhost_scsi, protected by tv_tpg_mutex */ struct vhost_scsi *vhost_scsi; - struct list_head tmf_queue; }; struct vhost_scsi_tport { @@ -206,10 +205,8 @@ struct vhost_scsi { struct vhost_scsi_tmf { struct vhost_work vwork; - struct vhost_scsi_tpg *tpg; struct vhost_scsi *vhost; struct vhost_scsi_virtqueue *svq; - struct list_head queue_entry; struct se_cmd se_cmd; u8 scsi_resp; @@ -352,12 +349,9 @@ static void vhost_scsi_release_cmd_res(struct se_cmd *se_cmd) static void vhost_scsi_release_tmf_res(struct vhost_scsi_tmf *tmf) { - struct vhost_scsi_tpg *tpg = tmf->tpg; struct vhost_scsi_inflight *inflight = tmf->inflight; - mutex_lock(&tpg->tv_tpg_mutex); - list_add_tail(&tpg->tmf_queue, &tmf->queue_entry); - mutex_unlock(&tpg->tv_tpg_mutex); + kfree(tmf); vhost_scsi_put_inflight(inflight); } @@ -1194,19 +1188,11 @@ vhost_scsi_handle_tmf(struct vhost_scsi *vs, struct vhost_scsi_tpg *tpg, goto send_reject; } - mutex_lock(&tpg->tv_tpg_mutex); - if (list_empty(&tpg->tmf_queue)) { - pr_err("Missing reserve TMF. Could not handle LUN RESET.\n"); - mutex_unlock(&tpg->tv_tpg_mutex); + tmf = kzalloc(sizeof(*tmf), GFP_KERNEL); + if (!tmf) goto send_reject; - } - - tmf = list_first_entry(&tpg->tmf_queue, struct vhost_scsi_tmf, - queue_entry); - list_del_init(&tmf->queue_entry); - mutex_unlock(&tpg->tv_tpg_mutex); - tmf->tpg = tpg; + vhost_work_init(&tmf->vwork, vhost_scsi_tmf_resp_work); tmf->vhost = vs; tmf->svq = svq; tmf->resp_iov = vq->iov[vc->out]; @@ -2035,19 +2021,11 @@ static int vhost_scsi_port_link(struct se_portal_group *se_tpg, { struct vhost_scsi_tpg *tpg = container_of(se_tpg, struct vhost_scsi_tpg, se_tpg); - struct vhost_scsi_tmf *tmf; - - tmf = kzalloc(sizeof(*tmf), GFP_KERNEL); - if (!tmf) - return -ENOMEM; - INIT_LIST_HEAD(&tmf->queue_entry); - vhost_work_init(&tmf->vwork, vhost_scsi_tmf_resp_work); mutex_lock(&vhost_scsi_mutex); mutex_lock(&tpg->tv_tpg_mutex); tpg->tv_tpg_port_count++; - list_add_tail(&tmf->queue_entry, &tpg->tmf_queue); mutex_unlock(&tpg->tv_tpg_mutex); vhost_scsi_hotplug(tpg, lun); @@ -2062,16 +2040,11 @@ static void vhost_scsi_port_unlink(struct se_portal_group *se_tpg, { struct vhost_scsi_tpg *tpg = container_of(se_tpg, struct vhost_scsi_tpg, se_tpg); - struct vhost_scsi_tmf *tmf; mutex_lock(&vhost_scsi_mutex); mutex_lock(&tpg->tv_tpg_mutex); tpg->tv_tpg_port_count--; - tmf = list_first_entry(&tpg->tmf_queue, struct vhost_scsi_tmf, - queue_entry); - list_del(&tmf->queue_entry); - kfree(tmf); mutex_unlock(&tpg->tv_tpg_mutex); vhost_scsi_hotunplug(tpg, lun); @@ -2332,7 +2305,6 @@ vhost_scsi_make_tpg(struct se_wwn *wwn, const char *name) } mutex_init(&tpg->tv_tpg_mutex); INIT_LIST_HEAD(&tpg->tv_tpg_list); - INIT_LIST_HEAD(&tpg->tmf_queue); tpg->tport = tport; tpg->tport_tpgt = tpgt; From 9513c55ce3e8f0f118c47423452c4a4bdaa80222 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 15 Feb 2023 15:33:50 -0700 Subject: [PATCH 711/898] tools/virtio: fix typo in README instructions We need to have a unique chardev for each data path, else the chardevs will collide and qemu will die with this message: qemu-system-x86_64: -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel0, id=channel1,name=trace-path-cpu0: Property 'virtserialport.chardev' can't take value 'charchannel0': Device 'charchannel0' is in use Signed-off-by: Ross Zwisler Message-Id: <20230215223350.2658616-7-zwisler@google.com> Signed-off-by: Michael S. Tsirkin --- tools/virtio/virtio-trace/README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/virtio/virtio-trace/README b/tools/virtio/virtio-trace/README index b64845b823abd..4fb9368bf7519 100644 --- a/tools/virtio/virtio-trace/README +++ b/tools/virtio/virtio-trace/README @@ -61,7 +61,7 @@ and id=channel0,name=agent-ctl-path\ ##data path## -chardev pipe,id=charchannel1,path=/tmp/virtio-trace/trace-path-cpu0\ - -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel0,\ + -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,\ id=channel1,name=trace-path-cpu0\ ... From f0417e72add5aed5997092b6eb76298290d866c9 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 3 Apr 2023 14:40:39 +0300 Subject: [PATCH 712/898] vdpa/mlx5: Add and remove debugfs in setup/teardown driver The right place to add the debugfs create is in setup_driver() and remove it in teardown_driver(). Current code adds the debugfs when creating the device but resetting a device will remove the debugfs subtree and subsequent set_driver will not be able to create the files since the debugfs pointer is NULL. Fixes: 294221004322 ("vdpa/mlx5: Add debugfs subtree") Signed-off-by: Eli Cohen v3 -> v4: Fix error flow in setup_driver() Message-Id: <20230403114039.11102-1-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 520646ae7fa01..195963b82b636 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2467,10 +2467,11 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev) err = 0; goto out; } + mlx5_vdpa_add_debugfs(ndev); err = setup_virtqueues(mvdev); if (err) { mlx5_vdpa_warn(mvdev, "setup_virtqueues\n"); - goto out; + goto err_setup; } err = create_rqt(ndev); @@ -2500,6 +2501,8 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev) destroy_rqt(ndev); err_rqt: teardown_virtqueues(ndev); +err_setup: + mlx5_vdpa_remove_debugfs(ndev->debugfs); out: return err; } @@ -2513,6 +2516,8 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev) if (!ndev->setup) return; + mlx5_vdpa_remove_debugfs(ndev->debugfs); + ndev->debugfs = NULL; teardown_steering(ndev); destroy_tir(ndev); destroy_rqt(ndev); @@ -3261,7 +3266,6 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, if (err) goto err_reg; - mlx5_vdpa_add_debugfs(ndev); mgtdev->ndev = ndev; return 0; From b4a01ace20f5c93c724abffc0a83ec84f514b98d Mon Sep 17 00:00:00 2001 From: Simei Su Date: Wed, 22 Mar 2023 10:24:15 +0800 Subject: [PATCH 713/898] ice: fix wrong fallback logic for FDIR When adding a FDIR filter, if ice_vc_fdir_set_irq_ctx returns failure, the inserted fdir entry will not be removed and if ice_vc_fdir_write_fltr returns failure, the fdir context info for irq handler will not be cleared which may lead to inconsistent or memory leak issue. This patch refines failure cases to resolve this issue. Fixes: 1f7ea1cd6a37 ("ice: Enable FDIR Configure for AVF") Signed-off-by: Simei Su Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 5fd75e75772ec..4d007d8c25403 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -1871,7 +1871,7 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg) v_ret = VIRTCHNL_STATUS_SUCCESS; stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; dev_dbg(dev, "VF %d: set FDIR context failed\n", vf->vf_id); - goto err_free_conf; + goto err_rem_entry; } ret = ice_vc_fdir_write_fltr(vf, conf, true, is_tun); @@ -1880,15 +1880,16 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg) stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; dev_err(dev, "VF %d: writing FDIR rule failed, ret:%d\n", vf->vf_id, ret); - goto err_rem_entry; + goto err_clr_irq; } exit: kfree(stat); return ret; -err_rem_entry: +err_clr_irq: ice_vc_fdir_clear_irq_ctx(vf); +err_rem_entry: ice_vc_fdir_remove_entry(vf, conf, conf->flow_id); err_free_conf: devm_kfree(dev, conf); From 83c911dc5e0e8e6eaa6431c06972a8f159bfe2fc Mon Sep 17 00:00:00 2001 From: Lingyu Liu Date: Tue, 28 Mar 2023 10:49:11 +0000 Subject: [PATCH 714/898] ice: Reset FDIR counter in FDIR init stage Reset the FDIR counters when FDIR inits. Without this patch, when VF initializes or resets, all the FDIR counters are not cleaned, which may cause unexpected behaviors for future FDIR rule create (e.g., rule conflict). Fixes: 1f7ea1cd6a37 ("ice: Enable FDIR Configure for AVF") Signed-off-by: Junfeng Guo Signed-off-by: Lingyu Liu Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_virtchnl_fdir.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 4d007d8c25403..daa6a1e894cfc 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -541,6 +541,21 @@ static void ice_vc_fdir_rem_prof_all(struct ice_vf *vf) } } +/** + * ice_vc_fdir_reset_cnt_all - reset all FDIR counters for this VF FDIR + * @fdir: pointer to the VF FDIR structure + */ +static void ice_vc_fdir_reset_cnt_all(struct ice_vf_fdir *fdir) +{ + enum ice_fltr_ptype flow; + + for (flow = ICE_FLTR_PTYPE_NONF_NONE; + flow < ICE_FLTR_PTYPE_MAX; flow++) { + fdir->fdir_fltr_cnt[flow][0] = 0; + fdir->fdir_fltr_cnt[flow][1] = 0; + } +} + /** * ice_vc_fdir_has_prof_conflict * @vf: pointer to the VF structure @@ -1998,6 +2013,7 @@ void ice_vf_fdir_init(struct ice_vf *vf) spin_lock_init(&fdir->ctx_lock); fdir->ctx_irq.flags = 0; fdir->ctx_done.flags = 0; + ice_vc_fdir_reset_cnt_all(fdir); } /** From e81625218bf7986ba1351a98c43d346b15601d26 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 4 Apr 2023 16:23:21 +0100 Subject: [PATCH 715/898] KVM: arm64: Advertise ID_AA64PFR0_EL1.CSV2/3 to protected VMs The existing pKVM code attempts to advertise CSV2/3 using values initialized to 0, but never set. To advertise CSV2/3 to protected guests, pass the CSV2/3 values to hyp when initializing hyp's view of guests' ID_AA64PFR0_EL1. Similar to non-protected KVM, these are system-wide, rather than per cpu, for simplicity. Fixes: 6c30bfb18d0b ("KVM: arm64: Add handlers for protected VM System Registers") Signed-off-by: Fuad Tabba Link: https://lore.kernel.org/r/20230404152321.413064-1-tabba@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/arm.c | 26 ++++++++++++++++++- .../arm64/kvm/hyp/include/nvhe/fixed_config.h | 5 +++- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 7 ----- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3bd732eaf0872..e850aeccebb9c 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1889,9 +1889,33 @@ static int __init do_pkvm_init(u32 hyp_va_bits) return ret; } +static u64 get_hyp_id_aa64pfr0_el1(void) +{ + /* + * Track whether the system isn't affected by spectre/meltdown in the + * hypervisor's view of id_aa64pfr0_el1, used for protected VMs. + * Although this is per-CPU, we make it global for simplicity, e.g., not + * to have to worry about vcpu migration. + * + * Unlike for non-protected VMs, userspace cannot override this for + * protected VMs. + */ + u64 val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + + val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | + ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3)); + + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), + arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), + arm64_get_meltdown_state() == SPECTRE_UNAFFECTED); + + return val; +} + static void kvm_hyp_init_symbols(void) { - kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = get_hyp_id_aa64pfr0_el1(); kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1); kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1); diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index 07edfc7524c94..37440e1dda930 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -33,11 +33,14 @@ * Allow for protected VMs: * - Floating-point and Advanced SIMD * - Data Independent Timing + * - Spectre/Meltdown Mitigation */ #define PVM_ID_AA64PFR0_ALLOW (\ ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \ ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \ - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) \ + ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3) \ ) /* diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 08d2b004f4b73..edd969a1f36b5 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -85,19 +85,12 @@ static u64 get_restricted_features_unsigned(u64 sys_reg_val, static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu) { - const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm); u64 set_mask = 0; u64 allow_mask = PVM_ID_AA64PFR0_ALLOW; set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val, PVM_ID_AA64PFR0_RESTRICT_UNSIGNED); - /* Spectre and Meltdown mitigation in KVM */ - set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), - (u64)kvm->arch.pfr0_csv2); - set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), - (u64)kvm->arch.pfr0_csv3); - return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask; } From d564fa1ff19e893e2971d66e5c8f49dc1cdc8ffc Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 9 Jan 2023 15:11:52 +0200 Subject: [PATCH 716/898] asm-generic/io.h: suppress endianness warnings for readq() and writeq() Commit c1d55d50139b ("asm-generic/io.h: Fix sparse warnings on big-endian architectures") missed fixing the 64-bit accessors. Arnd explains in the attached link why the casts are necessary, even if __raw_readq() and __raw_writeq() do not take endian-specific types. Link: https://lore.kernel.org/lkml/9105d6fc-880b-4734-857d-e3d30b87ccf6@app.fastmail.com/ Suggested-by: Arnd Bergmann Signed-off-by: Vladimir Oltean Reviewed-by: Jonathan Cameron Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 4c44a29b5e8ef..d78c3056c98f9 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -236,7 +236,7 @@ static inline u64 readq(const volatile void __iomem *addr) log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); __io_br(); - val = __le64_to_cpu(__raw_readq(addr)); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); __io_ar(val); log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); return val; @@ -287,7 +287,7 @@ static inline void writeq(u64 value, volatile void __iomem *addr) { log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); __io_bw(); - __raw_writeq(__cpu_to_le64(value), addr); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); __io_aw(); log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); } From 05d3855b4d21ef3c2df26be1cbba9d2c68915fcb Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 9 Jan 2023 15:11:53 +0200 Subject: [PATCH 717/898] asm-generic/io.h: suppress endianness warnings for relaxed accessors Copy the forced type casts from the normal MMIO accessors to suppress the sparse warnings that point out __raw_readl() returns a native endian word (just like readl()). Signed-off-by: Vladimir Oltean Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index d78c3056c98f9..587e7e9b9a375 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -319,7 +319,7 @@ static inline u16 readw_relaxed(const volatile void __iomem *addr) u16 val; log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); - val = __le16_to_cpu(__raw_readw(addr)); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); return val; } @@ -332,7 +332,7 @@ static inline u32 readl_relaxed(const volatile void __iomem *addr) u32 val; log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); - val = __le32_to_cpu(__raw_readl(addr)); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); return val; } @@ -345,7 +345,7 @@ static inline u64 readq_relaxed(const volatile void __iomem *addr) u64 val; log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); - val = __le64_to_cpu(__raw_readq(addr)); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); return val; } @@ -366,7 +366,7 @@ static inline void writeb_relaxed(u8 value, volatile void __iomem *addr) static inline void writew_relaxed(u16 value, volatile void __iomem *addr) { log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); - __raw_writew(cpu_to_le16(value), addr); + __raw_writew((u16 __force)cpu_to_le16(value), addr); log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); } #endif @@ -376,7 +376,7 @@ static inline void writew_relaxed(u16 value, volatile void __iomem *addr) static inline void writel_relaxed(u32 value, volatile void __iomem *addr) { log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); - __raw_writel(__cpu_to_le32(value), addr); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); } #endif @@ -386,7 +386,7 @@ static inline void writel_relaxed(u32 value, volatile void __iomem *addr) static inline void writeq_relaxed(u64 value, volatile void __iomem *addr) { log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); - __raw_writeq(__cpu_to_le64(value), addr); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); } #endif From 656e9007ef5862746cdf7ac16267c8e06e7b0989 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Mar 2023 09:53:31 +0100 Subject: [PATCH 718/898] asm-generic: avoid __generic_cmpxchg_local warnings Code that passes a 32-bit constant into cmpxchg() produces a harmless sparse warning because of the truncation in the branch that is not taken: fs/erofs/zdata.c: note: in included file (through /home/arnd/arm-soc/arch/arm/include/asm/cmpxchg.h, /home/arnd/arm-soc/arch/arm/include/asm/atomic.h, /home/arnd/arm-soc/include/linux/atomic.h, ...): include/asm-generic/cmpxchg-local.h:29:33: warning: cast truncates bits from constant value (5f0ecafe becomes fe) include/asm-generic/cmpxchg-local.h:33:34: warning: cast truncates bits from constant value (5f0ecafe becomes cafe) include/asm-generic/cmpxchg-local.h:29:33: warning: cast truncates bits from constant value (5f0ecafe becomes fe) include/asm-generic/cmpxchg-local.h:30:42: warning: cast truncates bits from constant value (5f0edead becomes ad) include/asm-generic/cmpxchg-local.h:33:34: warning: cast truncates bits from constant value (5f0ecafe becomes cafe) include/asm-generic/cmpxchg-local.h:34:44: warning: cast truncates bits from constant value (5f0edead becomes dead) This was reported as a regression to Matt's recent __generic_cmpxchg_local patch, though this patch only added more warnings on top of the ones that were already there. Rewording the truncation to use an explicit bitmask instead of a cast to a smaller type avoids the warning but otherwise leaves the code unchanged. I had another look at why the cast is even needed for atomic_cmpxchg(), and as Matt describes the problem here is that atomic_t contains a signed 'int', but cmpxchg() takes an 'unsigned long' argument, and converting between the two leads to a 64-bit sign-extension of negative 32-bit atomics. I checked the other implementations of arch_cmpxchg() and did not find any others that run into the same problem as __generic_cmpxchg_local(), but it's easy to be on the safe side here and always convert the signed int into an unsigned int when calling arch_cmpxchg(), as this will work even when any of the arch_cmpxchg() implementations run into the same problem. Fixes: 624654152284 ("locking/atomic: cmpxchg: Make __generic_cmpxchg_local compare against zero-extended 'old' value") Reviewed-by: Matt Evans Signed-off-by: Arnd Bergmann --- include/asm-generic/atomic.h | 4 ++-- include/asm-generic/cmpxchg-local.h | 12 ++++++------ include/asm-generic/cmpxchg.h | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 04b8be9f1a77c..e271d6708c876 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -130,7 +130,7 @@ ATOMIC_OP(xor, ^) #define arch_atomic_read(v) READ_ONCE((v)->counter) #define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) -#define arch_atomic_xchg(ptr, v) (arch_xchg(&(ptr)->counter, (v))) -#define arch_atomic_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), (old), (new))) +#define arch_atomic_xchg(ptr, v) (arch_xchg(&(ptr)->counter, (u32)(v))) +#define arch_atomic_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), (u32)(old), (u32)(new))) #endif /* __ASM_GENERIC_ATOMIC_H */ diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h index c3e7315b7c1d6..3df9f59a544e4 100644 --- a/include/asm-generic/cmpxchg-local.h +++ b/include/asm-generic/cmpxchg-local.h @@ -26,16 +26,16 @@ static inline unsigned long __generic_cmpxchg_local(volatile void *ptr, raw_local_irq_save(flags); switch (size) { case 1: prev = *(u8 *)ptr; - if (prev == (u8)old) - *(u8 *)ptr = (u8)new; + if (prev == (old & 0xffu)) + *(u8 *)ptr = (new & 0xffu); break; case 2: prev = *(u16 *)ptr; - if (prev == (u16)old) - *(u16 *)ptr = (u16)new; + if (prev == (old & 0xffffu)) + *(u16 *)ptr = (new & 0xffffu); break; case 4: prev = *(u32 *)ptr; - if (prev == (u32)old) - *(u32 *)ptr = (u32)new; + if (prev == (old & 0xffffffffffu)) + *(u32 *)ptr = (new & 0xffffffffu); break; case 8: prev = *(u64 *)ptr; if (prev == old) diff --git a/include/asm-generic/cmpxchg.h b/include/asm-generic/cmpxchg.h index dca4419922a97..848de25fc4bf8 100644 --- a/include/asm-generic/cmpxchg.h +++ b/include/asm-generic/cmpxchg.h @@ -32,7 +32,7 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size) #else local_irq_save(flags); ret = *(volatile u8 *)ptr; - *(volatile u8 *)ptr = x; + *(volatile u8 *)ptr = (x & 0xffu); local_irq_restore(flags); return ret; #endif /* __xchg_u8 */ @@ -43,7 +43,7 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size) #else local_irq_save(flags); ret = *(volatile u16 *)ptr; - *(volatile u16 *)ptr = x; + *(volatile u16 *)ptr = (x & 0xffffu); local_irq_restore(flags); return ret; #endif /* __xchg_u16 */ @@ -54,7 +54,7 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size) #else local_irq_save(flags); ret = *(volatile u32 *)ptr; - *(volatile u32 *)ptr = x; + *(volatile u32 *)ptr = (x & 0xffffffffu); local_irq_restore(flags); return ret; #endif /* __xchg_u32 */ From fb5015bc8b733323b58f015b88e4f316010ec856 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Fri, 31 Mar 2023 10:31:16 +0100 Subject: [PATCH 719/898] docs: kvm: x86: Fix broken field list Add a missing ":" to fix a broken field list. Signed-off-by: Takahiro Itazuri Fixes: ba7bb663f554 ("KVM: x86: Provide per VM capability for disabling PMU virtualization") Message-Id: <20230331093116.99820-1-itazur@amazon.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 62de0768d6aa5..a5c803f398323 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8296,11 +8296,11 @@ ENOSYS for the others. 8.35 KVM_CAP_PMU_CAPABILITY --------------------------- -:Capability KVM_CAP_PMU_CAPABILITY +:Capability: KVM_CAP_PMU_CAPABILITY :Architectures: x86 :Type: vm :Parameters: arg[0] is bitmask of PMU virtualization capabilities. -:Returns 0 on success, -EINVAL when arg[0] contains invalid bits +:Returns: 0 on success, -EINVAL when arg[0] contains invalid bits This capability alters PMU virtualization in KVM. From 9da667e50c7e62266f3c2f8ad57b32fca40716b1 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 29 Mar 2023 18:03:21 +0200 Subject: [PATCH 720/898] vdpa_sim_net: complete the initialization before register the device Initialization must be completed before calling _vdpa_register_device() since it can connect the device to the vDPA bus, so requests can arrive after that call. So for example vdpasim_net_work(), which uses the net->*_stats variables, can be scheduled before they are initialized. Let's move _vdpa_register_device() to the end of vdpasim_net_dev_add() and add a comment to avoid future issues. Fixes: 0899774cb360 ("vdpa_sim_net: vendor satistics") Signed-off-by: Stefano Garzarella Message-Id: <20230329160321.187176-1-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index 862f405362de2..dfe2ce3418035 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -466,16 +466,21 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, vdpasim_net_setup_config(simdev, config); - ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM); - if (ret) - goto reg_err; - net = sim_to_net(simdev); u64_stats_init(&net->tx_stats.syncp); u64_stats_init(&net->rx_stats.syncp); u64_stats_init(&net->cq_stats.syncp); + /* + * Initialization must be completed before this call, since it can + * connect the device to the vDPA bus, so requests can arrive after + * this call. + */ + ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM); + if (ret) + goto reg_err; + return 0; reg_err: From 38a8c4d1d45006841f0643f4cb29b5e50758837c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 31 Mar 2023 11:00:56 -0700 Subject: [PATCH 721/898] blk-mq: directly poll requests Polling needs a bio with a valid bi_bdev, but neither of those are guaranteed for polled driver requests. Make request based polling directly use blk-mq's polling function instead. When executing a request from a polled hctx, we know the request's cookie, and that it's from a live blk-mq queue that supports polling, so we can safely skip everything that bio_poll provides. Cc: stable@kernel.org Reported-by: Martin Belanger Reported-by: Daniel Wagner Signed-off-by: Keith Busch Tested-by: Daniel Wagner Revieded-by: Daniel Wagner Reviewed-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Tested-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20230331180056.1155862-1-kbusch@meta.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index cf1a39adf9a5b..f0ea9dcfb9662 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1359,8 +1359,6 @@ bool blk_rq_is_poll(struct request *rq) return false; if (rq->mq_hctx->type != HCTX_TYPE_POLL) return false; - if (WARN_ON_ONCE(!rq->bio)) - return false; return true; } EXPORT_SYMBOL_GPL(blk_rq_is_poll); @@ -1368,7 +1366,7 @@ EXPORT_SYMBOL_GPL(blk_rq_is_poll); static void blk_rq_poll_completion(struct request *rq, struct completion *wait) { do { - bio_poll(rq->bio, NULL, 0); + blk_mq_poll(rq->q, blk_rq_to_qc(rq), NULL, 0); cond_resched(); } while (!completion_done(wait)); } From 82f0832af26a30ae5f21b335c5f68b538e710c29 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 4 Apr 2023 15:34:12 -0700 Subject: [PATCH 722/898] cxl/hdm: Fix double allocation of @cxlhdm devm_cxl_setup_emulated_hdm() reallocates an instance of @cxlhdm that was already allocated at the start of devm_cxl_setup_hdm(). Only one is needed and devm_cxl_setup_emulated_hdm() does not do enough to warrant being an explicit helper. Fixes: 4474ce565ee4 ("cxl/hdm: Create emulated cxl_hdm for devices that do not have HDM decoders") Tested-by: Dave Jiang Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167703067936.185722.7908921750127154779.stgit@dwillia2-xfh.jf.intel.com Link: https://lore.kernel.org/r/168012574357.221280.5001364964799725366.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 45deda18ed322..038f88eae226b 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -101,27 +101,6 @@ static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb, BIT(CXL_CM_CAP_CAP_ID_HDM)); } -static struct cxl_hdm *devm_cxl_setup_emulated_hdm(struct cxl_port *port, - struct cxl_endpoint_dvsec_info *info) -{ - struct device *dev = &port->dev; - struct cxl_hdm *cxlhdm; - - if (!info->mem_enabled) - return ERR_PTR(-ENODEV); - - cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL); - if (!cxlhdm) - return ERR_PTR(-ENOMEM); - - cxlhdm->port = port; - cxlhdm->decoder_count = info->ranges; - cxlhdm->target_count = info->ranges; - dev_set_drvdata(&port->dev, cxlhdm); - - return cxlhdm; -} - /** * devm_cxl_setup_hdm - map HDM decoder component registers * @port: cxl_port to map @@ -138,13 +117,14 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL); if (!cxlhdm) return ERR_PTR(-ENOMEM); - cxlhdm->port = port; - crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); - if (!crb) { - if (info && info->mem_enabled) - return devm_cxl_setup_emulated_hdm(port, info); + dev_set_drvdata(dev, cxlhdm); + crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); + if (!crb && info && info->mem_enabled) { + cxlhdm->decoder_count = info->ranges; + return cxlhdm; + } else if (!crb) { dev_err(dev, "No component registers mapped\n"); return ERR_PTR(-ENXIO); } @@ -160,8 +140,6 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, return ERR_PTR(-ENXIO); } - dev_set_drvdata(dev, cxlhdm); - return cxlhdm; } EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, CXL); From b70c2cf95ee1ca2806cb7191504920f8f5b4454e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Apr 2023 14:33:48 -0700 Subject: [PATCH 723/898] cxl/hdm: Skip emulation when driver manages mem_enable If the driver is allowed to enable memory operation itself then it can also turn on HDM decoder support at will. With this the second call to cxl_setup_hdm_decoder_from_dvsec(), when an HDM decoder is not committed, is not needed. Fixes: b777e9bec960 ("cxl/hdm: Emulate HDM decoder from DVSEC range registers") Link: http://lore.kernel.org/r/20230220113657.000042e1@huawei.com Reported-by: Jonathan Cameron Tested-by: Jonathan Cameron Reviewed-by: Jonathan Cameron Reviewed-by: Fan Ni Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/167703068474.185722.664126485486344246.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 31 ++++++++++++++++++------------- drivers/cxl/cxl.h | 4 +++- drivers/cxl/port.c | 2 +- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 038f88eae226b..cc123996b1a47 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -717,19 +717,29 @@ static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port, return 0; } -static bool should_emulate_decoders(struct cxl_port *port) +static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) { - struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); - void __iomem *hdm = cxlhdm->regs.hdm_decoder; + struct cxl_hdm *cxlhdm; + void __iomem *hdm; u32 ctrl; int i; - if (!is_cxl_endpoint(cxlhdm->port)) + if (!info) return false; + cxlhdm = dev_get_drvdata(&info->port->dev); + hdm = cxlhdm->regs.hdm_decoder; + if (!hdm) return true; + /* + * If HDM decoders are present and the driver is in control of + * Mem_Enable skip DVSEC based emulation + */ + if (!info->mem_enabled) + return false; + /* * If any decoders are committed already, there should not be any * emulated DVSEC decoders. @@ -747,7 +757,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, int *target_map, void __iomem *hdm, int which, u64 *dpa_base, struct cxl_endpoint_dvsec_info *info) { - struct cxl_endpoint_decoder *cxled = NULL; + struct cxl_endpoint_decoder *cxled; u64 size, base, skip, dpa_size; bool committed; u32 remainder; @@ -758,12 +768,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, unsigned char target_id[8]; } target_list; - if (should_emulate_decoders(port)) + if (should_emulate_decoders(info)) return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info); - if (is_endpoint_decoder(&cxld->dev)) - cxled = to_cxl_endpoint_decoder(&cxld->dev); - ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which)); base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which)); size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which)); @@ -784,9 +791,6 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, .end = base + size - 1, }; - if (cxled && !committed && range_len(&info->dvsec_range[which])) - return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info); - /* decoders are enabled if committed */ if (committed) { cxld->flags |= CXL_DECODER_F_ENABLE; @@ -824,7 +828,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, if (rc) return rc; - if (!cxled) { + if (!info) { target_list.value = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which)); for (i = 0; i < cxld->interleave_ways; i++) @@ -844,6 +848,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, return -ENXIO; } skip = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SKIP_LOW(which)); + cxled = to_cxl_endpoint_decoder(&cxld->dev); rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip); if (rc) { dev_err(&port->dev, diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index f2b0962a552d5..aab87d74474d8 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -695,13 +695,15 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) /** * struct cxl_endpoint_dvsec_info - Cached DVSEC info - * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE + * @mem_enabled: cached value of mem_enabled in the DVSEC at init time * @ranges: Number of active HDM ranges this device uses. + * @port: endpoint port associated with this info instance * @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE */ struct cxl_endpoint_dvsec_info { bool mem_enabled; int ranges; + struct cxl_port *port; struct range dvsec_range[2]; }; diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 1049bb5ea4961..9c8f46ed336b0 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -78,8 +78,8 @@ static int cxl_switch_port_probe(struct cxl_port *port) static int cxl_endpoint_port_probe(struct cxl_port *port) { + struct cxl_endpoint_dvsec_info info = { .port = port }; struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); - struct cxl_endpoint_dvsec_info info = { 0 }; struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_hdm *cxlhdm; struct cxl_port *root; From d35b495ddf92c964eedf2ac86fdbf88dc3e5cbc9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Apr 2023 14:39:16 -0700 Subject: [PATCH 724/898] cxl/port: Fix find_cxl_root() for RCDs and simplify it The find_cxl_root() helper is used to lookup root decoders and other CXL platform topology information for a given endpoint. It turns out that for RCDs it has never worked. The result of find_cxl_root(&cxlmd->dev) is always NULL for the RCH topology case because it expects to find a cxl_port at the host-bridge. RCH topologies only have the root cxl_port object with the host-bridge as a dport. While there are no reports of this being a problem to date, by inspection region enumeration should crash as a result of this problem, and it does in a local unit test for this scenario. However, an observation that ever since: commit f17b558d6663 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue") ...all callers of find_cxl_root() occur after the memdev connection to the port topology has been established. That means that find_cxl_root() can be simplified to a walk of the endpoint port topology to the root. Switch to that arrangement which also fixes the RCD bug. Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery") Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/168002857715.50647.344876437247313909.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pmem.c | 6 +++--- drivers/cxl/core/port.c | 38 +++++++------------------------------- drivers/cxl/core/region.c | 2 +- drivers/cxl/cxl.h | 4 ++-- drivers/cxl/port.c | 2 +- 5 files changed, 14 insertions(+), 38 deletions(-) diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index c2e4b10937884..f8c38d9972522 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -62,9 +62,9 @@ static int match_nvdimm_bridge(struct device *dev, void *data) return is_cxl_nvdimm_bridge(dev); } -struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *start) +struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd) { - struct cxl_port *port = find_cxl_root(start); + struct cxl_port *port = find_cxl_root(dev_get_drvdata(&cxlmd->dev)); struct device *dev; if (!port) @@ -253,7 +253,7 @@ int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd) struct device *dev; int rc; - cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev); + cxl_nvb = cxl_find_nvdimm_bridge(cxlmd); if (!cxl_nvb) return -ENODEV; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 8ee6b6e2e2a4e..4d1f9c5b5029a 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -823,41 +823,17 @@ static bool dev_is_cxl_root_child(struct device *dev) return false; } -/* Find a 2nd level CXL port that has a dport that is an ancestor of @match */ -static int match_root_child(struct device *dev, const void *match) +struct cxl_port *find_cxl_root(struct cxl_port *port) { - const struct device *iter = NULL; - struct cxl_dport *dport; - struct cxl_port *port; - - if (!dev_is_cxl_root_child(dev)) - return 0; - - port = to_cxl_port(dev); - iter = match; - while (iter) { - dport = cxl_find_dport_by_dev(port, iter); - if (dport) - break; - iter = iter->parent; - } - - return !!iter; -} + struct cxl_port *iter = port; -struct cxl_port *find_cxl_root(struct device *dev) -{ - struct device *port_dev; - struct cxl_port *root; + while (iter && !is_cxl_root(iter)) + iter = to_cxl_port(iter->dev.parent); - port_dev = bus_find_device(&cxl_bus_type, NULL, dev, match_root_child); - if (!port_dev) + if (!iter) return NULL; - - root = to_cxl_port(port_dev->parent); - get_device(&root->dev); - put_device(port_dev); - return root; + get_device(&iter->dev); + return iter; } EXPORT_SYMBOL_NS_GPL(find_cxl_root, CXL); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index f29028148806b..808f23ec4e2bc 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2251,7 +2251,7 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr) * bridge for one device is the same for all. */ if (i == 0) { - cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev); + cxl_nvb = cxl_find_nvdimm_bridge(cxlmd); if (!cxl_nvb) { cxlr_pmem = ERR_PTR(-ENODEV); goto out; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index aab87d74474d8..044a92d9813e2 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -658,7 +658,7 @@ struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port); struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, resource_size_t component_reg_phys, struct cxl_dport *parent_dport); -struct cxl_port *find_cxl_root(struct device *dev); +struct cxl_port *find_cxl_root(struct cxl_port *port); int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd); void cxl_bus_rescan(void); void cxl_bus_drain(void); @@ -760,7 +760,7 @@ struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev); bool is_cxl_nvdimm(struct device *dev); bool is_cxl_nvdimm_bridge(struct device *dev); int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd); -struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *dev); +struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd); #ifdef CONFIG_CXL_REGION bool is_cxl_pmem_region(struct device *dev); diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 9c8f46ed336b0..22a7ab2bae7c7 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -119,7 +119,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) * This can't fail in practice as CXL root exit unregisters all * descendant ports and that in turn synchronizes with cxl_port_probe() */ - root = find_cxl_root(&cxlmd->dev); + root = find_cxl_root(port); /* * Now that all endpoint decoders are successfully enumerated, try to From 030f880342b875c7d714d06d3ca4058ae9f13fee Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Apr 2023 14:44:41 -0700 Subject: [PATCH 725/898] cxl/region: Fix region setup/teardown for RCDs RCDs (CXL memory devices that link train without VH capability and show up as root complex integrated endpoints), hide the presence of the link between the endpoint and the host-bridge. The CXL region setup/teardown paths assume that a link hop is present and go looking for at least one 'struct cxl_port' instance between the CXL root port-object and an endpoint port-object leading to crashes of the form: BUG: kernel NULL pointer dereference, address: 0000000000000008 [..] RIP: 0010:cxl_region_setup_targets+0x3e9/0xae0 [cxl_core] [..] Call Trace: cxl_region_attach+0x46c/0x7a0 [cxl_core] cxl_create_region+0x20b/0x270 [cxl_core] cxl_mock_mem_probe+0x641/0x800 [cxl_mock_mem] platform_probe+0x5b/0xb0 Detect RCDs explicitly and skip walking the non-existent port hierarchy between root and endpoint in that case. While this has been a problem since: commit 0a19bfc8de93 ("cxl/port: Add RCD endpoint port enumeration") ...it becomes a more reliable crash scenario with the new autodiscovery implementation. Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery") Reviewed-by: Ira Weiny Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/168002858268.50647.728091521032131326.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 808f23ec4e2bc..52bbf6268d5fb 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -134,9 +134,13 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) struct cxl_endpoint_decoder *cxled = p->targets[i]; struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_port *iter = cxled_to_port(cxled); + struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_ep *ep; int rc = 0; + if (cxlds->rcd) + goto endpoint_reset; + while (!is_cxl_root(to_cxl_port(iter->dev.parent))) iter = to_cxl_port(iter->dev.parent); @@ -153,6 +157,7 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) return rc; } +endpoint_reset: rc = cxled->cxld.reset(&cxled->cxld); if (rc) return rc; @@ -1199,6 +1204,7 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; struct cxl_endpoint_decoder *cxled; + struct cxl_dev_state *cxlds; struct cxl_memdev *cxlmd; struct cxl_port *iter; struct cxl_ep *ep; @@ -1214,6 +1220,10 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr) for (i = 0; i < p->nr_targets; i++) { cxled = p->targets[i]; cxlmd = cxled_to_memdev(cxled); + cxlds = cxlmd->cxlds; + + if (cxlds->rcd) + continue; iter = cxled_to_port(cxled); while (!is_cxl_root(to_cxl_port(iter->dev.parent))) @@ -1229,14 +1239,24 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; struct cxl_endpoint_decoder *cxled; + struct cxl_dev_state *cxlds; + int i, rc, rch = 0, vh = 0; struct cxl_memdev *cxlmd; struct cxl_port *iter; struct cxl_ep *ep; - int i, rc; for (i = 0; i < p->nr_targets; i++) { cxled = p->targets[i]; cxlmd = cxled_to_memdev(cxled); + cxlds = cxlmd->cxlds; + + /* validate that all targets agree on topology */ + if (!cxlds->rcd) { + vh++; + } else { + rch++; + continue; + } iter = cxled_to_port(cxled); while (!is_cxl_root(to_cxl_port(iter->dev.parent))) @@ -1256,6 +1276,12 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr) } } + if (rch && vh) { + dev_err(&cxlr->dev, "mismatched CXL topologies detected\n"); + cxl_region_teardown_targets(cxlr); + return -ENXIO; + } + return 0; } From 9ff3eec958cf365857ae8a630237ece4f83bb337 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Apr 2023 15:01:20 -0700 Subject: [PATCH 726/898] cxl/region: Move coherence tracking into cxl_region_attach() Each time the contents of a given HPA are potentially changed in a cache incoherent manner the CXL core sets CXL_REGION_F_INCOHERENT to invalidate CPU caches before the region is used. Successful invocation of attach_target() indicates that DPA has been newly assigned to a given HPA in the dynamic region creation flow. However, attach_target() is also reused in the autodiscovery flow where the region was activated by platform firmware. In that case there is no need to invalidate caches because that region is already in active use and nothing about the autodiscovery flow modifies the HPA-to-DPA relationship. In the autodiscovery case cxl_region_attach() exits early after determining the endpoint decoder is already correctly attached to the region. Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery") Reviewed-by: Fan Ni Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/168002858817.50647.1217607907088920888.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 52bbf6268d5fb..b2fd67fcebfb5 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1674,6 +1674,7 @@ static int cxl_region_attach(struct cxl_region *cxlr, if (rc) goto err_decrement; p->state = CXL_CONFIG_ACTIVE; + set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags); } cxled->cxld.interleave_ways = p->interleave_ways; @@ -1775,8 +1776,6 @@ static int attach_target(struct cxl_region *cxlr, down_read(&cxl_dpa_rwsem); rc = cxl_region_attach(cxlr, cxled, pos); - if (rc == 0) - set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags); up_read(&cxl_dpa_rwsem); up_write(&cxl_region_rwsem); return rc; From 52cc48ad2a76a5fe82d239044d67944bbb928de6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Apr 2023 15:13:37 -0700 Subject: [PATCH 727/898] cxl/hdm: Limit emulation to the number of range registers Recall that range register emulation seeks to treat the 2 potential range registers as Linux CXL "decoder" objects. The number of range registers can be 1 or 2, while HDM decoder ranges can include more than 2. Be careful not to confuse DVSEC range count with HDM capability decoder count. Commit to range register earlier in devm_cxl_setup_hdm(). Otherwise, a device with more HDM decoders than range registers can set @cxlhdm->decoder_count to an invalid value. Avoid introducing a forward declaration by just moving the definition of should_emulate_decoders() earlier in the file. should_emulate_decoders() is unchanged. Tested-by: Dave Jiang Fixes: d7a2153762c7 ("cxl/hdm: Add emulation when HDM decoders are not committed") Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/168012574932.221280.15944705098679646436.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 82 +++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index cc123996b1a47..9884b6d4d930d 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -101,6 +101,42 @@ static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb, BIT(CXL_CM_CAP_CAP_ID_HDM)); } +static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) +{ + struct cxl_hdm *cxlhdm; + void __iomem *hdm; + u32 ctrl; + int i; + + if (!info) + return false; + + cxlhdm = dev_get_drvdata(&info->port->dev); + hdm = cxlhdm->regs.hdm_decoder; + + if (!hdm) + return true; + + /* + * If HDM decoders are present and the driver is in control of + * Mem_Enable skip DVSEC based emulation + */ + if (!info->mem_enabled) + return false; + + /* + * If any decoders are committed already, there should not be any + * emulated DVSEC decoders. + */ + for (i = 0; i < cxlhdm->decoder_count; i++) { + ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); + if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl)) + return false; + } + + return true; +} + /** * devm_cxl_setup_hdm - map HDM decoder component registers * @port: cxl_port to map @@ -140,6 +176,16 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, return ERR_PTR(-ENXIO); } + /* + * Now that the hdm capability is parsed, decide if range + * register emulation is needed and fixup cxlhdm accordingly. + */ + if (should_emulate_decoders(info)) { + dev_dbg(dev, "Fallback map %d range register%s\n", info->ranges, + info->ranges > 1 ? "s" : ""); + cxlhdm->decoder_count = info->ranges; + } + return cxlhdm; } EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, CXL); @@ -717,42 +763,6 @@ static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port, return 0; } -static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) -{ - struct cxl_hdm *cxlhdm; - void __iomem *hdm; - u32 ctrl; - int i; - - if (!info) - return false; - - cxlhdm = dev_get_drvdata(&info->port->dev); - hdm = cxlhdm->regs.hdm_decoder; - - if (!hdm) - return true; - - /* - * If HDM decoders are present and the driver is in control of - * Mem_Enable skip DVSEC based emulation - */ - if (!info->mem_enabled) - return false; - - /* - * If any decoders are committed already, there should not be any - * emulated DVSEC decoders. - */ - for (i = 0; i < cxlhdm->decoder_count; i++) { - ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); - if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl)) - return false; - } - - return true; -} - static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, int *target_map, void __iomem *hdm, int which, u64 *dpa_base, struct cxl_endpoint_dvsec_info *info) From 24b18197184ac39bb8566fb82c0bf788bcd0d45b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Apr 2023 16:01:32 -0700 Subject: [PATCH 728/898] cxl/hdm: Extend DVSEC range register emulation for region enumeration One motivation for mapping range registers to decoder objects is to use those settings for region autodiscovery. The need to map a region for devices programmed to use range registers is especially urgent now that the kernel no longer routes "Soft Reserved" ranges in the memory map to device-dax by default. The CXL memory range loses all access mechanisms. Complete the implementation by marking the DPA reservation and setting the endpoint-decoder state to signal autodiscovery. Note that the default settings of ways=1 and granularity=4096 set in cxl_decode_init() do not need to be updated. Fixes: 09d09e04d2fc ("cxl/dax: Create dax devices for CXL RAM regions") Tested-by: Dave Jiang Tested-by: Gregory Price Link: https://lore.kernel.org/r/168012575521.221280.14177293493678527326.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 9884b6d4d930d..02cc2c38b44ba 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -738,14 +738,20 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld) return 0; } -static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port, - struct cxl_decoder *cxld, int which, - struct cxl_endpoint_dvsec_info *info) +static int cxl_setup_hdm_decoder_from_dvsec( + struct cxl_port *port, struct cxl_decoder *cxld, u64 *dpa_base, + int which, struct cxl_endpoint_dvsec_info *info) { + struct cxl_endpoint_decoder *cxled; + u64 len; + int rc; + if (!is_cxl_endpoint(port)) return -EOPNOTSUPP; - if (!range_len(&info->dvsec_range[which])) + cxled = to_cxl_endpoint_decoder(&cxld->dev); + len = range_len(&info->dvsec_range[which]); + if (!len) return -ENOENT; cxld->target_type = CXL_DECODER_EXPANDER; @@ -760,6 +766,16 @@ static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port, cxld->flags |= CXL_DECODER_F_ENABLE | CXL_DECODER_F_LOCK; port->commit_end = cxld->id; + rc = devm_cxl_dpa_reserve(cxled, *dpa_base, len, 0); + if (rc) { + dev_err(&port->dev, + "decoder%d.%d: Failed to reserve DPA range %#llx - %#llx\n (%d)", + port->id, cxld->id, *dpa_base, *dpa_base + len - 1, rc); + return rc; + } + *dpa_base += len; + cxled->state = CXL_DECODER_STATE_AUTO; + return 0; } @@ -779,7 +795,8 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, } target_list; if (should_emulate_decoders(info)) - return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info); + return cxl_setup_hdm_decoder_from_dvsec(port, cxld, dpa_base, + which, info); ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which)); base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which)); From 0a78cf7264d29abeca098eae0b188a10aabc8a32 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 3 Apr 2023 12:49:58 -0700 Subject: [PATCH 729/898] raw: Fix NULL deref in raw_get_next(). Dae R. Jeong reported a NULL deref in raw_get_next() [0]. It seems that the repro was running these sequences in parallel so that one thread was iterating on a socket that was being freed in another netns. unshare(0x40060200) r0 = syz_open_procfs(0x0, &(0x7f0000002080)='net/raw\x00') socket$inet_icmp_raw(0x2, 0x3, 0x1) pread64(r0, &(0x7f0000000000)=""/10, 0xa, 0x10000000007f) After commit 0daf07e52709 ("raw: convert raw sockets to RCU"), we use RCU and hlist_nulls_for_each_entry() to iterate over SOCK_RAW sockets. However, we should use spinlock for slow paths to avoid the NULL deref. Also, SOCK_RAW does not use SLAB_TYPESAFE_BY_RCU, and the slab object is not reused during iteration in the grace period. In fact, the lockless readers do not check the nulls marker with get_nulls_value(). So, SOCK_RAW should use hlist instead of hlist_nulls. Instead of adding an unnecessary barrier by sk_nulls_for_each_rcu(), let's convert hlist_nulls to hlist and use sk_for_each_rcu() for fast paths and sk_for_each() and spinlock for /proc/net/raw. [0]: general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f] CPU: 2 PID: 20952 Comm: syz-executor.0 Not tainted 6.2.0-g048ec869bafd-dirty #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline] RIP: 0010:sock_net include/net/sock.h:649 [inline] RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline] RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline] RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995 Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206 RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000 RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338 RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9 R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78 R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030 FS: 00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055bb9614b35f CR3: 000000003c672000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: seq_read_iter+0x4c6/0x10f0 fs/seq_file.c:225 seq_read+0x224/0x320 fs/seq_file.c:162 pde_read fs/proc/inode.c:316 [inline] proc_reg_read+0x23f/0x330 fs/proc/inode.c:328 vfs_read+0x31e/0xd30 fs/read_write.c:468 ksys_pread64 fs/read_write.c:665 [inline] __do_sys_pread64 fs/read_write.c:675 [inline] __se_sys_pread64 fs/read_write.c:672 [inline] __x64_sys_pread64+0x1e9/0x280 fs/read_write.c:672 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x4e/0xa0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x478d29 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f843ae8dbe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000011 RAX: ffffffffffffffda RBX: 0000000000791408 RCX: 0000000000478d29 RDX: 000000000000000a RSI: 0000000020000000 RDI: 0000000000000003 RBP: 00000000f477909a R08: 0000000000000000 R09: 0000000000000000 R10: 000010000000007f R11: 0000000000000246 R12: 0000000000791740 R13: 0000000000791414 R14: 0000000000791408 R15: 00007ffc2eb48a50 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline] RIP: 0010:sock_net include/net/sock.h:649 [inline] RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline] RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline] RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995 Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206 RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000 RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338 RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9 R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78 R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030 FS: 00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f92ff166000 CR3: 000000003c672000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 0daf07e52709 ("raw: convert raw sockets to RCU") Reported-by: syzbot Reported-by: Dae R. Jeong Link: https://lore.kernel.org/netdev/ZCA2mGV_cmq7lIfV@dragonet/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/raw.h | 4 ++-- net/ipv4/raw.c | 36 +++++++++++++++++++----------------- net/ipv4/raw_diag.c | 10 ++++------ net/ipv6/raw.c | 10 ++++------ 4 files changed, 29 insertions(+), 31 deletions(-) diff --git a/include/net/raw.h b/include/net/raw.h index 2c004c20ed996..3af5289fdead9 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -37,7 +37,7 @@ int raw_rcv(struct sock *, struct sk_buff *); struct raw_hashinfo { spinlock_t lock; - struct hlist_nulls_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned; + struct hlist_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned; }; static inline u32 raw_hashfunc(const struct net *net, u32 proto) @@ -51,7 +51,7 @@ static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo) spin_lock_init(&hashinfo->lock); for (i = 0; i < RAW_HTABLE_SIZE; i++) - INIT_HLIST_NULLS_HEAD(&hashinfo->ht[i], i); + INIT_HLIST_HEAD(&hashinfo->ht[i]); } #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 94df935ee0c5a..8088a5011e7df 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -91,12 +91,12 @@ EXPORT_SYMBOL_GPL(raw_v4_hashinfo); int raw_hash_sk(struct sock *sk) { struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; - struct hlist_nulls_head *hlist; + struct hlist_head *hlist; hlist = &h->ht[raw_hashfunc(sock_net(sk), inet_sk(sk)->inet_num)]; spin_lock(&h->lock); - __sk_nulls_add_node_rcu(sk, hlist); + sk_add_node_rcu(sk, hlist); sock_set_flag(sk, SOCK_RCU_FREE); spin_unlock(&h->lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -110,7 +110,7 @@ void raw_unhash_sk(struct sock *sk) struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; spin_lock(&h->lock); - if (__sk_nulls_del_node_init_rcu(sk)) + if (sk_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock(&h->lock); } @@ -163,16 +163,15 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) static int raw_v4_input(struct net *net, struct sk_buff *skb, const struct iphdr *iph, int hash) { - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; int sdif = inet_sdif(skb); + struct hlist_head *hlist; int dif = inet_iif(skb); int delivered = 0; struct sock *sk; hlist = &raw_v4_hashinfo.ht[hash]; rcu_read_lock(); - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { if (!raw_v4_match(net, sk, iph->protocol, iph->saddr, iph->daddr, dif, sdif)) continue; @@ -264,10 +263,9 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) { struct net *net = dev_net(skb->dev); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; int dif = skb->dev->ifindex; int sdif = inet_sdif(skb); + struct hlist_head *hlist; const struct iphdr *iph; struct sock *sk; int hash; @@ -276,7 +274,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) hlist = &raw_v4_hashinfo.ht[hash]; rcu_read_lock(); - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { iph = (const struct iphdr *)skb->data; if (!raw_v4_match(net, sk, iph->protocol, iph->daddr, iph->saddr, dif, sdif)) @@ -950,14 +948,13 @@ static struct sock *raw_get_first(struct seq_file *seq, int bucket) { struct raw_hashinfo *h = pde_data(file_inode(seq->file)); struct raw_iter_state *state = raw_seq_private(seq); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; + struct hlist_head *hlist; struct sock *sk; for (state->bucket = bucket; state->bucket < RAW_HTABLE_SIZE; ++state->bucket) { hlist = &h->ht[state->bucket]; - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each(sk, hlist) { if (sock_net(sk) == seq_file_net(seq)) return sk; } @@ -970,7 +967,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) struct raw_iter_state *state = raw_seq_private(seq); do { - sk = sk_nulls_next(sk); + sk = sk_next(sk); } while (sk && sock_net(sk) != seq_file_net(seq)); if (!sk) @@ -989,9 +986,12 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos) } void *raw_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) + __acquires(&h->lock) { - rcu_read_lock(); + struct raw_hashinfo *h = pde_data(file_inode(seq->file)); + + spin_lock(&h->lock); + return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } EXPORT_SYMBOL_GPL(raw_seq_start); @@ -1010,9 +1010,11 @@ void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos) EXPORT_SYMBOL_GPL(raw_seq_next); void raw_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) + __releases(&h->lock) { - rcu_read_unlock(); + struct raw_hashinfo *h = pde_data(file_inode(seq->file)); + + spin_unlock(&h->lock); } EXPORT_SYMBOL_GPL(raw_seq_stop); diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index 999321834b94a..da3591a66a169 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -57,8 +57,7 @@ static bool raw_lookup(struct net *net, struct sock *sk, static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r) { struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; + struct hlist_head *hlist; struct sock *sk; int slot; @@ -68,7 +67,7 @@ static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 rcu_read_lock(); for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) { hlist = &hashinfo->ht[slot]; - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { if (raw_lookup(net, sk, r)) { /* * Grab it and keep until we fill @@ -142,9 +141,8 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); struct net *net = sock_net(skb->sk); struct inet_diag_dump_data *cb_data; - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; int num, s_num, slot, s_slot; + struct hlist_head *hlist; struct sock *sk = NULL; struct nlattr *bc; @@ -161,7 +159,7 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, num = 0; hlist = &hashinfo->ht[slot]; - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index bac9ba747bdec..a327aa481df48 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -141,10 +141,9 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister); static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) { struct net *net = dev_net(skb->dev); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; const struct in6_addr *saddr; const struct in6_addr *daddr; + struct hlist_head *hlist; struct sock *sk; bool delivered = false; __u8 hash; @@ -155,7 +154,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) hash = raw_hashfunc(net, nexthdr); hlist = &raw_v6_hashinfo.ht[hash]; rcu_read_lock(); - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { int filtered; if (!raw_v6_match(net, sk, nexthdr, daddr, saddr, @@ -333,15 +332,14 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, u8 type, u8 code, int inner_offset, __be32 info) { struct net *net = dev_net(skb->dev); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; + struct hlist_head *hlist; struct sock *sk; int hash; hash = raw_hashfunc(net, nexthdr); hlist = &raw_v6_hashinfo.ht[hash]; rcu_read_lock(); - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { /* Note: ipv6_hdr(skb) != skb->data */ const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data; From ab5fb73ffa01072b4d8031cc05801fa1cb653bee Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 3 Apr 2023 12:49:59 -0700 Subject: [PATCH 730/898] ping: Fix potentail NULL deref for /proc/net/icmp. After commit dbca1596bbb0 ("ping: convert to RCU lookups, get rid of rwlock"), we use RCU for ping sockets, but we should use spinlock for /proc/net/icmp to avoid a potential NULL deref mentioned in the previous patch. Let's go back to using spinlock there. Note we can convert ping sockets to use hlist instead of hlist_nulls because we do not use SLAB_TYPESAFE_BY_RCU for ping sockets. Fixes: dbca1596bbb0 ("ping: convert to RCU lookups, get rid of rwlock") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/ipv4/ping.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 409ec2a1f95b0..5178a3f3cb537 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1089,13 +1089,13 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos) } void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family) - __acquires(RCU) + __acquires(ping_table.lock) { struct ping_iter_state *state = seq->private; state->bucket = 0; state->family = family; - rcu_read_lock(); + spin_lock(&ping_table.lock); return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } @@ -1121,9 +1121,9 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) EXPORT_SYMBOL_GPL(ping_seq_next); void ping_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) + __releases(ping_table.lock) { - rcu_read_unlock(); + spin_unlock(&ping_table.lock); } EXPORT_SYMBOL_GPL(ping_seq_stop); From e847c7675e19ef344913724dc68f83df31ad6a17 Mon Sep 17 00:00:00 2001 From: Andy Roulin Date: Mon, 3 Apr 2023 14:20:53 -0700 Subject: [PATCH 731/898] ethtool: reset #lanes when lanes is omitted If the number of lanes was forced and then subsequently the user omits this parameter, the ksettings->lanes is reset. The driver should then reset the number of lanes to the device's default for the specified speed. However, although the ksettings->lanes is set to 0, the mod variable is not set to true to indicate the driver and userspace should be notified of the changes. The consequence is that the same ethtool operation will produce different results based on the initial state. If the initial state is: $ ethtool swp1 | grep -A 3 'Speed: ' Speed: 500000Mb/s Lanes: 2 Duplex: Full Auto-negotiation: on then executing 'ethtool -s swp1 speed 50000 autoneg off' will yield: $ ethtool swp1 | grep -A 3 'Speed: ' Speed: 500000Mb/s Lanes: 2 Duplex: Full Auto-negotiation: off While if the initial state is: $ ethtool swp1 | grep -A 3 'Speed: ' Speed: 500000Mb/s Lanes: 1 Duplex: Full Auto-negotiation: off executing the same 'ethtool -s swp1 speed 50000 autoneg off' results in: $ ethtool swp1 | grep -A 3 'Speed: ' Speed: 500000Mb/s Lanes: 1 Duplex: Full Auto-negotiation: off This patch fixes this behavior. Omitting lanes will always results in the driver choosing the default lane width for the chosen speed. In this scenario, regardless of the initial state, the end state will be, e.g., $ ethtool swp1 | grep -A 3 'Speed: ' Speed: 500000Mb/s Lanes: 2 Duplex: Full Auto-negotiation: off Fixes: 012ce4dd3102 ("ethtool: Extend link modes settings uAPI with lanes") Signed-off-by: Andy Roulin Reviewed-by: Danielle Ratson Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/ac238d6b-8726-8156-3810-6471291dbc7f@nvidia.com Signed-off-by: Jakub Kicinski --- net/ethtool/linkmodes.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index fab66c169b9fa..20165e07ef901 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -270,11 +270,12 @@ static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb, "lanes configuration not supported by device"); return -EOPNOTSUPP; } - } else if (!lsettings->autoneg) { - /* If autoneg is off and lanes parameter is not passed from user, - * set the lanes parameter to 0. + } else if (!lsettings->autoneg && ksettings->lanes) { + /* If autoneg is off and lanes parameter is not passed from user but + * it was defined previously then set the lanes parameter to 0. */ ksettings->lanes = 0; + *mod = true; } ret = ethnl_update_bitset(ksettings->link_modes.advertising, From a1865f2e7d10dde00d35a2122b38d2e469ae67ed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Apr 2023 21:46:43 +0000 Subject: [PATCH 732/898] netlink: annotate lockless accesses to nlk->max_recvmsg_len syzbot reported a data-race in data-race in netlink_recvmsg() [1] Indeed, netlink_recvmsg() can be run concurrently, and netlink_dump() also needs protection. [1] BUG: KCSAN: data-race in netlink_recvmsg / netlink_recvmsg read to 0xffff888141840b38 of 8 bytes by task 23057 on cpu 0: netlink_recvmsg+0xea/0x730 net/netlink/af_netlink.c:1988 sock_recvmsg_nosec net/socket.c:1017 [inline] sock_recvmsg net/socket.c:1038 [inline] __sys_recvfrom+0x1ee/0x2e0 net/socket.c:2194 __do_sys_recvfrom net/socket.c:2212 [inline] __se_sys_recvfrom net/socket.c:2208 [inline] __x64_sys_recvfrom+0x78/0x90 net/socket.c:2208 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd write to 0xffff888141840b38 of 8 bytes by task 23037 on cpu 1: netlink_recvmsg+0x114/0x730 net/netlink/af_netlink.c:1989 sock_recvmsg_nosec net/socket.c:1017 [inline] sock_recvmsg net/socket.c:1038 [inline] ____sys_recvmsg+0x156/0x310 net/socket.c:2720 ___sys_recvmsg net/socket.c:2762 [inline] do_recvmmsg+0x2e5/0x710 net/socket.c:2856 __sys_recvmmsg net/socket.c:2935 [inline] __do_sys_recvmmsg net/socket.c:2958 [inline] __se_sys_recvmmsg net/socket.c:2951 [inline] __x64_sys_recvmmsg+0xe2/0x160 net/socket.c:2951 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x0000000000000000 -> 0x0000000000001000 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 23037 Comm: syz-executor.2 Not tainted 6.3.0-rc4-syzkaller-00195-g5a57b48fdfcb #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023 Fixes: 9063e21fb026 ("netlink: autosize skb lengthes") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230403214643.768555-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/netlink/af_netlink.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c642776597531..f365dfdd672d7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1952,7 +1952,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, struct scm_cookie scm; struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); - size_t copied; + size_t copied, max_recvmsg_len; struct sk_buff *skb, *data_skb; int err, ret; @@ -1985,9 +1985,10 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, #endif /* Record the max length of recvmsg() calls for future allocations */ - nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len); - nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len, - SKB_WITH_OVERHEAD(32768)); + max_recvmsg_len = max(READ_ONCE(nlk->max_recvmsg_len), len); + max_recvmsg_len = min_t(size_t, max_recvmsg_len, + SKB_WITH_OVERHEAD(32768)); + WRITE_ONCE(nlk->max_recvmsg_len, max_recvmsg_len); copied = data_skb->len; if (len < copied) { @@ -2236,6 +2237,7 @@ static int netlink_dump(struct sock *sk) struct netlink_ext_ack extack = {}; struct netlink_callback *cb; struct sk_buff *skb = NULL; + size_t max_recvmsg_len; struct module *module; int err = -ENOBUFS; int alloc_min_size; @@ -2258,8 +2260,9 @@ static int netlink_dump(struct sock *sk) cb = &nlk->cb; alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); - if (alloc_min_size < nlk->max_recvmsg_len) { - alloc_size = nlk->max_recvmsg_len; + max_recvmsg_len = READ_ONCE(nlk->max_recvmsg_len); + if (alloc_min_size < max_recvmsg_len) { + alloc_size = max_recvmsg_len; skb = alloc_skb(alloc_size, (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) | __GFP_NOWARN | __GFP_NORETRY); From 3ce9345580974863c060fa32971537996a7b2d57 Mon Sep 17 00:00:00 2001 From: Shailend Chand Date: Mon, 3 Apr 2023 10:28:09 -0700 Subject: [PATCH 733/898] gve: Secure enough bytes in the first TX desc for all TCP pkts Non-GSO TCP packets whose SKBs' linear portion did not include the entire TCP header were not populating the first Tx descriptor with as many bytes as the vNIC expected. This change ensures that all TCP packets populate the first descriptor with the correct number of bytes. Fixes: 893ce44df565 ("gve: Add basic driver framework for Compute Engine Virtual NIC") Signed-off-by: Shailend Chand Link: https://lore.kernel.org/r/20230403172809.2939306-1-shailend@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve.h | 2 ++ drivers/net/ethernet/google/gve/gve_tx.c | 12 +++++------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 64eb0442c82fd..005cb9dfe078b 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -47,6 +47,8 @@ #define GVE_RX_BUFFER_SIZE_DQO 2048 +#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182 + /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */ struct gve_rx_desc_queue { struct gve_rx_desc *desc_ring; /* the descriptor ring */ diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 4888bf05fbedb..5e11b82367545 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -284,8 +284,8 @@ static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx, int bytes; int hlen; - hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) + - tcp_hdrlen(skb) : skb_headlen(skb); + hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) + tcp_hdrlen(skb) : + min_t(int, GVE_GQ_TX_MIN_PKT_DESC_BYTES, skb->len); pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, hlen); @@ -454,13 +454,11 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st pkt_desc = &tx->desc[idx]; l4_hdr_offset = skb_checksum_start_offset(skb); - /* If the skb is gso, then we want the tcp header in the first segment - * otherwise we want the linear portion of the skb (which will contain - * the checksum because skb->csum_start and skb->csum_offset are given - * relative to skb->head) in the first segment. + /* If the skb is gso, then we want the tcp header alone in the first segment + * otherwise we want the minimum required by the gVNIC spec. */ hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) : - skb_headlen(skb); + min_t(int, GVE_GQ_TX_MIN_PKT_DESC_BYTES, skb->len); info->skb = skb; /* We don't want to split the header, so if necessary, pad to the end From 9d52727f8043cfda241ae96896628d92fa9c50bb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 4 Apr 2023 22:21:14 -0400 Subject: [PATCH 734/898] tracing: Have tracing_snapshot_instance_cond() write errors to the appropriate instance If a trace instance has a failure with its snapshot code, the error message is to be written to that instance's buffer. But currently, the message is written to the top level buffer. Worse yet, it may also disable the top level buffer and not the instance that had the issue. Link: https://lkml.kernel.org/r/20230405022341.688730321@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Cc: Ross Zwisler Fixes: 2824f50332486 ("tracing: Make the snapshot trigger work with instances") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 937e9676dfd42..ed1d1093f5e9d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1149,22 +1149,22 @@ static void tracing_snapshot_instance_cond(struct trace_array *tr, unsigned long flags; if (in_nmi()) { - internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n"); - internal_trace_puts("*** snapshot is being ignored ***\n"); + trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n"); + trace_array_puts(tr, "*** snapshot is being ignored ***\n"); return; } if (!tr->allocated_snapshot) { - internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n"); - internal_trace_puts("*** stopping trace here! ***\n"); - tracing_off(); + trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n"); + trace_array_puts(tr, "*** stopping trace here! ***\n"); + tracer_tracing_off(tr); return; } /* Note, snapshot can not be used when the tracer uses it */ if (tracer->use_max_tr) { - internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n"); - internal_trace_puts("*** Can not use snapshot (sorry) ***\n"); + trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n"); + trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); return; } From e94891641c21f607e4d6887bcd3beff882fcc483 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 4 Apr 2023 22:21:15 -0400 Subject: [PATCH 735/898] tracing: Fix ftrace_boot_snapshot command line logic The kernel command line ftrace_boot_snapshot by itself is supposed to trigger a snapshot at the end of boot up of the main top level trace buffer. A ftrace_boot_snapshot=foo will do the same for an instance called foo that was created by trace_instance=foo,... The logic was broken where if ftrace_boot_snapshot was by itself, it would trigger a snapshot for all instances that had tracing enabled, regardless if it asked for a snapshot or not. When a snapshot is requested for a buffer, the buffer's tr->allocated_snapshot is set to true. Use that to know if a trace buffer wants a snapshot at boot up or not. Since the top level buffer is part of the ftrace_trace_arrays list, there's no reason to treat it differently than the other buffers. Just iterate the list if ftrace_boot_snapshot was specified. Link: https://lkml.kernel.org/r/20230405022341.895334039@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Cc: Ross Zwisler Fixes: 9c1c251d670bc ("tracing: Allow boot instances to have snapshot buffers") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ed1d1093f5e9d..4686473b8497b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -10393,19 +10393,20 @@ __init static int tracer_alloc_buffers(void) void __init ftrace_boot_snapshot(void) { +#ifdef CONFIG_TRACER_MAX_TRACE struct trace_array *tr; - if (snapshot_at_boot) { - tracing_snapshot(); - internal_trace_puts("** Boot snapshot taken **\n"); - } + if (!snapshot_at_boot) + return; list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (tr == &global_trace) + if (!tr->allocated_snapshot) continue; - trace_array_puts(tr, "** Boot snapshot taken **\n"); + tracing_snapshot_instance(tr); + trace_array_puts(tr, "** Boot snapshot taken **\n"); } +#endif } void __init early_trace_init(void) From 774e7cb50359eff7e966f4e2e80c9486014d3a23 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Fri, 31 Mar 2023 13:36:02 +0200 Subject: [PATCH 736/898] accel/ivpu: Add dma fence to command buffers only Currently job->done_fence is added to every BO handle within a job. If job handle (command buffer) is shared between multiple submits, KMD will add the fence in each of them. Then bo_wait_ioctl() executed on command buffer will exit only when all jobs containing that handle are done. This creates deadlock scenario for user mode driver in case when job handle is added as dependency of another job, because bo_wait_ioctl() of first job will wait until second job finishes, and second job can not finish before first one. Having fences added only to job buffer handle allows user space to execute bo_wait_ioctl() on the job even if it's handle is submitted with other job. Fixes: cd7272215c44 ("accel/ivpu: Add command buffer submission logic") Signed-off-by: Karol Wachowski Signed-off-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20230331113603.2802515-2-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 910301fae4351..3c6f1e16cf2ff 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -461,26 +461,22 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset; - ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count, - &acquire_ctx); + ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, 1, &acquire_ctx); if (ret) { ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret); return ret; } - for (i = 0; i < buf_count; i++) { - ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1); - if (ret) { - ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret); - goto unlock_reservations; - } + ret = dma_resv_reserve_fences(bo->base.resv, 1); + if (ret) { + ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret); + goto unlock_reservations; } - for (i = 0; i < buf_count; i++) - dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE); + dma_resv_add_fence(bo->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE); unlock_reservations: - drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx); + drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, 1, &acquire_ctx); wmb(); /* Flush write combining buffers */ From 0ec8671837a61d841462179686c5819d951d3b10 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Fri, 31 Mar 2023 13:36:03 +0200 Subject: [PATCH 737/898] accel/ivpu: Fix S3 system suspend when not idle Wait for VPU to be idle in ivpu_pm_suspend_cb() before powering off the device, so jobs are not lost and TDRs are not triggered after resume. Fixes: 852be13f3bd3 ("accel/ivpu: Add PM support") Signed-off-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20230331113603.2802515-3-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_pm.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 7df72fa8100f9..bde42d6383da6 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -140,32 +140,28 @@ int ivpu_pm_suspend_cb(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); struct ivpu_device *vdev = to_ivpu_device(drm); - int ret; + unsigned long timeout; ivpu_dbg(vdev, PM, "Suspend..\n"); - ret = ivpu_suspend(vdev); - if (ret && vdev->pm->suspend_reschedule_counter) { - ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n", - vdev->pm->suspend_reschedule_counter); - pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend); - vdev->pm->suspend_reschedule_counter--; - return -EBUSY; - } else if (!vdev->pm->suspend_reschedule_counter) { - ivpu_warn(vdev, "Failed to enter idle, force suspend\n"); - ivpu_pm_prepare_cold_boot(vdev); - } else { - ivpu_pm_prepare_warm_boot(vdev); + timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr); + while (!ivpu_hw_is_idle(vdev)) { + cond_resched(); + if (time_after_eq(jiffies, timeout)) { + ivpu_err(vdev, "Failed to enter idle on system suspend\n"); + return -EBUSY; + } } - vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; + ivpu_suspend(vdev); + ivpu_pm_prepare_warm_boot(vdev); pci_save_state(to_pci_dev(dev)); pci_set_power_state(to_pci_dev(dev), PCI_D3hot); ivpu_dbg(vdev, PM, "Suspend done.\n"); - return ret; + return 0; } int ivpu_pm_resume_cb(struct device *dev) From b168098912926236bbeebaf7795eb7aab76d2b45 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 3 Apr 2023 11:08:58 +0200 Subject: [PATCH 738/898] perf: Optimize perf_pmu_migrate_context() Thomas reported that offlining CPUs spends a lot of time in synchronize_rcu() as called from perf_pmu_migrate_context() even though he's not actually using uncore events. Turns out, the thing is unconditionally waiting for RCU, even if there's no actual events to migrate. Fixes: 0cda4c023132 ("perf: Introduce perf_pmu_migrate_context()") Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Paul E. McKenney Link: https://lkml.kernel.org/r/20230403090858.GT4253@hirez.programming.kicks-ass.net --- kernel/events/core.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index fb3e436bcd4ac..115320faf1db4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -12893,12 +12893,14 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) __perf_pmu_remove(src_ctx, src_cpu, pmu, &src_ctx->pinned_groups, &events); __perf_pmu_remove(src_ctx, src_cpu, pmu, &src_ctx->flexible_groups, &events); - /* - * Wait for the events to quiesce before re-instating them. - */ - synchronize_rcu(); + if (!list_empty(&events)) { + /* + * Wait for the events to quiesce before re-instating them. + */ + synchronize_rcu(); - __perf_pmu_install(dst_ctx, dst_cpu, pmu, &events); + __perf_pmu_install(dst_ctx, dst_cpu, pmu, &events); + } mutex_unlock(&dst_ctx->mutex); mutex_unlock(&src_ctx->mutex); From 24d3ae2f37d8bc3c14b31d353c5d27baf582b6a6 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 22 Mar 2023 13:24:49 -0700 Subject: [PATCH 739/898] perf/core: Fix the same task check in perf_event_set_output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The same task check in perf_event_set_output has some potential issues for some usages. For the current perf code, there is a problem if using of perf_event_open() to have multiple samples getting into the same mmap’d memory when they are both attached to the same process. https://lore.kernel.org/all/92645262-D319-4068-9C44-2409EF44888E@gmail.com/ Because the event->ctx is not ready when the perf_event_set_output() is invoked in the perf_event_open(). Besides the above issue, before the commit bd2756811766 ("perf: Rewrite core context handling"), perf record can errors out when sampling with a hardware event and a software event as below. $ perf record -e cycles,dummy --per-thread ls failed to mmap with 22 (Invalid argument) That's because that prior to the commit a hardware event and a software event are from different task context. The problem should be a long time issue since commit c3f00c70276d ("perk: Separate find_get_context() from event initialization"). The task struct is stored in the event->hw.target for each per-thread event. It is a more reliable way to determine whether two events are attached to the same task. The event->hw.target was also introduced several years ago by the commit 50f16a8bf9d7 ("perf: Remove type specific target pointers"). It can not only be used to fix the issue with the current code, but also back port to fix the issues with an older kernel. Note: The event->hw.target was introduced later than commit c3f00c70276d. The patch may cannot be applied between the commit c3f00c70276d and commit 50f16a8bf9d7. Anybody that wants to back-port this at that period may have to find other solutions. Fixes: c3f00c70276d ("perf: Separate find_get_context() from event initialization") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Zhengjun Xing Link: https://lkml.kernel.org/r/20230322202449.512091-1-kan.liang@linux.intel.com --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 115320faf1db4..435815d3be3f8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -12173,7 +12173,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) /* * If its not a per-cpu rb, it must be the same task. */ - if (output_event->cpu == -1 && output_event->ctx != event->ctx) + if (output_event->cpu == -1 && output_event->hw.target != event->hw.target) goto out; /* From b45193cb4df556fe6251b285a5ce44046dd36b4a Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 4 Apr 2023 09:31:28 +0200 Subject: [PATCH 740/898] can: j1939: j1939_tp_tx_dat_new(): fix out-of-bounds memory access In the j1939_tp_tx_dat_new() function, an out-of-bounds memory access could occur during the memcpy() operation if the size of skb->cb is larger than the size of struct j1939_sk_buff_cb. This is because the memcpy() operation uses the size of skb->cb, leading to a read beyond the struct j1939_sk_buff_cb. Updated the memcpy() operation to use the size of struct j1939_sk_buff_cb instead of the size of skb->cb. This ensures that the memcpy() operation only reads the memory within the bounds of struct j1939_sk_buff_cb, preventing out-of-bounds memory access. Additionally, add a BUILD_BUG_ON() to check that the size of skb->cb is greater than or equal to the size of struct j1939_sk_buff_cb. This ensures that the skb->cb buffer is large enough to hold the j1939_sk_buff_cb structure. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Reported-by: Shuangpeng Bai Tested-by: Shuangpeng Bai Signed-off-by: Oleksij Rempel Link: https://groups.google.com/g/syzkaller/c/G_LL-C3plRs/m/-8xCi6dCAgAJ Link: https://lore.kernel.org/all/20230404073128.3173900-1-o.rempel@pengutronix.de Cc: stable@vger.kernel.org [mkl: rephrase commit message] Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index fb92c3609e172..fe3df23a25957 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -604,7 +604,10 @@ sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv, /* reserve CAN header */ skb_reserve(skb, offsetof(struct can_frame, data)); - memcpy(skb->cb, re_skcb, sizeof(skb->cb)); + /* skb->cb must be large enough to hold a j1939_sk_buff_cb structure */ + BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*re_skcb)); + + memcpy(skb->cb, re_skcb, sizeof(*re_skcb)); skcb = j1939_skb_to_cb(skb); if (swap_src_dst) j1939_skbcb_swap(skcb); From 0145462fc802cd447ef5d029758043c7f15b4b1e Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 30 Mar 2023 19:02:48 +0200 Subject: [PATCH 741/898] can: isotp: isotp_recvmsg(): use sock_recv_cmsgs() to get SOCK_RXQ_OVFL infos isotp.c was still using sock_recv_timestamp() which does not provide control messages to detect dropped PDUs in the receive path. Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20230330170248.62342-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/can/isotp.c b/net/can/isotp.c index 9bc344851704e..47c2ebad10edc 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1120,7 +1120,7 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, if (ret < 0) goto out_err; - sock_recv_timestamp(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { __sockaddr_check_size(ISOTP_MIN_NAMELEN); From 79e19fa79cb5d5f1b3bf3e3ae24989ccb93c7b7b Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Fri, 31 Mar 2023 14:55:11 +0200 Subject: [PATCH 742/898] can: isotp: isotp_ops: fix poll() to not report false EPOLLOUT events When using select()/poll()/epoll() with a non-blocking ISOTP socket to wait for when non-blocking write is possible, a false EPOLLOUT event is sometimes returned. This can happen at least after sending a message which must be split to multiple CAN frames. The reason is that isotp_sendmsg() returns -EAGAIN when tx.state is not equal to ISOTP_IDLE and this behavior is not reflected in datagram_poll(), which is used in isotp_ops. This is fixed by introducing ISOTP-specific poll function, which suppresses the EPOLLOUT events in that case. v2: https://lore.kernel.org/all/20230302092812.320643-1-michal.sojka@cvut.cz v1: https://lore.kernel.org/all/20230224010659.48420-1-michal.sojka@cvut.cz https://lore.kernel.org/all/b53a04a2-ba1f-3858-84c1-d3eb3301ae15@hartkopp.net Signed-off-by: Michal Sojka Reported-by: Jakub Jira Tested-by: Oliver Hartkopp Acked-by: Oliver Hartkopp Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") Link: https://lore.kernel.org/all/20230331125511.372783-1-michal.sojka@cvut.cz Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/net/can/isotp.c b/net/can/isotp.c index 47c2ebad10edc..281b7766c54ec 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1608,6 +1608,21 @@ static int isotp_init(struct sock *sk) return 0; } +static __poll_t isotp_poll(struct file *file, struct socket *sock, poll_table *wait) +{ + struct sock *sk = sock->sk; + struct isotp_sock *so = isotp_sk(sk); + + __poll_t mask = datagram_poll(file, sock, wait); + poll_wait(file, &so->wait, wait); + + /* Check for false positives due to TX state */ + if ((mask & EPOLLWRNORM) && (so->tx.state != ISOTP_IDLE)) + mask &= ~(EPOLLOUT | EPOLLWRNORM); + + return mask; +} + static int isotp_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd, unsigned long arg) { @@ -1623,7 +1638,7 @@ static const struct proto_ops isotp_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = isotp_getname, - .poll = datagram_poll, + .poll = isotp_poll, .ioctl = isotp_sock_no_ioctlcmd, .gettstamp = sock_gettstamp, .listen = sock_no_listen, From 051737439eaee5bdd03d3c2ef5510d54a478fd05 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 31 Mar 2023 15:19:35 +0200 Subject: [PATCH 743/898] can: isotp: fix race between isotp_sendsmg() and isotp_release() As discussed with Dae R. Jeong and Hillf Danton here [1] the sendmsg() function in isotp.c might get into a race condition when restoring the former tx.state from the old_state. Remove the old_state concept and implement proper locking for the ISOTP_IDLE transitions in isotp_sendmsg(), inspired by a simplification idea from Hillf Danton. Introduce a new tx.state ISOTP_SHUTDOWN and use the same locking mechanism from isotp_release() which resolves a potential race between isotp_sendsmg() and isotp_release(). [1] https://lore.kernel.org/linux-can/ZB%2F93xJxq%2FBUqAgG@dragonet v1: https://lore.kernel.org/all/20230331102114.15164-1-socketcan@hartkopp.net v2: https://lore.kernel.org/all/20230331123600.3550-1-socketcan@hartkopp.net take care of signal interrupts for wait_event_interruptible() in isotp_release() v3: https://lore.kernel.org/all/20230331130654.9886-1-socketcan@hartkopp.net take care of signal interrupts for wait_event_interruptible() in isotp_sendmsg() in the wait_tx_done case v4: https://lore.kernel.org/all/20230331131935.21465-1-socketcan@hartkopp.net take care of signal interrupts for wait_event_interruptible() in isotp_sendmsg() in ALL cases Cc: Dae R. Jeong Cc: Hillf Danton Signed-off-by: Oliver Hartkopp Fixes: 4f027cba8216 ("can: isotp: split tx timer into transmission and timeout") Link: https://lore.kernel.org/all/20230331131935.21465-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org [mkl: rephrase commit message] Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 55 ++++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/net/can/isotp.c b/net/can/isotp.c index 281b7766c54ec..5761d4ab839dd 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -119,7 +119,8 @@ enum { ISOTP_WAIT_FIRST_FC, ISOTP_WAIT_FC, ISOTP_WAIT_DATA, - ISOTP_SENDING + ISOTP_SENDING, + ISOTP_SHUTDOWN, }; struct tpcon { @@ -880,8 +881,8 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer) txtimer); struct sock *sk = &so->sk; - /* don't handle timeouts in IDLE state */ - if (so->tx.state == ISOTP_IDLE) + /* don't handle timeouts in IDLE or SHUTDOWN state */ + if (so->tx.state == ISOTP_IDLE || so->tx.state == ISOTP_SHUTDOWN) return HRTIMER_NORESTART; /* we did not get any flow control or echo frame in time */ @@ -918,7 +919,6 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct isotp_sock *so = isotp_sk(sk); - u32 old_state = so->tx.state; struct sk_buff *skb; struct net_device *dev; struct canfd_frame *cf; @@ -928,23 +928,24 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) int off; int err; - if (!so->bound) + if (!so->bound || so->tx.state == ISOTP_SHUTDOWN) return -EADDRNOTAVAIL; +wait_free_buffer: /* we do not support multiple buffers - for now */ - if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE || - wq_has_sleeper(&so->wait)) { - if (msg->msg_flags & MSG_DONTWAIT) { - err = -EAGAIN; - goto err_out; - } + if (wq_has_sleeper(&so->wait) && (msg->msg_flags & MSG_DONTWAIT)) + return -EAGAIN; - /* wait for complete transmission of current pdu */ - err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); - if (err) - goto err_out; + /* wait for complete transmission of current pdu */ + err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + if (err) + goto err_event_drop; - so->tx.state = ISOTP_SENDING; + if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE) { + if (so->tx.state == ISOTP_SHUTDOWN) + return -EADDRNOTAVAIL; + + goto wait_free_buffer; } if (!size || size > MAX_MSG_LENGTH) { @@ -1074,7 +1075,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (wait_tx_done) { /* wait for complete transmission of current pdu */ - wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + if (err) + goto err_event_drop; if (sk->sk_err) return -sk->sk_err; @@ -1082,13 +1085,15 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) return size; +err_event_drop: + /* got signal: force tx state machine to be idle */ + so->tx.state = ISOTP_IDLE; + hrtimer_cancel(&so->txfrtimer); + hrtimer_cancel(&so->txtimer); err_out_drop: /* drop this PDU and unlock a potential wait queue */ - old_state = ISOTP_IDLE; -err_out: - so->tx.state = old_state; - if (so->tx.state == ISOTP_IDLE) - wake_up_interruptible(&so->wait); + so->tx.state = ISOTP_IDLE; + wake_up_interruptible(&so->wait); return err; } @@ -1150,10 +1155,12 @@ static int isotp_release(struct socket *sock) net = sock_net(sk); /* wait for complete transmission of current pdu */ - wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + while (wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE) == 0 && + cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SHUTDOWN) != ISOTP_IDLE) + ; /* force state machines to be idle also when a signal occurred */ - so->tx.state = ISOTP_IDLE; + so->tx.state = ISOTP_SHUTDOWN; so->rx.state = ISOTP_IDLE; spin_lock(&isotp_notifier_lock); From 81515ecf155a38f3532bf5ddef88d651898df6be Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 4 Apr 2023 10:46:41 -0700 Subject: [PATCH 744/898] x86/cpu: Add model number for Intel Arrow Lake processor Successor to Lunar Lake. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230404174641.426593-1-tony.luck@intel.com --- arch/x86/include/asm/intel-family.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index cbaf174d8efd9..b3af2d45bbbb5 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -125,6 +125,8 @@ #define INTEL_FAM6_LUNARLAKE_M 0xBD +#define INTEL_FAM6_ARROWLAKE 0xC6 + /* "Small Core" Processors (Atom/E-Core) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ From 3357c6e429643231e60447b52ffbb7ac895aca22 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 4 Apr 2023 19:45:04 -0400 Subject: [PATCH 745/898] tracing: Free error logs of tracing instances When a tracing instance is removed, the error messages that hold errors that occurred in the instance needs to be freed. The following reports a memory leak: # cd /sys/kernel/tracing # mkdir instances/foo # echo 'hist:keys=x' > instances/foo/events/sched/sched_switch/trigger # cat instances/foo/error_log [ 117.404795] hist:sched:sched_switch: error: Couldn't find field Command: hist:keys=x ^ # rmdir instances/foo Then check for memory leaks: # echo scan > /sys/kernel/debug/kmemleak # cat /sys/kernel/debug/kmemleak unreferenced object 0xffff88810d8ec700 (size 192): comm "bash", pid 869, jiffies 4294950577 (age 215.752s) hex dump (first 32 bytes): 60 dd 68 61 81 88 ff ff 60 dd 68 61 81 88 ff ff `.ha....`.ha.... a0 30 8c 83 ff ff ff ff 26 00 0a 00 00 00 00 00 .0......&....... backtrace: [<00000000dae26536>] kmalloc_trace+0x2a/0xa0 [<00000000b2938940>] tracing_log_err+0x277/0x2e0 [<000000004a0e1b07>] parse_atom+0x966/0xb40 [<0000000023b24337>] parse_expr+0x5f3/0xdb0 [<00000000594ad074>] event_hist_trigger_parse+0x27f8/0x3560 [<00000000293a9645>] trigger_process_regex+0x135/0x1a0 [<000000005c22b4f2>] event_trigger_write+0x87/0xf0 [<000000002cadc509>] vfs_write+0x162/0x670 [<0000000059c3b9be>] ksys_write+0xca/0x170 [<00000000f1cddc00>] do_syscall_64+0x3e/0xc0 [<00000000868ac68c>] entry_SYSCALL_64_after_hwframe+0x72/0xdc unreferenced object 0xffff888170c35a00 (size 32): comm "bash", pid 869, jiffies 4294950577 (age 215.752s) hex dump (first 32 bytes): 0a 20 20 43 6f 6d 6d 61 6e 64 3a 20 68 69 73 74 . Command: hist 3a 6b 65 79 73 3d 78 0a 00 00 00 00 00 00 00 00 :keys=x......... backtrace: [<000000006a747de5>] __kmalloc+0x4d/0x160 [<000000000039df5f>] tracing_log_err+0x29b/0x2e0 [<000000004a0e1b07>] parse_atom+0x966/0xb40 [<0000000023b24337>] parse_expr+0x5f3/0xdb0 [<00000000594ad074>] event_hist_trigger_parse+0x27f8/0x3560 [<00000000293a9645>] trigger_process_regex+0x135/0x1a0 [<000000005c22b4f2>] event_trigger_write+0x87/0xf0 [<000000002cadc509>] vfs_write+0x162/0x670 [<0000000059c3b9be>] ksys_write+0xca/0x170 [<00000000f1cddc00>] do_syscall_64+0x3e/0xc0 [<00000000868ac68c>] entry_SYSCALL_64_after_hwframe+0x72/0xdc The problem is that the error log needs to be freed when the instance is removed. Link: https://lore.kernel.org/lkml/76134d9f-a5ba-6a0d-37b3-28310b4a1e91@alu.unizg.hr/ Link: https://lore.kernel.org/linux-trace-kernel/20230404194504.5790b95f@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Mark Rutland Cc: Thorsten Leemhuis Cc: Ulf Hansson Cc: Eric Biggers Fixes: 2f754e771b1a6 ("tracing: Have the error logs show up in the proper instances") Reported-by: Mirsad Goran Todorovac Tested-by: Mirsad Todorovac Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4686473b8497b..36a6037823cd9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9516,6 +9516,7 @@ static int __remove_instance(struct trace_array *tr) tracefs_remove(tr->dir); free_percpu(tr->last_func_repeats); free_trace_buffers(tr); + clear_tracing_err_log(tr); for (i = 0; i < tr->nr_topts; i++) { kfree(tr->topts[i].topts); From d3205ab75e99a47539ec91ef85ba488f4ddfeaa9 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 3 Apr 2023 13:09:25 -0700 Subject: [PATCH 746/898] nvme: fix discard support without oncs The device can report discard support without setting the ONCS DSM bit. When not set, the driver clears max_discard_size expecting it to be set later. We don't know the size until we have the namespace format, though, so setting it is deferred until configuring one, but the driver was abandoning the discard settings due to that initial clearing. Move the max_discard_size calculation above the check for a '0' discard size. Fixes: 1a86924e4f46475 ("nvme: fix interpretation of DMRSL") Reported-by: Laurence Oberman Signed-off-by: Keith Busch Reviewed-by: Niklas Cassel Reviewed-by: Sagi Grimberg Tested-by: Laurence Oberman Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 53ef028596c61..d6a9bac91a4cd 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1674,6 +1674,9 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) struct request_queue *queue = disk->queue; u32 size = queue_logical_block_size(queue); + if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX)) + ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl); + if (ctrl->max_discard_sectors == 0) { blk_queue_max_discard_sectors(queue, 0); return; @@ -1688,9 +1691,6 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) if (queue->limits.max_discard_sectors) return; - if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX)) - ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl); - blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors); blk_queue_max_discard_segments(queue, ctrl->max_discard_segments); From 32d85999680601d01b2a36713c9ffd7397c8688b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 4 Apr 2023 12:36:25 +0200 Subject: [PATCH 747/898] arm64: compat: Work around uninitialized variable warning Dan reports that smatch complains about a potential uninitialized variable being used in the compat alignment fixup code. The logic is not wrong per se, but we do end up using an uninitialized variable if reading the instruction that triggered the alignment fault from user space faults, even if the fault ensures that the uninitialized value doesn't propagate any further. Given that we just give up and return 1 if any fault occurs when reading the instruction, let's get rid of the 'success handling' pattern that captures the fault in a variable and aborts later, and instead, just return 1 immediately if any of the get_user() calls result in an exception. Fixes: 3fc24ef32d3b ("arm64: compat: Implement misalignment fixups for multiword loads") Reported-by: kernel test robot Reported-by: Dan Carpenter Link: https://lore.kernel.org/r/202304021214.gekJ8yRc-lkp@intel.com/ Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20230404103625.2386382-1-ardb@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/compat_alignment.c | 32 ++++++++++++---------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kernel/compat_alignment.c b/arch/arm64/kernel/compat_alignment.c index 5edec2f49ec98..deff21bfa6800 100644 --- a/arch/arm64/kernel/compat_alignment.c +++ b/arch/arm64/kernel/compat_alignment.c @@ -314,36 +314,32 @@ int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs) int (*handler)(unsigned long addr, u32 instr, struct pt_regs *regs); unsigned int type; u32 instr = 0; - u16 tinstr = 0; int isize = 4; int thumb2_32b = 0; - int fault; instrptr = instruction_pointer(regs); if (compat_thumb_mode(regs)) { __le16 __user *ptr = (__le16 __user *)(instrptr & ~1); + u16 tinstr, tinst2; - fault = alignment_get_thumb(regs, ptr, &tinstr); - if (!fault) { - if (IS_T32(tinstr)) { - /* Thumb-2 32-bit */ - u16 tinst2; - fault = alignment_get_thumb(regs, ptr + 1, &tinst2); - instr = ((u32)tinstr << 16) | tinst2; - thumb2_32b = 1; - } else { - isize = 2; - instr = thumb2arm(tinstr); - } + if (alignment_get_thumb(regs, ptr, &tinstr)) + return 1; + + if (IS_T32(tinstr)) { /* Thumb-2 32-bit */ + if (alignment_get_thumb(regs, ptr + 1, &tinst2)) + return 1; + instr = ((u32)tinstr << 16) | tinst2; + thumb2_32b = 1; + } else { + isize = 2; + instr = thumb2arm(tinstr); } } else { - fault = alignment_get_arm(regs, (__le32 __user *)instrptr, &instr); + if (alignment_get_arm(regs, (__le32 __user *)instrptr, &instr)) + return 1; } - if (fault) - return 1; - switch (CODING_BITS(instr)) { case 0x00000000: /* 3.13.4 load/store instruction extensions */ if (LDSTHD_I_BIT(instr)) From ec799c8a92e0be91e0940cc739a27f483242df65 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 30 Mar 2023 18:02:24 +0300 Subject: [PATCH 748/898] usb: dwc3: pci: add support for the Intel Meteor Lake-S This patch adds the necessary PCI ID for Intel Meteor Lake-S devices. Signed-off-by: Heikki Krogerus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230330150224.89316-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index a23ddbb819795..560793545362a 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -49,6 +49,7 @@ #define PCI_DEVICE_ID_INTEL_RPLS 0x7a61 #define PCI_DEVICE_ID_INTEL_MTLM 0x7eb1 #define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1 +#define PCI_DEVICE_ID_INTEL_MTLS 0x7f6f #define PCI_DEVICE_ID_INTEL_MTL 0x7e7e #define PCI_DEVICE_ID_INTEL_TGL 0x9a15 #define PCI_DEVICE_ID_AMD_MR 0x163a @@ -474,6 +475,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLP), (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLS), + (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTL), (kernel_ulong_t) &dwc3_pci_intel_swnode, }, From eddebe39602efe631b83ff8d03f26eba12cfd760 Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Wed, 29 Mar 2023 21:51:59 +0000 Subject: [PATCH 749/898] usb: typec: altmodes/displayport: Fix configure initial pin assignment While determining the initial pin assignment to be sent in the configure message, using the DP_PIN_ASSIGN_DP_ONLY_MASK mask causes the DFP_U to send both Pin Assignment C and E when both are supported by the DFP_U and UFP_U. The spec (Table 5-7 DFP_U Pin Assignment Selection Mandates, VESA DisplayPort Alt Mode Standard v2.0) indicates that the DFP_U never selects Pin Assignment E when Pin Assignment C is offered. Update the DP_PIN_ASSIGN_DP_ONLY_MASK conditional to intially select only Pin Assignment C if it is available. Fixes: 0e3bb7d6894d ("usb: typec: Add driver for DisplayPort alternate mode") Cc: stable@vger.kernel.org Signed-off-by: RD Babiera Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20230329215159.2046932-1-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/displayport.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index 662cd043b50ea..8f3e884222ade 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -112,8 +112,12 @@ static int dp_altmode_configure(struct dp_altmode *dp, u8 con) if (dp->data.status & DP_STATUS_PREFER_MULTI_FUNC && pin_assign & DP_PIN_ASSIGN_MULTI_FUNC_MASK) pin_assign &= DP_PIN_ASSIGN_MULTI_FUNC_MASK; - else if (pin_assign & DP_PIN_ASSIGN_DP_ONLY_MASK) + else if (pin_assign & DP_PIN_ASSIGN_DP_ONLY_MASK) { pin_assign &= DP_PIN_ASSIGN_DP_ONLY_MASK; + /* Default to pin assign C if available */ + if (pin_assign & BIT(DP_PIN_ASSIGN_C)) + pin_assign = BIT(DP_PIN_ASSIGN_C); + } if (!pin_assign) return -EINVAL; From d356b3cdd00cae4508be566a47c0cfb74e14862a Mon Sep 17 00:00:00 2001 From: Sandeep Dhavale Date: Sat, 1 Apr 2023 06:05:08 +0000 Subject: [PATCH 750/898] usb: gadget: f_fs: Fix ffs_epfile_read_iter to handle ITER_UBUF iov_iter for ffs_epfile_read_iter can be ITER_UBUF with io_uring. In that case dup_iter() does not have to allocate anything and it can return NULL. ffs_epfile_read_iter treats this as a failure and returns -ENOMEM. Fix it by checking if iter_is_ubuf(). Fixes: 1e23db450cff ("io_uring: use iter_ubuf for single range imports") Signed-off-by: Sandeep Dhavale Acked-by: Jens Axboe Link: https://lore.kernel.org/r/20230401060509.3608259-2-dhavale@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index ddfc537c7526b..56cdfb2e42113 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1251,7 +1251,7 @@ static ssize_t ffs_epfile_read_iter(struct kiocb *kiocb, struct iov_iter *to) p->kiocb = kiocb; if (p->aio) { p->to_free = dup_iter(&p->data, to, GFP_KERNEL); - if (!p->to_free) { + if (!iter_is_ubuf(&p->data) && !p->to_free) { kfree(p); return -ENOMEM; } From e07fec475cc86ce6ded82908df1d511edc3303b7 Mon Sep 17 00:00:00 2001 From: Sandeep Dhavale Date: Sat, 1 Apr 2023 06:05:09 +0000 Subject: [PATCH 751/898] usb: gadgetfs: Fix ep_read_iter to handle ITER_UBUF iov_iter for ep_read_iter can be ITER_UBUF with io_uring. In that case dup_iter() does not have to allocate iov and it can return NULL. Fix the assumption by checking for iter_is_ubuf() other wise ep_read_iter can treat this as failure and return -ENOMEM. Fixes: 1e23db450cff ("io_uring: use iter_ubuf for single range imports") Signed-off-by: Sandeep Dhavale Acked-by: Jens Axboe Link: https://lore.kernel.org/r/20230401060509.3608259-3-dhavale@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index d605bc2e7e8fd..28249d0bf0629 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -614,7 +614,7 @@ ep_read_iter(struct kiocb *iocb, struct iov_iter *to) if (!priv) goto fail; priv->to_free = dup_iter(&priv->to, to, GFP_KERNEL); - if (!priv->to_free) { + if (!iter_is_ubuf(&priv->to) && !priv->to_free) { kfree(priv); goto fail; } From d19342c6609b67f2ba83b9eccca2777e3687f625 Mon Sep 17 00:00:00 2001 From: Thiago Rafael Becker Date: Wed, 5 Apr 2023 10:16:48 -0300 Subject: [PATCH 752/898] cifs: sanitize paths in cifs_update_super_prepath. After a server reboot, clients are failing to move files with ENOENT. This is caused by DFS referrals containing multiple separators, which the server move call doesn't recognize. v1: Initial patch. v2: Move prototype to header. Link: https://bugzilla.redhat.com/show_bug.cgi?id=2182472 Fixes: a31080899d5f ("cifs: sanitize multiple delimiters in prepath") Actually-Fixes: 24e0a1eff9e2 ("cifs: switch to new mount api") Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Thiago Rafael Becker Signed-off-by: Steve French --- fs/cifs/fs_context.c | 13 +++++++------ fs/cifs/fs_context.h | 3 +++ fs/cifs/misc.c | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 6d13f8207e96a..ace11a1a7c8ab 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -441,13 +441,14 @@ int smb3_parse_opt(const char *options, const char *key, char **val) * but there are some bugs that prevent rename from working if there are * multiple delimiters. * - * Returns a sanitized duplicate of @path. The caller is responsible for - * cleaning up the original. + * Returns a sanitized duplicate of @path. @gfp indicates the GFP_* flags + * for kstrdup. + * The caller is responsible for freeing the original. */ #define IS_DELIM(c) ((c) == '/' || (c) == '\\') -static char *sanitize_path(char *path) +char *cifs_sanitize_prepath(char *prepath, gfp_t gfp) { - char *cursor1 = path, *cursor2 = path; + char *cursor1 = prepath, *cursor2 = prepath; /* skip all prepended delimiters */ while (IS_DELIM(*cursor1)) @@ -469,7 +470,7 @@ static char *sanitize_path(char *path) cursor2--; *(cursor2) = '\0'; - return kstrdup(path, GFP_KERNEL); + return kstrdup(prepath, gfp); } /* @@ -531,7 +532,7 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx) if (!*pos) return 0; - ctx->prepath = sanitize_path(pos); + ctx->prepath = cifs_sanitize_prepath(pos, GFP_KERNEL); if (!ctx->prepath) return -ENOMEM; diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h index 3de00e7127ec4..f4eaf85589022 100644 --- a/fs/cifs/fs_context.h +++ b/fs/cifs/fs_context.h @@ -287,4 +287,7 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb); */ #define SMB3_MAX_DCLOSETIMEO (1 << 30) #define SMB3_DEF_DCLOSETIMEO (1 * HZ) /* even 1 sec enough to help eg open/write/close/open/read */ + +extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp); + #endif diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index b44fb51968bfb..7f085ed2d866b 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1195,7 +1195,7 @@ int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix) kfree(cifs_sb->prepath); if (prefix && *prefix) { - cifs_sb->prepath = kstrdup(prefix, GFP_ATOMIC); + cifs_sb->prepath = cifs_sanitize_prepath(prefix, GFP_ATOMIC); if (!cifs_sb->prepath) return -ENOMEM; From 1edf48991a783d00a3a18dc0d27c88139e4030a2 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Fri, 31 Mar 2023 05:06:00 -0400 Subject: [PATCH 753/898] usb: cdnsp: Fixes error: uninitialized symbol 'len' The patch 5bc38d33a5a1: "usb: cdnsp: Fixes issue with redundant Status Stage" leads to the following Smatch static checker warning: drivers/usb/cdns3/cdnsp-ep0.c:470 cdnsp_setup_analyze() error: uninitialized symbol 'len'. cc: Fixes: 5bc38d33a5a1 ("usb: cdnsp: Fixes issue with redundant Status Stage") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20230331090600.454674-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-ep0.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-ep0.c b/drivers/usb/cdns3/cdnsp-ep0.c index d63d5d92f2554..f317d3c847810 100644 --- a/drivers/usb/cdns3/cdnsp-ep0.c +++ b/drivers/usb/cdns3/cdnsp-ep0.c @@ -414,7 +414,7 @@ static int cdnsp_ep0_std_request(struct cdnsp_device *pdev, void cdnsp_setup_analyze(struct cdnsp_device *pdev) { struct usb_ctrlrequest *ctrl = &pdev->setup; - int ret = 0; + int ret = -EINVAL; u16 len; trace_cdnsp_ctrl_req(ctrl); @@ -424,7 +424,6 @@ void cdnsp_setup_analyze(struct cdnsp_device *pdev) if (pdev->gadget.state == USB_STATE_NOTATTACHED) { dev_err(pdev->dev, "ERR: Setup detected in unattached state\n"); - ret = -EINVAL; goto out; } From 78dfc9d1d1abb9e400386fa9c5724a8f7d75e3b9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 4 Apr 2023 13:02:46 +0200 Subject: [PATCH 754/898] ACPI: video: Add auto_detect arg to __acpi_video_get_backlight_type() Allow callers of __acpi_video_get_backlight_type() to pass a pointer to a bool which will get set to false if the backlight-type comes from the cmdline or a DMI quirk and set to true if auto-detection was used. And make __acpi_video_get_backlight_type() non static so that it can be called directly outside of video_detect.c . While at it turn the acpi_video_get_backlight_type() and acpi_video_backlight_use_native() wrappers into static inline functions in include/acpi/video.h, so that we need to export one less symbol. Fixes: 5aa9d943e9b6 ("ACPI: video: Don't enable fallback path for creating ACPI backlight by default") Cc: All applicable Reviewed-by: Mario Limonciello Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 21 ++++++++------------- include/acpi/video.h | 15 +++++++++++++-- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index fd7cbce8076e2..f7c218dd87420 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -782,7 +782,7 @@ static bool prefer_native_over_acpi_video(void) * Determine which type of backlight interface to use on this system, * First check cmdline, then dmi quirks, then do autodetect. */ -static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native) +enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, bool *auto_detect) { static DEFINE_MUTEX(init_mutex); static bool nvidia_wmi_ec_present; @@ -807,6 +807,9 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native) native_available = true; mutex_unlock(&init_mutex); + if (auto_detect) + *auto_detect = false; + /* * The below heuristics / detection steps are in order of descending * presedence. The commandline takes presedence over anything else. @@ -818,6 +821,9 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native) if (acpi_backlight_dmi != acpi_backlight_undef) return acpi_backlight_dmi; + if (auto_detect) + *auto_detect = true; + /* Special cases such as nvidia_wmi_ec and apple gmux. */ if (nvidia_wmi_ec_present) return acpi_backlight_nvidia_wmi_ec; @@ -837,15 +843,4 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native) /* No ACPI video/native (old hw), use vendor specific fw methods. */ return acpi_backlight_vendor; } - -enum acpi_backlight_type acpi_video_get_backlight_type(void) -{ - return __acpi_video_get_backlight_type(false); -} -EXPORT_SYMBOL(acpi_video_get_backlight_type); - -bool acpi_video_backlight_use_native(void) -{ - return __acpi_video_get_backlight_type(true) == acpi_backlight_native; -} -EXPORT_SYMBOL(acpi_video_backlight_use_native); +EXPORT_SYMBOL(__acpi_video_get_backlight_type); diff --git a/include/acpi/video.h b/include/acpi/video.h index 8ed9bec03e534..ff5a8da5d8832 100644 --- a/include/acpi/video.h +++ b/include/acpi/video.h @@ -59,8 +59,6 @@ extern void acpi_video_unregister(void); extern void acpi_video_register_backlight(void); extern int acpi_video_get_edid(struct acpi_device *device, int type, int device_id, void **edid); -extern enum acpi_backlight_type acpi_video_get_backlight_type(void); -extern bool acpi_video_backlight_use_native(void); /* * Note: The value returned by acpi_video_handles_brightness_key_presses() * may change over time and should not be cached. @@ -69,6 +67,19 @@ extern bool acpi_video_handles_brightness_key_presses(void); extern int acpi_video_get_levels(struct acpi_device *device, struct acpi_video_device_brightness **dev_br, int *pmax_level); + +extern enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, + bool *auto_detect); + +static inline enum acpi_backlight_type acpi_video_get_backlight_type(void) +{ + return __acpi_video_get_backlight_type(false, NULL); +} + +static inline bool acpi_video_backlight_use_native(void) +{ + return __acpi_video_get_backlight_type(true, NULL) == acpi_backlight_native; +} #else static inline void acpi_video_report_nolcd(void) { return; }; static inline int acpi_video_register(void) { return -ENODEV; } From e506731c8f35699d746c615164ed620cd53c00ca Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 4 Apr 2023 13:02:47 +0200 Subject: [PATCH 755/898] ACPI: video: Make acpi_backlight=video work independent from GPU driver Commit 3dbc80a3e4c5 ("ACPI: video: Make backlight class device registration a separate step (v2)") combined with commit 5aa9d943e9b6 ("ACPI: video: Don't enable fallback path for creating ACPI backlight by default") Means that the video.ko code now fully depends on the GPU driver calling acpi_video_register_backlight() for the acpi_video# backlight class devices to get registered. This means that if the GPU driver does not do this, acpi_backlight=video on the cmdline, or DMI quirks for selecting acpi_video# will not work. This is a problem on for example Apple iMac14,1 all-in-ones where the monitor's LCD panel shows up as a regular DP connection instead of eDP so the GPU driver will not call acpi_video_register_backlight() [1]. Fix this by making video.ko directly register the acpi_video# devices when these have been explicitly requested either on the cmdline or through DMI quirks (rather then auto-detection being used). [1] GPU drivers only call acpi_video_register_backlight() when an internal panel is detected, to avoid non working acpi_video# devices getting registered on desktops which unfortunately is a real issue. Fixes: 5aa9d943e9b6 ("ACPI: video: Don't enable fallback path for creating ACPI backlight by default") Cc: All applicable Reviewed-by: Mario Limonciello Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_video.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 97b711e57bff4..c7a6d0b69dabd 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -1984,6 +1984,7 @@ static int instance; static int acpi_video_bus_add(struct acpi_device *device) { struct acpi_video_bus *video; + bool auto_detect; int error; acpi_status status; @@ -2045,10 +2046,20 @@ static int acpi_video_bus_add(struct acpi_device *device) mutex_unlock(&video_list_lock); /* - * The userspace visible backlight_device gets registered separately - * from acpi_video_register_backlight(). + * If backlight-type auto-detection is used then a native backlight may + * show up later and this may change the result from video to native. + * Therefor normally the userspace visible /sys/class/backlight device + * gets registered separately by the GPU driver calling + * acpi_video_register_backlight() when an internal panel is detected. + * Register the backlight now when not using auto-detection, so that + * when the kernel cmdline or DMI-quirks are used the backlight will + * get registered even if acpi_video_register_backlight() is not called. */ acpi_video_run_bcl_for_osi(video); + if (__acpi_video_get_backlight_type(false, &auto_detect) == acpi_backlight_video && + !auto_detect) + acpi_video_bus_register_backlight(video); + acpi_video_bus_add_notify_handler(video); return 0; From 2699107989431d6db44f8a9e809ea74c387336d1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 4 Apr 2023 13:02:48 +0200 Subject: [PATCH 756/898] ACPI: video: Add acpi_backlight=video quirk for Apple iMac14,1 and iMac14,2 On the Apple iMac14,1 and iMac14,2 all-in-ones (monitors with builtin "PC") the connection between the GPU and the panel is seen by the GPU driver as regular DP instead of eDP, causing the GPU driver to never call acpi_video_register_backlight(). (GPU drivers only call acpi_video_register_backlight() when an internal panel is detected, to avoid non working acpi_video# devices getting registered on desktops which unfortunately is a real issue.) Fix the missing acpi_video# backlight device on these all-in-ones by adding a acpi_backlight=video DMI quirk, so that video.ko will immediately register the backlight device instead of waiting for an acpi_video_register_backlight() call. Fixes: 5aa9d943e9b6 ("ACPI: video: Don't enable fallback path for creating ACPI backlight by default") Cc: All applicable Reviewed-by: Mario Limonciello Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index f7c218dd87420..295744fe7c920 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -276,6 +276,29 @@ static const struct dmi_system_id video_detect_dmi_table[] = { }, }, + /* + * Models which need acpi_video backlight control where the GPU drivers + * do not call acpi_video_register_backlight() because no internal panel + * is detected. Typically these are all-in-ones (monitors with builtin + * PC) where the panel connection shows up as regular DP instead of eDP. + */ + { + .callback = video_detect_force_video, + /* Apple iMac14,1 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "iMac14,1"), + }, + }, + { + .callback = video_detect_force_video, + /* Apple iMac14,2 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "iMac14,2"), + }, + }, + /* * These models have a working acpi_video backlight control, and using * native backlight causes a regression where backlight does not work From a5b2781dcab2c77979a4b8adda781d2543580901 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 4 Apr 2023 13:02:49 +0200 Subject: [PATCH 757/898] ACPI: video: Add acpi_backlight=video quirk for Lenovo ThinkPad W530 The Lenovo ThinkPad W530 uses a nvidia k1000m GPU. When this gets used together with one of the older nvidia binary driver series (the latest series does not support it), then backlight control does not work. This is caused by commit 3dbc80a3e4c5 ("ACPI: video: Make backlight class device registration a separate step (v2)") combined with commit 5aa9d943e9b6 ("ACPI: video: Don't enable fallback path for creating ACPI backlight by default"). After these changes the acpi_video# backlight device is only registered when requested by a GPU driver calling acpi_video_register_backlight() which the nvidia binary driver does not do. I realize that using the nvidia binary driver is not a supported use-case and users can workaround this by adding acpi_backlight=video on the kernel commandline, but the ThinkPad W530 is a popular model under Linux users, so it seems worthwhile to add a quirk for this. I will also email Nvidia asking them to make the driver call acpi_video_register_backlight() when an internal LCD panel is detected. So maybe the next maintenance release of the drivers will fix this... Fixes: 5aa9d943e9b6 ("ACPI: video: Don't enable fallback path for creating ACPI backlight by default") Cc: All applicable Reviewed-by: Mario Limonciello Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 295744fe7c920..e85729fc481fd 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -299,6 +299,20 @@ static const struct dmi_system_id video_detect_dmi_table[] = { }, }, + /* + * Older models with nvidia GPU which need acpi_video backlight + * control and where the old nvidia binary driver series does not + * call acpi_video_register_backlight(). + */ + { + .callback = video_detect_force_video, + /* ThinkPad W530 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W530"), + }, + }, + /* * These models have a working acpi_video backlight control, and using * native backlight causes a regression where backlight does not work From 39d0bd86c499ecd6abae42a9b7112056c5560691 Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Mon, 27 Feb 2023 09:36:00 -0800 Subject: [PATCH 758/898] maple_tree: be more cautious about dead nodes Patch series "Fix VMA tree modification under mmap read lock". Syzbot reported a BUG_ON in mm/mmap.c which was found to be caused by an inconsistency between threads walking the VMA maple tree. The inconsistency is caused by the page fault handler modifying the maple tree while holding the mmap_lock for read. This only happens for stack VMAs. We had thought this was safe as it only modifies a single pivot in the tree. Unfortunately, syzbot constructed a test case where the stack had no guard page and grew the stack to abut the next VMA. This causes us to delete the NULL entry between the two VMAs and rewrite the node. We considered several options for fixing this, including dropping the mmap_lock, then reacquiring it for write; and relaxing the definition of the tree to permit a zero-length NULL entry in the node. We decided the best option was to backport some of the RCU patches from -next, which solve the problem by allocating a new node and RCU-freeing the old node. Since the problem exists in 6.1, we preferred a solution which is similar to the one we intended to merge next merge window. These patches have been in -next since next-20230301, and have received intensive testing in Android as part of the RCU page fault patchset. They were also sent as part of the "Per-VMA locks" v4 patch series. Patches 1 to 7 are bug fixes for RCU mode of the tree and patch 8 enables RCU mode for the tree. Performance v6.3-rc3 vs patched v6.3-rc3: Running these changes through mmtests showed there was a 15-20% performance decrease in will-it-scale/brk1-processes. This tests creating and inserting a single VMA repeatedly through the brk interface and isn't representative of any real world applications. This patch (of 8): ma_pivots() and ma_data_end() may be called with a dead node. Ensure to that the node isn't dead before using the returned values. This is necessary for RCU mode of the maple tree. Link: https://lkml.kernel.org/r/20230327185532.2354250-1-Liam.Howlett@oracle.com Link: https://lkml.kernel.org/r/20230227173632.3292573-1-surenb@google.com Link: https://lkml.kernel.org/r/20230227173632.3292573-2-surenb@google.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam Howlett Signed-off-by: Suren Baghdasaryan Cc: Andy Lutomirski Cc: Arjun Roy Cc: Axel Rasmussen Cc: Chris Li Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: David Rientjes Cc: Eric Dumazet Cc: freak07 Cc: Greg Thelen Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jann Horn Cc: Joel Fernandes Cc: Johannes Weiner Cc: Kent Overstreet Cc: Laurent Dufour Cc: Lorenzo Stoakes Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Rapoport Cc: Minchan Kim Cc: Paul E. McKenney Cc: Peter Oskolkov Cc: Peter Xu Cc: Peter Zijlstra Cc: Punit Agrawal Cc: Sebastian Andrzej Siewior Cc: Shakeel Butt Cc: Soheil Hassas Yeganeh Cc: Song Liu Cc: Vlastimil Babka Cc: Will Deacon Cc: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 52 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 9e2735cbc2b49..095b9cb1f4f11 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -544,6 +544,7 @@ static inline bool ma_dead_node(const struct maple_node *node) return (parent == node); } + /* * mte_dead_node() - check if the @enode is dead. * @enode: The encoded maple node @@ -625,6 +626,8 @@ static inline unsigned int mas_alloc_req(const struct ma_state *mas) * @node - the maple node * @type - the node type * + * In the event of a dead node, this array may be %NULL + * * Return: A pointer to the maple node pivots */ static inline unsigned long *ma_pivots(struct maple_node *node, @@ -1096,8 +1099,11 @@ static int mas_ascend(struct ma_state *mas) a_type = mas_parent_enum(mas, p_enode); a_node = mte_parent(p_enode); a_slot = mte_parent_slot(p_enode); - pivots = ma_pivots(a_node, a_type); a_enode = mt_mk_node(a_node, a_type); + pivots = ma_pivots(a_node, a_type); + + if (unlikely(ma_dead_node(a_node))) + return 1; if (!set_min && a_slot) { set_min = true; @@ -1401,6 +1407,9 @@ static inline unsigned char ma_data_end(struct maple_node *node, { unsigned char offset; + if (!pivots) + return 0; + if (type == maple_arange_64) return ma_meta_end(node, type); @@ -1436,6 +1445,9 @@ static inline unsigned char mas_data_end(struct ma_state *mas) return ma_meta_end(node, type); pivots = ma_pivots(node, type); + if (unlikely(ma_dead_node(node))) + return 0; + offset = mt_pivots[type] - 1; if (likely(!pivots[offset])) return ma_meta_end(node, type); @@ -4505,6 +4517,9 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min) node = mas_mn(mas); slots = ma_slots(node, mt); pivots = ma_pivots(node, mt); + if (unlikely(ma_dead_node(node))) + return 1; + mas->max = pivots[offset]; if (offset) mas->min = pivots[offset - 1] + 1; @@ -4526,6 +4541,9 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min) slots = ma_slots(node, mt); pivots = ma_pivots(node, mt); offset = ma_data_end(node, mt, pivots, mas->max); + if (unlikely(ma_dead_node(node))) + return 1; + if (offset) mas->min = pivots[offset - 1] + 1; @@ -4574,6 +4592,7 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node, struct maple_enode *enode; int level = 0; unsigned char offset; + unsigned char node_end; enum maple_type mt; void __rcu **slots; @@ -4597,7 +4616,11 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node, node = mas_mn(mas); mt = mte_node_type(mas->node); pivots = ma_pivots(node, mt); - } while (unlikely(offset == ma_data_end(node, mt, pivots, mas->max))); + node_end = ma_data_end(node, mt, pivots, mas->max); + if (unlikely(ma_dead_node(node))) + return 1; + + } while (unlikely(offset == node_end)); slots = ma_slots(node, mt); pivot = mas_safe_pivot(mas, pivots, ++offset, mt); @@ -4613,6 +4636,9 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node, mt = mte_node_type(mas->node); slots = ma_slots(node, mt); pivots = ma_pivots(node, mt); + if (unlikely(ma_dead_node(node))) + return 1; + offset = 0; pivot = pivots[0]; } @@ -4659,11 +4685,14 @@ static inline void *mas_next_nentry(struct ma_state *mas, return NULL; } - pivots = ma_pivots(node, type); slots = ma_slots(node, type); - mas->index = mas_safe_min(mas, pivots, mas->offset); + pivots = ma_pivots(node, type); count = ma_data_end(node, type, pivots, mas->max); - if (ma_dead_node(node)) + if (unlikely(ma_dead_node(node))) + return NULL; + + mas->index = mas_safe_min(mas, pivots, mas->offset); + if (unlikely(ma_dead_node(node))) return NULL; if (mas->index > max) @@ -4817,6 +4846,11 @@ static inline void *mas_prev_nentry(struct ma_state *mas, unsigned long limit, slots = ma_slots(mn, mt); pivots = ma_pivots(mn, mt); + if (unlikely(ma_dead_node(mn))) { + mas_rewalk(mas, index); + goto retry; + } + if (offset == mt_pivots[mt]) pivot = mas->max; else @@ -6617,11 +6651,11 @@ static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn, while (likely(!ma_is_leaf(mt))) { MT_BUG_ON(mas->tree, mte_dead_node(mas->node)); slots = ma_slots(mn, mt); - pivots = ma_pivots(mn, mt); - max = pivots[0]; entry = mas_slot(mas, slots, 0); + pivots = ma_pivots(mn, mt); if (unlikely(ma_dead_node(mn))) return NULL; + max = pivots[0]; mas->node = entry; mn = mas_mn(mas); mt = mte_node_type(mas->node); @@ -6641,13 +6675,13 @@ static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn, if (likely(entry)) return entry; - pivots = ma_pivots(mn, mt); - mas->index = pivots[0] + 1; mas->offset = 1; entry = mas_slot(mas, slots, 1); + pivots = ma_pivots(mn, mt); if (unlikely(ma_dead_node(mn))) return NULL; + mas->index = pivots[0] + 1; if (mas->index > limit) goto none; From a7b92d59c885018cb7bb88539892278e4fd64b29 Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Mon, 27 Feb 2023 09:36:01 -0800 Subject: [PATCH 759/898] maple_tree: detect dead nodes in mas_start() When initially starting a search, the root node may already be in the process of being replaced in RCU mode. Detect and restart the walk if this is the case. This is necessary for RCU mode of the maple tree. Link: https://lkml.kernel.org/r/20230227173632.3292573-3-surenb@google.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam Howlett Signed-off-by: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 095b9cb1f4f11..3d53339656e1d 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -1360,12 +1360,16 @@ static inline struct maple_enode *mas_start(struct ma_state *mas) mas->max = ULONG_MAX; mas->depth = 0; +retry: root = mas_root(mas); /* Tree with nodes */ if (likely(xa_is_node(root))) { mas->depth = 1; mas->node = mte_safe_root(root); mas->offset = 0; + if (mte_dead_node(mas->node)) + goto retry; + return NULL; } From 2e5b4921f8efc9e845f4f04741797d16f36847eb Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Mon, 27 Feb 2023 09:36:02 -0800 Subject: [PATCH 760/898] maple_tree: fix freeing of nodes in rcu mode The walk to destroy the nodes was not always setting the node type and would result in a destroy method potentially using the values as nodes. Avoid this by setting the correct node types. This is necessary for the RCU mode of the maple tree. Link: https://lkml.kernel.org/r/20230227173632.3292573-4-surenb@google.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam Howlett Signed-off-by: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 73 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 11 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 3d53339656e1d..946acda295219 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -902,6 +902,44 @@ static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, meta->end = end; } +/* + * mas_clear_meta() - clear the metadata information of a node, if it exists + * @mas: The maple state + * @mn: The maple node + * @mt: The maple node type + * @offset: The offset of the highest sub-gap in this node. + * @end: The end of the data in this node. + */ +static inline void mas_clear_meta(struct ma_state *mas, struct maple_node *mn, + enum maple_type mt) +{ + struct maple_metadata *meta; + unsigned long *pivots; + void __rcu **slots; + void *next; + + switch (mt) { + case maple_range_64: + pivots = mn->mr64.pivot; + if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { + slots = mn->mr64.slot; + next = mas_slot_locked(mas, slots, + MAPLE_RANGE64_SLOTS - 1); + if (unlikely((mte_to_node(next) && mte_node_type(next)))) + return; /* The last slot is a node, no metadata */ + } + fallthrough; + case maple_arange_64: + meta = ma_meta(mn, mt); + break; + default: + return; + } + + meta->gap = 0; + meta->end = 0; +} + /* * ma_meta_end() - Get the data end of a node from the metadata * @mn: The maple node @@ -5441,20 +5479,22 @@ static inline int mas_rev_alloc(struct ma_state *mas, unsigned long min, * mas_dead_leaves() - Mark all leaves of a node as dead. * @mas: The maple state * @slots: Pointer to the slot array + * @type: The maple node type * * Must hold the write lock. * * Return: The number of leaves marked as dead. */ static inline -unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots) +unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots, + enum maple_type mt) { struct maple_node *node; enum maple_type type; void *entry; int offset; - for (offset = 0; offset < mt_slot_count(mas->node); offset++) { + for (offset = 0; offset < mt_slots[mt]; offset++) { entry = mas_slot_locked(mas, slots, offset); type = mte_node_type(entry); node = mte_to_node(entry); @@ -5473,14 +5513,13 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots) static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offset) { - struct maple_node *node, *next; + struct maple_node *next; void __rcu **slots = NULL; next = mas_mn(mas); do { - mas->node = ma_enode_ptr(next); - node = mas_mn(mas); - slots = ma_slots(node, node->type); + mas->node = mt_mk_node(next, next->type); + slots = ma_slots(next, next->type); next = mas_slot_locked(mas, slots, offset); offset = 0; } while (!ma_is_leaf(next->type)); @@ -5544,11 +5583,14 @@ static inline void __rcu **mas_destroy_descend(struct ma_state *mas, node = mas_mn(mas); slots = ma_slots(node, mte_node_type(mas->node)); next = mas_slot_locked(mas, slots, 0); - if ((mte_dead_node(next))) + if ((mte_dead_node(next))) { + mte_to_node(next)->type = mte_node_type(next); next = mas_slot_locked(mas, slots, 1); + } mte_set_node_dead(mas->node); node->type = mte_node_type(mas->node); + mas_clear_meta(mas, node, node->type); node->piv_parent = prev; node->parent_slot = offset; offset = 0; @@ -5568,13 +5610,18 @@ static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags, MA_STATE(mas, &mt, 0, 0); - if (mte_is_leaf(enode)) + mas.node = enode; + if (mte_is_leaf(enode)) { + node->type = mte_node_type(enode); goto free_leaf; + } + ma_flags &= ~MT_FLAGS_LOCK_MASK; mt_init_flags(&mt, ma_flags); mas_lock(&mas); - mas.node = start = enode; + mte_to_node(enode)->ma_flags = ma_flags; + start = enode; slots = mas_destroy_descend(&mas, start, 0); node = mas_mn(&mas); do { @@ -5582,7 +5629,8 @@ static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags, unsigned char offset; struct maple_enode *parent, *tmp; - node->slot_len = mas_dead_leaves(&mas, slots); + node->type = mte_node_type(mas.node); + node->slot_len = mas_dead_leaves(&mas, slots, node->type); if (free) mt_free_bulk(node->slot_len, slots); offset = node->parent_slot + 1; @@ -5606,7 +5654,8 @@ static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags, } while (start != mas.node); node = mas_mn(&mas); - node->slot_len = mas_dead_leaves(&mas, slots); + node->type = mte_node_type(mas.node); + node->slot_len = mas_dead_leaves(&mas, slots, node->type); if (free) mt_free_bulk(node->slot_len, slots); @@ -5616,6 +5665,8 @@ static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags, free_leaf: if (free) mt_free_rcu(&node->rcu); + else + mas_clear_meta(&mas, node, node->type); } /* From 8372f4d83f96f35915106093cde4565836587123 Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Mon, 27 Feb 2023 09:36:03 -0800 Subject: [PATCH 761/898] maple_tree: remove extra smp_wmb() from mas_dead_leaves() The call to mte_set_dead_node() before the smp_wmb() already calls smp_wmb() so this is not needed. This is an optimization for the RCU mode of the maple tree. Link: https://lkml.kernel.org/r/20230227173632.3292573-5-surenb@google.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam Howlett Signed-off-by: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 946acda295219..96d673e4ba5b2 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -5503,7 +5503,6 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots, break; mte_set_node_dead(entry); - smp_wmb(); /* Needed for RCU */ node->type = type; rcu_assign_pointer(slots[offset], node); } From c13af03de46ba27674dd9fb31a17c0d480081139 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Mon, 27 Feb 2023 09:36:04 -0800 Subject: [PATCH 762/898] maple_tree: fix write memory barrier of nodes once dead for RCU mode During the development of the maple tree, the strategy of freeing multiple nodes changed and, in the process, the pivots were reused to store pointers to dead nodes. To ensure the readers see accurate pivots, the writers need to mark the nodes as dead and call smp_wmb() to ensure any readers can identify the node as dead before using the pivot values. There were two places where the old method of marking the node as dead without smp_wmb() were being used, which resulted in RCU readers seeing the wrong pivot value before seeing the node was dead. Fix this race condition by using mte_set_node_dead() which has the smp_wmb() call to ensure the race is closed. Add a WARN_ON() to the ma_free_rcu() call to ensure all nodes being freed are marked as dead to ensure there are no other call paths besides the two updated paths. This is necessary for the RCU mode of the maple tree. Link: https://lkml.kernel.org/r/20230227173632.3292573-6-surenb@google.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam R. Howlett Signed-off-by: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 7 +++++-- tools/testing/radix-tree/maple.c | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 96d673e4ba5b2..5202d89ba56e5 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -185,7 +185,7 @@ static void mt_free_rcu(struct rcu_head *head) */ static void ma_free_rcu(struct maple_node *node) { - node->parent = ma_parent_ptr(node); + WARN_ON(node->parent != ma_parent_ptr(node)); call_rcu(&node->rcu, mt_free_rcu); } @@ -1778,8 +1778,10 @@ static inline void mas_replace(struct ma_state *mas, bool advanced) rcu_assign_pointer(slots[offset], mas->node); } - if (!advanced) + if (!advanced) { + mte_set_node_dead(old_enode); mas_free(mas, old_enode); + } } /* @@ -4218,6 +4220,7 @@ static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas) done: mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end); if (in_rcu) { + mte_set_node_dead(mas->node); mas->node = mt_mk_node(newnode, wr_mas->type); mas_replace(mas, false); } else { diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 958ee9bdb3166..4c89ff333f6f0 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -108,6 +108,7 @@ static noinline void check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, mn->slot[1] != NULL); MT_BUG_ON(mt, mas_allocated(&mas) != 0); + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); mas.node = MAS_START; mas_nomem(&mas, GFP_KERNEL); @@ -160,6 +161,7 @@ static noinline void check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, mas_allocated(&mas) != i); MT_BUG_ON(mt, !mn); MT_BUG_ON(mt, not_empty(mn)); + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); } @@ -192,6 +194,7 @@ static noinline void check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, not_empty(mn)); MT_BUG_ON(mt, mas_allocated(&mas) != i - 1); MT_BUG_ON(mt, !mn); + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); } @@ -210,6 +213,7 @@ static noinline void check_new_node(struct maple_tree *mt) mn = mas_pop_node(&mas); MT_BUG_ON(mt, not_empty(mn)); MT_BUG_ON(mt, mas_allocated(&mas) != j - 1); + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); } MT_BUG_ON(mt, mas_allocated(&mas) != 0); @@ -233,6 +237,7 @@ static noinline void check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, mas_allocated(&mas) != i - j); mn = mas_pop_node(&mas); MT_BUG_ON(mt, not_empty(mn)); + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1); } @@ -269,6 +274,7 @@ static noinline void check_new_node(struct maple_tree *mt) mn = mas_pop_node(&mas); /* get the next node. */ MT_BUG_ON(mt, mn == NULL); MT_BUG_ON(mt, not_empty(mn)); + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); } MT_BUG_ON(mt, mas_allocated(&mas) != 0); @@ -294,6 +300,7 @@ static noinline void check_new_node(struct maple_tree *mt) mn = mas_pop_node(&mas2); /* get the next node. */ MT_BUG_ON(mt, mn == NULL); MT_BUG_ON(mt, not_empty(mn)); + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); } MT_BUG_ON(mt, mas_allocated(&mas2) != 0); @@ -334,10 +341,12 @@ static noinline void check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2); mn = mas_pop_node(&mas); MT_BUG_ON(mt, not_empty(mn)); + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); for (i = 1; i <= MAPLE_ALLOC_SLOTS + 1; i++) { mn = mas_pop_node(&mas); MT_BUG_ON(mt, not_empty(mn)); + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); } MT_BUG_ON(mt, mas_allocated(&mas) != 0); @@ -375,6 +384,7 @@ static noinline void check_new_node(struct maple_tree *mt) mas_node_count(&mas, i); /* Request */ mas_nomem(&mas, GFP_KERNEL); /* Fill request */ mn = mas_pop_node(&mas); /* get the next node. */ + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); mas_destroy(&mas); @@ -382,10 +392,13 @@ static noinline void check_new_node(struct maple_tree *mt) mas_node_count(&mas, i); /* Request */ mas_nomem(&mas, GFP_KERNEL); /* Fill request */ mn = mas_pop_node(&mas); /* get the next node. */ + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); mn = mas_pop_node(&mas); /* get the next node. */ + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); mn = mas_pop_node(&mas); /* get the next node. */ + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); mas_destroy(&mas); } @@ -35369,6 +35382,7 @@ static noinline void check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, allocated != 1 + height * 3); mn = mas_pop_node(&mas); MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); mas_destroy(&mas); @@ -35386,6 +35400,7 @@ static noinline void check_prealloc(struct maple_tree *mt) mas_destroy(&mas); allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); + mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); @@ -35756,6 +35771,7 @@ void farmer_tests(void) tree.ma_root = mt_mk_node(node, maple_leaf_64); mt_dump(&tree); + node->parent = ma_parent_ptr(node); ma_free_rcu(node); /* Check things that will make lockdep angry */ From 0a2b18d948838e16912b3b627b504ab062b7d02a Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Mon, 27 Feb 2023 09:36:05 -0800 Subject: [PATCH 763/898] maple_tree: add smp_rmb() to dead node detection Add an smp_rmb() before reading the parent pointer to ensure that anything read from the node prior to the parent pointer hasn't been reordered ahead of this check. The is necessary for RCU mode. Link: https://lkml.kernel.org/r/20230227173632.3292573-7-surenb@google.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam R. Howlett Signed-off-by: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 5202d89ba56e5..72c89eb033932 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -539,9 +539,11 @@ static inline struct maple_node *mte_parent(const struct maple_enode *enode) */ static inline bool ma_dead_node(const struct maple_node *node) { - struct maple_node *parent = (void *)((unsigned long) - node->parent & ~MAPLE_NODE_MASK); + struct maple_node *parent; + /* Do not reorder reads from the node prior to the parent check */ + smp_rmb(); + parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); return (parent == node); } @@ -556,6 +558,8 @@ static inline bool mte_dead_node(const struct maple_enode *enode) struct maple_node *parent, *node; node = mte_to_node(enode); + /* Do not reorder reads from the node prior to the parent check */ + smp_rmb(); parent = mte_parent(enode); return (parent == node); } From 790e1fa86b340c2bd4a327e01c161f7a1ad885f6 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Mon, 27 Feb 2023 09:36:06 -0800 Subject: [PATCH 764/898] maple_tree: add RCU lock checking to rcu callback functions Dereferencing RCU objects within the RCU callback without the RCU check has caused lockdep to complain. Fix the RCU dereferencing by using the RCU callback lock to ensure the operation is safe. Also stop creating a new lock to use for dereferencing during destruction of the tree or subtree. Instead, pass through a pointer to the tree that has the lock that is held for RCU dereferencing checking. It also does not make sense to use the maple state in the freeing scenario as the tree walk is a special case where the tree no longer has the normal encodings and parent pointers. Link: https://lkml.kernel.org/r/20230227173632.3292573-8-surenb@google.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam R. Howlett Reported-by: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 188 ++++++++++++++++++++++++----------------------- 1 file changed, 96 insertions(+), 92 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 72c89eb033932..b1db0bd71aede 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -824,6 +824,11 @@ static inline void *mt_slot(const struct maple_tree *mt, return rcu_dereference_check(slots[offset], mt_locked(mt)); } +static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots, + unsigned char offset) +{ + return rcu_dereference_protected(slots[offset], mt_locked(mt)); +} /* * mas_slot_locked() - Get the slot value when holding the maple tree lock. * @mas: The maple state @@ -835,7 +840,7 @@ static inline void *mt_slot(const struct maple_tree *mt, static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots, unsigned char offset) { - return rcu_dereference_protected(slots[offset], mt_locked(mas->tree)); + return mt_slot_locked(mas->tree, slots, offset); } /* @@ -907,34 +912,35 @@ static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, } /* - * mas_clear_meta() - clear the metadata information of a node, if it exists - * @mas: The maple state + * mt_clear_meta() - clear the metadata information of a node, if it exists + * @mt: The maple tree * @mn: The maple node - * @mt: The maple node type + * @type: The maple node type * @offset: The offset of the highest sub-gap in this node. * @end: The end of the data in this node. */ -static inline void mas_clear_meta(struct ma_state *mas, struct maple_node *mn, - enum maple_type mt) +static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, + enum maple_type type) { struct maple_metadata *meta; unsigned long *pivots; void __rcu **slots; void *next; - switch (mt) { + switch (type) { case maple_range_64: pivots = mn->mr64.pivot; if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { slots = mn->mr64.slot; - next = mas_slot_locked(mas, slots, - MAPLE_RANGE64_SLOTS - 1); - if (unlikely((mte_to_node(next) && mte_node_type(next)))) - return; /* The last slot is a node, no metadata */ + next = mt_slot_locked(mt, slots, + MAPLE_RANGE64_SLOTS - 1); + if (unlikely((mte_to_node(next) && + mte_node_type(next)))) + return; /* no metadata, could be node */ } fallthrough; case maple_arange_64: - meta = ma_meta(mn, mt); + meta = ma_meta(mn, type); break; default: return; @@ -5483,7 +5489,7 @@ static inline int mas_rev_alloc(struct ma_state *mas, unsigned long min, } /* - * mas_dead_leaves() - Mark all leaves of a node as dead. + * mte_dead_leaves() - Mark all leaves of a node as dead. * @mas: The maple state * @slots: Pointer to the slot array * @type: The maple node type @@ -5493,16 +5499,16 @@ static inline int mas_rev_alloc(struct ma_state *mas, unsigned long min, * Return: The number of leaves marked as dead. */ static inline -unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots, - enum maple_type mt) +unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt, + void __rcu **slots) { struct maple_node *node; enum maple_type type; void *entry; int offset; - for (offset = 0; offset < mt_slots[mt]; offset++) { - entry = mas_slot_locked(mas, slots, offset); + for (offset = 0; offset < mt_slot_count(enode); offset++) { + entry = mt_slot(mt, slots, offset); type = mte_node_type(entry); node = mte_to_node(entry); /* Use both node and type to catch LE & BE metadata */ @@ -5517,162 +5523,160 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots, return offset; } -static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offset) +/** + * mte_dead_walk() - Walk down a dead tree to just before the leaves + * @enode: The maple encoded node + * @offset: The starting offset + * + * Note: This can only be used from the RCU callback context. + */ +static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset) { - struct maple_node *next; + struct maple_node *node, *next; void __rcu **slots = NULL; - next = mas_mn(mas); + next = mte_to_node(*enode); do { - mas->node = mt_mk_node(next, next->type); - slots = ma_slots(next, next->type); - next = mas_slot_locked(mas, slots, offset); + *enode = ma_enode_ptr(next); + node = mte_to_node(*enode); + slots = ma_slots(node, node->type); + next = rcu_dereference_protected(slots[offset], + lock_is_held(&rcu_callback_map)); offset = 0; } while (!ma_is_leaf(next->type)); return slots; } +/** + * mt_free_walk() - Walk & free a tree in the RCU callback context + * @head: The RCU head that's within the node. + * + * Note: This can only be used from the RCU callback context. + */ static void mt_free_walk(struct rcu_head *head) { void __rcu **slots; struct maple_node *node, *start; - struct maple_tree mt; + struct maple_enode *enode; unsigned char offset; enum maple_type type; - MA_STATE(mas, &mt, 0, 0); node = container_of(head, struct maple_node, rcu); if (ma_is_leaf(node->type)) goto free_leaf; - mt_init_flags(&mt, node->ma_flags); - mas_lock(&mas); start = node; - mas.node = mt_mk_node(node, node->type); - slots = mas_dead_walk(&mas, 0); - node = mas_mn(&mas); + enode = mt_mk_node(node, node->type); + slots = mte_dead_walk(&enode, 0); + node = mte_to_node(enode); do { mt_free_bulk(node->slot_len, slots); offset = node->parent_slot + 1; - mas.node = node->piv_parent; - if (mas_mn(&mas) == node) - goto start_slots_free; - - type = mte_node_type(mas.node); - slots = ma_slots(mte_to_node(mas.node), type); - if ((offset < mt_slots[type]) && (slots[offset])) - slots = mas_dead_walk(&mas, offset); - - node = mas_mn(&mas); + enode = node->piv_parent; + if (mte_to_node(enode) == node) + goto free_leaf; + + type = mte_node_type(enode); + slots = ma_slots(mte_to_node(enode), type); + if ((offset < mt_slots[type]) && + rcu_dereference_protected(slots[offset], + lock_is_held(&rcu_callback_map))) + slots = mte_dead_walk(&enode, offset); + node = mte_to_node(enode); } while ((node != start) || (node->slot_len < offset)); slots = ma_slots(node, node->type); mt_free_bulk(node->slot_len, slots); -start_slots_free: - mas_unlock(&mas); free_leaf: mt_free_rcu(&node->rcu); } -static inline void __rcu **mas_destroy_descend(struct ma_state *mas, - struct maple_enode *prev, unsigned char offset) +static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, + struct maple_tree *mt, struct maple_enode *prev, unsigned char offset) { struct maple_node *node; - struct maple_enode *next = mas->node; + struct maple_enode *next = *enode; void __rcu **slots = NULL; + enum maple_type type; + unsigned char next_offset = 0; do { - mas->node = next; - node = mas_mn(mas); - slots = ma_slots(node, mte_node_type(mas->node)); - next = mas_slot_locked(mas, slots, 0); - if ((mte_dead_node(next))) { - mte_to_node(next)->type = mte_node_type(next); - next = mas_slot_locked(mas, slots, 1); - } + *enode = next; + node = mte_to_node(*enode); + type = mte_node_type(*enode); + slots = ma_slots(node, type); + next = mt_slot_locked(mt, slots, next_offset); + if ((mte_dead_node(next))) + next = mt_slot_locked(mt, slots, ++next_offset); - mte_set_node_dead(mas->node); - node->type = mte_node_type(mas->node); - mas_clear_meta(mas, node, node->type); + mte_set_node_dead(*enode); + node->type = type; node->piv_parent = prev; node->parent_slot = offset; - offset = 0; - prev = mas->node; + offset = next_offset; + next_offset = 0; + prev = *enode; } while (!mte_is_leaf(next)); return slots; } -static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags, +static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, bool free) { void __rcu **slots; struct maple_node *node = mte_to_node(enode); struct maple_enode *start; - struct maple_tree mt; - - MA_STATE(mas, &mt, 0, 0); - mas.node = enode; if (mte_is_leaf(enode)) { node->type = mte_node_type(enode); goto free_leaf; } - ma_flags &= ~MT_FLAGS_LOCK_MASK; - mt_init_flags(&mt, ma_flags); - mas_lock(&mas); - - mte_to_node(enode)->ma_flags = ma_flags; start = enode; - slots = mas_destroy_descend(&mas, start, 0); - node = mas_mn(&mas); + slots = mte_destroy_descend(&enode, mt, start, 0); + node = mte_to_node(enode); // Updated in the above call. do { enum maple_type type; unsigned char offset; struct maple_enode *parent, *tmp; - node->type = mte_node_type(mas.node); - node->slot_len = mas_dead_leaves(&mas, slots, node->type); + node->slot_len = mte_dead_leaves(enode, mt, slots); if (free) mt_free_bulk(node->slot_len, slots); offset = node->parent_slot + 1; - mas.node = node->piv_parent; - if (mas_mn(&mas) == node) - goto start_slots_free; + enode = node->piv_parent; + if (mte_to_node(enode) == node) + goto free_leaf; - type = mte_node_type(mas.node); - slots = ma_slots(mte_to_node(mas.node), type); + type = mte_node_type(enode); + slots = ma_slots(mte_to_node(enode), type); if (offset >= mt_slots[type]) goto next; - tmp = mas_slot_locked(&mas, slots, offset); + tmp = mt_slot_locked(mt, slots, offset); if (mte_node_type(tmp) && mte_to_node(tmp)) { - parent = mas.node; - mas.node = tmp; - slots = mas_destroy_descend(&mas, parent, offset); + parent = enode; + enode = tmp; + slots = mte_destroy_descend(&enode, mt, parent, offset); } next: - node = mas_mn(&mas); - } while (start != mas.node); + node = mte_to_node(enode); + } while (start != enode); - node = mas_mn(&mas); - node->type = mte_node_type(mas.node); - node->slot_len = mas_dead_leaves(&mas, slots, node->type); + node = mte_to_node(enode); + node->slot_len = mte_dead_leaves(enode, mt, slots); if (free) mt_free_bulk(node->slot_len, slots); -start_slots_free: - mas_unlock(&mas); - free_leaf: if (free) mt_free_rcu(&node->rcu); else - mas_clear_meta(&mas, node, node->type); + mt_clear_meta(mt, node, node->type); } /* @@ -5688,10 +5692,10 @@ static inline void mte_destroy_walk(struct maple_enode *enode, struct maple_node *node = mte_to_node(enode); if (mt_in_rcu(mt)) { - mt_destroy_walk(enode, mt->ma_flags, false); + mt_destroy_walk(enode, mt, false); call_rcu(&node->rcu, mt_free_walk); } else { - mt_destroy_walk(enode, mt->ma_flags, true); + mt_destroy_walk(enode, mt, true); } } From 3dd4432549415f3c65dd52d5c687629efbf4ece1 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Mon, 27 Feb 2023 09:36:07 -0800 Subject: [PATCH 765/898] mm: enable maple tree RCU mode by default Use the maple tree in RCU mode for VMA tracking. The maple tree tracks the stack and is able to update the pivot (lower/upper boundary) in-place to allow the page fault handler to write to the tree while holding just the mmap read lock. This is safe as the writes to the stack have a guard VMA which ensures there will always be a NULL in the direction of the growth and thus will only update a pivot. It is possible, but not recommended, to have VMAs that grow up/down without guard VMAs. syzbot has constructed a testcase which sets up a VMA to grow and consume the empty space. Overwriting the entire NULL entry causes the tree to be altered in a way that is not safe for concurrent readers; the readers may see a node being rewritten or one that does not match the maple state they are using. Enabling RCU mode allows the concurrent readers to see a stable node and will return the expected result. [Liam.Howlett@Oracle.com: we don't need to free the nodes with RCU[ Link: https://lore.kernel.org/linux-mm/000000000000b0a65805f663ace6@google.com/ Link: https://lkml.kernel.org/r/20230227173632.3292573-9-surenb@google.com Fixes: d4af56c5c7c6 ("mm: start tracking VMAs with maple tree") Signed-off-by: Liam R. Howlett Signed-off-by: Suren Baghdasaryan Reported-by: syzbot+8d95422d3537159ca390@syzkaller.appspotmail.com Cc: Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 3 ++- kernel/fork.c | 3 +++ mm/mmap.c | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0722859c36478..a57e6ae78e656 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -774,7 +774,8 @@ struct mm_struct { unsigned long cpu_bitmap[]; }; -#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN) +#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \ + MT_FLAGS_USE_RCU) extern struct mm_struct init_mm; /* Pointer magic because the dynamic array size confuses some compilers. */ diff --git a/kernel/fork.c b/kernel/fork.c index c0257cbee0931..0c92f224c68ca 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -617,6 +617,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, if (retval) goto out; + mt_clear_in_rcu(vmi.mas.tree); for_each_vma(old_vmi, mpnt) { struct file *file; @@ -700,6 +701,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, retval = arch_dup_mmap(oldmm, mm); loop_out: vma_iter_free(&vmi); + if (!retval) + mt_set_in_rcu(vmi.mas.tree); out: mmap_write_unlock(mm); flush_tlb_mm(oldmm); diff --git a/mm/mmap.c b/mm/mmap.c index ad499f7b767fa..ff68a67a2a7c3 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2277,7 +2277,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, int count = 0; int error = -ENOMEM; MA_STATE(mas_detach, &mt_detach, 0, 0); - mt_init_flags(&mt_detach, MT_FLAGS_LOCK_EXTERN); + mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); mt_set_external_lock(&mt_detach, &mm->mmap_lock); /* @@ -3037,6 +3037,7 @@ void exit_mmap(struct mm_struct *mm) */ set_bit(MMF_OOM_SKIP, &mm->flags); mmap_write_lock(mm); + mt_clear_in_rcu(&mm->mm_mt); free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb); From 60d5b473d61be61ac315e544fcd6a8234a79500e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 21 Mar 2023 15:18:40 -0400 Subject: [PATCH 766/898] mm/hugetlb: fix uffd wr-protection for CoW optimization path This patch fixes an issue that a hugetlb uffd-wr-protected mapping can be writable even with uffd-wp bit set. It only happens with hugetlb private mappings, when someone firstly wr-protects a missing pte (which will install a pte marker), then a write to the same page without any prior access to the page. Userfaultfd-wp trap for hugetlb was implemented in hugetlb_fault() before reaching hugetlb_wp() to avoid taking more locks that userfault won't need. However there's one CoW optimization path that can trigger hugetlb_wp() inside hugetlb_no_page(), which will bypass the trap. This patch skips hugetlb_wp() for CoW and retries the fault if uffd-wp bit is detected. The new path will only trigger in the CoW optimization path because generic hugetlb_fault() (e.g. when a present pte was wr-protected) will resolve the uffd-wp bit already. Also make sure anonymous UNSHARE won't be affected and can still be resolved, IOW only skip CoW not CoR. This patch will be needed for v5.19+ hence copy stable. [peterx@redhat.com: v2] Link: https://lkml.kernel.org/r/ZBzOqwF2wrHgBVZb@x1n [peterx@redhat.com: v3] Link: https://lkml.kernel.org/r/20230324142620.2344140-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20230321191840.1897940-1-peterx@redhat.com Fixes: 166f3ecc0daf ("mm/hugetlb: hook page faults for uffd write protection") Signed-off-by: Peter Xu Reported-by: Muhammad Usama Anjum Tested-by: Muhammad Usama Anjum Acked-by: David Hildenbrand Reviewed-by: Mike Kravetz Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: Mike Rapoport Cc: Nadav Amit Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 07abcb6eb2030..245038a9fe4ea 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5478,7 +5478,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma, struct folio *pagecache_folio, spinlock_t *ptl) { const bool unshare = flags & FAULT_FLAG_UNSHARE; - pte_t pte; + pte_t pte = huge_ptep_get(ptep); struct hstate *h = hstate_vma(vma); struct page *old_page; struct folio *new_folio; @@ -5487,6 +5487,17 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long haddr = address & huge_page_mask(h); struct mmu_notifier_range range; + /* + * Never handle CoW for uffd-wp protected pages. It should be only + * handled when the uffd-wp protection is removed. + * + * Note that only the CoW optimization path (in hugetlb_no_page()) + * can trigger this, because hugetlb_fault() will always resolve + * uffd-wp bit first. + */ + if (!unshare && huge_pte_uffd_wp(pte)) + return 0; + /* * hugetlb does not support FOLL_FORCE-style write faults that keep the * PTE mapped R/O such as maybe_mkwrite() would do. @@ -5500,7 +5511,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma, return 0; } - pte = huge_ptep_get(ptep); old_page = pte_page(pte); delayacct_wpcopy_start(); From f76b3a32879de215ced3f8c754c4077b0c2f79e3 Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Fri, 24 Mar 2023 10:28:00 +0000 Subject: [PATCH 767/898] fsdax: force clear dirty mark if CoW XFS allows CoW on non-shared extents to combat fragmentation[1]. The old non-shared extent could be mwrited before, its dax entry is marked dirty. This results in a WARNing: [ 28.512349] ------------[ cut here ]------------ [ 28.512622] WARNING: CPU: 2 PID: 5255 at fs/dax.c:390 dax_insert_entry+0x342/0x390 [ 28.513050] Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs lockd grace fscache netfs nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables [ 28.515462] CPU: 2 PID: 5255 Comm: fsstress Kdump: loaded Not tainted 6.3.0-rc1-00001-g85e1481e19c1-dirty #117 [ 28.515902] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Arch Linux 1.16.1-1-1 04/01/2014 [ 28.516307] RIP: 0010:dax_insert_entry+0x342/0x390 [ 28.516536] Code: 30 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc 48 8b 45 20 48 83 c0 01 e9 e2 fe ff ff 48 8b 45 20 48 83 c0 01 e9 cd fe ff ff <0f> 0b e9 53 ff ff ff 48 8b 7c 24 08 31 f6 e8 1b 61 a1 00 eb 8c 48 [ 28.517417] RSP: 0000:ffffc9000845fb18 EFLAGS: 00010086 [ 28.517721] RAX: 0000000000000053 RBX: 0000000000000155 RCX: 000000000018824b [ 28.518113] RDX: 0000000000000000 RSI: ffffffff827525a6 RDI: 00000000ffffffff [ 28.518515] RBP: ffffea00062092c0 R08: 0000000000000000 R09: ffffc9000845f9c8 [ 28.518905] R10: 0000000000000003 R11: ffffffff82ddb7e8 R12: 0000000000000155 [ 28.519301] R13: 0000000000000000 R14: 000000000018824b R15: ffff88810cfa76b8 [ 28.519703] FS: 00007f14a0c94740(0000) GS:ffff88817bd00000(0000) knlGS:0000000000000000 [ 28.520148] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 28.520472] CR2: 00007f14a0c8d000 CR3: 000000010321c004 CR4: 0000000000770ee0 [ 28.520863] PKRU: 55555554 [ 28.521043] Call Trace: [ 28.521219] [ 28.521368] dax_fault_iter+0x196/0x390 [ 28.521595] dax_iomap_pte_fault+0x19b/0x3d0 [ 28.521852] __xfs_filemap_fault+0x234/0x2b0 [ 28.522116] __do_fault+0x30/0x130 [ 28.522334] do_fault+0x193/0x340 [ 28.522586] __handle_mm_fault+0x2d3/0x690 [ 28.522975] handle_mm_fault+0xe6/0x2c0 [ 28.523259] do_user_addr_fault+0x1bc/0x6f0 [ 28.523521] exc_page_fault+0x60/0x140 [ 28.523763] asm_exc_page_fault+0x22/0x30 [ 28.524001] RIP: 0033:0x7f14a0b589ca [ 28.524225] Code: c5 fe 7f 07 c5 fe 7f 47 20 c5 fe 7f 47 40 c5 fe 7f 47 60 c5 f8 77 c3 66 0f 1f 84 00 00 00 00 00 40 0f b6 c6 48 89 d1 48 89 fa aa 48 89 d0 c5 f8 77 c3 66 66 2e 0f 1f 84 00 00 00 00 00 66 90 [ 28.525198] RSP: 002b:00007fff1dea1c98 EFLAGS: 00010202 [ 28.525505] RAX: 000000000000001e RBX: 000000000014a000 RCX: 0000000000006046 [ 28.525895] RDX: 00007f14a0c82000 RSI: 000000000000001e RDI: 00007f14a0c8d000 [ 28.526290] RBP: 000000000000006f R08: 0000000000000004 R09: 000000000014a000 [ 28.526681] R10: 0000000000000008 R11: 0000000000000246 R12: 028f5c28f5c28f5c [ 28.527067] R13: 8f5c28f5c28f5c29 R14: 0000000000011046 R15: 00007f14a0c946c0 [ 28.527449] [ 28.527600] ---[ end trace 0000000000000000 ]--- To be able to delete this entry, clear its dirty mark before invalidate_inode_pages2_range(). [1] https://lore.kernel.org/linux-xfs/20230321151339.GA11376@frogsfrogsfrogs/ Link: https://lkml.kernel.org/r/1679653680-2-1-git-send-email-ruansy.fnst@fujitsu.com Fixes: f80e1668888f3 ("fsdax: invalidate pages when CoW") Signed-off-by: Shiyang Ruan Cc: Dan Williams Cc: Darrick J. Wong Cc: Jan Kara Cc: Matthew Wilcox (Oracle) Cc: Signed-off-by: Andrew Morton --- fs/dax.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index 5d2e9b10030e9..2ababb89918de 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -781,6 +781,33 @@ static int __dax_invalidate_entry(struct address_space *mapping, return ret; } +static int __dax_clear_dirty_range(struct address_space *mapping, + pgoff_t start, pgoff_t end) +{ + XA_STATE(xas, &mapping->i_pages, start); + unsigned int scanned = 0; + void *entry; + + xas_lock_irq(&xas); + xas_for_each(&xas, entry, end) { + entry = get_unlocked_entry(&xas, 0); + xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY); + xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); + put_unlocked_entry(&xas, entry, WAKE_NEXT); + + if (++scanned % XA_CHECK_SCHED) + continue; + + xas_pause(&xas); + xas_unlock_irq(&xas); + cond_resched(); + xas_lock_irq(&xas); + } + xas_unlock_irq(&xas); + + return 0; +} + /* * Delete DAX entry at @index from @mapping. Wait for it * to be unlocked before deleting it. @@ -1440,6 +1467,16 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, * written by write(2) is visible in mmap. */ if (iomap->flags & IOMAP_F_NEW || cow) { + /* + * Filesystem allows CoW on non-shared extents. The src extents + * may have been mmapped with dirty mark before. To be able to + * invalidate its dax entries, we need to clear the dirty mark + * in advance. + */ + if (cow) + __dax_clear_dirty_range(iomi->inode->i_mapping, + pos >> PAGE_SHIFT, + (end - 1) >> PAGE_SHIFT); invalidate_inode_pages2_range(iomi->inode->i_mapping, pos >> PAGE_SHIFT, (end - 1) >> PAGE_SHIFT); From 119b57eaf09478ce9e2a8f88a12749c2658b0ed5 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Sat, 25 Mar 2023 11:46:30 +0900 Subject: [PATCH 768/898] zsmalloc: document new fullness grouping Patch series "zsmalloc: minor documentation updates". Two minor patches that bring zsmalloc documentation up to date. This patch (of 2): Update documentation and reflect new zspages fullness grouping (we don't use almost_empty and almost_full anymore). Link: https://lkml.kernel.org/r/20230325024631.2817153-1-senozhatsky@chromium.org Link: https://lkml.kernel.org/r/20230325024631.2817153-2-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky Fixes: 67e157eb3639 ("zsmalloc: show per fullness group class stats") Cc: Minchan Kim Cc: Signed-off-by: Andrew Morton --- Documentation/mm/zsmalloc.rst | 133 +++++++++++++++++++--------------- 1 file changed, 74 insertions(+), 59 deletions(-) diff --git a/Documentation/mm/zsmalloc.rst b/Documentation/mm/zsmalloc.rst index 64d127bfc221f..3c6bf639887ff 100644 --- a/Documentation/mm/zsmalloc.rst +++ b/Documentation/mm/zsmalloc.rst @@ -39,13 +39,12 @@ With CONFIG_ZSMALLOC_STAT, we could see zsmalloc internal information via # cat /sys/kernel/debug/zsmalloc/zram0/classes - class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage + class size 10% 20% 30% 40% 50% 60% 70% 80% 90% 99% 100% obj_allocated obj_used pages_used pages_per_zspage freeable ... ... - 9 176 0 1 186 129 8 4 - 10 192 1 0 2880 2872 135 3 - 11 208 0 1 819 795 42 2 - 12 224 0 1 219 159 12 4 + 30 512 0 12 4 1 0 1 0 0 1 0 414 3464 3346 433 1 14 + 31 528 2 7 2 2 1 0 1 0 0 2 117 4154 3793 536 4 44 + 32 544 6 3 4 1 2 1 0 0 0 1 260 4170 3965 556 2 26 ... ... @@ -54,10 +53,28 @@ class index size object size zspage stores -almost_empty - the number of ZS_ALMOST_EMPTY zspages(see below) -almost_full - the number of ZS_ALMOST_FULL zspages(see below) +10% + the number of zspages with usage ratio less than 10% (see below) +20% + the number of zspages with usage ratio between 10% and 20% +30% + the number of zspages with usage ratio between 20% and 30% +40% + the number of zspages with usage ratio between 30% and 40% +50% + the number of zspages with usage ratio between 40% and 50% +60% + the number of zspages with usage ratio between 50% and 60% +70% + the number of zspages with usage ratio between 60% and 70% +80% + the number of zspages with usage ratio between 70% and 80% +90% + the number of zspages with usage ratio between 80% and 90% +99% + the number of zspages with usage ratio between 90% and 99% +100% + the number of zspages with usage ratio 100% obj_allocated the number of objects allocated obj_used @@ -67,18 +84,11 @@ pages_used pages_per_zspage the number of 0-order pages to make a zspage -We assign a zspage to ZS_ALMOST_EMPTY fullness group when n <= N / f, where - -* n = number of allocated objects -* N = total number of objects zspage can store -* f = fullness_threshold_frac(ie, 4 at the moment) - -Similarly, we assign zspage to: - -* ZS_ALMOST_FULL when n > N / f -* ZS_EMPTY when n == 0 -* ZS_FULL when n == N - +Each zspage maintains inuse counter which keeps track of the number of +objects stored in the zspage. The inuse counter determines the zspage's +"fullness group" which is calculated as the ratio of the "inuse" objects to +the total number of objects the zspage can hold (objs_per_zspage). The +closer the inuse counter is to objs_per_zspage, the better. Internals ========= @@ -94,10 +104,10 @@ of objects that each zspage can store. For instance, consider the following size classes::: - class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable + class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable ... - 94 1536 0 0 0 0 0 3 0 - 100 1632 0 0 0 0 0 2 0 + 94 1536 0 .... 0 0 0 0 3 0 + 100 1632 0 .... 0 0 0 0 2 0 ... @@ -134,10 +144,11 @@ reduces memory wastage. Let's take a closer look at the bottom of `/sys/kernel/debug/zsmalloc/zramX/classes`::: - class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable + class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable + ... - 202 3264 0 0 0 0 0 4 0 - 254 4096 0 0 0 0 0 1 0 + 202 3264 0 .. 0 0 0 0 4 0 + 254 4096 0 .. 0 0 0 0 1 0 ... Size class #202 stores objects of size 3264 bytes and has a maximum of 4 pages @@ -151,40 +162,42 @@ efficient storage of large objects. For zspage chain size of 8, huge class watermark becomes 3632 bytes::: - class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable + class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable + ... - 202 3264 0 0 0 0 0 4 0 - 211 3408 0 0 0 0 0 5 0 - 217 3504 0 0 0 0 0 6 0 - 222 3584 0 0 0 0 0 7 0 - 225 3632 0 0 0 0 0 8 0 - 254 4096 0 0 0 0 0 1 0 + 202 3264 0 .. 0 0 0 0 4 0 + 211 3408 0 .. 0 0 0 0 5 0 + 217 3504 0 .. 0 0 0 0 6 0 + 222 3584 0 .. 0 0 0 0 7 0 + 225 3632 0 .. 0 0 0 0 8 0 + 254 4096 0 .. 0 0 0 0 1 0 ... For zspage chain size of 16, huge class watermark becomes 3840 bytes::: - class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable + class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable + ... - 202 3264 0 0 0 0 0 4 0 - 206 3328 0 0 0 0 0 13 0 - 207 3344 0 0 0 0 0 9 0 - 208 3360 0 0 0 0 0 14 0 - 211 3408 0 0 0 0 0 5 0 - 212 3424 0 0 0 0 0 16 0 - 214 3456 0 0 0 0 0 11 0 - 217 3504 0 0 0 0 0 6 0 - 219 3536 0 0 0 0 0 13 0 - 222 3584 0 0 0 0 0 7 0 - 223 3600 0 0 0 0 0 15 0 - 225 3632 0 0 0 0 0 8 0 - 228 3680 0 0 0 0 0 9 0 - 230 3712 0 0 0 0 0 10 0 - 232 3744 0 0 0 0 0 11 0 - 234 3776 0 0 0 0 0 12 0 - 235 3792 0 0 0 0 0 13 0 - 236 3808 0 0 0 0 0 14 0 - 238 3840 0 0 0 0 0 15 0 - 254 4096 0 0 0 0 0 1 0 + 202 3264 0 .. 0 0 0 0 4 0 + 206 3328 0 .. 0 0 0 0 13 0 + 207 3344 0 .. 0 0 0 0 9 0 + 208 3360 0 .. 0 0 0 0 14 0 + 211 3408 0 .. 0 0 0 0 5 0 + 212 3424 0 .. 0 0 0 0 16 0 + 214 3456 0 .. 0 0 0 0 11 0 + 217 3504 0 .. 0 0 0 0 6 0 + 219 3536 0 .. 0 0 0 0 13 0 + 222 3584 0 .. 0 0 0 0 7 0 + 223 3600 0 .. 0 0 0 0 15 0 + 225 3632 0 .. 0 0 0 0 8 0 + 228 3680 0 .. 0 0 0 0 9 0 + 230 3712 0 .. 0 0 0 0 10 0 + 232 3744 0 .. 0 0 0 0 11 0 + 234 3776 0 .. 0 0 0 0 12 0 + 235 3792 0 .. 0 0 0 0 13 0 + 236 3808 0 .. 0 0 0 0 14 0 + 238 3840 0 .. 0 0 0 0 15 0 + 254 4096 0 .. 0 0 0 0 1 0 ... Overall the combined zspage chain size effect on zsmalloc pool configuration::: @@ -214,9 +227,10 @@ zram as a build artifacts storage (Linux kernel compilation). zsmalloc classes stats::: - class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable + class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable + ... - Total 13 51 413836 412973 159955 3 + Total 13 .. 51 413836 412973 159955 3 zram mm_stat::: @@ -227,9 +241,10 @@ zram as a build artifacts storage (Linux kernel compilation). zsmalloc classes stats::: - class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable + class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable + ... - Total 18 87 414852 412978 156666 0 + Total 18 .. 87 414852 412978 156666 0 zram mm_stat::: From 618a8a917dbf5830e2064d2fa0568940eb5d2584 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Sat, 25 Mar 2023 11:46:31 +0900 Subject: [PATCH 769/898] zsmalloc: document freeable stats When freeable class stat was added to classes file (back in 2016) we forgot to update zsmalloc documentation. Fix that. Link: https://lkml.kernel.org/r/20230325024631.2817153-3-senozhatsky@chromium.org Fixes: 1120ed548394 ("mm/zsmalloc: add `freeable' column to pool stat") Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Cc: Signed-off-by: Andrew Morton --- Documentation/mm/zsmalloc.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/mm/zsmalloc.rst b/Documentation/mm/zsmalloc.rst index 3c6bf639887ff..a3c26d587752f 100644 --- a/Documentation/mm/zsmalloc.rst +++ b/Documentation/mm/zsmalloc.rst @@ -83,6 +83,8 @@ pages_used the number of pages allocated for the class pages_per_zspage the number of 0-order pages to make a zspage +freeable + the approximate number of pages class compaction can free Each zspage maintains inuse counter which keeps track of the number of objects stored in the zspage. The inuse counter determines the zspage's From 6be49d100c22ffea3287a4b19d7639d259888e33 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 28 Mar 2023 02:53:18 +0900 Subject: [PATCH 770/898] nilfs2: fix potential UAF of struct nilfs_sc_info in nilfs_segctor_thread() The finalization of nilfs_segctor_thread() can race with nilfs_segctor_kill_thread() which terminates that thread, potentially causing a use-after-free BUG as KASAN detected. At the end of nilfs_segctor_thread(), it assigns NULL to "sc_task" member of "struct nilfs_sc_info" to indicate the thread has finished, and then notifies nilfs_segctor_kill_thread() of this using waitqueue "sc_wait_task" on the struct nilfs_sc_info. However, here, immediately after the NULL assignment to "sc_task", it is possible that nilfs_segctor_kill_thread() will detect it and return to continue the deallocation, freeing the nilfs_sc_info structure before the thread does the notification. This fixes the issue by protecting the NULL assignment to "sc_task" and its notification, with spinlock "sc_state_lock" of the struct nilfs_sc_info. Since nilfs_segctor_kill_thread() does a final check to see if "sc_task" is NULL with "sc_state_lock" locked, this can eliminate the race. Link: https://lkml.kernel.org/r/20230327175318.8060-1-konishi.ryusuke@gmail.com Reported-by: syzbot+b08ebcc22f8f3e6be43a@syzkaller.appspotmail.com Link: https://lkml.kernel.org/r/00000000000000660d05f7dfa877@google.com Signed-off-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 19446a8243d79..6ad41390fa74a 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2609,11 +2609,10 @@ static int nilfs_segctor_thread(void *arg) goto loop; end_thread: - spin_unlock(&sci->sc_state_lock); - /* end sync. */ sci->sc_task = NULL; wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ + spin_unlock(&sci->sc_state_lock); return 0; } From 7397031622e05ca206e2d674ec199d6bb66fc9ba Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 27 Mar 2023 00:21:46 +0900 Subject: [PATCH 771/898] nilfs2: initialize "struct nilfs_binfo_dat"->bi_pad field nilfs_btree_assign_p() and nilfs_direct_assign_p() are not initializing "struct nilfs_binfo_dat"->bi_pad field, causing uninit-value reports when being passed to CRC function. Link: https://lkml.kernel.org/r/20230326152146.15872-1-konishi.ryusuke@gmail.com Reported-by: syzbot Link: https://syzkaller.appspot.com/bug?extid=048585f3f4227bb2b49b Reported-by: Dipanjan Das Link: https://lkml.kernel.org/r/CANX2M5bVbzRi6zH3PTcNE_31TzerstOXUa9Bay4E6y6dX23_pg@mail.gmail.com Signed-off-by: Tetsuo Handa Signed-off-by: Ryusuke Konishi Cc: Alexander Potapenko Signed-off-by: Andrew Morton --- fs/nilfs2/btree.c | 1 + fs/nilfs2/direct.c | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 2681a449edc16..13592e82eaf68 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -2219,6 +2219,7 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree, /* on-disk format */ binfo->bi_dat.bi_blkoff = cpu_to_le64(key); binfo->bi_dat.bi_level = level; + memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad)); return 0; } diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index a35f2795b2422..4c85914f2abc3 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -314,6 +314,7 @@ static int nilfs_direct_assign_p(struct nilfs_bmap *direct, binfo->bi_dat.bi_blkoff = cpu_to_le64(key); binfo->bi_dat.bi_level = 0; + memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad)); return 0; } From f349b15e183d6956f1b63d6ff57849ff10c7edd5 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Thu, 30 Mar 2023 16:26:25 +0000 Subject: [PATCH 772/898] mm: vmalloc: avoid warn_alloc noise caused by fatal signal There're some suspicious warn_alloc on my test serer, for example, [13366.518837] warn_alloc: 81 callbacks suppressed [13366.518841] test_verifier: vmalloc error: size 4096, page order 0, failed to allocate pages, mode:0x500dc2(GFP_HIGHUSER|__GFP_ZERO|__GFP_ACCOUNT), nodemask=(null),cpuset=/,mems_allowed=0-1 [13366.522240] CPU: 30 PID: 722463 Comm: test_verifier Kdump: loaded Tainted: G W O 6.2.0+ #638 [13366.524216] Call Trace: [13366.524702] [13366.525148] dump_stack_lvl+0x6c/0x80 [13366.525712] dump_stack+0x10/0x20 [13366.526239] warn_alloc+0x119/0x190 [13366.526783] ? alloc_pages_bulk_array_mempolicy+0x9e/0x2a0 [13366.527470] __vmalloc_area_node+0x546/0x5b0 [13366.528066] __vmalloc_node_range+0xc2/0x210 [13366.528660] __vmalloc_node+0x42/0x50 [13366.529186] ? bpf_prog_realloc+0x53/0xc0 [13366.529743] __vmalloc+0x1e/0x30 [13366.530235] bpf_prog_realloc+0x53/0xc0 [13366.530771] bpf_patch_insn_single+0x80/0x1b0 [13366.531351] bpf_jit_blind_constants+0xe9/0x1c0 [13366.531932] ? __free_pages+0xee/0x100 [13366.532457] ? free_large_kmalloc+0x58/0xb0 [13366.533002] bpf_int_jit_compile+0x8c/0x5e0 [13366.533546] bpf_prog_select_runtime+0xb4/0x100 [13366.534108] bpf_prog_load+0x6b1/0xa50 [13366.534610] ? perf_event_task_tick+0x96/0xb0 [13366.535151] ? security_capable+0x3a/0x60 [13366.535663] __sys_bpf+0xb38/0x2190 [13366.536120] ? kvm_clock_get_cycles+0x9/0x10 [13366.536643] __x64_sys_bpf+0x1c/0x30 [13366.537094] do_syscall_64+0x38/0x90 [13366.537554] entry_SYSCALL_64_after_hwframe+0x72/0xdc [13366.538107] RIP: 0033:0x7f78310f8e29 [13366.538561] Code: 01 00 48 81 c4 80 00 00 00 e9 f1 fe ff ff 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 17 e0 2c 00 f7 d8 64 89 01 48 [13366.540286] RSP: 002b:00007ffe2a61fff8 EFLAGS: 00000206 ORIG_RAX: 0000000000000141 [13366.541031] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f78310f8e29 [13366.541749] RDX: 0000000000000080 RSI: 00007ffe2a6200b0 RDI: 0000000000000005 [13366.542470] RBP: 00007ffe2a620010 R08: 00007ffe2a6202a0 R09: 00007ffe2a6200b0 [13366.543183] R10: 00000000000f423e R11: 0000000000000206 R12: 0000000000407800 [13366.543900] R13: 00007ffe2a620540 R14: 0000000000000000 R15: 0000000000000000 [13366.544623] [13366.545260] Mem-Info: [13366.546121] active_anon:81319 inactive_anon:20733 isolated_anon:0 active_file:69450 inactive_file:5624 isolated_file:0 unevictable:0 dirty:10 writeback:0 slab_reclaimable:69649 slab_unreclaimable:48930 mapped:27400 shmem:12868 pagetables:4929 sec_pagetables:0 bounce:0 kernel_misc_reclaimable:0 free:15870308 free_pcp:142935 free_cma:0 [13366.551886] Node 0 active_anon:224836kB inactive_anon:33528kB active_file:175692kB inactive_file:13752kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:59248kB dirty:32kB writeback:0kB shmem:18252kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 0kB writeback_tmp:0kB kernel_stack:4616kB pagetables:10664kB sec_pagetables:0kB all_unreclaimable? no [13366.555184] Node 1 active_anon:100440kB inactive_anon:49404kB active_file:102108kB inactive_file:8744kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:50352kB dirty:8kB writeback:0kB shmem:33220kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 0kB writeback_tmp:0kB kernel_stack:3896kB pagetables:9052kB sec_pagetables:0kB all_unreclaimable? no [13366.558262] Node 0 DMA free:15360kB boost:0kB min:304kB low:380kB high:456kB reserved_highatomic:0KB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:15992kB managed:15360kB mlocked:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB [13366.560821] lowmem_reserve[]: 0 2735 31873 31873 31873 [13366.561981] Node 0 DMA32 free:2790904kB boost:0kB min:56028kB low:70032kB high:84036kB reserved_highatomic:0KB active_anon:1936kB inactive_anon:20kB active_file:396kB inactive_file:344kB unevictable:0kB writepending:0kB present:3129200kB managed:2801520kB mlocked:0kB bounce:0kB free_pcp:5188kB local_pcp:0kB free_cma:0kB [13366.565148] lowmem_reserve[]: 0 0 29137 29137 29137 [13366.566168] Node 0 Normal free:28533824kB boost:0kB min:596740kB low:745924kB high:895108kB reserved_highatomic:28672KB active_anon:222900kB inactive_anon:33508kB active_file:175296kB inactive_file:13408kB unevictable:0kB writepending:32kB present:30408704kB managed:29837172kB mlocked:0kB bounce:0kB free_pcp:295724kB local_pcp:0kB free_cma:0kB [13366.569485] lowmem_reserve[]: 0 0 0 0 0 [13366.570416] Node 1 Normal free:32141144kB boost:0kB min:660504kB low:825628kB high:990752kB reserved_highatomic:69632KB active_anon:100440kB inactive_anon:49404kB active_file:102108kB inactive_file:8744kB unevictable:0kB writepending:8kB present:33554432kB managed:33025372kB mlocked:0kB bounce:0kB free_pcp:270880kB local_pcp:46860kB free_cma:0kB [13366.573403] lowmem_reserve[]: 0 0 0 0 0 [13366.574015] Node 0 DMA: 0*4kB 0*8kB 0*16kB 0*32kB 0*64kB 0*128kB 0*256kB 0*512kB 1*1024kB (U) 1*2048kB (M) 3*4096kB (M) = 15360kB [13366.575474] Node 0 DMA32: 782*4kB (UME) 756*8kB (UME) 736*16kB (UME) 745*32kB (UME) 694*64kB (UME) 653*128kB (UME) 595*256kB (UME) 552*512kB (UME) 454*1024kB (UME) 347*2048kB (UME) 246*4096kB (UME) = 2790904kB [13366.577442] Node 0 Normal: 33856*4kB (UMEH) 51815*8kB (UMEH) 42418*16kB (UMEH) 36272*32kB (UMEH) 22195*64kB (UMEH) 10296*128kB (UMEH) 7238*256kB (UMEH) 5638*512kB (UEH) 5337*1024kB (UMEH) 3506*2048kB (UMEH) 1470*4096kB (UME) = 28533784kB [13366.580460] Node 1 Normal: 15776*4kB (UMEH) 37485*8kB (UMEH) 29509*16kB (UMEH) 21420*32kB (UMEH) 14818*64kB (UMEH) 13051*128kB (UMEH) 9918*256kB (UMEH) 7374*512kB (UMEH) 5397*1024kB (UMEH) 3887*2048kB (UMEH) 2002*4096kB (UME) = 32141240kB [13366.583027] Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=1048576kB [13366.584380] Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB [13366.585702] Node 1 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=1048576kB [13366.587042] Node 1 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB [13366.588372] 87386 total pagecache pages [13366.589266] 0 pages in swap cache [13366.590327] Free swap = 0kB [13366.591227] Total swap = 0kB [13366.592142] 16777082 pages RAM [13366.593057] 0 pages HighMem/MovableOnly [13366.594037] 357226 pages reserved [13366.594979] 0 pages hwpoisoned This failure really confuse me as there're still lots of available pages. Finally I figured out it was caused by a fatal signal. When a process is allocating memory via vm_area_alloc_pages(), it will break directly even if it hasn't allocated the requested pages when it receives a fatal signal. In that case, we shouldn't show this warn_alloc, as it is useless. We only need to show this warning when there're really no enough pages. Link: https://lkml.kernel.org/r/20230330162625.13604-1-laoar.shao@gmail.com Signed-off-by: Yafang Shao Reviewed-by: Lorenzo Stoakes Cc: Christoph Hellwig Cc: Uladzislau Rezki (Sony) Cc: Signed-off-by: Andrew Morton --- mm/vmalloc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index bef6cf2b4d46d..a50072066221a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3042,9 +3042,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, * allocation request, free them via vfree() if any. */ if (area->nr_pages != nr_small_pages) { - warn_alloc(gfp_mask, NULL, - "vmalloc error: size %lu, page order %u, failed to allocate pages", - area->nr_pages * PAGE_SIZE, page_order); + /* vm_area_alloc_pages() can also fail due to a fatal signal */ + if (!fatal_signal_pending(current)) + warn_alloc(gfp_mask, NULL, + "vmalloc error: size %lu, page order %u, failed to allocate pages", + area->nr_pages * PAGE_SIZE, page_order); goto fail; } From 7c7b962938ddda6a9cd095de557ee5250706ea88 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Thu, 30 Mar 2023 12:25:19 +1100 Subject: [PATCH 773/898] mm: take a page reference when removing device exclusive entries Device exclusive page table entries are used to prevent CPU access to a page whilst it is being accessed from a device. Typically this is used to implement atomic operations when the underlying bus does not support atomic access. When a CPU thread encounters a device exclusive entry it locks the page and restores the original entry after calling mmu notifiers to signal drivers that exclusive access is no longer available. The device exclusive entry holds a reference to the page making it safe to access the struct page whilst the entry is present. However the fault handling code does not hold the PTL when taking the page lock. This means if there are multiple threads faulting concurrently on the device exclusive entry one will remove the entry whilst others will wait on the page lock without holding a reference. This can lead to threads locking or waiting on a folio with a zero refcount. Whilst mmap_lock prevents the pages getting freed via munmap() they may still be freed by a migration. This leads to warnings such as PAGE_FLAGS_CHECK_AT_FREE due to the page being locked when the refcount drops to zero. Fix this by trying to take a reference on the folio before locking it. The code already checks the PTE under the PTL and aborts if the entry is no longer there. It is also possible the folio has been unmapped, freed and re-allocated allowing a reference to be taken on an unrelated folio. This case is also detected by the PTE check and the folio is unlocked without further changes. Link: https://lkml.kernel.org/r/20230330012519.804116-1-apopple@nvidia.com Fixes: b756a3b5e7ea ("mm: device exclusive memory access") Signed-off-by: Alistair Popple Reviewed-by: Ralph Campbell Reviewed-by: John Hubbard Acked-by: David Hildenbrand Cc: Matthew Wilcox (Oracle) Cc: Christoph Hellwig Cc: Signed-off-by: Andrew Morton --- mm/memory.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index f456f3b5049cf..01a23ad48a042 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3563,8 +3563,21 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; struct mmu_notifier_range range; - if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags)) + /* + * We need a reference to lock the folio because we don't hold + * the PTL so a racing thread can remove the device-exclusive + * entry and unmap it. If the folio is free the entry must + * have been removed already. If it happens to have already + * been re-allocated after being freed all we do is lock and + * unlock it. + */ + if (!folio_try_get(folio)) + return 0; + + if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags)) { + folio_put(folio); return VM_FAULT_RETRY; + } mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, vma->vm_mm, vmf->address & PAGE_MASK, (vmf->address & PAGE_MASK) + PAGE_SIZE, NULL); @@ -3577,6 +3590,7 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); folio_unlock(folio); + folio_put(folio); mmu_notifier_invalidate_range_end(&range); return 0; From 42560f9c92cc43dce75dbf06cc0d840dced39b12 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 31 Mar 2023 05:55:15 +0900 Subject: [PATCH 774/898] nilfs2: fix sysfs interface lifetime The current nilfs2 sysfs support has issues with the timing of creation and deletion of sysfs entries, potentially leading to null pointer dereferences, use-after-free, and lockdep warnings. Some of the sysfs attributes for nilfs2 per-filesystem instance refer to metadata file "cpfile", "sufile", or "dat", but nilfs_sysfs_create_device_group that creates those attributes is executed before the inodes for these metadata files are loaded, and nilfs_sysfs_delete_device_group which deletes these sysfs entries is called after releasing their metadata file inodes. Therefore, access to some of these sysfs attributes may occur outside of the lifetime of these metadata files, resulting in inode NULL pointer dereferences or use-after-free. In addition, the call to nilfs_sysfs_create_device_group() is made during the locking period of the semaphore "ns_sem" of nilfs object, so the shrinker call caused by the memory allocation for the sysfs entries, may derive lock dependencies "ns_sem" -> (shrinker) -> "locks acquired in nilfs_evict_inode()". Since nilfs2 may acquire "ns_sem" deep in the call stack holding other locks via its error handler __nilfs_error(), this causes lockdep to report circular locking. This is a false positive and no circular locking actually occurs as no inodes exist yet when nilfs_sysfs_create_device_group() is called. Fortunately, the lockdep warnings can be resolved by simply moving the call to nilfs_sysfs_create_device_group() out of "ns_sem". This fixes these sysfs issues by revising where the device's sysfs interface is created/deleted and keeping its lifetime within the lifetime of the metadata files above. Link: https://lkml.kernel.org/r/20230330205515.6167-1-konishi.ryusuke@gmail.com Fixes: dd70edbde262 ("nilfs2: integrate sysfs support into driver") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+979fa7f9c0d086fdc282@syzkaller.appspotmail.com Link: https://lkml.kernel.org/r/0000000000003414b505f7885f7e@google.com Reported-by: syzbot+5b7d542076d9bddc3c6a@syzkaller.appspotmail.com Link: https://lkml.kernel.org/r/0000000000006ac86605f5f44eb9@google.com Cc: Viacheslav Dubeyko Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/super.c | 2 ++ fs/nilfs2/the_nilfs.c | 12 +++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1422b8ba24ed6..77f1e5778d1c8 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -482,6 +482,7 @@ static void nilfs_put_super(struct super_block *sb) up_write(&nilfs->ns_sem); } + nilfs_sysfs_delete_device_group(nilfs); iput(nilfs->ns_sufile); iput(nilfs->ns_cpfile); iput(nilfs->ns_dat); @@ -1105,6 +1106,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) nilfs_put_root(fsroot); failed_unload: + nilfs_sysfs_delete_device_group(nilfs); iput(nilfs->ns_sufile); iput(nilfs->ns_cpfile); iput(nilfs->ns_dat); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 3a4c9c150cbf5..2894152a6b25c 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -87,7 +87,6 @@ void destroy_nilfs(struct the_nilfs *nilfs) { might_sleep(); if (nilfs_init(nilfs)) { - nilfs_sysfs_delete_device_group(nilfs); brelse(nilfs->ns_sbh[0]); brelse(nilfs->ns_sbh[1]); } @@ -305,6 +304,10 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) goto failed; } + err = nilfs_sysfs_create_device_group(sb); + if (unlikely(err)) + goto sysfs_error; + if (valid_fs) goto skip_recovery; @@ -366,6 +369,9 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) goto failed; failed_unload: + nilfs_sysfs_delete_device_group(nilfs); + + sysfs_error: iput(nilfs->ns_cpfile); iput(nilfs->ns_sufile); iput(nilfs->ns_dat); @@ -697,10 +703,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) if (err) goto failed_sbh; - err = nilfs_sysfs_create_device_group(sb); - if (err) - goto failed_sbh; - set_nilfs_init(nilfs); err = 0; out: From 6fe7d6b992113719e96744d974212df3fcddc76c Mon Sep 17 00:00:00 2001 From: Rongwei Wang Date: Tue, 4 Apr 2023 23:47:16 +0800 Subject: [PATCH 775/898] mm/swap: fix swap_info_struct race between swapoff and get_swap_pages() The si->lock must be held when deleting the si from the available list. Otherwise, another thread can re-add the si to the available list, which can lead to memory corruption. The only place we have found where this happens is in the swapoff path. This case can be described as below: core 0 core 1 swapoff del_from_avail_list(si) waiting try lock si->lock acquire swap_avail_lock and re-add si into swap_avail_head acquire si->lock but missing si already being added again, and continuing to clear SWP_WRITEOK, etc. It can be easily found that a massive warning messages can be triggered inside get_swap_pages() by some special cases, for example, we call madvise(MADV_PAGEOUT) on blocks of touched memory concurrently, meanwhile, run much swapon-swapoff operations (e.g. stress-ng-swap). However, in the worst case, panic can be caused by the above scene. In swapoff(), the memory used by si could be kept in swap_info[] after turning off a swap. This means memory corruption will not be caused immediately until allocated and reset for a new swap in the swapon path. A panic message caused: (with CONFIG_PLIST_DEBUG enabled) ------------[ cut here ]------------ top: 00000000e58a3003, n: 0000000013e75cda, p: 000000008cd4451a prev: 0000000035b1e58a, n: 000000008cd4451a, p: 000000002150ee8d next: 000000008cd4451a, n: 000000008cd4451a, p: 000000008cd4451a WARNING: CPU: 21 PID: 1843 at lib/plist.c:60 plist_check_prev_next_node+0x50/0x70 Modules linked in: rfkill(E) crct10dif_ce(E)... CPU: 21 PID: 1843 Comm: stress-ng Kdump: ... 5.10.134+ Hardware name: Alibaba Cloud ECS, BIOS 0.0.0 02/06/2015 pstate: 60400005 (nZCv daif +PAN -UAO -TCO BTYPE=--) pc : plist_check_prev_next_node+0x50/0x70 lr : plist_check_prev_next_node+0x50/0x70 sp : ffff0018009d3c30 x29: ffff0018009d3c40 x28: ffff800011b32a98 x27: 0000000000000000 x26: ffff001803908000 x25: ffff8000128ea088 x24: ffff800011b32a48 x23: 0000000000000028 x22: ffff001800875c00 x21: ffff800010f9e520 x20: ffff001800875c00 x19: ffff001800fdc6e0 x18: 0000000000000030 x17: 0000000000000000 x16: 0000000000000000 x15: 0736076307640766 x14: 0730073007380731 x13: 0736076307640766 x12: 0730073007380731 x11: 000000000004058d x10: 0000000085a85b76 x9 : ffff8000101436e4 x8 : ffff800011c8ce08 x7 : 0000000000000000 x6 : 0000000000000001 x5 : ffff0017df9ed338 x4 : 0000000000000001 x3 : ffff8017ce62a000 x2 : ffff0017df9ed340 x1 : 0000000000000000 x0 : 0000000000000000 Call trace: plist_check_prev_next_node+0x50/0x70 plist_check_head+0x80/0xf0 plist_add+0x28/0x140 add_to_avail_list+0x9c/0xf0 _enable_swap_info+0x78/0xb4 __do_sys_swapon+0x918/0xa10 __arm64_sys_swapon+0x20/0x30 el0_svc_common+0x8c/0x220 do_el0_svc+0x2c/0x90 el0_svc+0x1c/0x30 el0_sync_handler+0xa8/0xb0 el0_sync+0x148/0x180 irq event stamp: 2082270 Now, si->lock locked before calling 'del_from_avail_list()' to make sure other thread see the si had been deleted and SWP_WRITEOK cleared together, will not reinsert again. This problem exists in versions after stable 5.10.y. Link: https://lkml.kernel.org/r/20230404154716.23058-1-rongwei.wang@linux.alibaba.com Fixes: a2468cc9bfdff ("swap: choose swap device according to numa node") Tested-by: Yongchen Yin Signed-off-by: Rongwei Wang Cc: Bagas Sanjaya Cc: Matthew Wilcox (Oracle) Cc: Aaron Lu Cc: Signed-off-by: Andrew Morton --- mm/swapfile.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 62ba2bf577d7e..2c718f45745f8 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -679,6 +679,7 @@ static void __del_from_avail_list(struct swap_info_struct *p) { int nid; + assert_spin_locked(&p->lock); for_each_node(nid) plist_del(&p->avail_lists[nid], &swap_avail_heads[nid]); } @@ -2434,8 +2435,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) spin_unlock(&swap_lock); goto out_dput; } - del_from_avail_list(p); spin_lock(&p->lock); + del_from_avail_list(p); if (p->prio < 0) { struct swap_info_struct *si = p; int nid; From ec07967d7523adb3670f9dfee0232e3bc868f3de Mon Sep 17 00:00:00 2001 From: Peng Zhang Date: Tue, 14 Mar 2023 20:42:01 +0800 Subject: [PATCH 776/898] maple_tree: fix get wrong data_end in mtree_lookup_walk() if (likely(offset > end)) max = pivots[offset]; The above code should be changed to if (likely(offset < end)), which is correct. This affects the correctness of ma_data_end(). Now it seems that the final result will not be wrong, but it is best to change it. This patch does not change the code as above, because it simplifies the code by the way. Link: https://lkml.kernel.org/r/20230314124203.91572-1-zhangpeng.00@bytedance.com Link: https://lkml.kernel.org/r/20230314124203.91572-2-zhangpeng.00@bytedance.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Peng Zhang Reviewed-by: Liam R. Howlett Cc: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index b1db0bd71aede..b8a230f5d94e5 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -3941,18 +3941,13 @@ static inline void *mtree_lookup_walk(struct ma_state *mas) end = ma_data_end(node, type, pivots, max); if (unlikely(ma_dead_node(node))) goto dead_node; - - if (pivots[offset] >= mas->index) - goto next; - do { - offset++; - } while ((offset < end) && (pivots[offset] < mas->index)); - - if (likely(offset > end)) - max = pivots[offset]; + if (pivots[offset] >= mas->index) { + max = pivots[offset]; + break; + } + } while (++offset < end); -next: slots = ma_slots(node, type); next = mt_slot(mas->tree, slots, offset); if (unlikely(ma_dead_node(node))) From c45ea315a602d45569b08b93e9ab30f6a63a38aa Mon Sep 17 00:00:00 2001 From: Peng Zhang Date: Tue, 14 Mar 2023 20:42:03 +0800 Subject: [PATCH 777/898] maple_tree: fix a potential concurrency bug in RCU mode There is a concurrency bug that may cause the wrong value to be loaded when a CPU is modifying the maple tree. CPU1: mtree_insert_range() mas_insert() mas_store_root() ... mas_root_expand() ... rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); ma_set_meta(node, maple_leaf_64, 0, slot); <---IP CPU2: mtree_load() mtree_lookup_walk() ma_data_end(); When CPU1 is about to execute the instruction pointed to by IP, the ma_data_end() executed by CPU2 may return the wrong end position, which will cause the value loaded by mtree_load() to be wrong. An example of triggering the bug: Add mdelay(100) between rcu_assign_pointer() and ma_set_meta() in mas_root_expand(). static DEFINE_MTREE(tree); int work(void *p) { unsigned long val; for (int i = 0 ; i< 30; ++i) { val = (unsigned long)mtree_load(&tree, 8); mdelay(5); pr_info("%lu",val); } return 0; } mt_init_flags(&tree, MT_FLAGS_USE_RCU); mtree_insert(&tree, 0, (void*)12345, GFP_KERNEL); run_thread(work) mtree_insert(&tree, 1, (void*)56789, GFP_KERNEL); In RCU mode, mtree_load() should always return the value before or after the data structure is modified, and in this example mtree_load(&tree, 8) may return 56789 which is not expected, it should always return NULL. Fix it by put ma_set_meta() before rcu_assign_pointer(). Link: https://lkml.kernel.org/r/20230314124203.91572-4-zhangpeng.00@bytedance.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Peng Zhang Reviewed-by: Liam R. Howlett Cc: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index b8a230f5d94e5..db60edb55f2f5 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -3725,10 +3725,9 @@ static inline int mas_root_expand(struct ma_state *mas, void *entry) slot++; mas->depth = 1; mas_set_height(mas); - + ma_set_meta(node, maple_leaf_64, 0, slot); /* swap the new root into the tree */ rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); - ma_set_meta(node, maple_leaf_64, 0, slot); return slot; } From abc33494ddd5c1e4bd0e17c4abe361038fb6693f Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 4 Apr 2023 15:22:07 +1000 Subject: [PATCH 778/898] net: fec: make use of MDIO C45 quirk Not all fec MDIO bus drivers support C45 mode transactions. The older fec hardware block in many ColdFire SoCs does not appear to support them, at least according to most of the different ColdFire SoC reference manuals. The bits used to generate C45 access on the iMX parts, in the OP field of the MMFR register, are documented as generating non-compliant MII frames (it is not documented as to exactly how they are non-compliant). Commit 8d03ad1ab0b0 ("net: fec: Separate C22 and C45 transactions") means the fec driver will always register c45 MDIO read and write methods. During probe these will always be accessed now generating non-compliant MII accesses on ColdFire based devices. Add a quirk define, FEC_QUIRK_HAS_MDIO_C45, that can be used to distinguish silicon that supports MDIO C45 framing or not. Add this to all the existing iMX quirks, so they will be behave as they do now (*). (*) it seems that some iMX parts may not support C45 transactions either. The iMX25 and iMX50 Reference Manuals contain similar wording to the ColdFire Reference Manuals on this. Fixes: 8d03ad1ab0b0 ("net: fec: Separate C22 and C45 transactions") Signed-off-by: Greg Ungerer Reviewed-by: Wei Fang Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230404052207.3064861-1-gerg@linux-m68k.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec.h | 5 ++++ drivers/net/ethernet/freescale/fec_main.c | 32 ++++++++++++++--------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 5ba1e0d71c687..9939ccafb5566 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -507,6 +507,11 @@ struct bufdesc_ex { /* i.MX6Q adds pm_qos support */ #define FEC_QUIRK_HAS_PMQOS BIT(23) +/* Not all FEC hardware block MDIOs support accesses in C45 mode. + * Older blocks in the ColdFire parts do not support it. + */ +#define FEC_QUIRK_HAS_MDIO_C45 BIT(24) + struct bufdesc_prop { int qid; /* Address of Rx and Tx buffers */ diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index f3b16a6673e2b..160c1b3525f5b 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -100,18 +100,19 @@ struct fec_devinfo { static const struct fec_devinfo fec_imx25_info = { .quirks = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR | - FEC_QUIRK_HAS_FRREG, + FEC_QUIRK_HAS_FRREG | FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_imx27_info = { - .quirks = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG, + .quirks = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG | + FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_imx28_info = { .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME | FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_FRREG | FEC_QUIRK_CLEAR_SETUP_MII | - FEC_QUIRK_NO_HARD_RESET, + FEC_QUIRK_NO_HARD_RESET | FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_imx6q_info = { @@ -119,11 +120,12 @@ static const struct fec_devinfo fec_imx6q_info = { FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358 | FEC_QUIRK_HAS_RACC | FEC_QUIRK_CLEAR_SETUP_MII | - FEC_QUIRK_HAS_PMQOS, + FEC_QUIRK_HAS_PMQOS | FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_mvf600_info = { - .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_RACC, + .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_RACC | + FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_imx6x_info = { @@ -132,7 +134,8 @@ static const struct fec_devinfo fec_imx6x_info = { FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB | FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE | - FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES, + FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES | + FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_imx6ul_info = { @@ -140,7 +143,8 @@ static const struct fec_devinfo fec_imx6ul_info = { FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | FEC_QUIRK_HAS_RACC | - FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII, + FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII | + FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_imx8mq_info = { @@ -150,7 +154,8 @@ static const struct fec_devinfo fec_imx8mq_info = { FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES | - FEC_QUIRK_HAS_EEE | FEC_QUIRK_WAKEUP_FROM_INT2, + FEC_QUIRK_HAS_EEE | FEC_QUIRK_WAKEUP_FROM_INT2 | + FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_imx8qm_info = { @@ -160,14 +165,15 @@ static const struct fec_devinfo fec_imx8qm_info = { FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES | - FEC_QUIRK_DELAYED_CLKS_SUPPORT, + FEC_QUIRK_DELAYED_CLKS_SUPPORT | FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_s32v234_info = { .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB | - FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE, + FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | + FEC_QUIRK_HAS_MDIO_C45, }; static struct platform_device_id fec_devtype[] = { @@ -2434,8 +2440,10 @@ static int fec_enet_mii_init(struct platform_device *pdev) fep->mii_bus->name = "fec_enet_mii_bus"; fep->mii_bus->read = fec_enet_mdio_read_c22; fep->mii_bus->write = fec_enet_mdio_write_c22; - fep->mii_bus->read_c45 = fec_enet_mdio_read_c45; - fep->mii_bus->write_c45 = fec_enet_mdio_write_c45; + if (fep->quirks & FEC_QUIRK_HAS_MDIO_C45) { + fep->mii_bus->read_c45 = fec_enet_mdio_read_c45; + fep->mii_bus->write_c45 = fec_enet_mdio_write_c45; + } snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", pdev->name, fep->dev_id + 1); fep->mii_bus->priv = fep; From 38e058cc7d245dc8034426415bee8fec16ace1bd Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 4 Apr 2023 15:24:11 +0800 Subject: [PATCH 779/898] selftests: net: rps_default_mask.sh: delete veth link specifically When deleting the netns and recreating a new one while re-adding the veth interface, there is a small window of time during which the old veth interface has not yet been removed. This can cause the new addition to fail. To resolve this issue, we can either wait for a short while to ensure that the old veth interface is deleted, or we can specifically remove the veth interface. Before this patch: # ./rps_default_mask.sh empty rps_default_mask [ ok ] changing rps_default_mask dont affect existing devices [ ok ] changing rps_default_mask dont affect existing netns [ ok ] changing rps_default_mask affect newly created devices [ ok ] changing rps_default_mask don't affect newly child netns[II][ ok ] rps_default_mask is 0 by default in child netns [ ok ] RTNETLINK answers: File exists changing rps_default_mask in child ns don't affect the main one[ ok ] cat: /sys/class/net/vethC11an1/queues/rx-0/rps_cpus: No such file or directory changing rps_default_mask in child ns affects new childns devices./rps_default_mask.sh: line 36: [: -eq: unary operator expected [fail] expected 1 found changing rps_default_mask in child ns don't affect existing devices[ ok ] After this patch: # ./rps_default_mask.sh empty rps_default_mask [ ok ] changing rps_default_mask dont affect existing devices [ ok ] changing rps_default_mask dont affect existing netns [ ok ] changing rps_default_mask affect newly created devices [ ok ] changing rps_default_mask don't affect newly child netns[II][ ok ] rps_default_mask is 0 by default in child netns [ ok ] changing rps_default_mask in child ns don't affect the main one[ ok ] changing rps_default_mask in child ns affects new childns devices[ ok ] changing rps_default_mask in child ns don't affect existing devices[ ok ] Fixes: 3a7d84eae03b ("self-tests: more rps self tests") Signed-off-by: Hangbin Liu Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20230404072411.879476-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rps_default_mask.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh index 0fd0d2db3abc1..a26c5624429fb 100755 --- a/tools/testing/selftests/net/rps_default_mask.sh +++ b/tools/testing/selftests/net/rps_default_mask.sh @@ -60,6 +60,7 @@ ip link set dev $VETH up ip -n $NETNS link set dev $VETH up chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3 chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0 +ip link del dev $VETH ip netns del $NETNS setup From 24e3fce00c0b557491ff596c0682a29dee6fe848 Mon Sep 17 00:00:00 2001 From: Song Yoong Siang Date: Tue, 4 Apr 2023 12:48:23 +0800 Subject: [PATCH 780/898] net: stmmac: Add queue reset into stmmac_xdp_open() function Queue reset was moved out from __init_dma_rx_desc_rings() and __init_dma_tx_desc_rings() functions. Thus, the driver fails to transmit and receive packet after XDP prog setup. This commit adds the missing queue reset into stmmac_xdp_open() function. Fixes: f9ec5723c3db ("net: ethernet: stmicro: stmmac: move queue reset to dedicated functions") Cc: # 6.0+ Signed-off-by: Song Yoong Siang Reviewed-by: Alexander Duyck Link: https://lore.kernel.org/r/20230404044823.3226144-1-yoong.siang.song@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 59cbf3597eb42..38abc898f149c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6624,6 +6624,8 @@ int stmmac_xdp_open(struct net_device *dev) goto init_error; } + stmmac_reset_queues_param(priv); + /* DMA CSR Channel configuration */ for (chan = 0; chan < dma_csr_ch; chan++) { stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan); From 8c68ae3b22fa6fb2dbe83ef955ff10936503d28e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 5 Apr 2023 20:00:46 -0600 Subject: [PATCH 781/898] ublk: read any SQE values upfront Since SQE memory is shared with userspace, we should only be reading it once. We cannot read it multiple times, particularly when it's read once for validation and then read again for the actual use. ublk_ch_uring_cmd() is safe when called as a retry operation, as the memory backing is stable at that point. But for normal issue, we want to ensure that we only read ublksrv_io_cmd once. Wrap the function in a helper that reads the value into an on-stack copy of the struct. Cc: stable@vger.kernel.org # 6.0+ Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index c73cc57ec5477..a36934ee97397 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1261,9 +1261,10 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, ublk_queue_cmd(ubq, req); } -static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) +static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, + unsigned int issue_flags, + struct ublksrv_io_cmd *ub_cmd) { - struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd; struct ublk_device *ub = cmd->file->private_data; struct ublk_queue *ubq; struct ublk_io *io; @@ -1362,6 +1363,23 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) return -EIOCBQUEUED; } +static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) +{ + struct ublksrv_io_cmd *ub_src = (struct ublksrv_io_cmd *) cmd->cmd; + struct ublksrv_io_cmd ub_cmd; + + /* + * Not necessary for async retry, but let's keep it simple and always + * copy the values to avoid any potential reuse. + */ + ub_cmd.q_id = READ_ONCE(ub_src->q_id); + ub_cmd.tag = READ_ONCE(ub_src->tag); + ub_cmd.result = READ_ONCE(ub_src->result); + ub_cmd.addr = READ_ONCE(ub_src->addr); + + return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd); +} + static const struct file_operations ublk_ch_fops = { .owner = THIS_MODULE, .open = ublk_ch_open, From e98e7a82bca2b6dce3e03719cff800ec913f9af7 Mon Sep 17 00:00:00 2001 From: Oswald Buddenhagen Date: Wed, 5 Apr 2023 22:12:19 +0200 Subject: [PATCH 782/898] ALSA: i2c/cs8427: fix iec958 mixer control deactivation snd_cs8427_iec958_active() would always delete SNDRV_CTL_ELEM_ACCESS_INACTIVE, even though the function has an argument `active`. Signed-off-by: Oswald Buddenhagen Cc: Link: https://lore.kernel.org/r/20230405201219.2197811-1-oswald.buddenhagen@gmx.de Signed-off-by: Takashi Iwai --- sound/i2c/cs8427.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/i2c/cs8427.c b/sound/i2c/cs8427.c index 65012af6a36e4..f58b14b490455 100644 --- a/sound/i2c/cs8427.c +++ b/sound/i2c/cs8427.c @@ -561,10 +561,13 @@ int snd_cs8427_iec958_active(struct snd_i2c_device *cs8427, int active) if (snd_BUG_ON(!cs8427)) return -ENXIO; chip = cs8427->private_data; - if (active) + if (active) { memcpy(chip->playback.pcm_status, chip->playback.def_status, 24); - chip->playback.pcm_ctl->vd[0].access &= ~SNDRV_CTL_ELEM_ACCESS_INACTIVE; + chip->playback.pcm_ctl->vd[0].access &= ~SNDRV_CTL_ELEM_ACCESS_INACTIVE; + } else { + chip->playback.pcm_ctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_INACTIVE; + } snd_ctl_notify(cs8427->bus->card, SNDRV_CTL_EVENT_MASK_VALUE | SNDRV_CTL_EVENT_MASK_INFO, &chip->playback.pcm_ctl->id); From c17f8fd31700392b1bb9e7b66924333568cb3700 Mon Sep 17 00:00:00 2001 From: Oswald Buddenhagen Date: Wed, 5 Apr 2023 22:12:19 +0200 Subject: [PATCH 783/898] ALSA: hda/sigmatel: add pin overrides for Intel DP45SG motherboard Like the other boards from the D*45* series, this one sets up the outputs not quite correctly. Signed-off-by: Oswald Buddenhagen Cc: Link: https://lore.kernel.org/r/20230405201220.2197826-1-oswald.buddenhagen@gmx.de Signed-off-by: Takashi Iwai --- Documentation/sound/hd-audio/models.rst | 2 +- sound/pci/hda/patch_sigmatel.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst index 9b52f50a68542..1204304500147 100644 --- a/Documentation/sound/hd-audio/models.rst +++ b/Documentation/sound/hd-audio/models.rst @@ -704,7 +704,7 @@ ref no-jd BIOS setup but without jack-detection intel - Intel DG45* mobos + Intel D*45* mobos dell-m6-amic Dell desktops/laptops with analog mics dell-m6-dmic diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index a794a01a68ca6..64a97d8c1b031 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1955,6 +1955,8 @@ static const struct snd_pci_quirk stac92hd73xx_fixup_tbl[] = { "DFI LanParty", STAC_92HD73XX_REF), SND_PCI_QUIRK(PCI_VENDOR_ID_DFI, 0x3101, "DFI LanParty", STAC_92HD73XX_REF), + SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0x5001, + "Intel DP45SG", STAC_92HD73XX_INTEL), SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0x5002, "Intel DG45ID", STAC_92HD73XX_INTEL), SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0x5003, From f342ac00da1064eb4f94b1f4bcacbdfea955797a Mon Sep 17 00:00:00 2001 From: Oswald Buddenhagen Date: Wed, 5 Apr 2023 22:12:20 +0200 Subject: [PATCH 784/898] ALSA: hda/sigmatel: fix S/PDIF out on Intel D*45* motherboards The BIOS botches this one completely - it says the 2nd S/PDIF output is used, while in fact it's the 1st one. This is tested on DP45SG, but I'm assuming it's valid for the other boards in the series as well. Also add some comments regarding the pins. FWIW, the codec is apparently still sold by Tempo Semiconductor, Inc., where one can download the documentation. Signed-off-by: Oswald Buddenhagen Cc: Link: https://lore.kernel.org/r/20230405201220.2197826-2-oswald.buddenhagen@gmx.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_sigmatel.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 64a97d8c1b031..61258b0aac8d6 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1707,6 +1707,7 @@ static const struct snd_pci_quirk stac925x_fixup_tbl[] = { }; static const struct hda_pintbl ref92hd73xx_pin_configs[] = { + // Port A-H { 0x0a, 0x02214030 }, { 0x0b, 0x02a19040 }, { 0x0c, 0x01a19020 }, @@ -1715,9 +1716,12 @@ static const struct hda_pintbl ref92hd73xx_pin_configs[] = { { 0x0f, 0x01014010 }, { 0x10, 0x01014020 }, { 0x11, 0x01014030 }, + // CD in { 0x12, 0x02319040 }, + // Digial Mic ins { 0x13, 0x90a000f0 }, { 0x14, 0x90a000f0 }, + // Digital outs { 0x22, 0x01452050 }, { 0x23, 0x01452050 }, {} @@ -1758,6 +1762,7 @@ static const struct hda_pintbl alienware_m17x_pin_configs[] = { }; static const struct hda_pintbl intel_dg45id_pin_configs[] = { + // Analog outputs { 0x0a, 0x02214230 }, { 0x0b, 0x02A19240 }, { 0x0c, 0x01013214 }, @@ -1765,6 +1770,9 @@ static const struct hda_pintbl intel_dg45id_pin_configs[] = { { 0x0e, 0x01A19250 }, { 0x0f, 0x01011212 }, { 0x10, 0x01016211 }, + // Digital output + { 0x22, 0x01451380 }, + { 0x23, 0x40f000f0 }, {} }; From b09c551c77c7e01dc6e4f3c8bf06b5ffa7b06db5 Mon Sep 17 00:00:00 2001 From: Oswald Buddenhagen Date: Wed, 5 Apr 2023 22:12:20 +0200 Subject: [PATCH 785/898] ALSA: emu10k1: fix capture interrupt handler unlinking Due to two copy/pastos, closing the MIC or EFX capture device would make a running ADC capture hang due to unsetting its interrupt handler. In principle, this would have also allowed dereferencing dangling pointers, but we're actually rather thorough at disabling and flushing the ints. While it may sound like one, this actually wasn't a hypothetical bug: PortAudio will open a capture stream at startup (and close it right away) even if not asked to. If the first device is busy, it will just proceed with the next one ... thus killing a concurrent capture. Signed-off-by: Oswald Buddenhagen Cc: Link: https://lore.kernel.org/r/20230405201220.2197923-1-oswald.buddenhagen@gmx.de Signed-off-by: Takashi Iwai --- sound/pci/emu10k1/emupcm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index 48af77ae8020f..908f76f1bb9f3 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -1236,7 +1236,7 @@ static int snd_emu10k1_capture_mic_close(struct snd_pcm_substream *substream) { struct snd_emu10k1 *emu = snd_pcm_substream_chip(substream); - emu->capture_interrupt = NULL; + emu->capture_mic_interrupt = NULL; emu->pcm_capture_mic_substream = NULL; return 0; } @@ -1344,7 +1344,7 @@ static int snd_emu10k1_capture_efx_close(struct snd_pcm_substream *substream) { struct snd_emu10k1 *emu = snd_pcm_substream_chip(substream); - emu->capture_interrupt = NULL; + emu->capture_efx_interrupt = NULL; emu->pcm_capture_efx_substream = NULL; return 0; } From 8dd13214a810c695044aa168c0ddba1a9c433e4f Mon Sep 17 00:00:00 2001 From: Oswald Buddenhagen Date: Wed, 5 Apr 2023 22:12:20 +0200 Subject: [PATCH 786/898] ALSA: emu10k1: don't create old pass-through playback device on Audigy It could have never worked, as snd_emu10k1_fx8010_playback_prepare() and snd_emu10k1_fx8010_playback_hw_free() assume the emu10k1 offset for the ETRAM, and the default DSP code includes no handler for it. It also wouldn't make a lot of sense to make it work, as Audigy has an own, much simpler, pass-through mechanism. So just skip creation of the device. Signed-off-by: Oswald Buddenhagen Cc: Link: https://lore.kernel.org/r/20230405201220.2197938-1-oswald.buddenhagen@gmx.de Signed-off-by: Takashi Iwai --- sound/pci/emu10k1/emupcm.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index 908f76f1bb9f3..6ec394fb18468 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -1781,17 +1781,21 @@ int snd_emu10k1_pcm_efx(struct snd_emu10k1 *emu, int device) struct snd_kcontrol *kctl; int err; - err = snd_pcm_new(emu->card, "emu10k1 efx", device, 8, 1, &pcm); + err = snd_pcm_new(emu->card, "emu10k1 efx", device, emu->audigy ? 0 : 8, 1, &pcm); if (err < 0) return err; pcm->private_data = emu; - snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &snd_emu10k1_fx8010_playback_ops); + if (!emu->audigy) + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &snd_emu10k1_fx8010_playback_ops); snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &snd_emu10k1_capture_efx_ops); pcm->info_flags = 0; - strcpy(pcm->name, "Multichannel Capture/PT Playback"); + if (emu->audigy) + strcpy(pcm->name, "Multichannel Capture"); + else + strcpy(pcm->name, "Multichannel Capture/PT Playback"); emu->pcm_efx = pcm; /* EFX capture - record the "FXBUS2" channels, by default we connect the EXTINs From 1d1665279a845d16c93687389e364386e3fe0f38 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 6 Apr 2023 20:40:59 +0800 Subject: [PATCH 787/898] block: ublk: make sure that block size is set correctly block size is one very key setting for block layer, and bad block size could panic kernel easily. Make sure that block size is set correctly. Meantime if ublk_validate_params() fails, clear ub->params so that disk is prevented from being added. Fixes: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver") Reported-and-tested-by: Breno Leitao Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index a36934ee97397..604c1a13c76ef 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -246,7 +246,7 @@ static int ublk_validate_params(const struct ublk_device *ub) if (ub->params.types & UBLK_PARAM_TYPE_BASIC) { const struct ublk_param_basic *p = &ub->params.basic; - if (p->logical_bs_shift > PAGE_SHIFT) + if (p->logical_bs_shift > PAGE_SHIFT || p->logical_bs_shift < 9) return -EINVAL; if (p->logical_bs_shift > p->physical_bs_shift) @@ -1970,6 +1970,8 @@ static int ublk_ctrl_set_params(struct ublk_device *ub, /* clear all we don't support yet */ ub->params.types &= UBLK_PARAM_TYPE_ALL; ret = ublk_validate_params(ub); + if (ret) + ub->params.types = 0; } mutex_unlock(&ub->mutex); From 40fac6472f22a59f5694496e179988ab4a1dfe07 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Mar 2023 12:56:13 +0900 Subject: [PATCH 788/898] btrfs: restore the thread_pool= behavior in remount for the end I/O workqueues Commit d7b9416fe5c5 ("btrfs: remove btrfs_end_io_wq") converted the read and I/O handling from btrfs_workqueues to Linux workqueues, and as part of that lost the code to apply the thread_pool= based max_active limit on remount. Restore it. Fixes: d7b9416fe5c5 ("btrfs: remove btrfs_end_io_wq") CC: stable@vger.kernel.org # 6.0+ Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 581845bc206ad..dd6d5b6844f1d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1631,6 +1631,8 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); + workqueue_set_max_active(fs_info->endio_workers, new_pool_size); + workqueue_set_max_active(fs_info->endio_meta_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size); btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size); From 68d99ab0e9221ef54506f827576c5a914680eeaf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 29 Mar 2023 09:13:05 +0900 Subject: [PATCH 789/898] btrfs: fix fast csum implementation detection The BTRFS_FS_CSUM_IMPL_FAST flag is currently set whenever a non-generic crc32c is detected, which is the incorrect check if the file system uses a different checksumming algorithm. Refactor the code to only check this if crc32c is actually used. Note that in an ideal world the information if an algorithm is hardware accelerated or not should be provided by the crypto API instead, but that's left for another day. CC: stable@vger.kernel.org # 5.4.x: c8a5f8ca9a9c: btrfs: print checksum type and implementation at mount time CC: stable@vger.kernel.org # 5.4.x Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 14 ++++++++++++++ fs/btrfs/super.c | 2 -- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b53f0e30ce2b3..9e1596bb208db 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2250,6 +2250,20 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type) fs_info->csum_shash = csum_shash; + /* + * Check if the checksum implementation is a fast accelerated one. + * As-is this is a bit of a hack and should be replaced once the csum + * implementations provide that information themselves. + */ + switch (csum_type) { + case BTRFS_CSUM_TYPE_CRC32: + if (!strstr(crypto_shash_driver_name(csum_shash), "generic")) + set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags); + break; + default: + break; + } + btrfs_info(fs_info, "using %s (%s) checksum algorithm", btrfs_super_csum_name(csum_type), crypto_shash_driver_name(csum_shash)); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index dd6d5b6844f1d..366fb4cde1458 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1516,8 +1516,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", fs_type->name, s->s_id); btrfs_sb(s)->bdev_holder = fs_type; - if (!strstr(crc32c_impl(), "generic")) - set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags); error = btrfs_fill_super(s, fs_devices, data); } if (!error) From fb4a624f88f658c7b7ae124452bd42eaa8ac7168 Mon Sep 17 00:00:00 2001 From: Xu Biang Date: Thu, 6 Apr 2023 06:28:01 -0700 Subject: [PATCH 790/898] ALSA: firewire-tascam: add missing unwind goto in snd_tscm_stream_start_duplex() Smatch Warns: sound/firewire/tascam/tascam-stream.c:493 snd_tscm_stream_start_duplex() warn: missing unwind goto? The direct return will cause the stream list of "&tscm->domain" unemptied and the session in "tscm" unfinished if amdtp_domain_start() returns with an error. Fix this by changing the direct return to a goto which will empty the stream list of "&tscm->domain" and finish the session in "tscm". The snd_tscm_stream_start_duplex() function is called in the prepare callback of PCM. According to "ALSA Kernel API Documentation", the prepare callback of PCM will be called many times at each setup. So, if the "&d->streams" list is not emptied, when the prepare callback is called next time, snd_tscm_stream_start_duplex() will receive -EBUSY from amdtp_domain_add_stream() that tries to add an existing stream to the domain. The error handling code after the "error" label will be executed in this case, and the "&d->streams" list will be emptied. So not emptying the "&d->streams" list will not cause an issue. But it is more efficient and readable to empty it on the first error by changing the direct return to a goto statement. The session in "tscm" has been begun before amdtp_domain_start(), so it needs to be finished when amdtp_domain_start() fails. Fixes: c281d46a51e3 ("ALSA: firewire-tascam: support AMDTP domain") Signed-off-by: Xu Biang Reviewed-by: Dan Carpenter Acked-by: Takashi Sakamoto Cc: Link: https://lore.kernel.org/r/20230406132801.105108-1-xubiang@hust.edu.cn Signed-off-by: Takashi Iwai --- sound/firewire/tascam/tascam-stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/tascam/tascam-stream.c b/sound/firewire/tascam/tascam-stream.c index 53e094cc411f8..dfe783d01d7d2 100644 --- a/sound/firewire/tascam/tascam-stream.c +++ b/sound/firewire/tascam/tascam-stream.c @@ -490,7 +490,7 @@ int snd_tscm_stream_start_duplex(struct snd_tscm *tscm, unsigned int rate) // packet is important for media clock recovery. err = amdtp_domain_start(&tscm->domain, tx_init_skip_cycles, true, true); if (err < 0) - return err; + goto error; if (!amdtp_domain_wait_ready(&tscm->domain, READY_TIMEOUT_MS)) { err = -ETIMEDOUT; From bbb73a103fbbed6f63cb738d3783261c4241b4b2 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Thu, 6 Apr 2023 16:35:39 +0200 Subject: [PATCH 791/898] swiotlb: fix a braino in the alignment check fix The alignment mask in swiotlb_do_find_slots() masks off the high bits which are not relevant for the alignment, so multiple requirements are combined with a bitwise OR rather than AND. In plain English, the stricter the alignment, the more bits must be set in iotlb_align_mask. Confusion may arise from the fact that the same variable is also used to mask off the offset within a swiotlb slot, which is achieved with a bitwise AND. Fixes: 0eee5ae10256 ("swiotlb: fix slot alignment checks") Reported-by: Dexuan Cui Link: https://lore.kernel.org/all/CAA42JLa1y9jJ7BgQvXeUYQh-K2mDNHd2BYZ4iZUz33r5zY7oAQ@mail.gmail.com/ Reported-by: Kelsey Steele Link: https://lore.kernel.org/all/20230405003549.GA21326@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net/ Signed-off-by: Petr Tesarik Tested-by: Dexuan Cui Signed-off-by: Christoph Hellwig --- kernel/dma/swiotlb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 5b919ef832b6f..dac42a2ad5883 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -623,7 +623,7 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, phys_to_dma_unencrypted(dev, mem->start) & boundary_mask; unsigned long max_slots = get_max_slots(boundary_mask); unsigned int iotlb_align_mask = - dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); + dma_get_min_align_mask(dev) | alloc_align_mask; unsigned int nslots = nr_slots(alloc_size), stride; unsigned int offset = swiotlb_align_offset(dev, orig_addr); unsigned int index, slots_checked, count = 0, i; @@ -639,8 +639,8 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, * allocations. */ if (alloc_size >= PAGE_SIZE) - iotlb_align_mask &= PAGE_MASK; - iotlb_align_mask &= alloc_align_mask; + iotlb_align_mask |= ~PAGE_MASK; + iotlb_align_mask &= ~(IO_TLB_SIZE - 1); /* * For mappings with an alignment requirement don't bother looping to From 2a2d8c51defb446e8d89a83f42f8e5cd529111e9 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Thu, 30 Mar 2023 10:52:23 +0800 Subject: [PATCH 792/898] ftrace: Fix issue that 'direct->addr' not restored in modify_ftrace_direct() Syzkaller report a WARNING: "WARN_ON(!direct)" in modify_ftrace_direct(). Root cause is 'direct->addr' was changed from 'old_addr' to 'new_addr' but not restored if error happened on calling ftrace_modify_direct_caller(). Then it can no longer find 'direct' by that 'old_addr'. To fix it, restore 'direct->addr' to 'old_addr' explicitly in error path. Link: https://lore.kernel.org/linux-trace-kernel/20230330025223.1046087-1-zhengyejian1@huawei.com Cc: stable@vger.kernel.org Cc: Cc: Cc: Cc: Fixes: 8a141dd7f706 ("ftrace: Fix modify_ftrace_direct.") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0feea145bb299..c67bcc89a7716 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5667,12 +5667,15 @@ int modify_ftrace_direct(unsigned long ip, ret = 0; } - if (unlikely(ret && new_direct)) { - direct->count++; - list_del_rcu(&new_direct->next); - synchronize_rcu_tasks(); - kfree(new_direct); - ftrace_direct_func_count--; + if (ret) { + direct->addr = old_addr; + if (unlikely(new_direct)) { + direct->count++; + list_del_rcu(&new_direct->next); + synchronize_rcu_tasks(); + kfree(new_direct); + ftrace_direct_func_count--; + } } out_unlock: From 8fbc10b995a506e173f1080dfa2764f232a65e02 Mon Sep 17 00:00:00 2001 From: Michael Sit Wei Hong Date: Thu, 6 Apr 2023 10:45:41 +0800 Subject: [PATCH 793/898] net: stmmac: check fwnode for phy device before scanning for phy Some DT devices already have phy device configured in the DT/ACPI. Current implementation scans for a phy unconditionally even though there is a phy listed in the DT/ACPI and already attached. We should check the fwnode if there is any phy device listed in fwnode and decide whether to scan for a phy to attach to. Fixes: fe2cfbc96803 ("net: stmmac: check if MAC needs to attach to a PHY") Reported-by: Martin Blumenstingl Link: https://lore.kernel.org/lkml/20230403212434.296975-1-martin.blumenstingl@googlemail.com/ Tested-by: Guenter Roeck Tested-by: Shahab Vahedi Tested-by: Marek Szyprowski Tested-by: Martin Blumenstingl Suggested-by: Russell King (Oracle) Signed-off-by: Michael Sit Wei Hong Link: https://lore.kernel.org/r/20230406024541.3556305-1-michael.wei.hong.sit@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 38abc898f149c..d7fcab0570322 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1134,22 +1134,26 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv) static int stmmac_init_phy(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); + struct fwnode_handle *phy_fwnode; struct fwnode_handle *fwnode; - bool phy_needed; int ret; + if (!phylink_expects_phy(priv->phylink)) + return 0; + fwnode = of_fwnode_handle(priv->plat->phylink_node); if (!fwnode) fwnode = dev_fwnode(priv->device); if (fwnode) - ret = phylink_fwnode_phy_connect(priv->phylink, fwnode, 0); + phy_fwnode = fwnode_get_phy_node(fwnode); + else + phy_fwnode = NULL; - phy_needed = phylink_expects_phy(priv->phylink); /* Some DT bindings do not set-up the PHY handle. Let's try to * manually parse it */ - if (!fwnode || phy_needed || ret) { + if (!phy_fwnode || IS_ERR(phy_fwnode)) { int addr = priv->plat->phy_addr; struct phy_device *phydev; @@ -1165,6 +1169,9 @@ static int stmmac_init_phy(struct net_device *dev) } ret = phylink_connect_phy(priv->phylink, phydev); + } else { + fwnode_handle_put(phy_fwnode); + ret = phylink_fwnode_phy_connect(priv->phylink, fwnode, 0); } if (!priv->plat->pmt) { From e959f2beec8e655dba79c5a7111beedae5e757e0 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 6 Apr 2023 10:27:25 -0500 Subject: [PATCH 794/898] ALSA: hda: patch_realtek: add quirk for Asus N7601ZM Add pins and verbs needed to enable speakers and jack. The pins and verbs configurations were identified by snooping the Windows driver commands, with a nice write-up here: https://brakkee.org/site/2023/02/07/fixing-sound-on-the-asus-n7601zm/ Reported-by: Erik Brakkee Link: https://github.com/thesofproject/linux/issues/4176 Tested-by: Erik Brakkee Signed-off-by: Pierre-Louis Bossart Reviewed-by: Kai Vehmanen Reviewed-by: Bard Liao Cc: Link: https://lore.kernel.org/r/20230406152725.15191-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 26187f5d56b59..f554db1e7e9a4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6960,6 +6960,8 @@ enum { ALC269_FIXUP_DELL_M101Z, ALC269_FIXUP_SKU_IGNORE, ALC269_FIXUP_ASUS_G73JW, + ALC269_FIXUP_ASUS_N7601ZM_PINS, + ALC269_FIXUP_ASUS_N7601ZM, ALC269_FIXUP_LENOVO_EAPD, ALC275_FIXUP_SONY_HWEQ, ALC275_FIXUP_SONY_DISABLE_AAMIX, @@ -7256,6 +7258,29 @@ static const struct hda_fixup alc269_fixups[] = { { } } }, + [ALC269_FIXUP_ASUS_N7601ZM_PINS] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x03A11050 }, + { 0x1a, 0x03A11C30 }, + { 0x21, 0x03211420 }, + { } + } + }, + [ALC269_FIXUP_ASUS_N7601ZM] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + {0x20, AC_VERB_SET_COEF_INDEX, 0x62}, + {0x20, AC_VERB_SET_PROC_COEF, 0xa007}, + {0x20, AC_VERB_SET_COEF_INDEX, 0x10}, + {0x20, AC_VERB_SET_PROC_COEF, 0x8420}, + {0x20, AC_VERB_SET_COEF_INDEX, 0x0f}, + {0x20, AC_VERB_SET_PROC_COEF, 0x7774}, + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_ASUS_N7601ZM_PINS, + }, [ALC269_FIXUP_LENOVO_EAPD] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -9466,6 +9491,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1271, "ASUS X430UN", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1290, "ASUS X441SA", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x12a3, "Asus N7691ZM", ALC269_FIXUP_ASUS_N7601ZM), SND_PCI_QUIRK(0x1043, 0x12af, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC), From 919e659ed12568b5b8ba6c2ffdd82d8d31fc28af Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 6 Apr 2023 12:40:19 +0200 Subject: [PATCH 795/898] selftests/bpf: fix xdp_redirect xdp-features selftest for veth driver xdp-features supported by veth driver are no more static, but they depends on veth configuration (e.g. if GRO is enabled/disabled or TX/RX queue configuration). Take it into account in xdp_redirect xdp-features selftest for veth driver. Fixes: fccca038f300 ("veth: take into account device reconfiguration for xdp_features flag") Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/bc35455cfbb1d4f7f52536955ded81ad47d8dc54.1680777371.git.lorenzo@kernel.org Signed-off-by: Martin KaFai Lau --- .../bpf/prog_tests/xdp_do_redirect.c | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index 7271a18ab3e22..8251a0fc6ee94 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -167,8 +167,7 @@ void test_xdp_do_redirect(void) if (!ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG | - NETDEV_XDP_ACT_NDO_XMIT_SG, + NETDEV_XDP_ACT_RX_SG, "veth_src query_opts.feature_flags")) goto out; @@ -176,11 +175,36 @@ void test_xdp_do_redirect(void) if (!ASSERT_OK(err, "veth_dst bpf_xdp_query")) goto out; + if (!ASSERT_EQ(query_opts.feature_flags, + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG, + "veth_dst query_opts.feature_flags")) + goto out; + + /* Enable GRO */ + SYS("ethtool -K veth_src gro on"); + SYS("ethtool -K veth_dst gro on"); + + err = bpf_xdp_query(ifindex_src, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "veth_src bpf_xdp_query gro on")) + goto out; + if (!ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT_SG, - "veth_dst query_opts.feature_flags")) + "veth_src query_opts.feature_flags gro on")) + goto out; + + err = bpf_xdp_query(ifindex_dst, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "veth_dst bpf_xdp_query gro on")) + goto out; + + if (!ASSERT_EQ(query_opts.feature_flags, + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG | + NETDEV_XDP_ACT_NDO_XMIT_SG, + "veth_dst query_opts.feature_flags gro on")) goto out; memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN); From 5da7cb193db32da783a3f3e77d8b639989321d48 Mon Sep 17 00:00:00 2001 From: Ziwei Dai Date: Fri, 31 Mar 2023 20:42:09 +0800 Subject: [PATCH 796/898] rcu/kvfree: Avoid freeing new kfree_rcu() memory after old grace period Memory passed to kvfree_rcu() that is to be freed is tracked by a per-CPU kfree_rcu_cpu structure, which in turn contains pointers to kvfree_rcu_bulk_data structures that contain pointers to memory that has not yet been handed to RCU, along with an kfree_rcu_cpu_work structure that tracks the memory that has already been handed to RCU. These structures track three categories of memory: (1) Memory for kfree(), (2) Memory for kvfree(), and (3) Memory for both that arrived during an OOM episode. The first two categories are tracked in a cache-friendly manner involving a dynamically allocated page of pointers (the aforementioned kvfree_rcu_bulk_data structures), while the third uses a simple (but decidedly cache-unfriendly) linked list through the rcu_head structures in each block of memory. On a given CPU, these three categories are handled as a unit, with that CPU's kfree_rcu_cpu_work structure having one pointer for each of the three categories. Clearly, new memory for a given category cannot be placed in the corresponding kfree_rcu_cpu_work structure until any old memory has had its grace period elapse and thus has been removed. And the kfree_rcu_monitor() function does in fact check for this. Except that the kfree_rcu_monitor() function checks these pointers one at a time. This means that if the previous kfree_rcu() memory passed to RCU had only category 1 and the current one has only category 2, the kfree_rcu_monitor() function will send that current category-2 memory along immediately. This can result in memory being freed too soon, that is, out from under unsuspecting RCU readers. To see this, consider the following sequence of events, in which: o Task A on CPU 0 calls rcu_read_lock(), then uses "from_cset", then is preempted. o CPU 1 calls kfree_rcu(cset, rcu_head) in order to free "from_cset" after a later grace period. Except that "from_cset" is freed right after the previous grace period ended, so that "from_cset" is immediately freed. Task A resumes and references "from_cset"'s member, after which nothing good happens. In full detail: CPU 0 CPU 1 ---------------------- ---------------------- count_memcg_event_mm() |rcu_read_lock() <--- |mem_cgroup_from_task() |// css_set_ptr is the "from_cset" mentioned on CPU 1 |css_set_ptr = rcu_dereference((task)->cgroups) |// Hard irq comes, current task is scheduled out. cgroup_attach_task() |cgroup_migrate() |cgroup_migrate_execute() |css_set_move_task(task, from_cset, to_cset, true) |cgroup_move_task(task, to_cset) |rcu_assign_pointer(.., to_cset) |... |cgroup_migrate_finish() |put_css_set_locked(from_cset) |from_cset->refcount return 0 |kfree_rcu(cset, rcu_head) // free from_cset after new gp |add_ptr_to_bulk_krc_lock() |schedule_delayed_work(&krcp->monitor_work, ..) kfree_rcu_monitor() |krcp->bulk_head[0]'s work attached to krwp->bulk_head_free[] |queue_rcu_work(system_wq, &krwp->rcu_work) |if rwork->rcu.work is not in WORK_STRUCT_PENDING_BIT state, |call_rcu(&rwork->rcu, rcu_work_rcufn) <--- request new gp // There is a perious call_rcu(.., rcu_work_rcufn) // gp end, rcu_work_rcufn() is called. rcu_work_rcufn() |__queue_work(.., rwork->wq, &rwork->work); |kfree_rcu_work() |krwp->bulk_head_free[0] bulk is freed before new gp end!!! |The "from_cset" is freed before new gp end. // the task resumes some time later. |css_set_ptr->subsys[(subsys_id) <--- Caused kernel crash, because css_set_ptr is freed. This commit therefore causes kfree_rcu_monitor() to refrain from moving kfree_rcu() memory to the kfree_rcu_cpu_work structure until the RCU grace period has completed for all three categories. v2: Use helper function instead of inserted code block at kfree_rcu_monitor(). Fixes: 34c881745549 ("rcu: Support kfree_bulk() interface in kfree_rcu()") Fixes: 5f3c8d620447 ("rcu/tree: Maintain separate array for vmalloc ptrs") Reported-by: Mukesh Ojha Signed-off-by: Ziwei Dai Reviewed-by: Uladzislau Rezki (Sony) Tested-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8e880c09ab59e..7b95ee98a1a53 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3024,6 +3024,18 @@ need_offload_krc(struct kfree_rcu_cpu *krcp) return !!READ_ONCE(krcp->head); } +static bool +need_wait_for_krwp_work(struct kfree_rcu_cpu_work *krwp) +{ + int i; + + for (i = 0; i < FREE_N_CHANNELS; i++) + if (!list_empty(&krwp->bulk_head_free[i])) + return true; + + return !!krwp->head_free; +} + static int krc_count(struct kfree_rcu_cpu *krcp) { int sum = atomic_read(&krcp->head_count); @@ -3107,15 +3119,14 @@ static void kfree_rcu_monitor(struct work_struct *work) for (i = 0; i < KFREE_N_BATCHES; i++) { struct kfree_rcu_cpu_work *krwp = &(krcp->krw_arr[i]); - // Try to detach bulk_head or head and attach it over any - // available corresponding free channel. It can be that - // a previous RCU batch is in progress, it means that - // immediately to queue another one is not possible so - // in that case the monitor work is rearmed. - if ((!list_empty(&krcp->bulk_head[0]) && list_empty(&krwp->bulk_head_free[0])) || - (!list_empty(&krcp->bulk_head[1]) && list_empty(&krwp->bulk_head_free[1])) || - (READ_ONCE(krcp->head) && !krwp->head_free)) { + // Try to detach bulk_head or head and attach it, only when + // all channels are free. Any channel is not free means at krwp + // there is on-going rcu work to handle krwp's free business. + if (need_wait_for_krwp_work(krwp)) + continue; + // kvfree_rcu_drain_ready() might handle this krcp, if so give up. + if (need_offload_krc(krcp)) { // Channel 1 corresponds to the SLAB-pointer bulk path. // Channel 2 corresponds to vmalloc-pointer bulk path. for (j = 0; j < FREE_N_CHANNELS; j++) { From 10b6b4a8ac6120ec36555fd286eed577f7632e3b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 31 Mar 2023 11:08:42 -0500 Subject: [PATCH 797/898] ACPI: x86: utils: Add Picasso to the list for forcing StorageD3Enable Picasso was the first APU that introduced s2idle support from AMD, and it was predating before vendors started to use `StorageD3Enable` in their firmware. Windows doesn't have problems with this hardware and NVME so it was likely on the list of hardcoded CPUs to use this behavior in Windows. Add it to the list for Linux to avoid NVME resume issues. Reported-by: Stuart Axon Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2449 Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki --- drivers/acpi/x86/utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index da5727069d851..ba420a28a4aad 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -213,6 +213,7 @@ bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *s disk in the system. */ static const struct x86_cpu_id storage_d3_cpu_ids[] = { + X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 24, NULL), /* Picasso */ X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 96, NULL), /* Renoir */ X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 104, NULL), /* Lucienne */ X86_MATCH_VENDOR_FAM_MODEL(AMD, 25, 80, NULL), /* Cezanne */ From 31c683967174b487939efaf65e41f5ff1404e141 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 6 Apr 2023 11:10:33 -0400 Subject: [PATCH 798/898] tracing/synthetic: Make lastcmd_mutex static The lastcmd_mutex is only used in trace_events_synth.c and should be static. Link: https://lore.kernel.org/linux-trace-kernel/202304062033.cRStgOuP-lkp@intel.com/ Link: https://lore.kernel.org/linux-trace-kernel/20230406111033.6e26de93@gandalf.local.home Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Tze-nan Wu Fixes: 4ccf11c4e8a8e ("tracing/synthetic: Fix races on freeing last_cmd") Reviewed-by: Mukesh Ojha Reported-by: kernel test robot Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_synth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index f0ff730125bf2..d6a70aff24101 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -44,7 +44,7 @@ enum { ERRORS }; static const char *err_text[] = { ERRORS }; -DEFINE_MUTEX(lastcmd_mutex); +static DEFINE_MUTEX(lastcmd_mutex); static char *last_cmd; static int errpos(const char *str) From 195d8e5da3acb17c5357526494f818a21e97cd10 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Wed, 29 Mar 2023 13:13:11 -0700 Subject: [PATCH 799/898] PCI/MSI: Provide missing stub for pci_msix_can_alloc_dyn() pci_msix_can_alloc_dyn() is not declared when CONFIG_PCI_MSI is disabled. There is no existing user of pci_msix_can_alloc_dyn() but work is in progress to change this. This work encounters the following error when CONFIG_PCI_MSI is disabled: drivers/vfio/pci/vfio_pci_intrs.c:427:21: error: implicit declaration of function 'pci_msix_can_alloc_dyn' [-Werror=implicit-function-declaration] Provide definition for pci_msix_can_alloc_dyn() in preparation for users that need to compile when CONFIG_PCI_MSI is disabled. [bhelgaas: Also reported by Arnd Bergmann in drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c; added his Fixes: line] Fixes: fb0a6a268dcd ("net/mlx5: Provide external API for allocating vectors") Fixes: 34026364df8e ("PCI/MSI: Provide post-enable dynamic allocation interfaces for MSI-X") Link: https://lore.kernel.org/oe-kbuild-all/202303291000.PWFqGCxH-lkp@intel.com/ Link: https://lore.kernel.org/r/310ecc4815dae4174031062f525245f0755c70e2.1680119924.git.reinette.chatre@intel.com Reported-by: kernel test robot Signed-off-by: Reinette Chatre Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Cc: stable@vger.kernel.org # v6.2+ --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/pci.h b/include/linux/pci.h index fafd8020c6d7f..5e2dab6759437 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1623,6 +1623,8 @@ pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, flags, NULL); } +static inline bool pci_msix_can_alloc_dyn(struct pci_dev *dev) +{ return false; } static inline struct msi_map pci_msix_alloc_irq_at(struct pci_dev *dev, unsigned int index, const struct irq_affinity_desc *affdesc) { From f195fc1e9715ba826c3b62d58038f760f66a4fe9 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Wed, 29 Mar 2023 22:58:59 +0530 Subject: [PATCH 800/898] x86/PCI: Add quirk for AMD XHCI controller that loses MSI-X state in D3hot The AMD [1022:15b8] USB controller loses some internal functional MSI-X context when transitioning from D0 to D3hot. BIOS normally traps D0->D3hot and D3hot->D0 transitions so it can save and restore that internal context, but some firmware in the field can't do this because it fails to clear the AMD_15B8_RCC_DEV2_EPF0_STRAP2 NO_SOFT_RESET bit. Clear AMD_15B8_RCC_DEV2_EPF0_STRAP2 NO_SOFT_RESET bit before USB controller initialization during boot. Link: https://lore.kernel.org/linux-usb/Y%2Fz9GdHjPyF2rNG3@glanzmann.de/T/#u Link: https://lore.kernel.org/r/20230329172859.699743-1-Basavaraj.Natikar@amd.com Reported-by: Thomas Glanzmann Tested-by: Thomas Glanzmann Signed-off-by: Basavaraj Natikar Signed-off-by: Bjorn Helgaas Reviewed-by: Mario Limonciello Cc: stable@vger.kernel.org --- arch/x86/pci/fixup.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 615a76d700194..bf5161dcf89e7 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -824,3 +825,23 @@ static void rs690_fix_64bit_dma(struct pci_dev *pdev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma); #endif + +#ifdef CONFIG_AMD_NB + +#define AMD_15B8_RCC_DEV2_EPF0_STRAP2 0x10136008 +#define AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK 0x00000080L + +static void quirk_clear_strap_no_soft_reset_dev2_f0(struct pci_dev *dev) +{ + u32 data; + + if (!amd_smn_read(0, AMD_15B8_RCC_DEV2_EPF0_STRAP2, &data)) { + data &= ~AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK; + if (amd_smn_write(0, AMD_15B8_RCC_DEV2_EPF0_STRAP2, data)) + pci_err(dev, "Failed to write data 0x%x\n", data); + } else { + pci_err(dev, "Failed to read data\n"); + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b8, quirk_clear_strap_no_soft_reset_dev2_f0); +#endif From 1e1d3574e69f70b848f9d50eca2c5ff04931b7ba Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Thu, 6 Apr 2023 01:37:39 +0900 Subject: [PATCH 801/898] drm/scheduler: Fix UAF race in drm_sched_entity_push_job() After a job is pushed into the queue, it is owned by the scheduler core and may be freed at any time, so we can't write nor read the submit timestamp after that point. Fixes oopses observed with the drm/asahi driver, found with kASAN. Signed-off-by: Asahi Lina Link: https://lore.kernel.org/r/20230406-scheduler-uaf-2-v1-1-972531cf0a81@asahilina.net Reviewed-by: Luben Tuikov Signed-off-by: Luben Tuikov --- drivers/gpu/drm/scheduler/sched_entity.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 15d04a0ec6234..e0a8890a62e23 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -507,12 +507,19 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) { struct drm_sched_entity *entity = sched_job->entity; bool first; + ktime_t submit_ts; trace_drm_sched_job(sched_job, entity); atomic_inc(entity->rq->sched->score); WRITE_ONCE(entity->last_user, current->group_leader); + + /* + * After the sched_job is pushed into the entity queue, it may be + * completed and freed up at any time. We can no longer access it. + * Make sure to set the submit_ts first, to avoid a race. + */ + sched_job->submit_ts = submit_ts = ktime_get(); first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node); - sched_job->submit_ts = ktime_get(); /* first job wakes up scheduler */ if (first) { @@ -529,7 +536,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) spin_unlock(&entity->rq_lock); if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) - drm_sched_rq_update_fifo(entity, sched_job->submit_ts); + drm_sched_rq_update_fifo(entity, submit_ts); drm_sched_wakeup(entity->rq->sched); } From 30ba2d09edb5ea857a1473ae3d820911347ada62 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 29 Mar 2023 07:38:35 -0500 Subject: [PATCH 802/898] PCI: Fix use-after-free in pci_bus_release_domain_nr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c14f7ccc9f5d ("PCI: Assign PCI domain IDs by ida_alloc()") introduced a use-after-free bug in the bus removal cleanup. The issue was found with kfence: [ 19.293351] BUG: KFENCE: use-after-free read in pci_bus_release_domain_nr+0x10/0x70 [ 19.302817] Use-after-free read at 0x000000007f3b80eb (in kfence-#115): [ 19.309677] pci_bus_release_domain_nr+0x10/0x70 [ 19.309691] dw_pcie_host_deinit+0x28/0x78 [ 19.309702] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194] [ 19.309734] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194] [ 19.309752] platform_probe+0x90/0xd8 ... [ 19.311457] kfence-#115: 0x00000000063a155a-0x00000000ba698da8, size=1072, cache=kmalloc-2k [ 19.311469] allocated by task 96 on cpu 10 at 19.279323s: [ 19.311562] __kmem_cache_alloc_node+0x260/0x278 [ 19.311571] kmalloc_trace+0x24/0x30 [ 19.311580] pci_alloc_bus+0x24/0xa0 [ 19.311590] pci_register_host_bridge+0x48/0x4b8 [ 19.311601] pci_scan_root_bus_bridge+0xc0/0xe8 [ 19.311613] pci_host_probe+0x18/0xc0 [ 19.311623] dw_pcie_host_init+0x2c0/0x568 [ 19.311630] tegra_pcie_dw_probe+0x610/0xb28 [pcie_tegra194] [ 19.311647] platform_probe+0x90/0xd8 ... [ 19.311782] freed by task 96 on cpu 10 at 19.285833s: [ 19.311799] release_pcibus_dev+0x30/0x40 [ 19.311808] device_release+0x30/0x90 [ 19.311814] kobject_put+0xa8/0x120 [ 19.311832] device_unregister+0x20/0x30 [ 19.311839] pci_remove_bus+0x78/0x88 [ 19.311850] pci_remove_root_bus+0x5c/0x98 [ 19.311860] dw_pcie_host_deinit+0x28/0x78 [ 19.311866] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194] [ 19.311883] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194] [ 19.311900] platform_probe+0x90/0xd8 ... [ 19.313579] CPU: 10 PID: 96 Comm: kworker/u24:2 Not tainted 6.2.0 #4 [ 19.320171] Hardware name: /, BIOS 1.0-d7fb19b 08/10/2022 [ 19.325852] Workqueue: events_unbound deferred_probe_work_func The stack trace is a bit misleading as dw_pcie_host_deinit() doesn't directly call pci_bus_release_domain_nr(). The issue turns out to be in pci_remove_root_bus() which first calls pci_remove_bus() which frees the struct pci_bus when its struct device is released. Then pci_bus_release_domain_nr() is called and accesses the freed struct pci_bus. Reordering these fixes the issue. Fixes: c14f7ccc9f5d ("PCI: Assign PCI domain IDs by ida_alloc()") Link: https://lore.kernel.org/r/20230329123835.2724518-1-robh@kernel.org Link: https://lore.kernel.org/r/b529cb69-0602-9eed-fc02-2f068707a006@nvidia.com Reported-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Rob Herring Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Cc: stable@vger.kernel.org # v6.2+ Cc: Pali Rohár --- drivers/pci/remove.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 0145aef1b9301..22d39e12b236a 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -157,8 +157,6 @@ void pci_remove_root_bus(struct pci_bus *bus) list_for_each_entry_safe(child, tmp, &bus->devices, bus_list) pci_remove_bus_device(child); - pci_remove_bus(bus); - host_bridge->bus = NULL; #ifdef CONFIG_PCI_DOMAINS_GENERIC /* Release domain_nr if it was dynamically allocated */ @@ -166,6 +164,9 @@ void pci_remove_root_bus(struct pci_bus *bus) pci_bus_release_domain_nr(bus, host_bridge->dev.parent); #endif + pci_remove_bus(bus); + host_bridge->bus = NULL; + /* remove the host bridge */ device_del(&host_bridge->dev); } From 3723091ea1884d599cc8b8bf719d6f42e8d4d8b1 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 22 Mar 2023 11:59:26 +0800 Subject: [PATCH 803/898] block: don't set GD_NEED_PART_SCAN if scan partition failed Currently if disk_scan_partitions() failed, GD_NEED_PART_SCAN will still set, and partition scan will be proceed again when blkdev_get_by_dev() is called. However, this will cause a problem that re-assemble partitioned raid device will creat partition for underlying disk. Test procedure: mdadm -CR /dev/md0 -l 1 -n 2 /dev/sda /dev/sdb -e 1.0 sgdisk -n 0:0:+100MiB /dev/md0 blockdev --rereadpt /dev/sda blockdev --rereadpt /dev/sdb mdadm -S /dev/md0 mdadm -A /dev/md0 /dev/sda /dev/sdb Test result: underlying disk partition and raid partition can be observed at the same time Note that this can still happen in come corner cases that GD_NEED_PART_SCAN can be set for underlying disk while re-assemble raid device. Fixes: e5cfefa97bcc ("block: fix scan partition for exclusively open device again") Reviewed-by: Jan Kara Reviewed-by: Ming Lei Signed-off-by: Yu Kuai Signed-off-by: Jens Axboe --- block/genhd.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/block/genhd.c b/block/genhd.c index 02d9cfb9e077a..7f874737af682 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -368,7 +368,6 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode) if (disk->open_partitions) return -EBUSY; - set_bit(GD_NEED_PART_SCAN, &disk->state); /* * If the device is opened exclusively by current thread already, it's * safe to scan partitons, otherwise, use bd_prepare_to_claim() to @@ -381,12 +380,19 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode) return ret; } + set_bit(GD_NEED_PART_SCAN, &disk->state); bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~FMODE_EXCL, NULL); if (IS_ERR(bdev)) ret = PTR_ERR(bdev); else blkdev_put(bdev, mode & ~FMODE_EXCL); + /* + * If blkdev_get_by_dev() failed early, GD_NEED_PART_SCAN is still set, + * and this will cause that re-assemble partitioned raid device will + * creat partition for underlying disk. + */ + clear_bit(GD_NEED_PART_SCAN, &disk->state); if (!(mode & FMODE_EXCL)) bd_abort_claiming(disk->part0, disk_scan_partitions); return ret; From 4f5d5b33fc400911d6e1f49095522b361d9cbe13 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 6 Apr 2023 11:55:47 +0300 Subject: [PATCH 804/898] cifs: double lock in cifs_reconnect_tcon() This lock was supposed to be an unlock. Fixes: 6cc041e90c17 ("cifs: avoid races in parallel reconnects in smb1") Signed-off-by: Dan Carpenter Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 0d30b17494e4a..9d963caec35c8 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -120,7 +120,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) spin_lock(&server->srv_lock); if (server->tcpStatus == CifsNeedReconnect) { spin_unlock(&server->srv_lock); - mutex_lock(&ses->session_mutex); + mutex_unlock(&ses->session_mutex); if (tcon->retry) goto again; From 8ce07be703456acb00e83d99f3b8036252c33b02 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Wed, 5 Apr 2023 23:31:18 -0700 Subject: [PATCH 805/898] niu: Fix missing unwind goto in niu_alloc_channels() Smatch reports: drivers/net/ethernet/sun/niu.c:4525 niu_alloc_channels() warn: missing unwind goto? If niu_rbr_fill() fails, then we are directly returning 'err' without freeing the channels. Fix this by changing direct return to a goto 'out_err'. Fixes: a3138df9f20e ("[NIU]: Add Sun Neptune ethernet driver.") Signed-off-by: Harshit Mogalapalli Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/niu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index ab8b09a9ef61d..7a2e767762974 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -4522,7 +4522,7 @@ static int niu_alloc_channels(struct niu *np) err = niu_rbr_fill(np, rp, GFP_KERNEL); if (err) - return err; + goto out_err; } tx_rings = kcalloc(num_tx_rings, sizeof(struct tx_ring_info), From dc5110c2d959c1707e12df5f792f41d90614adaa Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 6 Apr 2023 14:34:50 +0800 Subject: [PATCH 806/898] tcp: restrict net.ipv4.tcp_app_win UBSAN: shift-out-of-bounds in net/ipv4/tcp_input.c:555:23 shift exponent 255 is too large for 32-bit type 'int' CPU: 1 PID: 7907 Comm: ssh Not tainted 6.3.0-rc4-00161-g62bad54b26db-dirty #206 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Call Trace: dump_stack_lvl+0x136/0x150 __ubsan_handle_shift_out_of_bounds+0x21f/0x5a0 tcp_init_transfer.cold+0x3a/0xb9 tcp_finish_connect+0x1d0/0x620 tcp_rcv_state_process+0xd78/0x4d60 tcp_v4_do_rcv+0x33d/0x9d0 __release_sock+0x133/0x3b0 release_sock+0x58/0x1b0 'maxwin' is int, shifting int for 32 or more bits is undefined behaviour. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: YueHaibing Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 2 ++ net/ipv4/sysctl_net_ipv4.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 87dd1c5283e61..58a78a3166978 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -340,6 +340,8 @@ tcp_app_win - INTEGER Reserve max(window/2^tcp_app_win, mss) of window for application buffer. Value 0 is special, it means that nothing is reserved. + Possible values are [0, 31], inclusive. + Default: 31 tcp_autocorking - BOOLEAN diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 0d0cc4ef2b85a..40fe70fc2015d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -25,6 +25,7 @@ static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; static int tcp_adv_win_scale_min = -31; static int tcp_adv_win_scale_max = 31; +static int tcp_app_win_max = 31; static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS; static int tcp_min_snd_mss_max = 65535; static int ip_privileged_port_min; @@ -1198,6 +1199,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &tcp_app_win_max, }, { .procname = "tcp_adv_win_scale", From 4598380f9c548aa161eb4e990a1583f0a7d1e0d7 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 6 Apr 2023 16:23:50 +0800 Subject: [PATCH 807/898] bonding: fix ns validation on backup slaves When arp_validate is set to 2, 3, or 6, validation is performed for backup slaves as well. As stated in the bond documentation, validation involves checking the broadcast ARP request sent out via the active slave. This helps determine which slaves are more likely to function in the event of an active slave failure. However, when the target is an IPv6 address, the NS message sent from the active interface is not checked on backup slaves. Additionally, based on the bond_arp_rcv() rule b, we must reverse the saddr and daddr when checking the NS message. Note that when checking the NS message, the destination address is a multicast address. Therefore, we must convert the target address to solicited multicast in the bond_get_targets_ip6() function. Prior to the fix, the backup slaves had a mii status of "down", but after the fix, all of the slaves' mii status was updated to "UP". Fixes: 4e24be018eb9 ("bonding: add new parameter ns_targets") Reviewed-by: Jonathan Toppins Acked-by: Jay Vosburgh Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 5 +++-- include/net/bonding.h | 8 ++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 236e5219c8112..8cc9a74789b79 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3269,7 +3269,8 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond, combined = skb_header_pointer(skb, 0, sizeof(_combined), &_combined); if (!combined || combined->ip6.nexthdr != NEXTHDR_ICMP || - combined->icmp6.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT) + (combined->icmp6.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION && + combined->icmp6.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT)) goto out; saddr = &combined->ip6.saddr; @@ -3291,7 +3292,7 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond, else if (curr_active_slave && time_after(slave_last_rx(bond, curr_active_slave), curr_active_slave->last_link_up)) - bond_validate_na(bond, slave, saddr, daddr); + bond_validate_na(bond, slave, daddr, saddr); else if (curr_arp_slave && bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1)) bond_validate_na(bond, slave, saddr, daddr); diff --git a/include/net/bonding.h b/include/net/bonding.h index ea36ab7f9e724..c3843239517d5 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -761,13 +761,17 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip) #if IS_ENABLED(CONFIG_IPV6) static inline int bond_get_targets_ip6(struct in6_addr *targets, struct in6_addr *ip) { + struct in6_addr mcaddr; int i; - for (i = 0; i < BOND_MAX_NS_TARGETS; i++) - if (ipv6_addr_equal(&targets[i], ip)) + for (i = 0; i < BOND_MAX_NS_TARGETS; i++) { + addrconf_addr_solict_mult(&targets[i], &mcaddr); + if ((ipv6_addr_equal(&targets[i], ip)) || + (ipv6_addr_equal(&mcaddr, ip))) return i; else if (ipv6_addr_any(&targets[i])) break; + } return -1; } From 481b56e0391ea46d6bf1a2604422a21063615901 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 6 Apr 2023 16:23:51 +0800 Subject: [PATCH 808/898] selftests: bonding: re-format bond option tests To improve the testing process for bond options, A new bond topology lib is added to our testing setup. The current option_prio.sh file will be renamed to bond_options.sh so that all bonding options can be tested here. Specifically, for priority testing, we will run all tests using modes 1, 5, and 6. These changes will help us streamline the testing process and ensure that our bond options are rigorously evaluated. Acked-by: Jay Vosburgh Signed-off-by: Hangbin Liu Acked-by: Jonathan Toppins Signed-off-by: David S. Miller --- .../selftests/drivers/net/bonding/Makefile | 3 +- .../drivers/net/bonding/bond_options.sh | 209 +++++++++++++++ .../drivers/net/bonding/bond_topo_3d1c.sh | 143 ++++++++++ .../drivers/net/bonding/option_prio.sh | 245 ------------------ 4 files changed, 354 insertions(+), 246 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/bonding/bond_options.sh create mode 100644 tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh delete mode 100755 tools/testing/selftests/drivers/net/bonding/option_prio.sh diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index a39bb2560d9bf..03f92d7aeb19b 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -8,11 +8,12 @@ TEST_PROGS := \ dev_addr_lists.sh \ mode-1-recovery-updelay.sh \ mode-2-recovery-updelay.sh \ - option_prio.sh \ + bond_options.sh \ bond-eth-type-change.sh TEST_FILES := \ lag_lib.sh \ + bond_topo_3d1c.sh \ net_forwarding_lib.sh include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh new file mode 100755 index 0000000000000..7213211d0bde2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -0,0 +1,209 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test bonding options with mode 1,5,6 + +ALL_TESTS=" + prio +" + +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source ${lib_dir}/net_forwarding_lib.sh +source ${lib_dir}/bond_topo_3d1c.sh + +skip_prio() +{ + local skip=1 + + # check if iproute support prio option + ip -n ${s_ns} link set eth0 type bond_slave prio 10 + [[ $? -ne 0 ]] && skip=0 + + # check if kernel support prio option + ip -n ${s_ns} -d link show eth0 | grep -q "prio 10" + [[ $? -ne 0 ]] && skip=0 + + return $skip +} + +skip_ns() +{ + local skip=1 + + # check if iproute support ns_ip6_target option + ip -n ${s_ns} link add bond1 type bond ns_ip6_target ${g_ip6} + [[ $? -ne 0 ]] && skip=0 + + # check if kernel support ns_ip6_target option + ip -n ${s_ns} -d link show bond1 | grep -q "ns_ip6_target ${g_ip6}" + [[ $? -ne 0 ]] && skip=0 + + ip -n ${s_ns} link del bond1 + + return $skip +} + +active_slave="" +check_active_slave() +{ + local target_active_slave=$1 + active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + test "$active_slave" = "$target_active_slave" + check_err $? "Current active slave is $active_slave but not $target_active_slave" +} + + +# Test bonding prio option +prio_test() +{ + local param="$1" + RET=0 + + # create bond + bond_reset "${param}" + + # check bonding member prio value + ip -n ${s_ns} link set eth0 type bond_slave prio 0 + ip -n ${s_ns} link set eth1 type bond_slave prio 10 + ip -n ${s_ns} link set eth2 type bond_slave prio 11 + cmd_jq "ip -n ${s_ns} -d -j link show eth0" \ + ".[].linkinfo.info_slave_data | select (.prio == 0)" "-e" &> /dev/null + check_err $? "eth0 prio is not 0" + cmd_jq "ip -n ${s_ns} -d -j link show eth1" \ + ".[].linkinfo.info_slave_data | select (.prio == 10)" "-e" &> /dev/null + check_err $? "eth1 prio is not 10" + cmd_jq "ip -n ${s_ns} -d -j link show eth2" \ + ".[].linkinfo.info_slave_data | select (.prio == 11)" "-e" &> /dev/null + check_err $? "eth2 prio is not 11" + + bond_check_connection "setup" + + # active slave should be the primary slave + check_active_slave eth1 + + # active slave should be the higher prio slave + ip -n ${s_ns} link set $active_slave down + bond_check_connection "fail over" + check_active_slave eth2 + + # when only 1 slave is up + ip -n ${s_ns} link set $active_slave down + bond_check_connection "only 1 slave up" + check_active_slave eth0 + + # when a higher prio slave change to up + ip -n ${s_ns} link set eth2 up + bond_check_connection "higher prio slave up" + case $primary_reselect in + "0") + check_active_slave "eth2" + ;; + "1") + check_active_slave "eth0" + ;; + "2") + check_active_slave "eth0" + ;; + esac + local pre_active_slave=$active_slave + + # when the primary slave change to up + ip -n ${s_ns} link set eth1 up + bond_check_connection "primary slave up" + case $primary_reselect in + "0") + check_active_slave "eth1" + ;; + "1") + check_active_slave "$pre_active_slave" + ;; + "2") + check_active_slave "$pre_active_slave" + ip -n ${s_ns} link set $active_slave down + bond_check_connection "pre_active slave down" + check_active_slave "eth1" + ;; + esac + + # Test changing bond slave prio + if [[ "$primary_reselect" == "0" ]];then + ip -n ${s_ns} link set eth0 type bond_slave prio 1000000 + ip -n ${s_ns} link set eth1 type bond_slave prio 0 + ip -n ${s_ns} link set eth2 type bond_slave prio -50 + ip -n ${s_ns} -d link show eth0 | grep -q 'prio 1000000' + check_err $? "eth0 prio is not 1000000" + ip -n ${s_ns} -d link show eth1 | grep -q 'prio 0' + check_err $? "eth1 prio is not 0" + ip -n ${s_ns} -d link show eth2 | grep -q 'prio -50' + check_err $? "eth3 prio is not -50" + check_active_slave "eth1" + + ip -n ${s_ns} link set $active_slave down + bond_check_connection "change slave prio" + check_active_slave "eth0" + fi +} + +prio_miimon() +{ + local primary_reselect + local mode=$1 + + for primary_reselect in 0 1 2; do + prio_test "mode $mode miimon 100 primary eth1 primary_reselect $primary_reselect" + log_test "prio" "$mode miimon primary_reselect $primary_reselect" + done +} + +prio_arp() +{ + local primary_reselect + local mode=$1 + + for primary_reselect in 0 1 2; do + prio_test "mode active-backup arp_interval 100 arp_ip_target ${g_ip4} primary eth1 primary_reselect $primary_reselect" + log_test "prio" "$mode arp_ip_target primary_reselect $primary_reselect" + done +} + +prio_ns() +{ + local primary_reselect + local mode=$1 + + if skip_ns; then + log_test_skip "prio ns" "Current iproute or kernel doesn't support bond option 'ns_ip6_target'." + return 0 + fi + + for primary_reselect in 0 1 2; do + prio_test "mode active-backup arp_interval 100 ns_ip6_target ${g_ip6} primary eth1 primary_reselect $primary_reselect" + log_test "prio" "$mode ns_ip6_target primary_reselect $primary_reselect" + done +} + +prio() +{ + local mode modes="active-backup balance-tlb balance-alb" + + if skip_prio; then + log_test_skip "prio" "Current iproute or kernel doesn't support bond option 'prio'." + return 0 + fi + + for mode in $modes; do + prio_miimon $mode + prio_arp $mode + prio_ns $mode + done +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh new file mode 100644 index 0000000000000..4045ca97fb22d --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh @@ -0,0 +1,143 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Topology for Bond mode 1,5,6 testing +# +# +-------------------------------------+ +# | bond0 | +# | + | Server +# | eth0 | eth1 eth2 | 192.0.2.1/24 +# | +-------------------+ | 2001:db8::1/24 +# | | | | | +# +-------------------------------------+ +# | | | +# +-------------------------------------+ +# | | | | | +# | +---+---------+---------+---+ | Gateway +# | | br0 | | 192.0.2.254/24 +# | +-------------+-------------+ | 2001:db8::254/24 +# | | | +# +-------------------------------------+ +# | +# +-------------------------------------+ +# | | | Client +# | + | 192.0.2.10/24 +# | eth0 | 2001:db8::10/24 +# +-------------------------------------+ + +s_ns="s-$(mktemp -u XXXXXX)" +c_ns="c-$(mktemp -u XXXXXX)" +g_ns="g-$(mktemp -u XXXXXX)" +s_ip4="192.0.2.1" +c_ip4="192.0.2.10" +g_ip4="192.0.2.254" +s_ip6="2001:db8::1" +c_ip6="2001:db8::10" +g_ip6="2001:db8::254" + +gateway_create() +{ + ip netns add ${g_ns} + ip -n ${g_ns} link add br0 type bridge + ip -n ${g_ns} link set br0 up + ip -n ${g_ns} addr add ${g_ip4}/24 dev br0 + ip -n ${g_ns} addr add ${g_ip6}/24 dev br0 +} + +gateway_destroy() +{ + ip -n ${g_ns} link del br0 + ip netns del ${g_ns} +} + +server_create() +{ + ip netns add ${s_ns} + ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100 + + for i in $(seq 0 2); do + ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns} + + ip -n ${g_ns} link set s${i} up + ip -n ${g_ns} link set s${i} master br0 + ip -n ${s_ns} link set eth${i} master bond0 + done + + ip -n ${s_ns} link set bond0 up + ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0 + ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0 + sleep 2 +} + +# Reset bond with new mode and options +bond_reset() +{ + local param="$1" + + ip -n ${s_ns} link set bond0 down + ip -n ${s_ns} link del bond0 + + ip -n ${s_ns} link add bond0 type bond $param + for i in $(seq 0 2); do + ip -n ${s_ns} link set eth$i master bond0 + done + + ip -n ${s_ns} link set bond0 up + ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0 + ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0 + sleep 2 +} + +server_destroy() +{ + for i in $(seq 0 2); do + ip -n ${s_ns} link del eth${i} + done + ip netns del ${s_ns} +} + +client_create() +{ + ip netns add ${c_ns} + ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns} + + ip -n ${g_ns} link set c0 up + ip -n ${g_ns} link set c0 master br0 + + ip -n ${c_ns} link set eth0 up + ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0 + ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0 +} + +client_destroy() +{ + ip -n ${c_ns} link del eth0 + ip netns del ${c_ns} +} + +setup_prepare() +{ + gateway_create + server_create + client_create +} + +cleanup() +{ + pre_cleanup + + client_destroy + server_destroy + gateway_destroy +} + +bond_check_connection() +{ + local msg=${1:-"check connection"} + + sleep 2 + ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null + check_err $? "${msg}: ping failed" + ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null + check_err $? "${msg}: ping6 failed" +} diff --git a/tools/testing/selftests/drivers/net/bonding/option_prio.sh b/tools/testing/selftests/drivers/net/bonding/option_prio.sh deleted file mode 100755 index c32eebff5005d..0000000000000 --- a/tools/testing/selftests/drivers/net/bonding/option_prio.sh +++ /dev/null @@ -1,245 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Test bonding option prio -# - -ALL_TESTS=" - prio_arp_ip_target_test - prio_miimon_test -" - -REQUIRE_MZ=no -REQUIRE_JQ=no -NUM_NETIFS=0 -lib_dir=$(dirname "$0") -source "$lib_dir"/net_forwarding_lib.sh - -destroy() -{ - ip link del bond0 &>/dev/null - ip link del br0 &>/dev/null - ip link del veth0 &>/dev/null - ip link del veth1 &>/dev/null - ip link del veth2 &>/dev/null - ip netns del ns1 &>/dev/null - ip link del veth3 &>/dev/null -} - -cleanup() -{ - pre_cleanup - - destroy -} - -skip() -{ - local skip=1 - ip link add name bond0 type bond mode 1 miimon 100 &>/dev/null - ip link add name veth0 type veth peer name veth0_p - ip link set veth0 master bond0 - - # check if iproute support prio option - ip link set dev veth0 type bond_slave prio 10 - [[ $? -ne 0 ]] && skip=0 - - # check if bonding support prio option - ip -d link show veth0 | grep -q "prio 10" - [[ $? -ne 0 ]] && skip=0 - - ip link del bond0 &>/dev/null - ip link del veth0 - - return $skip -} - -active_slave="" -check_active_slave() -{ - local target_active_slave=$1 - active_slave="$(cat /sys/class/net/bond0/bonding/active_slave)" - test "$active_slave" = "$target_active_slave" - check_err $? "Current active slave is $active_slave but not $target_active_slave" -} - - -# Test bonding prio option with mode=$mode monitor=$monitor -# and primary_reselect=$primary_reselect -prio_test() -{ - RET=0 - - local monitor=$1 - local mode=$2 - local primary_reselect=$3 - - local bond_ip4="192.169.1.2" - local peer_ip4="192.169.1.1" - local bond_ip6="2009:0a:0b::02" - local peer_ip6="2009:0a:0b::01" - - - # create veths - ip link add name veth0 type veth peer name veth0_p - ip link add name veth1 type veth peer name veth1_p - ip link add name veth2 type veth peer name veth2_p - - # create bond - if [[ "$monitor" == "miimon" ]];then - ip link add name bond0 type bond mode $mode miimon 100 primary veth1 primary_reselect $primary_reselect - elif [[ "$monitor" == "arp_ip_target" ]];then - ip link add name bond0 type bond mode $mode arp_interval 1000 arp_ip_target $peer_ip4 primary veth1 primary_reselect $primary_reselect - elif [[ "$monitor" == "ns_ip6_target" ]];then - ip link add name bond0 type bond mode $mode arp_interval 1000 ns_ip6_target $peer_ip6 primary veth1 primary_reselect $primary_reselect - fi - ip link set bond0 up - ip link set veth0 master bond0 - ip link set veth1 master bond0 - ip link set veth2 master bond0 - # check bonding member prio value - ip link set dev veth0 type bond_slave prio 0 - ip link set dev veth1 type bond_slave prio 10 - ip link set dev veth2 type bond_slave prio 11 - ip -d link show veth0 | grep -q 'prio 0' - check_err $? "veth0 prio is not 0" - ip -d link show veth1 | grep -q 'prio 10' - check_err $? "veth0 prio is not 10" - ip -d link show veth2 | grep -q 'prio 11' - check_err $? "veth0 prio is not 11" - - ip link set veth0 up - ip link set veth1 up - ip link set veth2 up - ip link set veth0_p up - ip link set veth1_p up - ip link set veth2_p up - - # prepare ping target - ip link add name br0 type bridge - ip link set br0 up - ip link set veth0_p master br0 - ip link set veth1_p master br0 - ip link set veth2_p master br0 - ip link add name veth3 type veth peer name veth3_p - ip netns add ns1 - ip link set veth3_p master br0 up - ip link set veth3 netns ns1 up - ip netns exec ns1 ip addr add $peer_ip4/24 dev veth3 - ip netns exec ns1 ip addr add $peer_ip6/64 dev veth3 - ip addr add $bond_ip4/24 dev bond0 - ip addr add $bond_ip6/64 dev bond0 - sleep 5 - - ping $peer_ip4 -c5 -I bond0 &>/dev/null - check_err $? "ping failed 1." - ping6 $peer_ip6 -c5 -I bond0 &>/dev/null - check_err $? "ping6 failed 1." - - # active salve should be the primary slave - check_active_slave veth1 - - # active slave should be the higher prio slave - ip link set $active_slave down - ping $peer_ip4 -c5 -I bond0 &>/dev/null - check_err $? "ping failed 2." - check_active_slave veth2 - - # when only 1 slave is up - ip link set $active_slave down - ping $peer_ip4 -c5 -I bond0 &>/dev/null - check_err $? "ping failed 3." - check_active_slave veth0 - - # when a higher prio slave change to up - ip link set veth2 up - ping $peer_ip4 -c5 -I bond0 &>/dev/null - check_err $? "ping failed 4." - case $primary_reselect in - "0") - check_active_slave "veth2" - ;; - "1") - check_active_slave "veth0" - ;; - "2") - check_active_slave "veth0" - ;; - esac - local pre_active_slave=$active_slave - - # when the primary slave change to up - ip link set veth1 up - ping $peer_ip4 -c5 -I bond0 &>/dev/null - check_err $? "ping failed 5." - case $primary_reselect in - "0") - check_active_slave "veth1" - ;; - "1") - check_active_slave "$pre_active_slave" - ;; - "2") - check_active_slave "$pre_active_slave" - ip link set $active_slave down - ping $peer_ip4 -c5 -I bond0 &>/dev/null - check_err $? "ping failed 6." - check_active_slave "veth1" - ;; - esac - - # Test changing bond salve prio - if [[ "$primary_reselect" == "0" ]];then - ip link set dev veth0 type bond_slave prio 1000000 - ip link set dev veth1 type bond_slave prio 0 - ip link set dev veth2 type bond_slave prio -50 - ip -d link show veth0 | grep -q 'prio 1000000' - check_err $? "veth0 prio is not 1000000" - ip -d link show veth1 | grep -q 'prio 0' - check_err $? "veth1 prio is not 0" - ip -d link show veth2 | grep -q 'prio -50' - check_err $? "veth3 prio is not -50" - check_active_slave "veth1" - - ip link set $active_slave down - ping $peer_ip4 -c5 -I bond0 &>/dev/null - check_err $? "ping failed 7." - check_active_slave "veth0" - fi - - cleanup - - log_test "prio_test" "Test bonding option 'prio' with mode=$mode monitor=$monitor and primary_reselect=$primary_reselect" -} - -prio_miimon_test() -{ - local mode - local primary_reselect - - for mode in 1 5 6; do - for primary_reselect in 0 1 2; do - prio_test "miimon" $mode $primary_reselect - done - done -} - -prio_arp_ip_target_test() -{ - local primary_reselect - - for primary_reselect in 0 1 2; do - prio_test "arp_ip_target" 1 $primary_reselect - done -} - -if skip;then - log_test_skip "option_prio.sh" "Current iproute doesn't support 'prio'." - exit 0 -fi - -trap cleanup EXIT - -tests_run - -exit "$EXIT_STATUS" From 2e825f8accb4491466677162cd9893fe77aea2f9 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 6 Apr 2023 16:23:52 +0800 Subject: [PATCH 809/898] selftests: bonding: add arp validate test This patch add bonding arp validate tests with mode active backup, monitor arp_ip_target and ns_ip6_target. It also checks mii_status to make sure all slaves are UP. Acked-by: Jonathan Toppins Acked-by: Jay Vosburgh Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- .../drivers/net/bonding/bond_options.sh | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index 7213211d0bde2..db29a3146a861 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -5,6 +5,7 @@ ALL_TESTS=" prio + arp_validate " REQUIRE_MZ=no @@ -200,6 +201,60 @@ prio() done } +arp_validate_test() +{ + local param="$1" + RET=0 + + # create bond + bond_reset "${param}" + + bond_check_connection + [ $RET -ne 0 ] && log_test "arp_validate" "$retmsg" + + # wait for a while to make sure the mii status stable + sleep 5 + for i in $(seq 0 2); do + mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status") + if [ ${mii_status} != "UP" ]; then + RET=1 + log_test "arp_validate" "interface eth$i mii_status $mii_status" + fi + done +} + +arp_validate_arp() +{ + local mode=$1 + local val + for val in $(seq 0 6); do + arp_validate_test "mode $mode arp_interval 100 arp_ip_target ${g_ip4} arp_validate $val" + log_test "arp_validate" "$mode arp_ip_target arp_validate $val" + done +} + +arp_validate_ns() +{ + local mode=$1 + local val + + if skip_ns; then + log_test_skip "arp_validate ns" "Current iproute or kernel doesn't support bond option 'ns_ip6_target'." + return 0 + fi + + for val in $(seq 0 6); do + arp_validate_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6} arp_validate $val" + log_test "arp_validate" "$mode ns_ip6_target arp_validate $val" + done +} + +arp_validate() +{ + arp_validate_arp "active-backup" + arp_validate_ns "active-backup" +} + trap cleanup EXIT setup_prepare From 0c0da0e951053fda20412cd284e2714bbbb31bff Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Thu, 6 Apr 2023 15:35:27 -0600 Subject: [PATCH 810/898] iavf: refactor VLAN filter states The VLAN filter states are currently being saved as individual bits. This is error prone as multiple bits might be mistakenly set. Fix by replacing the bits with a single state enum. Also, add an "ACTIVE" state for filters that are accepted by the PF. Signed-off-by: Ahmed Zaki Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 15 +++++---- drivers/net/ethernet/intel/iavf/iavf_main.c | 8 ++--- .../net/ethernet/intel/iavf/iavf_virtchnl.c | 31 +++++++++---------- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 232bc61d9eee9..692d6a6421183 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -158,15 +158,18 @@ struct iavf_vlan { u16 tpid; }; +enum iavf_vlan_state_t { + IAVF_VLAN_INVALID, + IAVF_VLAN_ADD, /* filter needs to be added */ + IAVF_VLAN_IS_NEW, /* filter is new, wait for PF answer */ + IAVF_VLAN_ACTIVE, /* filter is accepted by PF */ + IAVF_VLAN_REMOVE, /* filter needs to be removed */ +}; + struct iavf_vlan_filter { struct list_head list; struct iavf_vlan vlan; - struct { - u8 is_new_vlan:1; /* filter is new, wait for PF answer */ - u8 remove:1; /* filter needs to be removed */ - u8 add:1; /* filter needs to be added */ - u8 padding:5; - }; + enum iavf_vlan_state_t state; }; #define IAVF_MAX_TRAFFIC_CLASS 4 diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 095201e83c9db..fe9798e4b4ace 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -791,7 +791,7 @@ iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter, f->vlan = vlan; list_add_tail(&f->list, &adapter->vlan_filter_list); - f->add = true; + f->state = IAVF_VLAN_ADD; adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; } @@ -813,7 +813,7 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, struct iavf_vlan vlan) f = iavf_find_vlan(adapter, vlan); if (f) { - f->remove = true; + f->state = IAVF_VLAN_REMOVE; adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER; } @@ -1296,11 +1296,11 @@ static void iavf_clear_mac_vlan_filters(struct iavf_adapter *adapter) /* remove all VLAN filters */ list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list, list) { - if (vlf->add) { + if (vlf->state == IAVF_VLAN_ADD) { list_del(&vlf->list); kfree(vlf); } else { - vlf->remove = true; + vlf->state = IAVF_VLAN_REMOVE; } } spin_unlock_bh(&adapter->mac_vlan_list_lock); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 4e17d006c52d4..5047b4c83718d 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -642,7 +642,7 @@ static void iavf_vlan_add_reject(struct iavf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - if (f->is_new_vlan) { + if (f->state == IAVF_VLAN_IS_NEW) { if (f->vlan.tpid == ETH_P_8021Q) clear_bit(f->vlan.vid, adapter->vsi.active_cvlans); @@ -679,7 +679,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); list_for_each_entry(f, &adapter->vlan_filter_list, list) { - if (f->add) + if (f->state == IAVF_VLAN_ADD) count++; } if (!count || !VLAN_FILTERING_ALLOWED(adapter)) { @@ -710,11 +710,10 @@ void iavf_add_vlans(struct iavf_adapter *adapter) vvfl->vsi_id = adapter->vsi_res->vsi_id; vvfl->num_elements = count; list_for_each_entry(f, &adapter->vlan_filter_list, list) { - if (f->add) { + if (f->state == IAVF_VLAN_ADD) { vvfl->vlan_id[i] = f->vlan.vid; i++; - f->add = false; - f->is_new_vlan = true; + f->state = IAVF_VLAN_IS_NEW; if (i == count) break; } @@ -760,7 +759,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter) vvfl_v2->vport_id = adapter->vsi_res->vsi_id; vvfl_v2->num_elements = count; list_for_each_entry(f, &adapter->vlan_filter_list, list) { - if (f->add) { + if (f->state == IAVF_VLAN_ADD) { struct virtchnl_vlan_supported_caps *filtering_support = &adapter->vlan_v2_caps.filtering.filtering_support; struct virtchnl_vlan *vlan; @@ -778,8 +777,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter) vlan->tpid = f->vlan.tpid; i++; - f->add = false; - f->is_new_vlan = true; + f->state = IAVF_VLAN_IS_NEW; } } @@ -822,10 +820,11 @@ void iavf_del_vlans(struct iavf_adapter *adapter) * filters marked for removal to enable bailing out before * sending a virtchnl message */ - if (f->remove && !VLAN_FILTERING_ALLOWED(adapter)) { + if (f->state == IAVF_VLAN_REMOVE && + !VLAN_FILTERING_ALLOWED(adapter)) { list_del(&f->list); kfree(f); - } else if (f->remove) { + } else if (f->state == IAVF_VLAN_REMOVE) { count++; } } @@ -857,7 +856,7 @@ void iavf_del_vlans(struct iavf_adapter *adapter) vvfl->vsi_id = adapter->vsi_res->vsi_id; vvfl->num_elements = count; list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - if (f->remove) { + if (f->state == IAVF_VLAN_REMOVE) { vvfl->vlan_id[i] = f->vlan.vid; i++; list_del(&f->list); @@ -901,7 +900,7 @@ void iavf_del_vlans(struct iavf_adapter *adapter) vvfl_v2->vport_id = adapter->vsi_res->vsi_id; vvfl_v2->num_elements = count; list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - if (f->remove) { + if (f->state == IAVF_VLAN_REMOVE) { struct virtchnl_vlan_supported_caps *filtering_support = &adapter->vlan_v2_caps.filtering.filtering_support; struct virtchnl_vlan *vlan; @@ -2192,7 +2191,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, list_for_each_entry(vlf, &adapter->vlan_filter_list, list) - vlf->add = true; + vlf->state = IAVF_VLAN_ADD; adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; @@ -2260,7 +2259,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, list_for_each_entry(vlf, &adapter->vlan_filter_list, list) - vlf->add = true; + vlf->state = IAVF_VLAN_ADD; aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; } @@ -2444,8 +2443,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, spin_lock_bh(&adapter->mac_vlan_list_lock); list_for_each_entry(f, &adapter->vlan_filter_list, list) { - if (f->is_new_vlan) { - f->is_new_vlan = false; + if (f->state == IAVF_VLAN_IS_NEW) { + f->state = IAVF_VLAN_ACTIVE; if (f->vlan.tpid == ETH_P_8021Q) set_bit(f->vlan.vid, adapter->vsi.active_cvlans); From 9c85b7fa12ef2e4fc11a4e31ac595fb5f9d0ddf9 Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Thu, 6 Apr 2023 15:35:28 -0600 Subject: [PATCH 811/898] iavf: remove active_cvlans and active_svlans bitmaps The VLAN filters info is currently being held in a list and 2 bitmaps (active_cvlans and active_svlans). We are experiencing some racing where data is not in sync in the list and bitmaps. For example, the VLAN is initially added to the list but only when the PF replies, it is added to the bitmap. If a user adds many V2 VLANS before the PF responds: while [ $((i++)) ] ip l add l eth0 name eth0.$i type vlan id $i we might end up with more VLAN list entries than the designated limit. Also, The "ip link show" will show more links added than the PF limit. On the other and, the bitmaps are only used to check the number of VLAN filters and to re-enable the filters when the interface goes from DOWN to UP. This patch gets rid of the bitmaps and uses the list only. To do that, the states of the VLAN filter are modified: 1 - IAVF_VLAN_REMOVE: the entry needs to be totally removed after informing the PF. This is the "ip link del eth0.$i" path. 2 - IAVF_VLAN_DISABLE: (new) the netdev went down. The filter needs to be removed from the PF and then marked INACTIVE. 3 - IAVF_VLAN_INACTIVE: (new) no PF filter exists, but the user did not delete the VLAN. Fixes: 48ccc43ecf10 ("iavf: Add support VIRTCHNL_VF_OFFLOAD_VLAN_V2 during netdev config") Signed-off-by: Ahmed Zaki Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 7 +-- drivers/net/ethernet/intel/iavf/iavf_main.c | 40 +++++++---------- .../net/ethernet/intel/iavf/iavf_virtchnl.c | 45 ++++++++++--------- 3 files changed, 45 insertions(+), 47 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 692d6a6421183..746ff76f2fb1e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -59,8 +59,6 @@ enum iavf_vsi_state_t { struct iavf_vsi { struct iavf_adapter *back; struct net_device *netdev; - unsigned long active_cvlans[BITS_TO_LONGS(VLAN_N_VID)]; - unsigned long active_svlans[BITS_TO_LONGS(VLAN_N_VID)]; u16 seid; u16 id; DECLARE_BITMAP(state, __IAVF_VSI_STATE_SIZE__); @@ -163,7 +161,9 @@ enum iavf_vlan_state_t { IAVF_VLAN_ADD, /* filter needs to be added */ IAVF_VLAN_IS_NEW, /* filter is new, wait for PF answer */ IAVF_VLAN_ACTIVE, /* filter is accepted by PF */ - IAVF_VLAN_REMOVE, /* filter needs to be removed */ + IAVF_VLAN_DISABLE, /* filter needs to be deleted by PF, then marked INACTIVE */ + IAVF_VLAN_INACTIVE, /* filter is inactive, we are in IFF_DOWN */ + IAVF_VLAN_REMOVE, /* filter needs to be removed from list */ }; struct iavf_vlan_filter { @@ -261,6 +261,7 @@ struct iavf_adapter { wait_queue_head_t vc_waitqueue; struct iavf_q_vector *q_vectors; struct list_head vlan_filter_list; + int num_vlan_filters; struct list_head mac_filter_list; struct mutex crit_lock; struct mutex client_lock; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index fe9798e4b4ace..2de4baff4c205 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -792,6 +792,7 @@ iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter, list_add_tail(&f->list, &adapter->vlan_filter_list); f->state = IAVF_VLAN_ADD; + adapter->num_vlan_filters++; adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; } @@ -828,14 +829,18 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, struct iavf_vlan vlan) **/ static void iavf_restore_filters(struct iavf_adapter *adapter) { - u16 vid; + struct iavf_vlan_filter *f; /* re-add all VLAN filters */ - for_each_set_bit(vid, adapter->vsi.active_cvlans, VLAN_N_VID) - iavf_add_vlan(adapter, IAVF_VLAN(vid, ETH_P_8021Q)); + spin_lock_bh(&adapter->mac_vlan_list_lock); - for_each_set_bit(vid, adapter->vsi.active_svlans, VLAN_N_VID) - iavf_add_vlan(adapter, IAVF_VLAN(vid, ETH_P_8021AD)); + list_for_each_entry(f, &adapter->vlan_filter_list, list) { + if (f->state == IAVF_VLAN_INACTIVE) + f->state = IAVF_VLAN_ADD; + } + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; } /** @@ -844,8 +849,7 @@ static void iavf_restore_filters(struct iavf_adapter *adapter) */ u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter) { - return bitmap_weight(adapter->vsi.active_cvlans, VLAN_N_VID) + - bitmap_weight(adapter->vsi.active_svlans, VLAN_N_VID); + return adapter->num_vlan_filters; } /** @@ -928,11 +932,6 @@ static int iavf_vlan_rx_kill_vid(struct net_device *netdev, return 0; iavf_del_vlan(adapter, IAVF_VLAN(vid, be16_to_cpu(proto))); - if (proto == cpu_to_be16(ETH_P_8021Q)) - clear_bit(vid, adapter->vsi.active_cvlans); - else - clear_bit(vid, adapter->vsi.active_svlans); - return 0; } @@ -1293,16 +1292,11 @@ static void iavf_clear_mac_vlan_filters(struct iavf_adapter *adapter) } } - /* remove all VLAN filters */ + /* disable all VLAN filters */ list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list, - list) { - if (vlf->state == IAVF_VLAN_ADD) { - list_del(&vlf->list); - kfree(vlf); - } else { - vlf->state = IAVF_VLAN_REMOVE; - } - } + list) + vlf->state = IAVF_VLAN_DISABLE; + spin_unlock_bh(&adapter->mac_vlan_list_lock); } @@ -2914,6 +2908,7 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) list_del(&fv->list); kfree(fv); } + adapter->num_vlan_filters = 0; spin_unlock_bh(&adapter->mac_vlan_list_lock); @@ -3131,9 +3126,6 @@ static void iavf_reset_task(struct work_struct *work) adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER; iavf_misc_irq_enable(adapter); - bitmap_clear(adapter->vsi.active_cvlans, 0, VLAN_N_VID); - bitmap_clear(adapter->vsi.active_svlans, 0, VLAN_N_VID); - mod_delayed_work(adapter->wq, &adapter->watchdog_task, 2); /* We were running when the reset started, so we need to restore some diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 5047b4c83718d..9afbbdac35903 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -643,15 +643,9 @@ static void iavf_vlan_add_reject(struct iavf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { if (f->state == IAVF_VLAN_IS_NEW) { - if (f->vlan.tpid == ETH_P_8021Q) - clear_bit(f->vlan.vid, - adapter->vsi.active_cvlans); - else - clear_bit(f->vlan.vid, - adapter->vsi.active_svlans); - list_del(&f->list); kfree(f); + adapter->num_vlan_filters--; } } spin_unlock_bh(&adapter->mac_vlan_list_lock); @@ -824,7 +818,12 @@ void iavf_del_vlans(struct iavf_adapter *adapter) !VLAN_FILTERING_ALLOWED(adapter)) { list_del(&f->list); kfree(f); - } else if (f->state == IAVF_VLAN_REMOVE) { + adapter->num_vlan_filters--; + } else if (f->state == IAVF_VLAN_DISABLE && + !VLAN_FILTERING_ALLOWED(adapter)) { + f->state = IAVF_VLAN_INACTIVE; + } else if (f->state == IAVF_VLAN_REMOVE || + f->state == IAVF_VLAN_DISABLE) { count++; } } @@ -856,11 +855,18 @@ void iavf_del_vlans(struct iavf_adapter *adapter) vvfl->vsi_id = adapter->vsi_res->vsi_id; vvfl->num_elements = count; list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - if (f->state == IAVF_VLAN_REMOVE) { + if (f->state == IAVF_VLAN_DISABLE) { vvfl->vlan_id[i] = f->vlan.vid; + f->state = IAVF_VLAN_INACTIVE; i++; + if (i == count) + break; + } else if (f->state == IAVF_VLAN_REMOVE) { + vvfl->vlan_id[i] = f->vlan.vid; list_del(&f->list); kfree(f); + adapter->num_vlan_filters--; + i++; if (i == count) break; } @@ -900,7 +906,8 @@ void iavf_del_vlans(struct iavf_adapter *adapter) vvfl_v2->vport_id = adapter->vsi_res->vsi_id; vvfl_v2->num_elements = count; list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - if (f->state == IAVF_VLAN_REMOVE) { + if (f->state == IAVF_VLAN_DISABLE || + f->state == IAVF_VLAN_REMOVE) { struct virtchnl_vlan_supported_caps *filtering_support = &adapter->vlan_v2_caps.filtering.filtering_support; struct virtchnl_vlan *vlan; @@ -914,8 +921,13 @@ void iavf_del_vlans(struct iavf_adapter *adapter) vlan->tci = f->vlan.vid; vlan->tpid = f->vlan.tpid; - list_del(&f->list); - kfree(f); + if (f->state == IAVF_VLAN_DISABLE) { + f->state = IAVF_VLAN_INACTIVE; + } else { + list_del(&f->list); + kfree(f); + adapter->num_vlan_filters--; + } i++; if (i == count) break; @@ -2443,15 +2455,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, spin_lock_bh(&adapter->mac_vlan_list_lock); list_for_each_entry(f, &adapter->vlan_filter_list, list) { - if (f->state == IAVF_VLAN_IS_NEW) { + if (f->state == IAVF_VLAN_IS_NEW) f->state = IAVF_VLAN_ACTIVE; - if (f->vlan.tpid == ETH_P_8021Q) - set_bit(f->vlan.vid, - adapter->vsi.active_cvlans); - else - set_bit(f->vlan.vid, - adapter->vsi.active_svlans); - } } spin_unlock_bh(&adapter->mac_vlan_list_lock); } From 066b86787fa3d97b7aefb5ac0a99a22dad2d15f8 Mon Sep 17 00:00:00 2001 From: Felix Huettner Date: Wed, 5 Apr 2023 07:53:41 +0000 Subject: [PATCH 812/898] net: openvswitch: fix race on port output assume the following setup on a single machine: 1. An openvswitch instance with one bridge and default flows 2. two network namespaces "server" and "client" 3. two ovs interfaces "server" and "client" on the bridge 4. for each ovs interface a veth pair with a matching name and 32 rx and tx queues 5. move the ends of the veth pairs to the respective network namespaces 6. assign ip addresses to each of the veth ends in the namespaces (needs to be the same subnet) 7. start some http server on the server network namespace 8. test if a client in the client namespace can reach the http server when following the actions below the host has a chance of getting a cpu stuck in a infinite loop: 1. send a large amount of parallel requests to the http server (around 3000 curls should work) 2. in parallel delete the network namespace (do not delete interfaces or stop the server, just kill the namespace) there is a low chance that this will cause the below kernel cpu stuck message. If this does not happen just retry. Below there is also the output of bpftrace for the functions mentioned in the output. The series of events happening here is: 1. the network namespace is deleted calling `unregister_netdevice_many_notify` somewhere in the process 2. this sets first `NETREG_UNREGISTERING` on both ends of the veth and then runs `synchronize_net` 3. it then calls `call_netdevice_notifiers` with `NETDEV_UNREGISTER` 4. this is then handled by `dp_device_event` which calls `ovs_netdev_detach_dev` (if a vport is found, which is the case for the veth interface attached to ovs) 5. this removes the rx_handlers of the device but does not prevent packages to be sent to the device 6. `dp_device_event` then queues the vport deletion to work in background as a ovs_lock is needed that we do not hold in the unregistration path 7. `unregister_netdevice_many_notify` continues to call `netdev_unregister_kobject` which sets `real_num_tx_queues` to 0 8. port deletion continues (but details are not relevant for this issue) 9. at some future point the background task deletes the vport If after 7. but before 9. a packet is send to the ovs vport (which is not deleted at this point in time) which forwards it to the `dev_queue_xmit` flow even though the device is unregistering. In `skb_tx_hash` (which is called in the `dev_queue_xmit`) path there is a while loop (if the packet has a rx_queue recorded) that is infinite if `dev->real_num_tx_queues` is zero. To prevent this from happening we update `do_output` to handle devices without carrier the same as if the device is not found (which would be the code path after 9. is done). Additionally we now produce a warning in `skb_tx_hash` if we will hit the infinite loop. bpftrace (first word is function name): __dev_queue_xmit server: real_num_tx_queues: 1, cpu: 2, pid: 28024, tid: 28024, skb_addr: 0xffff9edb6f207000, reg_state: 1 netdev_core_pick_tx server: addr: 0xffff9f0a46d4a000 real_num_tx_queues: 1, cpu: 2, pid: 28024, tid: 28024, skb_addr: 0xffff9edb6f207000, reg_state: 1 dp_device_event server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, event 2, reg_state: 1 synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024 synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024 synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024 synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024 dp_device_event server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, event 6, reg_state: 2 ovs_netdev_detach_dev server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, reg_state: 2 netdev_rx_handler_unregister server: real_num_tx_queues: 1, cpu: 9, pid: 21024, tid: 21024, reg_state: 2 synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024 netdev_rx_handler_unregister ret server: real_num_tx_queues: 1, cpu: 9, pid: 21024, tid: 21024, reg_state: 2 dp_device_event server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, event 27, reg_state: 2 dp_device_event server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, event 22, reg_state: 2 dp_device_event server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, event 18, reg_state: 2 netdev_unregister_kobject: real_num_tx_queues: 1, cpu: 9, pid: 21024, tid: 21024 synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024 ovs_vport_send server: real_num_tx_queues: 0, cpu: 2, pid: 28024, tid: 28024, skb_addr: 0xffff9edb6f207000, reg_state: 2 __dev_queue_xmit server: real_num_tx_queues: 0, cpu: 2, pid: 28024, tid: 28024, skb_addr: 0xffff9edb6f207000, reg_state: 2 netdev_core_pick_tx server: addr: 0xffff9f0a46d4a000 real_num_tx_queues: 0, cpu: 2, pid: 28024, tid: 28024, skb_addr: 0xffff9edb6f207000, reg_state: 2 broken device server: real_num_tx_queues: 0, cpu: 2, pid: 28024, tid: 28024 ovs_dp_detach_port server: real_num_tx_queues: 0 cpu 9, pid: 9124, tid: 9124, reg_state: 2 synchronize_rcu_expedited: cpu 9, pid: 33604, tid: 33604 stuck message: watchdog: BUG: soft lockup - CPU#5 stuck for 26s! [curl:1929279] Modules linked in: veth pktgen bridge stp llc ip_set_hash_net nft_counter xt_set nft_compat nf_tables ip_set_hash_ip ip_set nfnetlink_cttimeout nfnetlink openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 tls binfmt_misc nls_iso8859_1 input_leds joydev serio_raw dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua sch_fq_codel drm efi_pstore virtio_rng ip_tables x_tables autofs4 btrfs blake2b_generic zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear hid_generic usbhid hid crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel virtio_net ahci net_failover crypto_simd cryptd psmouse libahci virtio_blk failover CPU: 5 PID: 1929279 Comm: curl Not tainted 5.15.0-67-generic #74-Ubuntu Hardware name: OpenStack Foundation OpenStack Nova, BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:netdev_pick_tx+0xf1/0x320 Code: 00 00 8d 48 ff 0f b7 c1 66 39 ca 0f 86 e9 01 00 00 45 0f b7 ff 41 39 c7 0f 87 5b 01 00 00 44 29 f8 41 39 c7 0f 87 4f 01 00 00 f2 0f 1f 44 00 00 49 8b 94 24 28 04 00 00 48 85 d2 0f 84 53 01 RSP: 0018:ffffb78b40298820 EFLAGS: 00000246 RAX: 0000000000000000 RBX: ffff9c8773adc2e0 RCX: 000000000000083f RDX: 0000000000000000 RSI: ffff9c8773adc2e0 RDI: ffff9c870a25e000 RBP: ffffb78b40298858 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff9c870a25e000 R13: ffff9c870a25e000 R14: ffff9c87fe043480 R15: 0000000000000000 FS: 00007f7b80008f00(0000) GS:ffff9c8e5f740000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7b80f6a0b0 CR3: 0000000329d66000 CR4: 0000000000350ee0 Call Trace: netdev_core_pick_tx+0xa4/0xb0 __dev_queue_xmit+0xf8/0x510 ? __bpf_prog_exit+0x1e/0x30 dev_queue_xmit+0x10/0x20 ovs_vport_send+0xad/0x170 [openvswitch] do_output+0x59/0x180 [openvswitch] do_execute_actions+0xa80/0xaa0 [openvswitch] ? kfree+0x1/0x250 ? kfree+0x1/0x250 ? kprobe_perf_func+0x4f/0x2b0 ? flow_lookup.constprop.0+0x5c/0x110 [openvswitch] ovs_execute_actions+0x4c/0x120 [openvswitch] ovs_dp_process_packet+0xa1/0x200 [openvswitch] ? ovs_ct_update_key.isra.0+0xa8/0x120 [openvswitch] ? ovs_ct_fill_key+0x1d/0x30 [openvswitch] ? ovs_flow_key_extract+0x2db/0x350 [openvswitch] ovs_vport_receive+0x77/0xd0 [openvswitch] ? __htab_map_lookup_elem+0x4e/0x60 ? bpf_prog_680e8aff8547aec1_kfree+0x3b/0x714 ? trace_call_bpf+0xc8/0x150 ? kfree+0x1/0x250 ? kfree+0x1/0x250 ? kprobe_perf_func+0x4f/0x2b0 ? kprobe_perf_func+0x4f/0x2b0 ? __mod_memcg_lruvec_state+0x63/0xe0 netdev_port_receive+0xc4/0x180 [openvswitch] ? netdev_port_receive+0x180/0x180 [openvswitch] netdev_frame_hook+0x1f/0x40 [openvswitch] __netif_receive_skb_core.constprop.0+0x23d/0xf00 __netif_receive_skb_one_core+0x3f/0xa0 __netif_receive_skb+0x15/0x60 process_backlog+0x9e/0x170 __napi_poll+0x33/0x180 net_rx_action+0x126/0x280 ? ttwu_do_activate+0x72/0xf0 __do_softirq+0xd9/0x2e7 ? rcu_report_exp_cpu_mult+0x1b0/0x1b0 do_softirq+0x7d/0xb0 __local_bh_enable_ip+0x54/0x60 ip_finish_output2+0x191/0x460 __ip_finish_output+0xb7/0x180 ip_finish_output+0x2e/0xc0 ip_output+0x78/0x100 ? __ip_finish_output+0x180/0x180 ip_local_out+0x5e/0x70 __ip_queue_xmit+0x184/0x440 ? tcp_syn_options+0x1f9/0x300 ip_queue_xmit+0x15/0x20 __tcp_transmit_skb+0x910/0x9c0 ? __mod_memcg_state+0x44/0xa0 tcp_connect+0x437/0x4e0 ? ktime_get_with_offset+0x60/0xf0 tcp_v4_connect+0x436/0x530 __inet_stream_connect+0xd4/0x3a0 ? kprobe_perf_func+0x4f/0x2b0 ? aa_sk_perm+0x43/0x1c0 inet_stream_connect+0x3b/0x60 __sys_connect_file+0x63/0x70 __sys_connect+0xa6/0xd0 ? setfl+0x108/0x170 ? do_fcntl+0xe8/0x5a0 __x64_sys_connect+0x18/0x20 do_syscall_64+0x5c/0xc0 ? __x64_sys_fcntl+0xa9/0xd0 ? exit_to_user_mode_prepare+0x37/0xb0 ? syscall_exit_to_user_mode+0x27/0x50 ? do_syscall_64+0x69/0xc0 ? __sys_setsockopt+0xea/0x1e0 ? exit_to_user_mode_prepare+0x37/0xb0 ? syscall_exit_to_user_mode+0x27/0x50 ? __x64_sys_setsockopt+0x1f/0x30 ? do_syscall_64+0x69/0xc0 ? irqentry_exit+0x1d/0x30 ? exc_page_fault+0x89/0x170 entry_SYSCALL_64_after_hwframe+0x61/0xcb RIP: 0033:0x7f7b8101c6a7 Code: 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2a 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 18 89 54 24 0c 48 89 34 24 89 RSP: 002b:00007ffffd6b2198 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f7b8101c6a7 RDX: 0000000000000010 RSI: 00007ffffd6b2360 RDI: 0000000000000005 RBP: 0000561f1370d560 R08: 00002795ad21d1ac R09: 0030312e302e302e R10: 00007ffffd73f080 R11: 0000000000000246 R12: 0000561f1370c410 R13: 0000000000000000 R14: 0000000000000005 R15: 0000000000000000 Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") Co-developed-by: Luca Czesla Signed-off-by: Luca Czesla Signed-off-by: Felix Huettner Reviewed-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/ZC0pBXBAgh7c76CA@kernel-bug-kernel-bug Signed-off-by: Jakub Kicinski --- net/core/dev.c | 1 + net/openvswitch/actions.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 253584777101f..48067321c0dba 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3199,6 +3199,7 @@ static u16 skb_tx_hash(const struct net_device *dev, } if (skb_rx_queue_recorded(skb)) { + DEBUG_NET_WARN_ON_ONCE(qcount == 0); hash = skb_get_rx_queue(skb); if (hash >= qoffset) hash -= qoffset; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index ca3ebfdb30231..a8cf9a88758ef 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -913,7 +913,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, { struct vport *vport = ovs_vport_rcu(dp, out_port); - if (likely(vport)) { + if (likely(vport && netif_carrier_ok(vport->dev))) { u16 mru = OVS_CB(skb)->mru; u32 cutlen = OVS_CB(skb)->cutlen; From bdaaecc127d471c422ee9e994978617c8aa79e1e Mon Sep 17 00:00:00 2001 From: "Radu Pirea (OSS)" Date: Thu, 6 Apr 2023 12:59:53 +0300 Subject: [PATCH 813/898] net: phy: nxp-c45-tja11xx: fix unsigned long multiplication overflow Any multiplication between GENMASK(31, 0) and a number bigger than 1 will be truncated because of the overflow, if the size of unsigned long is 32 bits. Replaced GENMASK with GENMASK_ULL to make sure that multiplication will be between 64 bits values. Cc: # 5.15+ Fixes: 514def5dd339 ("phy: nxp-c45-tja11xx: add timestamping support") Signed-off-by: Radu Pirea (OSS) Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230406095953.75622-1-radu-nicolae.pirea@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/nxp-c45-tja11xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 5813b07242ce1..1b7d6941b0f35 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -191,7 +191,7 @@ #define MAX_ID_PS 2260U #define DEFAULT_ID_PS 2000U -#define PPM_TO_SUBNS_INC(ppb) div_u64(GENMASK(31, 0) * (ppb) * \ +#define PPM_TO_SUBNS_INC(ppb) div_u64(GENMASK_ULL(31, 0) * (ppb) * \ PTP_CLK_PERIOD_100BT1, NSEC_PER_SEC) #define NXP_C45_SKB_CB(skb) ((struct nxp_c45_skb_cb *)(skb)->cb) From 5cc33f139e11b893ff6dc60d8a0ae865a65521ac Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 6 Apr 2023 17:14:26 -0700 Subject: [PATCH 814/898] r8152: Add __GFP_NOWARN to big allocations When memory is a little tight on my system, it's pretty easy to see warnings that look like this. ksoftirqd/0: page allocation failure: order:3, mode:0x40a20(GFP_ATOMIC|__GFP_COMP), nodemask=(null),cpuset=/,mems_allowed=0 ... Call trace: dump_backtrace+0x0/0x1e8 show_stack+0x20/0x2c dump_stack_lvl+0x60/0x78 dump_stack+0x18/0x38 warn_alloc+0x104/0x174 __alloc_pages+0x588/0x67c alloc_rx_agg+0xa0/0x190 [r8152 ...] r8152_poll+0x270/0x760 [r8152 ...] __napi_poll+0x44/0x1ec net_rx_action+0x100/0x300 __do_softirq+0xec/0x38c run_ksoftirqd+0x38/0xec smpboot_thread_fn+0xb8/0x248 kthread+0x134/0x154 ret_from_fork+0x10/0x20 On a fragmented system it's normal that order 3 allocations will sometimes fail, especially atomic ones. The driver handles these failures fine and the WARN just creates spam in the logs for this case. The __GFP_NOWARN flag is exactly for this situation, so add it to the allocation. NOTE: my testing is on a 5.15 system, but there should be no reason that this would be fundamentally different on a mainline kernel. Signed-off-by: Douglas Anderson Acked-by: Hayes Wang Link: https://lore.kernel.org/r/20230406171411.1.I84dbef45786af440fd269b71e9436a96a8e7a152@changeid Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index decb5ba56a259..0fc4b959edc18 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1943,7 +1943,7 @@ static struct rx_agg *alloc_rx_agg(struct r8152 *tp, gfp_t mflags) if (!rx_agg) return NULL; - rx_agg->page = alloc_pages(mflags | __GFP_COMP, order); + rx_agg->page = alloc_pages(mflags | __GFP_COMP | __GFP_NOWARN, order); if (!rx_agg->page) goto free_rx; From d83806c4c0cccc0d6d3c3581a11983a9c186a138 Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Sun, 26 Mar 2023 18:21:21 +0000 Subject: [PATCH 815/898] purgatory: fix disabling debug info Since 32ef9e5054ec, -Wa,-gdwarf-2 is no longer used in KBUILD_AFLAGS. Instead, it includes -g, the appropriate -gdwarf-* flag, and also the -Wa versions of both of those if building with Clang and GNU as. As a result, debug info was being generated for the purgatory objects, even though the intention was that it not be. Fixes: 32ef9e5054ec ("Makefile.debug: re-enable debug info for .S files") Signed-off-by: Alyssa Ross Cc: stable@vger.kernel.org Acked-by: Nick Desaulniers Signed-off-by: Masahiro Yamada --- arch/riscv/purgatory/Makefile | 7 +------ arch/x86/purgatory/Makefile | 3 +-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index d16bf715a586b..5730797a6b402 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -84,12 +84,7 @@ CFLAGS_string.o += $(PURGATORY_CFLAGS) CFLAGS_REMOVE_ctype.o += $(PURGATORY_CFLAGS_REMOVE) CFLAGS_ctype.o += $(PURGATORY_CFLAGS) -AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2 -AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2 -AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2 -AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2 -AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2 -AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2 +asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x)) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 17f09dc263811..82fec66d46d29 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -69,8 +69,7 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS) CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE) CFLAGS_string.o += $(PURGATORY_CFLAGS) -AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2 -AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2 +asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x)) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) From dcc11ac9dcaffdce428794f282c100a736244b55 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 7 Apr 2023 14:42:48 -0700 Subject: [PATCH 816/898] Documentation/llvm: Add a note about prebuilt kernel.org toolchains I recently started uploading prebuilt stable versions of LLVM to kernel.org, which should make building the kernel with LLVM more accessible to maintainers and developers. Link them in the LLVM documentation to make this more visible. Link: https://lore.kernel.org/20230319235619.GA18547@dev-arch.thelio-3990X/ Suggested-by: Nick Desaulniers Reviewed-by: Bill Wendling Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- Documentation/kbuild/llvm.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst index bfb51685073cb..c3851fe1900da 100644 --- a/Documentation/kbuild/llvm.rst +++ b/Documentation/kbuild/llvm.rst @@ -171,6 +171,10 @@ Getting Help Getting LLVM ------------- +We provide prebuilt stable versions of LLVM on `kernel.org `_. +Below are links that may be useful for building LLVM from source or procuring +it through a distribution's package manager. + - https://releases.llvm.org/download.html - https://github.com/llvm/llvm-project - https://llvm.org/docs/GettingStarted.html From 813c2dd78618f108fdcf9cd726ea90f081ee2881 Mon Sep 17 00:00:00 2001 From: Ivan Bornyakov Date: Thu, 6 Apr 2023 16:08:32 +0300 Subject: [PATCH 817/898] net: sfp: initialize sfp->i2c_block_size at sfp allocation sfp->i2c_block_size is initialized at SFP module insertion in sfp_sm_mod_probe(). Because of that, if SFP module was never inserted since boot, sfp_read() call will lead to zero-length I2C read attempt, and not all I2C controllers are happy with zero-length reads. One way to issue sfp_read() on empty SFP cage is to execute ethtool -m. If SFP module was never plugged since boot, there will be a zero-length I2C read attempt. # ethtool -m xge0 i2c i2c-3: adapter quirk: no zero length (addr 0x0050, size 0, read) Cannot get Module EEPROM data: Operation not supported If SFP module was plugged then removed at least once, sfp->i2c_block_size will be initialized and ethtool -m will fail with different exit code and without I2C error # ethtool -m xge0 Cannot get Module EEPROM data: Remote I/O error Fix this by initializing sfp->i2_block_size at struct sfp allocation stage so no wild sfp_read() could issue zero-length I2C read. Signed-off-by: Ivan Bornyakov Fixes: 0d035bed2a4a ("net: sfp: VSOL V2801F / CarlitoxxPro CPGOS03-0490 v2.0 workaround") Cc: stable@vger.kernel.org Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 8af10bb53e57b..81edc457c5a14 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -210,6 +210,12 @@ static const enum gpiod_flags gpio_flags[] = { #define SFP_PHY_ADDR 22 #define SFP_PHY_ADDR_ROLLBALL 17 +/* SFP_EEPROM_BLOCK_SIZE is the size of data chunk to read the EEPROM + * at a time. Some SFP modules and also some Linux I2C drivers do not like + * reads longer than 16 bytes. + */ +#define SFP_EEPROM_BLOCK_SIZE 16 + struct sff_data { unsigned int gpios; bool (*module_supported)(const struct sfp_eeprom_id *id); @@ -1929,11 +1935,7 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) u8 check; int ret; - /* Some SFP modules and also some Linux I2C drivers do not like reads - * longer than 16 bytes, so read the EEPROM in chunks of 16 bytes at - * a time. - */ - sfp->i2c_block_size = 16; + sfp->i2c_block_size = SFP_EEPROM_BLOCK_SIZE; ret = sfp_read(sfp, false, 0, &id.base, sizeof(id.base)); if (ret < 0) { @@ -2621,6 +2623,7 @@ static struct sfp *sfp_alloc(struct device *dev) return ERR_PTR(-ENOMEM); sfp->dev = dev; + sfp->i2c_block_size = SFP_EEPROM_BLOCK_SIZE; mutex_init(&sfp->sm_mutex); mutex_init(&sfp->st_mutex); From bef227c1537cb8005311c0842bc5449e8c7a5973 Mon Sep 17 00:00:00 2001 From: Ivan Bornyakov Date: Thu, 6 Apr 2023 16:08:33 +0300 Subject: [PATCH 818/898] net: sfp: avoid EEPROM read of absent SFP module If SFP module is not present, it is sensible to fail sfp_module_eeprom() and sfp_module_eeprom_by_page() early to avoid excessive I2C transfers which are garanteed to fail. Suggested-by: Andrew Lunn Signed-off-by: Ivan Bornyakov Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 81edc457c5a14..bf345032d450c 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -2487,6 +2487,9 @@ static int sfp_module_eeprom(struct sfp *sfp, struct ethtool_eeprom *ee, unsigned int first, last, len; int ret; + if (!(sfp->state & SFP_F_PRESENT)) + return -ENODEV; + if (ee->len == 0) return -EINVAL; @@ -2519,6 +2522,9 @@ static int sfp_module_eeprom_by_page(struct sfp *sfp, const struct ethtool_module_eeprom *page, struct netlink_ext_ack *extack) { + if (!(sfp->state & SFP_F_PRESENT)) + return -ENODEV; + if (page->bank) { NL_SET_ERR_MSG(extack, "Banks not supported"); return -EOPNOTSUPP; From 09a9639e56c01c7a00d6c0ca63f4c7c41abe075d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Apr 2023 11:15:57 -0700 Subject: [PATCH 819/898] Linux 6.3-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ef4e96b9cd5b0..5aeea3d98fc0c 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From 19cf60bf63cbaf5262eac400c707966e19999b83 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 24 Mar 2023 10:57:55 -0700 Subject: [PATCH 820/898] Bluetooth: hci_conn: Fix not cleaning up on LE Connection failure hci_connect_le_scan_cleanup shall always be invoked to cleanup the states and re-enable passive scanning if necessary, otherwise it may cause the pending action to stay active causing multiple attempts to connect. Fixes: 9b3628d79b46 ("Bluetooth: hci_sync: Cleanup hci_conn if it cannot be aborted") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 52 +++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 17b946f9ba317..5af3f6b011c95 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -68,7 +68,7 @@ static const struct sco_param esco_param_msbc[] = { }; /* This function requires the caller holds hdev->lock */ -static void hci_connect_le_scan_cleanup(struct hci_conn *conn) +static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status) { struct hci_conn_params *params; struct hci_dev *hdev = conn->hdev; @@ -88,9 +88,28 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn) params = hci_pend_le_action_lookup(&hdev->pend_le_conns, bdaddr, bdaddr_type); - if (!params || !params->explicit_connect) + if (!params) return; + if (params->conn) { + hci_conn_drop(params->conn); + hci_conn_put(params->conn); + params->conn = NULL; + } + + if (!params->explicit_connect) + return; + + /* If the status indicates successful cancellation of + * the attempt (i.e. Unknown Connection Id) there's no point of + * notifying failure since we'll go back to keep trying to + * connect. The only exception is explicit connect requests + * where a timeout + cancel does indicate an actual failure. + */ + if (status && status != HCI_ERROR_UNKNOWN_CONN_ID) + mgmt_connect_failed(hdev, &conn->dst, conn->type, + conn->dst_type, status); + /* The connection attempt was doing scan for new RPA, and is * in scan phase. If params are not associated with any other * autoconnect action, remove them completely. If they are, just unmark @@ -178,7 +197,7 @@ static void le_scan_cleanup(struct work_struct *work) rcu_read_unlock(); if (c == conn) { - hci_connect_le_scan_cleanup(conn); + hci_connect_le_scan_cleanup(conn, 0x00); hci_conn_cleanup(conn); } @@ -1179,31 +1198,8 @@ EXPORT_SYMBOL(hci_get_route); static void hci_le_conn_failed(struct hci_conn *conn, u8 status) { struct hci_dev *hdev = conn->hdev; - struct hci_conn_params *params; - params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst, - conn->dst_type); - if (params && params->conn) { - hci_conn_drop(params->conn); - hci_conn_put(params->conn); - params->conn = NULL; - } - - /* If the status indicates successful cancellation of - * the attempt (i.e. Unknown Connection Id) there's no point of - * notifying failure since we'll go back to keep trying to - * connect. The only exception is explicit connect requests - * where a timeout + cancel does indicate an actual failure. - */ - if (status != HCI_ERROR_UNKNOWN_CONN_ID || - (params && params->explicit_connect)) - mgmt_connect_failed(hdev, &conn->dst, conn->type, - conn->dst_type, status); - - /* Since we may have temporarily stopped the background scanning in - * favor of connection establishment, we should restart it. - */ - hci_update_passive_scan(hdev); + hci_connect_le_scan_cleanup(conn, status); /* Enable advertising in case this was a failed connection * attempt as a peripheral. @@ -1240,7 +1236,7 @@ static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) hci_dev_lock(hdev); if (!err) { - hci_connect_le_scan_cleanup(conn); + hci_connect_le_scan_cleanup(conn, 0x00); goto done; } From b62e72200eaad523f08d8319bba50fc652e032a8 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 24 Mar 2023 13:18:20 -0700 Subject: [PATCH 821/898] Bluetooth: Fix printing errors if LE Connection times out This fixes errors like bellow when LE Connection times out since that is actually not a controller error: Bluetooth: hci0: Opcode 0x200d failed: -110 Bluetooth: hci0: request failed to create LE connection: err -110 Instead the code shall properly detect if -ETIMEDOUT is returned and send HCI_OP_LE_CREATE_CONN_CANCEL to give up on the connection. Link: https://github.com/bluez/bluez/issues/340 Fixes: 8e8b92ee60de ("Bluetooth: hci_sync: Add hci_le_create_conn_sync") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_conn.c | 7 +++++-- net/bluetooth/hci_event.c | 16 ++++++---------- net/bluetooth/hci_sync.c | 13 ++++++++++--- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6ed9b4d546a7a..d5311ceb21c62 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -954,6 +954,7 @@ enum { HCI_CONN_STK_ENCRYPT, HCI_CONN_AUTH_INITIATOR, HCI_CONN_DROP, + HCI_CONN_CANCEL, HCI_CONN_PARAM_REMOVAL_PEND, HCI_CONN_NEW_LINK_KEY, HCI_CONN_SCANNING, diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 5af3f6b011c95..e4aee5950c36a 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1233,6 +1233,8 @@ static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) { struct hci_conn *conn = data; + bt_dev_dbg(hdev, "err %d", err); + hci_dev_lock(hdev); if (!err) { @@ -1240,8 +1242,6 @@ static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) goto done; } - bt_dev_err(hdev, "request failed to create LE connection: err %d", err); - /* Check if connection is still pending */ if (conn != hci_lookup_le_connect(hdev)) goto done; @@ -2771,6 +2771,9 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) { int r = 0; + if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) + return 0; + switch (conn->state) { case BT_CONNECTED: case BT_CONFIG: diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ad92a4be58517..e68f2a7d863ac 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2881,16 +2881,6 @@ static void cs_le_create_conn(struct hci_dev *hdev, bdaddr_t *peer_addr, conn->resp_addr_type = peer_addr_type; bacpy(&conn->resp_addr, peer_addr); - - /* We don't want the connection attempt to stick around - * indefinitely since LE doesn't have a page timeout concept - * like BR/EDR. Set a timer for any connection that doesn't use - * the accept list for connecting. - */ - if (filter_policy == HCI_LE_USE_PEER_ADDR) - queue_delayed_work(conn->hdev->workqueue, - &conn->le_conn_timeout, - conn->conn_timeout); } static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status) @@ -5902,6 +5892,12 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, if (status) goto unlock; + /* Drop the connection if it has been aborted */ + if (test_bit(HCI_CONN_CANCEL, &conn->flags)) { + hci_conn_drop(conn); + goto unlock; + } + if (conn->dst_type == ADDR_LE_DEV_PUBLIC) addr_type = BDADDR_LE_PUBLIC; else diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 5a6aa1627791b..632be12672887 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -246,8 +246,9 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, skb = __hci_cmd_sync_sk(hdev, opcode, plen, param, event, timeout, sk); if (IS_ERR(skb)) { - bt_dev_err(hdev, "Opcode 0x%4x failed: %ld", opcode, - PTR_ERR(skb)); + if (!event) + bt_dev_err(hdev, "Opcode 0x%4x failed: %ld", opcode, + PTR_ERR(skb)); return PTR_ERR(skb); } @@ -5126,8 +5127,11 @@ static int hci_le_connect_cancel_sync(struct hci_dev *hdev, if (test_bit(HCI_CONN_SCANNING, &conn->flags)) return 0; + if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) + return 0; + return __hci_cmd_sync_status(hdev, HCI_OP_LE_CREATE_CONN_CANCEL, - 6, &conn->dst, HCI_CMD_TIMEOUT); + 0, NULL, HCI_CMD_TIMEOUT); } static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn) @@ -6102,6 +6106,9 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn) conn->conn_timeout, NULL); done: + if (err == -ETIMEDOUT) + hci_le_connect_cancel_sync(hdev, conn); + /* Re-enable advertising after the connection attempt is finished. */ hci_resume_advertising_sync(hdev); return err; From c95930abd687fcd1aa040dc4fe90dff947916460 Mon Sep 17 00:00:00 2001 From: Min Li Date: Sat, 4 Mar 2023 22:23:30 +0800 Subject: [PATCH 822/898] Bluetooth: Fix race condition in hidp_session_thread There is a potential race condition in hidp_session_thread that may lead to use-after-free. For instance, the timer is active while hidp_del_timer is called in hidp_session_thread(). After hidp_session_put, then 'session' will be freed, causing kernel panic when hidp_idle_timeout is running. The solution is to use del_timer_sync instead of del_timer. Here is the call trace: ? hidp_session_probe+0x780/0x780 call_timer_fn+0x2d/0x1e0 __run_timers.part.0+0x569/0x940 hidp_session_probe+0x780/0x780 call_timer_fn+0x1e0/0x1e0 ktime_get+0x5c/0xf0 lapic_next_deadline+0x2c/0x40 clockevents_program_event+0x205/0x320 run_timer_softirq+0xa9/0x1b0 __do_softirq+0x1b9/0x641 __irq_exit_rcu+0xdc/0x190 irq_exit_rcu+0xe/0x20 sysvec_apic_timer_interrupt+0xa1/0xc0 Cc: stable@vger.kernel.org Signed-off-by: Min Li Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hidp/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index bed1a7b9205c2..707f229f896a1 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -433,7 +433,7 @@ static void hidp_set_timer(struct hidp_session *session) static void hidp_del_timer(struct hidp_session *session) { if (session->idle_to > 0) - del_timer(&session->timer); + del_timer_sync(&session->timer); } static void hidp_process_report(struct hidp_session *session, int type, From 73f7b171b7c09139eb3c6a5677c200dc1be5f318 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Thu, 9 Mar 2023 00:45:01 +0800 Subject: [PATCH 823/898] Bluetooth: btsdio: fix use after free bug in btsdio_remove due to race condition In btsdio_probe, the data->work is bound with btsdio_work. It will be started in btsdio_send_frame. If the btsdio_remove runs with a unfinished work, there may be a race condition that hdev is freed but used in btsdio_work. Fix it by canceling the work before do cleanup in btsdio_remove. Signed-off-by: Zheng Wang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btsdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index 02893600db390..51000320e1ea8 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -358,6 +358,7 @@ static void btsdio_remove(struct sdio_func *func) if (!data) return; + cancel_work_sync(&data->work); hdev = data->hdev; sdio_set_drvdata(func, NULL); From b76abe4648c1acc791a207e7c08d1719eb9f4ea8 Mon Sep 17 00:00:00 2001 From: Sasha Finkelstein Date: Fri, 10 Mar 2023 11:28:42 +0100 Subject: [PATCH 824/898] bluetooth: btbcm: Fix logic error in forming the board name. This patch fixes an incorrect loop exit condition in code that replaces '/' symbols in the board name. There might also be a memory corruption issue here, but it is unlikely to be a real problem. Cc: Signed-off-by: Sasha Finkelstein Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btbcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index 3006e2a0f37e1..43e98a598bd9a 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -511,7 +511,7 @@ static const char *btbcm_get_board_name(struct device *dev) len = strlen(tmp) + 1; board_type = devm_kzalloc(dev, len, GFP_KERNEL); strscpy(board_type, tmp, len); - for (i = 0; i < board_type[i]; i++) { + for (i = 0; i < len; i++) { if (board_type[i] == '/') board_type[i] = '-'; } From 9a8ec9e8ebb5a7c0cfbce2d6b4a6b67b2b78e8f3 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 30 Mar 2023 14:15:50 -0700 Subject: [PATCH 825/898] Bluetooth: SCO: Fix possible circular locking dependency on sco_connect_cfm This attempts to fix the following trace: ====================================================== WARNING: possible circular locking dependency detected 6.3.0-rc2-g0b93eeba4454 #4703 Not tainted ------------------------------------------------------ kworker/u3:0/46 is trying to acquire lock: ffff888001fd9130 (sk_lock-AF_BLUETOOTH-BTPROTO_SCO){+.+.}-{0:0}, at: sco_connect_cfm+0x118/0x4a0 but task is already holding lock: ffffffff831e3340 (hci_cb_list_lock){+.+.}-{3:3}, at: hci_sync_conn_complete_evt+0x1ad/0x3d0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (hci_cb_list_lock){+.+.}-{3:3}: __mutex_lock+0x13b/0xcc0 hci_sync_conn_complete_evt+0x1ad/0x3d0 hci_event_packet+0x55c/0x7c0 hci_rx_work+0x34c/0xa00 process_one_work+0x575/0x910 worker_thread+0x89/0x6f0 kthread+0x14e/0x180 ret_from_fork+0x2b/0x50 -> #1 (&hdev->lock){+.+.}-{3:3}: __mutex_lock+0x13b/0xcc0 sco_sock_connect+0xfc/0x630 __sys_connect+0x197/0x1b0 __x64_sys_connect+0x37/0x50 do_syscall_64+0x42/0x90 entry_SYSCALL_64_after_hwframe+0x70/0xda -> #0 (sk_lock-AF_BLUETOOTH-BTPROTO_SCO){+.+.}-{0:0}: __lock_acquire+0x18cc/0x3740 lock_acquire+0x151/0x3a0 lock_sock_nested+0x32/0x80 sco_connect_cfm+0x118/0x4a0 hci_sync_conn_complete_evt+0x1e6/0x3d0 hci_event_packet+0x55c/0x7c0 hci_rx_work+0x34c/0xa00 process_one_work+0x575/0x910 worker_thread+0x89/0x6f0 kthread+0x14e/0x180 ret_from_fork+0x2b/0x50 other info that might help us debug this: Chain exists of: sk_lock-AF_BLUETOOTH-BTPROTO_SCO --> &hdev->lock --> hci_cb_list_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(hci_cb_list_lock); lock(&hdev->lock); lock(hci_cb_list_lock); lock(sk_lock-AF_BLUETOOTH-BTPROTO_SCO); *** DEADLOCK *** 4 locks held by kworker/u3:0/46: #0: ffff8880028d1130 ((wq_completion)hci0#2){+.+.}-{0:0}, at: process_one_work+0x4c0/0x910 #1: ffff8880013dfde0 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0}, at: process_one_work+0x4c0/0x910 #2: ffff8880025d8070 (&hdev->lock){+.+.}-{3:3}, at: hci_sync_conn_complete_evt+0xa6/0x3d0 #3: ffffffffb79e3340 (hci_cb_list_lock){+.+.}-{3:3}, at: hci_sync_conn_complete_evt+0x1ad/0x3d0 Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/sco.c | 69 ++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 1111da4e2f2bd..f3a5ab9e4fa41 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -235,27 +235,41 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk, return err; } -static int sco_connect(struct hci_dev *hdev, struct sock *sk) +static int sco_connect(struct sock *sk) { struct sco_conn *conn; struct hci_conn *hcon; + struct hci_dev *hdev; int err, type; BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst); + hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR); + if (!hdev) + return -EHOSTUNREACH; + + hci_dev_lock(hdev); + if (lmp_esco_capable(hdev) && !disable_esco) type = ESCO_LINK; else type = SCO_LINK; if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT && - (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) - return -EOPNOTSUPP; + (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) { + err = -EOPNOTSUPP; + goto unlock; + } hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst, sco_pi(sk)->setting, &sco_pi(sk)->codec); - if (IS_ERR(hcon)) - return PTR_ERR(hcon); + if (IS_ERR(hcon)) { + err = PTR_ERR(hcon); + goto unlock; + } + + hci_dev_unlock(hdev); + hci_dev_put(hdev); conn = sco_conn_add(hcon); if (!conn) { @@ -263,13 +277,15 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk) return -ENOMEM; } - /* Update source addr of the socket */ - bacpy(&sco_pi(sk)->src, &hcon->src); - err = sco_chan_add(conn, sk, NULL); if (err) return err; + lock_sock(sk); + + /* Update source addr of the socket */ + bacpy(&sco_pi(sk)->src, &hcon->src); + if (hcon->state == BT_CONNECTED) { sco_sock_clear_timer(sk); sk->sk_state = BT_CONNECTED; @@ -278,6 +294,13 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk) sco_sock_set_timer(sk, sk->sk_sndtimeo); } + release_sock(sk); + + return err; + +unlock: + hci_dev_unlock(hdev); + hci_dev_put(hdev); return err; } @@ -565,7 +588,6 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen { struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; struct sock *sk = sock->sk; - struct hci_dev *hdev; int err; BT_DBG("sk %p", sk); @@ -574,37 +596,26 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen addr->sa_family != AF_BLUETOOTH) return -EINVAL; - lock_sock(sk); - if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { - err = -EBADFD; - goto done; - } + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) + return -EBADFD; - if (sk->sk_type != SOCK_SEQPACKET) { + if (sk->sk_type != SOCK_SEQPACKET) err = -EINVAL; - goto done; - } - - hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR); - if (!hdev) { - err = -EHOSTUNREACH; - goto done; - } - hci_dev_lock(hdev); + lock_sock(sk); /* Set destination address and psm */ bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr); + release_sock(sk); - err = sco_connect(hdev, sk); - hci_dev_unlock(hdev); - hci_dev_put(hdev); + err = sco_connect(sk); if (err) - goto done; + return err; + + lock_sock(sk); err = bt_sock_wait_state(sk, BT_CONNECTED, sock_sndtimeo(sk, flags & O_NONBLOCK)); -done: release_sock(sk); return err; } From 975abc0c90fc485ff9b4a6afa475c3b1398d5d47 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 30 Mar 2023 14:45:03 -0700 Subject: [PATCH 826/898] Bluetooth: SCO: Fix possible circular locking dependency sco_sock_getsockopt This attempts to fix the following trace: ====================================================== WARNING: possible circular locking dependency detected 6.3.0-rc2-g68fcb3a7bf97 #4706 Not tainted ------------------------------------------------------ sco-tester/31 is trying to acquire lock: ffff8880025b8070 (&hdev->lock){+.+.}-{3:3}, at: sco_sock_getsockopt+0x1fc/0xa90 but task is already holding lock: ffff888001eeb130 (sk_lock-AF_BLUETOOTH-BTPROTO_SCO){+.+.}-{0:0}, at: sco_sock_getsockopt+0x104/0xa90 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (sk_lock-AF_BLUETOOTH-BTPROTO_SCO){+.+.}-{0:0}: lock_sock_nested+0x32/0x80 sco_connect_cfm+0x118/0x4a0 hci_sync_conn_complete_evt+0x1e6/0x3d0 hci_event_packet+0x55c/0x7c0 hci_rx_work+0x34c/0xa00 process_one_work+0x575/0x910 worker_thread+0x89/0x6f0 kthread+0x14e/0x180 ret_from_fork+0x2b/0x50 -> #1 (hci_cb_list_lock){+.+.}-{3:3}: __mutex_lock+0x13b/0xcc0 hci_sync_conn_complete_evt+0x1ad/0x3d0 hci_event_packet+0x55c/0x7c0 hci_rx_work+0x34c/0xa00 process_one_work+0x575/0x910 worker_thread+0x89/0x6f0 kthread+0x14e/0x180 ret_from_fork+0x2b/0x50 -> #0 (&hdev->lock){+.+.}-{3:3}: __lock_acquire+0x18cc/0x3740 lock_acquire+0x151/0x3a0 __mutex_lock+0x13b/0xcc0 sco_sock_getsockopt+0x1fc/0xa90 __sys_getsockopt+0xe9/0x190 __x64_sys_getsockopt+0x5b/0x70 do_syscall_64+0x42/0x90 entry_SYSCALL_64_after_hwframe+0x70/0xda other info that might help us debug this: Chain exists of: &hdev->lock --> hci_cb_list_lock --> sk_lock-AF_BLUETOOTH-BTPROTO_SCO Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(sk_lock-AF_BLUETOOTH-BTPROTO_SCO); lock(hci_cb_list_lock); lock(sk_lock-AF_BLUETOOTH-BTPROTO_SCO); lock(&hdev->lock); *** DEADLOCK *** 1 lock held by sco-tester/31: #0: ffff888001eeb130 (sk_lock-AF_BLUETOOTH-BTPROTO_SCO){+.+.}-{0:0}, at: sco_sock_getsockopt+0x104/0xa90 Fixes: 248733e87d50 ("Bluetooth: Allow querying of supported offload codecs over SCO socket") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/sco.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index f3a5ab9e4fa41..cd1a27ac555d0 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1140,6 +1140,8 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, break; } + release_sock(sk); + /* find total buffer size required to copy codec + caps */ hci_dev_lock(hdev); list_for_each_entry(c, &hdev->local_codecs, list) { @@ -1157,15 +1159,13 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, buf_len += sizeof(struct bt_codecs); if (buf_len > len) { hci_dev_put(hdev); - err = -ENOBUFS; - break; + return -ENOBUFS; } ptr = optval; if (put_user(num_codecs, ptr)) { hci_dev_put(hdev); - err = -EFAULT; - break; + return -EFAULT; } ptr += sizeof(num_codecs); @@ -1205,12 +1205,14 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, ptr += len; } - if (!err && put_user(buf_len, optlen)) - err = -EFAULT; - hci_dev_unlock(hdev); hci_dev_put(hdev); + lock_sock(sk); + + if (!err && put_user(buf_len, optlen)) + err = -EFAULT; + break; default: From 5dc7d23e167e2882ef118456ceccd57873e876d8 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 3 Apr 2023 14:19:14 -0700 Subject: [PATCH 827/898] Bluetooth: hci_conn: Fix possible UAF This fixes the following trace: ================================================================== BUG: KASAN: slab-use-after-free in hci_conn_del+0xba/0x3a0 Write of size 8 at addr ffff88800208e9c8 by task iso-tester/31 CPU: 0 PID: 31 Comm: iso-tester Not tainted 6.3.0-rc2-g991aa4a69a47 #4716 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.1-2.fc36 04/01/2014 Call Trace: dump_stack_lvl+0x1d/0x70 print_report+0xce/0x610 ? __virt_addr_valid+0xd4/0x150 ? hci_conn_del+0xba/0x3a0 kasan_report+0xdd/0x110 ? hci_conn_del+0xba/0x3a0 hci_conn_del+0xba/0x3a0 hci_conn_hash_flush+0xf2/0x120 hci_dev_close_sync+0x388/0x920 hci_unregister_dev+0x122/0x260 vhci_release+0x4f/0x90 __fput+0x102/0x430 task_work_run+0xf1/0x160 ? __pfx_task_work_run+0x10/0x10 ? mark_held_locks+0x24/0x90 exit_to_user_mode_prepare+0x170/0x180 syscall_exit_to_user_mode+0x19/0x50 do_syscall_64+0x4e/0x90 entry_SYSCALL_64_after_hwframe+0x70/0xda Fixes: 0f00cd322d22 ("Bluetooth: Free potentially unfreed SCO connection") Link: https://syzkaller.appspot.com/bug?extid=8bb72f86fc823817bc5d Cc: Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index e4aee5950c36a..8455ba141ee61 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1068,6 +1068,17 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, return conn; } +static bool hci_conn_unlink(struct hci_conn *conn) +{ + if (!conn->link) + return false; + + conn->link->link = NULL; + conn->link = NULL; + + return true; +} + int hci_conn_del(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; @@ -1079,15 +1090,16 @@ int hci_conn_del(struct hci_conn *conn) cancel_delayed_work_sync(&conn->idle_work); if (conn->type == ACL_LINK) { - struct hci_conn *sco = conn->link; - if (sco) { - sco->link = NULL; + struct hci_conn *link = conn->link; + + if (link) { + hci_conn_unlink(conn); /* Due to race, SCO connection might be not established * yet at this point. Delete it now, otherwise it is * possible for it to be stuck and can't be deleted. */ - if (sco->handle == HCI_CONN_HANDLE_UNSET) - hci_conn_del(sco); + if (link->handle == HCI_CONN_HANDLE_UNSET) + hci_conn_del(link); } /* Unacked frames */ @@ -1103,7 +1115,7 @@ int hci_conn_del(struct hci_conn *conn) struct hci_conn *acl = conn->link; if (acl) { - acl->link = NULL; + hci_conn_unlink(conn); hci_conn_drop(acl); } @@ -2434,6 +2446,12 @@ void hci_conn_hash_flush(struct hci_dev *hdev) c->state = BT_CLOSED; hci_disconn_cfm(c, HCI_ERROR_LOCAL_HOST_TERM); + + /* Unlink before deleting otherwise it is possible that + * hci_conn_del removes the link which may cause the list to + * contain items already freed. + */ + hci_conn_unlink(c); hci_conn_del(c); } } From d2e4f1b1cba8742db66aaf77374cab7c0c7c8656 Mon Sep 17 00:00:00 2001 From: Claudia Draghicescu Date: Wed, 5 Apr 2023 14:19:18 +0300 Subject: [PATCH 828/898] Bluetooth: Set ISO Data Path on broadcast sink This patch enables ISO data rx on broadcast sink. Fixes: eca0ae4aea66 ("Bluetooth: Add initial implementation of BIS connections") Signed-off-by: Claudia Draghicescu Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index e68f2a7d863ac..e87c928c9e17a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6991,7 +6991,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, bis->iso_qos.in.latency = le16_to_cpu(ev->interval) * 125 / 100; bis->iso_qos.in.sdu = le16_to_cpu(ev->max_pdu); - hci_connect_cfm(bis, ev->status); + hci_iso_setup_path(bis); } hci_dev_unlock(hdev); From a2a9339e1c9deb7e1e079e12e27a0265aea8421a Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 6 Apr 2023 09:33:09 -0700 Subject: [PATCH 829/898] Bluetooth: L2CAP: Fix use-after-free in l2cap_disconnect_{req,rsp} Similar to commit d0be8347c623 ("Bluetooth: L2CAP: Fix use-after-free caused by l2cap_chan_put"), just use l2cap_chan_hold_unless_zero to prevent referencing a channel that is about to be destroyed. Cc: stable@kernel.org Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Min Li --- net/bluetooth/l2cap_core.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 49926f59cc123..55a7226233f96 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4652,33 +4652,27 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, BT_DBG("scid 0x%4.4x dcid 0x%4.4x", scid, dcid); - mutex_lock(&conn->chan_lock); - - chan = __l2cap_get_chan_by_scid(conn, dcid); + chan = l2cap_get_chan_by_scid(conn, dcid); if (!chan) { - mutex_unlock(&conn->chan_lock); cmd_reject_invalid_cid(conn, cmd->ident, dcid, scid); return 0; } - l2cap_chan_hold(chan); - l2cap_chan_lock(chan); - rsp.dcid = cpu_to_le16(chan->scid); rsp.scid = cpu_to_le16(chan->dcid); l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp); chan->ops->set_shutdown(chan); + mutex_lock(&conn->chan_lock); l2cap_chan_del(chan, ECONNRESET); + mutex_unlock(&conn->chan_lock); chan->ops->close(chan); l2cap_chan_unlock(chan); l2cap_chan_put(chan); - mutex_unlock(&conn->chan_lock); - return 0; } @@ -4698,33 +4692,27 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, BT_DBG("dcid 0x%4.4x scid 0x%4.4x", dcid, scid); - mutex_lock(&conn->chan_lock); - - chan = __l2cap_get_chan_by_scid(conn, scid); + chan = l2cap_get_chan_by_scid(conn, scid); if (!chan) { mutex_unlock(&conn->chan_lock); return 0; } - l2cap_chan_hold(chan); - l2cap_chan_lock(chan); - if (chan->state != BT_DISCONN) { l2cap_chan_unlock(chan); l2cap_chan_put(chan); - mutex_unlock(&conn->chan_lock); return 0; } + mutex_lock(&conn->chan_lock); l2cap_chan_del(chan, 0); + mutex_unlock(&conn->chan_lock); chan->ops->close(chan); l2cap_chan_unlock(chan); l2cap_chan_put(chan); - mutex_unlock(&conn->chan_lock); - return 0; } From aa7d233f45b4c549750044c9921f7afcbe50925b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 10 Apr 2023 21:09:07 +0900 Subject: [PATCH 830/898] kbuild: give up untracked files for source package builds When the source tree is dirty and contains untracked files, package builds may fail, for example, when a broken symlink exists, a file path contains whitespaces, etc. Since commit 05e96e96a315 ("kbuild: use git-archive for source package creation"), the source tarball only contains committed files because it is created by 'git archive'. scripts/package/gen-diff-patch tries to address the diff from HEAD, but including untracked files by the hand-crafted script introduces more complexity. I wrote a patch [1] to make it work in most cases, but still wonder if this is what we should aim for. To simplify the code, this patch just gives up untracked files. Going forward, it is your responsibility to do 'git add' for what you want in the source package. The script shows a warning just in case you forgot to do so. It should be checked only when building source packages. [1]: https://lore.kernel.org/all/CAK7LNAShbZ56gSh9PrbLnBDYKnjtTkHMoCXeGrhcxMvqXGq9=g@mail.gmail.com/2-0001-kbuild-make-package-builds-more-robust.patch Fixes: 05e96e96a315 ("kbuild: use git-archive for source package creation") Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- scripts/Makefile.package | 3 +- scripts/package/gen-diff-patch | 62 +++++++++----------- scripts/package/mkdebian | 103 +++++++++++++++++++-------------- scripts/package/mkspec | 11 +--- 4 files changed, 90 insertions(+), 89 deletions(-) diff --git a/scripts/Makefile.package b/scripts/Makefile.package index 61f72eb8d9be7..49aff12cb6abd 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -94,7 +94,7 @@ binrpm-pkg: $(UTS_MACHINE)-linux -bb $(objtree)/binkernel.spec quiet_cmd_debianize = GEN $@ - cmd_debianize = $(srctree)/scripts/package/mkdebian + cmd_debianize = $(srctree)/scripts/package/mkdebian $(mkdebian-opts) debian: FORCE $(call cmd,debianize) @@ -103,6 +103,7 @@ PHONY += debian-orig debian-orig: private source = $(shell dpkg-parsechangelog -S Source) debian-orig: private version = $(shell dpkg-parsechangelog -S Version | sed 's/-[^-]*$$//') debian-orig: private orig-name = $(source)_$(version).orig.tar.gz +debian-orig: mkdebian-opts = --need-source debian-orig: linux.tar.gz debian $(Q)if [ "$(df --output=target .. 2>/dev/null)" = "$(df --output=target $< 2>/dev/null)" ]; then \ ln -f $< ../$(orig-name); \ diff --git a/scripts/package/gen-diff-patch b/scripts/package/gen-diff-patch index f842ab50a780c..8a98b7bb78a0c 100755 --- a/scripts/package/gen-diff-patch +++ b/scripts/package/gen-diff-patch @@ -1,44 +1,36 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0-only -diff_patch="${1}" -untracked_patch="${2}" -srctree=$(dirname $0)/../.. +diff_patch=$1 -rm -f ${diff_patch} ${untracked_patch} +mkdir -p "$(dirname "${diff_patch}")" -if ! ${srctree}/scripts/check-git; then - exit -fi - -mkdir -p "$(dirname ${diff_patch})" "$(dirname ${untracked_patch})" +git -C "${srctree:-.}" diff HEAD > "${diff_patch}" -git -C "${srctree}" diff HEAD > "${diff_patch}" - -if [ ! -s "${diff_patch}" ]; then - rm -f "${diff_patch}" +if [ ! -s "${diff_patch}" ] || + [ -z "$(git -C "${srctree:-.}" ls-files --other --exclude-standard | head -n1)" ]; then exit fi -git -C ${srctree} status --porcelain --untracked-files=all | -while read stat path -do - if [ "${stat}" = '??' ]; then - - if ! diff -u /dev/null "${srctree}/${path}" > .tmp_diff && - ! head -n1 .tmp_diff | grep -q "Binary files"; then - { - echo "--- /dev/null" - echo "+++ linux/$path" - cat .tmp_diff | tail -n +3 - } >> ${untracked_patch} - fi - fi -done - -rm -f .tmp_diff - -if [ ! -s "${diff_patch}" ]; then - rm -f "${diff_patch}" - exit -fi +# The source tarball, which is generated by 'git archive', contains everything +# you committed in the repository. If you have local diff ('git diff HEAD'), +# it will go into ${diff_patch}. If untracked files are remaining, the resulting +# source package may not be correct. +# +# Examples: +# - You modified a source file to add #include "new-header.h" +# but forgot to add new-header.h +# - You modified a Makefile to add 'obj-$(CONFIG_FOO) += new-dirver.o' +# but you forgot to add new-driver.c +# +# You need to commit them, or at least stage them by 'git add'. +# +# This script does not take care of untracked files because doing so would +# introduce additional complexity. Instead, print a warning message here if +# untracked files are found. +# If all untracked files are just garbage, you can ignore this warning. +echo >&2 "============================ WARNING ============================" +echo >&2 "Your working tree has diff from HEAD, and also untracked file(s)." +echo >&2 "Please make sure you did 'git add' for all new files you need in" +echo >&2 "the source package." +echo >&2 "=================================================================" diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index e20a2b5be9eb2..a4c2c2276223f 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -84,7 +84,66 @@ set_debarch() { fi } +# Create debian/source/ if it is a source package build +gen_source () +{ + mkdir -p debian/source + + echo "3.0 (quilt)" > debian/source/format + + { + echo "diff-ignore" + echo "extend-diff-ignore = .*" + } > debian/source/local-options + + # Add .config as a patch + mkdir -p debian/patches + { + echo "Subject: Add .config" + echo "Author: ${maintainer}" + echo + echo "--- /dev/null" + echo "+++ linux/.config" + diff -u /dev/null "${KCONFIG_CONFIG}" | tail -n +3 + } > debian/patches/config.patch + echo config.patch > debian/patches/series + + "${srctree}/scripts/package/gen-diff-patch" debian/patches/diff.patch + if [ -s debian/patches/diff.patch ]; then + sed -i " + 1iSubject: Add local diff + 1iAuthor: ${maintainer} + 1i + " debian/patches/diff.patch + + echo diff.patch >> debian/patches/series + else + rm -f debian/patches/diff.patch + fi +} + rm -rf debian +mkdir debian + +email=${DEBEMAIL-$EMAIL} + +# use email string directly if it contains +if echo "${email}" | grep -q '<.*>'; then + maintainer=${email} +else + # or construct the maintainer string + user=${KBUILD_BUILD_USER-$(id -nu)} + name=${DEBFULLNAME-${user}} + if [ -z "${email}" ]; then + buildhost=${KBUILD_BUILD_HOST-$(hostname -f 2>/dev/null || hostname)} + email="${user}@${buildhost}" + fi + maintainer="${name} <${email}>" +fi + +if [ "$1" = --need-source ]; then + gen_source +fi # Some variables and settings used throughout the script version=$KERNELRELEASE @@ -104,22 +163,6 @@ fi debarch= set_debarch -email=${DEBEMAIL-$EMAIL} - -# use email string directly if it contains -if echo $email | grep -q '<.*>'; then - maintainer=$email -else - # or construct the maintainer string - user=${KBUILD_BUILD_USER-$(id -nu)} - name=${DEBFULLNAME-$user} - if [ -z "$email" ]; then - buildhost=${KBUILD_BUILD_HOST-$(hostname -f 2>/dev/null || hostname)} - email="$user@$buildhost" - fi - maintainer="$name <$email>" -fi - # Try to determine distribution if [ -n "$KDEB_CHANGELOG_DIST" ]; then distribution=$KDEB_CHANGELOG_DIST @@ -132,34 +175,6 @@ else echo >&2 "Install lsb-release or set \$KDEB_CHANGELOG_DIST explicitly" fi -mkdir -p debian/source/ -echo "3.0 (quilt)" > debian/source/format - -{ - echo "diff-ignore" - echo "extend-diff-ignore = .*" -} > debian/source/local-options - -# Add .config as a patch -mkdir -p debian/patches -{ - echo "Subject: Add .config" - echo "Author: ${maintainer}" - echo - echo "--- /dev/null" - echo "+++ linux/.config" - diff -u /dev/null "${KCONFIG_CONFIG}" | tail -n +3 -} > debian/patches/config -echo config > debian/patches/series - -$(dirname $0)/gen-diff-patch debian/patches/diff.patch debian/patches/untracked.patch -if [ -f debian/patches/diff.patch ]; then - echo diff.patch >> debian/patches/series -fi -if [ -f debian/patches/untracked.patch ]; then - echo untracked.patch >> debian/patches/series -fi - echo $debarch > debian/arch extra_build_depends=", $(if_enabled_echo CONFIG_UNWINDER_ORC libelf-dev:native)" extra_build_depends="$extra_build_depends, $(if_enabled_echo CONFIG_SYSTEM_TRUSTED_KEYRING libssl-dev:native)" diff --git a/scripts/package/mkspec b/scripts/package/mkspec index b7d1dc28a5d6d..fc8ad3fbc0a95 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -19,8 +19,7 @@ else mkdir -p rpmbuild/SOURCES cp linux.tar.gz rpmbuild/SOURCES cp "${KCONFIG_CONFIG}" rpmbuild/SOURCES/config - $(dirname $0)/gen-diff-patch rpmbuild/SOURCES/diff.patch rpmbuild/SOURCES/untracked.patch - touch rpmbuild/SOURCES/diff.patch rpmbuild/SOURCES/untracked.patch + "${srctree}/scripts/package/gen-diff-patch" rpmbuild/SOURCES/diff.patch fi if grep -q CONFIG_MODULES=y include/config/auto.conf; then @@ -56,7 +55,6 @@ sed -e '/^DEL/d' -e 's/^\t*//' < Date: Mon, 10 Apr 2023 11:31:11 +0900 Subject: [PATCH 831/898] MAINTAINERS: Change ata maintainer email addresses Change my email address referenced in the MAINTAINERS file for the ata subsystem to dlemoal@kernel.org. And while at it, also change other references for zonefs and the k210 drivers to the same address. Signed-off-by: Damien Le Moal Reviewed-by: Chaitanya Kulkarni --- MAINTAINERS | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ec57c42ed5440..8e156c754e207 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4461,14 +4461,14 @@ F: Documentation/devicetree/bindings/net/ieee802154/ca8210.txt F: drivers/net/ieee802154/ca8210.c CANAAN/KENDRYTE K210 SOC FPIOA DRIVER -M: Damien Le Moal +M: Damien Le Moal L: linux-riscv@lists.infradead.org L: linux-gpio@vger.kernel.org (pinctrl driver) F: Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml F: drivers/pinctrl/pinctrl-k210.c CANAAN/KENDRYTE K210 SOC RESET CONTROLLER DRIVER -M: Damien Le Moal +M: Damien Le Moal L: linux-kernel@vger.kernel.org L: linux-riscv@lists.infradead.org S: Maintained @@ -4476,7 +4476,7 @@ F: Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml F: drivers/reset/reset-k210.c CANAAN/KENDRYTE K210 SOC SYSTEM CONTROLLER DRIVER -M: Damien Le Moal +M: Damien Le Moal L: linux-riscv@lists.infradead.org S: Maintained F: Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml @@ -11757,7 +11757,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git F: drivers/ata/sata_promise.* LIBATA SUBSYSTEM (Serial and Parallel ATA drivers) -M: Damien Le Moal +M: Damien Le Moal L: linux-ide@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/libata.git @@ -23122,7 +23122,7 @@ S: Maintained F: arch/x86/kernel/cpu/zhaoxin.c ZONEFS FILESYSTEM -M: Damien Le Moal +M: Damien Le Moal M: Naohiro Aota R: Johannes Thumshirn L: linux-fsdevel@vger.kernel.org From 6b8446859c971a5783a2cdc90adf32e64de3bd23 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 1 Mar 2023 17:14:09 +0200 Subject: [PATCH 832/898] drm/i915/dsi: fix DSS CTL register offsets for TGL+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On TGL+ the DSS control registers are at different offsets, and there's one per pipe. Fix the offsets to fix dual link DSI for TGL+. There would be helpers for this in the DSC code, but just do the quick fix now for DSI. Long term, we should probably move all the DSS handling into intel_vdsc.c, so exporting the helpers seems counter-productive. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8232 Cc: Ville Syrjala Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230301151409.1581574-1-jani.nikula@intel.com (cherry picked from commit 1a62dd9895dca78bee28bba3a36f08836fdd143d) --- drivers/gpu/drm/i915/display/icl_dsi.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 468a792e6a405..fc0eaf40dc941 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -300,9 +300,21 @@ static void configure_dual_link_mode(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + i915_reg_t dss_ctl1_reg, dss_ctl2_reg; u32 dss_ctl1; - dss_ctl1 = intel_de_read(dev_priv, DSS_CTL1); + /* FIXME: Move all DSS handling to intel_vdsc.c */ + if (DISPLAY_VER(dev_priv) >= 12) { + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); + + dss_ctl1_reg = ICL_PIPE_DSS_CTL1(crtc->pipe); + dss_ctl2_reg = ICL_PIPE_DSS_CTL2(crtc->pipe); + } else { + dss_ctl1_reg = DSS_CTL1; + dss_ctl2_reg = DSS_CTL2; + } + + dss_ctl1 = intel_de_read(dev_priv, dss_ctl1_reg); dss_ctl1 |= SPLITTER_ENABLE; dss_ctl1 &= ~OVERLAP_PIXELS_MASK; dss_ctl1 |= OVERLAP_PIXELS(intel_dsi->pixel_overlap); @@ -323,16 +335,16 @@ static void configure_dual_link_mode(struct intel_encoder *encoder, dss_ctl1 &= ~LEFT_DL_BUF_TARGET_DEPTH_MASK; dss_ctl1 |= LEFT_DL_BUF_TARGET_DEPTH(dl_buffer_depth); - dss_ctl2 = intel_de_read(dev_priv, DSS_CTL2); + dss_ctl2 = intel_de_read(dev_priv, dss_ctl2_reg); dss_ctl2 &= ~RIGHT_DL_BUF_TARGET_DEPTH_MASK; dss_ctl2 |= RIGHT_DL_BUF_TARGET_DEPTH(dl_buffer_depth); - intel_de_write(dev_priv, DSS_CTL2, dss_ctl2); + intel_de_write(dev_priv, dss_ctl2_reg, dss_ctl2); } else { /* Interleave */ dss_ctl1 |= DUAL_LINK_MODE_INTERLEAVE; } - intel_de_write(dev_priv, DSS_CTL1, dss_ctl1); + intel_de_write(dev_priv, dss_ctl1_reg, dss_ctl1); } /* aka DSI 8X clock */ From 86d8740dae5a397d8344ae75f8758103c1fcba97 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Wed, 5 Apr 2023 13:04:55 +0200 Subject: [PATCH 833/898] drm/nouveau/fb: add missing sysmen flush callbacks Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/203 Fixes: 5728d064190e1 ("drm/nouveau/fb: handle sysmem flush page from common code") Signed-off-by: Karol Herbst Reviewed-by: Lyude Paul Reviewed-by: Ben Skeggs Link: https://patchwork.freedesktop.org/patch/msgid/20230405110455.1368428-1-kherbst@redhat.com --- drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk110.c | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c | 1 + 4 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c index 76678dd60f93f..c4c6f67af7ccc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c @@ -31,6 +31,7 @@ gf108_fb = { .init = gf100_fb_init, .init_page = gf100_fb_init_page, .intr = gf100_fb_intr, + .sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init, .ram_new = gf108_ram_new, .default_bigpage = 17, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c index f73442ccb424b..433fa966ba231 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c @@ -77,6 +77,7 @@ gk104_fb = { .init = gf100_fb_init, .init_page = gf100_fb_init_page, .intr = gf100_fb_intr, + .sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init, .ram_new = gk104_ram_new, .default_bigpage = 17, .clkgate_pack = gk104_fb_clkgate_pack, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk110.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk110.c index 45d6cdffafeed..4dc283dedf8b5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk110.c @@ -59,6 +59,7 @@ gk110_fb = { .init = gf100_fb_init, .init_page = gf100_fb_init_page, .intr = gf100_fb_intr, + .sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init, .ram_new = gk104_ram_new, .default_bigpage = 17, .clkgate_pack = gk110_fb_clkgate_pack, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c index de52462a92bf0..90bfff616d35b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c @@ -31,6 +31,7 @@ gm107_fb = { .init = gf100_fb_init, .init_page = gf100_fb_init_page, .intr = gf100_fb_intr, + .sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init, .ram_new = gm107_ram_new, .default_bigpage = 17, }; From a4506722dc39ca840593f14e3faa4c9ba9408211 Mon Sep 17 00:00:00 2001 From: "Radu Pirea (OSS)" Date: Thu, 6 Apr 2023 12:59:04 +0300 Subject: [PATCH 834/898] net: phy: nxp-c45-tja11xx: add remove callback Unregister PTP clock when the driver is removed. Purge the RX and TX skb queues. Fixes: 514def5dd339 ("phy: nxp-c45-tja11xx: add timestamping support") CC: stable@vger.kernel.org # 5.15+ Signed-off-by: Radu Pirea (OSS) Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230406095904.75456-1-radu-nicolae.pirea@oss.nxp.com Signed-off-by: Paolo Abeni --- drivers/net/phy/nxp-c45-tja11xx.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 1b7d6941b0f35..029875a59ff89 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -1337,6 +1337,17 @@ static int nxp_c45_probe(struct phy_device *phydev) return ret; } +static void nxp_c45_remove(struct phy_device *phydev) +{ + struct nxp_c45_phy *priv = phydev->priv; + + if (priv->ptp_clock) + ptp_clock_unregister(priv->ptp_clock); + + skb_queue_purge(&priv->tx_queue); + skb_queue_purge(&priv->rx_queue); +} + static struct phy_driver nxp_c45_driver[] = { { PHY_ID_MATCH_MODEL(PHY_ID_TJA_1103), @@ -1359,6 +1370,7 @@ static struct phy_driver nxp_c45_driver[] = { .set_loopback = genphy_c45_loopback, .get_sqi = nxp_c45_get_sqi, .get_sqi_max = nxp_c45_get_sqi_max, + .remove = nxp_c45_remove, }, }; From 6fd33a3333c7916689b8f051a185defe4dd515b0 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 4 Apr 2023 21:39:34 +0200 Subject: [PATCH 835/898] fbmem: Reject FB_ACTIVATE_KD_TEXT from userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an oversight from dc5bdb68b5b3 ("drm/fb-helper: Fix vt restore") - I failed to realize that nasty userspace could set this. It's not pretty to mix up kernel-internal and userspace uapi flags like this, but since the entire fb_var_screeninfo structure is uapi we'd need to either add a new parameter to the ->fb_set_par callback and fb_set_par() function, which has a _lot_ of users. Or some other fairly ugly side-channel int fb_info. Neither is a pretty prospect. Instead just correct the issue at hand by filtering out this kernel-internal flag in the ioctl handling code. Reviewed-by: Javier Martinez Canillas Acked-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Fixes: dc5bdb68b5b3 ("drm/fb-helper: Fix vt restore") Cc: Alex Deucher Cc: shlomo@fastmail.com Cc: Michel Dänzer Cc: Noralf Trønnes Cc: Thomas Zimmermann Cc: Daniel Vetter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: # v5.7+ Cc: Bartlomiej Zolnierkiewicz Cc: Geert Uytterhoeven Cc: Nathan Chancellor Cc: Qiujun Huang Cc: Peter Rosin Cc: linux-fbdev@vger.kernel.org Cc: Helge Deller Cc: Sam Ravnborg Cc: Geert Uytterhoeven Cc: Samuel Thibault Cc: Tetsuo Handa Cc: Shigeru Yoshida Link: https://patchwork.freedesktop.org/patch/msgid/20230404193934.472457-1-daniel.vetter@ffwll.ch --- drivers/video/fbdev/core/fbmem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 875541ff185bf..3fd95a79e4c33 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1116,6 +1116,8 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, case FBIOPUT_VSCREENINFO: if (copy_from_user(&var, argp, sizeof(var))) return -EFAULT; + /* only for kernel-internal use */ + var.activate &= ~FB_ACTIVATE_KD_TEXT; console_lock(); lock_fb_info(info); ret = fbcon_modechange_possible(info, &var); From 117e4e5bd9d47b89777dbf6b37a709dcfe59520f Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 10 Apr 2023 10:35:01 -0700 Subject: [PATCH 836/898] thermal: intel: Avoid updating unsupported THERM_STATUS_CLEAR mask bits Some older processors don't allow BIT(13) and BIT(15) in the current mask set by "THERM_STATUS_CLEAR_CORE_MASK". This results in: unchecked MSR access error: WRMSR to 0x19c (tried to write 0x000000000000aaa8) at rIP: 0xffffffff816f66a6 (throttle_active_work+0xa6/0x1d0) To avoid unchecked MSR issues, check CPUID for each relevant feature and use that information to set the supported feature bits only in the "clear" mask for cores. Do the same for the analogous package mask set by "THERM_STATUS_CLEAR_PKG_MASK". Introduce functions thermal_intr_init_core_clear_mask() and thermal_intr_init_pkg_clear_mask() to set core and package mask bits, respectively. These functions are called during initialization. Fixes: 6fe1e64b6026 ("thermal: intel: Prevent accidental clearing of HFI status") Reported-by: Rui Salvaterra Link: https://lore.kernel.org/lkml/cdf43fb423368ee3994124a9e8c9b4f8d00712c6.camel@linux.intel.com/T/ Tested-by: Rui Salvaterra Signed-off-by: Srinivas Pandruvada Cc: 6.2+ # 6.2+ [ rjw: Renamed 2 funtions and 2 static variables, edited subject and changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/therm_throt.c | 73 ++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 7 deletions(-) diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c index 2e22bb82b7389..e69868e868eb9 100644 --- a/drivers/thermal/intel/therm_throt.c +++ b/drivers/thermal/intel/therm_throt.c @@ -193,8 +193,67 @@ static const struct attribute_group thermal_attr_group = { #define THERM_THROT_POLL_INTERVAL HZ #define THERM_STATUS_PROCHOT_LOG BIT(1) -#define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15)) -#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11)) +static u64 therm_intr_core_clear_mask; +static u64 therm_intr_pkg_clear_mask; + +static void thermal_intr_init_core_clear_mask(void) +{ + if (therm_intr_core_clear_mask) + return; + + /* + * Reference: Intel SDM Volume 4 + * "Table 2-2. IA-32 Architectural MSRs", MSR 0x19C + * IA32_THERM_STATUS. + */ + + /* + * Bit 1, 3, 5: CPUID.01H:EDX[22] = 1. This driver will not + * enable interrupts, when 0 as it checks for X86_FEATURE_ACPI. + */ + therm_intr_core_clear_mask = (BIT(1) | BIT(3) | BIT(5)); + + /* + * Bit 7 and 9: Thermal Threshold #1 and #2 log + * If CPUID.01H:ECX[8] = 1 + */ + if (boot_cpu_has(X86_FEATURE_TM2)) + therm_intr_core_clear_mask |= (BIT(7) | BIT(9)); + + /* Bit 11: Power Limitation log (R/WC0) If CPUID.06H:EAX[4] = 1 */ + if (boot_cpu_has(X86_FEATURE_PLN)) + therm_intr_core_clear_mask |= BIT(11); + + /* + * Bit 13: Current Limit log (R/WC0) If CPUID.06H:EAX[7] = 1 + * Bit 15: Cross Domain Limit log (R/WC0) If CPUID.06H:EAX[7] = 1 + */ + if (boot_cpu_has(X86_FEATURE_HWP)) + therm_intr_core_clear_mask |= (BIT(13) | BIT(15)); +} + +static void thermal_intr_init_pkg_clear_mask(void) +{ + if (therm_intr_pkg_clear_mask) + return; + + /* + * Reference: Intel SDM Volume 4 + * "Table 2-2. IA-32 Architectural MSRs", MSR 0x1B1 + * IA32_PACKAGE_THERM_STATUS. + */ + + /* All bits except BIT 26 depend on CPUID.06H: EAX[6] = 1 */ + if (boot_cpu_has(X86_FEATURE_PTS)) + therm_intr_pkg_clear_mask = (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11)); + + /* + * Intel SDM Volume 2A: Thermal and Power Management Leaf + * Bit 26: CPUID.06H: EAX[19] = 1 + */ + if (boot_cpu_has(X86_FEATURE_HFI)) + therm_intr_pkg_clear_mask |= BIT(26); +} /* * Clear the bits in package thermal status register for bit = 1 @@ -207,13 +266,10 @@ void thermal_clear_package_intr_status(int level, u64 bit_mask) if (level == CORE_LEVEL) { msr = MSR_IA32_THERM_STATUS; - msr_val = THERM_STATUS_CLEAR_CORE_MASK; + msr_val = therm_intr_core_clear_mask; } else { msr = MSR_IA32_PACKAGE_THERM_STATUS; - msr_val = THERM_STATUS_CLEAR_PKG_MASK; - if (boot_cpu_has(X86_FEATURE_HFI)) - msr_val |= BIT(26); - + msr_val = therm_intr_pkg_clear_mask; } msr_val &= ~bit_mask; @@ -708,6 +764,9 @@ void intel_init_thermal(struct cpuinfo_x86 *c) h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; apic_write(APIC_LVTTHMR, h); + thermal_intr_init_core_clear_mask(); + thermal_intr_init_pkg_clear_mask(); + rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable) wrmsr(MSR_IA32_THERM_INTERRUPT, From b89ce1177d42d5c124e83f3858818cd4e6a2c46f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Dec 2021 17:34:16 +0100 Subject: [PATCH 837/898] drm/armada: Fix a potential double free in an error handling path 'priv' is a managed resource, so there is no need to free it explicitly or there will be a double free(). Fixes: 90ad200b4cbc ("drm/armada: Use devm_drm_dev_alloc") Signed-off-by: Christophe JAILLET Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/c4f3c9207a9fce35cb6dd2cc60e755275961588a.1640536364.git.christophe.jaillet@wanadoo.fr --- drivers/gpu/drm/armada/armada_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c index 0643887800b4d..142668cd6d7cd 100644 --- a/drivers/gpu/drm/armada/armada_drv.c +++ b/drivers/gpu/drm/armada/armada_drv.c @@ -99,7 +99,6 @@ static int armada_drm_bind(struct device *dev) if (ret) { dev_err(dev, "[" DRM_NAME ":%s] can't kick out simple-fb: %d\n", __func__, ret); - kfree(priv); return ret; } From 4654e9f9f43993eb9ce383fa7c88d14b052b8cc3 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Thu, 30 Mar 2023 14:13:14 +0000 Subject: [PATCH 838/898] amd-pstate: Fix amd_pstate mode switch amd_pstate mode can be changed by writing the mode name to the `status` sysfs. But some combinations are not working. Fix this issue by taking care of the edge cases. Before the fix the mode change combination test fails: #./pst_test.sh Test passed: from: disable, to Test passed: from: disable, to disable Test failed: 1, From mode: disable, to mode: passive Test failed: 1, From mode: disable, to mode: active Test failed: 1, From mode: passive, to mode: active Test passed: from: passive, to disable Test failed: 1, From mode: passive, to mode: passive Test failed: 1, From mode: passive, to mode: active Test failed: 1, From mode: active, to mode: active Test passed: from: active, to disable Test failed: 1, From mode: active, to mode: passive Test failed: 1, From mode: active, to mode: active After the fix test passes: #./pst_test.sh Test passed: from: disable, to Test passed: from: disable, to disable Test passed: from: disable, to passive Test passed: from: disable, to active Test passed: from: passive, to active Test passed: from: passive, to disable Test passed: from: passive, to passive Test passed: from: passive, to active Test passed: from: active, to active Test passed: from: active, to disable Test passed: from: active, to passive Test passed: from: active, to active Fixes: abd61c08ef349 ("cpufreq: amd-pstate: add driver working mode switch support") Acked-by: Huang Rui Reviewed-by: Alexey Kardashevskiy Signed-off-by: Wyes Karny Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 73c7643b26972..8dd46fad151eb 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -840,22 +840,20 @@ static int amd_pstate_update_status(const char *buf, size_t size) switch(mode_idx) { case AMD_PSTATE_DISABLE: - if (!current_pstate_driver) - return -EINVAL; - if (cppc_state == AMD_PSTATE_ACTIVE) - return -EBUSY; - cpufreq_unregister_driver(current_pstate_driver); - amd_pstate_driver_cleanup(); + if (current_pstate_driver) { + cpufreq_unregister_driver(current_pstate_driver); + amd_pstate_driver_cleanup(); + } break; case AMD_PSTATE_PASSIVE: if (current_pstate_driver) { if (current_pstate_driver == &amd_pstate_driver) return 0; cpufreq_unregister_driver(current_pstate_driver); - cppc_state = AMD_PSTATE_PASSIVE; - current_pstate_driver = &amd_pstate_driver; } + current_pstate_driver = &amd_pstate_driver; + cppc_state = AMD_PSTATE_PASSIVE; ret = cpufreq_register_driver(current_pstate_driver); break; case AMD_PSTATE_ACTIVE: @@ -863,10 +861,10 @@ static int amd_pstate_update_status(const char *buf, size_t size) if (current_pstate_driver == &amd_pstate_epp_driver) return 0; cpufreq_unregister_driver(current_pstate_driver); - current_pstate_driver = &amd_pstate_epp_driver; - cppc_state = AMD_PSTATE_ACTIVE; } + current_pstate_driver = &amd_pstate_epp_driver; + cppc_state = AMD_PSTATE_ACTIVE; ret = cpufreq_register_driver(current_pstate_driver); break; default: From 05cda427126f30ce3fc8ffd82fd6f5196398d502 Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Tue, 11 Apr 2023 20:31:44 +0200 Subject: [PATCH 839/898] ACPI: resource: Skip IRQ override on ASUS ExpertBook B1502CBA Like the ASUS ExpertBook B2502CBA and various ASUS Vivobook laptops, the ASUS ExpertBook B1502CBA has an ACPI DSDT table that describes IRQ 1 as ActiveLow while the kernel overrides it to Edge_High. $ sudo dmesg | grep DMI DMI: ASUSTeK COMPUTER INC. ASUS EXPERTBOOK B1502CBA_B1502CBA/B1502CBA, BIOS B1502CBA.300 01/18/2023 $ grep -A 40 PS2K dsdt.dsl | grep IRQ -A 1 IRQ (Level, ActiveLow, Exclusive, ) {1} This prevents the keyboard from working. To fix this issue, add this laptop to the skip_override_table so that the kernel does not override IRQ 1. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217323 Signed-off-by: Paul Menzel Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 7b4801ce62d6b..e8492b3a393ab 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -439,6 +439,13 @@ static const struct dmi_system_id asus_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "S5602ZA"), }, }, + { + .ident = "Asus ExpertBook B1502CBA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "B1502CBA"), + }, + }, { .ident = "Asus ExpertBook B2402CBA", .matches = { From e87245045b2b23e6dae3406c30b11cd8429eb061 Mon Sep 17 00:00:00 2001 From: Liu Peibao Date: Sat, 1 Apr 2023 17:13:04 +0800 Subject: [PATCH 840/898] dt-bindings: interrupt-controller: loongarch: Fix mismatched compatible The "compatible" doesn't match what the kernel is using. Fix it as kernel using. Fixes: 6b2748ada244 ("dt-bindings: interrupt-controller: add yaml for LoongArch CPU interrupt controller") Reported-by: Rob Herring Link: https://lore.kernel.org/all/20221208020954.GA3368836-robh@kernel.org/ Acked-by: Krzysztof Kozlowski Signed-off-by: Liu Peibao Link: https://lore.kernel.org/r/20230401091304.12633-1-liupeibao@loongson.cn [robh: Rename file to match compatible, fix subject typo] Signed-off-by: Rob Herring --- ...ntroller.yaml => loongson,cpu-interrupt-controller.yaml} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename Documentation/devicetree/bindings/interrupt-controller/{loongarch,cpu-interrupt-controller.yaml => loongson,cpu-interrupt-controller.yaml} (72%) diff --git a/Documentation/devicetree/bindings/interrupt-controller/loongarch,cpu-interrupt-controller.yaml b/Documentation/devicetree/bindings/interrupt-controller/loongson,cpu-interrupt-controller.yaml similarity index 72% rename from Documentation/devicetree/bindings/interrupt-controller/loongarch,cpu-interrupt-controller.yaml rename to Documentation/devicetree/bindings/interrupt-controller/loongson,cpu-interrupt-controller.yaml index 2a1cf885c99df..adf989976dccc 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/loongarch,cpu-interrupt-controller.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/loongson,cpu-interrupt-controller.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause %YAML 1.2 --- -$id: http://devicetree.org/schemas/interrupt-controller/loongarch,cpu-interrupt-controller.yaml# +$id: http://devicetree.org/schemas/interrupt-controller/loongson,cpu-interrupt-controller.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# title: LoongArch CPU Interrupt Controller @@ -11,7 +11,7 @@ maintainers: properties: compatible: - const: loongarch,cpu-interrupt-controller + const: loongson,cpu-interrupt-controller '#interrupt-cells': const: 1 @@ -28,7 +28,7 @@ required: examples: - | interrupt-controller { - compatible = "loongarch,cpu-interrupt-controller"; + compatible = "loongson,cpu-interrupt-controller"; #interrupt-cells = <1>; interrupt-controller; }; From 1a50d9403fb90cbe4dea0ec9fd0351d2ecbd8924 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 30 Mar 2023 15:26:13 +0200 Subject: [PATCH 841/898] treewide: Fix probing of devices in DT overlays When loading a DT overlay that creates a device, the device is not probed, unless the DT overlay is unloaded and reloaded again. After the recent refactoring to improve fw_devlink, it no longer depends on the "compatible" property to identify which device tree nodes will become struct devices. fw_devlink now picks up dangling consumers (consumers pointing to descendent device tree nodes of a device that aren't converted to child devices) when a device is successfully bound to a driver. See __fw_devlink_pickup_dangling_consumers(). However, during DT overlay, a device's device tree node can have sub-nodes added/removed without unbinding/rebinding the driver. This difference in behavior between the normal device instantiation and probing flow vs. the DT overlay flow has a bunch of implications that are pointed out elsewhere[1]. One of them is that the fw_devlink logic to pick up dangling consumers is never exercised. This patch solves the fw_devlink issue by marking all DT nodes added by DT overlays with FWNODE_FLAG_NOT_DEVICE (fwnode that won't become device), and by clearing the flag when a struct device is actually created for the DT node. This way, fw_devlink knows not to have consumers waiting on these newly added DT nodes, and to propagate the dependency to an ancestor DT node that has the corresponding struct device. Based on a patch by Saravana Kannan, which covered only platform and spi devices. [1] https://lore.kernel.org/r/CAGETcx_bkuFaLCiPrAWCPQz+w79ccDp6=9e881qmK=vx3hBMyg@mail.gmail.com Fixes: 4a032827daa89350 ("of: property: Simplify of_link_to_phandle()") Link: https://lore.kernel.org/r/CAGETcx_+rhHvaC_HJXGrr5_WAd2+k5f=rWYnkCZ6z5bGX-wj4w@mail.gmail.com Signed-off-by: Geert Uytterhoeven Acked-by: Mark Brown Acked-by: Wolfram Sang # for I2C Acked-by: Shawn Guo Acked-by: Saravana Kannan Tested-by: Ivan Bornyakov Link: https://lore.kernel.org/r/e1fa546682ea4c8474ff997ab6244c5e11b6f8bc.1680182615.git.geert+renesas@glider.be Signed-off-by: Rob Herring --- drivers/bus/imx-weim.c | 6 ++++++ drivers/i2c/i2c-core-of.c | 5 +++++ drivers/of/dynamic.c | 1 + drivers/of/platform.c | 5 +++++ drivers/spi/spi.c | 5 +++++ 5 files changed, 22 insertions(+) diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c index 828c66bbaa676..a8dfe94491fd7 100644 --- a/drivers/bus/imx-weim.c +++ b/drivers/bus/imx-weim.c @@ -331,6 +331,12 @@ static int of_weim_notify(struct notifier_block *nb, unsigned long action, "Failed to setup timing for '%pOF'\n", rd->dn); if (!of_node_check_flag(rd->dn, OF_POPULATED)) { + /* + * Clear the flag before adding the device so that + * fw_devlink doesn't skip adding consumers to this + * device. + */ + rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE; if (!of_platform_device_create(rd->dn, NULL, &pdev->dev)) { dev_err(&pdev->dev, "Failed to create child device '%pOF'\n", diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c index 3ed74aa4b44bb..1073f82d5dd47 100644 --- a/drivers/i2c/i2c-core-of.c +++ b/drivers/i2c/i2c-core-of.c @@ -244,6 +244,11 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, return NOTIFY_OK; } + /* + * Clear the flag before adding the device so that fw_devlink + * doesn't skip adding consumers to this device. + */ + rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE; client = of_i2c_register_device(adap, rd->dn); if (IS_ERR(client)) { dev_err(&adap->dev, "failed to create client for '%pOF'\n", diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index cd3821a6444f0..788c77b257b38 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -226,6 +226,7 @@ static void __of_attach_node(struct device_node *np) np->sibling = np->parent->child; np->parent->child = np; of_node_clear_flag(np, OF_DETACHED); + np->fwnode.flags |= FWNODE_FLAG_NOT_DEVICE; } /** diff --git a/drivers/of/platform.c b/drivers/of/platform.c index b3878a98d27f7..daf2acf7dad5a 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -740,6 +740,11 @@ static int of_platform_notify(struct notifier_block *nb, if (of_node_check_flag(rd->dn, OF_POPULATED)) return NOTIFY_OK; + /* + * Clear the flag before adding the device so that fw_devlink + * doesn't skip adding consumers to this device. + */ + rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE; /* pdev_parent may be NULL when no bus platform device */ pdev_parent = of_find_device_by_node(rd->dn->parent); pdev = of_platform_device_create(rd->dn, NULL, diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 3cc7bb4d03dec..94f60f7561d4c 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -4436,6 +4436,11 @@ static int of_spi_notify(struct notifier_block *nb, unsigned long action, return NOTIFY_OK; } + /* + * Clear the flag before adding the device so that fw_devlink + * doesn't skip adding consumers to this device. + */ + rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE; spi = of_register_spi_device(ctlr, rd->dn); put_device(&ctlr->dev); From 8d736482749f6d350892ef83a7a11d43cd49981e Mon Sep 17 00:00:00 2001 From: Mathis Salmen Date: Thu, 6 Apr 2023 12:11:31 +0200 Subject: [PATCH 842/898] riscv: add icache flush for nommu sigreturn trampoline In a NOMMU kernel, sigreturn trampolines are generated on the user stack by setup_rt_frame. Currently, these trampolines are not instruction fenced, thus their visibility to ifetch is not guaranteed. This patch adds a flush_icache_range in setup_rt_frame to fix this problem. Signed-off-by: Mathis Salmen Fixes: 6bd33e1ece52 ("riscv: add nommu support") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230406101130.82304-1-mathis.salmen@matsal.de Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/signal.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index bfb2afa4135f8..dee66c9290cce 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -19,6 +19,7 @@ #include #include #include +#include extern u32 __user_rt_sigreturn[2]; @@ -181,6 +182,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, { struct rt_sigframe __user *frame; long err = 0; + unsigned long __maybe_unused addr; frame = get_sigframe(ksig, regs, sizeof(*frame)); if (!access_ok(frame, sizeof(*frame))) @@ -209,7 +211,12 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, if (copy_to_user(&frame->sigreturn_code, __user_rt_sigreturn, sizeof(frame->sigreturn_code))) return -EFAULT; - regs->ra = (unsigned long)&frame->sigreturn_code; + + addr = (unsigned long)&frame->sigreturn_code; + /* Make sure the two instructions are pushed to icache. */ + flush_icache_range(addr, addr + sizeof(frame->sigreturn_code)); + + regs->ra = addr; #endif /* CONFIG_MMU */ /* From 534e465845ebfb4a97eb5459d3931a0b35e3b9a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kornel=20Dul=C4=99ba?= Date: Tue, 11 Apr 2023 13:49:32 +0000 Subject: [PATCH 843/898] Revert "pinctrl: amd: Disable and mask interrupts on resume" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit b26cd9325be4c1fcd331b77f10acb627c560d4d7. This patch introduces a regression on Lenovo Z13, which can't wake from the lid with it applied; and some unspecified AMD based Dell platforms are unable to wake from hitting the power button Signed-off-by: Kornel Dulęba Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20230411134932.292287-1-korneld@chromium.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 36 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 609821b756c2f..9236a132c7bab 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -872,34 +872,32 @@ static const struct pinconf_ops amd_pinconf_ops = { .pin_config_group_set = amd_pinconf_group_set, }; -static void amd_gpio_irq_init_pin(struct amd_gpio *gpio_dev, int pin) +static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) { - const struct pin_desc *pd; + struct pinctrl_desc *desc = gpio_dev->pctrl->desc; unsigned long flags; u32 pin_reg, mask; + int i; mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) | BIT(INTERRUPT_MASK_OFF) | BIT(INTERRUPT_ENABLE_OFF) | BIT(WAKE_CNTRL_OFF_S4); - pd = pin_desc_get(gpio_dev->pctrl, pin); - if (!pd) - return; + for (i = 0; i < desc->npins; i++) { + int pin = desc->pins[i].number; + const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin); - raw_spin_lock_irqsave(&gpio_dev->lock, flags); - pin_reg = readl(gpio_dev->base + pin * 4); - pin_reg &= ~mask; - writel(pin_reg, gpio_dev->base + pin * 4); - raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); -} + if (!pd) + continue; -static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) -{ - struct pinctrl_desc *desc = gpio_dev->pctrl->desc; - int i; + raw_spin_lock_irqsave(&gpio_dev->lock, flags); - for (i = 0; i < desc->npins; i++) - amd_gpio_irq_init_pin(gpio_dev, i); + pin_reg = readl(gpio_dev->base + i * 4); + pin_reg &= ~mask; + writel(pin_reg, gpio_dev->base + i * 4); + + raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); + } } #ifdef CONFIG_PM_SLEEP @@ -952,10 +950,8 @@ static int amd_gpio_resume(struct device *dev) for (i = 0; i < desc->npins; i++) { int pin = desc->pins[i].number; - if (!amd_gpio_should_save(gpio_dev, pin)) { - amd_gpio_irq_init_pin(gpio_dev, pin); + if (!amd_gpio_should_save(gpio_dev, pin)) continue; - } raw_spin_lock_irqsave(&gpio_dev->lock, flags); gpio_dev->saved_regs[i] |= readl(gpio_dev->base + pin * 4) & PIN_IRQ_PENDING; From c8e22b7a1694bb8d025ea636816472739d859145 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 4 Apr 2023 21:23:42 +0200 Subject: [PATCH 844/898] scsi: ses: Handle enclosure with just a primary component gracefully This reverts commit 3fe97ff3d949 ("scsi: ses: Don't attach if enclosure has no components") and introduces proper handling of case where there are no detected secondary components, but primary component (enumerated in num_enclosures) does exist. That fix was originally proposed by Ding Hui . Completely ignoring devices that have one primary enclosure and no secondary one results in ses_intf_add() bailing completely scsi 2:0:0:254: enclosure has no enumerated components scsi 2:0:0:254: Failed to bind enclosure -12ven in valid configurations such even on valid configurations with 1 primary and 0 secondary enclosures as below: # sg_ses /dev/sg0 3PARdata SES 3321 Supported diagnostic pages: Supported Diagnostic Pages [sdp] [0x0] Configuration (SES) [cf] [0x1] Short Enclosure Status (SES) [ses] [0x8] # sg_ses -p cf /dev/sg0 3PARdata SES 3321 Configuration diagnostic page: number of secondary subenclosures: 0 generation code: 0x0 enclosure descriptor list Subenclosure identifier: 0 [primary] relative ES process id: 0, number of ES processes: 1 number of type descriptor headers: 1 enclosure logical identifier (hex): 20000002ac02068d enclosure vendor: 3PARdata product: VV rev: 3321 type descriptor header and text list Element type: Unspecified, subenclosure id: 0 number of possible elements: 1 The changelog for the original fix follows ===== We can get a crash when disconnecting the iSCSI session, the call trace like this: [ffff00002a00fb70] kfree at ffff00000830e224 [ffff00002a00fba0] ses_intf_remove at ffff000001f200e4 [ffff00002a00fbd0] device_del at ffff0000086b6a98 [ffff00002a00fc50] device_unregister at ffff0000086b6d58 [ffff00002a00fc70] __scsi_remove_device at ffff00000870608c [ffff00002a00fca0] scsi_remove_device at ffff000008706134 [ffff00002a00fcc0] __scsi_remove_target at ffff0000087062e4 [ffff00002a00fd10] scsi_remove_target at ffff0000087064c0 [ffff00002a00fd70] __iscsi_unbind_session at ffff000001c872c4 [ffff00002a00fdb0] process_one_work at ffff00000810f35c [ffff00002a00fe00] worker_thread at ffff00000810f648 [ffff00002a00fe70] kthread at ffff000008116e98 In ses_intf_add, components count could be 0, and kcalloc 0 size scomp, but not saved in edev->component[i].scratch In this situation, edev->component[0].scratch is an invalid pointer, when kfree it in ses_intf_remove_enclosure, a crash like above would happen The call trace also could be other random cases when kfree cannot catch the invalid pointer We should not use edev->component[] array when the components count is 0 We also need check index when use edev->component[] array in ses_enclosure_data_process ===== Reported-by: Michal Kolar Originally-by: Ding Hui Cc: stable@vger.kernel.org Fixes: 3fe97ff3d949 ("scsi: ses: Don't attach if enclosure has no components") Signed-off-by: Jiri Kosina Link: https://lore.kernel.org/r/nycvar.YFH.7.76.2304042122270.29760@cbobk.fhfr.pm Tested-by: Michal Kolar Signed-off-by: Martin K. Petersen --- drivers/scsi/ses.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index b11a9162e73aa..b54f2c6c08c36 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -509,9 +509,6 @@ static int ses_enclosure_find_by_addr(struct enclosure_device *edev, int i; struct ses_component *scomp; - if (!edev->component[0].scratch) - return 0; - for (i = 0; i < edev->components; i++) { scomp = edev->component[i].scratch; if (scomp->addr != efd->addr) @@ -602,8 +599,10 @@ static void ses_enclosure_data_process(struct enclosure_device *edev, components++, type_ptr[0], name); - else + else if (components < edev->components) ecomp = &edev->component[components++]; + else + ecomp = ERR_PTR(-EINVAL); if (!IS_ERR(ecomp)) { if (addl_desc_ptr) { @@ -734,11 +733,6 @@ static int ses_intf_add(struct device *cdev, components += type_ptr[1]; } - if (components == 0) { - sdev_printk(KERN_WARNING, sdev, "enclosure has no enumerated components\n"); - goto err_free; - } - ses_dev->page1 = buf; ses_dev->page1_len = len; buf = NULL; @@ -780,9 +774,11 @@ static int ses_intf_add(struct device *cdev, buf = NULL; } page2_not_supported: - scomp = kcalloc(components, sizeof(struct ses_component), GFP_KERNEL); - if (!scomp) - goto err_free; + if (components > 0) { + scomp = kcalloc(components, sizeof(struct ses_component), GFP_KERNEL); + if (!scomp) + goto err_free; + } edev = enclosure_register(cdev->parent, dev_name(&sdev->sdev_gendev), components, &ses_enclosure_callbacks); From 7573099e10ca69c3be33995c1fcd0d241226816d Mon Sep 17 00:00:00 2001 From: Denis Plotnikov Date: Fri, 7 Apr 2023 10:18:49 +0300 Subject: [PATCH 845/898] qlcnic: check pci_reset_function result Static code analyzer complains to unchecked return value. The result of pci_reset_function() is unchecked. Despite, the issue is on the FLR supported code path and in that case reset can be done with pcie_flr(), the patch uses less invasive approach by adding the result check of pci_reset_function(). Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 7e2cf4feba05 ("qlcnic: change driver hardware interface mechanism") Signed-off-by: Denis Plotnikov Reviewed-by: Simon Horman Reviewed-by: Bjorn Helgaas Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c index 87f76bac2e463..eb827b86ecae8 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c @@ -628,7 +628,13 @@ int qlcnic_fw_create_ctx(struct qlcnic_adapter *dev) int i, err, ring; if (dev->flags & QLCNIC_NEED_FLR) { - pci_reset_function(dev->pdev); + err = pci_reset_function(dev->pdev); + if (err) { + dev_err(&dev->pdev->dev, + "Adapter reset failed (%d). Please reboot\n", + err); + return err; + } dev->flags &= ~QLCNIC_NEED_FLR; } From 9744d2bf19762703704ecba885b7ac282c02eacf Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 8 Apr 2023 11:49:43 -0700 Subject: [PATCH 846/898] smc: Fix use-after-free in tcp_write_timer_handler(). With Eric's ref tracker, syzbot finally found a repro for use-after-free in tcp_write_timer_handler() by kernel TCP sockets. [0] If SMC creates a kernel socket in __smc_create(), the kernel socket is supposed to be freed in smc_clcsock_release() by calling sock_release() when we close() the parent SMC socket. However, at the end of smc_clcsock_release(), the kernel socket's sk_state might not be TCP_CLOSE. This means that we have not called inet_csk_destroy_sock() in __tcp_close() and have not stopped the TCP timers. The kernel socket's TCP timers can be fired later, so we need to hold a refcnt for net as we do for MPTCP subflows in mptcp_subflow_create_socket(). [0]: leaked reference. sk_alloc (./include/net/net_namespace.h:335 net/core/sock.c:2108) inet_create (net/ipv4/af_inet.c:319 net/ipv4/af_inet.c:244) __sock_create (net/socket.c:1546) smc_create (net/smc/af_smc.c:3269 net/smc/af_smc.c:3284) __sock_create (net/socket.c:1546) __sys_socket (net/socket.c:1634 net/socket.c:1618 net/socket.c:1661) __x64_sys_socket (net/socket.c:1672) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) ================================================================== BUG: KASAN: slab-use-after-free in tcp_write_timer_handler (net/ipv4/tcp_timer.c:378 net/ipv4/tcp_timer.c:624 net/ipv4/tcp_timer.c:594) Read of size 1 at addr ffff888052b65e0d by task syzrepro/18091 CPU: 0 PID: 18091 Comm: syzrepro Tainted: G W 6.3.0-rc4-01174-gb5d54eb5899a #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.amzn2022.0.1 04/01/2014 Call Trace: dump_stack_lvl (lib/dump_stack.c:107) print_report (mm/kasan/report.c:320 mm/kasan/report.c:430) kasan_report (mm/kasan/report.c:538) tcp_write_timer_handler (net/ipv4/tcp_timer.c:378 net/ipv4/tcp_timer.c:624 net/ipv4/tcp_timer.c:594) tcp_write_timer (./include/linux/spinlock.h:390 net/ipv4/tcp_timer.c:643) call_timer_fn (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/timer.h:127 kernel/time/timer.c:1701) __run_timers.part.0 (kernel/time/timer.c:1752 kernel/time/timer.c:2022) run_timer_softirq (kernel/time/timer.c:2037) __do_softirq (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/irq.h:142 kernel/softirq.c:572) __irq_exit_rcu (kernel/softirq.c:445 kernel/softirq.c:650) irq_exit_rcu (kernel/softirq.c:664) sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1107 (discriminator 14)) Fixes: ac7138746e14 ("smc: establish new socket family") Reported-by: syzbot+7e1e1bdb852961150198@syzkaller.appspotmail.com Link: https://lore.kernel.org/netdev/000000000000a3f51805f8bcc43a@google.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Tony Lu Signed-off-by: David S. Miller --- net/smc/af_smc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c6b4a62276f6d..50c38b624f772 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3270,6 +3270,17 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol, sk_common_release(sk); goto out; } + + /* smc_clcsock_release() does not wait smc->clcsock->sk's + * destruction; its sk_state might not be TCP_CLOSE after + * smc->sk is close()d, and TCP timers can be fired later, + * which need net ref. + */ + sk = smc->clcsock->sk; + __netns_tracker_free(net, &sk->ns_tracker, false); + sk->sk_net_refcnt = 1; + get_net_track(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); } else { smc->clcsock = clcsock; } From a56ef25619e079bd7d744636cf18d054d1e91982 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Sat, 8 Apr 2023 12:43:21 -0700 Subject: [PATCH 847/898] net: wwan: iosm: Fix error handling path in ipc_pcie_probe() Smatch reports: drivers/net/wwan/iosm/iosm_ipc_pcie.c:298 ipc_pcie_probe() warn: missing unwind goto? When dma_set_mask fails it directly returns without disabling pci device and freeing ipc_pcie. Fix this my calling a correct goto label As dma_set_mask returns either 0 or -EIO, we can use a goto label, as it finally returns -EIO. Add a set_mask_fail goto label which stands consistent with other goto labels in this function.. Fixes: 035e3befc191 ("net: wwan: iosm: fix driver not working with INTEL_IOMMU disabled") Reviewed-by: Simon Horman Signed-off-by: Harshit Mogalapalli Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_pcie.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c index 5bf5a93937c9c..04517bd3325a2 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c +++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c @@ -295,7 +295,7 @@ static int ipc_pcie_probe(struct pci_dev *pci, ret = dma_set_mask(ipc_pcie->dev, DMA_BIT_MASK(64)); if (ret) { dev_err(ipc_pcie->dev, "Could not set PCI DMA mask: %d", ret); - return ret; + goto set_mask_fail; } ipc_pcie_config_aspm(ipc_pcie); @@ -323,6 +323,7 @@ static int ipc_pcie_probe(struct pci_dev *pci, imem_init_fail: ipc_pcie_resources_release(ipc_pcie); resources_req_fail: +set_mask_fail: pci_disable_device(pci); pci_enable_fail: kfree(ipc_pcie); From 136f36c74b0345d5d0087d4094894a006470bbd5 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 10 Apr 2023 18:27:19 -0500 Subject: [PATCH 848/898] net: ti/cpsw: Add explicit platform_device.h and of_platform.h includes TI CPSW uses of_platform_* functions which are declared in of_platform.h. of_platform.h gets implicitly included by of_device.h, but that is going to be removed soon. Nothing else depends on of_device.h so it can be dropped. of_platform.h also implicitly includes platform_device.h, so add an explicit include for it, too. Signed-off-by: Rob Herring Reviewed-by: Jesse Brandeburg Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 +- drivers/net/ethernet/ti/cpsw_new.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 37f0b62ec5d6a..f9cd566d1c9b5 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 35128dd45ffce..c61e4e44a78f0 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -23,7 +24,7 @@ #include #include #include -#include +#include #include #include #include From 91dcf1e8068e9a8823e419a7a34ff4341275fb70 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Tue, 11 Apr 2023 11:06:11 +0200 Subject: [PATCH 849/898] sched/fair: Fix imbalance overflow When local group is fully busy but its average load is above system load, computing the imbalance will overflow and local group is not the best target for pulling this load. Fixes: 0b0695f2b34a ("sched/fair: Rework load_balance()") Reported-by: Tingjia Cao Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Tested-by: Tingjia Cao Link: https://lore.kernel.org/lkml/CABcWv9_DAhVBOq2=W=2ypKE9dKM5s2DvoV8-U0+GDwwuKZ89jQ@mail.gmail.com/T/ --- kernel/sched/fair.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6986ea31c9844..5f6587d94c1dd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -10238,6 +10238,16 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) / sds->total_capacity; + + /* + * If the local group is more loaded than the average system + * load, don't try to pull any tasks. + */ + if (local->avg_load >= sds->avg_load) { + env->imbalance = 0; + return; + } + } /* From 0bcc4025550403ae28d2984bddacafbca0a2f112 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 12 Apr 2023 13:18:57 +0100 Subject: [PATCH 850/898] netfs: Fix netfs_extract_iter_to_sg() for ITER_UBUF/IOVEC Fix netfs_extract_iter_to_sg() for ITER_UBUF and ITER_IOVEC to set the size of the page to the part of the page extracted, not the remaining amount of data in the extracted page array at that point. This doesn't yet affect anything as cifs, the only current user, only passes in non-user-backed iterators. Fixes: 018584697533 ("netfs: Add a function to extract an iterator into a scatterlist") Signed-off-by: David Howells Reviewed-by: Jeff Layton Cc: Steve French Cc: Shyam Prasad N Cc: Rohith Surabattula Signed-off-by: Linus Torvalds --- fs/netfs/iterator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/netfs/iterator.c b/fs/netfs/iterator.c index e9a45dea748a2..8a4c866874297 100644 --- a/fs/netfs/iterator.c +++ b/fs/netfs/iterator.c @@ -139,7 +139,7 @@ static ssize_t netfs_extract_user_to_sg(struct iov_iter *iter, size_t seg = min_t(size_t, PAGE_SIZE - off, len); *pages++ = NULL; - sg_set_page(sg, page, len, off); + sg_set_page(sg, page, seg, off); sgtable->nents++; sg++; len -= seg; From 57dcd64c7e036299ef526b400a8d12b8a2352f26 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 5 Apr 2023 22:15:32 +0900 Subject: [PATCH 851/898] cgroup,freezer: hold cpu_hotplug_lock before freezer_mutex syzbot is reporting circular locking dependency between cpu_hotplug_lock and freezer_mutex, for commit f5d39b020809 ("freezer,sched: Rewrite core freezer logic") replaced atomic_inc() in freezer_apply_state() with static_branch_inc() which holds cpu_hotplug_lock. cpu_hotplug_lock => cgroup_threadgroup_rwsem => freezer_mutex cgroup_file_write() { cgroup_procs_write() { __cgroup_procs_write() { cgroup_procs_write_start() { cgroup_attach_lock() { cpus_read_lock() { percpu_down_read(&cpu_hotplug_lock); } percpu_down_write(&cgroup_threadgroup_rwsem); } } cgroup_attach_task() { cgroup_migrate() { cgroup_migrate_execute() { freezer_attach() { mutex_lock(&freezer_mutex); (...snipped...) } } } } (...snipped...) } } } freezer_mutex => cpu_hotplug_lock cgroup_file_write() { freezer_write() { freezer_change_state() { mutex_lock(&freezer_mutex); freezer_apply_state() { static_branch_inc(&freezer_active) { static_key_slow_inc() { cpus_read_lock(); static_key_slow_inc_cpuslocked(); cpus_read_unlock(); } } } mutex_unlock(&freezer_mutex); } } } Swap locking order by moving cpus_read_lock() in freezer_apply_state() to before mutex_lock(&freezer_mutex) in freezer_change_state(). Reported-by: syzbot Link: https://syzkaller.appspot.com/bug?extid=c39682e86c9d84152f93 Suggested-by: Hillf Danton Fixes: f5d39b020809 ("freezer,sched: Rewrite core freezer logic") Signed-off-by: Tetsuo Handa Acked-by: Peter Zijlstra (Intel) Reviewed-by: Mukesh Ojha Signed-off-by: Tejun Heo --- kernel/cgroup/legacy_freezer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c index 1b6b21851e9d4..936473203a6b5 100644 --- a/kernel/cgroup/legacy_freezer.c +++ b/kernel/cgroup/legacy_freezer.c @@ -22,6 +22,7 @@ #include #include #include +#include /* * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is @@ -350,7 +351,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze, if (freeze) { if (!(freezer->state & CGROUP_FREEZING)) - static_branch_inc(&freezer_active); + static_branch_inc_cpuslocked(&freezer_active); freezer->state |= state; freeze_cgroup(freezer); } else { @@ -361,7 +362,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze, if (!(freezer->state & CGROUP_FREEZING)) { freezer->state &= ~CGROUP_FROZEN; if (was_freezing) - static_branch_dec(&freezer_active); + static_branch_dec_cpuslocked(&freezer_active); unfreeze_cgroup(freezer); } } @@ -379,6 +380,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze) { struct cgroup_subsys_state *pos; + cpus_read_lock(); /* * Update all its descendants in pre-order traversal. Each * descendant will try to inherit its parent's FREEZING state as @@ -407,6 +409,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze) } rcu_read_unlock(); mutex_unlock(&freezer_mutex); + cpus_read_unlock(); } static ssize_t freezer_write(struct kernfs_open_file *of, From ba9182a89626d5f83c2ee4594f55cb9c1e60f0e2 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 11 Apr 2023 09:35:57 -0400 Subject: [PATCH 852/898] cgroup/cpuset: Wake up cpuset_attach_wq tasks in cpuset_cancel_attach() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a successful cpuset_can_attach() call which increments the attach_in_progress flag, either cpuset_cancel_attach() or cpuset_attach() will be called later. In cpuset_attach(), tasks in cpuset_attach_wq, if present, will be woken up at the end. That is not the case in cpuset_cancel_attach(). So missed wakeup is possible if the attach operation is somehow cancelled. Fix that by doing the wakeup in cpuset_cancel_attach() as well. Fixes: e44193d39e8d ("cpuset: let hotplug propagation work wait for task attaching") Signed-off-by: Waiman Long Reviewed-by: Michal Koutný Cc: stable@vger.kernel.org # v3.11+ Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index f310915d12575..ff7eb8e2efdc0 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2502,11 +2502,15 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) static void cpuset_cancel_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; + struct cpuset *cs; cgroup_taskset_first(tset, &css); + cs = css_cs(css); percpu_down_write(&cpuset_rwsem); - css_cs(css)->attach_in_progress--; + cs->attach_in_progress--; + if (!cs->attach_in_progress) + wake_up(&cpuset_attach_wq); percpu_up_write(&cpuset_rwsem); } From 42a11bf5c5436e91b040aeb04063be1710bb9f9c Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 11 Apr 2023 09:35:58 -0400 Subject: [PATCH 853/898] cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly By default, the clone(2) syscall spawn a child process into the same cgroup as its parent. With the use of the CLONE_INTO_CGROUP flag introduced by commit ef2c41cf38a7 ("clone3: allow spawning processes into cgroups"), the child will be spawned into a different cgroup which is somewhat similar to writing the child's tid into "cgroup.threads". The current cpuset_fork() method does not properly handle the CLONE_INTO_CGROUP case where the cpuset of the child may be different from that of its parent. Update the cpuset_fork() method to treat the CLONE_INTO_CGROUP case similar to cpuset_attach(). Since the newly cloned task has not been running yet, its actual memory usage isn't known. So it is not necessary to make change to mm in cpuset_fork(). Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups") Reported-by: Giuseppe Scrivano Signed-off-by: Waiman Long Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 62 ++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 20 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index ff7eb8e2efdc0..2ccfae74acf9a 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2515,16 +2515,33 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset) } /* - * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach() + * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task() * but we can't allocate it dynamically there. Define it global and * allocate from cpuset_init(). */ static cpumask_var_t cpus_attach; +static nodemask_t cpuset_attach_nodemask_to; + +static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task) +{ + percpu_rwsem_assert_held(&cpuset_rwsem); + + if (cs != &top_cpuset) + guarantee_online_cpus(task, cpus_attach); + else + cpumask_copy(cpus_attach, task_cpu_possible_mask(task)); + /* + * can_attach beforehand should guarantee that this doesn't + * fail. TODO: have a better way to handle failure here + */ + WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach)); + + cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); + cpuset_update_task_spread_flags(cs, task); +} static void cpuset_attach(struct cgroup_taskset *tset) { - /* static buf protected by cpuset_rwsem */ - static nodemask_t cpuset_attach_nodemask_to; struct task_struct *task; struct task_struct *leader; struct cgroup_subsys_state *css; @@ -2555,20 +2572,8 @@ static void cpuset_attach(struct cgroup_taskset *tset) guarantee_online_mems(cs, &cpuset_attach_nodemask_to); - cgroup_taskset_for_each(task, css, tset) { - if (cs != &top_cpuset) - guarantee_online_cpus(task, cpus_attach); - else - cpumask_copy(cpus_attach, task_cpu_possible_mask(task)); - /* - * can_attach beforehand should guarantee that this doesn't - * fail. TODO: have a better way to handle failure here - */ - WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach)); - - cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); - cpuset_update_task_spread_flags(cs, task); - } + cgroup_taskset_for_each(task, css, tset) + cpuset_attach_task(cs, task); /* * Change mm for all threadgroup leaders. This is expensive and may @@ -3266,11 +3271,28 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) */ static void cpuset_fork(struct task_struct *task) { - if (task_css_is_root(task, cpuset_cgrp_id)) + struct cpuset *cs; + bool same_cs; + + rcu_read_lock(); + cs = task_cs(task); + same_cs = (cs == task_cs(current)); + rcu_read_unlock(); + + if (same_cs) { + if (cs == &top_cpuset) + return; + + set_cpus_allowed_ptr(task, current->cpus_ptr); + task->mems_allowed = current->mems_allowed; return; + } - set_cpus_allowed_ptr(task, current->cpus_ptr); - task->mems_allowed = current->mems_allowed; + /* CLONE_INTO_CGROUP */ + percpu_down_write(&cpuset_rwsem); + guarantee_online_mems(cs, &cpuset_attach_nodemask_to); + cpuset_attach_task(cs, task); + percpu_up_write(&cpuset_rwsem); } struct cgroup_subsys cpuset_cgrp_subsys = { From eee87853794187f6adbe19533ed79c8b44b36a91 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 11 Apr 2023 09:35:59 -0400 Subject: [PATCH 854/898] cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the case of CLONE_INTO_CGROUP, not all cpusets are ready to accept new tasks. It is too late to check that in cpuset_fork(). So we need to add the cpuset_can_fork() and cpuset_cancel_fork() methods to pre-check it before we can allow attachment to a different cpuset. We also need to set the attach_in_progress flag to alert other code that a new task is going to be added to the cpuset. Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups") Suggested-by: Michal Koutný Signed-off-by: Waiman Long Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 97 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 86 insertions(+), 11 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 2ccfae74acf9a..166a45019f665 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2453,6 +2453,20 @@ static int fmeter_getrate(struct fmeter *fmp) static struct cpuset *cpuset_attach_old_cs; +/* + * Check to see if a cpuset can accept a new task + * For v1, cpus_allowed and mems_allowed can't be empty. + * For v2, effective_cpus can't be empty. + * Note that in v1, effective_cpus = cpus_allowed. + */ +static int cpuset_can_attach_check(struct cpuset *cs) +{ + if (cpumask_empty(cs->effective_cpus) || + (!is_in_v2_mode() && nodes_empty(cs->mems_allowed))) + return -ENOSPC; + return 0; +} + /* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */ static int cpuset_can_attach(struct cgroup_taskset *tset) { @@ -2467,16 +2481,9 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) percpu_down_write(&cpuset_rwsem); - /* allow moving tasks into an empty cpuset if on default hierarchy */ - ret = -ENOSPC; - if (!is_in_v2_mode() && - (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) - goto out_unlock; - - /* - * Task cannot be moved to a cpuset with empty effective cpus. - */ - if (cpumask_empty(cs->effective_cpus)) + /* Check to see if task is allowed in the cpuset */ + ret = cpuset_can_attach_check(cs); + if (ret) goto out_unlock; cgroup_taskset_for_each(task, css, tset) { @@ -2493,7 +2500,6 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) * changes which zero cpus/mems_allowed. */ cs->attach_in_progress++; - ret = 0; out_unlock: percpu_up_write(&cpuset_rwsem); return ret; @@ -3264,6 +3270,68 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) percpu_up_write(&cpuset_rwsem); } +/* + * In case the child is cloned into a cpuset different from its parent, + * additional checks are done to see if the move is allowed. + */ +static int cpuset_can_fork(struct task_struct *task, struct css_set *cset) +{ + struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]); + bool same_cs; + int ret; + + rcu_read_lock(); + same_cs = (cs == task_cs(current)); + rcu_read_unlock(); + + if (same_cs) + return 0; + + lockdep_assert_held(&cgroup_mutex); + percpu_down_write(&cpuset_rwsem); + + /* Check to see if task is allowed in the cpuset */ + ret = cpuset_can_attach_check(cs); + if (ret) + goto out_unlock; + + ret = task_can_attach(task, cs->effective_cpus); + if (ret) + goto out_unlock; + + ret = security_task_setscheduler(task); + if (ret) + goto out_unlock; + + /* + * Mark attach is in progress. This makes validate_change() fail + * changes which zero cpus/mems_allowed. + */ + cs->attach_in_progress++; +out_unlock: + percpu_up_write(&cpuset_rwsem); + return ret; +} + +static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset) +{ + struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]); + bool same_cs; + + rcu_read_lock(); + same_cs = (cs == task_cs(current)); + rcu_read_unlock(); + + if (same_cs) + return; + + percpu_down_write(&cpuset_rwsem); + cs->attach_in_progress--; + if (!cs->attach_in_progress) + wake_up(&cpuset_attach_wq); + percpu_up_write(&cpuset_rwsem); +} + /* * Make sure the new task conform to the current state of its parent, * which could have been changed by cpuset just after it inherits the @@ -3292,6 +3360,11 @@ static void cpuset_fork(struct task_struct *task) percpu_down_write(&cpuset_rwsem); guarantee_online_mems(cs, &cpuset_attach_nodemask_to); cpuset_attach_task(cs, task); + + cs->attach_in_progress--; + if (!cs->attach_in_progress) + wake_up(&cpuset_attach_wq); + percpu_up_write(&cpuset_rwsem); } @@ -3305,6 +3378,8 @@ struct cgroup_subsys cpuset_cgrp_subsys = { .attach = cpuset_attach, .post_attach = cpuset_post_attach, .bind = cpuset_bind, + .can_fork = cpuset_can_fork, + .cancel_fork = cpuset_cancel_fork, .fork = cpuset_fork, .legacy_cftypes = legacy_files, .dfl_cftypes = dfl_files, From 7e27cb6ad4d85fc8bac2a2a896da62ef66b8598e Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 11 Apr 2023 09:36:00 -0400 Subject: [PATCH 855/898] cgroup/cpuset: Make cpuset_attach_task() skip subpartitions CPUs for top_cpuset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is found that attaching a task to the top_cpuset does not currently ignore CPUs allocated to subpartitions in cpuset_attach_task(). So the code is changed to fix that. Signed-off-by: Waiman Long Reviewed-by: Michal Koutný Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 166a45019f665..505d86b166426 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2535,7 +2535,8 @@ static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task) if (cs != &top_cpuset) guarantee_online_cpus(task, cpus_attach); else - cpumask_copy(cpus_attach, task_cpu_possible_mask(task)); + cpumask_andnot(cpus_attach, task_cpu_possible_mask(task), + cs->subparts_cpus); /* * can_attach beforehand should guarantee that this doesn't * fail. TODO: have a better way to handle failure here From b8ca445f550a9a079134f836466ddda3bfad6108 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Fri, 17 Feb 2023 13:26:56 +0800 Subject: [PATCH 856/898] drm/amd/display: Pass the right info to drm_dp_remove_payload [Why & How] drm_dp_remove_payload() interface was changed. Correct amdgpu dm code to pass the right parameter to the drm helper function. Reviewed-by: Jerry Zuo Acked-by: Qingqing Zhuo Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 57 ++++++++++++++++--- 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 1583157da355b..efd025d8961e6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -177,6 +177,40 @@ void dm_helpers_dp_update_branch_info( const struct dc_link *link) {} +static void dm_helpers_construct_old_payload( + struct dc_link *link, + int pbn_per_slot, + struct drm_dp_mst_atomic_payload *new_payload, + struct drm_dp_mst_atomic_payload *old_payload) +{ + struct link_mst_stream_allocation_table current_link_table = + link->mst_stream_alloc_table; + struct link_mst_stream_allocation *dc_alloc; + int i; + + *old_payload = *new_payload; + + /* Set correct time_slots/PBN of old payload. + * other fields (delete & dsc_enabled) in + * struct drm_dp_mst_atomic_payload are don't care fields + * while calling drm_dp_remove_payload() + */ + for (i = 0; i < current_link_table.stream_count; i++) { + dc_alloc = + ¤t_link_table.stream_allocations[i]; + + if (dc_alloc->vcp_id == new_payload->vcpi) { + old_payload->time_slots = dc_alloc->slot_count; + old_payload->pbn = dc_alloc->slot_count * pbn_per_slot; + break; + } + } + + /* make sure there is an old payload*/ + ASSERT(i != current_link_table.stream_count); + +} + /* * Writes payload allocation table in immediate downstream device. */ @@ -188,7 +222,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( { struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_topology_state *mst_state; - struct drm_dp_mst_atomic_payload *payload; + struct drm_dp_mst_atomic_payload *target_payload, *new_payload, old_payload; struct drm_dp_mst_topology_mgr *mst_mgr; aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; @@ -204,17 +238,26 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state); /* It's OK for this to fail */ - payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port); - if (enable) - drm_dp_add_payload_part1(mst_mgr, mst_state, payload); - else - drm_dp_remove_payload(mst_mgr, mst_state, payload, payload); + new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port); + + if (enable) { + target_payload = new_payload; + + drm_dp_add_payload_part1(mst_mgr, mst_state, new_payload); + } else { + /* construct old payload by VCPI*/ + dm_helpers_construct_old_payload(stream->link, mst_state->pbn_div, + new_payload, &old_payload); + target_payload = &old_payload; + + drm_dp_remove_payload(mst_mgr, mst_state, &old_payload, new_payload); + } /* mst_mgr->->payloads are VC payload notify MST branch using DPCD or * AUX message. The sequence is slot 1-63 allocated sequence for each * stream. AMD ASIC stream slot allocation should follow the same * sequence. copy DRM MST allocation to dc */ - fill_dc_mst_payload_table_from_drm(stream->link, enable, payload, proposed_table); + fill_dc_mst_payload_table_from_drm(stream->link, enable, target_payload, proposed_table); return true; } From f06b8887e3ef4f50098d3a949aef392c529c831a Mon Sep 17 00:00:00 2001 From: Horatio Zhang Date: Thu, 6 Apr 2023 11:17:38 +0800 Subject: [PATCH 857/898] drm/amd/pm: correct SMU13.0.7 pstate profiling clock settings Correct the pstate standard/peak profiling mode clock settings for SMU13.0.7. Signed-off-by: Horatio Zhang Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 9e1967d8049e3..fbde124aea2dc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -1329,9 +1329,17 @@ static int smu_v13_0_7_populate_umd_state_clk(struct smu_context *smu) &dpm_context->dpm_tables.fclk_table; struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *pptable = table_context->driver_pptable; + DriverReportedClocks_t driver_clocks = + pptable->SkuTable.DriverReportedClocks; pstate_table->gfxclk_pstate.min = gfx_table->min; - pstate_table->gfxclk_pstate.peak = gfx_table->max; + if (driver_clocks.GameClockAc && + (driver_clocks.GameClockAc < gfx_table->max)) + pstate_table->gfxclk_pstate.peak = driver_clocks.GameClockAc; + else + pstate_table->gfxclk_pstate.peak = gfx_table->max; pstate_table->uclk_pstate.min = mem_table->min; pstate_table->uclk_pstate.peak = mem_table->max; @@ -1348,12 +1356,12 @@ static int smu_v13_0_7_populate_umd_state_clk(struct smu_context *smu) pstate_table->fclk_pstate.min = fclk_table->min; pstate_table->fclk_pstate.peak = fclk_table->max; - /* - * For now, just use the mininum clock frequency. - * TODO: update them when the real pstate settings available - */ - pstate_table->gfxclk_pstate.standard = gfx_table->min; - pstate_table->uclk_pstate.standard = mem_table->min; + if (driver_clocks.BaseClockAc && + driver_clocks.BaseClockAc < gfx_table->max) + pstate_table->gfxclk_pstate.standard = driver_clocks.BaseClockAc; + else + pstate_table->gfxclk_pstate.standard = gfx_table->max; + pstate_table->uclk_pstate.standard = mem_table->max; pstate_table->socclk_pstate.standard = soc_table->min; pstate_table->vclk_pstate.standard = vclk_table->min; pstate_table->dclk_pstate.standard = dclk_table->min; From 85e0689eb6b10cd3b2fb455d1b3f4d4d0b13ff78 Mon Sep 17 00:00:00 2001 From: Horatio Zhang Date: Thu, 6 Apr 2023 13:32:14 +0800 Subject: [PATCH 858/898] drm/amd/pm: correct SMU13.0.7 max shader clock reporting Correct the max shader clock reporting on SMU 13.0.7. Signed-off-by: Horatio Zhang Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 61 ++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index fbde124aea2dc..ea364e9c157ca 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -575,6 +575,14 @@ static int smu_v13_0_7_set_default_dpm_table(struct smu_context *smu) dpm_table); if (ret) return ret; + + if (skutable->DriverReportedClocks.GameClockAc && + (dpm_table->dpm_levels[dpm_table->count - 1].value > + skutable->DriverReportedClocks.GameClockAc)) { + dpm_table->dpm_levels[dpm_table->count - 1].value = + skutable->DriverReportedClocks.GameClockAc; + dpm_table->max = skutable->DriverReportedClocks.GameClockAc; + } } else { dpm_table->count = 1; dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100; @@ -828,6 +836,57 @@ static int smu_v13_0_7_get_smu_metrics_data(struct smu_context *smu, return ret; } +static int smu_v13_0_7_get_dpm_ultimate_freq(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *min, + uint32_t *max) +{ + struct smu_13_0_dpm_context *dpm_context = + smu->smu_dpm.dpm_context; + struct smu_13_0_dpm_table *dpm_table; + + switch (clk_type) { + case SMU_MCLK: + case SMU_UCLK: + /* uclk dpm table */ + dpm_table = &dpm_context->dpm_tables.uclk_table; + break; + case SMU_GFXCLK: + case SMU_SCLK: + /* gfxclk dpm table */ + dpm_table = &dpm_context->dpm_tables.gfx_table; + break; + case SMU_SOCCLK: + /* socclk dpm table */ + dpm_table = &dpm_context->dpm_tables.soc_table; + break; + case SMU_FCLK: + /* fclk dpm table */ + dpm_table = &dpm_context->dpm_tables.fclk_table; + break; + case SMU_VCLK: + case SMU_VCLK1: + /* vclk dpm table */ + dpm_table = &dpm_context->dpm_tables.vclk_table; + break; + case SMU_DCLK: + case SMU_DCLK1: + /* dclk dpm table */ + dpm_table = &dpm_context->dpm_tables.dclk_table; + break; + default: + dev_err(smu->adev->dev, "Unsupported clock type!\n"); + return -EINVAL; + } + + if (min) + *min = dpm_table->min; + if (max) + *max = dpm_table->max; + + return 0; +} + static int smu_v13_0_7_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, void *data, @@ -1684,7 +1743,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .dpm_set_jpeg_enable = smu_v13_0_set_jpeg_enable, .init_pptable_microcode = smu_v13_0_init_pptable_microcode, .populate_umd_state_clk = smu_v13_0_7_populate_umd_state_clk, - .get_dpm_ultimate_freq = smu_v13_0_get_dpm_ultimate_freq, + .get_dpm_ultimate_freq = smu_v13_0_7_get_dpm_ultimate_freq, .get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values, .read_sensor = smu_v13_0_7_read_sensor, .feature_is_enabled = smu_cmn_feature_is_enabled, From b9a24d8bd51e2db425602fa82d7f4c06aa3db852 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 7 Apr 2023 17:12:15 +0800 Subject: [PATCH 859/898] drm/amd/pm: correct the pcie link state check for SMU13 Update the driver implementations to fit those data exposed by PMFW. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 6 ++++++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index f085cb97a6206..85a090b9e3d97 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -61,6 +61,12 @@ #define CTF_OFFSET_HOTSPOT 5 #define CTF_OFFSET_MEM 5 +static const int pmfw_decoded_link_speed[5] = {1, 2, 3, 4, 5}; +static const int pmfw_decoded_link_width[7] = {0, 1, 2, 4, 8, 12, 16}; + +#define DECODE_GEN_SPEED(gen_speed_idx) (pmfw_decoded_link_speed[gen_speed_idx]) +#define DECODE_LANE_WIDTH(lane_width_idx) (pmfw_decoded_link_width[lane_width_idx]) + struct smu_13_0_max_sustainable_clocks { uint32_t display_clock; uint32_t phy_clock; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 27448ffe60a43..a5c97d61e92a6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1144,8 +1144,8 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, (pcie_table->pcie_lane[i] == 5) ? "x12" : (pcie_table->pcie_lane[i] == 6) ? "x16" : "", pcie_table->clk_freq[i], - ((gen_speed - 1) == pcie_table->pcie_gen[i]) && - (lane_width == link_width[pcie_table->pcie_lane[i]]) ? + (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) && + (lane_width == DECODE_LANE_WIDTH(link_width[pcie_table->pcie_lane[i]])) ? "*" : ""); break; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index ea364e9c157ca..4399416dd9b8f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -1133,8 +1133,8 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu, (pcie_table->pcie_lane[i] == 5) ? "x12" : (pcie_table->pcie_lane[i] == 6) ? "x16" : "", pcie_table->clk_freq[i], - (gen_speed == pcie_table->pcie_gen[i]) && - (lane_width == pcie_table->pcie_lane[i]) ? + (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) && + (lane_width == DECODE_LANE_WIDTH(pcie_table->pcie_lane[i])) ? "*" : ""); break; From 59d3efd27c11c59b32291e5ebc307bed2edb65ee Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 11 Apr 2023 09:43:19 +0200 Subject: [PATCH 860/898] rtnetlink: Restore RTM_NEW/DELLINK notification behavior The commits referenced below allows userspace to use the NLM_F_ECHO flag for RTM_NEW/DELLINK operations to receive unicast notifications for the affected link. Prior to these changes, applications may have relied on multicast notifications to learn the same information without specifying the NLM_F_ECHO flag. For such applications, the mentioned commits changed the behavior for requests not using NLM_F_ECHO. Multicast notifications are still received, but now use the portid of the requester and the sequence number of the request instead of zero values used previously. For the application, this message may be unexpected and likely handled as a response to the NLM_F_ACKed request, especially if it uses the same socket to handle requests and notifications. To fix existing applications relying on the old notification behavior, set the portid and sequence number in the notification only if the request included the NLM_F_ECHO flag. This restores the old behavior for applications not using it, but allows unicasted notifications for others. Fixes: f3a63cce1b4f ("rtnetlink: Honour NLM_F_ECHO flag in rtnl_delete_link") Fixes: d88e136cab37 ("rtnetlink: Honour NLM_F_ECHO flag in rtnl_newlink_create") Signed-off-by: Martin Willi Acked-by: Guillaume Nault Acked-by: Hangbin Liu Link: https://lore.kernel.org/r/20230411074319.24133-1-martin@strongswan.org Signed-off-by: Jakub Kicinski --- include/linux/rtnetlink.h | 3 ++- net/core/dev.c | 2 +- net/core/rtnetlink.c | 11 +++++++++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 92ad75549e9cd..b6e6378dcbbd7 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -25,7 +25,8 @@ void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned change, u32 event, gfp_t flags, int *new_nsid, - int new_ifindex, u32 portid, u32 seq); + int new_ifindex, u32 portid, + const struct nlmsghdr *nlh); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags, u32 portid, const struct nlmsghdr *nlh); diff --git a/net/core/dev.c b/net/core/dev.c index 48067321c0dba..1488f700bf819 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10847,7 +10847,7 @@ void unregister_netdevice_many_notify(struct list_head *head, dev->rtnl_link_state == RTNL_LINK_INITIALIZED) skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, GFP_KERNEL, NULL, 0, - portid, nlmsg_seq(nlh)); + portid, nlh); /* * Flush the unicast and multicast chains diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5d8eb57867a96..6e44e92ebdf5d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3972,16 +3972,23 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned int change, u32 event, gfp_t flags, int *new_nsid, - int new_ifindex, u32 portid, u32 seq) + int new_ifindex, u32 portid, + const struct nlmsghdr *nlh) { struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; + u32 seq = 0; skb = nlmsg_new(if_nlmsg_size(dev, 0), flags); if (skb == NULL) goto errout; + if (nlmsg_report(nlh)) + seq = nlmsg_seq(nlh); + else + portid = 0; + err = rtnl_fill_ifinfo(skb, dev, dev_net(dev), type, portid, seq, change, 0, 0, event, new_nsid, new_ifindex, -1, flags); @@ -4017,7 +4024,7 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev, return; skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid, - new_ifindex, portid, nlmsg_seq(nlh)); + new_ifindex, portid, nlh); if (skb) rtmsg_ifinfo_send(skb, dev, flags, portid, nlh); } From 8eda19cd59cedbfe4ec11aea4bcecabe4c98e9e4 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 12 Apr 2023 17:05:31 +0100 Subject: [PATCH 861/898] ALSA: hda/realtek: Add quirks for Lenovo Z13/Z16 Gen2 These Lenovo laptops use Realtek HDA codec combined with 2xCS35L41 Amplifiers using I2C with External Boost. Signed-off-by: Stefan Binding Cc: Link: https://lore.kernel.org/r/20230412160531.182007-1-sbinding@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f554db1e7e9a4..3b9f077a227f7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9689,6 +9689,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), From 6417070918de3bcdbe0646e7256dae58fd8083ba Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Mon, 10 Apr 2023 09:23:52 +0800 Subject: [PATCH 862/898] net: qrtr: Fix an uninit variable access bug in qrtr_tx_resume() Syzbot reported a bug as following: ===================================================== BUG: KMSAN: uninit-value in qrtr_tx_resume+0x185/0x1f0 net/qrtr/af_qrtr.c:230 qrtr_tx_resume+0x185/0x1f0 net/qrtr/af_qrtr.c:230 qrtr_endpoint_post+0xf85/0x11b0 net/qrtr/af_qrtr.c:519 qrtr_tun_write_iter+0x270/0x400 net/qrtr/tun.c:108 call_write_iter include/linux/fs.h:2189 [inline] aio_write+0x63a/0x950 fs/aio.c:1600 io_submit_one+0x1d1c/0x3bf0 fs/aio.c:2019 __do_sys_io_submit fs/aio.c:2078 [inline] __se_sys_io_submit+0x293/0x770 fs/aio.c:2048 __x64_sys_io_submit+0x92/0xd0 fs/aio.c:2048 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Uninit was created at: slab_post_alloc_hook mm/slab.h:766 [inline] slab_alloc_node mm/slub.c:3452 [inline] __kmem_cache_alloc_node+0x71f/0xce0 mm/slub.c:3491 __do_kmalloc_node mm/slab_common.c:967 [inline] __kmalloc_node_track_caller+0x114/0x3b0 mm/slab_common.c:988 kmalloc_reserve net/core/skbuff.c:492 [inline] __alloc_skb+0x3af/0x8f0 net/core/skbuff.c:565 __netdev_alloc_skb+0x120/0x7d0 net/core/skbuff.c:630 qrtr_endpoint_post+0xbd/0x11b0 net/qrtr/af_qrtr.c:446 qrtr_tun_write_iter+0x270/0x400 net/qrtr/tun.c:108 call_write_iter include/linux/fs.h:2189 [inline] aio_write+0x63a/0x950 fs/aio.c:1600 io_submit_one+0x1d1c/0x3bf0 fs/aio.c:2019 __do_sys_io_submit fs/aio.c:2078 [inline] __se_sys_io_submit+0x293/0x770 fs/aio.c:2048 __x64_sys_io_submit+0x92/0xd0 fs/aio.c:2048 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd It is because that skb->len requires at least sizeof(struct qrtr_ctrl_pkt) in qrtr_tx_resume(). And skb->len equals to size in qrtr_endpoint_post(). But size is less than sizeof(struct qrtr_ctrl_pkt) when qrtr_cb->type equals to QRTR_TYPE_RESUME_TX in qrtr_endpoint_post() under the syzbot scenario. This triggers the uninit variable access bug. Add size check when qrtr_cb->type equals to QRTR_TYPE_RESUME_TX in qrtr_endpoint_post() to fix the bug. Fixes: 5fdeb0d372ab ("net: qrtr: Implement outgoing flow control") Reported-by: syzbot+4436c9630a45820fda76@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=c14607f0963d27d5a3d5f4c8639b500909e43540 Suggested-by: Manivannan Sadhasivam Signed-off-by: Ziyang Xuan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230410012352.3997823-1-william.xuanziyang@huawei.com Signed-off-by: Paolo Abeni --- net/qrtr/af_qrtr.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index 3a70255c8d02f..76f0434d3d06a 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -498,6 +498,11 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) if (!size || len != ALIGN(size, 4) + hdrlen) goto err; + if ((cb->type == QRTR_TYPE_NEW_SERVER || + cb->type == QRTR_TYPE_RESUME_TX) && + size < sizeof(struct qrtr_ctrl_pkt)) + goto err; + if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA && cb->type != QRTR_TYPE_RESUME_TX) goto err; @@ -510,9 +515,6 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) /* Remote node endpoint can bridge other distant nodes */ const struct qrtr_ctrl_pkt *pkt; - if (size < sizeof(*pkt)) - goto err; - pkt = data + hdrlen; qrtr_node_assign(node, le32_to_cpu(pkt->server.node)); } From 32832a2caf82663870126c5186cf8f86c8b2a649 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 10 Apr 2023 15:43:30 -0400 Subject: [PATCH 863/898] sctp: fix a potential overflow in sctp_ifwdtsn_skip Currently, when traversing ifwdtsn skips with _sctp_walk_ifwdtsn, it only checks the pos against the end of the chunk. However, the data left for the last pos may be < sizeof(struct sctp_ifwdtsn_skip), and dereference it as struct sctp_ifwdtsn_skip may cause coverflow. This patch fixes it by checking the pos against "the end of the chunk - sizeof(struct sctp_ifwdtsn_skip)" in sctp_ifwdtsn_skip, similar to sctp_fwdtsn_skip. Fixes: 0fc2ea922c8a ("sctp: implement validate_ftsn for sctp_stream_interleave") Signed-off-by: Xin Long Link: https://lore.kernel.org/r/2a71bffcd80b4f2c61fac6d344bb2f11c8fd74f7.1681155810.git.lucien.xin@gmail.com Signed-off-by: Paolo Abeni --- net/sctp/stream_interleave.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c index 94727feb07b3e..b046b11200c93 100644 --- a/net/sctp/stream_interleave.c +++ b/net/sctp/stream_interleave.c @@ -1154,7 +1154,8 @@ static void sctp_generate_iftsn(struct sctp_outq *q, __u32 ctsn) #define _sctp_walk_ifwdtsn(pos, chunk, end) \ for (pos = chunk->subh.ifwdtsn_hdr->skip; \ - (void *)pos < (void *)chunk->subh.ifwdtsn_hdr->skip + (end); pos++) + (void *)pos <= (void *)chunk->subh.ifwdtsn_hdr->skip + (end) - \ + sizeof(struct sctp_ifwdtsn_skip); pos++) #define sctp_walk_ifwdtsn(pos, ch) \ _sctp_walk_ifwdtsn((pos), (ch), ntohs((ch)->chunk_hdr->length) - \ From 5b7be2d4fd6eb8bec14c2de96c664e07c7d0bd82 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 11 Apr 2023 22:26:45 +0300 Subject: [PATCH 864/898] net: enetc: workaround for unresponsive pMAC after receiving express traffic I have observed an issue where the RX direction of the LS1028A ENETC pMAC seems unresponsive. The minimal procedure to reproduce the issue is: 1. Connect ENETC port 0 with a loopback RJ45 cable to one of the Felix switch ports (0). 2. Bring the ports up (MAC Merge layer is not enabled on either end). 3. Send a large quantity of unidirectional (express) traffic from Felix to ENETC. I tried altering frame size and frame count, and it doesn't appear to be specific to either of them, but rather, to the quantity of octets received. Lowering the frame count, the minimum quantity of packets to reproduce relatively consistently seems to be around 37000 frames at 1514 octets (w/o FCS) each. 4. Using ethtool --set-mm, enable the pMAC in the Felix and in the ENETC ports, in both RX and TX directions, and with verification on both ends. 5. Wait for verification to complete on both sides. 6. Configure a traffic class as preemptible on both ends. 7. Send some packets again. The issue is at step 5, where the verification process of ENETC ends (meaning that Felix responds with an SMD-R and ENETC sees the response), but the verification process of Felix never ends (it remains VERIFYING). If step 3 is skipped or if ENETC receives less traffic than approximately that threshold, the test runs all the way through (verification succeeds on both ends, preemptible traffic passes fine). If, between step 4 and 5, the step below is also introduced: 4.1. Disable and re-enable PM0_COMMAND_CONFIG bit RX_EN then again, the sequence of steps runs all the way through, and verification succeeds, even if there was the previous RX traffic injected into ENETC. Traffic sent *by* the ENETC port prior to enabling the MAC Merge layer does not seem to influence the verification result, only received traffic does. The LS1028A manual does not mention any relationship between PM0_COMMAND_CONFIG and MMCSR, and the hardware people don't seem to know for now either. The bit that is toggled to work around the issue is also toggled by enetc_mac_enable(), called from phylink's mac_link_down() and mac_link_up() methods - which is how the workaround was found: verification would work after a link down/up. Fixes: c7b9e8086902 ("net: enetc: add support for MAC Merge layer") Signed-off-by: Vladimir Oltean Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20230411192645.1896048-1-vladimir.oltean@nxp.com Signed-off-by: Paolo Abeni --- .../net/ethernet/freescale/enetc/enetc_ethtool.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index da9d4b310fcdd..838750a03cf68 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -989,6 +989,20 @@ static int enetc_get_mm(struct net_device *ndev, struct ethtool_mm_state *state) return 0; } +/* FIXME: Workaround for the link partner's verification failing if ENETC + * priorly received too much express traffic. The documentation doesn't + * suggest this is needed. + */ +static void enetc_restart_emac_rx(struct enetc_si *si) +{ + u32 val = enetc_port_rd(&si->hw, ENETC_PM0_CMD_CFG); + + enetc_port_wr(&si->hw, ENETC_PM0_CMD_CFG, val & ~ENETC_PM0_RX_EN); + + if (val & ENETC_PM0_RX_EN) + enetc_port_wr(&si->hw, ENETC_PM0_CMD_CFG, val); +} + static int enetc_set_mm(struct net_device *ndev, struct ethtool_mm_cfg *cfg, struct netlink_ext_ack *extack) { @@ -1040,6 +1054,8 @@ static int enetc_set_mm(struct net_device *ndev, struct ethtool_mm_cfg *cfg, enetc_port_wr(hw, ENETC_MMCSR, val); + enetc_restart_emac_rx(priv->si); + mutex_unlock(&priv->mm_lock); return 0; From 775d3c514c5b2763a50ab7839026d7561795924d Mon Sep 17 00:00:00 2001 From: Matija Glavinic Pecotic Date: Thu, 6 Apr 2023 08:26:52 +0200 Subject: [PATCH 865/898] x86/rtc: Remove __init for runtime functions set_rtc_noop(), get_rtc_noop() are after booting, therefore their __init annotation is wrong. A crash was observed on an x86 platform where CMOS RTC is unused and disabled via device tree. set_rtc_noop() was invoked from ntp: sync_hw_clock(), although CONFIG_RTC_SYSTOHC=n, however sync_cmos_clock() doesn't honour that. Workqueue: events_power_efficient sync_hw_clock RIP: 0010:set_rtc_noop Call Trace: update_persistent_clock64 sync_hw_clock Fix this by dropping the __init annotation from set/get_rtc_noop(). Fixes: c311ed6183f4 ("x86/init: Allow DT configured systems to disable RTC at boot time") Signed-off-by: Matija Glavinic Pecotic Signed-off-by: Thomas Gleixner Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/59f7ceb1-446b-1d3d-0bc8-1f0ee94b1e18@nokia.com --- arch/x86/kernel/x86_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ef80d361b4632..10622cf2b30f4 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -33,8 +33,8 @@ static int __init iommu_init_noop(void) { return 0; } static void iommu_shutdown_noop(void) { } bool __init bool_x86_init_noop(void) { return false; } void x86_op_int_noop(int cpu) { } -static __init int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; } -static __init void get_rtc_noop(struct timespec64 *now) { } +static int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; } +static void get_rtc_noop(struct timespec64 *now) { } static __initconst const struct of_device_id of_cmos_match[] = { { .compatible = "motorola,mc146818" }, From aca3b0fa3d04b40c96934d86cc224cccfa7ea8e0 Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Fri, 31 Mar 2023 23:34:24 -0700 Subject: [PATCH 866/898] RDMA/core: Fix GID entry ref leak when create_ah fails If AH create request fails, release sgid_attr to avoid GID entry referrence leak reported while releasing GID table Fixes: 1a1f460ff151 ("RDMA: Hold the sgid_attr inside the struct ib_ah/qp") Link: https://lore.kernel.org/r/20230401063424.342204-1-saravanan.vajravel@broadcom.com Reviewed-by: Selvin Xavier Signed-off-by: Saravanan Vajravel Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 11b1c1603aeb4..b99b3cc283b65 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -532,6 +532,8 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, else ret = device->ops.create_ah(ah, &init_attr, NULL); if (ret) { + if (ah->sgid_attr) + rdma_put_gid_attr(ah->sgid_attr); kfree(ah); return ERR_PTR(ret); } From f8160d3b35fc94491bb0cb974dbda310ef96c0e2 Mon Sep 17 00:00:00 2001 From: Gregor Herburger Date: Thu, 13 Apr 2023 11:37:37 +0200 Subject: [PATCH 867/898] i2c: ocores: generate stop condition after timeout in polling mode In polling mode, no stop condition is generated after a timeout. This causes SCL to remain low and thereby block the bus. If this happens during a transfer it can cause slaves to misinterpret the subsequent transfer and return wrong values. To solve this, pass the ETIMEDOUT error up from ocores_process_polling() instead of setting STATE_ERROR directly. The caller is adjusted to call ocores_process_timeout() on error both in polling and in IRQ mode, which will set STATE_ERROR and generate a stop condition. Fixes: 69c8c0c0efa8 ("i2c: ocores: add polling interface") Signed-off-by: Gregor Herburger Signed-off-by: Matthias Schiffer Acked-by: Peter Korsgaard Reviewed-by: Andrew Lunn Reviewed-by: Federico Vaga Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-ocores.c | 35 ++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c index a0af027db04c1..2e575856c5cd5 100644 --- a/drivers/i2c/busses/i2c-ocores.c +++ b/drivers/i2c/busses/i2c-ocores.c @@ -342,18 +342,18 @@ static int ocores_poll_wait(struct ocores_i2c *i2c) * ocores_isr(), we just add our polling code around it. * * It can run in atomic context + * + * Return: 0 on success, -ETIMEDOUT on timeout */ -static void ocores_process_polling(struct ocores_i2c *i2c) +static int ocores_process_polling(struct ocores_i2c *i2c) { - while (1) { - irqreturn_t ret; - int err; + irqreturn_t ret; + int err = 0; + while (1) { err = ocores_poll_wait(i2c); - if (err) { - i2c->state = STATE_ERROR; + if (err) break; /* timeout */ - } ret = ocores_isr(-1, i2c); if (ret == IRQ_NONE) @@ -364,13 +364,15 @@ static void ocores_process_polling(struct ocores_i2c *i2c) break; } } + + return err; } static int ocores_xfer_core(struct ocores_i2c *i2c, struct i2c_msg *msgs, int num, bool polling) { - int ret; + int ret = 0; u8 ctrl; ctrl = oc_getreg(i2c, OCI2C_CONTROL); @@ -388,15 +390,16 @@ static int ocores_xfer_core(struct ocores_i2c *i2c, oc_setreg(i2c, OCI2C_CMD, OCI2C_CMD_START); if (polling) { - ocores_process_polling(i2c); + ret = ocores_process_polling(i2c); } else { - ret = wait_event_timeout(i2c->wait, - (i2c->state == STATE_ERROR) || - (i2c->state == STATE_DONE), HZ); - if (ret == 0) { - ocores_process_timeout(i2c); - return -ETIMEDOUT; - } + if (wait_event_timeout(i2c->wait, + (i2c->state == STATE_ERROR) || + (i2c->state == STATE_DONE), HZ) == 0) + ret = -ETIMEDOUT; + } + if (ret) { + ocores_process_timeout(i2c); + return ret; } return (i2c->state == STATE_DONE) ? num : -EIO; From a5cb752b125766524c921faab1a45cc96065b0a7 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 11 Apr 2023 22:42:09 +0200 Subject: [PATCH 868/898] mptcp: use mptcp_schedule_work instead of open-coding it Beyond reducing code duplication this also avoids scheduling the mptcp_worker on a closed socket on some edge scenarios. The addressed issue is actually older than the blamed commit below, but this fix needs it as a pre-requisite. Fixes: ba8f48f7a4d7 ("mptcp: introduce mptcp_schedule_work") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 5 ++--- net/mptcp/subflow.c | 18 ++++++------------ 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index b30cea2fbf3fd..355f798d575a4 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1192,9 +1192,8 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) */ if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { if (mp_opt.data_fin && mp_opt.data_len == 1 && - mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) && - schedule_work(&msk->work)) - sock_hold(subflow->conn); + mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64)) + mptcp_schedule_work((struct sock *)msk); return true; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index a0041360ee9d9..d345888505457 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -408,9 +408,8 @@ void mptcp_subflow_reset(struct sock *ssk) tcp_send_active_reset(ssk, GFP_ATOMIC); tcp_done(ssk); - if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) && - schedule_work(&mptcp_sk(sk)->work)) - return; /* worker will put sk for us */ + if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags)) + mptcp_schedule_work(sk); sock_put(sk); } @@ -1118,8 +1117,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk, skb_ext_del(skb, SKB_EXT_MPTCP); return MAPPING_OK; } else { - if (updated && schedule_work(&msk->work)) - sock_hold((struct sock *)msk); + if (updated) + mptcp_schedule_work((struct sock *)msk); return MAPPING_DATA_FIN; } @@ -1222,17 +1221,12 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, /* sched mptcp worker to remove the subflow if no more data is pending */ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk) { - struct sock *sk = (struct sock *)msk; - if (likely(ssk->sk_state != TCP_CLOSE)) return; if (skb_queue_empty(&ssk->sk_receive_queue) && - !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) { - sock_hold(sk); - if (!schedule_work(&msk->work)) - sock_put(sk); - } + !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) + mptcp_schedule_work((struct sock *)msk); } static bool subflow_can_fallback(struct mptcp_subflow_context *subflow) From d6a0443733434408f2cbd4c53fea6910599bab9e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 11 Apr 2023 22:42:10 +0200 Subject: [PATCH 869/898] mptcp: stricter state check in mptcp_worker As reported by Christoph, the mptcp protocol can run the worker when the relevant msk socket is in an unexpected state: connect() // incoming reset + fastclose // the mptcp worker is scheduled mptcp_disconnect() // msk is now CLOSED listen() mptcp_worker() Leading to the following splat: divide error: 0000 [#1] PREEMPT SMP CPU: 1 PID: 21 Comm: kworker/1:0 Not tainted 6.3.0-rc1-gde5e8fd0123c #11 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 Workqueue: events mptcp_worker RIP: 0010:__tcp_select_window+0x22c/0x4b0 net/ipv4/tcp_output.c:3018 RSP: 0018:ffffc900000b3c98 EFLAGS: 00010293 RAX: 000000000000ffd7 RBX: 000000000000ffd7 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff8214ce97 RDI: 0000000000000004 RBP: 000000000000ffd7 R08: 0000000000000004 R09: 0000000000010000 R10: 000000000000ffd7 R11: ffff888005afa148 R12: 000000000000ffd7 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88803ed00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000405270 CR3: 000000003011e006 CR4: 0000000000370ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcp_select_window net/ipv4/tcp_output.c:262 [inline] __tcp_transmit_skb+0x356/0x1280 net/ipv4/tcp_output.c:1345 tcp_transmit_skb net/ipv4/tcp_output.c:1417 [inline] tcp_send_active_reset+0x13e/0x320 net/ipv4/tcp_output.c:3459 mptcp_check_fastclose net/mptcp/protocol.c:2530 [inline] mptcp_worker+0x6c7/0x800 net/mptcp/protocol.c:2705 process_one_work+0x3bd/0x950 kernel/workqueue.c:2390 worker_thread+0x5b/0x610 kernel/workqueue.c:2537 kthread+0x138/0x170 kernel/kthread.c:376 ret_from_fork+0x2c/0x50 arch/x86/entry/entry_64.S:308 This change addresses the issue explicitly checking for bad states before running the mptcp worker. Fixes: e16163b6e2b7 ("mptcp: refactor shutdown and close") Cc: stable@vger.kernel.org Reported-by: Christoph Paasch Link: https://github.com/multipath-tcp/mptcp_net-next/issues/374 Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Tested-by: Christoph Paasch Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 60b23b2716c40..06c5872e3b003 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2626,7 +2626,7 @@ static void mptcp_worker(struct work_struct *work) lock_sock(sk); state = sk->sk_state; - if (unlikely(state == TCP_CLOSE)) + if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN))) goto unlock; mptcp_check_data_fin_ack(sk); From c0ff6f6da66a7791a32c0234388b1bdc00244917 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 11 Apr 2023 22:42:11 +0200 Subject: [PATCH 870/898] mptcp: fix NULL pointer dereference on fastopen early fallback In case of early fallback to TCP, subflow_syn_recv_sock() deletes the subflow context before returning the newly allocated sock to the caller. The fastopen path does not cope with the above unconditionally dereferencing the subflow context. Fixes: 36b122baf6a8 ("mptcp: add subflow_v(4,6)_send_synack()") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/fastopen.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index d237d142171c5..bceaab8dd8e46 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -9,11 +9,18 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow, struct request_sock *req) { - struct sock *ssk = subflow->tcp_sock; - struct sock *sk = subflow->conn; + struct sock *sk, *ssk; struct sk_buff *skb; struct tcp_sock *tp; + /* on early fallback the subflow context is deleted by + * subflow_syn_recv_sock() + */ + if (!subflow) + return; + + ssk = subflow->tcp_sock; + sk = subflow->conn; tp = tcp_sk(ssk); subflow->is_mptfo = 1; From 711ae788cbbb82818531b55e32b09518ee09a11a Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 11 Apr 2023 22:42:12 +0200 Subject: [PATCH 871/898] selftests: mptcp: userspace pm: uniform verify events Simply adding a "sleep" before checking something is usually not a good idea because the time that has been picked can not be enough or too much. The best is to wait for events with a timeout. In this selftest, 'sleep 0.5' is used more than 40 times. It is always used before calling a 'verify_*' function except for this verify_listener_events which has been added later. At the end, using all these 'sleep 0.5' seems to work: the slow CIs don't complain so far. Also because it doesn't take too much time, we can just add two more 'sleep 0.5' to uniform what is done before calling a 'verify_*' function. For the same reasons, we can also delay a bigger refactoring to replace all these 'sleep 0.5' by functions waiting for events instead of waiting for a fix time and hope for the best. Fixes: 6c73008aa301 ("selftests: mptcp: listener test for userspace PM") Cc: stable@vger.kernel.org Suggested-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 48e52f995a98c..b1eb7bce599dc 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -913,6 +913,7 @@ test_listener() $client4_port > /dev/null 2>&1 & local listener_pid=$! + sleep 0.5 verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port # ADD_ADDR from client to server machine reusing the subflow port @@ -928,6 +929,7 @@ test_listener() # Delete the listener from the client ns, if one was created kill_wait $listener_pid + sleep 0.5 verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port } From 306dc21361993f4fe50a15d4db6b1a4de5d0adb0 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 12 Apr 2023 07:58:28 -0400 Subject: [PATCH 872/898] selftests: openvswitch: adjust datapath NL message declaration The netlink message for creating a new datapath takes an array of ports for the PID creation. This shouldn't cause much issue but correct it for future cases where we need to do decode of datapath information that could include the per-cpu PID map. Fixes: 25f16c873fb1 ("selftests: add openvswitch selftest suite") Signed-off-by: Aaron Conole Link: https://lore.kernel.org/r/20230412115828.3991806-1-aconole@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 3243c90d449e6..5d467d1993cb1 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -62,7 +62,7 @@ class dp_cmd_msg(ovs_dp_msg): nla_map = ( ("OVS_DP_ATTR_UNSPEC", "none"), ("OVS_DP_ATTR_NAME", "asciiz"), - ("OVS_DP_ATTR_UPCALL_PID", "uint32"), + ("OVS_DP_ATTR_UPCALL_PID", "array(uint32)"), ("OVS_DP_ATTR_STATS", "dpstats"), ("OVS_DP_ATTR_MEGAFLOW_STATS", "megaflowstats"), ("OVS_DP_ATTR_USER_FEATURES", "uint32"), From 1c5950fc6fe996235f1d18539b9c6b64b597f50f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Apr 2023 13:03:08 +0000 Subject: [PATCH 873/898] udp6: fix potential access to stale information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lena wang reported an issue caused by udpv6_sendmsg() mangling msg->msg_name and msg->msg_namelen, which are later read from ____sys_sendmsg() : /* * If this is sendmmsg() and sending to current destination address was * successful, remember it. */ if (used_address && err >= 0) { used_address->name_len = msg_sys->msg_namelen; if (msg_sys->msg_name) memcpy(&used_address->name, msg_sys->msg_name, used_address->name_len); } udpv6_sendmsg() wants to pretend the remote address family is AF_INET in order to call udp_sendmsg(). A fix would be to modify the address in-place, instead of using a local variable, but this could have other side effects. Instead, restore initial values before we return from udpv6_sendmsg(). Fixes: c71d8ebe7a44 ("net: Fix security_socket_sendmsg() bypass problem.") Reported-by: lena wang Signed-off-by: Eric Dumazet Reviewed-by: Maciej Żenczykowski Link: https://lore.kernel.org/r/20230412130308.1202254-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/udp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9fb2f33ee3a76..a675acfb901d1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1395,9 +1395,11 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) msg->msg_name = &sin; msg->msg_namelen = sizeof(sin); do_udp_sendmsg: - if (ipv6_only_sock(sk)) - return -ENETUNREACH; - return udp_sendmsg(sk, msg, len); + err = ipv6_only_sock(sk) ? + -ENETUNREACH : udp_sendmsg(sk, msg, len); + msg->msg_name = sin6; + msg->msg_namelen = addr_len; + return err; } } From 3a0385be133e7091cc9a9a998c7ec712bb9585db Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 12 Apr 2023 11:13:06 -0400 Subject: [PATCH 874/898] selftests: add the missing CONFIG_IP_SCTP in net config The selftest sctp_vrf needs CONFIG_IP_SCTP set in config when building the kernel, so add it. Fixes: a61bd7b9fef3 ("selftests: add a selftest for sctp vrf") Reported-by: Naresh Kamboju Signed-off-by: Xin Long Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/61dddebc4d2dd98fe7fb145e24d4b2430e42b572.1681312386.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index cc9fd55ab8699..2529226ce87ca 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -48,3 +48,4 @@ CONFIG_BAREUDP=m CONFIG_IPV6_IOAM6_LWTUNNEL=y CONFIG_CRYPTO_SM4_GENERIC=y CONFIG_AMT=m +CONFIG_IP_SCTP=m From e8b74453555872851bdd7ea43a7c0ec39659834f Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 12 Apr 2023 16:21:44 -0700 Subject: [PATCH 875/898] net: macb: fix a memory corruption in extended buffer descriptor mode For quite some time we were chasing a bug which looked like a sudden permanent failure of networking and mmc on some of our devices. The bug was very sensitive to any software changes and even more to any kernel debug options. Finally we got a setup where the problem was reproducible with CONFIG_DMA_API_DEBUG=y and it revealed the issue with the rx dma: [ 16.992082] ------------[ cut here ]------------ [ 16.996779] DMA-API: macb ff0b0000.ethernet: device driver tries to free DMA memory it has not allocated [device address=0x0000000875e3e244] [size=1536 bytes] [ 17.011049] WARNING: CPU: 0 PID: 85 at kernel/dma/debug.c:1011 check_unmap+0x6a0/0x900 [ 17.018977] Modules linked in: xxxxx [ 17.038823] CPU: 0 PID: 85 Comm: irq/55-8000f000 Not tainted 5.4.0 #28 [ 17.045345] Hardware name: xxxxx [ 17.049528] pstate: 60000005 (nZCv daif -PAN -UAO) [ 17.054322] pc : check_unmap+0x6a0/0x900 [ 17.058243] lr : check_unmap+0x6a0/0x900 [ 17.062163] sp : ffffffc010003c40 [ 17.065470] x29: ffffffc010003c40 x28: 000000004000c03c [ 17.070783] x27: ffffffc010da7048 x26: ffffff8878e38800 [ 17.076095] x25: ffffff8879d22810 x24: ffffffc010003cc8 [ 17.081407] x23: 0000000000000000 x22: ffffffc010a08750 [ 17.086719] x21: ffffff8878e3c7c0 x20: ffffffc010acb000 [ 17.092032] x19: 0000000875e3e244 x18: 0000000000000010 [ 17.097343] x17: 0000000000000000 x16: 0000000000000000 [ 17.102647] x15: ffffff8879e4a988 x14: 0720072007200720 [ 17.107959] x13: 0720072007200720 x12: 0720072007200720 [ 17.113261] x11: 0720072007200720 x10: 0720072007200720 [ 17.118565] x9 : 0720072007200720 x8 : 000000000000022d [ 17.123869] x7 : 0000000000000015 x6 : 0000000000000098 [ 17.129173] x5 : 0000000000000000 x4 : 0000000000000000 [ 17.134475] x3 : 00000000ffffffff x2 : ffffffc010a1d370 [ 17.139778] x1 : b420c9d75d27bb00 x0 : 0000000000000000 [ 17.145082] Call trace: [ 17.147524] check_unmap+0x6a0/0x900 [ 17.151091] debug_dma_unmap_page+0x88/0x90 [ 17.155266] gem_rx+0x114/0x2f0 [ 17.158396] macb_poll+0x58/0x100 [ 17.161705] net_rx_action+0x118/0x400 [ 17.165445] __do_softirq+0x138/0x36c [ 17.169100] irq_exit+0x98/0xc0 [ 17.172234] __handle_domain_irq+0x64/0xc0 [ 17.176320] gic_handle_irq+0x5c/0xc0 [ 17.179974] el1_irq+0xb8/0x140 [ 17.183109] xiic_process+0x5c/0xe30 [ 17.186677] irq_thread_fn+0x28/0x90 [ 17.190244] irq_thread+0x208/0x2a0 [ 17.193724] kthread+0x130/0x140 [ 17.196945] ret_from_fork+0x10/0x20 [ 17.200510] ---[ end trace 7240980785f81d6f ]--- [ 237.021490] ------------[ cut here ]------------ [ 237.026129] DMA-API: exceeded 7 overlapping mappings of cacheline 0x0000000021d79e7b [ 237.033886] WARNING: CPU: 0 PID: 0 at kernel/dma/debug.c:499 add_dma_entry+0x214/0x240 [ 237.041802] Modules linked in: xxxxx [ 237.061637] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 5.4.0 #28 [ 237.068941] Hardware name: xxxxx [ 237.073116] pstate: 80000085 (Nzcv daIf -PAN -UAO) [ 237.077900] pc : add_dma_entry+0x214/0x240 [ 237.081986] lr : add_dma_entry+0x214/0x240 [ 237.086072] sp : ffffffc010003c30 [ 237.089379] x29: ffffffc010003c30 x28: ffffff8878a0be00 [ 237.094683] x27: 0000000000000180 x26: ffffff8878e387c0 [ 237.099987] x25: 0000000000000002 x24: 0000000000000000 [ 237.105290] x23: 000000000000003b x22: ffffffc010a0fa00 [ 237.110594] x21: 0000000021d79e7b x20: ffffffc010abe600 [ 237.115897] x19: 00000000ffffffef x18: 0000000000000010 [ 237.121201] x17: 0000000000000000 x16: 0000000000000000 [ 237.126504] x15: ffffffc010a0fdc8 x14: 0720072007200720 [ 237.131807] x13: 0720072007200720 x12: 0720072007200720 [ 237.137111] x11: 0720072007200720 x10: 0720072007200720 [ 237.142415] x9 : 0720072007200720 x8 : 0000000000000259 [ 237.147718] x7 : 0000000000000001 x6 : 0000000000000000 [ 237.153022] x5 : ffffffc010003a20 x4 : 0000000000000001 [ 237.158325] x3 : 0000000000000006 x2 : 0000000000000007 [ 237.163628] x1 : 8ac721b3a7dc1c00 x0 : 0000000000000000 [ 237.168932] Call trace: [ 237.171373] add_dma_entry+0x214/0x240 [ 237.175115] debug_dma_map_page+0xf8/0x120 [ 237.179203] gem_rx_refill+0x190/0x280 [ 237.182942] gem_rx+0x224/0x2f0 [ 237.186075] macb_poll+0x58/0x100 [ 237.189384] net_rx_action+0x118/0x400 [ 237.193125] __do_softirq+0x138/0x36c [ 237.196780] irq_exit+0x98/0xc0 [ 237.199914] __handle_domain_irq+0x64/0xc0 [ 237.204000] gic_handle_irq+0x5c/0xc0 [ 237.207654] el1_irq+0xb8/0x140 [ 237.210789] arch_cpu_idle+0x40/0x200 [ 237.214444] default_idle_call+0x18/0x30 [ 237.218359] do_idle+0x200/0x280 [ 237.221578] cpu_startup_entry+0x20/0x30 [ 237.225493] rest_init+0xe4/0xf0 [ 237.228713] arch_call_rest_init+0xc/0x14 [ 237.232714] start_kernel+0x47c/0x4a8 [ 237.236367] ---[ end trace 7240980785f81d70 ]--- Lars was fast to find an explanation: according to the datasheet bit 2 of the rx buffer descriptor entry has a different meaning in the extended mode: Address [2] of beginning of buffer, or in extended buffer descriptor mode (DMA configuration register [28] = 1), indicates a valid timestamp in the buffer descriptor entry. The macb driver didn't mask this bit while getting an address and it eventually caused a memory corruption and a dma failure. The problem is resolved by explicitly clearing the problematic bit if hw timestamping is used. Fixes: 7b4296148066 ("net: macb: Add support for PTP timestamps in DMA descriptors") Signed-off-by: Roman Gushchin Co-developed-by: Lars-Peter Clausen Signed-off-by: Lars-Peter Clausen Acked-by: Nicolas Ferre Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20230412232144.770336-1-roman.gushchin@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 66e30561569eb..e43d99ec50ba2 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1064,6 +1064,10 @@ static dma_addr_t macb_get_addr(struct macb *bp, struct macb_dma_desc *desc) } #endif addr |= MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr)); +#ifdef CONFIG_MACB_USE_HWSTAMP + if (bp->hw_dma_cap & HW_DMA_CAP_PTP) + addr &= ~GEM_BIT(DMA_RXVALID); +#endif return addr; } From 0646dc31ca886693274df5749cd0c8c1eaaeb5ca Mon Sep 17 00:00:00 2001 From: Liang Chen Date: Thu, 13 Apr 2023 17:03:53 +0800 Subject: [PATCH 876/898] skbuff: Fix a race between coalescing and releasing SKBs Commit 1effe8ca4e34 ("skbuff: fix coalescing for page_pool fragment recycling") allowed coalescing to proceed with non page pool page and page pool page when @from is cloned, i.e. to->pp_recycle --> false from->pp_recycle --> true skb_cloned(from) --> true However, it actually requires skb_cloned(@from) to hold true until coalescing finishes in this situation. If the other cloned SKB is released while the merging is in process, from_shinfo->nr_frags will be set to 0 toward the end of the function, causing the increment of frag page _refcount to be unexpectedly skipped resulting in inconsistent reference counts. Later when SKB(@to) is released, it frees the page directly even though the page pool page is still in use, leading to use-after-free or double-free errors. So it should be prohibited. The double-free error message below prompted us to investigate: BUG: Bad page state in process swapper/1 pfn:0e0d1 page:00000000c6548b28 refcount:-1 mapcount:0 mapping:0000000000000000 index:0x2 pfn:0xe0d1 flags: 0xfffffc0000000(node=0|zone=1|lastcpupid=0x1fffff) raw: 000fffffc0000000 0000000000000000 ffffffff00000101 0000000000000000 raw: 0000000000000002 0000000000000000 ffffffffffffffff 0000000000000000 page dumped because: nonzero _refcount CPU: 1 PID: 0 Comm: swapper/1 Tainted: G E 6.2.0+ Call Trace: dump_stack_lvl+0x32/0x50 bad_page+0x69/0xf0 free_pcp_prepare+0x260/0x2f0 free_unref_page+0x20/0x1c0 skb_release_data+0x10b/0x1a0 napi_consume_skb+0x56/0x150 net_rx_action+0xf0/0x350 ? __napi_schedule+0x79/0x90 __do_softirq+0xc8/0x2b1 __irq_exit_rcu+0xb9/0xf0 common_interrupt+0x82/0xa0 asm_common_interrupt+0x22/0x40 RIP: 0010:default_idle+0xb/0x20 Fixes: 53e0961da1c7 ("page_pool: add frag page recycling support in page pool") Signed-off-by: Liang Chen Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230413090353.14448-1-liangchen.linux@gmail.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1a31815104d61..4c0879798eb8a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5599,18 +5599,18 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, if (skb_cloned(to)) return false; - /* In general, avoid mixing slab allocated and page_pool allocated - * pages within the same SKB. However when @to is not pp_recycle and - * @from is cloned, we can transition frag pages from page_pool to - * reference counted. - * - * On the other hand, don't allow coalescing two pp_recycle SKBs if - * @from is cloned, in case the SKB is using page_pool fragment + /* In general, avoid mixing page_pool and non-page_pool allocated + * pages within the same SKB. Additionally avoid dealing with clones + * with page_pool pages, in case the SKB is using page_pool fragment * references (PP_FLAG_PAGE_FRAG). Since we only take full page * references for cloned SKBs at the moment that would result in * inconsistent reference counts. + * In theory we could take full references if @from is cloned and + * !@to->pp_recycle but its tricky (due to potential race with + * the clone disappearing) and rare, so not worth dealing with. */ - if (to->pp_recycle != (from->pp_recycle && !skb_cloned(from))) + if (to->pp_recycle != from->pp_recycle || + (from->pp_recycle && skb_cloned(from))) return false; if (len <= skb_tailroom(to)) { From edf79dd2172233452ff142dcc98b19d955fc8974 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 12 Apr 2023 17:23:49 +0200 Subject: [PATCH 877/898] fbcon: Fix error paths in set_con2fb_map This is a regressoin introduced in b07db3958485 ("fbcon: Ditch error handling for con2fb_release_oldinfo"). I failed to realize what the if (!err) checks. The mentioned commit was dropping the con2fb_release_oldinfo() return value but the if (!err) was also checking whether the con2fb_acquire_newinfo() function call above failed or not. Fix this with an early return statement. Note that there's still a difference compared to the orginal state of the code, the below lines are now also skipped on error: if (!search_fb_in_map(info_idx)) info_idx = newidx; These are only needed when we've actually thrown out an old fb_info from the console mappings, which only happens later on. Also move the fbcon_add_cursor_work() call into the same if block, it's all protected by console_lock so doesn't matter when we set up the blinking cursor delayed work anyway. This further simplifies the control flow and allows us to ditch the found local variable. v2: Clarify commit message (Javier) Signed-off-by: Daniel Vetter Reviewed-by: Javier Martinez Canillas Acked-by: Helge Deller Tested-by: Xingyuan Mo Fixes: b07db3958485 ("fbcon: Ditch error handling for con2fb_release_oldinfo") Cc: Thomas Zimmermann Cc: Sam Ravnborg Cc: Xingyuan Mo Cc: Thomas Zimmermann Cc: Helge Deller Cc: # v5.19+ --- drivers/video/fbdev/core/fbcon.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 0a2c47df01f40..52f6ce8e794a9 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -823,7 +823,7 @@ static int set_con2fb_map(int unit, int newidx, int user) int oldidx = con2fb_map[unit]; struct fb_info *info = fbcon_registered_fb[newidx]; struct fb_info *oldinfo = NULL; - int found, err = 0, show_logo; + int err = 0, show_logo; WARN_CONSOLE_UNLOCKED(); @@ -841,26 +841,25 @@ static int set_con2fb_map(int unit, int newidx, int user) if (oldidx != -1) oldinfo = fbcon_registered_fb[oldidx]; - found = search_fb_in_map(newidx); - - if (!err && !found) { + if (!search_fb_in_map(newidx)) { err = con2fb_acquire_newinfo(vc, info, unit); - if (!err) - con2fb_map[unit] = newidx; + if (err) + return err; + + con2fb_map[unit] = newidx; + fbcon_add_cursor_work(info); } /* * If old fb is not mapped to any of the consoles, * fbcon should release it. */ - if (!err && oldinfo && !search_fb_in_map(oldidx)) + if (oldinfo && !search_fb_in_map(oldidx)) con2fb_release_oldinfo(vc, oldinfo, info); show_logo = (fg_console == 0 && !user && logo_shown != FBCON_LOGO_DONTSHOW); - if (!found) - fbcon_add_cursor_work(info); con2fb_map_boot[unit] = newidx; con2fb_init_display(vc, info, unit, show_logo); From fffb0b52d5258554c645c966c6cbef7de50b851d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 12 Apr 2023 17:31:46 +0200 Subject: [PATCH 878/898] fbcon: set_con2fb_map needs to set con2fb_map! I got really badly confused in d443d9386472 ("fbcon: move more common code into fb_open()") because we set the con2fb_map before the failure points, which didn't look good. But in trying to fix that I moved the assignment into the wrong path - we need to do it for _all_ vc we take over, not just the first one (which additionally requires the call to con2fb_acquire_newinfo). I've figured this out because of a KASAN bug report, where the fbcon_registered_fb and fbcon_display arrays went out of sync in fbcon_mode_deleted() because the con2fb_map pointed at the old fb_info, but the modes and everything was updated for the new one. Signed-off-by: Daniel Vetter Reviewed-by: Javier Martinez Canillas Acked-by: Helge Deller Tested-by: Xingyuan Mo Fixes: d443d9386472 ("fbcon: move more common code into fb_open()") Reported-by: Xingyuan Mo Cc: Thomas Zimmermann Cc: Sam Ravnborg Cc: Xingyuan Mo Cc: Thomas Zimmermann Cc: Helge Deller Cc: # v5.19+ --- drivers/video/fbdev/core/fbcon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 52f6ce8e794a9..eb565a10e5cda 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -846,10 +846,11 @@ static int set_con2fb_map(int unit, int newidx, int user) if (err) return err; - con2fb_map[unit] = newidx; fbcon_add_cursor_work(info); } + con2fb_map[unit] = newidx; + /* * If old fb is not mapped to any of the consoles, * fbcon should release it. From e8163b98d96c4d87c870689f560c53be7ccd55c8 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 12 Apr 2023 21:48:35 +0200 Subject: [PATCH 879/898] selftests/bpf: xdp_hw_metadata remove bpf_printk and add counters The tool xdp_hw_metadata can be used by driver developers implementing XDP-hints metadata kfuncs. Remove all bpf_printk calls, as the tool already transfers all the XDP-hints related information via metadata area to AF_XDP userspace process. Add counters for providing remaining information about failure and skipped packet events. Signed-off-by: Jesper Dangaard Brouer Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/168132891533.340624.7313781245316405141.stgit@firesoul Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/xdp_hw_metadata.c | 36 +++++++++++-------- tools/testing/selftests/bpf/xdp_hw_metadata.c | 4 ++- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c index 4c55b4d79d3d4..0687d11162f6b 100644 --- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c @@ -12,6 +12,10 @@ struct { __type(value, __u32); } xsk SEC(".maps"); +__u64 pkts_skip = 0; +__u64 pkts_fail = 0; +__u64 pkts_redir = 0; + extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym; extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, @@ -26,7 +30,7 @@ int rx(struct xdp_md *ctx) struct udphdr *udp = NULL; struct iphdr *iph = NULL; struct xdp_meta *meta; - int ret; + int err; data = (void *)(long)ctx->data; data_end = (void *)(long)ctx->data_end; @@ -46,17 +50,20 @@ int rx(struct xdp_md *ctx) udp = NULL; } - if (!udp) + if (!udp) { + __sync_add_and_fetch(&pkts_skip, 1); return XDP_PASS; + } - if (udp->dest != bpf_htons(9091)) + /* Forwarding UDP:9091 to AF_XDP */ + if (udp->dest != bpf_htons(9091)) { + __sync_add_and_fetch(&pkts_skip, 1); return XDP_PASS; + } - bpf_printk("forwarding UDP:9091 to AF_XDP"); - - ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta)); - if (ret != 0) { - bpf_printk("bpf_xdp_adjust_meta returned %d", ret); + err = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta)); + if (err) { + __sync_add_and_fetch(&pkts_fail, 1); return XDP_PASS; } @@ -65,20 +72,19 @@ int rx(struct xdp_md *ctx) meta = data_meta; if (meta + 1 > data) { - bpf_printk("bpf_xdp_adjust_meta doesn't appear to work"); + __sync_add_and_fetch(&pkts_fail, 1); return XDP_PASS; } - if (!bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp)) - bpf_printk("populated rx_timestamp with %llu", meta->rx_timestamp); - else + err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp); + if (err) meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */ - if (!bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash)) - bpf_printk("populated rx_hash with %u", meta->rx_hash); - else + err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash); + if (err) meta->rx_hash = 0; /* Used by AF_XDP as not avail signal */ + __sync_add_and_fetch(&pkts_redir, 1); return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); } diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 1c8acb68b977c..3b942ef7297bf 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -212,7 +212,9 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd) while (true) { errno = 0; ret = poll(fds, rxq + 1, 1000); - printf("poll: %d (%d)\n", ret, errno); + printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n", + ret, errno, bpf_obj->bss->pkts_skip, + bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir); if (ret < 0) break; if (ret == 0) From 0cd917a4a8ace70ff9082d797c899f6bf10de910 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 12 Apr 2023 21:48:40 +0200 Subject: [PATCH 880/898] xdp: rss hash types representation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RSS hash type specifies what portion of packet data NIC hardware used when calculating RSS hash value. The RSS types are focused on Internet traffic protocols at OSI layers L3 and L4. L2 (e.g. ARP) often get hash value zero and no RSS type. For L3 focused on IPv4 vs. IPv6, and L4 primarily TCP vs UDP, but some hardware supports SCTP. Hardware RSS types are differently encoded for each hardware NIC. Most hardware represent RSS hash type as a number. Determining L3 vs L4 often requires a mapping table as there often isn't a pattern or sorting according to ISO layer. The patch introduce a XDP RSS hash type (enum xdp_rss_hash_type) that contains both BITs for the L3/L4 types, and combinations to be used by drivers for their mapping tables. The enum xdp_rss_type_bits get exposed to BPF via BTF, and it is up to the BPF-programmer to match using these defines. This proposal change the kfunc API bpf_xdp_metadata_rx_hash() adding a pointer value argument for provide the RSS hash type. Change signature for all xmo_rx_hash calls in drivers to make it compile. The RSS type implementations for each driver comes as separate patches. Fixes: 3d76a4d3d4e5 ("bpf: XDP metadata RX kfuncs") Signed-off-by: Jesper Dangaard Brouer Acked-by: Toke Høiland-Jørgensen Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/168132892042.340624.582563003880565460.stgit@firesoul Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 3 +- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 3 +- .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 3 +- drivers/net/veth.c | 3 +- include/linux/netdevice.h | 3 +- include/net/xdp.h | 45 +++++++++++++++++++ net/core/xdp.c | 10 ++++- 7 files changed, 64 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 4b5e459b6d49f..73d10aa4c503f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -681,7 +681,8 @@ int mlx4_en_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) return 0; } -int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash) +int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type) { struct mlx4_en_xdp_buff *_ctx = (void *)ctx; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 544e09b97483c..4ac4d883047b1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -798,7 +798,8 @@ int mlx4_en_netdev_event(struct notifier_block *this, struct xdp_md; int mlx4_en_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp); -int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash); +int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type); /* * Functions for time stamping diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index c5dae48b7932f..efe609f8e3aac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -169,7 +169,8 @@ static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) return 0; } -static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash) +static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type) { const struct mlx5e_xdp_buff *_ctx = (void *)ctx; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index c1178915496d8..424e8876a16b6 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1648,7 +1648,8 @@ static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) return 0; } -static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash) +static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type) { struct veth_xdp_buff *_ctx = (void *)ctx; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 470085b121d3c..c35f04f636f15 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1624,7 +1624,8 @@ struct net_device_ops { struct xdp_metadata_ops { int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp); - int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash); + int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type); }; /** diff --git a/include/net/xdp.h b/include/net/xdp.h index 41c57b8b16714..a76c4ea203eaa 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -8,6 +8,7 @@ #include /* skb_shared_info */ #include +#include /** * DOC: XDP RX-queue information @@ -425,6 +426,50 @@ XDP_METADATA_KFUNC_xxx MAX_XDP_METADATA_KFUNC, }; +enum xdp_rss_hash_type { + /* First part: Individual bits for L3/L4 types */ + XDP_RSS_L3_IPV4 = BIT(0), + XDP_RSS_L3_IPV6 = BIT(1), + + /* The fixed (L3) IPv4 and IPv6 headers can both be followed by + * variable/dynamic headers, IPv4 called Options and IPv6 called + * Extension Headers. HW RSS type can contain this info. + */ + XDP_RSS_L3_DYNHDR = BIT(2), + + /* When RSS hash covers L4 then drivers MUST set XDP_RSS_L4 bit in + * addition to the protocol specific bit. This ease interaction with + * SKBs and avoids reserving a fixed mask for future L4 protocol bits. + */ + XDP_RSS_L4 = BIT(3), /* L4 based hash, proto can be unknown */ + XDP_RSS_L4_TCP = BIT(4), + XDP_RSS_L4_UDP = BIT(5), + XDP_RSS_L4_SCTP = BIT(6), + XDP_RSS_L4_IPSEC = BIT(7), /* L4 based hash include IPSEC SPI */ + + /* Second part: RSS hash type combinations used for driver HW mapping */ + XDP_RSS_TYPE_NONE = 0, + XDP_RSS_TYPE_L2 = XDP_RSS_TYPE_NONE, + + XDP_RSS_TYPE_L3_IPV4 = XDP_RSS_L3_IPV4, + XDP_RSS_TYPE_L3_IPV6 = XDP_RSS_L3_IPV6, + XDP_RSS_TYPE_L3_IPV4_OPT = XDP_RSS_L3_IPV4 | XDP_RSS_L3_DYNHDR, + XDP_RSS_TYPE_L3_IPV6_EX = XDP_RSS_L3_IPV6 | XDP_RSS_L3_DYNHDR, + + XDP_RSS_TYPE_L4_ANY = XDP_RSS_L4, + XDP_RSS_TYPE_L4_IPV4_TCP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_TCP, + XDP_RSS_TYPE_L4_IPV4_UDP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_UDP, + XDP_RSS_TYPE_L4_IPV4_SCTP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_SCTP, + + XDP_RSS_TYPE_L4_IPV6_TCP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_TCP, + XDP_RSS_TYPE_L4_IPV6_UDP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_UDP, + XDP_RSS_TYPE_L4_IPV6_SCTP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_SCTP, + + XDP_RSS_TYPE_L4_IPV6_TCP_EX = XDP_RSS_TYPE_L4_IPV6_TCP | XDP_RSS_L3_DYNHDR, + XDP_RSS_TYPE_L4_IPV6_UDP_EX = XDP_RSS_TYPE_L4_IPV6_UDP | XDP_RSS_L3_DYNHDR, + XDP_RSS_TYPE_L4_IPV6_SCTP_EX = XDP_RSS_TYPE_L4_IPV6_SCTP | XDP_RSS_L3_DYNHDR, +}; + #ifdef CONFIG_NET u32 bpf_xdp_metadata_kfunc_id(int id); bool bpf_dev_bound_kfunc_id(u32 btf_id); diff --git a/net/core/xdp.c b/net/core/xdp.c index 528d4b37983df..fb85aca819619 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -734,13 +734,21 @@ __bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *tim * bpf_xdp_metadata_rx_hash - Read XDP frame RX hash. * @ctx: XDP context pointer. * @hash: Return value pointer. + * @rss_type: Return value pointer for RSS type. + * + * The RSS hash type (@rss_type) specifies what portion of packet headers NIC + * hardware used when calculating RSS hash value. The RSS type can be decoded + * via &enum xdp_rss_hash_type either matching on individual L3/L4 bits + * ``XDP_RSS_L*`` or by combined traditional *RSS Hashing Types* + * ``XDP_RSS_TYPE_L*``. * * Return: * * Returns 0 on success or ``-errno`` on error. * * ``-EOPNOTSUPP`` : means device driver doesn't implement kfunc * * ``-ENODATA`` : means no RX-hash available for this frame */ -__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash) +__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type) { return -EOPNOTSUPP; } From 67f245c2ec0af17d7a90c78910e28bc8b206297c Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 12 Apr 2023 21:48:45 +0200 Subject: [PATCH 881/898] mlx5: bpf_xdp_metadata_rx_hash add xdp rss hash type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update API for bpf_xdp_metadata_rx_hash() with arg for xdp rss hash type via mapping table. The mlx5 hardware can also identify and RSS hash IPSEC. This indicate hash includes SPI (Security Parameters Index) as part of IPSEC hash. Extend xdp core enum xdp_rss_hash_type with IPSEC hash type. Fixes: bc8d405b1ba9 ("net/mlx5e: Support RX XDP metadata") Signed-off-by: Jesper Dangaard Brouer Acked-by: Toke Høiland-Jørgensen Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/168132892548.340624.11185734579430124869.stgit@firesoul Signed-off-by: Alexei Starovoitov --- .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 60 ++++++++++++++++++- include/linux/mlx5/device.h | 14 ++++- include/net/xdp.h | 2 + 3 files changed, 73 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index efe609f8e3aac..d9d3b9e1f15aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -34,6 +34,7 @@ #include #include "en/xdp.h" #include "en/params.h" +#include int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { @@ -169,15 +170,72 @@ static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) return 0; } +/* Mapping HW RSS Type bits CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4 into 4-bits*/ +#define RSS_TYPE_MAX_TABLE 16 /* 4-bits max 16 entries */ +#define RSS_L4 GENMASK(1, 0) +#define RSS_L3 GENMASK(3, 2) /* Same as CQE_RSS_HTYPE_IP */ + +/* Valid combinations of CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4 sorted numerical */ +enum mlx5_rss_hash_type { + RSS_TYPE_NO_HASH = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IP_NONE) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)), + RSS_TYPE_L3_IPV4 = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)), + RSS_TYPE_L4_IPV4_TCP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)), + RSS_TYPE_L4_IPV4_UDP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)), + RSS_TYPE_L4_IPV4_IPSEC = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)), + RSS_TYPE_L3_IPV6 = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)), + RSS_TYPE_L4_IPV6_TCP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)), + RSS_TYPE_L4_IPV6_UDP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)), + RSS_TYPE_L4_IPV6_IPSEC = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)), +}; + +/* Invalid combinations will simply return zero, allows no boundary checks */ +static const enum xdp_rss_hash_type mlx5_xdp_rss_type[RSS_TYPE_MAX_TABLE] = { + [RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_NONE, + [1] = XDP_RSS_TYPE_NONE, /* Implicit zero */ + [2] = XDP_RSS_TYPE_NONE, /* Implicit zero */ + [3] = XDP_RSS_TYPE_NONE, /* Implicit zero */ + [RSS_TYPE_L3_IPV4] = XDP_RSS_TYPE_L3_IPV4, + [RSS_TYPE_L4_IPV4_TCP] = XDP_RSS_TYPE_L4_IPV4_TCP, + [RSS_TYPE_L4_IPV4_UDP] = XDP_RSS_TYPE_L4_IPV4_UDP, + [RSS_TYPE_L4_IPV4_IPSEC] = XDP_RSS_TYPE_L4_IPV4_IPSEC, + [RSS_TYPE_L3_IPV6] = XDP_RSS_TYPE_L3_IPV6, + [RSS_TYPE_L4_IPV6_TCP] = XDP_RSS_TYPE_L4_IPV6_TCP, + [RSS_TYPE_L4_IPV6_UDP] = XDP_RSS_TYPE_L4_IPV6_UDP, + [RSS_TYPE_L4_IPV6_IPSEC] = XDP_RSS_TYPE_L4_IPV6_IPSEC, + [12] = XDP_RSS_TYPE_NONE, /* Implicit zero */ + [13] = XDP_RSS_TYPE_NONE, /* Implicit zero */ + [14] = XDP_RSS_TYPE_NONE, /* Implicit zero */ + [15] = XDP_RSS_TYPE_NONE, /* Implicit zero */ +}; + static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, enum xdp_rss_hash_type *rss_type) { const struct mlx5e_xdp_buff *_ctx = (void *)ctx; + const struct mlx5_cqe64 *cqe = _ctx->cqe; + u32 hash_type, l4_type, ip_type, lookup; if (unlikely(!(_ctx->xdp.rxq->dev->features & NETIF_F_RXHASH))) return -ENODATA; - *hash = be32_to_cpu(_ctx->cqe->rss_hash_result); + *hash = be32_to_cpu(cqe->rss_hash_result); + + hash_type = cqe->rss_hash_type; + BUILD_BUG_ON(CQE_RSS_HTYPE_IP != RSS_L3); /* same mask */ + ip_type = hash_type & CQE_RSS_HTYPE_IP; + l4_type = FIELD_GET(CQE_RSS_HTYPE_L4, hash_type); + lookup = ip_type | l4_type; + *rss_type = mlx5_xdp_rss_type[lookup]; + return 0; } diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 71b06ebad4024..1db19a9d26e32 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -36,6 +36,7 @@ #include #include #include +#include #if defined(__LITTLE_ENDIAN) #define MLX5_SET_HOST_ENDIANNESS 0 @@ -980,14 +981,23 @@ enum { }; enum { - CQE_RSS_HTYPE_IP = 0x3 << 2, + CQE_RSS_HTYPE_IP = GENMASK(3, 2), /* cqe->rss_hash_type[3:2] - IP destination selected for hash * (00 = none, 01 = IPv4, 10 = IPv6, 11 = Reserved) */ - CQE_RSS_HTYPE_L4 = 0x3 << 6, + CQE_RSS_IP_NONE = 0x0, + CQE_RSS_IPV4 = 0x1, + CQE_RSS_IPV6 = 0x2, + CQE_RSS_RESERVED = 0x3, + + CQE_RSS_HTYPE_L4 = GENMASK(7, 6), /* cqe->rss_hash_type[7:6] - L4 destination selected for hash * (00 = none, 01 = TCP. 10 = UDP, 11 = IPSEC.SPI */ + CQE_RSS_L4_NONE = 0x0, + CQE_RSS_L4_TCP = 0x1, + CQE_RSS_L4_UDP = 0x2, + CQE_RSS_L4_IPSEC = 0x3, }; enum { diff --git a/include/net/xdp.h b/include/net/xdp.h index a76c4ea203eaa..76aa748e79237 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -460,10 +460,12 @@ enum xdp_rss_hash_type { XDP_RSS_TYPE_L4_IPV4_TCP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_TCP, XDP_RSS_TYPE_L4_IPV4_UDP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_UDP, XDP_RSS_TYPE_L4_IPV4_SCTP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_SCTP, + XDP_RSS_TYPE_L4_IPV4_IPSEC = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC, XDP_RSS_TYPE_L4_IPV6_TCP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_TCP, XDP_RSS_TYPE_L4_IPV6_UDP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_UDP, XDP_RSS_TYPE_L4_IPV6_SCTP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_SCTP, + XDP_RSS_TYPE_L4_IPV6_IPSEC = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC, XDP_RSS_TYPE_L4_IPV6_TCP_EX = XDP_RSS_TYPE_L4_IPV6_TCP | XDP_RSS_L3_DYNHDR, XDP_RSS_TYPE_L4_IPV6_UDP_EX = XDP_RSS_TYPE_L4_IPV6_UDP | XDP_RSS_L3_DYNHDR, From 96b1a098f3db06223a6b6268e756f980d5c07f10 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 12 Apr 2023 21:48:50 +0200 Subject: [PATCH 882/898] veth: bpf_xdp_metadata_rx_hash add xdp rss hash type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update API for bpf_xdp_metadata_rx_hash() with arg for xdp rss hash type. The veth driver currently only support XDP-hints based on SKB code path. The SKB have lost information about the RSS hash type, by compressing the information down to a single bitfield skb->l4_hash, that only knows if this was a L4 hash value. In preparation for veth, the xdp_rss_hash_type have an L4 indication bit that allow us to return a meaningful L4 indication when working with SKB based packets. Fixes: 306531f0249f ("veth: Support RX XDP metadata") Signed-off-by: Jesper Dangaard Brouer Acked-by: Toke Høiland-Jørgensen Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/168132893055.340624.16209448340644513469.stgit@firesoul Signed-off-by: Alexei Starovoitov --- drivers/net/veth.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 424e8876a16b6..e1b38fbf1dd95 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1652,11 +1652,14 @@ static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, enum xdp_rss_hash_type *rss_type) { struct veth_xdp_buff *_ctx = (void *)ctx; + struct sk_buff *skb = _ctx->skb; - if (!_ctx->skb) + if (!skb) return -ENODATA; - *hash = skb_get_hash(_ctx->skb); + *hash = skb_get_hash(skb); + *rss_type = skb->l4_hash ? XDP_RSS_TYPE_L4_ANY : XDP_RSS_TYPE_NONE; + return 0; } From 9123397aeeb4f93dda5828e37c35312f1b62231e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 12 Apr 2023 21:48:55 +0200 Subject: [PATCH 883/898] mlx4: bpf_xdp_metadata_rx_hash add xdp rss hash type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update API for bpf_xdp_metadata_rx_hash() with arg for xdp rss hash type via matching individual Completion Queue Entry (CQE) status bits. Fixes: ab46182d0dcb ("net/mlx4_en: Support RX XDP metadata") Signed-off-by: Jesper Dangaard Brouer Acked-by: Toke Høiland-Jørgensen Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/168132893562.340624.12779118462402031248.stgit@firesoul Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 73d10aa4c503f..332472fe49902 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -685,11 +685,28 @@ int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, enum xdp_rss_hash_type *rss_type) { struct mlx4_en_xdp_buff *_ctx = (void *)ctx; + struct mlx4_cqe *cqe = _ctx->cqe; + enum xdp_rss_hash_type xht = 0; + __be16 status; if (unlikely(!(_ctx->dev->features & NETIF_F_RXHASH))) return -ENODATA; - *hash = be32_to_cpu(_ctx->cqe->immed_rss_invalid); + *hash = be32_to_cpu(cqe->immed_rss_invalid); + status = cqe->status; + if (status & cpu_to_be16(MLX4_CQE_STATUS_TCP)) + xht = XDP_RSS_L4_TCP; + if (status & cpu_to_be16(MLX4_CQE_STATUS_UDP)) + xht = XDP_RSS_L4_UDP; + if (status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV4F)) + xht |= XDP_RSS_L3_IPV4; + if (status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) { + xht |= XDP_RSS_L3_IPV6; + if (cqe->ipv6_ext_mask) + xht |= XDP_RSS_L3_DYNHDR; + } + *rss_type = xht; + return 0; } From 0f26b74e7d071b0dc18e2c43d79d496c2b144035 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 12 Apr 2023 21:49:00 +0200 Subject: [PATCH 884/898] selftests/bpf: Adjust bpf_xdp_metadata_rx_hash for new arg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update BPF selftests to use the new RSS type argument for kfunc bpf_xdp_metadata_rx_hash. Signed-off-by: Jesper Dangaard Brouer Acked-by: Toke Høiland-Jørgensen Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/168132894068.340624.8914711185697163690.stgit@firesoul Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/xdp_metadata.c | 2 ++ tools/testing/selftests/bpf/progs/xdp_hw_metadata.c | 10 +++++----- tools/testing/selftests/bpf/progs/xdp_metadata.c | 6 +++--- tools/testing/selftests/bpf/progs/xdp_metadata2.c | 7 ++++--- tools/testing/selftests/bpf/xdp_hw_metadata.c | 6 +++++- tools/testing/selftests/bpf/xdp_metadata.h | 4 ++++ 6 files changed, 23 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index aa4beae99f4f6..8c5e98da9ae9f 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -273,6 +273,8 @@ static int verify_xsk_metadata(struct xsk *xsk) if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash")) return -1; + ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type"); + xsk_ring_cons__release(&xsk->rx, 1); refill_rx(xsk, comp_addr); diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c index 0687d11162f6b..e1c787815e44b 100644 --- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c @@ -18,8 +18,8 @@ __u64 pkts_redir = 0; extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym; -extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, - __u32 *hash) __ksym; +extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, + enum xdp_rss_hash_type *rss_type) __ksym; SEC("xdp") int rx(struct xdp_md *ctx) @@ -80,9 +80,9 @@ int rx(struct xdp_md *ctx) if (err) meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */ - err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash); - if (err) - meta->rx_hash = 0; /* Used by AF_XDP as not avail signal */ + err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type); + if (err < 0) + meta->rx_hash_err = err; /* Used by AF_XDP as no hash signal */ __sync_add_and_fetch(&pkts_redir, 1); return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c index 77678b0343897..d151d406a123e 100644 --- a/tools/testing/selftests/bpf/progs/xdp_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c @@ -21,8 +21,8 @@ struct { extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym; -extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, - __u32 *hash) __ksym; +extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, + enum xdp_rss_hash_type *rss_type) __ksym; SEC("xdp") int rx(struct xdp_md *ctx) @@ -56,7 +56,7 @@ int rx(struct xdp_md *ctx) if (timestamp == 0) meta->rx_timestamp = 1; - bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash); + bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type); return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); } diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata2.c b/tools/testing/selftests/bpf/progs/xdp_metadata2.c index cf69d05451c39..85f88d9d7a785 100644 --- a/tools/testing/selftests/bpf/progs/xdp_metadata2.c +++ b/tools/testing/selftests/bpf/progs/xdp_metadata2.c @@ -5,17 +5,18 @@ #include #include -extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, - __u32 *hash) __ksym; +extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, + enum xdp_rss_hash_type *rss_type) __ksym; int called; SEC("freplace/rx") int freplace_rx(struct xdp_md *ctx) { + enum xdp_rss_hash_type type = 0; u32 hash = 0; /* Call _any_ metadata function to make sure we don't crash. */ - bpf_xdp_metadata_rx_hash(ctx, &hash); + bpf_xdp_metadata_rx_hash(ctx, &hash, &type); called++; return XDP_PASS; } diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 3b942ef7297bf..987cf0db5ebc8 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -141,7 +141,11 @@ static void verify_xdp_metadata(void *data) meta = data - sizeof(*meta); printf("rx_timestamp: %llu\n", meta->rx_timestamp); - printf("rx_hash: %u\n", meta->rx_hash); + if (meta->rx_hash_err < 0) + printf("No rx_hash err=%d\n", meta->rx_hash_err); + else + printf("rx_hash: 0x%X with RSS type:0x%X\n", + meta->rx_hash, meta->rx_hash_type); } static void verify_skb_metadata(int fd) diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h index f6780fbb0a214..0c4624dc6f2f7 100644 --- a/tools/testing/selftests/bpf/xdp_metadata.h +++ b/tools/testing/selftests/bpf/xdp_metadata.h @@ -12,4 +12,8 @@ struct xdp_meta { __u64 rx_timestamp; __u32 rx_hash; + union { + __u32 rx_hash_type; + __s32 rx_hash_err; + }; }; From e7067a446264a7514fa1cfaa4052cdb6803bc6a2 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Thu, 13 Apr 2023 23:49:57 +0900 Subject: [PATCH 885/898] ksmbd: avoid out of bounds access in decode_preauth_ctxt() Confirm that the accessed pneg_ctxt->HashAlgorithms address sits within the SMB request boundary; deassemble_neg_contexts() only checks that the eight byte smb2_neg_context header + (client controlled) DataLength are within the packet boundary, which is insufficient. Checking for sizeof(struct smb2_preauth_neg_context) is overkill given that the type currently assumes SMB311_SALT_SIZE bytes of trailing Salt. Signed-off-by: David Disseldorp Acked-by: Namjae Jeon Cc: Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 8af939a181be5..67b7e766a06ba 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -876,17 +876,21 @@ static void assemble_neg_contexts(struct ksmbd_conn *conn, } static __le32 decode_preauth_ctxt(struct ksmbd_conn *conn, - struct smb2_preauth_neg_context *pneg_ctxt) + struct smb2_preauth_neg_context *pneg_ctxt, + int len_of_ctxts) { - __le32 err = STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP; + /* + * sizeof(smb2_preauth_neg_context) assumes SMB311_SALT_SIZE Salt, + * which may not be present. Only check for used HashAlgorithms[1]. + */ + if (len_of_ctxts < MIN_PREAUTH_CTXT_DATA_LEN) + return STATUS_INVALID_PARAMETER; - if (pneg_ctxt->HashAlgorithms == SMB2_PREAUTH_INTEGRITY_SHA512) { - conn->preauth_info->Preauth_HashId = - SMB2_PREAUTH_INTEGRITY_SHA512; - err = STATUS_SUCCESS; - } + if (pneg_ctxt->HashAlgorithms != SMB2_PREAUTH_INTEGRITY_SHA512) + return STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP; - return err; + conn->preauth_info->Preauth_HashId = SMB2_PREAUTH_INTEGRITY_SHA512; + return STATUS_SUCCESS; } static void decode_encrypt_ctxt(struct ksmbd_conn *conn, @@ -1014,7 +1018,8 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn, break; status = decode_preauth_ctxt(conn, - (struct smb2_preauth_neg_context *)pctx); + (struct smb2_preauth_neg_context *)pctx, + len_of_ctxts); if (status != STATUS_SUCCESS) break; } else if (pctx->ContextType == SMB2_ENCRYPTION_CAPABILITIES) { From ef69d2559fe91f23d27a3d6fd640b5641787d22e Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 29 Mar 2023 10:19:30 +0200 Subject: [PATCH 886/898] riscv: Move early dtb mapping into the fixmap region riscv establishes 2 virtual mappings: - early_pg_dir maps the kernel which allows to discover the system memory - swapper_pg_dir installs the final mapping (linear mapping included) We used to map the dtb in early_pg_dir using DTB_EARLY_BASE_VA, and this mapping was not carried over in swapper_pg_dir. It happens that early_init_fdt_scan_reserved_mem() must be called before swapper_pg_dir is setup otherwise we could allocate reserved memory defined in the dtb. And this function initializes reserved_mem variable with addresses that lie in the early_pg_dir dtb mapping: when those addresses are reused with swapper_pg_dir, this mapping does not exist and then we trap. The previous "fix" was incorrect as early_init_fdt_scan_reserved_mem() must be called before swapper_pg_dir is set up otherwise we could allocate in reserved memory defined in the dtb. So move the dtb mapping in the fixmap region which is established in early_pg_dir and handed over to swapper_pg_dir. Fixes: 922b0375fc93 ("riscv: Fix memblock reservation for device tree blob") Fixes: 8f3a2b4a96dc ("RISC-V: Move DT mapping outof fixmap") Fixes: 50e63dd8ed92 ("riscv: fix reserved memory setup") Reported-by: Conor Dooley Link: https://lore.kernel.org/all/f8e67f82-103d-156c-deb0-d6d6e2756f5e@microchip.com/ Signed-off-by: Alexandre Ghiti Reviewed-by: Conor Dooley Tested-by: Conor Dooley Link: https://lore.kernel.org/r/20230329081932.79831-2-alexghiti@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- Documentation/riscv/vm-layout.rst | 6 +-- arch/riscv/include/asm/fixmap.h | 8 ++++ arch/riscv/include/asm/pgtable.h | 8 +++- arch/riscv/kernel/setup.c | 1 - arch/riscv/mm/init.c | 61 +++++++++++++++++-------------- 5 files changed, 51 insertions(+), 33 deletions(-) diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst index 3be44e74ec5d6..5462c84f4723f 100644 --- a/Documentation/riscv/vm-layout.rst +++ b/Documentation/riscv/vm-layout.rst @@ -47,7 +47,7 @@ RISC-V Linux Kernel SV39 | Kernel-space virtual memory, shared between all processes: ____________________________________________________________|___________________________________________________________ | | | | - ffffffc6fee00000 | -228 GB | ffffffc6feffffff | 2 MB | fixmap + ffffffc6fea00000 | -228 GB | ffffffc6feffffff | 6 MB | fixmap ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space @@ -83,7 +83,7 @@ RISC-V Linux Kernel SV48 | Kernel-space virtual memory, shared between all processes: ____________________________________________________________|___________________________________________________________ | | | | - ffff8d7ffee00000 | -114.5 TB | ffff8d7ffeffffff | 2 MB | fixmap + ffff8d7ffea00000 | -114.5 TB | ffff8d7ffeffffff | 6 MB | fixmap ffff8d7fff000000 | -114.5 TB | ffff8d7fffffffff | 16 MB | PCI io ffff8d8000000000 | -114.5 TB | ffff8f7fffffffff | 2 TB | vmemmap ffff8f8000000000 | -112.5 TB | ffffaf7fffffffff | 32 TB | vmalloc/ioremap space @@ -119,7 +119,7 @@ RISC-V Linux Kernel SV57 | Kernel-space virtual memory, shared between all processes: ____________________________________________________________|___________________________________________________________ | | | | - ff1bfffffee00000 | -57 PB | ff1bfffffeffffff | 2 MB | fixmap + ff1bfffffea00000 | -57 PB | ff1bfffffeffffff | 6 MB | fixmap ff1bffffff000000 | -57 PB | ff1bffffffffffff | 16 MB | PCI io ff1c000000000000 | -57 PB | ff1fffffffffffff | 1 PB | vmemmap ff20000000000000 | -56 PB | ff5fffffffffffff | 16 PB | vmalloc/ioremap space diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index 5c3e7b97fcc6f..0a55099bb7349 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -22,6 +22,14 @@ */ enum fixed_addresses { FIX_HOLE, + /* + * The fdt fixmap mapping must be PMD aligned and will be mapped + * using PMD entries in fixmap_pmd in 64-bit and a PGD entry in 32-bit. + */ + FIX_FDT_END, + FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, + + /* Below fixmaps will be mapped using fixmap_pte */ FIX_PTE, FIX_PMD, FIX_PUD, diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index ab05f892d317a..f641837ccf31d 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -87,9 +87,13 @@ #define FIXADDR_TOP PCI_IO_START #ifdef CONFIG_64BIT -#define FIXADDR_SIZE PMD_SIZE +#define MAX_FDT_SIZE PMD_SIZE +#define FIX_FDT_SIZE (MAX_FDT_SIZE + SZ_2M) +#define FIXADDR_SIZE (PMD_SIZE + FIX_FDT_SIZE) #else -#define FIXADDR_SIZE PGDIR_SIZE +#define MAX_FDT_SIZE PGDIR_SIZE +#define FIX_FDT_SIZE MAX_FDT_SIZE +#define FIXADDR_SIZE (PGDIR_SIZE + FIX_FDT_SIZE) #endif #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 376d2827e7365..542eed85ad2c5 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -283,7 +283,6 @@ void __init setup_arch(char **cmdline_p) else pr_err("No DTB found in kernel mappings\n"); #endif - early_init_fdt_scan_reserved_mem(); misc_mem_init(); init_resources(); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 478d6763a01a1..fb78d6bbabae3 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -57,7 +57,6 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] EXPORT_SYMBOL(empty_zero_page); extern char _start[]; -#define DTB_EARLY_BASE_VA PGDIR_SIZE void *_dtb_early_va __initdata; uintptr_t _dtb_early_pa __initdata; @@ -236,6 +235,14 @@ static void __init setup_bootmem(void) set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); reserve_initrd_mem(); + + /* + * No allocation should be done before reserving the memory as defined + * in the device tree, otherwise the allocation could end up in a + * reserved region. + */ + early_init_fdt_scan_reserved_mem(); + /* * If DTB is built in, no need to reserve its memblock. * Otherwise, do reserve it but avoid using @@ -279,9 +286,6 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); -static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); -static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); -static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #ifdef CONFIG_XIP_KERNEL #define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops)) @@ -626,9 +630,6 @@ static void __init create_p4d_mapping(p4d_t *p4dp, #define trampoline_pgd_next (pgtable_l5_enabled ? \ (uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \ (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)) -#define early_dtb_pgd_next (pgtable_l5_enabled ? \ - (uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ? \ - (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)) #else #define pgd_next_t pte_t #define alloc_pgd_next(__va) pt_ops.alloc_pte(__va) @@ -636,7 +637,6 @@ static void __init create_p4d_mapping(p4d_t *p4dp, #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ create_pte_mapping(__nextp, __va, __pa, __sz, __prot) #define fixmap_pgd_next ((uintptr_t)fixmap_pte) -#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd) #define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) #define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) #define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) @@ -860,32 +860,28 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early) * this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR * entry. */ -static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa) +static void __init create_fdt_early_page_table(pgd_t *pgdir, + uintptr_t fix_fdt_va, + uintptr_t dtb_pa) { -#ifndef CONFIG_BUILTIN_DTB uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1); - create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, - IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa, - PGDIR_SIZE, - IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL); - - if (pgtable_l5_enabled) - create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA, - (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE); - - if (pgtable_l4_enabled) - create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA, - (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE); +#ifndef CONFIG_BUILTIN_DTB + /* Make sure the fdt fixmap address is always aligned on PMD size */ + BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE)); - if (IS_ENABLED(CONFIG_64BIT)) { - create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, + /* In 32-bit only, the fdt lies in its own PGD */ + if (!IS_ENABLED(CONFIG_64BIT)) { + create_pgd_mapping(early_pg_dir, fix_fdt_va, + pa, MAX_FDT_SIZE, PAGE_KERNEL); + } else { + create_pmd_mapping(fixmap_pmd, fix_fdt_va, pa, PMD_SIZE, PAGE_KERNEL); - create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE, + create_pmd_mapping(fixmap_pmd, fix_fdt_va + PMD_SIZE, pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); } - dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); + dtb_early_va = (void *)fix_fdt_va + (dtb_pa & (PMD_SIZE - 1)); #else /* * For 64-bit kernel, __va can't be used since it would return a linear @@ -1055,7 +1051,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) create_kernel_page_table(early_pg_dir, true); /* Setup early mapping for FDT early scan */ - create_fdt_early_page_table(early_pg_dir, dtb_pa); + create_fdt_early_page_table(early_pg_dir, + __fix_to_virt(FIX_FDT), dtb_pa); /* * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap @@ -1097,6 +1094,16 @@ static void __init setup_vm_final(void) u64 i; /* Setup swapper PGD for fixmap */ +#if !defined(CONFIG_64BIT) + /* + * In 32-bit, the device tree lies in a pgd entry, so it must be copied + * directly in swapper_pg_dir in addition to the pgd entry that points + * to fixmap_pte. + */ + unsigned long idx = pgd_index(__fix_to_virt(FIX_FDT)); + + set_pgd(&swapper_pg_dir[idx], early_pg_dir[idx]); +#endif create_pgd_mapping(swapper_pg_dir, FIXADDR_START, __pa_symbol(fixmap_pgd_next), PGDIR_SIZE, PAGE_TABLE); From f1581626071c8e37c58c5e8f0b4126b17172a211 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 29 Mar 2023 10:19:31 +0200 Subject: [PATCH 887/898] riscv: Do not set initial_boot_params to the linear address of the dtb early_init_dt_verify() is already called in parse_dtb() and since the dtb address does not change anymore (it is now in the fixmap region), no need to reset initial_boot_params by calling early_init_dt_verify() again. Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20230329081932.79831-3-alexghiti@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 542eed85ad2c5..a059b73f4ddb2 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -278,10 +278,7 @@ void __init setup_arch(char **cmdline_p) #if IS_ENABLED(CONFIG_BUILTIN_DTB) unflatten_and_copy_device_tree(); #else - if (early_init_dt_verify(__va(XIP_FIXUP(dtb_early_pa)))) - unflatten_device_tree(); - else - pr_err("No DTB found in kernel mappings\n"); + unflatten_device_tree(); #endif misc_mem_init(); From 1b50f956c8fe9082bdee4a9cfd798149c52f7043 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 29 Mar 2023 10:19:32 +0200 Subject: [PATCH 888/898] riscv: No need to relocate the dtb as it lies in the fixmap region We used to access the dtb via its linear mapping address but now that the dtb early mapping was moved in the fixmap region, we can keep using this address since it is present in swapper_pg_dir, and remove the dtb relocation. Note that the relocation was wrong anyway since early_memremap() is restricted to 256K whereas the maximum fdt size is 2MB. Signed-off-by: Alexandre Ghiti Reviewed-by: Conor Dooley Tested-by: Conor Dooley Link: https://lore.kernel.org/r/20230329081932.79831-4-alexghiti@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index fb78d6bbabae3..0f14f4a8d179a 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -249,25 +249,8 @@ static void __init setup_bootmem(void) * early_init_fdt_reserve_self() since __pa() does * not work for DTB pointers that are fixmap addresses */ - if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) { - /* - * In case the DTB is not located in a memory region we won't - * be able to locate it later on via the linear mapping and - * get a segfault when accessing it via __va(dtb_early_pa). - * To avoid this situation copy DTB to a memory region. - * Note that memblock_phys_alloc will also reserve DTB region. - */ - if (!memblock_is_memory(dtb_early_pa)) { - size_t fdt_size = fdt_totalsize(dtb_early_va); - phys_addr_t new_dtb_early_pa = memblock_phys_alloc(fdt_size, PAGE_SIZE); - void *new_dtb_early_va = early_memremap(new_dtb_early_pa, fdt_size); - - memcpy(new_dtb_early_va, dtb_early_va, fdt_size); - early_memunmap(new_dtb_early_va, fdt_size); - _dtb_early_pa = new_dtb_early_pa; - } else - memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); - } + if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) + memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); dma_contiguous_reserve(dma32_phys_limit); if (IS_ENABLED(CONFIG_64BIT)) From 74391b3e69855e7dd65a9cef36baf5fc1345affd Mon Sep 17 00:00:00 2001 From: Duy Truong Date: Thu, 13 Apr 2023 17:55:48 -0700 Subject: [PATCH 889/898] nvme-pci: add NVME_QUIRK_BOGUS_NID for T-FORCE Z330 SSD Added a quirk to fix the TeamGroup T-Force Cardea Zero Z330 SSDs reporting duplicate NGUIDs. Signed-off-by: Duy Truong Cc: stable@vger.kernel.org Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 282d808400c5b..cd7873de31215 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3443,6 +3443,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */ .driver_data = NVME_QUIRK_BOGUS_NID | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x10ec, 0x5763), /* TEAMGROUP T-FORCE CARDEA ZERO Z330 SSD */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061), .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065), From 6ab6f98fcdc9d4fbe245aa67de03542deea65322 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Thu, 13 Apr 2023 22:11:53 +0300 Subject: [PATCH 890/898] ALSA: hda/hdmi: disable KAE for Intel DG2 Use of keep-alive (KAE) has resulted in loss of audio on some A750/770 cards as the transition from keep-alive to stream playback is not working as expected. As there is limited benefit of the new KAE mode on discrete cards, revert back to older silent-stream implementation on these systems. Cc: stable@vger.kernel.org Fixes: 15175a4f2bbb ("ALSA: hda/hdmi: add keep-alive support for ADL-P and DG2") Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8307 Signed-off-by: Kai Vehmanen Link: https://lore.kernel.org/r/20230413191153.3692049-1-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 4ffa3a59f419f..5c6980394dcec 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -4604,7 +4604,7 @@ HDA_CODEC_ENTRY(0x80862814, "DG1 HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862815, "Alderlake HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862816, "Rocketlake HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862818, "Raptorlake HDMI", patch_i915_tgl_hdmi), -HDA_CODEC_ENTRY(0x80862819, "DG2 HDMI", patch_i915_adlp_hdmi), +HDA_CODEC_ENTRY(0x80862819, "DG2 HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI", patch_i915_icl_hdmi), HDA_CODEC_ENTRY(0x8086281b, "Elkhartlake HDMI", patch_i915_icl_hdmi), HDA_CODEC_ENTRY(0x8086281c, "Alderlake-P HDMI", patch_i915_adlp_hdmi), From 860e1c7f8b0b43fbf91b4d689adfaa13adb89452 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 14 Apr 2023 15:53:13 +0800 Subject: [PATCH 891/898] io_uring: complete request via task work in case of DEFER_TASKRUN So far io_req_complete_post() only covers DEFER_TASKRUN by completing request via task work when the request is completed from IOWQ. However, uring command could be completed from any context, and if io uring is setup with DEFER_TASKRUN, the command is required to be completed from current context, otherwise wait on IORING_ENTER_GETEVENTS can't be wakeup, and may hang forever. The issue can be observed on removing ublk device, but turns out it is one generic issue for uring command & DEFER_TASKRUN, so solve it in io_uring core code. Fixes: e6aeb2721d3b ("io_uring: complete all requests in task context") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-block/b3fc9991-4c53-9218-a8cc-5b4dd3952108@kernel.dk/ Reported-by: Jens Axboe Cc: Kanchan Joshi Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 2a8b8c304d2ab..4a865f0e85d0b 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -998,7 +998,7 @@ static void __io_req_complete_post(struct io_kiocb *req) void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags) { - if (req->ctx->task_complete && (issue_flags & IO_URING_F_IOWQ)) { + if (req->ctx->task_complete && req->ctx->submitter_task != current) { req->io_task_work.func = io_req_task_complete; io_req_task_work_add(req); } else if (!(issue_flags & IO_URING_F_UNLOCKED) || From 5105a7ffce19160e7062aee67fb6b3b8a1b56d78 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 7 Apr 2023 00:34:11 +0200 Subject: [PATCH 892/898] cifs: fix negotiate context parsing smb311_decode_neg_context() doesn't properly check against SMB packet boundaries prior to accessing individual negotiate context entries. This is due to the length check omitting the eight byte smb2_neg_context header, as well as incorrect decrementing of len_of_ctxts. Fixes: 5100d8a3fe03 ("SMB311: Improve checking of negotiate security contexts") Reported-by: Volker Lendecke Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: David Disseldorp Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2b92132097dc8..4245249dbba86 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -587,11 +587,15 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, } +/* If invalid preauth context warn but use what we requested, SHA-512 */ static void decode_preauth_context(struct smb2_preauth_neg_context *ctxt) { unsigned int len = le16_to_cpu(ctxt->DataLength); - /* If invalid preauth context warn but use what we requested, SHA-512 */ + /* + * Caller checked that DataLength remains within SMB boundary. We still + * need to confirm that one HashAlgorithms member is accounted for. + */ if (len < MIN_PREAUTH_CTXT_DATA_LEN) { pr_warn_once("server sent bad preauth context\n"); return; @@ -610,7 +614,11 @@ static void decode_compress_ctx(struct TCP_Server_Info *server, { unsigned int len = le16_to_cpu(ctxt->DataLength); - /* sizeof compress context is a one element compression capbility struct */ + /* + * Caller checked that DataLength remains within SMB boundary. We still + * need to confirm that one CompressionAlgorithms member is accounted + * for. + */ if (len < 10) { pr_warn_once("server sent bad compression cntxt\n"); return; @@ -632,6 +640,11 @@ static int decode_encrypt_ctx(struct TCP_Server_Info *server, unsigned int len = le16_to_cpu(ctxt->DataLength); cifs_dbg(FYI, "decode SMB3.11 encryption neg context of len %d\n", len); + /* + * Caller checked that DataLength remains within SMB boundary. We still + * need to confirm that one Cipher flexible array member is accounted + * for. + */ if (len < MIN_ENCRYPT_CTXT_DATA_LEN) { pr_warn_once("server sent bad crypto ctxt len\n"); return -EINVAL; @@ -678,6 +691,11 @@ static void decode_signing_ctx(struct TCP_Server_Info *server, { unsigned int len = le16_to_cpu(pctxt->DataLength); + /* + * Caller checked that DataLength remains within SMB boundary. We still + * need to confirm that one SigningAlgorithms flexible array member is + * accounted for. + */ if ((len < 4) || (len > 16)) { pr_warn_once("server sent bad signing negcontext\n"); return; @@ -719,14 +737,19 @@ static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp, for (i = 0; i < ctxt_cnt; i++) { int clen; /* check that offset is not beyond end of SMB */ - if (len_of_ctxts == 0) - break; - if (len_of_ctxts < sizeof(struct smb2_neg_context)) break; pctx = (struct smb2_neg_context *)(offset + (char *)rsp); - clen = le16_to_cpu(pctx->DataLength); + clen = sizeof(struct smb2_neg_context) + + le16_to_cpu(pctx->DataLength); + /* + * 2.2.4 SMB2 NEGOTIATE Response + * Subsequent negotiate contexts MUST appear at the first 8-byte + * aligned offset following the previous negotiate context. + */ + if (i + 1 != ctxt_cnt) + clen = ALIGN(clen, 8); if (clen > len_of_ctxts) break; @@ -747,12 +770,10 @@ static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp, else cifs_server_dbg(VFS, "unknown negcontext of type %d ignored\n", le16_to_cpu(pctx->ContextType)); - if (rc) break; - /* offsets must be 8 byte aligned */ - clen = ALIGN(clen, 8); - offset += clen + sizeof(struct smb2_neg_context); + + offset += clen; len_of_ctxts -= clen; } return rc; From 5efb685bb3af112038af78a2cdf28f0ffdad45f5 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Mon, 20 Mar 2023 15:08:38 +1100 Subject: [PATCH 893/898] initramfs: Check negative timestamp to prevent broken cpio archive Similar to commit 4c9d410f32b3 ("initramfs: Check timestamp to prevent broken cpio archive"), except asserts that the timestamp is non-negative. This can happen when the KBUILD_BUILD_TIMESTAMP is a value before UNIX epoch, which may be set when making reproducible builds that don't want to look like they use a valid date. While support for dates before 1970 might not be supported, this is more about preventing undetected CPIO corruption. The printf's use a minimum length format specifier, and will happily make the field longer than 8 characters if they need to. Signed-off-by: Benjamin Gray Reviewed-by: Andrew Donnellan Tested-by: Andrew Donnellan Signed-off-by: Masahiro Yamada --- usr/gen_init_cpio.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/usr/gen_init_cpio.c b/usr/gen_init_cpio.c index ee01e40e8bc65..61230532fef10 100644 --- a/usr/gen_init_cpio.c +++ b/usr/gen_init_cpio.c @@ -353,6 +353,12 @@ static int cpio_mkfile(const char *name, const char *location, buf.st_mtime = 0xffffffff; } + if (buf.st_mtime < 0) { + fprintf(stderr, "%s: Timestamp negative, clipping.\n", + location); + buf.st_mtime = 0; + } + if (buf.st_size > 0xffffffff) { fprintf(stderr, "%s: Size exceeds maximum cpio file size\n", location); @@ -602,10 +608,10 @@ int main (int argc, char *argv[]) /* * Timestamps after 2106-02-07 06:28:15 UTC have an ascii hex time_t * representation that exceeds 8 chars and breaks the cpio header - * specification. + * specification. Negative timestamps similarly exceed 8 chars. */ - if (default_mtime > 0xffffffff) { - fprintf(stderr, "ERROR: Timestamp too large for cpio format\n"); + if (default_mtime > 0xffffffff || default_mtime < 0) { + fprintf(stderr, "ERROR: Timestamp out of range for cpio format\n"); exit(1); } From 735faf92fb06d083ddcf6cfcf6665666dea5dcc1 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 21 Mar 2023 10:05:34 +1100 Subject: [PATCH 894/898] init/initramfs: Fix argument forwarding to panic() in panic_show_mem() Forwarding variadic argument lists can't be done by passing a va_list to a function with signature foo(...) (as panic() has). It ends up interpreting the va_list itself as a single argument instead of iterating it. printf() happily accepts it of course, leading to corrupt output. Convert panic_show_mem() to a macro to allow forwarding the arguments. The function is trivial enough that it's easier than trying to introduce a vpanic() variant. Signed-off-by: Benjamin Gray Reviewed-by: Andrew Donnellan Signed-off-by: Masahiro Yamada --- init/initramfs.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/init/initramfs.c b/init/initramfs.c index f6c112e30bd47..e7a01c2ccd1b0 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -60,15 +60,8 @@ static void __init error(char *x) message = x; } -static void panic_show_mem(const char *fmt, ...) -{ - va_list args; - - show_mem(0, NULL); - va_start(args, fmt); - panic(fmt, args); - va_end(args); -} +#define panic_show_mem(fmt, ...) \ + ({ show_mem(0, NULL); panic(fmt, ##__VA_ARGS__); }) /* link hash */ From f6d8283549bc200e2babdd627239ece3547d634c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Apr 2023 19:16:27 +0900 Subject: [PATCH 895/898] kbuild: merge cmd_archive_linux and cmd_archive_perf The two commands, cmd_archive_linux and cmd_archive_perf, are similar. Merge them to make it easier to add more changes to the git-archive command. Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor --- scripts/Makefile.package | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/scripts/Makefile.package b/scripts/Makefile.package index 49aff12cb6abd..d80a9b59f7842 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -57,16 +57,17 @@ check-git: false; \ fi +quiet_cmd_archive = ARCHIVE $@ + cmd_archive = git -C $(srctree) archive \ + --output=$$(realpath $@) --prefix=$(basename $@)/ $(archive-args) + # Linux source tarball # --------------------------------------------------------------------------- -quiet_cmd_archive_linux = ARCHIVE $@ - cmd_archive_linux = \ - git -C $(srctree) archive --output=$$(realpath $@) --prefix=$(basename $@)/ $$(cat $<) - targets += linux.tar +linux.tar: archive-args = $$(cat $<) linux.tar: .tmp_HEAD FORCE - $(call if_changed,archive_linux) + $(call if_changed,archive) # rpm-pkg # --------------------------------------------------------------------------- @@ -181,16 +182,14 @@ quiet_cmd_perf_version_file = GEN $@ .tmp_perf/PERF-VERSION-FILE: .tmp_HEAD $(srctree)/tools/perf/util/PERF-VERSION-GEN | .tmp_perf $(call cmd,perf_version_file) -quiet_cmd_archive_perf = ARCHIVE $@ - cmd_archive_perf = \ - git -C $(srctree) archive --output=$$(realpath $@) --prefix=$(basename $@)/ \ - --add-file=$$(realpath $(word 2, $^)) \ +perf-archive-args = --add-file=$$(realpath $(word 2, $^)) \ --add-file=$$(realpath $(word 3, $^)) \ $$(cat $(word 2, $^))^{tree} $$(cat $<) targets += perf-$(KERNELVERSION).tar +perf-$(KERNELVERSION).tar: archive-args = $(perf-archive-args) perf-$(KERNELVERSION).tar: tools/perf/MANIFEST .tmp_perf/HEAD .tmp_perf/PERF-VERSION-FILE FORCE - $(call if_changed,archive_perf) + $(call if_changed,archive) PHONY += perf-tar-src-pkg perf-tar-src-pkg: perf-$(KERNELVERSION).tar From f8d94c4e403c89ec6b09ba69f65e4547ba99dd07 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Apr 2023 19:16:28 +0900 Subject: [PATCH 896/898] kbuild: do not create intermediate *.tar for source tarballs Since commit 05e96e96a315 ("kbuild: use git-archive for source package creation"), a source tarball is created in two steps; create *.tar file then compress it. I split the compression as a separate rule because I just thought 'git archive' supported only gzip. For other compression algorithms, I could pipe the two commands: $ git archive HEAD | xz > linux.tar.xz I read git-archive(1) carefully, and I realized GIT had provided a more elegant way: $ git -c tar.tar.xz.command=xz archive -o linux.tar.xz HEAD This commit uses 'tar.tar.*.command' configuration to specify the compression backend so we can compress a source tarball on-the-fly. GIT commit 767cf4579f0e ("archive: implement configurable tar filters") is more than a decade old, so it should be available on almost all build environments. Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor --- scripts/Makefile.package | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/scripts/Makefile.package b/scripts/Makefile.package index d80a9b59f7842..ed1784be72ce9 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -57,16 +57,23 @@ check-git: false; \ fi +git-config-tar.gz = -c tar.tar.gz.command="$(KGZIP)" +git-config-tar.bz2 = -c tar.tar.bz2.command="$(KBZIP2)" +git-config-tar.xz = -c tar.tar.xz.command="$(XZ)" +git-config-tar.zst = -c tar.tar.zst.command="$(ZSTD)" + quiet_cmd_archive = ARCHIVE $@ - cmd_archive = git -C $(srctree) archive \ + cmd_archive = git -C $(srctree) $(git-config-tar$(suffix $@)) archive \ --output=$$(realpath $@) --prefix=$(basename $@)/ $(archive-args) # Linux source tarball # --------------------------------------------------------------------------- -targets += linux.tar -linux.tar: archive-args = $$(cat $<) -linux.tar: .tmp_HEAD FORCE +linux-tarballs := $(addprefix linux, .tar.gz) + +targets += $(linux-tarballs) +$(linux-tarballs): archive-args = $$(cat $<) +$(linux-tarballs): .tmp_HEAD FORCE $(call if_changed,archive) # rpm-pkg @@ -186,9 +193,12 @@ perf-archive-args = --add-file=$$(realpath $(word 2, $^)) \ --add-file=$$(realpath $(word 3, $^)) \ $$(cat $(word 2, $^))^{tree} $$(cat $<) -targets += perf-$(KERNELVERSION).tar -perf-$(KERNELVERSION).tar: archive-args = $(perf-archive-args) -perf-$(KERNELVERSION).tar: tools/perf/MANIFEST .tmp_perf/HEAD .tmp_perf/PERF-VERSION-FILE FORCE + +perf-tarballs := $(addprefix perf-$(KERNELVERSION), .tar .tar.gz .tar.bz2 .tar.xz .tar.zst) + +targets += $(perf-tarballs) +$(perf-tarballs): archive-args = $(perf-archive-args) +$(perf-tarballs): tools/perf/MANIFEST .tmp_perf/HEAD .tmp_perf/PERF-VERSION-FILE FORCE $(call if_changed,archive) PHONY += perf-tar-src-pkg From 3c65a2704cdd2a0cd0766352e587bae4a6268155 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Apr 2023 19:16:29 +0900 Subject: [PATCH 897/898] kbuild: do not create intermediate *.tar for tar packages Commit 05e96e96a315 ("kbuild: use git-archive for source package creation") split the compression as a separate step to factor out the common build rules. With the previous commit, we got back to the situation where source tarballs are compressed on-the-fly. There is no reason to keep the separate compression rules. Generate the comressed tar packages directly. Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor --- scripts/Makefile.package | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/scripts/Makefile.package b/scripts/Makefile.package index ed1784be72ce9..4d90691505b12 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -27,21 +27,6 @@ fi ; \ tar -I $(KGZIP) -c $(RCS_TAR_IGNORE) -f $(2).tar.gz \ --transform 's:^:$(2)/:S' $(TAR_CONTENT) $(3) -# tarball compression -# --------------------------------------------------------------------------- - -%.tar.gz: %.tar - $(call cmd,gzip) - -%.tar.bz2: %.tar - $(call cmd,bzip2) - -%.tar.xz: %.tar - $(call cmd,xzmisc) - -%.tar.zst: %.tar - $(call cmd,zstd) - # Git # --------------------------------------------------------------------------- @@ -154,10 +139,17 @@ tar-install: FORCE $(Q)$(MAKE) -f $(srctree)/Makefile +$(Q)$(srctree)/scripts/package/buildtar $@ +compress-tar.gz = -I "$(KGZIP)" +compress-tar.bz2 = -I "$(KBZIP2)" +compress-tar.xz = -I "$(XZ)" +compress-tar.zst = -I "$(ZSTD)" + quiet_cmd_tar = TAR $@ - cmd_tar = cd $<; tar cf ../$@ --owner=root --group=root --sort=name * + cmd_tar = cd $<; tar cf ../$@ $(compress-tar$(suffix $@)) --owner=root --group=root --sort=name * + +dir-tarballs := $(addprefix linux-$(KERNELRELEASE)-$(ARCH), .tar .tar.gz .tar.bz2 .tar.xz .tar.zst) -linux-$(KERNELRELEASE)-$(ARCH).tar: tar-install +$(dir-tarballs): tar-install $(call cmd,tar) PHONY += dir-pkg From 6a8f57ae2eb07ab39a6f0ccad60c760743051026 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 16 Apr 2023 15:23:53 -0700 Subject: [PATCH 898/898] Linux 6.3-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5aeea3d98fc0c..b5c48e3c935ae 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION*