From deffa2d75db7e7a9a1fe3dad4f99310bff7b6449 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Wed, 6 Jul 2022 12:32:08 -0700 Subject: [PATCH 001/654] drm/msm/dp: make eDP panel as the first connected connector Some userspace presumes that the first connected connector is the main display, where it's supposed to display e.g. the login screen. For laptops, this should be the main panel. This patch call drm_helper_move_panel_connectors_to_head() after drm_bridge_connector_init() to make sure eDP stay at head of connected connector list. This fixes unexpected corruption happen at eDP panel if eDP is not placed at head of connected connector list. Changes in v2: -- move drm_helper_move_panel_connectors_to_head() to dpu_kms_drm_obj_init() Changes in v4: -- move drm_helper_move_panel_connectors_to_head() to msm_drm_init() Signed-off-by: Kuogee Hsieh Reviewed-by: Abhinav Kumar Reviewed-by: Douglas Anderson Tested-by: Douglas Anderson Reviewed-by: Dmitry Baryshkov Reviewed-by: Stephen Boyd Fixes: ef7837ff091c ("drm/msm/dp: Add DP controllers for sc7280") Patchwork: https://patchwork.freedesktop.org/patch/492581/ Link: https://lore.kernel.org/r/1657135928-31195-1-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/msm_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 1ed4cd09dbf8b..16884db272deb 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -469,6 +469,8 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) } } + drm_helper_move_panel_connectors_to_head(ddev); + ddev->mode_config.funcs = &mode_config_funcs; ddev->mode_config.helper_private = &mode_config_helper_funcs; From 2f25a1fb4ec516c5ad67afd754334b491b9f09a5 Mon Sep 17 00:00:00 2001 From: sunliming Date: Tue, 19 Jul 2022 09:56:22 +0800 Subject: [PATCH 002/654] drm/msm/dsi: fix the inconsistent indenting Fix the inconsistent indenting in function msm_dsi_dphy_timing_calc_v3(). Fix the following smatch warnings: drivers/gpu/drm/msm/dsi/phy/dsi_phy.c:350 msm_dsi_dphy_timing_calc_v3() warn: inconsistent indenting Fixes: f1fa7ff44056 ("drm/msm/dsi: implement auto PHY timing calculator for 10nm PHY") Reported-by: kernel test robot Signed-off-by: sunliming Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/494662/ Link: https://lore.kernel.org/r/20220719015622.646718-1-sunliming@kylinos.cn Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index a39de3bdc7faf..56dfa2d24be1f 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -347,7 +347,7 @@ int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing, } else { timing->shared_timings.clk_pre = linear_inter(tmax, tmin, pcnt2, 0, false); - timing->shared_timings.clk_pre_inc_by_2 = 0; + timing->shared_timings.clk_pre_inc_by_2 = 0; } timing->ta_go = 3; From ef3ac3ae147c6ab370875727791e9b3eaf176cea Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Fri, 15 Jul 2022 12:14:28 -0700 Subject: [PATCH 003/654] drm/msm/dpu: populate wb or intf before reset_intf_cfg dpu_encoder_helper_phys_cleanup() was not populating neither wb or intf to the intf_cfg before calling the reset_intf_cfg(). This causes the reset of the active bits of wb/intf to be skipped which is incorrect. Fix this by populating the relevant wb or intf indices correctly. Fixes: ae4d721ce100 ("drm/msm/dpu: add an API to reset the encoder related hw blocks") Signed-off-by: Abhinav Kumar Reviewed-by: Jessica Zhang Tested-by: Jessica Zhang # Trogdor (SC8170) Patchwork: https://patchwork.freedesktop.org/patch/494298/ Link: https://lore.kernel.org/r/1657912468-17254-1-git-send-email-quic_abhinavk@quicinc.com --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index c682d4e02d1bc..52a626117f70f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -2061,6 +2061,12 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) intf_cfg.stream_sel = 0; /* Don't care value for video mode */ intf_cfg.mode_3d = dpu_encoder_helper_get_3d_blend_mode(phys_enc); + + if (phys_enc->hw_intf) + intf_cfg.intf = phys_enc->hw_intf->idx; + if (phys_enc->hw_wb) + intf_cfg.wb = phys_enc->hw_wb->idx; + if (phys_enc->hw_pp->merge_3d) intf_cfg.merge_3d = phys_enc->hw_pp->merge_3d->idx; From 9c9cb23e00ddf45679b21b4dacc11d1ae7961ebe Mon Sep 17 00:00:00 2001 From: Xin Xiong Date: Sun, 24 Jul 2022 17:55:58 +0800 Subject: [PATCH 004/654] xfrm: fix refcount leak in __xfrm_policy_check() The issue happens on an error path in __xfrm_policy_check(). When the fetching process of the object `pols[1]` fails, the function simply returns 0, forgetting to decrement the reference count of `pols[0]`, which is incremented earlier by either xfrm_sk_policy_lookup() or xfrm_policy_lookup(). This may result in memory leaks. Fix it by decreasing the reference count of `pols[0]` in that path. Fixes: 134b0fc544ba ("IPsec: propagate security module errors up from flow_cache_lookup") Signed-off-by: Xin Xiong Signed-off-by: Xin Tan Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f1a0bab920a55..4f8bbb825abcb 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3599,6 +3599,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (pols[1]) { if (IS_ERR(pols[1])) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); + xfrm_pol_put(pols[0]); return 0; } pols[1]->curlft.use_time = ktime_get_real_seconds(); From d2139dfca361a1f5bfc4d4a23455b1a409a69cd4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 30 Jul 2022 18:28:46 +0200 Subject: [PATCH 005/654] firmware: dmi: Use the proper accessor for the version field The byte at offset 6 represents length. Don't take it and drop it immediately by using proper accessor, i.e. get_unaligned_be24(). [JD: Change the subject to something less frightening] Signed-off-by: Andy Shevchenko Signed-off-by: Jean Delvare --- drivers/firmware/dmi_scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index f191a1f901ac7..0eb6b617f709a 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -630,7 +630,7 @@ static int __init dmi_smbios3_present(const u8 *buf) { if (memcmp(buf, "_SM3_", 5) == 0 && buf[6] < 32 && dmi_checksum(buf, buf[6])) { - dmi_ver = get_unaligned_be32(buf + 6) & 0xFFFFFF; + dmi_ver = get_unaligned_be24(buf + 7); dmi_num = 0; /* No longer specified */ dmi_len = get_unaligned_le32(buf + 12); dmi_base = get_unaligned_le64(buf + 16); From 717ada9f10f2de8c4f4d72ad045f3b67a7ced715 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Wed, 27 Jul 2022 17:38:35 +0200 Subject: [PATCH 006/654] Revert "xfrm: update SA curlft.use_time" This reverts commit af734a26a1a95a9fda51f2abb0c22a7efcafd5ca. The abvoce commit is a regression according RFC 2367. A better fix would be use x->lastused. Which will be propsed later. according to RFC 2367 use_time == sadb_lifetime_usetime. "sadb_lifetime_usetime For CURRENT, the time, in seconds, when association was first used. For HARD and SOFT, the number of seconds after the first use of the association until it expires." Fixes: af734a26a1a9 ("xfrm: update SA curlft.use_time") Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 1 - net/xfrm/xfrm_output.c | 1 - 2 files changed, 2 deletions(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 144238a50f3d4..70a8c36f0ba6e 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -669,7 +669,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) x->curlft.bytes += skb->len; x->curlft.packets++; - x->curlft.use_time = ktime_get_real_seconds(); spin_unlock(&x->lock); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 555ab35cd119a..9a5e79a38c679 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -534,7 +534,6 @@ static int xfrm_output_one(struct sk_buff *skb, int err) x->curlft.bytes += skb->len; x->curlft.packets++; - x->curlft.use_time = ktime_get_real_seconds(); spin_unlock_bh(&x->lock); From 36d763509be326bb383b1b1852a129ff58d74e3b Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Wed, 27 Jul 2022 17:40:53 +0200 Subject: [PATCH 007/654] xfrm: fix XFRMA_LASTUSED comment It is a __u64, internally time64_t. Fixes: bf825f81b454 ("xfrm: introduce basic mark infrastructure") Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 65e13a099b1a0..a9f5d884560ac 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -296,7 +296,7 @@ enum xfrm_attr_type_t { XFRMA_ETIMER_THRESH, XFRMA_SRCADDR, /* xfrm_address_t */ XFRMA_COADDR, /* xfrm_address_t */ - XFRMA_LASTUSED, /* unsigned long */ + XFRMA_LASTUSED, /* __u64 */ XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */ XFRMA_MIGRATE, XFRMA_ALG_AEAD, /* struct xfrm_algo_aead */ From 6aa811acdb76facca0b705f4e4c1d948ccb6af8b Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Wed, 27 Jul 2022 17:41:22 +0200 Subject: [PATCH 008/654] xfrm: clone missing x->lastused in xfrm_do_migrate x->lastused was not cloned in xfrm_do_migrate. Add it to clone during migrate. Fixes: 80c9abaabf42 ("[XFRM]: Extension for dynamic update of endpoint address(es)") Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index ccfb172eb5b8d..11d89af9cb55a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1592,6 +1592,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, x->replay = orig->replay; x->preplay = orig->preplay; x->mapping_maxage = orig->mapping_maxage; + x->lastused = orig->lastused; x->new_mapping = 0; x->new_mapping_sport = 0; From ba953a9d89a00c078b85f4b190bc1dde66fe16b5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 4 Aug 2022 18:03:46 +0800 Subject: [PATCH 009/654] af_key: Do not call xfrm_probe_algs in parallel When namespace support was added to xfrm/afkey, it caused the previously single-threaded call to xfrm_probe_algs to become multi-threaded. This is buggy and needs to be fixed with a mutex. Reported-by: Abhishek Shah Fixes: 283bc9f35bbb ("xfrm: Namespacify xfrm state/policy locks") Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/key/af_key.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/key/af_key.c b/net/key/af_key.c index fb16d7c4e1b8d..20e73643b9c89 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1697,9 +1697,12 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad pfk->registered |= (1<sadb_msg_satype); } + mutex_lock(&pfkey_mutex); xfrm_probe_algs(); supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO); + mutex_unlock(&pfkey_mutex); + if (!supp_skb) { if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) pfk->registered &= ~(1<sadb_msg_satype); From 032d57960176ac01cc5adff5bcc5eb51317f8781 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Thu, 11 Aug 2022 15:57:50 -0700 Subject: [PATCH 010/654] drm/msm/dp: delete DP_RECOVERED_CLOCK_OUT_EN to fix tps4 Data Symbols scrambled is required for tps4 at link training 2. Therefore SCRAMBLING_DISABLE bit should not be set for tps4 to work. RECOVERED_CLOCK_OUT_EN is for enable simple EYE test for jitter measurement with minimal equipment for embedded applications purpose and is not required to be set during normal operation. Current implementation always have RECOVERED_CLOCK_OUT_EN bit set which cause SCRAMBLING_DISABLE bit wrongly set at tps4 which prevent tps4 from working. This patch delete setting RECOVERED_CLOCK_OUT_EN to fix SCRAMBLING_DISABLE be wrongly set at tps4. Changes in v2: -- fix Fixes tag Changes in v3: -- revise commit text Changes in v4: -- fix commit text newline Changes in v5: -- fix commit text line over 75 chars Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Signed-off-by: Kuogee Hsieh Reviewed-by: Abhinav Kumar Reviewed-by: Stephen Boyd Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/497194/ Link: https://lore.kernel.org/r/1660258670-4200-1-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dp/dp_ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index ab6aa13b1639a..013ca02e17cbf 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1214,7 +1214,7 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private *ctrl, if (ret) return ret; - dp_ctrl_train_pattern_set(ctrl, pattern | DP_RECOVERED_CLOCK_OUT_EN); + dp_ctrl_train_pattern_set(ctrl, pattern); for (tries = 0; tries <= maximum_retries; tries++) { drm_dp_link_train_channel_eq_delay(ctrl->aux, ctrl->panel->dpcd); From 1e00d6ac8a3422765bae37aeac2002dfd3c0bda6 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 4 Aug 2022 07:38:48 -0700 Subject: [PATCH 011/654] drm/msm/dsi: Fix number of regulators for msm8996_dsi_cfg 3 regulators are listed but the number 2 is specified. Fix it. Fixes: 3a3ff88a0fc1 ("drm/msm/dsi: Add 8x96 info in dsi_cfg") Signed-off-by: Douglas Anderson Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/496318/ Link: https://lore.kernel.org/r/20220804073608.v4.1.I1056ee3f77f71287f333279efe4c85f88d403f65@changeid Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dsi/dsi_cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c index 2c23324a2296b..02000a7b7a18c 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c @@ -109,7 +109,7 @@ static const char * const dsi_8996_bus_clk_names[] = { static const struct msm_dsi_config msm8996_dsi_cfg = { .io_offset = DSI_6G_REG_SHIFT, .reg_cfg = { - .num = 2, + .num = 3, .regs = { {"vdda", 18160, 1 }, /* 1.25 V */ {"vcca", 17000, 32 }, /* 0.925 V */ From a1653a75987749ba6dba94fa2e62f0f36b387d1a Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 4 Aug 2022 07:38:49 -0700 Subject: [PATCH 012/654] drm/msm/dsi: Fix number of regulators for SDM660 1 regulator is listed but the number 2 is specified. This presumably means we try to get a regulator with no name. Fix it. Fixes: 462f7017a691 ("drm/msm/dsi: Fix DSI and DSI PHY regulator config from SDM660") Signed-off-by: Douglas Anderson Reviewed-by: Dmitry Baryshkov Reviewed-by: Marijn Suijten Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/496323/ Link: https://lore.kernel.org/r/20220804073608.v4.2.I94b3c3e412b7c208061349f05659e126483171b1@changeid Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dsi/dsi_cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c index 02000a7b7a18c..72c018e26f47f 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c @@ -148,7 +148,7 @@ static const char * const dsi_sdm660_bus_clk_names[] = { static const struct msm_dsi_config sdm660_dsi_cfg = { .io_offset = DSI_6G_REG_SHIFT, .reg_cfg = { - .num = 2, + .num = 1, .regs = { {"vdda", 12560, 4 }, /* 1.2 V */ }, From b142af334de60f669f36461036360867babcc912 Mon Sep 17 00:00:00 2001 From: Jarrah Gosbell Date: Thu, 11 Aug 2022 16:16:28 -0700 Subject: [PATCH 013/654] dt-bindings: input: touchscreen: add compatible string for Goodix GT1158 Goodix GT1158 is a touchscreen chip from Goodix used in the PinePhone and PinePhone Pro. Patches to correct these devices dts files will be sent in a later patch series. This driver was modified to support the GT1158 in the patch linked below. Add its compatible string to the device tree binding. Suggested-by: Ondrej Jirman Signed-off-by: Jarrah Gosbell Link: https://lore.kernel.org/r/20220809101633.352315-1-kernel@undef.tools Signed-off-by: Dmitry Torokhov --- Documentation/devicetree/bindings/input/touchscreen/goodix.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml b/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml index 93f2ce3130ae7..19ac9da421df3 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml @@ -16,6 +16,7 @@ properties: compatible: enum: - goodix,gt1151 + - goodix,gt1158 - goodix,gt5663 - goodix,gt5688 - goodix,gt911 From 425fe4709c76e35f93f4c0e50240f0b61b2a2e54 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Thu, 11 Aug 2022 16:16:54 -0700 Subject: [PATCH 014/654] Input: goodix - add support for GT1158 This controller is used by PinePhone and PinePhone Pro. Support for the PinePhone Pro will be added in a later patch set. Signed-off-by: Ondrej Jirman Signed-off-by: Jarrah Gosbell Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20220809091200.290492-1-kernel@undef.tools Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index d016505fc081f..ab03619d6b508 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -95,6 +95,7 @@ static const struct goodix_chip_data gt9x_chip_data = { static const struct goodix_chip_id goodix_chip_ids[] = { { .id = "1151", .data = >1x_chip_data }, + { .id = "1158", .data = >1x_chip_data }, { .id = "5663", .data = >1x_chip_data }, { .id = "5688", .data = >1x_chip_data }, { .id = "917S", .data = >1x_chip_data }, From 53661ded2460b414644532de6b99bd87f71987e9 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Thu, 7 Jul 2022 15:08:01 -0400 Subject: [PATCH 015/654] scsi: qla2xxx: Disable ATIO interrupt coalesce for quad port ISP27XX This partially reverts commit d2b292c3f6fd ("scsi: qla2xxx: Enable ATIO interrupt handshake for ISP27XX") For some workloads where the host sends a batch of commands and then pauses, ATIO interrupt coalesce can cause some incoming ATIO entries to be ignored for extended periods of time, resulting in slow performance, timeouts, and aborted commands. Disable interrupt coalesce and re-enable the dedicated ATIO MSI-X interrupt. Link: https://lore.kernel.org/r/97dcf365-89ff-014d-a3e5-1404c6af511c@cybernetics.com Reviewed-by: Himanshu Madhani Reviewed-by: Nilesh Javali Signed-off-by: Tony Battersby Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_target.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 2b2f682883752..62666df1a59eb 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -6935,14 +6935,8 @@ qlt_24xx_config_rings(struct scsi_qla_host *vha) if (ha->flags.msix_enabled) { if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { - if (IS_QLA2071(ha)) { - /* 4 ports Baker: Enable Interrupt Handshake */ - icb->msix_atio = 0; - icb->firmware_options_2 |= cpu_to_le32(BIT_26); - } else { - icb->msix_atio = cpu_to_le16(msix->entry); - icb->firmware_options_2 &= cpu_to_le32(~BIT_26); - } + icb->msix_atio = cpu_to_le16(msix->entry); + icb->firmware_options_2 &= cpu_to_le32(~BIT_26); ql_dbg(ql_dbg_init, vha, 0xf072, "Registering ICB vector 0x%x for atio que.\n", msix->entry); From 54249306e2776774ccb827969e62d34570f991db Mon Sep 17 00:00:00 2001 From: Brian Bunker Date: Fri, 29 Jul 2022 14:41:10 -0700 Subject: [PATCH 016/654] scsi: core: Allow the ALUA transitioning state enough time The error path for the SCSI check condition of not ready, target in ALUA state transition, will result in the failure of that path after the retries are exhausted. In most cases that is well ahead of the transition timeout established in the SCSI ALUA device handler. Instead, reprep the command and re-add it to the queue after a 1 second delay. This will allow the handler to take care of the timeout and only fail the path if the target has exceeded the transition expiry timeout (default 60 seconds). If the expiry timeout is exceeded, the handler will change the path state from transitioning to standby leading to a path failure eliminating the potential of this re-prep to continue endlessly. In most cases the target will exit the transitioning state well before the expiry timeout but after the retries are exhausted as mentioned. Additionally remove the scsi_io_completion_reprep() function which provides little value. Link: https://lore.kernel.org/r/20220729214110.58576-1-brian@purestorage.com Reviewed-by: Martin Wilck Acked-by: Krishna Kant Acked-by: Seamus Connor Signed-off-by: Brian Bunker Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 44 +++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 2aca0a838ca50..7846610355906 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -111,7 +111,7 @@ scsi_set_blocked(struct scsi_cmnd *cmd, int reason) } } -static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd) +static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd, unsigned long msecs) { struct request *rq = scsi_cmd_to_rq(cmd); @@ -121,7 +121,12 @@ static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd) } else { WARN_ON_ONCE(true); } - blk_mq_requeue_request(rq, true); + + if (msecs) { + blk_mq_requeue_request(rq, false); + blk_mq_delay_kick_requeue_list(rq->q, msecs); + } else + blk_mq_requeue_request(rq, true); } /** @@ -651,14 +656,6 @@ static unsigned int scsi_rq_err_bytes(const struct request *rq) return bytes; } -/* Helper for scsi_io_completion() when "reprep" action required. */ -static void scsi_io_completion_reprep(struct scsi_cmnd *cmd, - struct request_queue *q) -{ - /* A new command will be prepared and issued. */ - scsi_mq_requeue_cmd(cmd); -} - static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd) { struct request *req = scsi_cmd_to_rq(cmd); @@ -676,14 +673,21 @@ static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd) return false; } +/* + * When ALUA transition state is returned, reprep the cmd to + * use the ALUA handler's transition timeout. Delay the reprep + * 1 sec to avoid aggressive retries of the target in that + * state. + */ +#define ALUA_TRANSITION_REPREP_DELAY 1000 + /* Helper for scsi_io_completion() when special action required. */ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) { - struct request_queue *q = cmd->device->request_queue; struct request *req = scsi_cmd_to_rq(cmd); int level = 0; - enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY, - ACTION_DELAYED_RETRY} action; + enum {ACTION_FAIL, ACTION_REPREP, ACTION_DELAYED_REPREP, + ACTION_RETRY, ACTION_DELAYED_RETRY} action; struct scsi_sense_hdr sshdr; bool sense_valid; bool sense_current = true; /* false implies "deferred sense" */ @@ -772,8 +776,8 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) action = ACTION_DELAYED_RETRY; break; case 0x0a: /* ALUA state transition */ - blk_stat = BLK_STS_TRANSPORT; - fallthrough; + action = ACTION_DELAYED_REPREP; + break; default: action = ACTION_FAIL; break; @@ -832,7 +836,10 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) return; fallthrough; case ACTION_REPREP: - scsi_io_completion_reprep(cmd, q); + scsi_mq_requeue_cmd(cmd, 0); + break; + case ACTION_DELAYED_REPREP: + scsi_mq_requeue_cmd(cmd, ALUA_TRANSITION_REPREP_DELAY); break; case ACTION_RETRY: /* Retry the same command immediately */ @@ -926,7 +933,7 @@ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result, * command block will be released and the queue function will be goosed. If we * are not done then we have to figure out what to do next: * - * a) We can call scsi_io_completion_reprep(). The request will be + * a) We can call scsi_mq_requeue_cmd(). The request will be * unprepared and put back on the queue. Then a new command will * be created for it. This should be used if we made forward * progress, or if we want to switch from READ(10) to READ(6) for @@ -942,7 +949,6 @@ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result, void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) { int result = cmd->result; - struct request_queue *q = cmd->device->request_queue; struct request *req = scsi_cmd_to_rq(cmd); blk_status_t blk_stat = BLK_STS_OK; @@ -979,7 +985,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) * request just queue the command up again. */ if (likely(result == 0)) - scsi_io_completion_reprep(cmd, q); + scsi_mq_requeue_cmd(cmd, 0); else scsi_io_completion_action(cmd, result); } From 6d17a112e9a63ff6a5edffd1676b99e0ffbcd269 Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Tue, 2 Aug 2022 10:42:31 +0900 Subject: [PATCH 017/654] scsi: ufs: core: Enable link lost interrupt Link lost is treated as fatal error with commit c99b9b230149 ("scsi: ufs: Treat link loss as fatal error"), but the event isn't registered as interrupt source. Enable it. Link: https://lore.kernel.org/r/1659404551-160958-1-git-send-email-kwmad.kim@samsung.com Fixes: c99b9b230149 ("scsi: ufs: Treat link loss as fatal error") Reviewed-by: Bart Van Assche Signed-off-by: Kiwoong Kim Signed-off-by: Martin K. Petersen --- include/ufs/ufshci.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index f81aa95ffbc40..f525566a0864d 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -135,11 +135,7 @@ static inline u32 ufshci_version(u32 major, u32 minor) #define UFSHCD_UIC_MASK (UIC_COMMAND_COMPL | UFSHCD_UIC_PWR_MASK) -#define UFSHCD_ERROR_MASK (UIC_ERROR |\ - DEVICE_FATAL_ERROR |\ - CONTROLLER_FATAL_ERROR |\ - SYSTEM_BUS_FATAL_ERROR |\ - CRYPTO_ENGINE_FATAL_ERROR) +#define UFSHCD_ERROR_MASK (UIC_ERROR | INT_FATAL_ERRORS) #define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\ CONTROLLER_FATAL_ERROR |\ From 8c499e49240bd93628368c3588975cfb94169b8b Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Tue, 2 Aug 2022 15:18:49 +0800 Subject: [PATCH 018/654] scsi: megaraid_sas: Fix double kfree() When allocating log_to_span fails, kfree(instance->ctrl_context) is called twice. Remove redundant call. Link: https://lore.kernel.org/r/1659424729-46502-1-git-send-email-kanie@linux.alibaba.com Acked-by: Sumit Saxena Signed-off-by: Guixin Liu Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index e48d4261d0bca..09c5fe37754c5 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -5310,7 +5310,6 @@ megasas_alloc_fusion_context(struct megasas_instance *instance) if (!fusion->log_to_span) { dev_err(&instance->pdev->dev, "Failed from %s %d\n", __func__, __LINE__); - kfree(instance->ctrl_context); return -ENOMEM; } } From 7dd6f4af9482c319fa829583799e63e38967177d Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Tue, 2 Aug 2022 15:19:00 +0800 Subject: [PATCH 019/654] scsi: megaraid_sas: Remove unnecessary kfree() When alloc ctrl mem fails, the reply_map will subsequently be freed in megasas_free_ctrl_mem(). No need to free it in megasas_alloc_ctrl_mem(). Link: https://lore.kernel.org/r/1659424740-46918-1-git-send-email-kanie@linux.alibaba.com Acked-by: Sumit Saxena Signed-off-by: Guixin Liu Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index fb28c8178828d..6b3d54c04baa8 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -7150,22 +7150,18 @@ static int megasas_alloc_ctrl_mem(struct megasas_instance *instance) switch (instance->adapter_type) { case MFI_SERIES: if (megasas_alloc_mfi_ctrl_mem(instance)) - goto fail; + return -ENOMEM; break; case AERO_SERIES: case VENTURA_SERIES: case THUNDERBOLT_SERIES: case INVADER_SERIES: if (megasas_alloc_fusion_context(instance)) - goto fail; + return -ENOMEM; break; } return 0; - fail: - kfree(instance->reply_map); - instance->reply_map = NULL; - return -ENOMEM; } /* From 37dd4ab1ff8cb843c69835dcaf7bc719a2bf2e0c Mon Sep 17 00:00:00 2001 From: Alim Akhtar Date: Thu, 11 Aug 2022 21:40:53 +0530 Subject: [PATCH 020/654] scsi: ufs: host: ufs-exynos: Make fsd_ufs_drvs static struct fsd_ufs_drvs is not used outside this file, so make it static. This fixes sparse warning: drivers/ufs/host/ufs-exynos.c:1721:28: sparse: sparse: symbol 'fsd_ufs_drvs' was not declared. Should it be static? Link: https://lore.kernel.org/r/20220811161053.54081-1-alim.akhtar@samsung.com Fixes: 216f74e8059a ("scsi: ufs: host: ufs-exynos: Add support for FSD UFS HCI") Reported-by: kernel test robot Reviewed-by: Bart Van Assche Signed-off-by: Alim Akhtar Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-exynos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index eced975380820..c3628a8645a56 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -1711,7 +1711,7 @@ static struct exynos_ufs_uic_attr fsd_uic_attr = { .pa_dbg_option_suite = 0x2E820183, }; -struct exynos_ufs_drv_data fsd_ufs_drvs = { +static const struct exynos_ufs_drv_data fsd_ufs_drvs = { .uic_attr = &fsd_uic_attr, .quirks = UFSHCD_QUIRK_PRDT_BYTE_GRAN | UFSHCI_QUIRK_BROKEN_REQ_LIST_CLR | From d957e7ffb2c72410bcc1a514153a46719255a5da Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Thu, 4 Aug 2022 08:55:34 -0700 Subject: [PATCH 021/654] scsi: storvsc: Remove WQ_MEM_RECLAIM from storvsc_error_wq storvsc_error_wq workqueue should not be marked as WQ_MEM_RECLAIM as it doesn't need to make forward progress under memory pressure. Marking this workqueue as WQ_MEM_RECLAIM may cause deadlock while flushing a non-WQ_MEM_RECLAIM workqueue. In the current state it causes the following warning: [ 14.506347] ------------[ cut here ]------------ [ 14.506354] workqueue: WQ_MEM_RECLAIM storvsc_error_wq_0:storvsc_remove_lun is flushing !WQ_MEM_RECLAIM events_freezable_power_:disk_events_workfn [ 14.506360] WARNING: CPU: 0 PID: 8 at <-snip->kernel/workqueue.c:2623 check_flush_dependency+0xb5/0x130 [ 14.506390] CPU: 0 PID: 8 Comm: kworker/u4:0 Not tainted 5.4.0-1086-azure #91~18.04.1-Ubuntu [ 14.506391] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 05/09/2022 [ 14.506393] Workqueue: storvsc_error_wq_0 storvsc_remove_lun [ 14.506395] RIP: 0010:check_flush_dependency+0xb5/0x130 <-snip-> [ 14.506408] Call Trace: [ 14.506412] __flush_work+0xf1/0x1c0 [ 14.506414] __cancel_work_timer+0x12f/0x1b0 [ 14.506417] ? kernfs_put+0xf0/0x190 [ 14.506418] cancel_delayed_work_sync+0x13/0x20 [ 14.506420] disk_block_events+0x78/0x80 [ 14.506421] del_gendisk+0x3d/0x2f0 [ 14.506423] sr_remove+0x28/0x70 [ 14.506427] device_release_driver_internal+0xef/0x1c0 [ 14.506428] device_release_driver+0x12/0x20 [ 14.506429] bus_remove_device+0xe1/0x150 [ 14.506431] device_del+0x167/0x380 [ 14.506432] __scsi_remove_device+0x11d/0x150 [ 14.506433] scsi_remove_device+0x26/0x40 [ 14.506434] storvsc_remove_lun+0x40/0x60 [ 14.506436] process_one_work+0x209/0x400 [ 14.506437] worker_thread+0x34/0x400 [ 14.506439] kthread+0x121/0x140 [ 14.506440] ? process_one_work+0x400/0x400 [ 14.506441] ? kthread_park+0x90/0x90 [ 14.506443] ret_from_fork+0x35/0x40 [ 14.506445] ---[ end trace 2d9633159fdc6ee7 ]--- Link: https://lore.kernel.org/r/1659628534-17539-1-git-send-email-ssengar@linux.microsoft.com Fixes: 436ad9413353 ("scsi: storvsc: Allow only one remove lun work item to be issued per lun") Reviewed-by: Michael Kelley Signed-off-by: Saurabh Sengar Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index fe000da113327..8ced292c4b962 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -2012,7 +2012,7 @@ static int storvsc_probe(struct hv_device *device, */ host_dev->handle_error_wq = alloc_ordered_workqueue("storvsc_error_wq_%d", - WQ_MEM_RECLAIM, + 0, host->host_no); if (!host_dev->handle_error_wq) { ret = -ENOMEM; From 27e23836ce22a3e5d89712ef832ab72e47ce9f43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Wed, 10 Aug 2022 20:07:10 +0000 Subject: [PATCH 022/654] selftests/bpf: Add lru_bug to s390x deny list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lru_bug BPF selftest is failing execution on s390x machines. The failure is due to program attachment failing in turn, similar to a bunch of other tests. Those other tests have already been deny-listed and with this change we do the same for the lru_bug test, adding it to the corresponding file. Fixes: de7b9927105b ("selftests/bpf: Add test for prealloc_lru_pop bug") Signed-off-by: Daniel Müller Acked-by: Mykola Lysenko Link: https://lore.kernel.org/r/20220810200710.1300299-1-deso@posteo.net Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/DENYLIST.s390x | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index e33cab34d22fc..db98106117888 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -65,3 +65,4 @@ send_signal # intermittently fails to receive signa select_reuseport # intermittently fails on new s390x setup xdp_synproxy # JIT does not support calling kernel function (kfunc) unpriv_bpf_disabled # fentry +lru_bug # prog 'printk': failed to auto-attach: -524 From 2067231a9e2cbbcae0a4aca6ac36ff2dd6a7b701 Mon Sep 17 00:00:00 2001 From: Sun Ke Date: Fri, 12 Aug 2022 09:14:40 +0800 Subject: [PATCH 023/654] NFS: Fix missing unlock in nfs_unlink() Add the missing unlock before goto. Fixes: 3c59366c207e ("NFS: don't unhash dentry during unlink/rename") Signed-off-by: Sun Ke Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index dbab3caa15ed5..1b879584d4fe8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2484,8 +2484,10 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) */ error = -ETXTBSY; if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) || - WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED)) + WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED)) { + spin_unlock(&dentry->d_lock); goto out; + } if (dentry->d_fsdata) /* old devname */ kfree(dentry->d_fsdata); From 67f4b5dc49913abcdb5cc736e73674e2f352f81d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 13 Aug 2022 08:22:25 -0400 Subject: [PATCH 024/654] NFS: Fix another fsync() issue after a server reboot Currently, when the writeback code detects a server reboot, it redirties any pages that were not committed to disk, and it sets the flag NFS_CONTEXT_RESEND_WRITES in the nfs_open_context of the file descriptor that dirtied the file. While this allows the file descriptor in question to redrive its own writes, it violates the fsync() requirement that we should be synchronising all writes to disk. While the problem is infrequent, we do see corner cases where an untimely server reboot causes the fsync() call to abandon its attempt to sync data to disk and causing data corruption issues due to missed error conditions or similar. In order to tighted up the client's ability to deal with this situation without introducing livelocks, add a counter that records the number of times pages are redirtied due to a server reboot-like condition, and use that in fsync() to redrive the sync to disk. Fixes: 2197e9b06c22 ("NFS: Fix up fsync() when the server rebooted") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 15 ++++++--------- fs/nfs/inode.c | 1 + fs/nfs/write.c | 6 ++++-- include/linux/nfs_fs.h | 1 + 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 54237a2316872..749e5487df503 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -221,8 +221,10 @@ nfs_file_fsync_commit(struct file *file, int datasync) int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = file_inode(file); + struct nfs_inode *nfsi = NFS_I(inode); + long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages); + long nredirtied; int ret; trace_nfs_fsync_enter(inode); @@ -237,15 +239,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) ret = pnfs_sync_inode(inode, !!datasync); if (ret != 0) break; - if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags)) + nredirtied = atomic_long_read(&nfsi->redirtied_pages); + if (nredirtied == save_nredirtied) break; - /* - * If nfs_file_fsync_commit detected a server reboot, then - * resend all dirty pages that might have been covered by - * the NFS_CONTEXT_RESEND_WRITES flag - */ - start = 0; - end = LLONG_MAX; + save_nredirtied = nredirtied; } trace_nfs_fsync_exit(inode, ret); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b4e46b0ffa2dc..bea7c005119c3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -426,6 +426,7 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh) static void nfs_inode_init_regular(struct nfs_inode *nfsi) { atomic_long_set(&nfsi->nrequests, 0); + atomic_long_set(&nfsi->redirtied_pages, 0); INIT_LIST_HEAD(&nfsi->commit_info.list); atomic_long_set(&nfsi->commit_info.ncommit, 0); atomic_set(&nfsi->commit_info.rpcs_out, 0); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4a3796811b4b5..989c734cf91d2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1420,10 +1420,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, */ static void nfs_redirty_request(struct nfs_page *req) { + struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host); + /* Bump the transmission count */ req->wb_nio++; nfs_mark_request_dirty(req); - set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); + atomic_long_inc(&nfsi->redirtied_pages); nfs_end_page_writeback(req); nfs_release_request(req); } @@ -1904,7 +1906,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* We have a mismatch. Write the page again */ dprintk_cont(" mismatch\n"); nfs_mark_request_dirty(req); - set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); + atomic_long_inc(&NFS_I(data->inode)->redirtied_pages); next: nfs_unlock_and_release_request(req); /* Latency breaker */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b32ed68e7dc49..f08e581f01618 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -182,6 +182,7 @@ struct nfs_inode { /* Regular file */ struct { atomic_long_t nrequests; + atomic_long_t redirtied_pages; struct nfs_mds_commit_info commit_info; struct mutex commit_mutex; }; From edf79efcc9d0cf38fcc1efe688fd697b9bb0ddc4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 13 Aug 2022 08:46:35 -0400 Subject: [PATCH 025/654] NFS: Remove a bogus flag setting in pnfs_write_done_resend_to_mds Since pnfs_write_done_resend_to_mds() does not actually call end_page_writeback() on the pages that are being redirected to the metadata server, callers of fsync() do not see the I/O as complete until the writeback to the MDS finishes. We therefore do not need to set NFS_CONTEXT_RESEND_WRITES, since there is nothing to redrive. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 41a9b6b58fb9f..2613b7e36eb95 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2817,7 +2817,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) /* Resend all requests through the MDS */ nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, hdr->completion_ops); - set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags); return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); From 5f6277a0c15e1ea54b6fd3d78c9fff7bfe42556c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 13 Aug 2022 08:51:45 -0400 Subject: [PATCH 026/654] NFS: Cleanup to remove unused flag NFS_CONTEXT_RESEND_WRITES Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f08e581f01618..7931fa4725612 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -83,7 +83,6 @@ struct nfs_open_context { fmode_t mode; unsigned long flags; -#define NFS_CONTEXT_RESEND_WRITES (1) #define NFS_CONTEXT_BAD (2) #define NFS_CONTEXT_UNLOCK (3) #define NFS_CONTEXT_FILE_OPEN (4) From 0e66978ebeb44f96ff4d26b2a83dc88a2bf887a6 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 21 May 2022 13:10:59 +0200 Subject: [PATCH 027/654] drm/i915/gvt: fix typo in comment Spelling mistake (triple letters) in comment. Detected with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20220521111145.81697-49-Julia.Lawall@inria.fr Acked-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index b4f69364f9a13..ce0eb03709c3f 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2341,7 +2341,7 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, gvt_vgpu_err("fail to populate guest ggtt entry\n"); /* guest driver may read/write the entry when partial * update the entry in this situation p2m will fail - * settting the shadow entry to point to a scratch page + * setting the shadow entry to point to a scratch page */ ops->set_pfn(&m, gvt->gtt.scratch_mfn); } else From e16c2b8250262bc0bc531299e2c30641cee14221 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 24 May 2022 16:37:33 +0800 Subject: [PATCH 028/654] drm/i915/gvt: Fix kernel-doc Fix the following W=1 kernel warnings: drivers/gpu/drm/i915/gvt/handlers.c:3066: warning: expecting prototype for intel_t_default_mmio_write(). Prototype was for intel_vgpu_default_mmio_write() instead. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20220524083733.67148-2-jiapeng.chong@linux.alibaba.com Acked-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index beea5895e4992..9c8dde079cb41 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -3052,7 +3052,7 @@ int intel_vgpu_default_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, } /** - * intel_t_default_mmio_write - default MMIO write handler + * intel_vgpu_default_mmio_write() - default MMIO write handler * @vgpu: a vGPU * @offset: access offset * @p_data: write data buffer From dca452041552a5a5a6aab3ebda32565fad003eb4 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 24 May 2022 16:37:32 +0800 Subject: [PATCH 029/654] drm/i915/gvt: Fix kernel-doc Fix the following W=1 kernel warnings: drivers/gpu/drm/i915/gvt/mmio_context.c:560: warning: expecting prototype for intel_gvt_switch_render_mmio(). Prototype was for intel_gvt_switch_mmio() instead. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20220524083733.67148-1-jiapeng.chong@linux.alibaba.com Acked-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/mmio_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index c85bafe7539ee..1c6e941c96666 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -546,7 +546,7 @@ static void switch_mmio(struct intel_vgpu *pre, } /** - * intel_gvt_switch_render_mmio - switch mmio context of specific engine + * intel_gvt_switch_mmio - switch mmio context of specific engine * @pre: the last vGPU that own the engine * @next: the vGPU to switch to * @engine: the engine From 0f761f5768b842fe483141bc59db5b4d66bcaf07 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 2 Jun 2022 15:35:19 +0800 Subject: [PATCH 030/654] drm/i915/gvt: Fix kernel-doc Fix the following W=1 kernel warnings: drivers/gpu/drm/i915/gvt/aperture_gm.c:308: warning: expecting prototype for inte_gvt_free_vgpu_resource(). Prototype was for intel_vgpu_free_resource() instead. drivers/gpu/drm/i915/gvt/aperture_gm.c:344: warning: expecting prototype for intel_alloc_vgpu_resource(). Prototype was for intel_vgpu_alloc_resource() instead. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20220602073519.22363-1-jiapeng.chong@linux.alibaba.com Acked-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/aperture_gm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 557f3314291a8..3b81a6d35a7b2 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -298,7 +298,7 @@ static int alloc_resource(struct intel_vgpu *vgpu, } /** - * inte_gvt_free_vgpu_resource - free HW resource owned by a vGPU + * intel_vgpu_free_resource() - free HW resource owned by a vGPU * @vgpu: a vGPU * * This function is used to free the HW resource owned by a vGPU. @@ -328,7 +328,7 @@ void intel_vgpu_reset_resource(struct intel_vgpu *vgpu) } /** - * intel_alloc_vgpu_resource - allocate HW resource for a vGPU + * intel_vgpu_alloc_resource() - allocate HW resource for a vGPU * @vgpu: vGPU * @param: vGPU creation params * From 72cbc8f04fe2fa93443c0fcccb7ad91dfea3d9ce Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 28 Apr 2022 16:50:29 +0200 Subject: [PATCH 031/654] x86/PAT: Have pat_enabled() properly reflect state when running on Xen After commit ID in the Fixes: tag, pat_enabled() returns false (because of PAT initialization being suppressed in the absence of MTRRs being announced to be available). This has become a problem: the i915 driver now fails to initialize when running PV on Xen (i915_gem_object_pin_map() is where I located the induced failure), and its error handling is flaky enough to (at least sometimes) result in a hung system. Yet even beyond that problem the keying of the use of WC mappings to pat_enabled() (see arch_can_pci_mmap_wc()) means that in particular graphics frame buffer accesses would have been quite a bit less optimal than possible. Arrange for the function to return true in such environments, without undermining the rest of PAT MSR management logic considering PAT to be disabled: specifically, no writes to the PAT MSR should occur. For the new boolean to live in .init.data, init_cache_modes() also needs moving to .init.text (where it could/should have lived already before). [ bp: This is the "small fix" variant for stable. It'll get replaced with a proper PAT and MTRR detection split upstream but that is too involved for a stable backport. - additional touchups to commit msg. Use cpu_feature_enabled(). ] Fixes: bdd8b6c98239 ("drm/i915: replace X86_FEATURE_PAT with pat_enabled()") Signed-off-by: Jan Beulich Signed-off-by: Borislav Petkov Acked-by: Ingo Molnar Cc: Cc: Juergen Gross Cc: Lucas De Marchi Link: https://lore.kernel.org/r/9385fa60-fa5d-f559-a137-6608408f88b0@suse.com --- arch/x86/mm/pat/memtype.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index d5ef64ddd35e9..66a209f7eb86d 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -62,6 +62,7 @@ static bool __read_mostly pat_bp_initialized; static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT); +static bool __initdata pat_force_disabled = !IS_ENABLED(CONFIG_X86_PAT); static bool __read_mostly pat_bp_enabled; static bool __read_mostly pat_cm_initialized; @@ -86,6 +87,7 @@ void pat_disable(const char *msg_reason) static int __init nopat(char *str) { pat_disable("PAT support disabled via boot option."); + pat_force_disabled = true; return 0; } early_param("nopat", nopat); @@ -272,7 +274,7 @@ static void pat_ap_init(u64 pat) wrmsrl(MSR_IA32_CR_PAT, pat); } -void init_cache_modes(void) +void __init init_cache_modes(void) { u64 pat = 0; @@ -313,6 +315,12 @@ void init_cache_modes(void) */ pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) | PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC); + } else if (!pat_force_disabled && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) { + /* + * Clearly PAT is enabled underneath. Allow pat_enabled() to + * reflect this. + */ + pat_bp_enabled = true; } __init_cache_modes(pat); From 0a90ed8d0cfa29735a221eba14d9cb6c735d35b6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 1 Aug 2022 14:37:31 +0300 Subject: [PATCH 032/654] platform/x86: pmc_atom: Fix SLP_TYPx bitfield mask On Intel hardware the SLP_TYPx bitfield occupies bits 10-12 as per ACPI specification (see Table 4.13 "PM1 Control Registers Fixed Hardware Feature Control Bits" for the details). Fix the mask and other related definitions accordingly. Fixes: 93e5eadd1f6e ("x86/platform: New Intel Atom SOC power management controller driver") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220801113734.36131-1-andriy.shevchenko@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/pmc_atom.c | 2 +- include/linux/platform_data/x86/pmc_atom.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index 154317e9910d2..5c757c7f64dee 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -232,7 +232,7 @@ static void pmc_power_off(void) pm1_cnt_port = acpi_base_addr + PM1_CNT; pm1_cnt_value = inl(pm1_cnt_port); - pm1_cnt_value &= SLEEP_TYPE_MASK; + pm1_cnt_value &= ~SLEEP_TYPE_MASK; pm1_cnt_value |= SLEEP_TYPE_S5; pm1_cnt_value |= SLEEP_ENABLE; diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h index 3edfb6d4e67ab..dd81f510e4cf0 100644 --- a/include/linux/platform_data/x86/pmc_atom.h +++ b/include/linux/platform_data/x86/pmc_atom.h @@ -7,6 +7,8 @@ #ifndef PMC_ATOM_H #define PMC_ATOM_H +#include + /* ValleyView Power Control Unit PCI Device ID */ #define PCI_DEVICE_ID_VLV_PMC 0x0F1C /* CherryTrail Power Control Unit PCI Device ID */ @@ -139,9 +141,9 @@ #define ACPI_MMIO_REG_LEN 0x100 #define PM1_CNT 0x4 -#define SLEEP_TYPE_MASK 0xFFFFECFF +#define SLEEP_TYPE_MASK GENMASK(12, 10) #define SLEEP_TYPE_S5 0x1C00 -#define SLEEP_ENABLE 0x2000 +#define SLEEP_ENABLE BIT(13) extern int pmc_atom_read(int offset, u32 *value); From 2986c51540ed50ac654ffb5a772e546c02628c91 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 10 Aug 2022 16:19:34 +0200 Subject: [PATCH 033/654] platform/x86: x86-android-tablets: Fix broken touchscreen on Chuwi Hi8 with Windows BIOS The x86-android-tablets handling for the Chuwi Hi8 is only necessary with the Android BIOS and it is causing problems with the Windows BIOS version. Specifically when trying to register the already present touchscreen x86_acpi_irq_helper_get() calls acpi_unregister_gsi(), this breaks the working of the touchscreen and also leads to an oops: [ 14.248946] ------------[ cut here ]------------ [ 14.248954] remove_proc_entry: removing non-empty directory 'irq/75', leaking at least 'MSSL0001:00' [ 14.248983] WARNING: CPU: 3 PID: 440 at fs/proc/generic.c:718 remove_proc_entry ... [ 14.249293] unregister_irq_proc+0xe0/0x100 [ 14.249305] free_desc+0x29/0x70 [ 14.249312] irq_free_descs+0x4b/0x80 [ 14.249320] mp_unmap_irq+0x5c/0x60 [ 14.249329] acpi_unregister_gsi_ioapic+0x2a/0x40 [ 14.249338] x86_acpi_irq_helper_get+0x4b/0x190 [x86_android_tablets] [ 14.249355] x86_android_tablet_init+0x178/0xe34 [x86_android_tablets] Add an init callback for the Chuwi Hi8, which detects when the Windows BIOS is in use and exits with -ENODEV in that case, fixing this. Fixes: 84c2dcdd475f ("platform/x86: x86-android-tablets: Add an init() callback to struct x86_dev_info") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220810141934.140771-1-hdegoede@redhat.com --- drivers/platform/x86/x86-android-tablets.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/platform/x86/x86-android-tablets.c b/drivers/platform/x86/x86-android-tablets.c index 4803759774358..4acd6fa8d43b8 100644 --- a/drivers/platform/x86/x86-android-tablets.c +++ b/drivers/platform/x86/x86-android-tablets.c @@ -663,9 +663,23 @@ static const struct x86_i2c_client_info chuwi_hi8_i2c_clients[] __initconst = { }, }; +static int __init chuwi_hi8_init(void) +{ + /* + * Avoid the acpi_unregister_gsi() call in x86_acpi_irq_helper_get() + * breaking the touchscreen + logging various errors when the Windows + * BIOS is used. + */ + if (acpi_dev_present("MSSL0001", NULL, 1)) + return -ENODEV; + + return 0; +} + static const struct x86_dev_info chuwi_hi8_info __initconst = { .i2c_client_info = chuwi_hi8_i2c_clients, .i2c_client_count = ARRAY_SIZE(chuwi_hi8_i2c_clients), + .init = chuwi_hi8_init, }; #define CZC_EC_EXTRA_PORT 0x68 From 84b8e403435c8fb94b872309673764a447961e00 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Wed, 10 Aug 2022 16:01:33 +0200 Subject: [PATCH 034/654] platform/surface: aggregator_registry: Add support for Surface Laptop Go 2 The Surface Laptop Go 2 seems to have the same SAM client devices as the Surface Laptop Go 1, so re-use its node group. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20220810140133.99087-1-luzmaximilian@gmail.com Signed-off-by: Hans de Goede --- drivers/platform/surface/surface_aggregator_registry.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index d5655f6a4a416..93ab62eb393d7 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -325,6 +325,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Laptop Go 1 */ { "MSHW0118", (unsigned long)ssam_node_group_slg1 }, + /* Surface Laptop Go 2 */ + { "MSHW0290", (unsigned long)ssam_node_group_slg1 }, + /* Surface Laptop Studio */ { "MSHW0123", (unsigned long)ssam_node_group_sls }, From 6d6ea95abc66596ceff131080e86db3bbec77064 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Wed, 10 Aug 2022 16:41:15 +0200 Subject: [PATCH 035/654] platform/surface: aggregator_registry: Rename HID device nodes based on their function Rename HID device nodes based on their function. In particular, these are nodes for firmware updates via the CFU mechanism (component firmware update), HID based sensors, and a USB-C UCSI client. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20220810144117.493710-2-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- .../surface/surface_aggregator_registry.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 93ab62eb393d7..7d82398f55b1c 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -104,14 +104,14 @@ static const struct software_node ssam_node_hid_tid1_touchpad = { .parent = &ssam_node_root, }; -/* HID device instance 6 (TID1, unknown HID device). */ -static const struct software_node ssam_node_hid_tid1_iid6 = { +/* HID device instance 6 (TID1, HID sensor collection). */ +static const struct software_node ssam_node_hid_tid1_sensors = { .name = "ssam:01:15:01:06:00", .parent = &ssam_node_root, }; -/* HID device instance 7 (TID1, unknown HID device). */ -static const struct software_node ssam_node_hid_tid1_iid7 = { +/* HID device instance 7 (TID1, UCM UCSI HID client). */ +static const struct software_node ssam_node_hid_tid1_ucm_ucsi = { .name = "ssam:01:15:01:07:00", .parent = &ssam_node_root, }; @@ -182,8 +182,8 @@ static const struct software_node ssam_node_hid_kip_touchpad = { .parent = &ssam_node_hub_kip, }; -/* HID device instance 5 (KIP hub, unknown HID device). */ -static const struct software_node ssam_node_hid_kip_iid5 = { +/* HID device instance 5 (KIP hub, type-cover firmware update). */ +static const struct software_node ssam_node_hid_kip_fwupd = { .name = "ssam:01:15:02:05:00", .parent = &ssam_node_hub_kip, }; @@ -244,8 +244,8 @@ static const struct software_node *ssam_node_group_sls[] = { &ssam_node_hid_tid1_keyboard, &ssam_node_hid_tid1_penstash, &ssam_node_hid_tid1_touchpad, - &ssam_node_hid_tid1_iid6, - &ssam_node_hid_tid1_iid7, + &ssam_node_hid_tid1_sensors, + &ssam_node_hid_tid1_ucm_ucsi, &ssam_node_hid_tid1_sysctrl, NULL, }; @@ -278,7 +278,7 @@ static const struct software_node *ssam_node_group_sp8[] = { &ssam_node_hid_kip_keyboard, &ssam_node_hid_kip_penstash, &ssam_node_hid_kip_touchpad, - &ssam_node_hid_kip_iid5, + &ssam_node_hid_kip_fwupd, NULL, }; From 06964552928f6e111c2c29a8352ece8345e9cb18 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Wed, 10 Aug 2022 16:41:16 +0200 Subject: [PATCH 036/654] platform/surface: aggregator_registry: Rename HID device nodes based on new findings On Windows, the HID devices with target ID 1 are grouped as "Surface Hot Plug - SAM". Rename their device nodes in the registry to reflect that and update the comments accordingly. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20220810144117.493710-3-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- .../surface/surface_aggregator_registry.c | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 7d82398f55b1c..9970f89b14110 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -86,38 +86,38 @@ static const struct software_node ssam_node_bas_dtx = { .parent = &ssam_node_root, }; -/* HID keyboard (TID1). */ -static const struct software_node ssam_node_hid_tid1_keyboard = { +/* HID keyboard (SAM, TID=1). */ +static const struct software_node ssam_node_hid_sam_keyboard = { .name = "ssam:01:15:01:01:00", .parent = &ssam_node_root, }; -/* HID pen stash (TID1; pen taken / stashed away evens). */ -static const struct software_node ssam_node_hid_tid1_penstash = { +/* HID pen stash (SAM, TID=1; pen taken / stashed away evens). */ +static const struct software_node ssam_node_hid_sam_penstash = { .name = "ssam:01:15:01:02:00", .parent = &ssam_node_root, }; -/* HID touchpad (TID1). */ -static const struct software_node ssam_node_hid_tid1_touchpad = { +/* HID touchpad (SAM, TID=1). */ +static const struct software_node ssam_node_hid_sam_touchpad = { .name = "ssam:01:15:01:03:00", .parent = &ssam_node_root, }; -/* HID device instance 6 (TID1, HID sensor collection). */ -static const struct software_node ssam_node_hid_tid1_sensors = { +/* HID device instance 6 (SAM, TID=1, HID sensor collection). */ +static const struct software_node ssam_node_hid_sam_sensors = { .name = "ssam:01:15:01:06:00", .parent = &ssam_node_root, }; -/* HID device instance 7 (TID1, UCM UCSI HID client). */ -static const struct software_node ssam_node_hid_tid1_ucm_ucsi = { +/* HID device instance 7 (SAM, TID=1, UCM UCSI HID client). */ +static const struct software_node ssam_node_hid_sam_ucm_ucsi = { .name = "ssam:01:15:01:07:00", .parent = &ssam_node_root, }; -/* HID system controls (TID1). */ -static const struct software_node ssam_node_hid_tid1_sysctrl = { +/* HID system controls (SAM, TID=1). */ +static const struct software_node ssam_node_hid_sam_sysctrl = { .name = "ssam:01:15:01:08:00", .parent = &ssam_node_root, }; @@ -241,12 +241,12 @@ static const struct software_node *ssam_node_group_sls[] = { &ssam_node_bat_main, &ssam_node_tmp_pprof, &ssam_node_pos_tablet_switch, - &ssam_node_hid_tid1_keyboard, - &ssam_node_hid_tid1_penstash, - &ssam_node_hid_tid1_touchpad, - &ssam_node_hid_tid1_sensors, - &ssam_node_hid_tid1_ucm_ucsi, - &ssam_node_hid_tid1_sysctrl, + &ssam_node_hid_sam_keyboard, + &ssam_node_hid_sam_penstash, + &ssam_node_hid_sam_touchpad, + &ssam_node_hid_sam_sensors, + &ssam_node_hid_sam_ucm_ucsi, + &ssam_node_hid_sam_sysctrl, NULL, }; From 6b2caaafc5df5f9d8a41679c3ec1bdad3b8fbe14 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Wed, 10 Aug 2022 16:41:17 +0200 Subject: [PATCH 037/654] platform/surface: aggregator_registry: Add HID devices for sensors and UCSI client to SP8 Add software nodes for the HID sensor collection and the UCM UCSI HID client to the Surface Pro 8. In contrast to the type-cover devices, these devices are directly attached to the SAM controller, without any hub. This enables support for HID-based sensors, including the ones used for automatic screen rotation, on the Surface Pro 8. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20220810144117.493710-4-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/surface/surface_aggregator_registry.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 9970f89b14110..585911020cea0 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -279,6 +279,8 @@ static const struct software_node *ssam_node_group_sp8[] = { &ssam_node_hid_kip_penstash, &ssam_node_hid_kip_touchpad, &ssam_node_hid_kip_fwupd, + &ssam_node_hid_sam_sensors, + &ssam_node_hid_sam_ucm_ucsi, NULL, }; From 58ca14ed98c87cfe0d1408cc65a9745d9e9b7a56 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 12 Aug 2022 13:32:59 +0200 Subject: [PATCH 038/654] xsk: Fix corrupted packets for XDP_SHARED_UMEM Fix an issue in XDP_SHARED_UMEM mode together with aligned mode where packets are corrupted for the second and any further sockets bound to the same umem. In other words, this does not affect the first socket bound to the umem. The culprit for this bug is that the initialization of the DMA addresses for the pre-populated xsk buffer pool entries was not performed for any socket but the first one bound to the umem. Only the linear array of DMA addresses was populated. Fix this by populating the DMA addresses in the xsk buffer pool for every socket bound to the same umem. Fixes: 94033cd8e73b8 ("xsk: Optimize for aligned case") Reported-by: Alasdair McWilliam Reported-by: Intrusion Shield Team Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Tested-by: Alasdair McWilliam Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/xdp-newbies/6205E10C-292E-4995-9D10-409649354226@outlook.com/ Link: https://lore.kernel.org/bpf/20220812113259.531-1-magnus.karlsson@gmail.com --- net/xdp/xsk_buff_pool.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index f70112176b7c1..a71a8c6edf553 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -379,6 +379,16 @@ static void xp_check_dma_contiguity(struct xsk_dma_map *dma_map) static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_map) { + if (!pool->unaligned) { + u32 i; + + for (i = 0; i < pool->heads_cnt; i++) { + struct xdp_buff_xsk *xskb = &pool->heads[i]; + + xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr); + } + } + pool->dma_pages = kvcalloc(dma_map->dma_pages_cnt, sizeof(*pool->dma_pages), GFP_KERNEL); if (!pool->dma_pages) return -ENOMEM; @@ -428,12 +438,6 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, if (pool->unaligned) xp_check_dma_contiguity(dma_map); - else - for (i = 0; i < pool->heads_cnt; i++) { - struct xdp_buff_xsk *xskb = &pool->heads[i]; - - xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr); - } err = xp_init_dma_info(pool, dma_map); if (err) { From 02b9f2636209beb843ca501d47f7fddc8792b2d7 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 8 Jul 2022 09:26:32 -0700 Subject: [PATCH 039/654] drm/msm/gpu: Drop qos request if devm_devfreq_add_device() fails In the event that devm_devfreq_add_device() fails the device's qos freq list is left referencing df->idle_freq and df->boost_freq. Attempting to initialize devfreq again after a probe deferral will then cause invalid memory accesses in dev_pm_qos_add_request(). Fix this by dropping the requests in the error path. Fixes: 7c0ffcd40b16 ("drm/msm/gpu: Respect PM QoS constraints") Signed-off-by: Bjorn Andersson Reviewed-by: Rob Clark Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/493001/ Link: https://lore.kernel.org/r/20220708162632.3529864-1-bjorn.andersson@linaro.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gpu_devfreq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c index d1f70426f554e..85c443a37e4e8 100644 --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c @@ -213,6 +213,8 @@ void msm_devfreq_init(struct msm_gpu *gpu) if (IS_ERR(df->devfreq)) { DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n"); + dev_pm_qos_remove_request(&df->idle_freq); + dev_pm_qos_remove_request(&df->boost_freq); df->devfreq = NULL; return; } From 174974d8463b77c2b4065e98513adb204e64de7d Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 7 Aug 2022 09:09:01 -0700 Subject: [PATCH 040/654] drm/msm/rd: Fix FIFO-full deadlock If the previous thing cat'ing $debugfs/rd left the FIFO full, then subsequent open could deadlock in rd_write() (because open is blocked, not giving a chance for read() to consume any data in the FIFO). Also it is generally a good idea to clear out old data from the FIFO. Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/496706/ Link: https://lore.kernel.org/r/20220807160901.2353471-2-robdclark@gmail.com --- drivers/gpu/drm/msm/msm_rd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index a92ffde53f0b3..db2f847c8535f 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -196,6 +196,9 @@ static int rd_open(struct inode *inode, struct file *file) file->private_data = rd; rd->open = true; + /* Reset fifo to clear any previously unread data: */ + rd->fifo.head = rd->fifo.tail = 0; + /* the parsing tools need to know gpu-id to know which * register database to load. * From e79b548b7202bb3accdfe64f113129a4340bc2f9 Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Wed, 13 Apr 2022 01:04:25 +0000 Subject: [PATCH 041/654] peci: aspeed: fix error check return value of platform_get_irq() platform_get_irq() return negative value on failure, so null check of priv->irq is incorrect. Fix it by comparing whether it is less than zero. Fixes: a85e4c52086c ("peci: Add peci-aspeed controller driver") Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Link: https://lore.kernel.org/r/20220413010425.2534887-1-lv.ruyi@zte.com.cn Reviewed-by: Iwona Winiarska Signed-off-by: Iwona Winiarska --- drivers/peci/controller/peci-aspeed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/peci/controller/peci-aspeed.c b/drivers/peci/controller/peci-aspeed.c index 1925ddc13f002..731c5d8f75c66 100644 --- a/drivers/peci/controller/peci-aspeed.c +++ b/drivers/peci/controller/peci-aspeed.c @@ -523,7 +523,7 @@ static int aspeed_peci_probe(struct platform_device *pdev) return PTR_ERR(priv->base); priv->irq = platform_get_irq(pdev, 0); - if (!priv->irq) + if (priv->irq < 0) return priv->irq; ret = devm_request_irq(&pdev->dev, priv->irq, aspeed_peci_irq_handler, From 1c11289b34ab67ed080bbe0f1855c4938362d9cf Mon Sep 17 00:00:00 2001 From: Iwona Winiarska Date: Tue, 5 Jul 2022 12:15:01 +0200 Subject: [PATCH 042/654] peci: cpu: Fix use-after-free in adev_release() When auxiliary_device_add() returns an error, auxiliary_device_uninit() is called, which causes refcount for device to be decremented and .release callback will be triggered. Because adev_release() re-calls auxiliary_device_uninit(), it will cause use-after-free: [ 1269.455172] WARNING: CPU: 0 PID: 14267 at lib/refcount.c:28 refcount_warn_saturate+0x110/0x15 [ 1269.464007] refcount_t: underflow; use-after-free. Reported-by: Jianglei Nie Signed-off-by: Iwona Winiarska Link: https://lore.kernel.org/r/20220705101501.298395-1-iwona.winiarska@intel.com --- drivers/peci/cpu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/peci/cpu.c b/drivers/peci/cpu.c index 68eb61c65d345..de4a7b3e5966e 100644 --- a/drivers/peci/cpu.c +++ b/drivers/peci/cpu.c @@ -188,8 +188,6 @@ static void adev_release(struct device *dev) { struct auxiliary_device *adev = to_auxiliary_dev(dev); - auxiliary_device_uninit(adev); - kfree(adev->name); kfree(adev); } @@ -234,6 +232,7 @@ static void unregister_adev(void *_adev) struct auxiliary_device *adev = _adev; auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); } static int devm_adev_add(struct device *dev, int idx) From a921be53b46c393d8d594a62a44f418c97e5504b Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 10 Aug 2022 12:07:31 +0200 Subject: [PATCH 043/654] thermal/core: Add missing EXPORT_SYMBOL_GPL The function thermal_zone_device_register_with_trips() is not exported for modules. Add the missing EXPORT_SYMBOL_GPL(). Fixes: fae11de507f0e ("thermal/core: Add thermal_trip in thermal_zone") Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20220810100731.749317-1-daniel.lezcano@linaro.org --- drivers/thermal/thermal_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 6a5d0ae5d7a4c..50d50cec77740 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1329,6 +1329,7 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t kfree(tz); return ERR_PTR(result); } +EXPORT_SYMBOL_GPL(thermal_zone_device_register_with_trips); struct thermal_zone_device *thermal_zone_device_register(const char *type, int ntrips, int mask, void *devdata, struct thermal_zone_device_ops *ops, From 8c596324232d22e19f8df59ba03410b9b5b0f3d7 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 9 Aug 2022 10:56:28 +0200 Subject: [PATCH 044/654] dt-bindings: thermal: Fix missing required property When the thermal zone description was converted to yaml schema, the required 'trips' property was forgotten. The initial text bindings was describing: " [ ... ] * Thermal zone nodes The thermal zone node is the node containing all the required info for describing a thermal zone, including its cooling device bindings. The thermal zone node must contain, apart from its own properties, one sub-node containing trip nodes and one sub-node containing all the zone cooling maps. Required properties: - polling-delay: The maximum number of milliseconds to wait between polls Type: unsigned when checking this thermal zone. Size: one cell - polling-delay-passive: The maximum number of milliseconds to wait Type: unsigned between polls when performing passive cooling. Size: one cell - thermal-sensors: A list of thermal sensor phandles and sensor specifier Type: list of used while monitoring the thermal zone. phandles + sensor specifier - trips: A sub-node which is a container of only trip point nodes Type: sub-node required to describe the thermal zone. Optional property: - cooling-maps: A sub-node which is a container of only cooling device Type: sub-node map nodes, used to describe the relation between trips and cooling devices. [ ... ] " Now the schema describes: " [ ... ] required: - polling-delay - polling-delay-passive - thermal-sensors [ ... ] " Add the missing 'trips' property in the required properties. Fixed: 1202a442a31fd ("dt-bindings: thermal: Add yaml bindings for thermal zones") Signed-off-by: Daniel Lezcano Acked-by: Rob Herring Link: https://lore.kernel.org/r/20220809085629.509116-3-daniel.lezcano@linaro.org --- Documentation/devicetree/bindings/thermal/thermal-zones.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml index 2d34f3ccb2572..8d2c6d74b605a 100644 --- a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml +++ b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml @@ -214,6 +214,7 @@ patternProperties: - polling-delay - polling-delay-passive - thermal-sensors + - trips additionalProperties: false From d52788b3da2750ebc617891cf260b8f8912f522b Mon Sep 17 00:00:00 2001 From: David Gow Date: Fri, 15 Jul 2022 12:03:54 +0800 Subject: [PATCH 045/654] mmc: sdhci-of-aspeed: test: Fix dependencies when KUNIT=m While the sdhci-of-aspeed KUnit tests do work when builtin, and do work when KUnit itself is being built as a module, the two together break. This is because the KUnit tests (understandably) depend on KUnit, so a built-in test cannot build if KUnit is a module. Fix this by adding a dependency on (MMC_SDHCI_OF_ASPEED=m || KUNIT=y), which only excludes this one problematic configuration. This was reported on a nasty openrisc-randconfig run by the kernel test robot, though for some reason (compiler optimisations removing the test code?) I wasn't able to reproduce it locally on x86: https://lore.kernel.org/linux-mm/202207140122.fzhlf60k-lkp@intel.com/T/ Fixes: 291cd54e5b05 ("mmc: sdhci-of-aspeed: test: Use kunit_test_suite() macro") Reported-by: kernel test robot Signed-off-by: David Gow Acked-by: Andrew Jeffery Acked-by: Ulf Hansson Acked-by: Brendan Higgins Signed-off-by: Shuah Khan --- drivers/mmc/host/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 10c563999d3da..e63608834411a 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -171,6 +171,7 @@ config MMC_SDHCI_OF_ASPEED config MMC_SDHCI_OF_ASPEED_TEST bool "Tests for the ASPEED SDHCI driver" if !KUNIT_ALL_TESTS depends on MMC_SDHCI_OF_ASPEED && KUNIT + depends on (MMC_SDHCI_OF_ASPEED=m || KUNIT=y) default KUNIT_ALL_TESTS help Enable KUnit tests for the ASPEED SDHCI driver. Select this From 41a55567b9e31cb852670684404654ec4fd0d8d6 Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 13 Jul 2022 08:52:20 +0800 Subject: [PATCH 046/654] module: kunit: Load .kunit_test_suites section when CONFIG_KUNIT=m The new KUnit module handling has KUnit test suites listed in a .kunit_test_suites section of each module. This should be loaded when the module is, but at the moment this only happens if KUnit is built-in. Also load this when KUnit is enabled as a module: it'll not be usable unless KUnit is loaded, but such modules are likely to depend on KUnit anyway, so it's unlikely to ever be loaded needlessly. Fixes: 3d6e44623841 ("kunit: unify module and builtin suite definitions") Signed-off-by: David Gow Reviewed-by: Brendan Higgins Tested-by: Geert Uytterhoeven Signed-off-by: Shuah Khan --- kernel/module/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/module/main.c b/kernel/module/main.c index 6a477c622544d..a4e4d84b6f4e0 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2099,7 +2099,7 @@ static int find_module_sections(struct module *mod, struct load_info *info) sizeof(*mod->static_call_sites), &mod->num_static_call_sites); #endif -#ifdef CONFIG_KUNIT +#if IS_ENABLED(CONFIG_KUNIT) mod->kunit_suites = section_objs(info, ".kunit_test_suites", sizeof(*mod->kunit_suites), &mod->num_kunit_suites); From 8f89e33bf040bbef66386c426198622180233178 Mon Sep 17 00:00:00 2001 From: Marcus Folkesson Date: Fri, 22 Jul 2022 15:07:18 +0200 Subject: [PATCH 047/654] iio: adc: mcp3911: make use of the sign bit The device supports negative values as well. Fixes: 3a89b289df5d ("iio: adc: add support for mcp3911") Signed-off-by: Marcus Folkesson Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220722130726.7627-2-marcus.folkesson@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/mcp3911.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/adc/mcp3911.c b/drivers/iio/adc/mcp3911.c index 1cb4590fe4125..f581cefb67195 100644 --- a/drivers/iio/adc/mcp3911.c +++ b/drivers/iio/adc/mcp3911.c @@ -113,6 +113,8 @@ static int mcp3911_read_raw(struct iio_dev *indio_dev, if (ret) goto out; + *val = sign_extend32(*val, 23); + ret = IIO_VAL_INT; break; From 160905549e663019e26395ed9d66c24ee2cf5187 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 7 Aug 2022 08:37:43 +0200 Subject: [PATCH 048/654] iio: light: cm3605: Fix an error handling path in cm3605_probe() The commit in Fixes also introduced a new error handling path which should goto the existing error handling path. Otherwise some resources leak. Fixes: 0d31d91e6145 ("iio: light: cm3605: Make use of the helper function dev_err_probe()") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/0e186de2c125b3e17476ebf9c54eae4a5d66f994.1659854238.git.christophe.jaillet@wanadoo.fr Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/cm3605.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iio/light/cm3605.c b/drivers/iio/light/cm3605.c index c721b69d50950..0b30db77f78bd 100644 --- a/drivers/iio/light/cm3605.c +++ b/drivers/iio/light/cm3605.c @@ -226,8 +226,10 @@ static int cm3605_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (irq < 0) - return dev_err_probe(dev, irq, "failed to get irq\n"); + if (irq < 0) { + ret = dev_err_probe(dev, irq, "failed to get irq\n"); + goto out_disable_aset; + } ret = devm_request_threaded_irq(dev, irq, cm3605_prox_irq, NULL, 0, "cm3605", indio_dev); From 2b97cf76289a4fcae66d7959b0d74a87207d7068 Mon Sep 17 00:00:00 2001 From: Hao Jia Date: Sat, 6 Aug 2022 20:05:08 +0800 Subject: [PATCH 049/654] sched/psi: Zero the memory of struct psi_group After commit 5f69a6577bc3 ("psi: dont alloc memory for psi by default"), the memory used by struct psi_group is no longer allocated and zeroed in cgroup_create(). Since the memory of struct psi_group is not zeroed, the data in this memory is random, which will lead to inaccurate psi statistics when creating a new cgroup. So we use kzlloc() to allocate and zero the struct psi_group and remove the redundant zeroing in group_init(). Steps to reproduce: 1. Use cgroup v2 and enable CONFIG_PSI 2. Create a new cgroup, and query psi statistics mkdir /sys/fs/cgroup/test cat /sys/fs/cgroup/test/cpu.pressure some avg10=0.00 avg60=0.00 avg300=47927752200.00 total=12884901 full avg10=561815124.00 avg60=125835394188.00 avg300=1077090462000.00 total=10273561772 cat /sys/fs/cgroup/test/io.pressure some avg10=1040093132823.95 avg60=1203770351379.21 avg300=3862252669559.46 total=4294967296 full avg10=921884564601.39 avg60=0.00 avg300=1984507298.35 total=442381631 cat /sys/fs/cgroup/test/memory.pressure some avg10=232476085778.11 avg60=0.00 avg300=0.00 total=0 full avg10=0.00 avg60=0.00 avg300=2585658472280.57 total=12884901 Fixes: commit 5f69a6577bc3 ("psi: dont alloc memory for psi by default") Signed-off-by: Hao Jia Reviewed-by: Ingo Molnar Acked-by: Johannes Weiner Signed-off-by: Tejun Heo --- kernel/sched/psi.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index ec66b40bdd403..5ee615a59fe17 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -190,12 +190,8 @@ static void group_init(struct psi_group *group) /* Init trigger-related members */ mutex_init(&group->trigger_lock); INIT_LIST_HEAD(&group->triggers); - memset(group->nr_triggers, 0, sizeof(group->nr_triggers)); - group->poll_states = 0; group->poll_min_period = U32_MAX; - memset(group->polling_total, 0, sizeof(group->polling_total)); group->polling_next_update = ULLONG_MAX; - group->polling_until = 0; init_waitqueue_head(&group->poll_wait); timer_setup(&group->poll_timer, poll_timer_fn, 0); rcu_assign_pointer(group->poll_task, NULL); @@ -957,7 +953,7 @@ int psi_cgroup_alloc(struct cgroup *cgroup) if (static_branch_likely(&psi_disabled)) return 0; - cgroup->psi = kmalloc(sizeof(struct psi_group), GFP_KERNEL); + cgroup->psi = kzalloc(sizeof(struct psi_group), GFP_KERNEL); if (!cgroup->psi) return -ENOMEM; From 76b079ef4cc954fc2c2e0333a01855b0b2b6bdee Mon Sep 17 00:00:00 2001 From: Hao Jia Date: Sat, 6 Aug 2022 20:05:09 +0800 Subject: [PATCH 050/654] sched/psi: Remove unused parameter nbytes of psi_trigger_create() psi_trigger_create()'s 'nbytes' parameter is not used, so we can remove it. Signed-off-by: Hao Jia Reviewed-by: Ingo Molnar Acked-by: Johannes Weiner Signed-off-by: Tejun Heo --- include/linux/psi.h | 2 +- kernel/cgroup/cgroup.c | 2 +- kernel/sched/psi.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/psi.h b/include/linux/psi.h index 89784763d19e2..dd74411ac21d7 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -27,7 +27,7 @@ void psi_memstall_leave(unsigned long *flags); int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); struct psi_trigger *psi_trigger_create(struct psi_group *group, - char *buf, size_t nbytes, enum psi_res res); + char *buf, enum psi_res res); void psi_trigger_destroy(struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index ffaccd6373f1e..df7df5843b4fe 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3698,7 +3698,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, } psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi; - new = psi_trigger_create(psi, buf, nbytes, res); + new = psi_trigger_create(psi, buf, res); if (IS_ERR(new)) { cgroup_put(cgrp); return PTR_ERR(new); diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 5ee615a59fe17..ecb4b4ff4ce0a 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1087,7 +1087,7 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) } struct psi_trigger *psi_trigger_create(struct psi_group *group, - char *buf, size_t nbytes, enum psi_res res) + char *buf, enum psi_res res) { struct psi_trigger *t; enum psi_states state; @@ -1316,7 +1316,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, return -EBUSY; } - new = psi_trigger_create(&psi_system, buf, nbytes, res); + new = psi_trigger_create(&psi_system, buf, res); if (IS_ERR(new)) { mutex_unlock(&seq->lock); return PTR_ERR(new); From d7ae5818c3fa3007dee13f9d99832e7f26b8bc44 Mon Sep 17 00:00:00 2001 From: Hao Jia Date: Sat, 6 Aug 2022 20:05:10 +0800 Subject: [PATCH 051/654] sched/psi: Remove redundant cgroup_psi() when !CONFIG_CGROUPS cgroup_psi() is only called under CONFIG_CGROUPS. We don't need cgroup_psi() when !CONFIG_CGROUPS, so we can remove it in this case. Signed-off-by: Hao Jia Reviewed-by: Ingo Molnar Acked-by: Johannes Weiner Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed53bfe7c46c4..ac5d0515680ea 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -734,11 +734,6 @@ static inline struct cgroup *cgroup_parent(struct cgroup *cgrp) return NULL; } -static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) -{ - return NULL; -} - static inline bool cgroup_psi_enabled(void) { return false; From 5f4d1fd5b5d3506759b5d9cf20bb5fb5b8bdcab1 Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Fri, 12 Aug 2022 11:07:13 -0700 Subject: [PATCH 052/654] selftests/sgx: Ignore OpenSSL 3.0 deprecated functions warning OpenSSL 3.0 deprecates some of the functions used in the SGX selftests, causing build errors on new distros. For now ignore the warnings until support for the functions is no longer available and mark FIXME so that it can be clear this should be removed at some point. Signed-off-by: Kristen Carlson Accardi Reviewed-by: Jarkko Sakkinen Signed-off-by: Shuah Khan --- tools/testing/selftests/sgx/sigstruct.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c index 50c5ab1aa6fa1..a07896a463643 100644 --- a/tools/testing/selftests/sgx/sigstruct.c +++ b/tools/testing/selftests/sgx/sigstruct.c @@ -17,6 +17,12 @@ #include "defines.h" #include "main.h" +/* + * FIXME: OpenSSL 3.0 has deprecated some functions. For now just ignore + * the warnings. + */ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + struct q1q2_ctx { BN_CTX *bn_ctx; BIGNUM *m; From 5b9f0c4df1c1152403c738373fb063e9ffdac0a1 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 16 Aug 2022 09:11:37 +0200 Subject: [PATCH 053/654] x86/entry: Fix entry_INT80_compat for Xen PV guests Commit c89191ce67ef ("x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS") missed one use case of SWAPGS in entry_INT80_compat(). Removing of the SWAPGS macro led to asm just using "swapgs", as it is accepting instructions in capital letters, too. This in turn leads to splats in Xen PV guests like: [ 36.145223] general protection fault, maybe for address 0x2d: 0000 [#1] PREEMPT SMP NOPTI [ 36.145794] CPU: 2 PID: 1847 Comm: ld-linux.so.2 Not tainted 5.19.1-1-default #1 \ openSUSE Tumbleweed f3b44bfb672cdb9f235aff53b57724eba8b9411b [ 36.146608] Hardware name: HP ProLiant ML350p Gen8, BIOS P72 11/14/2013 [ 36.148126] RIP: e030:entry_INT80_compat+0x3/0xa3 Fix that by open coding this single instance of the SWAPGS macro. Fixes: c89191ce67ef ("x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS") Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov Reviewed-by: Jan Beulich Cc: # 5.19 Link: https://lore.kernel.org/r/20220816071137.4893-1-jgross@suse.com --- arch/x86/entry/entry_64_compat.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 682338e7e2a38..4dd19819053a5 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -311,7 +311,7 @@ SYM_CODE_START(entry_INT80_compat) * Interrupts are off on entry. */ ASM_CLAC /* Do this early to minimize exposure */ - SWAPGS + ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV /* * User tracing code (ptrace or signal handlers) might assume that From b2fa9e13bbf101c662c4cd974608242a0db98cfc Mon Sep 17 00:00:00 2001 From: Grzegorz Szymaszek Date: Tue, 2 Aug 2022 19:18:44 +0200 Subject: [PATCH 054/654] staging: r8188eu: add firmware dependency The old rtl8188eu module, removed in commit 55dfa29b43d2 ("staging: rtl8188eu: remove rtl8188eu driver from staging dir") (Linux kernel v5.15-rc1), required (through a MODULE_FIRMWARE call()) the rtlwifi/rtl8188eufw.bin firmware file, which the new r8188eu driver no longer requires. I have tested a few RTL8188EUS-based Wi-Fi cards and, while supported by both drivers, they do not work when using the new one and the firmware wasn't manually loaded. According to Larry Finger, the module maintainer, all such cards need the firmware and the driver should depend on it (see the linked mails). Add a proper MODULE_FIRMWARE() call, like it was done in the old driver. Thanks to Greg Kroah-Hartman and Larry Finger for quick responses to my questions. Cc: stable Link: https://answers.launchpad.net/ubuntu/+source/linux-meta-hwe-5.15/+question/702611 Link: https://lore.kernel.org/lkml/YukkBu3TNODO3or9@nx64de-df6d00/ Signed-off-by: Grzegorz Szymaszek Link: https://lore.kernel.org/r/YulcdKfhA8dPQ78s@nx64de-df6d00 Acked-by: Phillip Potter Acked-by: Larry Finger Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/os_dep/os_intfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c index cac9553666e6d..aa100b5141e1e 100644 --- a/drivers/staging/r8188eu/os_dep/os_intfs.c +++ b/drivers/staging/r8188eu/os_dep/os_intfs.c @@ -18,6 +18,7 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Realtek Wireless Lan Driver"); MODULE_AUTHOR("Realtek Semiconductor Corp."); MODULE_VERSION(DRIVERVERSION); +MODULE_FIRMWARE("rtlwifi/rtl8188eufw.bin"); #define CONFIG_BR_EXT_BRNAME "br0" #define RTW_NOTCH_FILTER 0 /* 0:Disable, 1:Enable, */ From e01f5c8d6af231b3b09e23c1fe8a4057cdcc4e42 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sun, 14 Aug 2022 12:50:27 -0500 Subject: [PATCH 055/654] staging: r8188eu: Add Rosewill USB-N150 Nano to device tables This device is reported as using the RTL8188EUS chip. It has the improbable USB ID of 0bda:ffef, which normally would belong to Realtek, but this ID works for the reporter. Signed-off-by: Larry Finger Cc: stable Link: https://lore.kernel.org/r/20220814175027.2689-1-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/r8188eu/os_dep/usb_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/r8188eu/os_dep/usb_intf.c b/drivers/staging/r8188eu/os_dep/usb_intf.c index cc2b44f60c468..9147d176da4f7 100644 --- a/drivers/staging/r8188eu/os_dep/usb_intf.c +++ b/drivers/staging/r8188eu/os_dep/usb_intf.c @@ -28,6 +28,7 @@ static struct usb_device_id rtw_usb_id_tbl[] = { /*=== Realtek demoboard ===*/ {USB_DEVICE(USB_VENDER_ID_REALTEK, 0x8179)}, /* 8188EUS */ {USB_DEVICE(USB_VENDER_ID_REALTEK, 0x0179)}, /* 8188ETV */ + {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill USB-N150 Nano */ /*=== Customer ID ===*/ /****** 8188EUS ********/ {USB_DEVICE(0x07B8, 0x8179)}, /* Abocom - Abocom */ From 17ecd4a4db4783392edd4944f5e8268205083f70 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 16 Aug 2022 18:30:50 +0300 Subject: [PATCH 056/654] xfrm: policy: fix metadata dst->dev xmit null pointer dereference When we try to transmit an skb with metadata_dst attached (i.e. dst->dev == NULL) through xfrm interface we can hit a null pointer dereference[1] in xfrmi_xmit2() -> xfrm_lookup_with_ifid() due to the check for a loopback skb device when there's no policy which dereferences dst->dev unconditionally. Not having dst->dev can be interepreted as it not being a loopback device, so just add a check for a null dst_orig->dev. With this fix xfrm interface's Tx error counters go up as usual. [1] net-next calltrace captured via netconsole: BUG: kernel NULL pointer dereference, address: 00000000000000c0 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP CPU: 1 PID: 7231 Comm: ping Kdump: loaded Not tainted 5.19.0+ #24 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-1.fc36 04/01/2014 RIP: 0010:xfrm_lookup_with_ifid+0x5eb/0xa60 Code: 8d 74 24 38 e8 26 a4 37 00 48 89 c1 e9 12 fc ff ff 49 63 ed 41 83 fd be 0f 85 be 01 00 00 41 be ff ff ff ff 45 31 ed 48 8b 03 80 c0 00 00 00 08 75 0f 41 80 bc 24 19 0d 00 00 01 0f 84 1e 02 RSP: 0018:ffffb0db82c679f0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffffd0db7fcad430 RCX: ffffb0db82c67a10 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffb0db82c67a80 RBP: ffffb0db82c67a80 R08: ffffb0db82c67a14 R09: 0000000000000000 R10: 0000000000000000 R11: ffff8fa449667dc8 R12: ffffffff966db880 R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000000 FS: 00007ff35c83f000(0000) GS:ffff8fa478480000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000c0 CR3: 000000001ebb7000 CR4: 0000000000350ee0 Call Trace: xfrmi_xmit+0xde/0x460 ? tcf_bpf_act+0x13d/0x2a0 dev_hard_start_xmit+0x72/0x1e0 __dev_queue_xmit+0x251/0xd30 ip_finish_output2+0x140/0x550 ip_push_pending_frames+0x56/0x80 raw_sendmsg+0x663/0x10a0 ? try_charge_memcg+0x3fd/0x7a0 ? __mod_memcg_lruvec_state+0x93/0x110 ? sock_sendmsg+0x30/0x40 sock_sendmsg+0x30/0x40 __sys_sendto+0xeb/0x130 ? handle_mm_fault+0xae/0x280 ? do_user_addr_fault+0x1e7/0x680 ? kvm_read_and_reset_apf_flags+0x3b/0x50 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7ff35cac1366 Code: eb 0b 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 11 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 72 c3 90 55 48 83 ec 30 44 89 4c 24 2c 4c 89 RSP: 002b:00007fff738e4028 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007fff738e57b0 RCX: 00007ff35cac1366 RDX: 0000000000000040 RSI: 0000557164e4b450 RDI: 0000000000000003 RBP: 0000557164e4b450 R08: 00007fff738e7a2c R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000040 R13: 00007fff738e5770 R14: 00007fff738e4030 R15: 0000001d00000001 Modules linked in: netconsole veth br_netfilter bridge bonding virtio_net [last unloaded: netconsole] CR2: 00000000000000c0 CC: Steffen Klassert CC: Daniel Borkmann Fixes: 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy") Signed-off-by: Nikolay Aleksandrov Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4f8bbb825abcb..cc6ab79609e29 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3162,7 +3162,7 @@ struct dst_entry *xfrm_lookup_with_ifid(struct net *net, return dst; nopol: - if (!(dst_orig->dev->flags & IFF_LOOPBACK) && + if ((!dst_orig->dev || !(dst_orig->dev->flags & IFF_LOOPBACK)) && net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) { err = -EPERM; goto error; From abfcf55d8b07a990589301bc64d82a5d26680956 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 16 Aug 2022 13:35:13 +0200 Subject: [PATCH 057/654] acl: handle idmapped mounts for idmapped filesystems Ensure that POSIX ACLs checking, getting, and setting works correctly for filesystems mountable with a filesystem idmapping ("fs_idmapping") that want to support idmapped mounts ("mnt_idmapping"). Note that no filesystems mountable with an fs_idmapping do yet support idmapped mounts. This is required infrastructure work to unblock this. As we explained in detail in [1] the fs_idmapping is irrelevant for getxattr() and setxattr() when mapping the ACL_{GROUP,USER} {g,u}ids stored in the uapi struct posix_acl_xattr_entry in posix_acl_fix_xattr_{from,to}_user(). But for acl_permission_check() and posix_acl_{g,s}etxattr_idmapped_mnt() the fs_idmapping matters. acl_permission_check(): During lookup POSIX ACLs are retrieved directly via i_op->get_acl() and are returned via the kernel internal struct posix_acl which contains e_{g,u}id members of type k{g,u}id_t that already take the fs_idmapping into acccount. For example, a POSIX ACL stored with u4 on the backing store is mapped to k10000004 in the fs_idmapping. The mnt_idmapping remaps the POSIX ACL to k20000004. In order to do that the fs_idmapping needs to be taken into account but that doesn't happen yet (Again, this is a counterfactual currently as fuse doesn't support idmapped mounts currently. It's just used as a convenient example.): fs_idmapping: u0:k10000000:r65536 mnt_idmapping: u0:v20000000:r65536 ACL_USER: k10000004 acl_permission_check() -> check_acl() -> get_acl() -> i_op->get_acl() == fuse_get_acl() -> posix_acl_from_xattr(u0:k10000000:r65536 /* fs_idmapping */, ...) { k10000004 = make_kuid(u0:k10000000:r65536 /* fs_idmapping */, u4 /* ACL_USER */); } -> posix_acl_permission() { -1 = make_vfsuid(u0:v20000000:r65536 /* mnt_idmapping */, &init_user_ns, k10000004); vfsuid_eq_kuid(-1, k10000004 /* caller_fsuid */) } In order to correctly map from the fs_idmapping into mnt_idmapping we require the relevant fs_idmaping to be passed: acl_permission_check() -> check_acl() -> get_acl() -> i_op->get_acl() == fuse_get_acl() -> posix_acl_from_xattr(u0:k10000000:r65536 /* fs_idmapping */, ...) { k10000004 = make_kuid(u0:k10000000:r65536 /* fs_idmapping */, u4 /* ACL_USER */); } -> posix_acl_permission() { v20000004 = make_vfsuid(u0:v20000000:r65536 /* mnt_idmapping */, u0:k10000000:r65536 /* fs_idmapping */, k10000004); vfsuid_eq_kuid(v20000004, k10000004 /* caller_fsuid */) } The initial_idmapping is only correct for the current situation because all filesystems that currently support idmapped mounts do not support being mounted with an fs_idmapping. Note that ovl_get_acl() is used to retrieve the POSIX ACLs from the relevant lower layer and the lower layer's mnt_idmapping needs to be taken into account and so does the fs_idmapping. See 0c5fd887d2bb ("acl: move idmapped mount fixup into vfs_{g,s}etxattr()") for more details. For posix_acl_{g,s}etxattr_idmapped_mnt() it is not as obvious why the fs_idmapping matters as it is for acl_permission_check(). Especially because it doesn't matter for posix_acl_fix_xattr_{from,to}_user() (See [1] for more context.). Because posix_acl_{g,s}etxattr_idmapped_mnt() operate on the uapi struct posix_acl_xattr_entry which contains {g,u}id_t values and thus give the impression that the fs_idmapping is irrelevant as at this point appropriate {g,u}id_t values have seemlingly been generated. As we've stated multiple times this assumption is wrong and in fact the uapi struct posix_acl_xattr_entry is taking idmappings into account depending at what place it is operated on. posix_acl_getxattr_idmapped_mnt() When posix_acl_getxattr_idmapped_mnt() is called the values stored in the uapi struct posix_acl_xattr_entry are mapped according to the fs_idmapping. This happened when they were read from the backing store and then translated from struct posix_acl into the uapi struct posix_acl_xattr_entry during posix_acl_to_xattr(). In other words, the fs_idmapping matters as the values stored as {g,u}id_t in the uapi struct posix_acl_xattr_entry have been generated by it. So we need to take the fs_idmapping into account during make_vfsuid() in posix_acl_getxattr_idmapped_mnt(). posix_acl_setxattr_idmapped_mnt() When posix_acl_setxattr_idmapped_mnt() is called the values stored as {g,u}id_t in uapi struct posix_acl_xattr_entry are intended to be the values that ultimately get turned back into a k{g,u}id_t in posix_acl_from_xattr() (which turns the uapi struct posix_acl_xattr_entry into the kernel internal struct posix_acl). In other words, the fs_idmapping matters as the values stored as {g,u}id_t in the uapi struct posix_acl_xattr_entry are intended to be the values that will be undone in the fs_idmapping when writing to the backing store. So we need to take the fs_idmapping into account during from_vfsuid() in posix_acl_setxattr_idmapped_mnt(). Link: https://lore.kernel.org/all/20220801145520.1532837-1-brauner@kernel.org [1] Fixes: 0c5fd887d2bb ("acl: move idmapped mount fixup into vfs_{g,s}etxattr()") Cc: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) Reviewed-by: Seth Forshee Link: https://lore.kernel.org/r/20220816113514.43304-1-brauner@kernel.org --- fs/overlayfs/inode.c | 11 +++++++---- fs/posix_acl.c | 15 +++++++++------ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b45fea69fff3f..0fbcb590af842 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -460,9 +460,12 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) * of the POSIX ACLs retrieved from the lower layer to this function to not * alter the POSIX ACLs for the underlying filesystem. */ -static void ovl_idmap_posix_acl(struct user_namespace *mnt_userns, +static void ovl_idmap_posix_acl(struct inode *realinode, + struct user_namespace *mnt_userns, struct posix_acl *acl) { + struct user_namespace *fs_userns = i_user_ns(realinode); + for (unsigned int i = 0; i < acl->a_count; i++) { vfsuid_t vfsuid; vfsgid_t vfsgid; @@ -470,11 +473,11 @@ static void ovl_idmap_posix_acl(struct user_namespace *mnt_userns, struct posix_acl_entry *e = &acl->a_entries[i]; switch (e->e_tag) { case ACL_USER: - vfsuid = make_vfsuid(mnt_userns, &init_user_ns, e->e_uid); + vfsuid = make_vfsuid(mnt_userns, fs_userns, e->e_uid); e->e_uid = vfsuid_into_kuid(vfsuid); break; case ACL_GROUP: - vfsgid = make_vfsgid(mnt_userns, &init_user_ns, e->e_gid); + vfsgid = make_vfsgid(mnt_userns, fs_userns, e->e_gid); e->e_gid = vfsgid_into_kgid(vfsgid); break; } @@ -536,7 +539,7 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu) if (!clone) clone = ERR_PTR(-ENOMEM); else - ovl_idmap_posix_acl(mnt_user_ns(realpath.mnt), clone); + ovl_idmap_posix_acl(realinode, mnt_user_ns(realpath.mnt), clone); /* * Since we're not in RCU path walk we always need to release the * original ACLs. diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 1d17d7b13dcd0..5af33800743e4 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -361,6 +361,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, const struct posix_acl *acl, int want) { const struct posix_acl_entry *pa, *pe, *mask_obj; + struct user_namespace *fs_userns = i_user_ns(inode); int found = 0; vfsuid_t vfsuid; vfsgid_t vfsgid; @@ -376,7 +377,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, goto check_perm; break; case ACL_USER: - vfsuid = make_vfsuid(mnt_userns, &init_user_ns, + vfsuid = make_vfsuid(mnt_userns, fs_userns, pa->e_uid); if (vfsuid_eq_kuid(vfsuid, current_fsuid())) goto mask; @@ -390,7 +391,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, } break; case ACL_GROUP: - vfsgid = make_vfsgid(mnt_userns, &init_user_ns, + vfsgid = make_vfsgid(mnt_userns, fs_userns, pa->e_gid); if (vfsgid_in_group_p(vfsgid)) { found = 1; @@ -736,6 +737,7 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, { struct posix_acl_xattr_header *header = value; struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end; + struct user_namespace *fs_userns = i_user_ns(inode); int count; vfsuid_t vfsuid; vfsgid_t vfsgid; @@ -753,13 +755,13 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, switch (le16_to_cpu(entry->e_tag)) { case ACL_USER: uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id)); - vfsuid = make_vfsuid(mnt_userns, &init_user_ns, uid); + vfsuid = make_vfsuid(mnt_userns, fs_userns, uid); entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, vfsuid_into_kuid(vfsuid))); break; case ACL_GROUP: gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id)); - vfsgid = make_vfsgid(mnt_userns, &init_user_ns, gid); + vfsgid = make_vfsgid(mnt_userns, fs_userns, gid); entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, vfsgid_into_kgid(vfsgid))); break; @@ -775,6 +777,7 @@ void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, { struct posix_acl_xattr_header *header = value; struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end; + struct user_namespace *fs_userns = i_user_ns(inode); int count; vfsuid_t vfsuid; vfsgid_t vfsgid; @@ -793,13 +796,13 @@ void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, case ACL_USER: uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id)); vfsuid = VFSUIDT_INIT(uid); - uid = from_vfsuid(mnt_userns, &init_user_ns, vfsuid); + uid = from_vfsuid(mnt_userns, fs_userns, vfsuid); entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, uid)); break; case ACL_GROUP: gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id)); vfsgid = VFSGIDT_INIT(gid); - gid = from_vfsgid(mnt_userns, &init_user_ns, vfsgid); + gid = from_vfsgid(mnt_userns, fs_userns, vfsgid); entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, gid)); break; default: From ddc84c90538e1fdb0721cd41c313944c38449a34 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 16 Aug 2022 13:35:14 +0200 Subject: [PATCH 058/654] MAINTAINERS: update idmapping tree Since Seth joined as a maintainer in ba40a57ff08b ("Add Seth Forshee as co-maintainer for idmapped mounts") it was best to get a shared git tree instead of using our personal repositories. So we requested and Konstantin suggested and gave us a new "idmapping" repository under the pre-existing but mainly unused vfs namespace. Just makes it easier for Seth to send fixes in case I'm out or someone else ever takes over. Cc: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) Link: https://lore.kernel.org/r/20220816113514.43304-2-brauner@kernel.org --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8a5012ba6ff98..a558794dddf91 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9780,7 +9780,7 @@ M: Christian Brauner M: Seth Forshee L: linux-fsdevel@vger.kernel.org S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git +T: git://git.kernel.org/pub/scm/linux/kernel/git/vfs/idmapping.git F: Documentation/filesystems/idmappings.rst F: tools/testing/selftests/mount_setattr/ F: include/linux/mnt_idmapping.h From bf1ac16edf6770a92bc75cf2373f1f9feea398a4 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Tue, 16 Aug 2022 11:47:52 -0500 Subject: [PATCH 059/654] fs: require CAP_SYS_ADMIN in target namespace for idmapped mounts Idmapped mounts should not allow a user to map file ownsership into a range of ids which is not under the control of that user. However, we currently don't check whether the mounter is privileged wrt to the target user namespace. Currently no FS_USERNS_MOUNT filesystems support idmapped mounts, thus this is not a problem as only CAP_SYS_ADMIN in init_user_ns is allowed to set up idmapped mounts. But this could change in the future, so add a check to refuse to create idmapped mounts when the mounter does not have CAP_SYS_ADMIN in the target user namespace. Fixes: bd303368b776 ("fs: support mapped mounts of mapped filesystems") Signed-off-by: Seth Forshee Reviewed-by: Christian Brauner (Microsoft) Link: https://lore.kernel.org/r/20220816164752.2595240-1-sforshee@digitalocean.com Signed-off-by: Christian Brauner (Microsoft) --- fs/namespace.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/namespace.c b/fs/namespace.c index 68789f896f081..df137ba19d375 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4238,6 +4238,13 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, err = -EPERM; goto out_fput; } + + /* We're not controlling the target namespace. */ + if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) { + err = -EPERM; + goto out_fput; + } + kattr->mnt_userns = get_user_ns(mnt_userns); out_fput: From 5e1e087457c94ad7fafbe1cf6f774c6999ee29d4 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 9 Aug 2022 12:38:48 +0800 Subject: [PATCH 060/654] arm64: Fix match_list for erratum 1286807 on Arm Cortex-A76 Since commit 51f559d66527 ("arm64: Enable repeat tlbi workaround on KRYO4XX gold CPUs"), we failed to detect erratum 1286807 on Cortex-A76 because its entry in arm64_repeat_tlbi_list[] was accidently corrupted by this commit. Fix this issue by creating a separate entry for Kryo4xx Gold. Fixes: 51f559d66527 ("arm64: Enable repeat tlbi workaround on KRYO4XX gold CPUs") Cc: Shreyas K K Signed-off-by: Zenghui Yu Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20220809043848.969-1-yuzenghui@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 7e6289e709fc8..0f7e9087d900d 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -208,6 +208,8 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { #ifdef CONFIG_ARM64_ERRATUM_1286807 { ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), + }, + { /* Kryo4xx Gold (rcpe to rfpe) => (r0p0 to r3p0) */ ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe), }, From ff5900092227ade3e31fe25f97faf406cde902e6 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 21 Jul 2022 12:04:33 +0200 Subject: [PATCH 061/654] arm64: adjust KASLR relocation after ARCH_RANDOM removal Commit aacd149b6238 ("arm64: head: avoid relocating the kernel twice for KASLR") adds the new file arch/arm64/kernel/pi/kaslr_early.c with a small code part guarded by '#ifdef CONFIG_ARCH_RANDOM'. Concurrently, commit 9592eef7c16e ("random: remove CONFIG_ARCH_RANDOM") removes the config CONFIG_ARCH_RANDOM and turns all '#ifdef CONFIG_ARCH_RANDOM' code parts into unconditional code parts, which is generally safe to do. Remove a needless ifdef guard after the ARCH_RANDOM removal. Signed-off-by: Lukas Bulwahn Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220721100433.18286-1-lukas.bulwahn@gmail.com Signed-off-by: Will Deacon --- arch/arm64/kernel/pi/kaslr_early.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c index 6c3855e693956..17bff6e399e46 100644 --- a/arch/arm64/kernel/pi/kaslr_early.c +++ b/arch/arm64/kernel/pi/kaslr_early.c @@ -94,11 +94,9 @@ asmlinkage u64 kaslr_early_init(void *fdt) seed = get_kaslr_seed(fdt); if (!seed) { -#ifdef CONFIG_ARCH_RANDOM - if (!__early_cpu_has_rndr() || - !__arm64_rndr((unsigned long *)&seed)) -#endif - return 0; + if (!__early_cpu_has_rndr() || + !__arm64_rndr((unsigned long *)&seed)) + return 0; } /* From 09e52d17b72d3a4bf6951a90ccd8c97fae04e5cf Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 17 Aug 2022 15:04:00 +0200 Subject: [PATCH 062/654] hwmon: (pmbus) Use dev_err_probe() to filter -EPROBE_DEFER error messages devm_regulator_register() can return -EPROBE_DEFER, so better use dev_err_probe() instead of dev_err(), it is less verbose in such a case. It is also more informative, which can't hurt. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/3adf1cea6e32e54c0f71f4604b4e98d992beaa71.1660741419.git.christophe.jaillet@wanadoo.fr Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus_core.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index f10bac8860fce..75cf8e20fa4d0 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -3016,11 +3016,10 @@ static int pmbus_regulator_register(struct pmbus_data *data) rdev = devm_regulator_register(dev, &info->reg_desc[i], &config); - if (IS_ERR(rdev)) { - dev_err(dev, "Failed to register %s regulator\n", - info->reg_desc[i].name); - return PTR_ERR(rdev); - } + if (IS_ERR(rdev)) + return dev_err_probe(dev, PTR_ERR(rdev), + "Failed to register %s regulator\n", + info->reg_desc[i].name); } return 0; From ed3590561f5d3343a1717396307d0942eda472ed Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Tue, 16 Aug 2022 16:44:14 +0200 Subject: [PATCH 063/654] hwmon: (pmbus) Fix vout margin caching The code currently uses a zero margin to mean not cached, but this results in the cache being bypassed if the (low) margin is set to zero, leading to lots of unnecessary SMBus transactions in that case. Use a negative value instead. Fixes: 07fb76273db89d93 ("hwmon: (pmbus) Introduce and use cached vout margins") Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20220816144414.2358974-1-vincent.whitchurch@axis.com Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus_core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 75cf8e20fa4d0..81d3f91dd2047 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -2861,7 +2861,7 @@ static int pmbus_regulator_get_low_margin(struct i2c_client *client, int page) .data = -1, }; - if (!data->vout_low[page]) { + if (data->vout_low[page] < 0) { if (pmbus_check_word_register(client, page, PMBUS_MFR_VOUT_MIN)) s.data = _pmbus_read_word_data(client, page, 0xff, PMBUS_MFR_VOUT_MIN); @@ -2887,7 +2887,7 @@ static int pmbus_regulator_get_high_margin(struct i2c_client *client, int page) .data = -1, }; - if (!data->vout_high[page]) { + if (data->vout_high[page] < 0) { if (pmbus_check_word_register(client, page, PMBUS_MFR_VOUT_MAX)) s.data = _pmbus_read_word_data(client, page, 0xff, PMBUS_MFR_VOUT_MAX); @@ -3319,6 +3319,7 @@ int pmbus_do_probe(struct i2c_client *client, struct pmbus_driver_info *info) struct pmbus_data *data; size_t groups_num = 0; int ret; + int i; char *name; if (!info) @@ -3352,6 +3353,11 @@ int pmbus_do_probe(struct i2c_client *client, struct pmbus_driver_info *info) data->currpage = -1; data->currphase = -1; + for (i = 0; i < ARRAY_SIZE(data->vout_low); i++) { + data->vout_low[i] = -1; + data->vout_high[i] = -1; + } + ret = pmbus_init_common(client, data, info); if (ret < 0) return ret; From 4f7e7236435ca0abe005c674ebd6892c6e83aeb3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 15 Aug 2022 13:27:38 -1000 Subject: [PATCH 064/654] cgroup: Fix threadgroup_rwsem <-> cpus_read_lock() deadlock Bringing up a CPU may involve creating and destroying tasks which requires read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside cpus_read_lock(). However, cpuset's ->attach(), which may be called with thredagroup_rwsem write-locked, also wants to disable CPU hotplug and acquires cpus_read_lock(), leading to a deadlock. Fix it by guaranteeing that ->attach() is always called with CPU hotplug disabled and removing cpus_read_lock() call from cpuset_attach(). Signed-off-by: Tejun Heo Reviewed-and-tested-by: Imran Khan Reported-and-tested-by: Xuewen Yan Fixes: 05c7b7a92cc8 ("cgroup/cpuset: Fix a race between cpuset_attach() and cpu hotplug") Cc: stable@vger.kernel.org # v5.17+ --- kernel/cgroup/cgroup.c | 77 +++++++++++++++++++++++++++++------------- kernel/cgroup/cpuset.c | 3 +- 2 files changed, 55 insertions(+), 25 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index df7df5843b4fe..e1387499b3361 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2369,6 +2369,47 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) } EXPORT_SYMBOL_GPL(task_cgroup_path); +/** + * cgroup_attach_lock - Lock for ->attach() + * @lock_threadgroup: whether to down_write cgroup_threadgroup_rwsem + * + * cgroup migration sometimes needs to stabilize threadgroups against forks and + * exits by write-locking cgroup_threadgroup_rwsem. However, some ->attach() + * implementations (e.g. cpuset), also need to disable CPU hotplug. + * Unfortunately, letting ->attach() operations acquire cpus_read_lock() can + * lead to deadlocks. + * + * Bringing up a CPU may involve creating and destroying tasks which requires + * read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside + * cpus_read_lock(). If we call an ->attach() which acquires the cpus lock while + * write-locking threadgroup_rwsem, the locking order is reversed and we end up + * waiting for an on-going CPU hotplug operation which in turn is waiting for + * the threadgroup_rwsem to be released to create new tasks. For more details: + * + * http://lkml.kernel.org/r/20220711174629.uehfmqegcwn2lqzu@wubuntu + * + * Resolve the situation by always acquiring cpus_read_lock() before optionally + * write-locking cgroup_threadgroup_rwsem. This allows ->attach() to assume that + * CPU hotplug is disabled on entry. + */ +static void cgroup_attach_lock(bool lock_threadgroup) +{ + cpus_read_lock(); + if (lock_threadgroup) + percpu_down_write(&cgroup_threadgroup_rwsem); +} + +/** + * cgroup_attach_unlock - Undo cgroup_attach_lock() + * @lock_threadgroup: whether to up_write cgroup_threadgroup_rwsem + */ +static void cgroup_attach_unlock(bool lock_threadgroup) +{ + if (lock_threadgroup) + percpu_up_write(&cgroup_threadgroup_rwsem); + cpus_read_unlock(); +} + /** * cgroup_migrate_add_task - add a migration target task to a migration context * @task: target task @@ -2841,8 +2882,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, } struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, - bool *locked) - __acquires(&cgroup_threadgroup_rwsem) + bool *threadgroup_locked) { struct task_struct *tsk; pid_t pid; @@ -2859,12 +2899,8 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, * Therefore, we can skip the global lock. */ lockdep_assert_held(&cgroup_mutex); - if (pid || threadgroup) { - percpu_down_write(&cgroup_threadgroup_rwsem); - *locked = true; - } else { - *locked = false; - } + *threadgroup_locked = pid || threadgroup; + cgroup_attach_lock(*threadgroup_locked); rcu_read_lock(); if (pid) { @@ -2895,17 +2931,14 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, goto out_unlock_rcu; out_unlock_threadgroup: - if (*locked) { - percpu_up_write(&cgroup_threadgroup_rwsem); - *locked = false; - } + cgroup_attach_unlock(*threadgroup_locked); + *threadgroup_locked = false; out_unlock_rcu: rcu_read_unlock(); return tsk; } -void cgroup_procs_write_finish(struct task_struct *task, bool locked) - __releases(&cgroup_threadgroup_rwsem) +void cgroup_procs_write_finish(struct task_struct *task, bool threadgroup_locked) { struct cgroup_subsys *ss; int ssid; @@ -2913,8 +2946,8 @@ void cgroup_procs_write_finish(struct task_struct *task, bool locked) /* release reference from cgroup_procs_write_start() */ put_task_struct(task); - if (locked) - percpu_up_write(&cgroup_threadgroup_rwsem); + cgroup_attach_unlock(threadgroup_locked); + for_each_subsys(ss, ssid) if (ss->post_attach) ss->post_attach(); @@ -3000,8 +3033,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) * write-locking can be skipped safely. */ has_tasks = !list_empty(&mgctx.preloaded_src_csets); - if (has_tasks) - percpu_down_write(&cgroup_threadgroup_rwsem); + cgroup_attach_lock(has_tasks); /* NULL dst indicates self on default hierarchy */ ret = cgroup_migrate_prepare_dst(&mgctx); @@ -3022,8 +3054,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) ret = cgroup_migrate_execute(&mgctx); out_finish: cgroup_migrate_finish(&mgctx); - if (has_tasks) - percpu_up_write(&cgroup_threadgroup_rwsem); + cgroup_attach_unlock(has_tasks); return ret; } @@ -4971,13 +5002,13 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, struct task_struct *task; const struct cred *saved_cred; ssize_t ret; - bool locked; + bool threadgroup_locked; dst_cgrp = cgroup_kn_lock_live(of->kn, false); if (!dst_cgrp) return -ENODEV; - task = cgroup_procs_write_start(buf, threadgroup, &locked); + task = cgroup_procs_write_start(buf, threadgroup, &threadgroup_locked); ret = PTR_ERR_OR_ZERO(task); if (ret) goto out_unlock; @@ -5003,7 +5034,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, ret = cgroup_attach_task(dst_cgrp, task, threadgroup); out_finish: - cgroup_procs_write_finish(task, locked); + cgroup_procs_write_finish(task, threadgroup_locked); out_unlock: cgroup_kn_unlock(of->kn); diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 58aadfda9b8b3..1f3a55297f39d 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2289,7 +2289,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) cgroup_taskset_first(tset, &css); cs = css_cs(css); - cpus_read_lock(); + lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */ percpu_down_write(&cpuset_rwsem); guarantee_online_mems(cs, &cpuset_attach_nodemask_to); @@ -2343,7 +2343,6 @@ static void cpuset_attach(struct cgroup_taskset *tset) wake_up(&cpuset_attach_wq); percpu_up_write(&cpuset_rwsem); - cpus_read_unlock(); } /* The various types of files and directories in a cpuset file system */ From 34fc9cc3aebe8b9e27d3bc821543dd482dc686ca Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Wed, 17 Aug 2022 15:25:21 +0200 Subject: [PATCH 065/654] riscv: dts: microchip: correct L2 cache interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "PolarFire SoC MSS Technical Reference Manual" documents the following PLIC interrupts: 1 - L2 Cache Controller Signals when a metadata correction event occurs 2 - L2 Cache Controller Signals when an uncorrectable metadata event occurs 3 - L2 Cache Controller Signals when a data correction event occurs 4 - L2 Cache Controller Signals when an uncorrectable data event occurs This differs from the SiFive FU540 which only has three L2 cache related interrupts. The sequence in the device tree is defined by an enum: enum {         DIR_CORR = 0,         DATA_CORR,         DATA_UNCORR,         DIR_UNCORR, }; So the correct sequence of the L2 cache interrupts is interrupts = <1>, <3>, <4>, <2>; [Conor] This manifests as an unusable system if the l2-cache driver is enabled, as the wrong interrupt gets cleared & the handler prints errors to the console ad infinitum. Fixes: 0fa6107eca41 ("RISC-V: Initial DTS for Microchip ICICLE board") CC: stable@vger.kernel.org # 5.15: e35b07a7df9b: riscv: dts: microchip: mpfs: Group tuples in interrupt properties Signed-off-by: Heinrich Schuchardt Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/microchip/mpfs.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index 499c2e63ad35e..0a6ad5b9ff8dd 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -193,7 +193,7 @@ cache-size = <2097152>; cache-unified; interrupt-parent = <&plic>; - interrupts = <1>, <2>, <3>; + interrupts = <1>, <3>, <4>, <2>; }; clint: clint@2000000 { From 932c29a10d5d0bba63b9f505a8ec1e3ce8c02542 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 17 Aug 2022 19:41:27 +0100 Subject: [PATCH 066/654] locks: Fix dropped call to ->fl_release_private() Prior to commit 4149be7bda7e, sys_flock() would allocate the file_lock struct it was going to use to pass parameters, call ->flock() and then call locks_free_lock() to get rid of it - which had the side effect of calling locks_release_private() and thus ->fl_release_private(). With commit 4149be7bda7e, however, this is no longer the case: the struct is now allocated on the stack, and locks_free_lock() is no longer called - and thus any remaining private data doesn't get cleaned up either. This causes afs flock to cause oops. Kasan catches this as a UAF by the list_del_init() in afs_fl_release_private() for the file_lock record produced by afs_fl_copy_lock() as the original record didn't get delisted. It can be reproduced using the generic/504 xfstest. Fix this by reinstating the locks_release_private() call in sys_flock(). I'm not sure if this would affect any other filesystems. If not, then the release could be done in afs_flock() instead. Changes ======= ver #2) - Don't need to call ->fl_release_private() after calling the security hook, only after calling ->flock(). Fixes: 4149be7bda7e ("fs/lock: Don't allocate file_lock in flock_make_lock().") cc: Chuck Lever cc: Jeff Layton cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/166075758809.3532462.13307935588777587536.stgit@warthog.procyon.org.uk/ # v1 Acked-by: Kuniyuki Iwashima Signed-off-by: David Howells Signed-off-by: Jeff Layton --- fs/locks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/locks.c b/fs/locks.c index c266cfdc3291f..607f94a0e789f 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2129,6 +2129,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) else error = locks_lock_file_wait(f.file, &fl); + locks_release_private(&fl); out_putf: fdput(f); From 583585e48d965338e73e1eb383768d16e0922d73 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Tue, 9 Aug 2022 17:49:15 +0800 Subject: [PATCH 067/654] skmsg: Fix wrong last sg check in sk_msg_recvmsg() Fix one kernel NULL pointer dereference as below: [ 224.462334] Call Trace: [ 224.462394] __tcp_bpf_recvmsg+0xd3/0x380 [ 224.462441] ? sock_has_perm+0x78/0xa0 [ 224.462463] tcp_bpf_recvmsg+0x12e/0x220 [ 224.462494] inet_recvmsg+0x5b/0xd0 [ 224.462534] __sys_recvfrom+0xc8/0x130 [ 224.462574] ? syscall_trace_enter+0x1df/0x2e0 [ 224.462606] ? __do_page_fault+0x2de/0x500 [ 224.462635] __x64_sys_recvfrom+0x24/0x30 [ 224.462660] do_syscall_64+0x5d/0x1d0 [ 224.462709] entry_SYSCALL_64_after_hwframe+0x65/0xca In commit 9974d37ea75f ("skmsg: Fix invalid last sg check in sk_msg_recvmsg()"), we change last sg check to sg_is_last(), but in sockmap redirection case (without stream_parser/stream_verdict/ skb_verdict), we did not mark the end of the scatterlist. Check the sk_msg_alloc, sk_msg_page_add, and bpf_msg_push_data functions, they all do not mark the end of sg. They are expected to use sg.end for end judgment. So the judgment of '(i != msg_rx->sg.end)' is added back here. Fixes: 9974d37ea75f ("skmsg: Fix invalid last sg check in sk_msg_recvmsg()") Signed-off-by: Liu Jian Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20220809094915.150391-1-liujian56@huawei.com --- net/core/skmsg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index f47338d89d5d5..5be4485ff07d6 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -461,7 +461,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, if (copied == len) break; - } while (!sg_is_last(sge)); + } while ((i != msg_rx->sg.end) && !sg_is_last(sge)); if (unlikely(peek)) { msg_rx = sk_psock_next_msg(psock, msg_rx); @@ -471,7 +471,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, } msg_rx->sg.start = i; - if (!sge->length && sg_is_last(sge)) { + if (!sge->length && (i == msg_rx->sg.end || sg_is_last(sge))) { msg_rx = sk_psock_dequeue_msg(psock); kfree_sk_msg(msg_rx); } From 3024d95a4c521c278a7504ee9e80c57c3a9750e0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 17 Aug 2022 23:32:09 +0200 Subject: [PATCH 068/654] bpf: Partially revert flexible-array member replacement Partially revert 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members") given it breaks BPF UAPI. For example, BPF CI run reveals build breakage under LLVM: [...] CLNG-BPF [test_maps] map_ptr_kern.o CLNG-BPF [test_maps] btf__core_reloc_arrays___diff_arr_val_sz.o CLNG-BPF [test_maps] test_bpf_cookie.o progs/map_ptr_kern.c:314:26: error: field 'trie_key' with variable sized type 'struct bpf_lpm_trie_key' not at the end of a struct or class is a GNU extension [-Werror,-Wgnu-variable-sized-type-not-at-end] struct bpf_lpm_trie_key trie_key; ^ CLNG-BPF [test_maps] btf__core_reloc_type_based___diff.o 1 error generated. make: *** [Makefile:521: /tmp/runner/work/bpf/bpf/tools/testing/selftests/bpf/map_ptr_kern.o] Error 1 make: *** Waiting for unfinished jobs.... [...] Typical usage of the bpf_lpm_trie_key is that the struct gets embedded into a user defined key for the LPM BPF map, from the selftest example: struct bpf_lpm_trie_key { <-- UAPI exported struct __u32 prefixlen; __u8 data[]; }; struct lpm_key { <-- BPF program defined struct struct bpf_lpm_trie_key trie_key; __u32 data; }; Undo this for BPF until a different solution can be found. It's the only flexible- array member case in the UAPI header. This was discovered in BPF CI after Dave reported that the include/uapi/linux/bpf.h header was out of sync with tools/include/uapi/linux/bpf.h after 94dfc73e7cf4. And the subsequent sync attempt failed CI. Fixes: 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members") Reported-by: Dave Marchevsky Signed-off-by: Daniel Borkmann Cc: Gustavo A. R. Silva Link: https://lore.kernel.org/bpf/22aebc88-da67-f086-e620-dd4a16e2bc69@iogearbox.net --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7bf9ba1329be9..59a217ca2dfd3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -79,7 +79,7 @@ struct bpf_insn { /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ - __u8 data[]; /* Arbitrary size */ + __u8 data[0]; /* Arbitrary size */ }; struct bpf_cgroup_storage_key { From 14b20b784f59bdd95f6f1cfb112c9818bcec4d84 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Tue, 16 Aug 2022 20:55:16 +0000 Subject: [PATCH 069/654] bpf: Restrict bpf_sys_bpf to CAP_PERFMON The verifier cannot perform sufficient validation of any pointers passed into bpf_attr and treats them as integers rather than pointers. The helper will then read from arbitrary pointers passed into it. Restrict the helper to CAP_PERFMON since the security model in BPF of arbitrary kernel read is CAP_BPF + CAP_PERFMON. Fixes: af2ac3e13e45 ("bpf: Prepare bpf syscall to be used from kernel and user space.") Signed-off-by: YiFei Zhu Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220816205517.682470-1-zhuyifei@google.com --- kernel/bpf/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a4d40d98428a3..27760627370db 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5197,7 +5197,7 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_sys_bpf: - return &bpf_sys_bpf_proto; + return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto; case BPF_FUNC_btf_find_by_name_kind: return &bpf_btf_find_by_name_kind_proto; case BPF_FUNC_sys_close: From 41191cf6bf565f4139046d7be68ec30c290af92d Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Tue, 16 Aug 2022 08:31:58 -0700 Subject: [PATCH 070/654] fs: __file_remove_privs(): restore call to inode_has_no_xattr() This restores the call to inode_has_no_xattr() in the function __file_remove_privs(). In case the dentry_meeds_remove_privs() returned 0, the function inode_has_no_xattr() was not called. Signed-off-by: Stefan Roesch Fixes: faf99b563558 ("fs: add __remove_file_privs() with flags parameter") Reviewed-by: Christian Brauner (Microsoft) Link: https://lore.kernel.org/r/20220816153158.1925040-1-shr@fb.com Signed-off-by: Christian Brauner (Microsoft) --- fs/inode.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 6462276dfdf04..ba1de23c13c1e 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2018,23 +2018,25 @@ static int __file_remove_privs(struct file *file, unsigned int flags) { struct dentry *dentry = file_dentry(file); struct inode *inode = file_inode(file); - int error; + int error = 0; int kill; if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode)) return 0; kill = dentry_needs_remove_privs(dentry); - if (kill <= 0) + if (kill < 0) return kill; - if (flags & IOCB_NOWAIT) - return -EAGAIN; + if (kill) { + if (flags & IOCB_NOWAIT) + return -EAGAIN; + + error = __remove_privs(file_mnt_user_ns(file), dentry, kill); + } - error = __remove_privs(file_mnt_user_ns(file), dentry, kill); if (!error) inode_has_no_xattr(inode); - return error; } From eb100b8fa8e8b59eb3e5fc7a5fd4a1e3c5950f64 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 29 Apr 2022 17:10:17 +0300 Subject: [PATCH 071/654] thunderbolt: Use the actual buffer in tb_async_error() The received notification packet is held in pkg->buffer and not in pkg itself. Fix this by using the correct buffer. Fixes: 81a54b5e1986 ("thunderbolt: Let the connection manager handle all notifications") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c index e5ede5debfb0e..0c661a706160f 100644 --- a/drivers/thunderbolt/ctl.c +++ b/drivers/thunderbolt/ctl.c @@ -407,7 +407,7 @@ static void tb_ctl_rx_submit(struct ctl_pkg *pkg) static int tb_async_error(const struct ctl_pkg *pkg) { - const struct cfg_error_pkg *error = (const struct cfg_error_pkg *)pkg; + const struct cfg_error_pkg *error = pkg->buffer; if (pkg->frame.eof != TB_CFG_PKG_ERROR) return false; From 93a3c0d4e8bfbb15145e5dd7da68a3de4b904aba Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 14 Jun 2022 18:53:59 +0300 Subject: [PATCH 072/654] thunderbolt: Check router generation before connecting xHCI Only Thunderbolt 3 routers need the xHCI connection flow. This also ensures the router actually has both lane adapters (1 and 3). While there move declaration of the boolean variables inside the block where they are being used. Fixes: 30a4eca69b76 ("thunderbolt: Add internal xHCI connect flows for Thunderbolt 3 devices") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/switch.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 244f8cd38b259..c63c1f4ff9dc7 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -3786,14 +3786,18 @@ int tb_switch_pcie_l1_enable(struct tb_switch *sw) */ int tb_switch_xhci_connect(struct tb_switch *sw) { - bool usb_port1, usb_port3, xhci_port1, xhci_port3; struct tb_port *port1, *port3; int ret; + if (sw->generation != 3) + return 0; + port1 = &sw->ports[1]; port3 = &sw->ports[3]; if (tb_switch_is_alpine_ridge(sw)) { + bool usb_port1, usb_port3, xhci_port1, xhci_port3; + usb_port1 = tb_lc_is_usb_plugged(port1); usb_port3 = tb_lc_is_usb_plugged(port3); xhci_port1 = tb_lc_is_xhci_connected(port1); From 7df548840c496b0141fb2404b889c346380c2b22 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 3 Aug 2022 14:41:32 -0700 Subject: [PATCH 073/654] x86/bugs: Add "unknown" reporting for MMIO Stale Data Older Intel CPUs that are not in the affected processor list for MMIO Stale Data vulnerabilities currently report "Not affected" in sysfs, which may not be correct. Vulnerability status for these older CPUs is unknown. Add known-not-affected CPUs to the whitelist. Report "unknown" mitigation status for CPUs that are not in blacklist, whitelist and also don't enumerate MSR ARCH_CAPABILITIES bits that reflect hardware immunity to MMIO Stale Data vulnerabilities. Mitigation is not deployed when the status is unknown. [ bp: Massage, fixup. ] Fixes: 8d50cdf8b834 ("x86/speculation/mmio: Add sysfs reporting for Processor MMIO Stale Data") Suggested-by: Andrew Cooper Suggested-by: Tony Luck Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/a932c154772f2121794a5f2eded1a11013114711.1657846269.git.pawan.kumar.gupta@linux.intel.com --- .../hw-vuln/processor_mmio_stale_data.rst | 14 +++++++ arch/x86/include/asm/cpufeatures.h | 5 ++- arch/x86/kernel/cpu/bugs.c | 14 ++++++- arch/x86/kernel/cpu/common.c | 42 ++++++++++++------- 4 files changed, 56 insertions(+), 19 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst index 9393c50b5afc9..c98fd11907cc8 100644 --- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst +++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst @@ -230,6 +230,20 @@ The possible values in this file are: * - 'Mitigation: Clear CPU buffers' - The processor is vulnerable and the CPU buffer clearing mitigation is enabled. + * - 'Unknown: No mitigations' + - The processor vulnerability status is unknown because it is + out of Servicing period. Mitigation is not attempted. + +Definitions: +------------ + +Servicing period: The process of providing functional and security updates to +Intel processors or platforms, utilizing the Intel Platform Update (IPU) +process or other similar mechanisms. + +End of Servicing Updates (ESU): ESU is the date at which Intel will no +longer provide Servicing, such as through IPU or other similar update +processes. ESU dates will typically be aligned to end of quarter. If the processor is vulnerable then the following information is appended to the above information: diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 235dc85c91c3e..ef4775c6db01c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -457,7 +457,8 @@ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ -#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ -#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ +#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 510d85261132b..da7c361f47e0d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -433,7 +433,8 @@ static void __init mmio_select_mitigation(void) u64 ia32_cap; if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || - cpu_mitigations_off()) { + boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || + cpu_mitigations_off()) { mmio_mitigation = MMIO_MITIGATION_OFF; return; } @@ -538,6 +539,8 @@ static void __init md_clear_update_mitigation(void) pr_info("TAA: %s\n", taa_strings[taa_mitigation]); if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); + else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) + pr_info("MMIO Stale Data: Unknown: No mitigations\n"); } static void __init md_clear_select_mitigation(void) @@ -2275,6 +2278,9 @@ static ssize_t tsx_async_abort_show_state(char *buf) static ssize_t mmio_stale_data_show_state(char *buf) { + if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) + return sysfs_emit(buf, "Unknown: No mitigations\n"); + if (mmio_mitigation == MMIO_MITIGATION_OFF) return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]); @@ -2421,6 +2427,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return srbds_show_state(buf); case X86_BUG_MMIO_STALE_DATA: + case X86_BUG_MMIO_UNKNOWN: return mmio_stale_data_show_state(buf); case X86_BUG_RETBLEED: @@ -2480,7 +2487,10 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char * ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf) { - return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); + if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN); + else + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); } ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 64a73f415f036..3e508f2390983 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1135,7 +1135,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SWAPGS BIT(6) #define NO_ITLB_MULTIHIT BIT(7) #define NO_SPECTRE_V2 BIT(8) -#define NO_EIBRS_PBRSB BIT(9) +#define NO_MMIO BIT(9) +#define NO_EIBRS_PBRSB BIT(10) #define VULNWL(vendor, family, model, whitelist) \ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) @@ -1158,6 +1159,11 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL(VORTEX, 6, X86_MODEL_ANY, NO_SPECULATION), /* Intel Family 6 */ + VULNWL_INTEL(TIGERLAKE, NO_MMIO), + VULNWL_INTEL(TIGERLAKE_L, NO_MMIO), + VULNWL_INTEL(ALDERLAKE, NO_MMIO), + VULNWL_INTEL(ALDERLAKE_L, NO_MMIO), + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), @@ -1176,9 +1182,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), /* * Technically, swapgs isn't serializing on AMD (despite it previously @@ -1193,18 +1199,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), /* Zhaoxin Family 7 */ - VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS), - VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS), + VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), + VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), {} }; @@ -1358,10 +1364,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Affected CPU list is generally enough to enumerate the vulnerability, * but for virtualization case check for ARCH_CAP MSR bits also, VMM may * not want the guest to enumerate the bug. + * + * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, + * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. */ - if (cpu_matches(cpu_vuln_blacklist, MMIO) && - !arch_cap_mmio_immune(ia32_cap)) - setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + if (!arch_cap_mmio_immune(ia32_cap)) { + if (cpu_matches(cpu_vuln_blacklist, MMIO)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) + setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN); + } if (!cpu_has(c, X86_FEATURE_BTC_NO)) { if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) From a0e44c64b6061dda7e00b7c458e4523e2331b739 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Mon, 1 Aug 2022 18:25:11 +0000 Subject: [PATCH 074/654] binder: fix UAF of ref->proc caused by race condition A transaction of type BINDER_TYPE_WEAK_HANDLE can fail to increment the reference for a node. In this case, the target proc normally releases the failed reference upon close as expected. However, if the target is dying in parallel the call will race with binder_deferred_release(), so the target could have released all of its references by now leaving the cleanup of the new failed reference unhandled. The transaction then ends and the target proc gets released making the ref->proc now a dangling pointer. Later on, ref->node is closed and we attempt to take spin_lock(&ref->proc->inner_lock), which leads to the use-after-free bug reported below. Let's fix this by cleaning up the failed reference on the spot instead of relying on the target to do so. ================================================================== BUG: KASAN: use-after-free in _raw_spin_lock+0xa8/0x150 Write of size 4 at addr ffff5ca207094238 by task kworker/1:0/590 CPU: 1 PID: 590 Comm: kworker/1:0 Not tainted 5.19.0-rc8 #10 Hardware name: linux,dummy-virt (DT) Workqueue: events binder_deferred_func Call trace: dump_backtrace.part.0+0x1d0/0x1e0 show_stack+0x18/0x70 dump_stack_lvl+0x68/0x84 print_report+0x2e4/0x61c kasan_report+0xa4/0x110 kasan_check_range+0xfc/0x1a4 __kasan_check_write+0x3c/0x50 _raw_spin_lock+0xa8/0x150 binder_deferred_func+0x5e0/0x9b0 process_one_work+0x38c/0x5f0 worker_thread+0x9c/0x694 kthread+0x188/0x190 ret_from_fork+0x10/0x20 Acked-by: Christian Brauner (Microsoft) Signed-off-by: Carlos Llamas Cc: stable # 4.14+ Link: https://lore.kernel.org/r/20220801182511.3371447-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index c964d7c8c3841..6428f6be69e3d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1385,6 +1385,18 @@ static int binder_inc_ref_for_node(struct binder_proc *proc, } ret = binder_inc_ref_olocked(ref, strong, target_list); *rdata = ref->data; + if (ret && ref == new_ref) { + /* + * Cleanup the failed reference here as the target + * could now be dead and have already released its + * references by now. Calling on the new reference + * with strong=0 and a tmp_refs will not decrement + * the node. The new_ref gets kfree'd below. + */ + binder_cleanup_ref_olocked(new_ref); + ref = NULL; + } + binder_proc_unlock(proc); if (new_ref && ref != new_ref) /* From d6f35446d0769a98e9d761593d267cdd24f09ecd Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Wed, 10 Aug 2022 16:02:25 +0000 Subject: [PATCH 075/654] binder_alloc: Add missing mmap_lock calls when using the VMA Take the mmap_read_lock() when using the VMA in binder_alloc_print_pages() and when checking for a VMA in binder_alloc_new_buf_locked(). It is worth noting binder_alloc_new_buf_locked() drops the VMA read lock after it verifies a VMA exists, but may be taken again deeper in the call stack, if necessary. Fixes: a43cfc87caaf (android: binder: stop saving a pointer to the VMA) Cc: stable Reported-by: Ondrej Mosnacek Reported-by: syzbot+a7b60a176ec13cafb793@syzkaller.appspotmail.com Tested-by: Ondrej Mosnacek Acked-by: Carlos Llamas Signed-off-by: Liam R. Howlett Link: https://lore.kernel.org/r/20220810160209.1630707-1-Liam.Howlett@oracle.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 1014beb128025..51f4e1c5cd019 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -402,12 +402,15 @@ static struct binder_buffer *binder_alloc_new_buf_locked( size_t size, data_offsets_size; int ret; + mmap_read_lock(alloc->vma_vm_mm); if (!binder_alloc_get_vma(alloc)) { + mmap_read_unlock(alloc->vma_vm_mm); binder_alloc_debug(BINDER_DEBUG_USER_ERROR, "%d: binder_alloc_buf, no vma\n", alloc->pid); return ERR_PTR(-ESRCH); } + mmap_read_unlock(alloc->vma_vm_mm); data_offsets_size = ALIGN(data_size, sizeof(void *)) + ALIGN(offsets_size, sizeof(void *)); @@ -929,17 +932,25 @@ void binder_alloc_print_pages(struct seq_file *m, * Make sure the binder_alloc is fully initialized, otherwise we might * read inconsistent state. */ - if (binder_alloc_get_vma(alloc) != NULL) { - for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { - page = &alloc->pages[i]; - if (!page->page_ptr) - free++; - else if (list_empty(&page->lru)) - active++; - else - lru++; - } + + mmap_read_lock(alloc->vma_vm_mm); + if (binder_alloc_get_vma(alloc) == NULL) { + mmap_read_unlock(alloc->vma_vm_mm); + goto uninitialized; } + + mmap_read_unlock(alloc->vma_vm_mm); + for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { + page = &alloc->pages[i]; + if (!page->page_ptr) + free++; + else if (list_empty(&page->lru)) + active++; + else + lru++; + } + +uninitialized: mutex_unlock(&alloc->mutex); seq_printf(m, " pages: %d:%d:%d\n", active, lru, free); seq_printf(m, " pages high watermark: %zu\n", alloc->pages_high); From a3f2fd22743fc56dd5e3896a3fbddd276df1577f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Aug 2022 15:57:20 +0200 Subject: [PATCH 076/654] musb: fix USB_MUSB_TUSB6010 dependency Turning on NOP_USB_XCEIV as builtin broke the TUSB6010 driver because of an older issue with the depencency. It is not necessary to forbid NOP_USB_XCEIV=y in combination with USB_MUSB_HDRC=m, but only the reverse, which causes the link failure from the original Kconfig change. Use the correct dependency to still allow NOP_USB_XCEIV=n or NOP_USB_XCEIV=y but forbid NOP_USB_XCEIV=m when USB_MUSB_HDRC=m to fix the multi_v7_defconfig for tusb. Fixes: ab37a7a890c1 ("ARM: multi_v7_defconfig: Make NOP_USB_XCEIV driver built-in") Fixes: c0442479652b ("usb: musb: Fix randconfig build issues for Kconfig options") Cc: stable Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20220818135737.3143895-10-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index f906dfd360d37..6c8f7763e75e4 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -86,7 +86,7 @@ config USB_MUSB_TUSB6010 tristate "TUSB6010" depends on HAS_IOMEM depends on ARCH_OMAP2PLUS || COMPILE_TEST - depends on NOP_USB_XCEIV = USB_MUSB_HDRC # both built-in or both modules + depends on NOP_USB_XCEIV!=m || USB_MUSB_HDRC=m config USB_MUSB_OMAP2PLUS tristate "OMAP2430 and onwards" From b5a5b9d5f28d23b84f06b45c61dcad95b07d41bc Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Aug 2022 15:38:58 +0200 Subject: [PATCH 077/654] serial: document start_rx member at struct uart_ops Fix this doc build warning: ./include/linux/serial_core.h:397: warning: Function parameter or member 'start_rx' not described in 'uart_ops' Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/5d07ae2eec8fbad87e623160f9926b178bef2744.1660829433.git.mchehab@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index aef3145f20329..6e4f4765d209c 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -141,6 +141,14 @@ struct gpio_desc; * Locking: none. * Interrupts: caller dependent. * + * @start_rx: ``void ()(struct uart_port *port)`` + * + * Start receiving characters. + * + * Locking: @port->lock taken. + * Interrupts: locally disabled. + * This call must not sleep + * * @stop_rx: ``void ()(struct uart_port *port)`` * * Stop receiving characters; the @port is in the process of being closed. From f2d38edc5e3375e56b4a30d5b66cefd385a2b38c Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 17 Aug 2022 14:54:10 -0700 Subject: [PATCH 078/654] usb: typec: tcpm: Return ENOTSUPP for power supply prop writes When the port does not support USB PD, prevent transition to PD only states when power supply property is written. In this case, TCPM transitions to SNK_NEGOTIATE_CAPABILITIES which should not be the case given that the port is not pd_capable. [ 84.308251] state change SNK_READY -> SNK_NEGOTIATE_CAPABILITIES [rev3 NONE_AMS] [ 84.308335] Setting usb_comm capable false [ 84.323367] set_auto_vbus_discharge_threshold mode:3 pps_active:n vbus:5000 ret:0 [ 84.323376] state change SNK_NEGOTIATE_CAPABILITIES -> SNK_WAIT_CAPABILITIES [rev3 NONE_AMS] Fixes: e9e6e164ed8f6 ("usb: typec: tcpm: Support non-PD mode") Cc: stable@vger.kernel.org Reviewed-by: Heikki Krogerus Signed-off-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20220817215410.1807477-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index ea5a917c51b1b..904c7b4ce2f0c 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -6320,6 +6320,13 @@ static int tcpm_psy_set_prop(struct power_supply *psy, struct tcpm_port *port = power_supply_get_drvdata(psy); int ret; + /* + * All the properties below are related to USB PD. The check needs to be + * property specific when a non-pd related property is added. + */ + if (!port->pd_supported) + return -EOPNOTSUPP; + switch (psp) { case POWER_SUPPLY_PROP_ONLINE: ret = tcpm_psy_set_online(port, val); From 72e2329e7c9bbe15e7a813670497ec9c6f919af3 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 29 Jun 2022 14:34:36 +0200 Subject: [PATCH 079/654] drm/vc4: hdmi: Depends on CONFIG_PM We already depend on runtime PM to get the power domains and clocks for most of the devices supported by the vc4 driver, so let's just select it to make sure it's there. Link: https://lore.kernel.org/r/20220629123510.1915022-38-maxime@cerno.tech Acked-by: Thomas Zimmermann Tested-by: Stefan Wahren Signed-off-by: Maxime Ripard (cherry picked from commit f1bc386b319e93e56453ae27e9e83817bb1f6f95) Signed-off-by: Maxime Ripard --- drivers/gpu/drm/vc4/Kconfig | 1 + drivers/gpu/drm/vc4/vc4_hdmi.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index 061be9a6619df..b0f3117102ca5 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -8,6 +8,7 @@ config DRM_VC4 depends on DRM depends on SND && SND_SOC depends on COMMON_CLK + depends on PM select DRM_DISPLAY_HDMI_HELPER select DRM_DISPLAY_HELPER select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 592c3b5d03e6e..411a87adb0c37 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -2855,7 +2855,7 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi) return 0; } -static int __maybe_unused vc4_hdmi_runtime_suspend(struct device *dev) +static int vc4_hdmi_runtime_suspend(struct device *dev) { struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); From 258e483a4d5e97a6a8caa74381ddc1f395ac1c71 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 29 Jun 2022 14:34:37 +0200 Subject: [PATCH 080/654] drm/vc4: hdmi: Rework power up The current code tries to handle the case where CONFIG_PM isn't selected by first calling our runtime_resume implementation and then properly report the power state to the runtime_pm core. This allows to have a functionning device even if pm_runtime_get_* functions are nops. However, the device power state if CONFIG_PM is enabled is RPM_SUSPENDED, and thus our vc4_hdmi_write() and vc4_hdmi_read() calls in the runtime_pm hooks will now report a warning since the device might not be properly powered. Even more so, we need CONFIG_PM enabled since the previous RaspberryPi have a power domain that needs to be powered up for the HDMI controller to be usable. The previous patch has created a dependency on CONFIG_PM, now we can just assume it's there and only call pm_runtime_resume_and_get() to make sure our device is powered in bind. Link: https://lore.kernel.org/r/20220629123510.1915022-39-maxime@cerno.tech Acked-by: Thomas Zimmermann Tested-by: Stefan Wahren Signed-off-by: Maxime Ripard (cherry picked from commit 53565c28e6af2cef6bbf438c34250135e3564459) Signed-off-by: Maxime Ripard --- drivers/gpu/drm/vc4/vc4_hdmi.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 411a87adb0c37..1e5f68704d7d8 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -2972,17 +2972,15 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) vc4_hdmi->disable_4kp60 = true; } + pm_runtime_enable(dev); + /* - * We need to have the device powered up at this point to call - * our reset hook and for the CEC init. + * We need to have the device powered up at this point to call + * our reset hook and for the CEC init. */ - ret = vc4_hdmi_runtime_resume(dev); + ret = pm_runtime_resume_and_get(dev); if (ret) - goto err_put_ddc; - - pm_runtime_get_noresume(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); + goto err_disable_runtime_pm; if ((of_device_is_compatible(dev->of_node, "brcm,bcm2711-hdmi0") || of_device_is_compatible(dev->of_node, "brcm,bcm2711-hdmi1")) && @@ -3028,6 +3026,7 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) err_destroy_encoder: drm_encoder_cleanup(encoder); pm_runtime_put_sync(dev); +err_disable_runtime_pm: pm_runtime_disable(dev); err_put_ddc: put_device(&vc4_hdmi->ddc->dev); From 2aa48857ad52236a9564c71183d6cc8893becd41 Mon Sep 17 00:00:00 2001 From: Witold Lipieta Date: Tue, 9 Aug 2022 13:29:11 +0200 Subject: [PATCH 081/654] usb-storage: Add ignore-residue quirk for NXP PN7462AU This is USB mass storage primary boot loader for code download on NXP PN7462AU. Without the quirk it is impossible to write whole memory at once as device restarts during the write due to bogus residue values reported. Acked-by: Alan Stern Cc: stable Signed-off-by: Witold Lipieta Link: https://lore.kernel.org/r/20220809112911.462776-1-witold.lipieta@thaumatec.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 1a05e3dcfec8a..4993227ab2930 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2294,6 +2294,13 @@ UNUSUAL_DEV( 0x1e74, 0x4621, 0x0000, 0x0000, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BULK_IGNORE_TAG | US_FL_MAX_SECTORS_64 ), +/* Reported by Witold Lipieta */ +UNUSUAL_DEV( 0x1fc9, 0x0117, 0x0100, 0x0100, + "NXP Semiconductors", + "PN7462AU", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_RESIDUE ), + /* Supplied with some Castlewood ORB removable drives */ UNUSUAL_DEV( 0x2027, 0xa001, 0x0000, 0x9999, "Double-H Technology", From cee7db1b0239468b22c295cf04a8c40c34ecd35a Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Mon, 8 Aug 2022 17:53:57 +0900 Subject: [PATCH 082/654] docs: kerneldoc-preamble: Test xeCJK.sty before loading On distros whose texlive packaging is fine-grained, texlive-xecjk can be installed/removed independently of other texlive packages. Conditionally loading xeCJK depending only on the existence of the "Noto Sans CJK SC" font might end up in xelatex error of "xeCJK.sty not found!". Improve the situation by testing existence of xeCJK.sty before loading it. This is useful on RHEL 9 and its clone distros where texlive-xecjk doesn't work at the moment due to a missing dependency [1]. "make pdfdocs" for non-CJK contents should work after removing texlive-xecjk. Link: [1] https://bugzilla.redhat.com/show_bug.cgi?id=2086254 Fixes: 398f7abdcb7e ("docs: pdfdocs: Pull LaTeX preamble part out of conf.py") Cc: stable@vger.kernel.org # v5.18+ Signed-off-by: Akira Yokosawa Acked-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/c24c2a87-70b2-5342-bcc9-de467940466e@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/sphinx/kerneldoc-preamble.sty | 22 +++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/Documentation/sphinx/kerneldoc-preamble.sty b/Documentation/sphinx/kerneldoc-preamble.sty index 2a29cbe51396d..9707e033c8c45 100644 --- a/Documentation/sphinx/kerneldoc-preamble.sty +++ b/Documentation/sphinx/kerneldoc-preamble.sty @@ -70,8 +70,16 @@ % Translations have Asian (CJK) characters which are only displayed if % xeCJK is used +\usepackage{ifthen} +\newboolean{enablecjk} +\setboolean{enablecjk}{false} \IfFontExistsTF{Noto Sans CJK SC}{ - % Load xeCJK when CJK font is available + \IfFileExists{xeCJK.sty}{ + \setboolean{enablecjk}{true} + }{} +}{} +\ifthenelse{\boolean{enablecjk}}{ + % Load xeCJK when both the Noto Sans CJK font and xeCJK.sty are available. \usepackage{xeCJK} % Noto CJK fonts don't provide slant shape. [AutoFakeSlant] permits % its emulation. @@ -196,7 +204,7 @@ % Inactivate CJK after tableofcontents \apptocmd{\sphinxtableofcontents}{\kerneldocCJKoff}{}{} \xeCJKsetup{CJKspace = true}% For inter-phrase space of Korean TOC -}{ % No CJK font found +}{ % Don't enable CJK % Custom macros to on/off CJK and switch CJK fonts (Dummy) \newcommand{\kerneldocCJKon}{} \newcommand{\kerneldocCJKoff}{} @@ -204,14 +212,16 @@ %% and ignore the argument (#1) in their definitions, whole contents of %% CJK chapters can be ignored. \newcommand{\kerneldocBeginSC}[1]{% - %% Put a note on missing CJK fonts in place of zh_CN translation. - \begin{sphinxadmonition}{note}{Note on missing fonts:} + %% Put a note on missing CJK fonts or the xecjk package in place of + %% zh_CN translation. + \begin{sphinxadmonition}{note}{Note on missing fonts and a package:} Translations of Simplified Chinese (zh\_CN), Traditional Chinese (zh\_TW), Korean (ko\_KR), and Japanese (ja\_JP) were skipped - due to the lack of suitable font families. + due to the lack of suitable font families and/or the texlive-xecjk + package. If you want them, please install ``Noto Sans CJK'' font families - by following instructions from + along with the texlive-xecjk package by following instructions from \sphinxcode{./scripts/sphinx-pre-install}. Having optional ``Noto Serif CJK'' font families will improve the looks of those translations. From d2ac7bef95c9ead307801ccb6cb6dfbeb14247bf Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 4 Aug 2022 17:09:53 +0200 Subject: [PATCH 083/654] usb: dwc3: fix PHY disable sequence Generic PHYs must be powered-off before they can be tore down. Similarly, suspending legacy PHYs after having powered them off makes no sense. Fix the dwc3_core_exit() (e.g. called during suspend) and open-coded dwc3_probe() error-path sequences that got this wrong. Note that this makes dwc3_core_exit() match the dwc3_core_init() error path with respect to powering off the PHYs. Fixes: 03c1fd622f72 ("usb: dwc3: core: add phy cleanup for probe error handling") Fixes: c499ff71ff2a ("usb: dwc3: core: re-factor init and exit paths") Cc: stable@vger.kernel.org # 4.8 Reviewed-by: Andrew Halaney Reviewed-by: Matthias Kaehlcke Reviewed-by: Manivannan Sadhasivam Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220804151001.23612-2-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index c5c238ab3083e..16d1f328775f4 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -833,15 +833,16 @@ static void dwc3_core_exit(struct dwc3 *dwc) { dwc3_event_buffers_cleanup(dwc); + usb_phy_set_suspend(dwc->usb2_phy, 1); + usb_phy_set_suspend(dwc->usb3_phy, 1); + phy_power_off(dwc->usb2_generic_phy); + phy_power_off(dwc->usb3_generic_phy); + usb_phy_shutdown(dwc->usb2_phy); usb_phy_shutdown(dwc->usb3_phy); phy_exit(dwc->usb2_generic_phy); phy_exit(dwc->usb3_generic_phy); - usb_phy_set_suspend(dwc->usb2_phy, 1); - usb_phy_set_suspend(dwc->usb3_phy, 1); - phy_power_off(dwc->usb2_generic_phy); - phy_power_off(dwc->usb3_generic_phy); dwc3_clk_disable(dwc); reset_control_assert(dwc->reset); } @@ -1879,16 +1880,16 @@ static int dwc3_probe(struct platform_device *pdev) dwc3_debugfs_exit(dwc); dwc3_event_buffers_cleanup(dwc); - usb_phy_shutdown(dwc->usb2_phy); - usb_phy_shutdown(dwc->usb3_phy); - phy_exit(dwc->usb2_generic_phy); - phy_exit(dwc->usb3_generic_phy); - usb_phy_set_suspend(dwc->usb2_phy, 1); usb_phy_set_suspend(dwc->usb3_phy, 1); phy_power_off(dwc->usb2_generic_phy); phy_power_off(dwc->usb3_generic_phy); + usb_phy_shutdown(dwc->usb2_phy); + usb_phy_shutdown(dwc->usb3_phy); + phy_exit(dwc->usb2_generic_phy); + phy_exit(dwc->usb3_generic_phy); + dwc3_ulpi_exit(dwc); err4: From 762e744922b566b47d15b195646440ca64257e91 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 4 Aug 2022 17:09:54 +0200 Subject: [PATCH 084/654] Revert "usb: dwc3: qcom: Keep power domain on to retain controller status" This reverts commit d9be8d5c5b032e5383ff5c404ff4155e9c705429. Generic power-domain flags must be set before the power-domain is initialised and must specifically not be modified by drivers for devices that happen to be in the domain. To make sure that USB power-domains are left enabled during system suspend when a device in the domain is in the wakeup path, the GENPD_FLAG_ACTIVE_WAKEUP flag should instead be set for the domain unconditionally when it is registered. Note that this also avoids keeping power-domains on during suspend when wakeup has not been enabled (e.g. through sysfs). For the runtime PM case, making sure that the PHYs are not suspended and that they are in the same domain as the controller prevents the domain from being suspended. If there are cases where this is not possible or desirable, the genpd implementation may need to be extended. Fixes: d9be8d5c5b03 ("usb: dwc3: qcom: Keep power domain on to retain controller status") Reviewed-by: Manivannan Sadhasivam Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220804151001.23612-3-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index c5e482f53e9db..be2e3dd364408 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -757,13 +756,12 @@ dwc3_qcom_create_urs_usb_platdev(struct device *dev) static int dwc3_qcom_probe(struct platform_device *pdev) { - struct device_node *np = pdev->dev.of_node; - struct device *dev = &pdev->dev; - struct dwc3_qcom *qcom; - struct resource *res, *parent_res = NULL; - int ret, i; - bool ignore_pipe_clk; - struct generic_pm_domain *genpd; + struct device_node *np = pdev->dev.of_node; + struct device *dev = &pdev->dev; + struct dwc3_qcom *qcom; + struct resource *res, *parent_res = NULL; + int ret, i; + bool ignore_pipe_clk; qcom = devm_kzalloc(&pdev->dev, sizeof(*qcom), GFP_KERNEL); if (!qcom) @@ -772,8 +770,6 @@ static int dwc3_qcom_probe(struct platform_device *pdev) platform_set_drvdata(pdev, qcom); qcom->dev = &pdev->dev; - genpd = pd_to_genpd(qcom->dev->pm_domain); - if (has_acpi_companion(dev)) { qcom->acpi_pdata = acpi_device_get_match_data(dev); if (!qcom->acpi_pdata) { @@ -881,17 +877,7 @@ static int dwc3_qcom_probe(struct platform_device *pdev) if (ret) goto interconnect_exit; - if (device_can_wakeup(&qcom->dwc3->dev)) { - /* - * Setting GENPD_FLAG_ALWAYS_ON flag takes care of keeping - * genpd on in both runtime suspend and system suspend cases. - */ - genpd->flags |= GENPD_FLAG_ALWAYS_ON; - device_init_wakeup(&pdev->dev, true); - } else { - genpd->flags |= GENPD_FLAG_RPM_ALWAYS_ON; - } - + device_init_wakeup(&pdev->dev, 1); qcom->is_suspended = false; pm_runtime_set_active(dev); pm_runtime_enable(dev); From c06795f114a6c4a423b11c9d9bbeb77ecbfbaa8b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 4 Aug 2022 17:09:55 +0200 Subject: [PATCH 085/654] usb: dwc3: qcom: fix gadget-only builds A recent change added a dependency to the USB host stack and broke gadget-only builds of the driver. Fixes: 6895ea55c385 ("usb: dwc3: qcom: Configure wakeup interrupts during suspend") Reported-by: Randy Dunlap Reviewed-by: Randy Dunlap Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220804151001.23612-4-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index be2e3dd364408..e9364141661bc 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -310,8 +310,11 @@ static enum usb_device_speed dwc3_qcom_read_usb2_speed(struct dwc3_qcom *qcom) * currently supports only 1 port per controller. So * this is sufficient. */ +#ifdef CONFIG_USB udev = usb_hub_find_child(hcd->self.root_hub, 1); - +#else + udev = NULL; +#endif if (!udev) return USB_SPEED_UNKNOWN; From a872ab303d5ddd4c965f9cd868677781a33ce35a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 4 Aug 2022 17:09:56 +0200 Subject: [PATCH 086/654] usb: dwc3: qcom: fix use-after-free on runtime-PM wakeup The Qualcomm dwc3 runtime-PM implementation checks the xhci platform-device pointer in the wakeup-interrupt handler to determine whether the controller is in host mode and if so triggers a resume. After a role switch in OTG mode the xhci platform-device would have been freed and the next wakeup from runtime suspend would access the freed memory. Note that role switching is executed from a freezable workqueue, which guarantees that the pointer is stable during suspend. Also note that runtime PM has been broken since commit 2664deb09306 ("usb: dwc3: qcom: Honor wakeup enabled/disabled state"), which incidentally also prevents this issue from being triggered. Fixes: a4333c3a6ba9 ("usb: dwc3: Add Qualcomm DWC3 glue driver") Cc: stable@vger.kernel.org # 4.18 Reviewed-by: Matthias Kaehlcke Reviewed-by: Manivannan Sadhasivam Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220804151001.23612-5-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 14 +++++++++++++- drivers/usb/dwc3/host.c | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index e9364141661bc..6884026b9fadb 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -298,6 +298,14 @@ static void dwc3_qcom_interconnect_exit(struct dwc3_qcom *qcom) icc_put(qcom->icc_path_apps); } +/* Only usable in contexts where the role can not change. */ +static bool dwc3_qcom_is_host(struct dwc3_qcom *qcom) +{ + struct dwc3 *dwc = platform_get_drvdata(qcom->dwc3); + + return dwc->xhci; +} + static enum usb_device_speed dwc3_qcom_read_usb2_speed(struct dwc3_qcom *qcom) { struct dwc3 *dwc = platform_get_drvdata(qcom->dwc3); @@ -460,7 +468,11 @@ static irqreturn_t qcom_dwc3_resume_irq(int irq, void *data) if (qcom->pm_suspended) return IRQ_HANDLED; - if (dwc->xhci) + /* + * This is safe as role switching is done from a freezable workqueue + * and the wakeup interrupts are disabled as part of resume. + */ + if (dwc3_qcom_is_host(qcom)) pm_runtime_resume(&dwc->xhci->dev); return IRQ_HANDLED; diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index f56c30cf151e4..f6f13e7f1ba14 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -135,4 +135,5 @@ int dwc3_host_init(struct dwc3 *dwc) void dwc3_host_exit(struct dwc3 *dwc) { platform_device_unregister(dwc->xhci); + dwc->xhci = NULL; } From 6498a96c8c9ce8ae4078e586a607851491e29a33 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 4 Aug 2022 17:09:57 +0200 Subject: [PATCH 087/654] usb: dwc3: qcom: fix runtime PM wakeup A device must enable wakeups during runtime suspend regardless of whether it is capable and allowed to wake the system up from system suspend. Fixes: 2664deb09306 ("usb: dwc3: qcom: Honor wakeup enabled/disabled state") Tested-by: Matthias Kaehlcke Reviewed-by: Matthias Kaehlcke Reviewed-by: Manivannan Sadhasivam Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220804151001.23612-6-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 6884026b9fadb..05b4666fde146 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -397,7 +397,7 @@ static void dwc3_qcom_enable_interrupts(struct dwc3_qcom *qcom) dwc3_qcom_enable_wakeup_irq(qcom->ss_phy_irq, 0); } -static int dwc3_qcom_suspend(struct dwc3_qcom *qcom) +static int dwc3_qcom_suspend(struct dwc3_qcom *qcom, bool wakeup) { u32 val; int i, ret; @@ -416,7 +416,7 @@ static int dwc3_qcom_suspend(struct dwc3_qcom *qcom) if (ret) dev_warn(qcom->dev, "failed to disable interconnect: %d\n", ret); - if (device_may_wakeup(qcom->dev)) { + if (wakeup) { qcom->usb2_speed = dwc3_qcom_read_usb2_speed(qcom); dwc3_qcom_enable_interrupts(qcom); } @@ -426,7 +426,7 @@ static int dwc3_qcom_suspend(struct dwc3_qcom *qcom) return 0; } -static int dwc3_qcom_resume(struct dwc3_qcom *qcom) +static int dwc3_qcom_resume(struct dwc3_qcom *qcom, bool wakeup) { int ret; int i; @@ -434,7 +434,7 @@ static int dwc3_qcom_resume(struct dwc3_qcom *qcom) if (!qcom->is_suspended) return 0; - if (device_may_wakeup(qcom->dev)) + if (wakeup) dwc3_qcom_disable_interrupts(qcom); for (i = 0; i < qcom->num_clocks; i++) { @@ -945,9 +945,11 @@ static int dwc3_qcom_remove(struct platform_device *pdev) static int __maybe_unused dwc3_qcom_pm_suspend(struct device *dev) { struct dwc3_qcom *qcom = dev_get_drvdata(dev); + bool wakeup = device_may_wakeup(dev); int ret = 0; - ret = dwc3_qcom_suspend(qcom); + + ret = dwc3_qcom_suspend(qcom, wakeup); if (!ret) qcom->pm_suspended = true; @@ -957,9 +959,10 @@ static int __maybe_unused dwc3_qcom_pm_suspend(struct device *dev) static int __maybe_unused dwc3_qcom_pm_resume(struct device *dev) { struct dwc3_qcom *qcom = dev_get_drvdata(dev); + bool wakeup = device_may_wakeup(dev); int ret; - ret = dwc3_qcom_resume(qcom); + ret = dwc3_qcom_resume(qcom, wakeup); if (!ret) qcom->pm_suspended = false; @@ -970,14 +973,14 @@ static int __maybe_unused dwc3_qcom_runtime_suspend(struct device *dev) { struct dwc3_qcom *qcom = dev_get_drvdata(dev); - return dwc3_qcom_suspend(qcom); + return dwc3_qcom_suspend(qcom, true); } static int __maybe_unused dwc3_qcom_runtime_resume(struct device *dev) { struct dwc3_qcom *qcom = dev_get_drvdata(dev); - return dwc3_qcom_resume(qcom); + return dwc3_qcom_resume(qcom, true); } static const struct dev_pm_ops dwc3_qcom_dev_pm_ops = { From c5f14abeb52b0177b940fd734133d383da3521d8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 4 Aug 2022 17:09:58 +0200 Subject: [PATCH 088/654] usb: dwc3: qcom: fix peripheral and OTG suspend A recent commit implementing wakeup support in host mode instead broke suspend for peripheral and OTG mode. The hack that was added in the suspend path to determine the speed of any device connected to the USB2 bus not only accesses internal driver data for a child device, but also dereferences a NULL pointer or accesses freed data when the controller is not acting as host. There's no quick fix to the layering violation, but since reverting would leave us with broken suspend in host mode with wakeup triggering immediately, let's keep the hack for now. Fix the immediate issues by only checking the host bus speed and enabling wakeup interrupts when acting as host. Fixes: 6895ea55c385 ("usb: dwc3: qcom: Configure wakeup interrupts during suspend") Reported-by: kernel test robot Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220804151001.23612-7-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 05b4666fde146..6ae0b7fc4e2cd 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -309,8 +309,13 @@ static bool dwc3_qcom_is_host(struct dwc3_qcom *qcom) static enum usb_device_speed dwc3_qcom_read_usb2_speed(struct dwc3_qcom *qcom) { struct dwc3 *dwc = platform_get_drvdata(qcom->dwc3); - struct usb_hcd *hcd = platform_get_drvdata(dwc->xhci); struct usb_device *udev; + struct usb_hcd *hcd; + + /* + * FIXME: Fix this layering violation. + */ + hcd = platform_get_drvdata(dwc->xhci); /* * It is possible to query the speed of all children of @@ -416,7 +421,11 @@ static int dwc3_qcom_suspend(struct dwc3_qcom *qcom, bool wakeup) if (ret) dev_warn(qcom->dev, "failed to disable interconnect: %d\n", ret); - if (wakeup) { + /* + * The role is stable during suspend as role switching is done from a + * freezable workqueue. + */ + if (dwc3_qcom_is_host(qcom) && wakeup) { qcom->usb2_speed = dwc3_qcom_read_usb2_speed(qcom); dwc3_qcom_enable_interrupts(qcom); } @@ -434,7 +443,7 @@ static int dwc3_qcom_resume(struct dwc3_qcom *qcom, bool wakeup) if (!qcom->is_suspended) return 0; - if (wakeup) + if (dwc3_qcom_is_host(qcom) && wakeup) dwc3_qcom_disable_interrupts(qcom); for (i = 0; i < qcom->num_clocks; i++) { From 416b61893860d45484d4717599c828411cee9c7f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 4 Aug 2022 17:09:59 +0200 Subject: [PATCH 089/654] dt-bindings: usb: qcom,dwc3: add wakeup-source property Add a wakeup-source property to the binding to describe whether the wakeup interrupts can wake the system from suspend. Acked-by: Rob Herring Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220804151001.23612-8-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/qcom,dwc3.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml b/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml index fea3e7092acea..d5959bdea63e1 100644 --- a/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml @@ -108,12 +108,17 @@ properties: HS/FS/LS modes are supported. type: boolean + wakeup-source: true + # Required child node: patternProperties: "^usb@[0-9a-f]+$": $ref: snps,dwc3.yaml# + properties: + wakeup-source: false + required: - compatible - reg From e3fafbd8e36530eb015a1bb31273bcf7f7035422 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 4 Aug 2022 17:10:00 +0200 Subject: [PATCH 090/654] usb: dwc3: qcom: fix wakeup implementation It is the Qualcomm glue wakeup interrupts that may be able to wake the system from suspend and this can now be described in the devicetree. Move the wakeup-source property handling over from the core driver and instead propagate the capability setting to the core device during probe. This is needed as there is currently no way for the core driver to query the wakeup setting of the glue device, but it is the core driver that manages the PHY power state during suspend. Also don't leave the PHYs enabled when system wakeup has been disabled through sysfs. Fixes: 649f5c842ba3 ("usb: dwc3: core: Host wake up support from system suspend") Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220804151001.23612-9-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 5 ++--- drivers/usb/dwc3/dwc3-qcom.c | 6 +++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 16d1f328775f4..8c8e32651473c 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1822,7 +1822,6 @@ static int dwc3_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dwc); dwc3_cache_hwparams(dwc); - device_init_wakeup(&pdev->dev, of_property_read_bool(dev->of_node, "wakeup-source")); spin_lock_init(&dwc->lock); mutex_init(&dwc->mutex); @@ -1984,7 +1983,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) dwc3_core_exit(dwc); break; case DWC3_GCTL_PRTCAP_HOST: - if (!PMSG_IS_AUTO(msg) && !device_can_wakeup(dwc->dev)) { + if (!PMSG_IS_AUTO(msg) && !device_may_wakeup(dwc->dev)) { dwc3_core_exit(dwc); break; } @@ -2045,7 +2044,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) spin_unlock_irqrestore(&dwc->lock, flags); break; case DWC3_GCTL_PRTCAP_HOST: - if (!PMSG_IS_AUTO(msg) && !device_can_wakeup(dwc->dev)) { + if (!PMSG_IS_AUTO(msg) && !device_may_wakeup(dwc->dev)) { ret = dwc3_core_init_for_resume(dwc); if (ret) return ret; diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 6ae0b7fc4e2cd..b05f67d206d2c 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -786,6 +786,7 @@ static int dwc3_qcom_probe(struct platform_device *pdev) struct resource *res, *parent_res = NULL; int ret, i; bool ignore_pipe_clk; + bool wakeup_source; qcom = devm_kzalloc(&pdev->dev, sizeof(*qcom), GFP_KERNEL); if (!qcom) @@ -901,7 +902,10 @@ static int dwc3_qcom_probe(struct platform_device *pdev) if (ret) goto interconnect_exit; - device_init_wakeup(&pdev->dev, 1); + wakeup_source = of_property_read_bool(dev->of_node, "wakeup-source"); + device_init_wakeup(&pdev->dev, wakeup_source); + device_init_wakeup(&qcom->dwc3->dev, wakeup_source); + qcom->is_suspended = false; pm_runtime_set_active(dev); pm_runtime_enable(dev); From ac6928f83f8da73eee254606b45eacc6b743518a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 4 Aug 2022 17:10:01 +0200 Subject: [PATCH 091/654] usb: dwc3: qcom: clean up suspend callbacks Clean up the suspend callbacks by separating the error and success paths to improve readability. Also drop a related redundant initialisation. Reviewed-by: Matthias Kaehlcke Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220804151001.23612-10-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index b05f67d206d2c..197583ff3f3d8 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -959,14 +959,15 @@ static int __maybe_unused dwc3_qcom_pm_suspend(struct device *dev) { struct dwc3_qcom *qcom = dev_get_drvdata(dev); bool wakeup = device_may_wakeup(dev); - int ret = 0; - + int ret; ret = dwc3_qcom_suspend(qcom, wakeup); - if (!ret) - qcom->pm_suspended = true; + if (ret) + return ret; - return ret; + qcom->pm_suspended = true; + + return 0; } static int __maybe_unused dwc3_qcom_pm_resume(struct device *dev) @@ -976,10 +977,12 @@ static int __maybe_unused dwc3_qcom_pm_resume(struct device *dev) int ret; ret = dwc3_qcom_resume(qcom, wakeup); - if (!ret) - qcom->pm_suspended = false; + if (ret) + return ret; - return ret; + qcom->pm_suspended = false; + + return 0; } static int __maybe_unused dwc3_qcom_runtime_suspend(struct device *dev) From aece382251f8fa660d8f621a7f50b0ea0f390178 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 16 Jul 2022 21:32:54 +0200 Subject: [PATCH 092/654] dt-bindings: usb: qcom,dwc3: Add SM6375 compatible Add a compatible for DWC3 found on SM6375. Signed-off-by: Konrad Dybcio Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220716193257.456023-4-konrad.dybcio@somainline.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/qcom,dwc3.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml b/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml index d5959bdea63e1..cd2f7cb6745a8 100644 --- a/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml @@ -33,6 +33,7 @@ properties: - qcom,sm6115-dwc3 - qcom,sm6125-dwc3 - qcom,sm6350-dwc3 + - qcom,sm6375-dwc3 - qcom,sm8150-dwc3 - qcom,sm8250-dwc3 - qcom,sm8350-dwc3 From 1b1b672cc1d4fb3065dac79efb8901bd6244ef69 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Tue, 16 Aug 2022 13:16:24 +0300 Subject: [PATCH 093/654] usb: typec: intel_pmc_mux: Add new ACPI ID for Meteor Lake IOM device This adds the necessary ACPI ID for Intel Meteor Lake IOM devices. The callback function is_memory() is modified so that it also checks if the resource descriptor passed to it is a memory type "Address Space Resource Descriptor". On Intel Meteor Lake the ACPI memory resource is not described using the "32-bit Memory Range Descriptor" because the memory is outside of the 32-bit address space. The memory resource is described using the "Address Space Resource Descriptor" instead. Intel Meteor Lake is the first platform to describe the memory resource for this device with Address Space Resource Descriptor, but it most likely will not be the last. Therefore the change to the is_memory() callback function is made generic. Signed-off-by: Utkarsh Patel Cc: stable@vger.kernel.org [ heikki: Rewrote the commit message. ] Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20220816101629.69054-2-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/intel_pmc_mux.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c index 47b733f78fb0d..a8e273fe204ab 100644 --- a/drivers/usb/typec/mux/intel_pmc_mux.c +++ b/drivers/usb/typec/mux/intel_pmc_mux.c @@ -571,9 +571,11 @@ static int pmc_usb_register_port(struct pmc_usb *pmc, int index, static int is_memory(struct acpi_resource *res, void *data) { - struct resource r; + struct resource_win win = {}; + struct resource *r = &win.res; - return !acpi_dev_resource_memory(res, &r); + return !(acpi_dev_resource_memory(res, r) || + acpi_dev_resource_address_space(res, &win)); } /* IOM ACPI IDs and IOM_PORT_STATUS_OFFSET */ @@ -583,6 +585,9 @@ static const struct acpi_device_id iom_acpi_ids[] = { /* AlderLake */ { "INTC1079", 0x160, }, + + /* Meteor Lake */ + { "INTC107A", 0x160, }, {} }; From bad0d1d726ace2db9e0f39c62b173bc7cc43dd6a Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 15 Aug 2022 15:33:34 +0300 Subject: [PATCH 094/654] usb: dwc3: pci: Add support for Intel Raptor Lake This adds the necessary PCI device ID for the controller inside the Intel Raptor Lake CPU block. The controllers that are part of the PCH (chipset) have separate device IDs. Cc: stable@vger.kernel.org Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20220815123334.87526-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 6b018048fe2e1..4ee4ca09873af 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -44,6 +44,7 @@ #define PCI_DEVICE_ID_INTEL_ADLP 0x51ee #define PCI_DEVICE_ID_INTEL_ADLM 0x54ee #define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1 +#define PCI_DEVICE_ID_INTEL_RPL 0x460e #define PCI_DEVICE_ID_INTEL_RPLS 0x7a61 #define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1 #define PCI_DEVICE_ID_INTEL_MTL 0x7e7e @@ -456,6 +457,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ADLS), (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPL), + (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS), (kernel_ulong_t) &dwc3_pci_intel_swnode, }, From 040f2dbd2010c43f33ad27249e6dac48456f4d99 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Wed, 27 Jul 2022 19:06:47 -0700 Subject: [PATCH 095/654] usb: dwc3: gadget: Avoid duplicate requests to enable Run/Stop Relocate the pullups_connected check until after it is ensured that there are no runtime PM transitions. If another context triggered the DWC3 core's runtime resume, it may have already enabled the Run/Stop. Do not re-run the entire pullup sequence again, as it may issue a core soft reset while Run/Stop is already set. This patch depends on commit 69e131d1ac4e ("usb: dwc3: gadget: Prevent repeat pullup()") Fixes: 77adb8bdf422 ("usb: dwc3: gadget: Allow runtime suspend if UDC unbinded") Cc: stable Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20220728020647.9377-1-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index aeeec751c53c7..eca945feeec3e 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2539,9 +2539,6 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) is_on = !!is_on; - if (dwc->pullups_connected == is_on) - return 0; - dwc->softconnect = is_on; /* @@ -2566,6 +2563,11 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) return 0; } + if (dwc->pullups_connected == is_on) { + pm_runtime_put(dwc->dev); + return 0; + } + if (!is_on) { ret = dwc3_gadget_soft_disconnect(dwc); } else { From 1bcafc0498038a5a2ce5a9b888c39f1c32242cec Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Fri, 5 Aug 2022 11:19:31 -0700 Subject: [PATCH 096/654] usb: misc: onboard_usb_hub: Drop reset delay in onboard_hub_power_off() onboard_hub_power_off() currently has a delay after asserting the reset of the hub. There is already a delay in onboard_hub_power_on() before de-asserting the reset, which ensures that the reset is asserted for the required time, so the delay in _power_off() is not needed. Skip the reset GPIO check before calling gpiod_set_value_cansleep(), the function returns early when the GPIO descriptor is NULL. Reviewed-By: Alexander Stein Reviewed-by: Douglas Anderson Signed-off-by: Matthias Kaehlcke Link: https://lore.kernel.org/r/20220805111836.1.Id5a4dc0a2c046236116693aa55672295513a0f2a@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/onboard_usb_hub.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c index d1df153e7f5a6..d63c63942af1b 100644 --- a/drivers/usb/misc/onboard_usb_hub.c +++ b/drivers/usb/misc/onboard_usb_hub.c @@ -71,10 +71,7 @@ static int onboard_hub_power_off(struct onboard_hub *hub) { int err; - if (hub->reset_gpio) { - gpiod_set_value_cansleep(hub->reset_gpio, 1); - fsleep(hub->pdata->reset_us); - } + gpiod_set_value_cansleep(hub->reset_gpio, 1); err = regulator_disable(hub->vdd); if (err) { From 7ec9fce4b31604f8415136a4c07f7dc8ad431aec Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Thu, 18 Aug 2022 10:41:18 +0300 Subject: [PATCH 097/654] ip_tunnel: Respect tunnel key's "flow_flags" in IP tunnels Commit 451ef36bd229 ("ip_tunnels: Add new flow flags field to ip_tunnel_key") added a "flow_flags" member to struct ip_tunnel_key which was later used by the commit in the fixes tag to avoid dropping packets with sources that aren't locally configured when set in bpf_set_tunnel_key(). VXLAN and GENEVE were made to respect this flag, ip tunnels like IPIP and GRE were not. This commit fixes this omission by making ip_tunnel_init_flow() receive the flow flags from the tunnel key in the relevant collect_md paths. Fixes: b8fff748521c ("bpf: Set flow flag to allow any source IP in bpf_tunnel_key") Signed-off-by: Eyal Birger Signed-off-by: Daniel Borkmann Reviewed-by: Paul Chaignon Link: https://lore.kernel.org/bpf/20220818074118.726639-1-eyal.birger@gmail.com --- drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 3 ++- include/net/ip_tunnels.h | 4 +++- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_tunnel.c | 7 ++++--- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 39904dacf4f0d..b3472fb946177 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -423,7 +423,8 @@ mlxsw_sp_span_gretap4_route(const struct net_device *to_dev, parms = mlxsw_sp_ipip_netdev_parms4(to_dev); ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp, - 0, 0, dev_net(to_dev), parms.link, tun->fwmark, 0); + 0, 0, dev_net(to_dev), parms.link, tun->fwmark, 0, + 0); rt = ip_route_output_key(tun->net, &fl4); if (IS_ERR(rt)) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 63fac94f9aced..ced80e2f8b58f 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -246,7 +246,8 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4, __be32 daddr, __be32 saddr, __be32 key, __u8 tos, struct net *net, int oif, - __u32 mark, __u32 tun_inner_hash) + __u32 mark, __u32 tun_inner_hash, + __u8 flow_flags) { memset(fl4, 0, sizeof(*fl4)); @@ -263,6 +264,7 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4, fl4->fl4_gre_key = key; fl4->flowi4_mark = mark; fl4->flowi4_multipath_hash = tun_inner_hash; + fl4->flowi4_flags = flow_flags; } int ip_tunnel_init(struct net_device *dev); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5c58e21f724e9..f866d6282b2b3 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -609,7 +609,7 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, tunnel_id_to_key32(key->tun_id), key->tos & ~INET_ECN_MASK, dev_net(dev), 0, - skb->mark, skb_get_hash(skb)); + skb->mark, skb_get_hash(skb), key->flow_flags); rt = ip_route_output_key(dev_net(dev), &fl4); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e65e948cab9f2..019f3b0839c52 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -295,7 +295,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr, iph->saddr, tunnel->parms.o_key, RT_TOS(iph->tos), dev_net(dev), - tunnel->parms.link, tunnel->fwmark, 0); + tunnel->parms.link, tunnel->fwmark, 0, 0); rt = ip_route_output_key(tunnel->net, &fl4); if (!IS_ERR(rt)) { @@ -570,7 +570,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, tunnel_id_to_key32(key->tun_id), RT_TOS(tos), - dev_net(dev), 0, skb->mark, skb_get_hash(skb)); + dev_net(dev), 0, skb->mark, skb_get_hash(skb), + key->flow_flags); if (tunnel->encap.type != TUNNEL_ENCAP_NONE) goto tx_error; @@ -729,7 +730,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, RT_TOS(tos), dev_net(dev), tunnel->parms.link, - tunnel->fwmark, skb_get_hash(skb)); + tunnel->fwmark, skb_get_hash(skb), 0); if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) goto tx_error; From c1e5c2f0cb8a22ec2e14af92afc7006491bebabb Mon Sep 17 00:00:00 2001 From: Pablo Sun Date: Thu, 4 Aug 2022 11:48:03 +0800 Subject: [PATCH 098/654] usb: typec: altmodes/displayport: correct pin assignment for UFP receptacles Fix incorrect pin assignment values when connecting to a monitor with Type-C receptacle instead of a plug. According to specification, an UFP_D receptacle's pin assignment should came from the UFP_D pin assignments field (bit 23:16), while an UFP_D plug's assignments are described in the DFP_D pin assignments (bit 15:8) during Mode Discovery. For example the LG 27 UL850-W is a monitor with Type-C receptacle. The monitor responds to MODE DISCOVERY command with following DisplayPort Capability flag: dp->alt->vdo=0x140045 The existing logic only take cares of UPF_D plug case, and would take the bit 15:8 for this 0x140045 case. This results in an non-existing pin assignment 0x0 in dp_altmode_configure. To fix this problem a new set of macros are introduced to take plug/receptacle differences into consideration. Fixes: 0e3bb7d6894d ("usb: typec: Add driver for DisplayPort alternate mode") Cc: stable@vger.kernel.org Co-developed-by: Pablo Sun Co-developed-by: Macpaul Lin Reviewed-by: Guillaume Ranquet Reviewed-by: Heikki Krogerus Signed-off-by: Pablo Sun Signed-off-by: Macpaul Lin Link: https://lore.kernel.org/r/20220804034803.19486-1-macpaul.lin@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/displayport.c | 4 ++-- include/linux/usb/typec_dp.h | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index c1d8c23baa399..de66a2949e33b 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -99,8 +99,8 @@ static int dp_altmode_configure(struct dp_altmode *dp, u8 con) case DP_STATUS_CON_UFP_D: case DP_STATUS_CON_BOTH: /* NOTE: First acting as DP source */ conf |= DP_CONF_UFP_U_AS_UFP_D; - pin_assign = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo) & - DP_CAP_UFP_D_PIN_ASSIGN(dp->port->vdo); + pin_assign = DP_CAP_PIN_ASSIGN_UFP_D(dp->alt->vdo) & + DP_CAP_PIN_ASSIGN_DFP_D(dp->port->vdo); break; default: break; diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index cfb916cccd316..8d09c2f0a9b80 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -73,6 +73,11 @@ enum { #define DP_CAP_USB BIT(7) #define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(15, 8)) >> 8) #define DP_CAP_UFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(23, 16)) >> 16) +/* Get pin assignment taking plug & receptacle into consideration */ +#define DP_CAP_PIN_ASSIGN_UFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ + DP_CAP_UFP_D_PIN_ASSIGN(_cap_) : DP_CAP_DFP_D_PIN_ASSIGN(_cap_)) +#define DP_CAP_PIN_ASSIGN_DFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ + DP_CAP_DFP_D_PIN_ASSIGN(_cap_) : DP_CAP_UFP_D_PIN_ASSIGN(_cap_)) /* DisplayPort Status Update VDO bits */ #define DP_STATUS_CONNECTION(_status_) ((_status_) & 3) From 7d6620f107bae6ed687ff07668e8e8f855487aa9 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Sat, 13 Aug 2022 21:40:30 +0800 Subject: [PATCH 099/654] bpf, cgroup: Fix kernel BUG in purge_effective_progs Syzkaller reported a triggered kernel BUG as follows: ------------[ cut here ]------------ kernel BUG at kernel/bpf/cgroup.c:925! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 194 Comm: detach Not tainted 5.19.0-14184-g69dac8e431af #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:__cgroup_bpf_detach+0x1f2/0x2a0 Code: 00 e8 92 60 30 00 84 c0 75 d8 4c 89 e0 31 f6 85 f6 74 19 42 f6 84 28 48 05 00 00 02 75 0e 48 8b 80 c0 00 00 00 48 85 c0 75 e5 <0f> 0b 48 8b 0c5 RSP: 0018:ffffc9000055bdb0 EFLAGS: 00000246 RAX: 0000000000000000 RBX: ffff888100ec0800 RCX: ffffc900000f1000 RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff888100ec4578 RBP: 0000000000000000 R08: ffff888100ec0800 R09: 0000000000000040 R10: 0000000000000000 R11: 0000000000000000 R12: ffff888100ec4000 R13: 000000000000000d R14: ffffc90000199000 R15: ffff888100effb00 FS: 00007f68213d2b80(0000) GS:ffff88813bc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055f74a0e5850 CR3: 0000000102836000 CR4: 00000000000006e0 Call Trace: cgroup_bpf_prog_detach+0xcc/0x100 __sys_bpf+0x2273/0x2a00 __x64_sys_bpf+0x17/0x20 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f68214dbcb9 Code: 08 44 89 e0 5b 41 5c c3 66 0f 1f 84 00 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff8 RSP: 002b:00007ffeb487db68 EFLAGS: 00000246 ORIG_RAX: 0000000000000141 RAX: ffffffffffffffda RBX: 000000000000000b RCX: 00007f68214dbcb9 RDX: 0000000000000090 RSI: 00007ffeb487db70 RDI: 0000000000000009 RBP: 0000000000000003 R08: 0000000000000012 R09: 0000000b00000003 R10: 00007ffeb487db70 R11: 0000000000000246 R12: 00007ffeb487dc20 R13: 0000000000000004 R14: 0000000000000001 R15: 000055f74a1011b0 Modules linked in: ---[ end trace 0000000000000000 ]--- Repetition steps: For the following cgroup tree, root | cg1 | cg2 1. attach prog2 to cg2, and then attach prog1 to cg1, both bpf progs attach type is NONE or OVERRIDE. 2. write 1 to /proc/thread-self/fail-nth for failslab. 3. detach prog1 for cg1, and then kernel BUG occur. Failslab injection will cause kmalloc fail and fall back to purge_effective_progs. The problem is that cg2 have attached another prog, so when go through cg2 layer, iteration will add pos to 1, and subsequent operations will be skipped by the following condition, and cg will meet NULL in the end. `if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))` The NULL cg means no link or prog match, this is as expected, and it's not a bug. So here just skip the no match situation. Fixes: 4c46091ee985 ("bpf: Fix KASAN use-after-free Read in compute_effective_progs") Signed-off-by: Pu Lehui Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220813134030.1972696-1-pulehui@huawei.com --- kernel/bpf/cgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 59b7eb60d5b46..4a400cd637316 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -921,8 +921,10 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog, pos++; } } + + /* no link or prog match, skip the cgroup of this layer */ + continue; found: - BUG_ON(!cg); progs = rcu_dereference_protected( desc->bpf.effective[atype], lockdep_is_held(&cgroup_mutex)); From b5c3aca86d2698c4850b6ee8b341938025d2780c Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sun, 14 Aug 2022 15:12:37 +0100 Subject: [PATCH 100/654] riscv: signal: fix missing prototype warning Fix the warning: arch/riscv/kernel/signal.c:316:27: warning: no previous prototype for function 'do_notify_resume' [-Wmissing-prototypes] asmlinkage __visible void do_notify_resume(struct pt_regs *regs, All other functions in the file are static & none of the existing headers stood out as an obvious location. Create signal.h to hold the declaration. Fixes: e2c0cdfba7f6 ("RISC-V: User-facing API") Signed-off-by: Conor Dooley Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220814141237.493457-4-mail@conchuod.ie Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/signal.h | 12 ++++++++++++ arch/riscv/kernel/signal.c | 1 + 2 files changed, 13 insertions(+) create mode 100644 arch/riscv/include/asm/signal.h diff --git a/arch/riscv/include/asm/signal.h b/arch/riscv/include/asm/signal.h new file mode 100644 index 0000000000000..532c29ef03769 --- /dev/null +++ b/arch/riscv/include/asm/signal.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_SIGNAL_H +#define __ASM_SIGNAL_H + +#include +#include + +asmlinkage __visible +void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); + +#endif diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 38b05ca6fe669..5a2de6b6f8822 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include From d951b20b9def73dcc39a5379831525d0d2a537e9 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sun, 14 Aug 2022 15:12:38 +0100 Subject: [PATCH 101/654] riscv: traps: add missing prototype Sparse complains: arch/riscv/kernel/traps.c:213:6: warning: symbol 'shadow_stack' was not declared. Should it be static? The variable is used in entry.S, so declare shadow_stack there alongside SHADOW_OVERFLOW_STACK_SIZE. Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection") Signed-off-by: Conor Dooley Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220814141237.493457-5-mail@conchuod.ie Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/thread_info.h | 2 ++ arch/riscv/kernel/traps.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 78933ac04995b..67322f878e0d7 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -42,6 +42,8 @@ #ifndef __ASSEMBLY__ +extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)]; + #include #include diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 39d0f8bba4b40..635e6ec269380 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -20,9 +20,10 @@ #include #include +#include #include #include -#include +#include int show_unhandled_signals = 1; From 8b13ea05117ffad4727b0971ed09122d5c91c4dc Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Fri, 19 Aug 2022 16:05:55 +0800 Subject: [PATCH 102/654] usb: xhci-mtk: relax TT periodic bandwidth allocation Currently uses the worst case byte budgets on FS/LS bus bandwidth, for example, for an isochronos IN endpoint with 192 bytes budget, it will consume the whole 5 uframes(188 * 5) while the actual FS bus budget should be just 192 bytes. It cause that many usb audio headsets with 3 interfaces (audio input, audio output, and HID) cannot be configured. To improve it, changes to use "approximate" best case budget for FS/LS bandwidth management. For the same endpoint from the above example, the approximate best case budget is now reduced to (188 * 2) bytes. Signed-off-by: Chunfeng Yun Cc: stable Link: https://lore.kernel.org/r/20220819080556.32215-1-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk-sch.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index 06a6b19acaae6..a17bc584ee99e 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -425,7 +425,6 @@ static int check_fs_bus_bw(struct mu3h_sch_ep_info *sch_ep, int offset) static int check_sch_tt(struct mu3h_sch_ep_info *sch_ep, u32 offset) { - u32 extra_cs_count; u32 start_ss, last_ss; u32 start_cs, last_cs; @@ -461,18 +460,12 @@ static int check_sch_tt(struct mu3h_sch_ep_info *sch_ep, u32 offset) if (last_cs > 7) return -ESCH_CS_OVERFLOW; - if (sch_ep->ep_type == ISOC_IN_EP) - extra_cs_count = (last_cs == 7) ? 1 : 2; - else /* ep_type : INTR IN / INTR OUT */ - extra_cs_count = 1; - - cs_count += extra_cs_count; if (cs_count > 7) cs_count = 7; /* HW limit */ sch_ep->cs_count = cs_count; - /* one for ss, the other for idle */ - sch_ep->num_budget_microframes = cs_count + 2; + /* ss, idle are ignored */ + sch_ep->num_budget_microframes = cs_count; /* * if interval=1, maxp >752, num_budge_micoframe is larger From 6020f480004a80cdad4ae5ee180a231c4f65595b Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Fri, 19 Aug 2022 16:05:56 +0800 Subject: [PATCH 103/654] usb: xhci-mtk: fix bandwidth release issue This happens when @udev->reset_resume is set to true, when usb resume, the flow as below: - hub_resume - usb_disable_interface - usb_disable_endpoint - usb_hcd_disable_endpoint - xhci_endpoint_disable // it set @ep->hcpriv to NULL Then when reset usb device, it will drop allocated endpoints, the flow as below: - usb_reset_and_verify_device - usb_hcd_alloc_bandwidth - xhci_mtk_drop_ep but @ep->hcpriv is already set to NULL, the bandwidth will be not released anymore. Due to the added endponts are stored in hash table, we can drop the check of @ep->hcpriv. Fixes: 4ce186665e7c ("usb: xhci-mtk: Do not use xhci's virt_dev in drop_endpoint") Cc: stable Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20220819080556.32215-2-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk-sch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index a17bc584ee99e..579899eb24c15 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -764,8 +764,8 @@ int xhci_mtk_drop_ep(struct usb_hcd *hcd, struct usb_device *udev, if (ret) return ret; - if (ep->hcpriv) - drop_ep_quirk(hcd, udev, ep); + /* needn't check @ep->hcpriv, xhci_endpoint_disable set it NULL */ + drop_ep_quirk(hcd, udev, ep); return 0; } From db7e5c10351e3dd58e6bef237c8fa74282e5d59e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 19 Aug 2022 10:55:54 +0200 Subject: [PATCH 104/654] Revert "binder_alloc: Add missing mmap_lock calls when using the VMA" This reverts commit d6f35446d0769a98e9d761593d267cdd24f09ecd. It is coming in through Andrew's tree instead, and for some reason we have different versions. I trust the version from Andrew more as the original offending commit came through his tree. Fixes: d6f35446d076 ("binder_alloc: Add missing mmap_lock calls when using the VMA") Cc: stable Cc: Ondrej Mosnacek Cc: Ondrej Mosnacek Cc: Carlos Llamas Cc: Liam R. Howlett Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20220819184027.7b3fda3e@canb.auug.org.au Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 51f4e1c5cd019..1014beb128025 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -402,15 +402,12 @@ static struct binder_buffer *binder_alloc_new_buf_locked( size_t size, data_offsets_size; int ret; - mmap_read_lock(alloc->vma_vm_mm); if (!binder_alloc_get_vma(alloc)) { - mmap_read_unlock(alloc->vma_vm_mm); binder_alloc_debug(BINDER_DEBUG_USER_ERROR, "%d: binder_alloc_buf, no vma\n", alloc->pid); return ERR_PTR(-ESRCH); } - mmap_read_unlock(alloc->vma_vm_mm); data_offsets_size = ALIGN(data_size, sizeof(void *)) + ALIGN(offsets_size, sizeof(void *)); @@ -932,25 +929,17 @@ void binder_alloc_print_pages(struct seq_file *m, * Make sure the binder_alloc is fully initialized, otherwise we might * read inconsistent state. */ - - mmap_read_lock(alloc->vma_vm_mm); - if (binder_alloc_get_vma(alloc) == NULL) { - mmap_read_unlock(alloc->vma_vm_mm); - goto uninitialized; - } - - mmap_read_unlock(alloc->vma_vm_mm); - for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { - page = &alloc->pages[i]; - if (!page->page_ptr) - free++; - else if (list_empty(&page->lru)) - active++; - else - lru++; + if (binder_alloc_get_vma(alloc) != NULL) { + for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { + page = &alloc->pages[i]; + if (!page->page_ptr) + free++; + else if (list_empty(&page->lru)) + active++; + else + lru++; + } } - -uninitialized: mutex_unlock(&alloc->mutex); seq_printf(m, " pages: %d:%d:%d\n", active, lru, free); seq_printf(m, " pages high watermark: %zu\n", alloc->pages_high); From f511aef2ebe5377d4c263842f2e0c0b8e274e8e5 Mon Sep 17 00:00:00 2001 From: Jing Leng Date: Wed, 20 Jul 2022 18:48:15 -0700 Subject: [PATCH 105/654] usb: gadget: f_uac2: fix superspeed transfer On page 362 of the USB3.2 specification ( https://usb.org/sites/default/files/usb_32_20210125.zip), The 'SuperSpeed Endpoint Companion Descriptor' shall only be returned by Enhanced SuperSpeed devices that are operating at Gen X speed. Each endpoint described in an interface is followed by a 'SuperSpeed Endpoint Companion Descriptor'. If users use SuperSpeed UDC, host can't recognize the device if endpoint doesn't have 'SuperSpeed Endpoint Companion Descriptor' followed. Currently in the uac2 driver code: 1. ss_epout_desc_comp follows ss_epout_desc; 2. ss_epin_fback_desc_comp follows ss_epin_fback_desc; 3. ss_epin_desc_comp follows ss_epin_desc; 4. Only ss_ep_int_desc endpoint doesn't have 'SuperSpeed Endpoint Companion Descriptor' followed, so we should add it. Fixes: eaf6cbe09920 ("usb: gadget: f_uac2: add volume and mute support") Cc: stable Signed-off-by: Jing Leng Signed-off-by: Jack Pham Link: https://lore.kernel.org/r/20220721014815.14453-1-quic_jackp@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uac2.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 1905a8d8e0c9f..08726e4c68a56 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -291,6 +291,12 @@ static struct usb_endpoint_descriptor ss_ep_int_desc = { .bInterval = 4, }; +static struct usb_ss_ep_comp_descriptor ss_ep_int_desc_comp = { + .bLength = sizeof(ss_ep_int_desc_comp), + .bDescriptorType = USB_DT_SS_ENDPOINT_COMP, + .wBytesPerInterval = cpu_to_le16(6), +}; + /* Audio Streaming OUT Interface - Alt0 */ static struct usb_interface_descriptor std_as_out_if0_desc = { .bLength = sizeof std_as_out_if0_desc, @@ -604,7 +610,8 @@ static struct usb_descriptor_header *ss_audio_desc[] = { (struct usb_descriptor_header *)&in_feature_unit_desc, (struct usb_descriptor_header *)&io_out_ot_desc, - (struct usb_descriptor_header *)&ss_ep_int_desc, + (struct usb_descriptor_header *)&ss_ep_int_desc, + (struct usb_descriptor_header *)&ss_ep_int_desc_comp, (struct usb_descriptor_header *)&std_as_out_if0_desc, (struct usb_descriptor_header *)&std_as_out_if1_desc, @@ -800,6 +807,7 @@ static void setup_headers(struct f_uac2_opts *opts, struct usb_ss_ep_comp_descriptor *epout_desc_comp = NULL; struct usb_ss_ep_comp_descriptor *epin_desc_comp = NULL; struct usb_ss_ep_comp_descriptor *epin_fback_desc_comp = NULL; + struct usb_ss_ep_comp_descriptor *ep_int_desc_comp = NULL; struct usb_endpoint_descriptor *epout_desc; struct usb_endpoint_descriptor *epin_desc; struct usb_endpoint_descriptor *epin_fback_desc; @@ -827,6 +835,7 @@ static void setup_headers(struct f_uac2_opts *opts, epin_fback_desc = &ss_epin_fback_desc; epin_fback_desc_comp = &ss_epin_fback_desc_comp; ep_int_desc = &ss_ep_int_desc; + ep_int_desc_comp = &ss_ep_int_desc_comp; } i = 0; @@ -855,8 +864,11 @@ static void setup_headers(struct f_uac2_opts *opts, if (EPOUT_EN(opts)) headers[i++] = USBDHDR(&io_out_ot_desc); - if (FUOUT_EN(opts) || FUIN_EN(opts)) + if (FUOUT_EN(opts) || FUIN_EN(opts)) { headers[i++] = USBDHDR(ep_int_desc); + if (ep_int_desc_comp) + headers[i++] = USBDHDR(ep_int_desc_comp); + } if (EPOUT_EN(opts)) { headers[i++] = USBDHDR(&std_as_out_if0_desc); From a10bc71729b236fe36de0d8e4d35c959fd8dec3a Mon Sep 17 00:00:00 2001 From: Thierry GUIBERT Date: Fri, 19 Aug 2022 10:17:02 +0200 Subject: [PATCH 106/654] USB: cdc-acm: Add Icom PMR F3400 support (0c26:0020) Supports for ICOM F3400 and ICOM F4400 PMR radios in CDC-ACM driver enabling the AT serial port. The Vendor Id is 0x0C26 The Product ID is 0x0020 Output of lsusb : Bus 001 Device 009: ID 0c26:0020 Prolific Technology Inc. ICOM Radio Couldn't open device, some information will be missing Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 2 Communications bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x0c26 Prolific Technology Inc. idProduct 0x0020 bcdDevice 0.00 iManufacturer 1 ICOM Inc. iProduct 2 ICOM Radio iSerial 3 *obfuscated* bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 0x0030 bNumInterfaces 2 bConfigurationValue 1 iConfiguration 0 bmAttributes 0xc0 Self Powered MaxPower 0mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 1 AT-commands (v.25ter) iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x83 EP 3 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 12 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x82 EP 2 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Signed-off-by: Thierry GUIBERT Cc: stable Link: https://lore.kernel.org/r/20220819081702.84118-1-thierry.guibert@croix-rouge.fr Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 483bcb1213f73..cc637c4599e16 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1810,6 +1810,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x09d8, 0x0320), /* Elatec GmbH TWN3 */ .driver_info = NO_UNION_NORMAL, /* has misplaced union descriptor */ }, + { USB_DEVICE(0x0c26, 0x0020), /* Icom ICF3400 Serie */ + .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ + }, { USB_DEVICE(0x0ca6, 0xa050), /* Castles VEGA3000 */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, From a8c67e27d9e3ed33afadbdf86bbd58e26e0c4357 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Fri, 19 Aug 2022 16:10:27 +0800 Subject: [PATCH 107/654] dt-bindings: usb: mtu3: add compatible for mt8188 Add a new compatible for mt8188 Acked-by: Krzysztof Kozlowski Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20220819081027.32382-1-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml b/Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml index e63b665453176..b019d490170d7 100644 --- a/Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml +++ b/Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml @@ -24,6 +24,7 @@ properties: - mediatek,mt2712-mtu3 - mediatek,mt8173-mtu3 - mediatek,mt8183-mtu3 + - mediatek,mt8188-mtu3 - mediatek,mt8192-mtu3 - mediatek,mt8195-mtu3 - const: mediatek,mtu3 From 4e3aa9238277597c6c7624f302d81a7b568b6f2d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Aug 2022 14:28:36 +0200 Subject: [PATCH 108/654] x86/nospec: Unwreck the RSB stuffing Commit 2b1299322016 ("x86/speculation: Add RSB VM Exit protections") made a right mess of the RSB stuffing, rewrite the whole thing to not suck. Thanks to Andrew for the enlightening comment about Post-Barrier RSB things so we can make this code less magical. Cc: stable@vger.kernel.org Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/YvuNdDWoUZSBjYcm@worktop.programming.kicks-ass.net --- arch/x86/include/asm/nospec-branch.h | 80 ++++++++++++++-------------- 1 file changed, 39 insertions(+), 41 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index e64fd20778b61..10731ccfed376 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -35,33 +35,44 @@ #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ /* + * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN. + */ +#define __FILL_RETURN_SLOT \ + ANNOTATE_INTRA_FUNCTION_CALL; \ + call 772f; \ + int3; \ +772: + +/* + * Stuff the entire RSB. + * * Google experimented with loop-unrolling and this turned out to be * the optimal version - two calls, each with their own speculation * trap should their return address end up getting used, in a loop. */ -#define __FILL_RETURN_BUFFER(reg, nr, sp) \ - mov $(nr/2), reg; \ -771: \ - ANNOTATE_INTRA_FUNCTION_CALL; \ - call 772f; \ -773: /* speculation trap */ \ - UNWIND_HINT_EMPTY; \ - pause; \ - lfence; \ - jmp 773b; \ -772: \ - ANNOTATE_INTRA_FUNCTION_CALL; \ - call 774f; \ -775: /* speculation trap */ \ - UNWIND_HINT_EMPTY; \ - pause; \ - lfence; \ - jmp 775b; \ -774: \ - add $(BITS_PER_LONG/8) * 2, sp; \ - dec reg; \ - jnz 771b; \ - /* barrier for jnz misprediction */ \ +#define __FILL_RETURN_BUFFER(reg, nr) \ + mov $(nr/2), reg; \ +771: \ + __FILL_RETURN_SLOT \ + __FILL_RETURN_SLOT \ + add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \ + dec reg; \ + jnz 771b; \ + /* barrier for jnz misprediction */ \ + lfence; + +/* + * Stuff a single RSB slot. + * + * To mitigate Post-Barrier RSB speculation, one CALL instruction must be + * forced to retire before letting a RET instruction execute. + * + * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed + * before this point. + */ +#define __FILL_ONE_RETURN \ + __FILL_RETURN_SLOT \ + add $(BITS_PER_LONG/8), %_ASM_SP; \ lfence; #ifdef __ASSEMBLY__ @@ -132,28 +143,15 @@ #endif .endm -.macro ISSUE_UNBALANCED_RET_GUARD - ANNOTATE_INTRA_FUNCTION_CALL - call .Lunbalanced_ret_guard_\@ - int3 -.Lunbalanced_ret_guard_\@: - add $(BITS_PER_LONG/8), %_ASM_SP - lfence -.endm - /* * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP * monstrosity above, manually. */ -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2 -.ifb \ftr2 - ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr -.else - ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2 -.endif - __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) -.Lunbalanced_\@: - ISSUE_UNBALANCED_RET_GUARD +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS) + ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \ + __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \ + __stringify(__FILL_ONE_RETURN), \ftr2 + .Lskip_rsb_\@: .endm From 332924973725e8cdcc783c175f68cf7e162cb9e5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 Aug 2022 13:01:35 +0200 Subject: [PATCH 109/654] x86/nospec: Fix i386 RSB stuffing Turns out that i386 doesn't unconditionally have LFENCE, as such the loop in __FILL_RETURN_BUFFER isn't actually speculation safe on such chips. Fixes: ba6e31af2be9 ("x86/speculation: Add LFENCE to RSB fill sequence") Reported-by: Ben Hutchings Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/Yv9tj9vbQ9nNlXoY@worktop.programming.kicks-ass.net --- arch/x86/include/asm/nospec-branch.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 10731ccfed376..c936ce9f0c47c 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -50,6 +50,7 @@ * the optimal version - two calls, each with their own speculation * trap should their return address end up getting used, in a loop. */ +#ifdef CONFIG_X86_64 #define __FILL_RETURN_BUFFER(reg, nr) \ mov $(nr/2), reg; \ 771: \ @@ -60,6 +61,17 @@ jnz 771b; \ /* barrier for jnz misprediction */ \ lfence; +#else +/* + * i386 doesn't unconditionally have LFENCE, as such it can't + * do a loop. + */ +#define __FILL_RETURN_BUFFER(reg, nr) \ + .rept nr; \ + __FILL_RETURN_SLOT; \ + .endr; \ + add $(BITS_PER_LONG/8) * nr, %_ASM_SP; +#endif /* * Stuff a single RSB slot. From 1441ca1494d74a2c9d6c9bd8dd633d9ebff1daba Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Fri, 22 Jul 2022 19:43:16 -0700 Subject: [PATCH 110/654] kvm: x86: mmu: Drop the need_remote_flush() function This is only used by kvm_mmu_pte_write(), which no longer actually creates the new SPTE and instead just clears the old SPTE. So we just need to check if the old SPTE was shadow-present instead of calling need_remote_flush(). Hence we can drop this function. It was incomplete anyway as it didn't take access-tracking into account. This patch should not result in any functional change. Signed-off-by: Junaid Shahid Reviewed-by: David Matlack Reviewed-by: Sean Christopherson Message-Id: <20220723024316.2725328-1-junaids@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 126fa9aec64cd..fae2c0863e45b 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5361,19 +5361,6 @@ void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu) __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu); } -static bool need_remote_flush(u64 old, u64 new) -{ - if (!is_shadow_present_pte(old)) - return false; - if (!is_shadow_present_pte(new)) - return true; - if ((old ^ new) & SPTE_BASE_ADDR_MASK) - return true; - old ^= shadow_nx_mask; - new ^= shadow_nx_mask; - return (old & ~new & SPTE_PERM_MASK) != 0; -} - static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, int *bytes) { @@ -5519,7 +5506,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL); if (gentry && sp->role.level != PG_LEVEL_4K) ++vcpu->kvm->stat.mmu_pde_zapped; - if (need_remote_flush(entry, *spte)) + if (is_shadow_present_pte(entry)) flush = true; ++spte; } From b64d740ea7ddc929d97b28de4c0665f7d5db9e2a Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Wed, 10 Aug 2022 15:49:39 -0700 Subject: [PATCH 111/654] kvm: x86: mmu: Always flush TLBs when enabling dirty logging When A/D bits are not available, KVM uses a software access tracking mechanism, which involves making the SPTEs inaccessible. However, the clear_young() MMU notifier does not flush TLBs. So it is possible that there may still be stale, potentially writable, TLB entries. This is usually fine, but can be problematic when enabling dirty logging, because it currently only does a TLB flush if any SPTEs were modified. But if all SPTEs are in access-tracked state, then there won't be a TLB flush, which means that the guest could still possibly write to memory and not have it reflected in the dirty bitmap. So just unconditionally flush the TLBs when enabling dirty logging. As an alternative, KVM could explicitly check the MMU-Writable bit when write-protecting SPTEs to decide if a flush is needed (instead of checking the Writable bit), but given that a flush almost always happens anyway, so just making it unconditional seems simpler. Signed-off-by: Junaid Shahid Message-Id: <20220810224939.2611160-1-junaids@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 45 +++++++---------------------------------- arch/x86/kvm/mmu/spte.h | 14 +++++++++---- arch/x86/kvm/x86.c | 44 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 42 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index fae2c0863e45b..e418ef3ecfcb8 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6072,47 +6072,18 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, const struct kvm_memory_slot *memslot, int start_level) { - bool flush = false; - if (kvm_memslots_have_rmaps(kvm)) { write_lock(&kvm->mmu_lock); - flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect, - start_level, KVM_MAX_HUGEPAGE_LEVEL, - false); + slot_handle_level(kvm, memslot, slot_rmap_write_protect, + start_level, KVM_MAX_HUGEPAGE_LEVEL, false); write_unlock(&kvm->mmu_lock); } if (is_tdp_mmu_enabled(kvm)) { read_lock(&kvm->mmu_lock); - flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level); + kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level); read_unlock(&kvm->mmu_lock); } - - /* - * Flush TLBs if any SPTEs had to be write-protected to ensure that - * guest writes are reflected in the dirty bitmap before the memslot - * update completes, i.e. before enabling dirty logging is visible to - * userspace. - * - * Perform the TLB flush outside the mmu_lock to reduce the amount of - * time the lock is held. However, this does mean that another CPU can - * now grab mmu_lock and encounter a write-protected SPTE while CPUs - * still have a writable mapping for the associated GFN in their TLB. - * - * This is safe but requires KVM to be careful when making decisions - * based on the write-protection status of an SPTE. Specifically, KVM - * also write-protects SPTEs to monitor changes to guest page tables - * during shadow paging, and must guarantee no CPUs can write to those - * page before the lock is dropped. As mentioned in the previous - * paragraph, a write-protected SPTE is no guarantee that CPU cannot - * perform writes. So to determine if a TLB flush is truly required, KVM - * will clear a separate software-only bit (MMU-writable) and skip the - * flush if-and-only-if this bit was already clear. - * - * See is_writable_pte() for more details. - */ - if (flush) - kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); } static inline bool need_topup(struct kvm_mmu_memory_cache *cache, int min) @@ -6480,32 +6451,30 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, const struct kvm_memory_slot *memslot) { - bool flush = false; - if (kvm_memslots_have_rmaps(kvm)) { write_lock(&kvm->mmu_lock); /* * Clear dirty bits only on 4k SPTEs since the legacy MMU only * support dirty logging at a 4k granularity. */ - flush = slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false); + slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false); write_unlock(&kvm->mmu_lock); } if (is_tdp_mmu_enabled(kvm)) { read_lock(&kvm->mmu_lock); - flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot); + kvm_tdp_mmu_clear_dirty_slot(kvm, memslot); read_unlock(&kvm->mmu_lock); } /* + * The caller will flush the TLBs after this function returns. + * * It's also safe to flush TLBs out of mmu lock here as currently this * function is only used for dirty logging, in which case flushing TLB * out of mmu lock also guarantees no dirty pages will be lost in * dirty_bitmap. */ - if (flush) - kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); } void kvm_mmu_zap_all(struct kvm *kvm) diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index f3744eea45f5d..7670c13ce251b 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -343,7 +343,7 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, } /* - * An shadow-present leaf SPTE may be non-writable for 3 possible reasons: + * A shadow-present leaf SPTE may be non-writable for 4 possible reasons: * * 1. To intercept writes for dirty logging. KVM write-protects huge pages * so that they can be split be split down into the dirty logging @@ -361,8 +361,13 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, * read-only memslot or guest memory backed by a read-only VMA. Writes to * such pages are disallowed entirely. * - * To keep track of why a given SPTE is write-protected, KVM uses 2 - * software-only bits in the SPTE: + * 4. To emulate the Accessed bit for SPTEs without A/D bits. Note, in this + * case, the SPTE is access-protected, not just write-protected! + * + * For cases #1 and #4, KVM can safely make such SPTEs writable without taking + * mmu_lock as capturing the Accessed/Dirty state doesn't require taking it. + * To differentiate #1 and #4 from #2 and #3, KVM uses two software-only bits + * in the SPTE: * * shadow_mmu_writable_mask, aka MMU-writable - * Cleared on SPTEs that KVM is currently write-protecting for shadow paging @@ -391,7 +396,8 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, * shadow page tables between vCPUs. Write-protecting an SPTE for dirty logging * (which does not clear the MMU-writable bit), does not flush TLBs before * dropping the lock, as it only needs to synchronize guest writes with the - * dirty bitmap. + * dirty bitmap. Similarly, making the SPTE inaccessible (and non-writable) for + * access-tracking via the clear_young() MMU notifier also does not flush TLBs. * * So, there is the problem: clearing the MMU-writable bit can encounter a * write-protected SPTE while CPUs still have writable mappings for that SPTE diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 205ebdc2b11bc..d7374d768296f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12473,6 +12473,50 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, } else { kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K); } + + /* + * Unconditionally flush the TLBs after enabling dirty logging. + * A flush is almost always going to be necessary (see below), + * and unconditionally flushing allows the helpers to omit + * the subtly complex checks when removing write access. + * + * Do the flush outside of mmu_lock to reduce the amount of + * time mmu_lock is held. Flushing after dropping mmu_lock is + * safe as KVM only needs to guarantee the slot is fully + * write-protected before returning to userspace, i.e. before + * userspace can consume the dirty status. + * + * Flushing outside of mmu_lock requires KVM to be careful when + * making decisions based on writable status of an SPTE, e.g. a + * !writable SPTE doesn't guarantee a CPU can't perform writes. + * + * Specifically, KVM also write-protects guest page tables to + * monitor changes when using shadow paging, and must guarantee + * no CPUs can write to those page before mmu_lock is dropped. + * Because CPUs may have stale TLB entries at this point, a + * !writable SPTE doesn't guarantee CPUs can't perform writes. + * + * KVM also allows making SPTES writable outside of mmu_lock, + * e.g. to allow dirty logging without taking mmu_lock. + * + * To handle these scenarios, KVM uses a separate software-only + * bit (MMU-writable) to track if a SPTE is !writable due to + * a guest page table being write-protected (KVM clears the + * MMU-writable flag when write-protecting for shadow paging). + * + * The use of MMU-writable is also the primary motivation for + * the unconditional flush. Because KVM must guarantee that a + * CPU doesn't contain stale, writable TLB entries for a + * !MMU-writable SPTE, KVM must flush if it encounters any + * MMU-writable SPTE regardless of whether the actual hardware + * writable bit was set. I.e. KVM is almost guaranteed to need + * to flush, while unconditionally flushing allows the "remove + * write access" helpers to ignore MMU-writable entirely. + * + * See is_writable_pte() for more details (the case involving + * access-tracked SPTEs is particularly relevant). + */ + kvm_arch_flush_remote_tlbs_memslot(kvm, new); } } From 020dac4187968535f089f83f376a72beb3451311 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 10 Aug 2022 14:30:50 -0700 Subject: [PATCH 112/654] KVM: VMX: Heed the 'msr' argument in msr_write_intercepted() Regardless of the 'msr' argument passed to the VMX version of msr_write_intercepted(), the function always checks to see if a specific MSR (IA32_SPEC_CTRL) is intercepted for write. This behavior seems unintentional and unexpected. Modify the function so that it checks to see if the provided 'msr' index is intercepted for write. Fixes: 67f4b9969c30 ("KVM: nVMX: Handle dynamic MSR intercept toggling") Cc: Sean Christopherson Signed-off-by: Jim Mattson Reviewed-by: Sean Christopherson Message-Id: <20220810213050.2655000-1-jmattson@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d7f8331d6f7e7..c9b49a09e6b53 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -843,8 +843,7 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS)) return true; - return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, - MSR_IA32_SPEC_CTRL); + return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, msr); } unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) From 67ef8664cc5b113f6c49b01d2a0e4cbc589623dd Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 22 Jul 2022 23:48:37 +0000 Subject: [PATCH 113/654] KVM: selftests: Fix KVM_EXCEPTION_MAGIC build with Clang Change KVM_EXCEPTION_MAGIC to use the all-caps "ULL", rather than lower case. This fixes a build failure with Clang: In file included from x86_64/hyperv_features.c:13: include/x86_64/processor.h:825:9: error: unexpected token in argument list return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr)); ^ include/x86_64/processor.h:802:15: note: expanded from macro 'kvm_asm_safe' asm volatile(KVM_ASM_SAFE(insn) \ ^ include/x86_64/processor.h:785:2: note: expanded from macro 'KVM_ASM_SAFE' "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \ ^ :1:18: note: instantiated into assembly here mov $0xabacadabaull, %r9 ^ Fixes: 3b23054cd3f5 ("KVM: selftests: Add x86-64 support for exception fixup") Signed-off-by: David Matlack Reviewed-by: Sean Christopherson Message-Id: <20220722234838.2160385-2-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/x86_64/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 45edf45821d05..51c6661aca772 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -754,7 +754,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); /* If a toddler were to say "abracadabra". */ -#define KVM_EXCEPTION_MAGIC 0xabacadabaull +#define KVM_EXCEPTION_MAGIC 0xabacadabaULL /* * KVM selftest exception fixup uses registers to coordinate with the exception From 372d07084593dc7a399bf9bee815711b1fb1bcf2 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 22 Jul 2022 23:48:38 +0000 Subject: [PATCH 114/654] KVM: selftests: Fix ambiguous mov in KVM_ASM_SAFE() Change the mov in KVM_ASM_SAFE() that zeroes @vector to a movb to make it unambiguous. This fixes a build failure with Clang since, unlike the GNU assembler, the LLVM integrated assembler rejects ambiguous X86 instructions that don't have suffixes: In file included from x86_64/hyperv_features.c:13: include/x86_64/processor.h:825:9: error: ambiguous instructions require an explicit suffix (could be 'movb', 'movw', 'movl', or 'movq') return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr)); ^ include/x86_64/processor.h:802:15: note: expanded from macro 'kvm_asm_safe' asm volatile(KVM_ASM_SAFE(insn) \ ^ include/x86_64/processor.h:788:16: note: expanded from macro 'KVM_ASM_SAFE' "1: " insn "\n\t" \ ^ :5:2: note: instantiated into assembly here mov $0, 15(%rsp) ^ It seems like this change could introduce undesirable behavior in the future, e.g. if someone used a type larger than a u8 for @vector, since KVM_ASM_SAFE() will only zero the bottom byte. I tried changing the type of @vector to an int to see what would happen. GCC failed to compile due to a size mismatch between `movb` and `%eax`. Clang succeeded in compiling, but the generated code looked correct, so perhaps it will not be an issue. That being said it seems like there could be a better solution to this issue that does not assume @vector is a u8. Fixes: 3b23054cd3f5 ("KVM: selftests: Add x86-64 support for exception fixup") Signed-off-by: David Matlack Reviewed-by: Sean Christopherson Message-Id: <20220722234838.2160385-3-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/x86_64/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 51c6661aca772..0cbc71b7af50a 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -786,7 +786,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, "lea 1f(%%rip), %%r10\n\t" \ "lea 2f(%%rip), %%r11\n\t" \ "1: " insn "\n\t" \ - "mov $0, %[vector]\n\t" \ + "movb $0, %[vector]\n\t" \ "jmp 3f\n\t" \ "2:\n\t" \ "mov %%r9b, %[vector]\n\t" \ From ea2aa97ca37a9044ade001aef71dbc06318e8d44 Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Fri, 19 Aug 2022 15:28:34 +0800 Subject: [PATCH 115/654] drm/gem: Fix GEM handle release errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we are assuming a one to one mapping between dmabuf and GEM handle when releasing GEM handles. But that is not always true, since we would create extra handles for the GEM obj in cases like gem_open() and getfb{,2}(). A similar issue was reported at: https://lore.kernel.org/all/20211105083308.392156-1-jay.xu@rock-chips.com/ Another problem is that the imported dmabuf might not always have gem_obj->dma_buf set, which would cause leaks in drm_gem_remove_prime_handles(). Let's fix these for now by using handle to find the exact map to remove. Signed-off-by: Jeffy Chen Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20220819072834.17888-1-jeffy.chen@rock-chips.com --- drivers/gpu/drm/drm_gem.c | 17 +---------------- drivers/gpu/drm/drm_internal.h | 4 ++-- drivers/gpu/drm/drm_prime.c | 20 ++++++++++++-------- 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 86d670c712867..ad068865ba206 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -168,21 +168,6 @@ void drm_gem_private_object_init(struct drm_device *dev, } EXPORT_SYMBOL(drm_gem_private_object_init); -static void -drm_gem_remove_prime_handles(struct drm_gem_object *obj, struct drm_file *filp) -{ - /* - * Note: obj->dma_buf can't disappear as long as we still hold a - * handle reference in obj->handle_count. - */ - mutex_lock(&filp->prime.lock); - if (obj->dma_buf) { - drm_prime_remove_buf_handle_locked(&filp->prime, - obj->dma_buf); - } - mutex_unlock(&filp->prime.lock); -} - /** * drm_gem_object_handle_free - release resources bound to userspace handles * @obj: GEM object to clean up. @@ -253,7 +238,7 @@ drm_gem_object_release_handle(int id, void *ptr, void *data) if (obj->funcs->close) obj->funcs->close(obj, file_priv); - drm_gem_remove_prime_handles(obj, file_priv); + drm_prime_remove_buf_handle(&file_priv->prime, id); drm_vma_node_revoke(&obj->vma_node, file_priv); drm_gem_object_handle_put_unlocked(obj); diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 1fbbc19f1ac09..7bb98e6a446d0 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -74,8 +74,8 @@ int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data, void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv); void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv); -void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv, - struct dma_buf *dma_buf); +void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv, + uint32_t handle); /* drm_drv.c */ struct drm_minor *drm_minor_acquire(unsigned int minor_id); diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index a3f180653b8bb..eb09e86044c6d 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -190,29 +190,33 @@ static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpri return -ENOENT; } -void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv, - struct dma_buf *dma_buf) +void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv, + uint32_t handle) { struct rb_node *rb; - rb = prime_fpriv->dmabufs.rb_node; + mutex_lock(&prime_fpriv->lock); + + rb = prime_fpriv->handles.rb_node; while (rb) { struct drm_prime_member *member; - member = rb_entry(rb, struct drm_prime_member, dmabuf_rb); - if (member->dma_buf == dma_buf) { + member = rb_entry(rb, struct drm_prime_member, handle_rb); + if (member->handle == handle) { rb_erase(&member->handle_rb, &prime_fpriv->handles); rb_erase(&member->dmabuf_rb, &prime_fpriv->dmabufs); - dma_buf_put(dma_buf); + dma_buf_put(member->dma_buf); kfree(member); - return; - } else if (member->dma_buf < dma_buf) { + break; + } else if (member->handle < handle) { rb = rb->rb_right; } else { rb = rb->rb_left; } } + + mutex_unlock(&prime_fpriv->lock); } void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv) From 636c3982d296a560aabcc5e462c3a6408389ffd2 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 16 Aug 2022 09:39:52 +0200 Subject: [PATCH 116/654] MAINTAINERS: rectify entry for XILINX GPIO DRIVER Commit ba96b2e7974b ("dt-bindings: gpio: gpio-xilinx: Convert Xilinx axi gpio binding to YAML") converts gpio-xilinx.txt to xlnx,gpio-xilinx.yaml, but missed to adjust its reference in MAINTAINERS. Hence, ./scripts/get_maintainer.pl --self-test=patterns complains about a broken reference. Repair this file reference in XILINX GPIO DRIVER. Signed-off-by: Lukas Bulwahn Signed-off-by: Bartosz Golaszewski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8a5012ba6ff98..1f2530771ff4f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22301,7 +22301,7 @@ M: Shubhrajyoti Datta R: Srinivas Neeli R: Michal Simek S: Maintained -F: Documentation/devicetree/bindings/gpio/gpio-xilinx.txt +F: Documentation/devicetree/bindings/gpio/xlnx,gpio-xilinx.yaml F: Documentation/devicetree/bindings/gpio/gpio-zynq.yaml F: drivers/gpio/gpio-xilinx.c F: drivers/gpio/gpio-zynq.c From 3f4e432fb9c6357b4b9bce1def67d61a215029eb Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 15 Aug 2022 11:19:29 +0200 Subject: [PATCH 117/654] gpio: pxa: use devres for the clock struct The clock is never released after probe(). Use devres to not leak resources. Reported-by: Hulk Robot Reported-by: Yuan Can Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij --- drivers/gpio/gpio-pxa.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index c7fbfa3ae43b9..1198ab0305d03 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -661,24 +661,17 @@ static int pxa_gpio_probe(struct platform_device *pdev) if (IS_ERR(gpio_reg_base)) return PTR_ERR(gpio_reg_base); - clk = clk_get(&pdev->dev, NULL); + clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(clk)) { dev_err(&pdev->dev, "Error %ld to get gpio clock\n", PTR_ERR(clk)); return PTR_ERR(clk); } - ret = clk_prepare_enable(clk); - if (ret) { - clk_put(clk); - return ret; - } /* Initialize GPIO chips */ ret = pxa_init_gpio_chip(pchip, pxa_last_gpio + 1, gpio_reg_base); - if (ret) { - clk_put(clk); + if (ret) return ret; - } /* clear all GPIO edge detects */ for_each_gpio_bank(gpio, c, pchip) { From 04d4ca41809052f6088860fe150dac679e6453d0 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Fri, 19 Aug 2022 07:34:40 +0900 Subject: [PATCH 118/654] docs/ja_JP/SubmittingPatches: Remove reference to submitting-drivers.rst Reflect changes made in commit 9db370de2780 ("docs: process: remove outdated submitting-drivers.rst") Reported-by: Mauro Carvalho Chehab Signed-off-by: Akira Yokosawa Fixes: 9db370de2780 ("docs: process: remove outdated submitting-drivers.rst") Cc: Tsugikazu Shibata Link: https://lore.kernel.org/r/20220818223440.13530-1-akiyks@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/translations/ja_JP/SubmittingPatches | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/translations/ja_JP/SubmittingPatches b/Documentation/translations/ja_JP/SubmittingPatches index 66ce0d8b05264..04deb77b20c6f 100644 --- a/Documentation/translations/ja_JP/SubmittingPatches +++ b/Documentation/translations/ja_JP/SubmittingPatches @@ -35,8 +35,7 @@ Linux カーネルに変更を加えたいと思っている個人又は会社 てもらえやすくする提案を集めたものです。 コードを投稿する前に、Documentation/process/submit-checklist.rst の項目リストに目 -を通してチェックしてください。もしあなたがドライバーを投稿しようとし -ているなら、Documentation/process/submitting-drivers.rst にも目を通してください。 +を通してチェックしてください。 -------------------------------------------- セクション1 パッチの作り方と送り方 From 32ba156df1b1c8804a4e5be5339616945eafea22 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 16 Aug 2022 05:56:11 -0700 Subject: [PATCH 119/654] perf/x86/lbr: Enable the branch type for the Arch LBR by default On the platform with Arch LBR, the HW raw branch type encoding may leak to the perf tool when the SAVE_TYPE option is not set. In the intel_pmu_store_lbr(), the HW raw branch type is stored in lbr_entries[].type. If the SAVE_TYPE option is set, the lbr_entries[].type will be converted into the generic PERF_BR_* type in the intel_pmu_lbr_filter() and exposed to the user tools. But if the SAVE_TYPE option is NOT set by the user, the current perf kernel doesn't clear the field. The HW raw branch type leaks. There are two solutions to fix the issue for the Arch LBR. One is to clear the field if the SAVE_TYPE option is NOT set. The other solution is to unconditionally convert the branch type and expose the generic type to the user tools. The latter is implemented here, because - The branch type is valuable information. I don't see a case where you would not benefit from the branch type. (Stephane Eranian) - Not having the branch type DOES NOT save any space in the branch record (Stephane Eranian) - The Arch LBR HW can retrieve the common branch types from the LBR_INFO. It doesn't require the high overhead SW disassemble. Fixes: 47125db27e47 ("perf/x86/intel/lbr: Support Architectural LBR") Reported-by: Stephane Eranian Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20220816125612.2042397-1-kan.liang@linux.intel.com --- arch/x86/events/intel/lbr.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 4f70fb6c2c1eb..47fca6a7a8bcd 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1097,6 +1097,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) if (static_cpu_has(X86_FEATURE_ARCH_LBR)) { reg->config = mask; + + /* + * The Arch LBR HW can retrieve the common branch types + * from the LBR_INFO. It doesn't require the high overhead + * SW disassemble. + * Enable the branch type by default for the Arch LBR. + */ + reg->reg |= X86_BR_TYPE_SAVE; return 0; } From 7d3598868aaee05eb738d1c3115616b867e7530a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Aug 2022 19:40:57 +0800 Subject: [PATCH 120/654] perf/x86/core: Set pebs_capable and PMU_FL_PEBS_ALL for the Baseline The SDM explicitly states that PEBS Baseline implies Extended PEBS. For cpu model forward compatibility (e.g. on ICX, SPR, ADL), it's safe to stop doing FMS table thing such as setting pebs_capable and PMU_FL_PEBS_ALL since it's already set in the intel_ds_init(). The Goldmont Plus is the only platform which supports extended PEBS but doesn't have Baseline. Keep the status quo. Reported-by: Like Xu Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lkml.kernel.org/r/20220816114057.51307-1-likexu@tencent.com --- arch/x86/events/intel/core.c | 4 ---- arch/x86/events/intel/ds.c | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2db93498ff711..cb98a05ee7437 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6291,10 +6291,8 @@ __init int intel_pmu_init(void) x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; x86_pmu.pebs_block = true; - x86_pmu.pebs_capable = ~0ULL; x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.flags |= PMU_FL_PEBS_ALL; x86_pmu.flags |= PMU_FL_INSTR_LATENCY; x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; @@ -6337,10 +6335,8 @@ __init int intel_pmu_init(void) x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; x86_pmu.pebs_block = true; - x86_pmu.pebs_capable = ~0ULL; x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.flags |= PMU_FL_PEBS_ALL; x86_pmu.flags |= PMU_FL_INSTR_LATENCY; x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; x86_pmu.lbr_pt_coexist = true; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index ba60427caa6d3..ac6dd4c96dbc1 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2262,6 +2262,7 @@ void __init intel_ds_init(void) PERF_SAMPLE_BRANCH_STACK | PERF_SAMPLE_TIME; x86_pmu.flags |= PMU_FL_PEBS_ALL; + x86_pmu.pebs_capable = ~0ULL; pebs_qual = "-baseline"; x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; } else { From d4bdb0bebc5ba3299d74f123c782d99cd4e25c49 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 17 Aug 2022 22:46:13 -0700 Subject: [PATCH 121/654] perf/x86/intel/ds: Fix precise store latency handling With the existing code in store_latency_data(), the memory operation (mem_op) returned to the user is always OP_LOAD where in fact, it should be OP_STORE. This comes from the fact that the function is simply grabbing the information from a data source map which covers only load accesses. Intel 12th gen CPU offers precise store sampling that captures both the data source and latency. Therefore it can use the data source mapping table but must override the memory operation to reflect stores instead of loads. Fixes: 61b985e3e775 ("perf/x86/intel: Add perf core PMU support for Sapphire Rapids") Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220818054613.1548130-1-eranian@google.com --- arch/x86/events/intel/ds.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index ac6dd4c96dbc1..e5b5874991227 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -291,6 +291,7 @@ static u64 load_latency_data(struct perf_event *event, u64 status) static u64 store_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; + union perf_mem_data_src src; u64 val; dse.val = status; @@ -304,7 +305,14 @@ static u64 store_latency_data(struct perf_event *event, u64 status) val |= P(BLK, NA); - return val; + /* + * the pebs_data_source table is only for loads + * so override the mem_op to say STORE instead + */ + src.val = val; + src.mem_op = P(OP,STORE); + + return src.val; } struct pebs_record_core { From cde643ff75bc20c538dfae787ca3b587bab16b50 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Aug 2022 11:44:29 -0700 Subject: [PATCH 122/654] perf/x86/intel: Fix pebs event constraints for ADL According to the latest event list, the LOAD_LATENCY PEBS event only works on the GP counter 0 and 1 for ADL and RPL. Update the pebs event constraints table. Fixes: f83d2f91d259 ("perf/x86/intel: Add Alder Lake Hybrid support") Reported-by: Ammy Yi Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20220818184429.2355857-1-kan.liang@linux.intel.com --- arch/x86/events/intel/ds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index e5b5874991227..de1f55d517847 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -830,7 +830,7 @@ struct event_constraint intel_glm_pebs_event_constraints[] = { struct event_constraint intel_grt_pebs_event_constraints[] = { /* Allow all events as PEBS with no flags */ - INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xf), + INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3), INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf), EVENT_CONSTRAINT_END }; From fd0cd59f322b1919bfc68cd245d51906f7f1ba2a Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sun, 14 Aug 2022 15:12:35 +0100 Subject: [PATCH 123/654] riscv: kvm: vcpu_timer: fix unused variable warnings In two places, csr is set but never used: arch/riscv/kvm/vcpu_timer.c:302:23: warning: variable 'csr' set but not used [-Wunused-but-set-variable] struct kvm_vcpu_csr *csr; ^ arch/riscv/kvm/vcpu_timer.c:327:23: warning: variable 'csr' set but not used [-Wunused-but-set-variable] struct kvm_vcpu_csr *csr; ^ Remove the variable. Fixes: 8f5cb44b1bae ("RISC-V: KVM: Support sstc extension") Reviewed-by: Palmer Dabbelt Signed-off-by: Conor Dooley Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_timer.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index 16f50c46ba394..185f2386a747e 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -299,7 +299,6 @@ static void kvm_riscv_vcpu_update_timedelta(struct kvm_vcpu *vcpu) void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu) { - struct kvm_vcpu_csr *csr; struct kvm_vcpu_timer *t = &vcpu->arch.timer; kvm_riscv_vcpu_update_timedelta(vcpu); @@ -307,7 +306,6 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu) if (!t->sstc_enabled) return; - csr = &vcpu->arch.guest_csr; #if defined(CONFIG_32BIT) csr_write(CSR_VSTIMECMP, (u32)t->next_cycles); csr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32)); @@ -324,13 +322,11 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu) void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu) { - struct kvm_vcpu_csr *csr; struct kvm_vcpu_timer *t = &vcpu->arch.timer; if (!t->sstc_enabled) return; - csr = &vcpu->arch.guest_csr; t = &vcpu->arch.timer; #if defined(CONFIG_32BIT) t->next_cycles = csr_read(CSR_VSTIMECMP); From 3e5e56c60a14776e2a49837b55b03bc193fd91f7 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sun, 14 Aug 2022 15:12:36 +0100 Subject: [PATCH 124/654] riscv: kvm: move extern sbi_ext declarations to a header Sparse complains about missing statics in the declarations of several variables: arch/riscv/kvm/vcpu_sbi_replace.c:38:37: warning: symbol 'vcpu_sbi_ext_time' was not declared. Should it be static? arch/riscv/kvm/vcpu_sbi_replace.c:73:37: warning: symbol 'vcpu_sbi_ext_ipi' was not declared. Should it be static? arch/riscv/kvm/vcpu_sbi_replace.c:126:37: warning: symbol 'vcpu_sbi_ext_rfence' was not declared. Should it be static? arch/riscv/kvm/vcpu_sbi_replace.c:170:37: warning: symbol 'vcpu_sbi_ext_srst' was not declared. Should it be static? arch/riscv/kvm/vcpu_sbi_base.c:69:37: warning: symbol 'vcpu_sbi_ext_base' was not declared. Should it be static? arch/riscv/kvm/vcpu_sbi_base.c:90:37: warning: symbol 'vcpu_sbi_ext_experimental' was not declared. Should it be static? arch/riscv/kvm/vcpu_sbi_base.c:96:37: warning: symbol 'vcpu_sbi_ext_vendor' was not declared. Should it be static? arch/riscv/kvm/vcpu_sbi_hsm.c:115:37: warning: symbol 'vcpu_sbi_ext_hsm' was not declared. Should it be static? These variables are however used in vcpu_sbi.c where they are declared as extern. Move them to kvm_vcpu_sbi.h which is handily already included by the three other files. Fixes: a046c2d8578c ("RISC-V: KVM: Reorganize SBI code by moving SBI v0.1 to its own file") Fixes: 5f862df5585c ("RISC-V: KVM: Add v0.1 replacement SBI extensions defined in v0.2") Fixes: 3e1d86569c21 ("RISC-V: KVM: Add SBI HSM extension in KVM") Reviewed-by: Palmer Dabbelt Signed-off-by: Conor Dooley Signed-off-by: Anup Patel --- arch/riscv/include/asm/kvm_vcpu_sbi.h | 12 ++++++++++++ arch/riscv/kvm/vcpu_sbi.c | 12 +----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h index 83d6d4d2b1dff..26a446a34057b 100644 --- a/arch/riscv/include/asm/kvm_vcpu_sbi.h +++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h @@ -33,4 +33,16 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, u32 type, u64 flags); const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid); +#ifdef CONFIG_RISCV_SBI_V01 +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01; +#endif +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor; + #endif /* __RISCV_KVM_VCPU_SBI_H__ */ diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index d45e7da3f0d32..f96991d230bfc 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -32,23 +32,13 @@ static int kvm_linux_err_map_sbi(int err) }; } -#ifdef CONFIG_RISCV_SBI_V01 -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01; -#else +#ifndef CONFIG_RISCV_SBI_V01 static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = { .extid_start = -1UL, .extid_end = -1UL, .handler = NULL, }; #endif -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor; static const struct kvm_vcpu_sbi_extension *sbi_ext[] = { &vcpu_sbi_ext_v01, From 65fac0d54f374625b43a9d6ad1f2c212bd41f518 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 26 Jul 2022 20:22:24 +0800 Subject: [PATCH 125/654] blk-mq: fix io hung due to missing commit_rqs Currently, in virtio_scsi, if 'bd->last' is not set to true while dispatching request, such io will stay in driver's queue, and driver will wait for block layer to dispatch more rqs. However, if block layer failed to dispatch more rq, it should trigger commit_rqs to inform driver. There is a problem in blk_mq_try_issue_list_directly() that commit_rqs won't be called: // assume that queue_depth is set to 1, list contains two rq blk_mq_try_issue_list_directly blk_mq_request_issue_directly // dispatch first rq // last is false __blk_mq_try_issue_directly blk_mq_get_dispatch_budget // succeed to get first budget __blk_mq_issue_directly scsi_queue_rq cmd->flags |= SCMD_LAST virtscsi_queuecommand kick = (sc->flags & SCMD_LAST) != 0 // kick is false, first rq won't issue to disk queued++ blk_mq_request_issue_directly // dispatch second rq __blk_mq_try_issue_directly blk_mq_get_dispatch_budget // failed to get second budget ret == BLK_STS_RESOURCE blk_mq_request_bypass_insert // errors is still 0 if (!list_empty(list) || errors && ...) // won't pass, commit_rqs won't be called In this situation, first rq relied on second rq to dispatch, while second rq relied on first rq to complete, thus they will both hung. Fix the problem by also treat 'BLK_STS_*RESOURCE' as 'errors' since it means that request is not queued successfully. Same problem exists in blk_mq_dispatch_rq_list(), 'BLK_STS_*RESOURCE' can't be treated as 'errors' here, fix the problem by calling commit_rqs if queue_rq return 'BLK_STS_*RESOURCE'. Fixes: d666ba98f849 ("blk-mq: add mq_ops->commit_rqs()") Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220726122224.1790882-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 3c1e6b6d991d2..c96c8c4f751b3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1931,7 +1931,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, /* If we didn't flush the entire list, we could have told the driver * there was more coming, but that turned out to be a lie. */ - if ((!list_empty(list) || errors) && q->mq_ops->commit_rqs && queued) + if ((!list_empty(list) || errors || needs_resource || + ret == BLK_STS_DEV_RESOURCE) && q->mq_ops->commit_rqs && queued) q->mq_ops->commit_rqs(hctx); /* * Any items that need requeuing? Stuff them into hctx->dispatch, @@ -2660,6 +2661,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, list_del_init(&rq->queuelist); ret = blk_mq_request_issue_directly(rq, list_empty(list)); if (ret != BLK_STS_OK) { + errors++; if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { blk_mq_request_bypass_insert(rq, false, @@ -2667,7 +2669,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, break; } blk_mq_end_request(rq, ret); - errors++; } else queued++; } From 43ef9db423bdce1df504d4d10e25092d427f04e3 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 15 Aug 2022 14:04:49 +0800 Subject: [PATCH 126/654] drm/amdgpu: enable GFXOFF allow control for GC IP v11.0.1 Enable GFXOFF allow control when set the GFX power gating. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 158d87e6805d1..f45db80810fa8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -5328,8 +5328,7 @@ static int gfx_v11_0_set_powergating_state(void *handle, break; case IP_VERSION(11, 0, 1): gfx_v11_cntl_pg(adev, enable); - /* TODO: Enable this when GFXOFF is ready */ - // amdgpu_gfx_off_ctrl(adev, enable); + amdgpu_gfx_off_ctrl(adev, enable); break; default: break; From 9d705d7741ae70764f3d6d87e67fad3b5c30ffd0 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Fri, 12 Aug 2022 13:38:34 +0800 Subject: [PATCH 127/654] drm/amdgpu: Move psp_xgmi_terminate call from amdgpu_xgmi_remove_device to psp_hw_fini V1: The amdgpu_xgmi_remove_device function will send unload command to psp through psp ring to terminate xgmi, but psp ring has been destroyed in psp_hw_fini. V2: 1. Change the commit title. 2. Restore amdgpu_xgmi_remove_device to its original calling location. Move psp_xgmi_terminate call from amdgpu_xgmi_remove_device to psp_hw_fini. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index b067ce45d2264..1036446abc308 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2641,6 +2641,9 @@ static int psp_hw_fini(void *handle) psp_rap_terminate(psp); psp_dtm_terminate(psp); psp_hdcp_terminate(psp); + + if (adev->gmc.xgmi.num_physical_nodes > 1) + psp_xgmi_terminate(psp); } psp_asd_terminate(psp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 1b108d03e7859..f2aebbf3fbe38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -742,7 +742,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) amdgpu_put_xgmi_hive(hive); } - return psp_xgmi_terminate(&adev->psp); + return 0; } static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) From f5994da72ba124a3d0463672fdfbec073e3bb72f Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Fri, 12 Aug 2022 14:34:35 +0800 Subject: [PATCH 128/654] drm/amdgpu: fix hive reference leak when adding xgmi device Only amdgpu_get_xgmi_hive but no amdgpu_put_xgmi_hive which will leak the hive reference. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e8a0b19b73985..95ce3687902be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2456,12 +2456,14 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (!hive->reset_domain || !amdgpu_reset_get_reset_domain(hive->reset_domain)) { r = -ENOENT; + amdgpu_put_xgmi_hive(hive); goto init_failed; } /* Drop the early temporary reset domain we created for device */ amdgpu_reset_put_reset_domain(adev->reset_domain); adev->reset_domain = hive->reset_domain; + amdgpu_put_xgmi_hive(hive); } } From c351938350ab9b5e978dede2c321da43de7eb70c Mon Sep 17 00:00:00 2001 From: Candice Li Date: Thu, 18 Aug 2022 10:47:09 +0800 Subject: [PATCH 129/654] drm/amdgpu: Check num_gfx_rings for gfx v9_0 rb setup. No need to set up rb when no gfx rings. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index c6e0f9313a7f7..fc9c1043244cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2587,7 +2587,8 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) gfx_v9_0_tiling_mode_table_init(adev); - gfx_v9_0_setup_rb(adev); + if (adev->gfx.num_gfx_rings) + gfx_v9_0_setup_rb(adev); gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); From 06671734881af2bcf7f453661b5f8616e32bb3fc Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Thu, 18 Aug 2022 14:13:52 -0400 Subject: [PATCH 130/654] drm/amdgpu: Remove the additional kfd pre reset call for sriov The additional call is caused by merge conflict Reviewed-by: Felix Kuehling Signed-off-by: shaoyunl Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 95ce3687902be..f095a2513affc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4415,8 +4415,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, retry: amdgpu_amdkfd_pre_reset(adev); - amdgpu_amdkfd_pre_reset(adev); - if (from_hypervisor) r = amdgpu_virt_request_full_gpu(adev, true); else From 2035590f3d40f227eac453d0c36b5eae85c1cf08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Thu, 18 Aug 2022 10:27:30 -0300 Subject: [PATCH 131/654] drm/amd/display: Include missing header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The file amdgpu_dm_plane.c missed the header amdgpu_dm_plane.h, which resulted on the following warning: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:1046:5: warning: no previous prototype for 'fill_dc_scaling_info' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:1222:6: warning: no previous prototype for 'handle_cursor_update' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:152:6: warning: no previous prototype for 'modifier_has_dcc' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:1576:5: warning: no previous prototype for 'amdgpu_dm_plane_init' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:157:10: warning: no previous prototype for 'modifier_gfx9_swizzle_mode' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:752:5: warning: no previous prototype for 'fill_plane_buffer_attributes' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:83:31: warning: no previous prototype for 'amd_get_format_info' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:88:6: warning: no previous prototype for 'fill_blending_from_plane_state' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_plane.c:992:5: warning: no previous prototype for 'dm_plane_helper_check_state' [-Wmissing-prototypes] Therefore, include the missing header on the file and turn global functions that are not used outside of the file into static functions. Fixes: 5d945cbcd4b1 ("drm/amd/display: Create a file dedicated to planes") Reported-by: kernel test robot Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 5 +++-- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h | 8 -------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index fca7cf9dbaeec..987bde4dca3db 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -34,6 +34,7 @@ #include "dal_asic_id.h" #include "amdgpu_display.h" #include "amdgpu_dm_trace.h" +#include "amdgpu_dm_plane.h" #include "gc/gc_11_0_0_offset.h" #include "gc/gc_11_0_0_sh_mask.h" @@ -149,12 +150,12 @@ static void add_modifier(uint64_t **mods, uint64_t *size, uint64_t *cap, uint64_ *size += 1; } -bool modifier_has_dcc(uint64_t modifier) +static bool modifier_has_dcc(uint64_t modifier) { return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier); } -unsigned modifier_gfx9_swizzle_mode(uint64_t modifier) +static unsigned modifier_gfx9_swizzle_mode(uint64_t modifier) { if (modifier == DRM_FORMAT_MOD_LINEAR) return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h index 95168c2cfa6fa..286981a2dd403 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h @@ -36,17 +36,9 @@ int fill_dc_scaling_info(struct amdgpu_device *adev, const struct drm_plane_state *state, struct dc_scaling_info *scaling_info); -void get_min_max_dc_plane_scaling(struct drm_device *dev, - struct drm_framebuffer *fb, - int *min_downscale, int *max_upscale); - int dm_plane_helper_check_state(struct drm_plane_state *state, struct drm_crtc_state *new_crtc_state); -bool modifier_has_dcc(uint64_t modifier); - -unsigned int modifier_gfx9_swizzle_mode(uint64_t modifier); - int fill_plane_buffer_attributes(struct amdgpu_device *adev, const struct amdgpu_framebuffer *afb, const enum surface_pixel_format format, From f461950fdc374a3ada5a63c669d997de4600dffe Mon Sep 17 00:00:00 2001 From: Zhenneng Li Date: Thu, 11 Aug 2022 15:25:40 +0800 Subject: [PATCH 132/654] drm/radeon: add a force flush to delay work when radeon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although radeon card fence and wait for gpu to finish processing current batch rings, there is still a corner case that radeon lockup work queue may not be fully flushed, and meanwhile the radeon_suspend_kms() function has called pci_set_power_state() to put device in D3hot state. Per PCI spec rev 4.0 on 5.3.1.4.1 D3hot State. > Configuration and Message requests are the only TLPs accepted by a Function in > the D3hot state. All other received Requests must be handled as Unsupported Requests, > and all received Completions may optionally be handled as Unexpected Completions. This issue will happen in following logs: Unable to handle kernel paging request at virtual address 00008800e0008010 CPU 0 kworker/0:3(131): Oops 0 pc = [] ra = [] ps = 0000 Tainted: G W pc is at si_gpu_check_soft_reset+0x3c/0x240 ra is at si_dma_is_lockup+0x34/0xd0 v0 = 0000000000000000 t0 = fff08800e0008010 t1 = 0000000000010000 t2 = 0000000000008010 t3 = fff00007e3c00000 t4 = fff00007e3c00258 t5 = 000000000000ffff t6 = 0000000000000001 t7 = fff00007ef078000 s0 = fff00007e3c016e8 s1 = fff00007e3c00000 s2 = fff00007e3c00018 s3 = fff00007e3c00000 s4 = fff00007fff59d80 s5 = 0000000000000000 s6 = fff00007ef07bd98 a0 = fff00007e3c00000 a1 = fff00007e3c016e8 a2 = 0000000000000008 a3 = 0000000000000001 a4 = 8f5c28f5c28f5c29 a5 = ffffffff810f4338 t8 = 0000000000000275 t9 = ffffffff809b66f8 t10 = ff6769c5d964b800 t11= 000000000000b886 pv = ffffffff811bea20 at = 0000000000000000 gp = ffffffff81d89690 sp = 00000000aa814126 Disabling lock debugging due to kernel taint Trace: [] si_dma_is_lockup+0x34/0xd0 [] radeon_fence_check_lockup+0xd0/0x290 [] process_one_work+0x280/0x550 [] worker_thread+0x70/0x7c0 [] worker_thread+0x130/0x7c0 [] kthread+0x200/0x210 [] worker_thread+0x0/0x7c0 [] kthread+0x14c/0x210 [] ret_from_kernel_thread+0x18/0x20 [] kthread+0x0/0x210 Code: ad3e0008 43f0074a ad7e0018 ad9e0020 8c3001e8 40230101 <88210000> 4821ed21 So force lockup work queue flush to fix this problem. Acked-by: Christian König Signed-off-by: Zhenneng Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 2b12389f841ae..ee0165687239b 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1605,6 +1605,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, if (r) { /* delay GPU reset to resume */ radeon_fence_driver_force_completion(rdev, i); + } else { + /* finish executing delayed work */ + flush_delayed_work(&rdev->fence_drv[i].lockup_work); } } From 26f2da0d2f823dc7180b0505d46318f64d1e0a7a Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 21 Jun 2022 12:11:18 +0300 Subject: [PATCH 133/654] clk: ti: Fix missing of_node_get() ti_find_clock_provider() For ti_find_clock_provider() we want to return the np with refcount incremented. However we are missing of_node_get() for the clock-output-names case that causes refcount warnings. Fixes: 51f661ef9a10 ("clk: ti: Add ti_find_clock_provider() to use clock-output-names") Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20220621091118.33930-1-tony@atomide.com Signed-off-by: Stephen Boyd --- drivers/clk/ti/clk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c index ef2a445c63a3c..373e9438b57a3 100644 --- a/drivers/clk/ti/clk.c +++ b/drivers/clk/ti/clk.c @@ -135,6 +135,7 @@ static struct device_node *ti_find_clock_provider(struct device_node *from, continue; if (!strncmp(n, tmp, strlen(tmp))) { + of_node_get(np); found = true; break; } From 9dbdfd4a9f3416c84d22623e6b5bfe1966e336b7 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 18 Aug 2022 12:40:29 -0400 Subject: [PATCH 134/654] net: dpaa: Fix <1G ethernet on LS1046ARDB As discussed in commit 73a21fa817f0 ("dpaa_eth: support all modes with rate adapting PHYs"), we must add a workaround for Aquantia phys with in-tree support in order to keep 1G support working. Update this workaround for the AQR113C phy found on revision C LS1046ARDB boards. Fixes: 12cf1b89a668 ("net: phy: Add support for AQR113C EPHY") Signed-off-by: Sean Anderson Acked-by: Camelia Groza Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220818164029.2063293-1-sean.anderson@seco.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 45634579adb67..a770bab4d1ed2 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2886,6 +2886,7 @@ static void dpaa_adjust_link(struct net_device *net_dev) /* The Aquantia PHYs are capable of performing rate adaptation */ #define PHY_VEND_AQUANTIA 0x03a1b400 +#define PHY_VEND_AQUANTIA2 0x31c31c00 static int dpaa_phy_init(struct net_device *net_dev) { @@ -2893,6 +2894,7 @@ static int dpaa_phy_init(struct net_device *net_dev) struct mac_device *mac_dev; struct phy_device *phy_dev; struct dpaa_priv *priv; + u32 phy_vendor; priv = netdev_priv(net_dev); mac_dev = priv->mac_dev; @@ -2905,9 +2907,11 @@ static int dpaa_phy_init(struct net_device *net_dev) return -ENODEV; } + phy_vendor = phy_dev->drv->phy_id & GENMASK(31, 10); /* Unless the PHY is capable of rate adaptation */ if (mac_dev->phy_if != PHY_INTERFACE_MODE_XGMII || - ((phy_dev->drv->phy_id & GENMASK(31, 10)) != PHY_VEND_AQUANTIA)) { + (phy_vendor != PHY_VEND_AQUANTIA && + phy_vendor != PHY_VEND_AQUANTIA2)) { /* remove any features not supported by the controller */ ethtool_convert_legacy_u32_to_link_mode(mask, mac_dev->if_support); From e82c649e851c9c25367fb7a2a6cf3479187de467 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 17 Aug 2022 14:54:36 +0200 Subject: [PATCH 135/654] Revert "net: macsec: update SCI upon MAC address change." This reverts commit 6fc498bc82929ee23aa2f35a828c6178dfd3f823. Commit 6fc498bc8292 states: SCI should be updated, because it contains MAC in its first 6 octets. That's not entirely correct. The SCI can be based on the MAC address, but doesn't have to be. We can also use any 64-bit number as the SCI. When the SCI based on the MAC address, it uses a 16-bit "port number" provided by userspace, which commit 6fc498bc8292 overwrites with 1. In addition, changing the SCI after macsec has been setup can just confuse the receiver. If we configure the RXSC on the peer based on the original SCI, we should keep the same SCI on TX. When the macsec device is being managed by a userspace key negotiation daemon such as wpa_supplicant, commit 6fc498bc8292 would also overwrite the SCI defined by userspace. Fixes: 6fc498bc8292 ("net: macsec: update SCI upon MAC address change.") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/9b1a9d28327e7eb54550a92eebda45d25e54dd0d.1660667033.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index ee6087e7b2bfb..c6d271e5687e9 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -462,11 +462,6 @@ static struct macsec_eth_header *macsec_ethhdr(struct sk_buff *skb) return (struct macsec_eth_header *)skb_mac_header(skb); } -static sci_t dev_to_sci(struct net_device *dev, __be16 port) -{ - return make_sci(dev->dev_addr, port); -} - static void __macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa) { @@ -3661,7 +3656,6 @@ static int macsec_set_mac_address(struct net_device *dev, void *p) out: eth_hw_addr_set(dev, addr->sa_data); - macsec->secy.sci = dev_to_sci(dev, MACSEC_PORT_ES); /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { @@ -4000,6 +3994,11 @@ static bool sci_exists(struct net_device *dev, sci_t sci) return false; } +static sci_t dev_to_sci(struct net_device *dev, __be16 port) +{ + return make_sci(dev->dev_addr, port); +} + static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len) { struct macsec_dev *macsec = macsec_priv(dev); From bdbf0617bbc3641af158d1aeffeebb1505f76263 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Fri, 19 Aug 2022 12:19:28 -0700 Subject: [PATCH 136/654] selftests/vm: fix inability to build any vm tests When we stopped using KSFT_KHDR_INSTALL, a side effect is we also changed the value of `top_srcdir`. This can be seen by looking at the code removed by commit 49de12ba06ef ("selftests: drop KSFT_KHDR_INSTALL make target"). (Note though that this commit didn't break this, technically the one before it did since that's the one that stopped KSFT_KHDR_INSTALL from being used, even though the code was still there.) Previously lib.mk reconfigured `top_srcdir` when KSFT_KHDR_INSTALL was being used. Now, that's no longer the case. As a result, the path to gup_test.h in vm/Makefile was wrong, and since it's a dependency of all of the vm binaries none of them could be built. Instead, we'd get an "error" like: make[1]: *** No rule to make target '/[...]/tools/testing/selftests/vm/compaction_test', needed by 'all'. Stop. So, modify lib.mk so it once again sets top_srcdir to the root of the kernel tree. Fixes: f2745dc0ba3d ("selftests: stop using KSFT_KHDR_INSTALL") Signed-off-by: Axel Rasmussen Signed-off-by: Shuah Khan --- tools/testing/selftests/lib.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 947fc72413e9a..d44c72b3abe36 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -40,6 +40,7 @@ ifeq (0,$(MAKELEVEL)) endif endif selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST)))) +top_srcdir = $(selfdir)/../../.. # The following are built by lib.mk common compile rules. # TEST_CUSTOM_PROGS should be used by tests that require From f16857e62bac60786104c020ad7c86e2163b2c5b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 19 Aug 2022 09:55:59 +1000 Subject: [PATCH 137/654] NFS: unlink/rmdir shouldn't call d_delete() twice on ENOENT nfs_unlink() calls d_delete() twice if it receives ENOENT from the server - once in nfs_dentry_handle_enoent() from nfs_safe_remove and once in nfs_dentry_remove_handle_error(). nfs_rmddir() also calls it twice - the nfs_dentry_handle_enoent() call is direct and inside a region locked with ->rmdir_sem It is safe to call d_delete() twice if the refcount > 1 as the dentry is simply unhashed. If the refcount is 1, the first call sets d_inode to NULL and the second call crashes. This patch guards the d_delete() call from nfs_dentry_handle_enoent() leaving the one under ->remdir_sem in case that is important. In mainline it would be safe to remove the d_delete() call. However in older kernels to which this might be backported, that would change the behaviour of nfs_unlink(). nfs_unlink() used to unhash the dentry which resulted in nfs_dentry_handle_enoent() not calling d_delete(). So in older kernels we need the d_delete() in nfs_dentry_remove_handle_error() when called from nfs_unlink() but not when called from nfs_rmdir(). To make the code work correctly for old and new kernels, and from both nfs_unlink() and nfs_rmdir(), we protect the d_delete() call with simple_positive(). This ensures it is never called in a circumstance where it could crash. Fixes: 3c59366c207e ("NFS: don't unhash dentry during unlink/rename") Fixes: 9019fb391de0 ("NFS: Label the dentry with a verifier in nfs_rmdir() and nfs_unlink()") Signed-off-by: NeilBrown Tested-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 1b879584d4fe8..5d6c2ddc7ea6f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2382,7 +2382,8 @@ static void nfs_dentry_remove_handle_error(struct inode *dir, { switch (error) { case -ENOENT: - d_delete(dentry); + if (d_really_is_positive(dentry)) + d_delete(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); break; case 0: From fcfc8be1e9cf2f12b50dce8b579b3ae54443a014 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 18 Aug 2022 15:07:05 -0400 Subject: [PATCH 138/654] NFSv4.2 fix problems with __nfs42_ssc_open A destination server while doing a COPY shouldn't accept using the passed in filehandle if its not a regular filehandle. If alloc_file_pseudo() has failed, we need to decrement a reference on the newly created inode, otherwise it leaks. Reported-by: Al Viro Fixes: ec4b092508982 ("NFS: inter ssc open") Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index e88f6b18445ec..9eb1812878795 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -340,6 +340,11 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, goto out; } + if (!S_ISREG(fattr->mode)) { + res = ERR_PTR(-EBADF); + goto out; + } + res = ERR_PTR(-ENOMEM); len = strlen(SSC_READ_NAME_BODY) + 16; read_name = kzalloc(len, GFP_KERNEL); @@ -357,6 +362,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, r_ino->i_fop); if (IS_ERR(filep)) { res = ERR_CAST(filep); + iput(r_ino); goto out_free_name; } From ed06fce0b034b2e25bd93430f5c4cbb28036cc1a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Aug 2022 14:55:03 -0400 Subject: [PATCH 139/654] SUNRPC: RPC level errors should set task->tk_rpc_status Fix up a case in call_encode() where we're failing to set task->tk_rpc_status when an RPC level error occurred. Fixes: 9c5948c24869 ("SUNRPC: task should be exit if encode return EKEYEXPIRED more times") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b098e707ad415..7d268a291486b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1902,7 +1902,7 @@ call_encode(struct rpc_task *task) break; case -EKEYEXPIRED: if (!task->tk_cred_retry) { - rpc_exit(task, task->tk_status); + rpc_call_rpcerror(task, task->tk_status); } else { task->tk_action = call_refresh; task->tk_cred_retry--; From 8f2c96420c6ec3dcb18c8be923e24c6feaa5ccf6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Aug 2022 16:43:49 -0700 Subject: [PATCH 140/654] scsi: ufs: core: Reduce the power mode change timeout The current power mode change timeout (180 s) is so large that it can cause a watchdog timer to fire. Reduce the power mode change timeout to 10 seconds. Link: https://lore.kernel.org/r/20220811234401.1957911-1-bvanassche@acm.org Reviewed-by: Stanley Chu Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 6bc679d229279..a202d7d5240d8 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8741,6 +8741,8 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, struct scsi_device *sdp; unsigned long flags; int ret, retries; + unsigned long deadline; + int32_t remaining; spin_lock_irqsave(hba->host->host_lock, flags); sdp = hba->ufs_device_wlun; @@ -8773,9 +8775,14 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, * callbacks hence set the RQF_PM flag so that it doesn't resume the * already suspended childs. */ + deadline = jiffies + 10 * HZ; for (retries = 3; retries > 0; --retries) { + ret = -ETIMEDOUT; + remaining = deadline - jiffies; + if (remaining <= 0) + break; ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, - START_STOP_TIMEOUT, 0, 0, RQF_PM, NULL); + remaining / HZ, 0, 0, RQF_PM, NULL); if (!scsi_status_is_check_condition(ret) || !scsi_sense_valid(&sshdr) || sshdr.sense_key != UNIT_ATTENTION) From fac8e558da9485e13a0ae0488aa0b8a8c307cd34 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 11 Aug 2022 20:12:06 -0500 Subject: [PATCH 141/654] scsi: core: Fix passthrough retry counter handling Passthrough users will set the scsi_cmnd->allowed value and were expecting up to $allowed retries. The problem is that before: commit 6aded12b10e0 ("scsi: core: Remove struct scsi_request") we used to set the retries on the scsi_request then copy them over to scsi_cmnd->allowed in scsi_setup_scsi_cmnd. With that patch we now set scsi_cmnd->allowed to 0 in scsi_prepare_cmd and overwrite what the passthrough user set. This moves the allowed initialization to after the blk_rq_is_passthrough() check so it's only done for the non-passthrough path where the ULD init_command will normally set an allowed value it prefers. Link: https://lore.kernel.org/r/20220812011206.9157-1-michael.christie@oracle.com Fixes: 6aded12b10e0 ("scsi: core: Remove struct scsi_request") Reviewed-by: Christoph Hellwig Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ac2e70e2cd969..ef08029a00793 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1548,7 +1548,6 @@ static blk_status_t scsi_prepare_cmd(struct request *req) scsi_init_command(sdev, cmd); cmd->eh_eflags = 0; - cmd->allowed = 0; cmd->prot_type = 0; cmd->prot_flags = 0; cmd->submitter = 0; @@ -1599,6 +1598,8 @@ static blk_status_t scsi_prepare_cmd(struct request *req) return ret; } + /* Usually overridden by the ULP */ + cmd->allowed = 0; memset(cmd->cmnd, 0, sizeof(cmd->cmnd)); return scsi_cmd_to_driver(cmd)->init_command(cmd); } From d10a72de54c2d2990721923e025ea505fa4f5b02 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Aug 2022 12:18:08 +0300 Subject: [PATCH 142/654] get_maintainer: add Alan to .get_maintainer.ignore Alan asked to be added to the .get_maintainer.ignore list. Link: https://lkml.kernel.org/r/YvN30KhO9aD5Sza9@kili Signed-off-by: Dan Carpenter Cc: Alan Cox Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton --- .get_maintainer.ignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.get_maintainer.ignore b/.get_maintainer.ignore index a64d219137455..c298bab3d3207 100644 --- a/.get_maintainer.ignore +++ b/.get_maintainer.ignore @@ -1,2 +1,4 @@ +Alan Cox +Alan Cox Christoph Hellwig Marc Gonzalez From 37887783b3fef877bf34b8992c9199864da4afcb Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 10 Aug 2022 09:06:09 +0200 Subject: [PATCH 143/654] Revert "zram: remove double compression logic" This reverts commit e7be8d1dd983156b ("zram: remove double compression logic") as it causes zram failures. It does not revert cleanly, PTR_ERR handling was introduced in the meantime. This is handled by appropriate IS_ERR. When under memory pressure, zs_malloc() can fail. Before the above commit, the allocation was retried with direct reclaim enabled (GFP_NOIO). After the commit, it is not -- only __GFP_KSWAPD_RECLAIM is tried. So when the failure occurs under memory pressure, the overlaying filesystem such as ext2 (mounted by ext4 module in this case) can emit failures, making the (file)system unusable: EXT4-fs warning (device zram0): ext4_end_bio:343: I/O error 10 writing to inode 16386 starting block 159744) Buffer I/O error on device zram0, logical block 159744 With direct reclaim, memory is really reclaimed and allocation succeeds, eventually. In the worst case, the oom killer is invoked, which is proper outcome if user sets up zram too large (in comparison to available RAM). This very diff doesn't apply to 5.19 (stable) cleanly (see PTR_ERR note above). Use revert of e7be8d1dd983 directly. Link: https://bugzilla.suse.com/show_bug.cgi?id=1202203 Link: https://lkml.kernel.org/r/20220810070609.14402-1-jslaby@suse.cz Fixes: e7be8d1dd983 ("zram: remove double compression logic") Signed-off-by: Jiri Slaby Reviewed-by: Sergey Senozhatsky Cc: Minchan Kim Cc: Nitin Gupta Cc: Alexey Romanov Cc: Dmitry Rokosov Cc: Lukas Czerner Cc: [5.19] Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 42 ++++++++++++++++++++++++++--------- drivers/block/zram/zram_drv.h | 1 + 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 92cb929a45b79..226ea76cc8197 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1146,14 +1146,15 @@ static ssize_t bd_stat_show(struct device *dev, static ssize_t debug_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { - int version = 2; + int version = 1; struct zram *zram = dev_to_zram(dev); ssize_t ret; down_read(&zram->init_lock); ret = scnprintf(buf, PAGE_SIZE, - "version: %d\n%8llu\n", + "version: %d\n%8llu %8llu\n", version, + (u64)atomic64_read(&zram->stats.writestall), (u64)atomic64_read(&zram->stats.miss_free)); up_read(&zram->init_lock); @@ -1351,7 +1352,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, { int ret = 0; unsigned long alloced_pages; - unsigned long handle = 0; + unsigned long handle = -ENOMEM; unsigned int comp_len = 0; void *src, *dst, *mem; struct zcomp_strm *zstrm; @@ -1369,6 +1370,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, } kunmap_atomic(mem); +compress_again: zstrm = zcomp_stream_get(zram->comp); src = kmap_atomic(page); ret = zcomp_compress(zstrm, src, &comp_len); @@ -1377,20 +1379,39 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, if (unlikely(ret)) { zcomp_stream_put(zram->comp); pr_err("Compression failed! err=%d\n", ret); + zs_free(zram->mem_pool, handle); return ret; } if (comp_len >= huge_class_size) comp_len = PAGE_SIZE; - - handle = zs_malloc(zram->mem_pool, comp_len, - __GFP_KSWAPD_RECLAIM | - __GFP_NOWARN | - __GFP_HIGHMEM | - __GFP_MOVABLE); - + /* + * handle allocation has 2 paths: + * a) fast path is executed with preemption disabled (for + * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, + * since we can't sleep; + * b) slow path enables preemption and attempts to allocate + * the page with __GFP_DIRECT_RECLAIM bit set. we have to + * put per-cpu compression stream and, thus, to re-do + * the compression once handle is allocated. + * + * if we have a 'non-null' handle here then we are coming + * from the slow path and handle has already been allocated. + */ + if (IS_ERR((void *)handle)) + handle = zs_malloc(zram->mem_pool, comp_len, + __GFP_KSWAPD_RECLAIM | + __GFP_NOWARN | + __GFP_HIGHMEM | + __GFP_MOVABLE); if (IS_ERR((void *)handle)) { zcomp_stream_put(zram->comp); + atomic64_inc(&zram->stats.writestall); + handle = zs_malloc(zram->mem_pool, comp_len, + GFP_NOIO | __GFP_HIGHMEM | + __GFP_MOVABLE); + if (!IS_ERR((void *)handle)) + goto compress_again; return PTR_ERR((void *)handle); } @@ -1948,6 +1969,7 @@ static int zram_add(void) if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); ret = device_add_disk(NULL, zram->disk, zram_disk_groups); if (ret) goto out_cleanup_disk; diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 158c91e548501..80c3b43b4828f 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -81,6 +81,7 @@ struct zram_stats { atomic64_t huge_pages_since; /* no. of huge pages since zram set up */ atomic64_t pages_stored; /* no. of pages currently stored */ atomic_long_t max_used_pages; /* no. of maximum pages stored */ + atomic64_t writestall; /* no. of write slow paths */ atomic64_t miss_free; /* no. of missed free */ #ifdef CONFIG_ZRAM_WRITEBACK atomic64_t bd_count; /* no. of pages in backing device */ From 5535be3099717646781ce1540cf725965d680e7b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 9 Aug 2022 22:56:40 +0200 Subject: [PATCH 144/654] mm/gup: fix FOLL_FORCE COW security issue and remove FOLL_COW Ever since the Dirty COW (CVE-2016-5195) security issue happened, we know that FOLL_FORCE can be possibly dangerous, especially if there are races that can be exploited by user space. Right now, it would be sufficient to have some code that sets a PTE of a R/O-mapped shared page dirty, in order for it to erroneously become writable by FOLL_FORCE. The implications of setting a write-protected PTE dirty might not be immediately obvious to everyone. And in fact ever since commit 9ae0f87d009c ("mm/shmem: unconditionally set pte dirty in mfill_atomic_install_pte"), we can use UFFDIO_CONTINUE to map a shmem page R/O while marking the pte dirty. This can be used by unprivileged user space to modify tmpfs/shmem file content even if the user does not have write permissions to the file, and to bypass memfd write sealing -- Dirty COW restricted to tmpfs/shmem (CVE-2022-2590). To fix such security issues for good, the insight is that we really only need that fancy retry logic (FOLL_COW) for COW mappings that are not writable (!VM_WRITE). And in a COW mapping, we really only broke COW if we have an exclusive anonymous page mapped. If we have something else mapped, or the mapped anonymous page might be shared (!PageAnonExclusive), we have to trigger a write fault to break COW. If we don't find an exclusive anonymous page when we retry, we have to trigger COW breaking once again because something intervened. Let's move away from this mandatory-retry + dirty handling and rely on our PageAnonExclusive() flag for making a similar decision, to use the same COW logic as in other kernel parts here as well. In case we stumble over a PTE in a COW mapping that does not map an exclusive anonymous page, COW was not properly broken and we have to trigger a fake write-fault to break COW. Just like we do in can_change_pte_writable() added via commit 64fe24a3e05e ("mm/mprotect: try avoiding write faults for exclusive anonymous pages when changing protection") and commit 76aefad628aa ("mm/mprotect: fix soft-dirty check in can_change_pte_writable()"), take care of softdirty and uffd-wp manually. For example, a write() via /proc/self/mem to a uffd-wp-protected range has to fail instead of silently granting write access and bypassing the userspace fault handler. Note that FOLL_FORCE is not only used for debug access, but also triggered by applications without debug intentions, for example, when pinning pages via RDMA. This fixes CVE-2022-2590. Note that only x86_64 and aarch64 are affected, because only those support CONFIG_HAVE_ARCH_USERFAULTFD_MINOR. Fortunately, FOLL_COW is no longer required to handle FOLL_FORCE. So let's just get rid of it. Thanks to Nadav Amit for pointing out that the pte_dirty() check in FOLL_FORCE code is problematic and might be exploitable. Note 1: We don't check for the PTE being dirty because it doesn't matter for making a "was COWed" decision anymore, and whoever modifies the page has to set the page dirty either way. Note 2: Kernels before extended uffd-wp support and before PageAnonExclusive (< 5.19) can simply revert the problematic commit instead and be safe regarding UFFDIO_CONTINUE. A backport to v5.19 requires minor adjustments due to lack of vma_soft_dirty_enabled(). Link: https://lkml.kernel.org/r/20220809205640.70916-1-david@redhat.com Fixes: 9ae0f87d009c ("mm/shmem: unconditionally set pte dirty in mfill_atomic_install_pte") Signed-off-by: David Hildenbrand Cc: Greg Kroah-Hartman Cc: Axel Rasmussen Cc: Nadav Amit Cc: Peter Xu Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: Matthew Wilcox Cc: Vlastimil Babka Cc: John Hubbard Cc: Jason Gunthorpe Cc: David Laight Cc: [5.16] Signed-off-by: Andrew Morton --- include/linux/mm.h | 1 - mm/gup.c | 68 +++++++++++++++++++++++++++++++--------------- mm/huge_memory.c | 64 +++++++++++++++++++++++++++++-------------- 3 files changed, 89 insertions(+), 44 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 3bedc449c14d8..982f2607180b8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2885,7 +2885,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ -#define FOLL_COW 0x4000 /* internal GUP flag */ #define FOLL_ANON 0x8000 /* don't do file mappings */ #define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */ #define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ diff --git a/mm/gup.c b/mm/gup.c index 7328251574307..5abdaf4874605 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -478,14 +478,42 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address, return -EEXIST; } -/* - * FOLL_FORCE can write to even unwritable pte's, but only - * after we've gone through a COW cycle and they are dirty. - */ -static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) +/* FOLL_FORCE can write to even unwritable PTEs in COW mappings. */ +static inline bool can_follow_write_pte(pte_t pte, struct page *page, + struct vm_area_struct *vma, + unsigned int flags) { - return pte_write(pte) || - ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte)); + /* If the pte is writable, we can write to the page. */ + if (pte_write(pte)) + return true; + + /* Maybe FOLL_FORCE is set to override it? */ + if (!(flags & FOLL_FORCE)) + return false; + + /* But FOLL_FORCE has no effect on shared mappings */ + if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) + return false; + + /* ... or read-only private ones */ + if (!(vma->vm_flags & VM_MAYWRITE)) + return false; + + /* ... or already writable ones that just need to take a write fault */ + if (vma->vm_flags & VM_WRITE) + return false; + + /* + * See can_change_pte_writable(): we broke COW and could map the page + * writable if we have an exclusive anonymous page ... + */ + if (!page || !PageAnon(page) || !PageAnonExclusive(page)) + return false; + + /* ... and a write-fault isn't required for other reasons. */ + if (vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte)) + return false; + return !userfaultfd_pte_wp(vma, pte); } static struct page *follow_page_pte(struct vm_area_struct *vma, @@ -528,12 +556,19 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, } if ((flags & FOLL_NUMA) && pte_protnone(pte)) goto no_page; - if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) { - pte_unmap_unlock(ptep, ptl); - return NULL; - } page = vm_normal_page(vma, address, pte); + + /* + * We only care about anon pages in can_follow_write_pte() and don't + * have to worry about pte_devmap() because they are never anon. + */ + if ((flags & FOLL_WRITE) && + !can_follow_write_pte(pte, page, vma, flags)) { + page = NULL; + goto out; + } + if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) { /* * Only return device mapping pages in the FOLL_GET or FOLL_PIN @@ -986,17 +1021,6 @@ static int faultin_page(struct vm_area_struct *vma, return -EBUSY; } - /* - * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when - * necessary, even if maybe_mkwrite decided not to set pte_write. We - * can thus safely do subsequent page lookups as if they were reads. - * But only do so when looping for pte_write is futile: in some cases - * userspace may also be wanting to write to the gotten user page, - * which a read fault here might prevent (a readonly page might get - * reCOWed by userspace write). - */ - if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE)) - *flags |= FOLL_COW; return 0; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8a7c1b344abef..e9414ee57c5b1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1040,12 +1040,6 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, assert_spin_locked(pmd_lockptr(mm, pmd)); - /* - * When we COW a devmap PMD entry, we split it into PTEs, so we should - * not be in this function with `flags & FOLL_COW` set. - */ - WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set"); - /* FOLL_GET and FOLL_PIN are mutually exclusive. */ if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == (FOLL_PIN | FOLL_GET))) @@ -1395,14 +1389,42 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf) return VM_FAULT_FALLBACK; } -/* - * FOLL_FORCE can write to even unwritable pmd's, but only - * after we've gone through a COW cycle and they are dirty. - */ -static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags) +/* FOLL_FORCE can write to even unwritable PMDs in COW mappings. */ +static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page, + struct vm_area_struct *vma, + unsigned int flags) { - return pmd_write(pmd) || - ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd)); + /* If the pmd is writable, we can write to the page. */ + if (pmd_write(pmd)) + return true; + + /* Maybe FOLL_FORCE is set to override it? */ + if (!(flags & FOLL_FORCE)) + return false; + + /* But FOLL_FORCE has no effect on shared mappings */ + if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) + return false; + + /* ... or read-only private ones */ + if (!(vma->vm_flags & VM_MAYWRITE)) + return false; + + /* ... or already writable ones that just need to take a write fault */ + if (vma->vm_flags & VM_WRITE) + return false; + + /* + * See can_change_pte_writable(): we broke COW and could map the page + * writable if we have an exclusive anonymous page ... + */ + if (!page || !PageAnon(page) || !PageAnonExclusive(page)) + return false; + + /* ... and a write-fault isn't required for other reasons. */ + if (vma_soft_dirty_enabled(vma) && !pmd_soft_dirty(pmd)) + return false; + return !userfaultfd_huge_pmd_wp(vma, pmd); } struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, @@ -1411,12 +1433,16 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, unsigned int flags) { struct mm_struct *mm = vma->vm_mm; - struct page *page = NULL; + struct page *page; assert_spin_locked(pmd_lockptr(mm, pmd)); - if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags)) - goto out; + page = pmd_page(*pmd); + VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page); + + if ((flags & FOLL_WRITE) && + !can_follow_write_pmd(*pmd, page, vma, flags)) + return NULL; /* Avoid dumping huge zero page */ if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd)) @@ -1424,10 +1450,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, /* Full NUMA hinting faults to serialise migration in fault paths */ if ((flags & FOLL_NUMA) && pmd_protnone(*pmd)) - goto out; - - page = pmd_page(*pmd); - VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page); + return NULL; if (!pmd_write(*pmd) && gup_must_unshare(flags, page)) return ERR_PTR(-EMLINK); @@ -1444,7 +1467,6 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT; VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page); -out: return page; } From a8faed3a02eeb75857a3b5d660fa80fe79db77a3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 7 Aug 2022 15:09:34 -0700 Subject: [PATCH 145/654] kernel/sys_ni: add compat entry for fadvise64_64 When CONFIG_ADVISE_SYSCALLS is not set/enabled and CONFIG_COMPAT is set/enabled, the riscv compat_syscall_table references 'compat_sys_fadvise64_64', which is not defined: riscv64-linux-ld: arch/riscv/kernel/compat_syscall_table.o:(.rodata+0x6f8): undefined reference to `compat_sys_fadvise64_64' Add 'fadvise64_64' to kernel/sys_ni.c as a conditional COMPAT function so that when CONFIG_ADVISE_SYSCALLS is not set, there is a fallback function available. Link: https://lkml.kernel.org/r/20220807220934.5689-1-rdunlap@infradead.org Fixes: d3ac21cacc24 ("mm: Support compiling out madvise and fadvise") Signed-off-by: Randy Dunlap Suggested-by: Arnd Bergmann Reviewed-by: Arnd Bergmann Cc: Josh Triplett Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Signed-off-by: Andrew Morton --- kernel/sys_ni.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index a492f159624fa..860b2dcf3ac46 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -277,6 +277,7 @@ COND_SYSCALL(landlock_restrict_self); /* mm/fadvise.c */ COND_SYSCALL(fadvise64_64); +COND_SYSCALL_COMPAT(fadvise64_64); /* mm/, CONFIG_MMU only */ COND_SYSCALL(swapon); From a39c5d3ce03dd890ab6a9be44b21177cec32da55 Mon Sep 17 00:00:00 2001 From: Hao Lee Date: Sun, 7 Aug 2022 15:44:42 +0000 Subject: [PATCH 146/654] mm: add DEVICE_ZONE to FOR_ALL_ZONES FOR_ALL_ZONES should be consistent with enum zone_type. Otherwise, __count_zid_vm_events have the potential to add count to wrong item when zid is ZONE_DEVICE. Link: https://lkml.kernel.org/r/20220807154442.GA18167@haolee.io Signed-off-by: Hao Lee Cc: David Hildenbrand Cc: Johannes Weiner Signed-off-by: Andrew Morton --- include/linux/vm_event_item.h | 15 +++++++++++---- mm/vmstat.c | 9 ++++++++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 404024486fa53..f3fc36cd2276a 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -20,12 +20,19 @@ #define HIGHMEM_ZONE(xx) #endif -#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, HIGHMEM_ZONE(xx) xx##_MOVABLE +#ifdef CONFIG_ZONE_DEVICE +#define DEVICE_ZONE(xx) xx##_DEVICE, +#else +#define DEVICE_ZONE(xx) +#endif + +#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, \ + HIGHMEM_ZONE(xx) xx##_MOVABLE, DEVICE_ZONE(xx) enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, - FOR_ALL_ZONES(PGALLOC), - FOR_ALL_ZONES(ALLOCSTALL), - FOR_ALL_ZONES(PGSCAN_SKIP), + FOR_ALL_ZONES(PGALLOC) + FOR_ALL_ZONES(ALLOCSTALL) + FOR_ALL_ZONES(PGSCAN_SKIP) PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE, PGFAULT, PGMAJFAULT, PGLAZYFREED, diff --git a/mm/vmstat.c b/mm/vmstat.c index 373d2730fcf21..90af9a8572f5a 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1168,8 +1168,15 @@ int fragmentation_index(struct zone *zone, unsigned int order) #define TEXT_FOR_HIGHMEM(xx) #endif +#ifdef CONFIG_ZONE_DEVICE +#define TEXT_FOR_DEVICE(xx) xx "_device", +#else +#define TEXT_FOR_DEVICE(xx) +#endif + #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \ - TEXT_FOR_HIGHMEM(xx) xx "_movable", + TEXT_FOR_HIGHMEM(xx) xx "_movable", \ + TEXT_FOR_DEVICE(xx) const char * const vmstat_text[] = { /* enum zone_stat_item counters */ From efd4149342db2df41b1bbe68972ead853b30e444 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 5 Aug 2022 12:00:03 -0400 Subject: [PATCH 147/654] mm/smaps: don't access young/dirty bit if pte unpresent These bits should only be valid when the ptes are present. Introducing two booleans for it and set it to false when !pte_present() for both pte and pmd accountings. The bug is found during code reading and no real world issue reported, but logically such an error can cause incorrect readings for either smaps or smaps_rollup output on quite a few fields. For example, it could cause over-estimate on values like Shared_Dirty, Private_Dirty, Referenced. Or it could also cause under-estimate on values like LazyFree, Shared_Clean, Private_Clean. Link: https://lkml.kernel.org/r/20220805160003.58929-1-peterx@redhat.com Fixes: b1d4d9e0cbd0 ("proc/smaps: carefully handle migration entries") Fixes: c94b6923fa0a ("/proc/PID/smaps: Add PMD migration entry parsing") Signed-off-by: Peter Xu Reviewed-by: Vlastimil Babka Reviewed-by: David Hildenbrand Reviewed-by: Yang Shi Cc: Konstantin Khlebnikov Cc: Huang Ying Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index a3398d0f1927f..4e0023643f8be 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -527,10 +527,12 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; - bool migration = false; + bool migration = false, young = false, dirty = false; if (pte_present(*pte)) { page = vm_normal_page(vma, addr, *pte); + young = pte_young(*pte); + dirty = pte_dirty(*pte); } else if (is_swap_pte(*pte)) { swp_entry_t swpent = pte_to_swp_entry(*pte); @@ -560,8 +562,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, if (!page) return; - smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), - locked, migration); + smaps_account(mss, page, false, young, dirty, locked, migration); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE From f369b07c861435bd812a9d14493f71b34132ed6f Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 11 Aug 2022 16:13:40 -0400 Subject: [PATCH 148/654] mm/uffd: reset write protection when unregister with wp-mode The motivation of this patch comes from a recent report and patchfix from David Hildenbrand on hugetlb shared handling of wr-protected page [1]. With the reproducer provided in commit message of [1], one can leverage the uffd-wp lazy-reset of ptes to trigger a hugetlb issue which can affect not only the attacker process, but also the whole system. The lazy-reset mechanism of uffd-wp was used to make unregister faster, meanwhile it has an assumption that any leftover pgtable entries should only affect the process on its own, so not only the user should be aware of anything it does, but also it should not affect outside of the process. But it seems that this is not true, and it can also be utilized to make some exploit easier. So far there's no clue showing that the lazy-reset is important to any userfaultfd users because normally the unregister will only happen once for a specific range of memory of the lifecycle of the process. Considering all above, what this patch proposes is to do explicit pte resets when unregister an uffd region with wr-protect mode enabled. It should be the same as calling ioctl(UFFDIO_WRITEPROTECT, wp=false) right before ioctl(UFFDIO_UNREGISTER) for the user. So potentially it'll make the unregister slower. From that pov it's a very slight abi change, but hopefully nothing should break with this change either. Regarding to the change itself - core of uffd write [un]protect operation is moved into a separate function (uffd_wp_range()) and it is reused in the unregister code path. Note that the new function will not check for anything, e.g. ranges or memory types, because they should have been checked during the previous UFFDIO_REGISTER or it should have failed already. It also doesn't check mmap_changing because we're with mmap write lock held anyway. I added a Fixes upon introducing of uffd-wp shmem+hugetlbfs because that's the only issue reported so far and that's the commit David's reproducer will start working (v5.19+). But the whole idea actually applies to not only file memories but also anonymous. It's just that we don't need to fix anonymous prior to v5.19- because there's no known way to exploit. IOW, this patch can also fix the issue reported in [1] as the patch 2 does. [1] https://lore.kernel.org/all/20220811103435.188481-3-david@redhat.com/ Link: https://lkml.kernel.org/r/20220811201340.39342-1-peterx@redhat.com Fixes: b1f9e876862d ("mm/uffd: enable write protection for shmem & hugetlbfs") Signed-off-by: Peter Xu Cc: David Hildenbrand Cc: Mike Rapoport Cc: Mike Kravetz Cc: Andrea Arcangeli Cc: Nadav Amit Cc: Axel Rasmussen Cc: Signed-off-by: Andrew Morton --- fs/userfaultfd.c | 4 ++++ include/linux/userfaultfd_k.h | 2 ++ mm/userfaultfd.c | 29 ++++++++++++++++++----------- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 1c44bf75f9160..175de70e3adfd 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1601,6 +1601,10 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range); } + /* Reset ptes for the whole vma range if wr-protected */ + if (userfaultfd_wp(vma)) + uffd_wp_range(mm, vma, start, vma_end - start, false); + new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS; prev = vma_merge(mm, prev, start, vma_end, new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 732b522bacb7e..e1b8a915e9e9f 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -73,6 +73,8 @@ extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start, extern int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, unsigned long len, bool enable_wp, atomic_t *mmap_changing); +extern void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *vma, + unsigned long start, unsigned long len, bool enable_wp); /* mm helpers */ static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 07d3befc80e41..7327b2573f7c2 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -703,14 +703,29 @@ ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start, mmap_changing, 0); } +void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma, + unsigned long start, unsigned long len, bool enable_wp) +{ + struct mmu_gather tlb; + pgprot_t newprot; + + if (enable_wp) + newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE)); + else + newprot = vm_get_page_prot(dst_vma->vm_flags); + + tlb_gather_mmu(&tlb, dst_mm); + change_protection(&tlb, dst_vma, start, start + len, newprot, + enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE); + tlb_finish_mmu(&tlb); +} + int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, unsigned long len, bool enable_wp, atomic_t *mmap_changing) { struct vm_area_struct *dst_vma; unsigned long page_mask; - struct mmu_gather tlb; - pgprot_t newprot; int err; /* @@ -750,15 +765,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, goto out_unlock; } - if (enable_wp) - newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE)); - else - newprot = vm_get_page_prot(dst_vma->vm_flags); - - tlb_gather_mmu(&tlb, dst_mm); - change_protection(&tlb, dst_vma, start, start + len, newprot, - enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE); - tlb_finish_mmu(&tlb); + uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp); err = 0; out_unlock: From f96f7a40874d7c746680c0b9f57cef2262ae551f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 11 Aug 2022 12:34:34 +0200 Subject: [PATCH 149/654] mm/hugetlb: fix hugetlb not supporting softdirty tracking Patch series "mm/hugetlb: fix write-fault handling for shared mappings", v2. I observed that hugetlb does not support/expect write-faults in shared mappings that would have to map the R/O-mapped page writable -- and I found two case where we could currently get such faults and would erroneously map an anon page into a shared mapping. Reproducers part of the patches. I propose to backport both fixes to stable trees. The first fix needs a small adjustment. This patch (of 2): Staring at hugetlb_wp(), one might wonder where all the logic for shared mappings is when stumbling over a write-protected page in a shared mapping. In fact, there is none, and so far we thought we could get away with that because e.g., mprotect() should always do the right thing and map all pages directly writable. Looks like we were wrong: -------------------------------------------------------------------------- #include #include #include #include #include #include #include #define HUGETLB_SIZE (2 * 1024 * 1024u) static void clear_softdirty(void) { int fd = open("/proc/self/clear_refs", O_WRONLY); const char *ctrl = "4"; int ret; if (fd < 0) { fprintf(stderr, "open(clear_refs) failed\n"); exit(1); } ret = write(fd, ctrl, strlen(ctrl)); if (ret != strlen(ctrl)) { fprintf(stderr, "write(clear_refs) failed\n"); exit(1); } close(fd); } int main(int argc, char **argv) { char *map; int fd; fd = open("/dev/hugepages/tmp", O_RDWR | O_CREAT); if (!fd) { fprintf(stderr, "open() failed\n"); return -errno; } if (ftruncate(fd, HUGETLB_SIZE)) { fprintf(stderr, "ftruncate() failed\n"); return -errno; } map = mmap(NULL, HUGETLB_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (map == MAP_FAILED) { fprintf(stderr, "mmap() failed\n"); return -errno; } *map = 0; if (mprotect(map, HUGETLB_SIZE, PROT_READ)) { fprintf(stderr, "mmprotect() failed\n"); return -errno; } clear_softdirty(); if (mprotect(map, HUGETLB_SIZE, PROT_READ|PROT_WRITE)) { fprintf(stderr, "mmprotect() failed\n"); return -errno; } *map = 0; return 0; } -------------------------------------------------------------------------- Above test fails with SIGBUS when there is only a single free hugetlb page. # echo 1 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages # ./test Bus error (core dumped) And worse, with sufficient free hugetlb pages it will map an anonymous page into a shared mapping, for example, messing up accounting during unmap and breaking MAP_SHARED semantics: # echo 2 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages # ./test # cat /proc/meminfo | grep HugePages_ HugePages_Total: 2 HugePages_Free: 1 HugePages_Rsvd: 18446744073709551615 HugePages_Surp: 0 Reason in this particular case is that vma_wants_writenotify() will return "true", removing VM_SHARED in vma_set_page_prot() to map pages write-protected. Let's teach vma_wants_writenotify() that hugetlb does not support softdirty tracking. Link: https://lkml.kernel.org/r/20220811103435.188481-1-david@redhat.com Link: https://lkml.kernel.org/r/20220811103435.188481-2-david@redhat.com Fixes: 64e455079e1b ("mm: softdirty: enable write notifications on VMAs after VM_SOFTDIRTY cleared") Signed-off-by: David Hildenbrand Reviewed-by: Mike Kravetz Cc: Peter Feiner Cc: Kirill A. Shutemov Cc: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: Jamie Liu Cc: Hugh Dickins Cc: Naoya Horiguchi Cc: Bjorn Helgaas Cc: Muchun Song Cc: Peter Xu Cc: [3.18+] Signed-off-by: Andrew Morton --- mm/mmap.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index c035020d0c896..9d780f415be3c 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1646,8 +1646,11 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot) pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags))) return 0; - /* Do we need to track softdirty? */ - if (vma_soft_dirty_enabled(vma)) + /* + * Do we need to track softdirty? hugetlb does not support softdirty + * tracking yet. + */ + if (vma_soft_dirty_enabled(vma) && !is_vm_hugetlb_page(vma)) return 1; /* Specialty mapping? */ From 1d8d14641fd94a01b20a4abbf2749fd8eddcf57b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 11 Aug 2022 12:34:35 +0200 Subject: [PATCH 150/654] mm/hugetlb: support write-faults in shared mappings If we ever get a write-fault on a write-protected page in a shared mapping, we'd be in trouble (again). Instead, we can simply map the page writable. And in fact, there is even a way right now to trigger that code via uffd-wp ever since we stared to support it for shmem in 5.19: -------------------------------------------------------------------------- #include #include #include #include #include #include #include #include #include #include #define HUGETLB_SIZE (2 * 1024 * 1024u) static char *map; int uffd; static int temp_setup_uffd(void) { struct uffdio_api uffdio_api; struct uffdio_register uffdio_register; struct uffdio_writeprotect uffd_writeprotect; struct uffdio_range uffd_range; uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY); if (uffd < 0) { fprintf(stderr, "syscall() failed: %d\n", errno); return -errno; } uffdio_api.api = UFFD_API; uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP; if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) { fprintf(stderr, "UFFDIO_API failed: %d\n", errno); return -errno; } if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) { fprintf(stderr, "UFFD_FEATURE_WRITEPROTECT missing\n"); return -ENOSYS; } /* Register UFFD-WP */ uffdio_register.range.start = (unsigned long) map; uffdio_register.range.len = HUGETLB_SIZE; uffdio_register.mode = UFFDIO_REGISTER_MODE_WP; if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) < 0) { fprintf(stderr, "UFFDIO_REGISTER failed: %d\n", errno); return -errno; } /* Writeprotect a single page. */ uffd_writeprotect.range.start = (unsigned long) map; uffd_writeprotect.range.len = HUGETLB_SIZE; uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_WP; if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) { fprintf(stderr, "UFFDIO_WRITEPROTECT failed: %d\n", errno); return -errno; } /* Unregister UFFD-WP without prior writeunprotection. */ uffd_range.start = (unsigned long) map; uffd_range.len = HUGETLB_SIZE; if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_range)) { fprintf(stderr, "UFFDIO_UNREGISTER failed: %d\n", errno); return -errno; } return 0; } int main(int argc, char **argv) { int fd; fd = open("/dev/hugepages/tmp", O_RDWR | O_CREAT); if (!fd) { fprintf(stderr, "open() failed\n"); return -errno; } if (ftruncate(fd, HUGETLB_SIZE)) { fprintf(stderr, "ftruncate() failed\n"); return -errno; } map = mmap(NULL, HUGETLB_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (map == MAP_FAILED) { fprintf(stderr, "mmap() failed\n"); return -errno; } *map = 0; if (temp_setup_uffd()) return 1; *map = 0; return 0; } -------------------------------------------------------------------------- Above test fails with SIGBUS when there is only a single free hugetlb page. # echo 1 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages # ./test Bus error (core dumped) And worse, with sufficient free hugetlb pages it will map an anonymous page into a shared mapping, for example, messing up accounting during unmap and breaking MAP_SHARED semantics: # echo 2 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages # ./test # cat /proc/meminfo | grep HugePages_ HugePages_Total: 2 HugePages_Free: 1 HugePages_Rsvd: 18446744073709551615 HugePages_Surp: 0 Reason is that uffd-wp doesn't clear the uffd-wp PTE bit when unregistering and consequently keeps the PTE writeprotected. Reason for this is to avoid the additional overhead when unregistering. Note that this is the case also for !hugetlb and that we will end up with writable PTEs that still have the uffd-wp PTE bit set once we return from hugetlb_wp(). I'm not touching the uffd-wp PTE bit for now, because it seems to be a generic thing -- wp_page_reuse() also doesn't clear it. VM_MAYSHARE handling in hugetlb_fault() for FAULT_FLAG_WRITE indicates that MAP_SHARED handling was at least envisioned, but could never have worked as expected. While at it, make sure that we never end up in hugetlb_wp() on write faults without VM_WRITE, because we don't support maybe_mkwrite() semantics as commonly used in the !hugetlb case -- for example, in wp_page_reuse(). Note that there is no need to do any kind of reservation in hugetlb_fault() in this case ... because we already have a hugetlb page mapped R/O that we will simply map writable and we are not dealing with COW/unsharing. Link: https://lkml.kernel.org/r/20220811103435.188481-3-david@redhat.com Fixes: b1f9e876862d ("mm/uffd: enable write protection for shmem & hugetlbfs") Signed-off-by: David Hildenbrand Reviewed-by: Mike Kravetz Cc: Bjorn Helgaas Cc: Cyrill Gorcunov Cc: Hugh Dickins Cc: Jamie Liu Cc: Kirill A. Shutemov Cc: Muchun Song Cc: Naoya Horiguchi Cc: Pavel Emelyanov Cc: Peter Feiner Cc: Peter Xu Cc: [5.19] Signed-off-by: Andrew Morton --- mm/hugetlb.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0aee2f3ae15c8..2480ba627aa5e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5241,6 +5241,21 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma, VM_BUG_ON(unshare && (flags & FOLL_WRITE)); VM_BUG_ON(!unshare && !(flags & FOLL_WRITE)); + /* + * hugetlb does not support FOLL_FORCE-style write faults that keep the + * PTE mapped R/O such as maybe_mkwrite() would do. + */ + if (WARN_ON_ONCE(!unshare && !(vma->vm_flags & VM_WRITE))) + return VM_FAULT_SIGSEGV; + + /* Let's take out MAP_SHARED mappings first. */ + if (vma->vm_flags & VM_MAYSHARE) { + if (unlikely(unshare)) + return 0; + set_huge_ptep_writable(vma, haddr, ptep); + return 0; + } + pte = huge_ptep_get(ptep); old_page = pte_page(pte); @@ -5781,12 +5796,11 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, * If we are going to COW/unshare the mapping later, we examine the * pending reservations for this page now. This will ensure that any * allocations necessary to record that reservation occur outside the - * spinlock. For private mappings, we also lookup the pagecache - * page now as it is used to determine if a reservation has been - * consumed. + * spinlock. Also lookup the pagecache page now as it is used to + * determine if a reservation has been consumed. */ if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) && - !huge_pte_write(entry)) { + !(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(entry)) { if (vma_needs_reservation(h, vma, haddr) < 0) { ret = VM_FAULT_OOM; goto out_mutex; @@ -5794,9 +5808,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, /* Just decrements count, does not deallocate */ vma_end_reservation(h, vma, haddr); - if (!(vma->vm_flags & VM_MAYSHARE)) - pagecache_page = hugetlbfs_pagecache_page(h, - vma, haddr); + pagecache_page = hugetlbfs_pagecache_page(h, vma, haddr); } ptl = huge_pte_lock(h, mm, ptep); From cb241339b9d020c758a6647c69f8e42538c5cf88 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 10 Aug 2022 21:51:09 -0700 Subject: [PATCH 151/654] mm/shmem: fix chattr fsflags support in tmpfs ext[234] have always allowed unimplemented chattr flags to be set, but other filesystems have tended to be stricter. Follow the stricter approach for tmpfs: I don't want to have to explain why csu attributes don't actually work, and we won't need to update the chattr(1) manpage; and it's never wrong to start off strict, relaxing later if persuaded. Allow only a (append only) i (immutable) A (no atime) and d (no dump). Although lsattr showed 'A' inherited, the NOATIME behavior was not being inherited: because nothing sync'ed FS_NOATIME_FL to S_NOATIME. Add shmem_set_inode_flags() to sync the flags, using inode_set_flags() to avoid that instant of lost immutablility during fileattr_set(). But that change switched generic/079 from passing to failing: because FS_IMMUTABLE_FL and FS_APPEND_FL had been unconventionally included in the INHERITED fsflags: remove them and generic/079 is back to passing. Link: https://lkml.kernel.org/r/2961dcb0-ddf3-b9f0-3268-12a4ff996856@google.com Fixes: e408e695f5f1 ("mm/shmem: support FS_IOC_[SG]ETFLAGS in tmpfs") Signed-off-by: Hugh Dickins Cc: "Theodore Ts'o" Cc: Radoslaw Burny Cc: "Darrick J. Wong" Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 13 +++------- mm/shmem.c | 54 +++++++++++++++++++++++----------------- 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 1b6c4013f691b..ff0b990de83d4 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -29,15 +29,10 @@ struct shmem_inode_info { struct inode vfs_inode; }; -#define SHMEM_FL_USER_VISIBLE FS_FL_USER_VISIBLE -#define SHMEM_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE -#define SHMEM_FL_INHERITED FS_FL_USER_MODIFIABLE - -/* Flags that are appropriate for regular files (all but dir-specific ones). */ -#define SHMEM_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) - -/* Flags that are appropriate for non-directories/regular files. */ -#define SHMEM_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) +#define SHMEM_FL_USER_VISIBLE FS_FL_USER_VISIBLE +#define SHMEM_FL_USER_MODIFIABLE \ + (FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL) +#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL) struct shmem_sb_info { unsigned long max_blocks; /* How many blocks are allowed */ diff --git a/mm/shmem.c b/mm/shmem.c index 5783f11351bb0..170b4078420f5 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2281,16 +2281,34 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -/* Mask out flags that are inappropriate for the given type of inode. */ -static unsigned shmem_mask_flags(umode_t mode, __u32 flags) +#ifdef CONFIG_TMPFS_XATTR +static int shmem_initxattrs(struct inode *, const struct xattr *, void *); + +/* + * chattr's fsflags are unrelated to extended attributes, + * but tmpfs has chosen to enable them under the same config option. + */ +static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags) +{ + unsigned int i_flags = 0; + + if (fsflags & FS_NOATIME_FL) + i_flags |= S_NOATIME; + if (fsflags & FS_APPEND_FL) + i_flags |= S_APPEND; + if (fsflags & FS_IMMUTABLE_FL) + i_flags |= S_IMMUTABLE; + /* + * But FS_NODUMP_FL does not require any action in i_flags. + */ + inode_set_flags(inode, i_flags, S_NOATIME | S_APPEND | S_IMMUTABLE); +} +#else +static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags) { - if (S_ISDIR(mode)) - return flags; - else if (S_ISREG(mode)) - return flags & SHMEM_REG_FLMASK; - else - return flags & SHMEM_OTHER_FLMASK; } +#define shmem_initxattrs NULL +#endif static struct inode *shmem_get_inode(struct super_block *sb, struct inode *dir, umode_t mode, dev_t dev, unsigned long flags) @@ -2319,7 +2337,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, struct inode *dir, info->i_crtime = inode->i_mtime; info->fsflags = (dir == NULL) ? 0 : SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED; - info->fsflags = shmem_mask_flags(mode, info->fsflags); + if (info->fsflags) + shmem_set_inode_flags(inode, info->fsflags); INIT_LIST_HEAD(&info->shrinklist); INIT_LIST_HEAD(&info->swaplist); simple_xattrs_init(&info->xattrs); @@ -2468,12 +2487,6 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, static const struct inode_operations shmem_symlink_inode_operations; static const struct inode_operations shmem_short_symlink_operations; -#ifdef CONFIG_TMPFS_XATTR -static int shmem_initxattrs(struct inode *, const struct xattr *, void *); -#else -#define shmem_initxattrs NULL -#endif - static int shmem_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, @@ -3179,18 +3192,13 @@ static int shmem_fileattr_set(struct user_namespace *mnt_userns, if (fileattr_has_fsx(fa)) return -EOPNOTSUPP; + if (fa->flags & ~SHMEM_FL_USER_MODIFIABLE) + return -EOPNOTSUPP; info->fsflags = (info->fsflags & ~SHMEM_FL_USER_MODIFIABLE) | (fa->flags & SHMEM_FL_USER_MODIFIABLE); - inode->i_flags &= ~(S_APPEND | S_IMMUTABLE | S_NOATIME); - if (info->fsflags & FS_APPEND_FL) - inode->i_flags |= S_APPEND; - if (info->fsflags & FS_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; - if (info->fsflags & FS_NOATIME_FL) - inode->i_flags |= S_NOATIME; - + shmem_set_inode_flags(inode, info->fsflags); inode->i_ctime = current_time(inode); return 0; } From 15f242bb65b89d5f1ff990668a586fdf1307b2c8 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 10 Aug 2022 21:55:36 -0700 Subject: [PATCH 152/654] mm/shmem: tmpfs fallocate use file_modified() 5.18 fixed the btrfs and ext4 fallocates to use file_modified(), as xfs was already doing, to drop privileges: and fstests generic/{683,684,688} expect this. There's no need to argue over keep-size allocation (which could just update ctime): fix shmem_fallocate() to behave the same way. Link: https://lkml.kernel.org/r/39c5e62-4896-7795-c0a0-f79c50d4909@google.com Signed-off-by: Hugh Dickins Acked-by: Christian Brauner (Microsoft) Cc: "Darrick J. Wong" Cc: Matthew Wilcox (Oracle) Cc: Radoslaw Burny Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton --- mm/shmem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index 170b4078420f5..ce2090744c5e4 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2839,12 +2839,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) i_size_write(inode, offset + len); - inode->i_ctime = current_time(inode); undone: spin_lock(&inode->i_lock); inode->i_private = NULL; spin_unlock(&inode->i_lock); out: + if (!error) + file_modified(file); inode_unlock(inode); return error; } From 76d36dea02691a8ffa8cd7368eecbf727b8a1c0c Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 10 Aug 2022 22:06:33 -0700 Subject: [PATCH 153/654] mm/shmem: shmem_replace_page() remember NR_SHMEM Elsewhere, NR_SHMEM is updated at the same time as shmem NR_FILE_PAGES; but shmem_replace_page() was forgetting to do that - so NR_SHMEM stats could grow too big or too small, in those unusual cases when it's used. Link: https://lkml.kernel.org/r/cec7c09d-5874-e160-ada6-6e10ee48784@google.com Signed-off-by: Hugh Dickins Reviewed-by: Matthew Wilcox (Oracle) Cc: "Darrick J. Wong" Cc: Radoslaw Burny Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton --- mm/shmem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/shmem.c b/mm/shmem.c index ce2090744c5e4..d075dd2dcc484 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1659,7 +1659,9 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, new = page_folio(newpage); mem_cgroup_migrate(old, new); __inc_lruvec_page_state(newpage, NR_FILE_PAGES); + __inc_lruvec_page_state(newpage, NR_SHMEM); __dec_lruvec_page_state(oldpage, NR_FILE_PAGES); + __dec_lruvec_page_state(oldpage, NR_SHMEM); } xa_unlock_irq(&swap_mapping->i_pages); From 9c80e79906b4ca440d09e7f116609262bb747909 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 12 Aug 2022 19:05:09 -0700 Subject: [PATCH 154/654] kprobes: don't call disarm_kprobe() for disabled kprobes The assumption in __disable_kprobe() is wrong, and it could try to disarm an already disarmed kprobe and fire the WARN_ONCE() below. [0] We can easily reproduce this issue. 1. Write 0 to /sys/kernel/debug/kprobes/enabled. # echo 0 > /sys/kernel/debug/kprobes/enabled 2. Run execsnoop. At this time, one kprobe is disabled. # /usr/share/bcc/tools/execsnoop & [1] 2460 PCOMM PID PPID RET ARGS # cat /sys/kernel/debug/kprobes/list ffffffff91345650 r __x64_sys_execve+0x0 [FTRACE] ffffffff91345650 k __x64_sys_execve+0x0 [DISABLED][FTRACE] 3. Write 1 to /sys/kernel/debug/kprobes/enabled, which changes kprobes_all_disarmed to false but does not arm the disabled kprobe. # echo 1 > /sys/kernel/debug/kprobes/enabled # cat /sys/kernel/debug/kprobes/list ffffffff91345650 r __x64_sys_execve+0x0 [FTRACE] ffffffff91345650 k __x64_sys_execve+0x0 [DISABLED][FTRACE] 4. Kill execsnoop, when __disable_kprobe() calls disarm_kprobe() for the disabled kprobe and hits the WARN_ONCE() in __disarm_kprobe_ftrace(). # fg /usr/share/bcc/tools/execsnoop ^C Actually, WARN_ONCE() is fired twice, and __unregister_kprobe_top() misses some cleanups and leaves the aggregated kprobe in the hash table. Then, __unregister_trace_kprobe() initialises tk->rp.kp.list and creates an infinite loop like this. aggregated kprobe.list -> kprobe.list -. ^ | '.__.' In this situation, these commands fall into the infinite loop and result in RCU stall or soft lockup. cat /sys/kernel/debug/kprobes/list : show_kprobe_addr() enters into the infinite loop with RCU. /usr/share/bcc/tools/execsnoop : warn_kprobe_rereg() holds kprobe_mutex, and __get_valid_kprobe() is stuck in the loop. To avoid the issue, make sure we don't call disarm_kprobe() for disabled kprobes. [0] Failed to disarm kprobe-ftrace at __x64_sys_execve+0x0/0x40 (error -2) WARNING: CPU: 6 PID: 2460 at kernel/kprobes.c:1130 __disarm_kprobe_ftrace.isra.19 (kernel/kprobes.c:1129) Modules linked in: ena CPU: 6 PID: 2460 Comm: execsnoop Not tainted 5.19.0+ #28 Hardware name: Amazon EC2 c5.2xlarge/, BIOS 1.0 10/16/2017 RIP: 0010:__disarm_kprobe_ftrace.isra.19 (kernel/kprobes.c:1129) Code: 24 8b 02 eb c1 80 3d c4 83 f2 01 00 75 d4 48 8b 75 00 89 c2 48 c7 c7 90 fa 0f 92 89 04 24 c6 05 ab 83 01 e8 e4 94 f0 ff <0f> 0b 8b 04 24 eb b1 89 c6 48 c7 c7 60 fa 0f 92 89 04 24 e8 cc 94 RSP: 0018:ffff9e6ec154bd98 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffffffff930f7b00 RCX: 0000000000000001 RDX: 0000000080000001 RSI: ffffffff921461c5 RDI: 00000000ffffffff RBP: ffff89c504286da8 R08: 0000000000000000 R09: c0000000fffeffff R10: 0000000000000000 R11: ffff9e6ec154bc28 R12: ffff89c502394e40 R13: ffff89c502394c00 R14: ffff9e6ec154bc00 R15: 0000000000000000 FS: 00007fe800398740(0000) GS:ffff89c812d80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000c00057f010 CR3: 0000000103b54006 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: __disable_kprobe (kernel/kprobes.c:1716) disable_kprobe (kernel/kprobes.c:2392) __disable_trace_kprobe (kernel/trace/trace_kprobe.c:340) disable_trace_kprobe (kernel/trace/trace_kprobe.c:429) perf_trace_event_unreg.isra.2 (./include/linux/tracepoint.h:93 kernel/trace/trace_event_perf.c:168) perf_kprobe_destroy (kernel/trace/trace_event_perf.c:295) _free_event (kernel/events/core.c:4971) perf_event_release_kernel (kernel/events/core.c:5176) perf_release (kernel/events/core.c:5186) __fput (fs/file_table.c:321) task_work_run (./include/linux/sched.h:2056 (discriminator 1) kernel/task_work.c:179 (discriminator 1)) exit_to_user_mode_prepare (./include/linux/resume_user_mode.h:49 kernel/entry/common.c:169 kernel/entry/common.c:201) syscall_exit_to_user_mode (./arch/x86/include/asm/jump_label.h:55 ./arch/x86/include/asm/nospec-branch.h:384 ./arch/x86/include/asm/entry-common.h:94 kernel/entry/common.c:133 kernel/entry/common.c:296) do_syscall_64 (arch/x86/entry/common.c:87) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) RIP: 0033:0x7fe7ff210654 Code: 15 79 89 20 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb be 0f 1f 00 8b 05 9a cd 20 00 48 63 ff 85 c0 75 11 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 3a f3 c3 48 83 ec 18 48 89 7c 24 08 e8 34 fc RSP: 002b:00007ffdbd1d3538 EFLAGS: 00000246 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000008 RCX: 00007fe7ff210654 RDX: 0000000000000000 RSI: 0000000000002401 RDI: 0000000000000008 RBP: 0000000000000000 R08: 94ae31d6fda838a4 R0900007fe8001c9d30 R10: 00007ffdbd1d34b0 R11: 0000000000000246 R12: 00007ffdbd1d3600 R13: 0000000000000000 R14: fffffffffffffffc R15: 00007ffdbd1d3560 Link: https://lkml.kernel.org/r/20220813020509.90805-1-kuniyu@amazon.com Fixes: 69d54b916d83 ("kprobes: makes kprobes/enabled works correctly for optimized kprobes.") Signed-off-by: Kuniyuki Iwashima Reported-by: Ayushman Dutta Cc: "Naveen N. Rao" Cc: Anil S Keshavamurthy Cc: "David S. Miller" Cc: Masami Hiramatsu Cc: Wang Nan Cc: Kuniyuki Iwashima Cc: Kuniyuki Iwashima Cc: Ayushman Dutta Cc: Signed-off-by: Andrew Morton --- kernel/kprobes.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 80697e5e03e49..08350e35aba24 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1707,11 +1707,12 @@ static struct kprobe *__disable_kprobe(struct kprobe *p) /* Try to disarm and disable this/parent probe */ if (p == orig_p || aggr_kprobe_disabled(orig_p)) { /* - * If 'kprobes_all_disarmed' is set, 'orig_p' - * should have already been disarmed, so - * skip unneed disarming process. + * Don't be lazy here. Even if 'kprobes_all_disarmed' + * is false, 'orig_p' might not have been armed yet. + * Note arm_all_kprobes() __tries__ to arm all kprobes + * on the best effort basis. */ - if (!kprobes_all_disarmed) { + if (!kprobes_all_disarmed && !kprobe_disabled(orig_p)) { ret = disarm_kprobe(orig_p, true); if (ret) { p->flags &= ~KPROBE_FLAG_DISABLED; From 7ae1f5508d9a33fd58ed3059bd2d569961e3b8bd Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 20 Aug 2022 17:59:17 +0200 Subject: [PATCH 155/654] parisc: Fix exception handler for fldw and fstw instructions The exception handler is broken for unaligned memory acceses with fldw and fstw instructions, because it trashes or uses randomly some other floating point register than the one specified in the instruction word on loads and stores. The instruction "fldw 0(addr),%fr22L" (and the other fldw/fstw instructions) encode the target register (%fr22) in the rightmost 5 bits of the instruction word. The 7th rightmost bit of the instruction word defines if the left or right half of %fr22 should be used. While processing unaligned address accesses, the FR3() define is used to extract the offset into the local floating-point register set. But the calculation in FR3() was buggy, so that for example instead of %fr22, register %fr12 [((22 * 2) & 0x1f) = 12] was used. This bug has been since forever in the parisc kernel and I wonder why it wasn't detected earlier. Interestingly I noticed this bug just because the libime debian package failed to build on *native* hardware, while it successfully built in qemu. This patch corrects the bitshift and masking calculation in FR3(). Signed-off-by: Helge Deller Cc: --- arch/parisc/kernel/unaligned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index bac581b5ecfc5..e8a4d77cff53a 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -93,7 +93,7 @@ #define R1(i) (((i)>>21)&0x1f) #define R2(i) (((i)>>16)&0x1f) #define R3(i) ((i)&0x1f) -#define FR3(i) ((((i)<<1)&0x1f)|(((i)>>6)&1)) +#define FR3(i) ((((i)&0x1f)<<1)|(((i)>>6)&1)) #define IM(i,n) (((i)>>1&((1<<(n-1))-1))|((i)&1?((0-1L)<<(n-1)):0)) #define IM5_2(i) IM((i)>>16,5) #define IM5_3(i) IM((i),5) From fc2e426b1161761561624ebd43ce8c8d2fa058da Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Fri, 19 Aug 2022 16:43:34 +0800 Subject: [PATCH 156/654] x86/unwind/orc: Unwind ftrace trampolines with correct ORC entry When meeting ftrace trampolines in ORC unwinding, unwinder uses address of ftrace_{regs_}call address to find the ORC entry, which gets next frame at sp+176. If there is an IRQ hitting at sub $0xa8,%rsp, the next frame should be sp+8 instead of 176. It makes unwinder skip correct frame and throw warnings such as "wrong direction" or "can't access registers", etc, depending on the content of the incorrect frame address. By adding the base address ftrace_{regs_}caller with the offset *ip - ops->trampoline*, we can get the correct address to find the ORC entry. Also change "caller" to "tramp_addr" to make variable name conform to its content. [ mingo: Clarified the changelog a bit. ] Fixes: 6be7fa3c74d1 ("ftrace, orc, x86: Handle ftrace dynamically allocated trampolines") Signed-off-by: Chen Zhongjin Signed-off-by: Ingo Molnar Reviewed-by: Steven Rostedt (Google) Cc: Link: https://lore.kernel.org/r/20220819084334.244016-1-chenzhongjin@huawei.com --- arch/x86/kernel/unwind_orc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 38185aedf7d16..0ea57da929407 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -93,22 +93,27 @@ static struct orc_entry *orc_find(unsigned long ip); static struct orc_entry *orc_ftrace_find(unsigned long ip) { struct ftrace_ops *ops; - unsigned long caller; + unsigned long tramp_addr, offset; ops = ftrace_ops_trampoline(ip); if (!ops) return NULL; + /* Set tramp_addr to the start of the code copied by the trampoline */ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) - caller = (unsigned long)ftrace_regs_call; + tramp_addr = (unsigned long)ftrace_regs_caller; else - caller = (unsigned long)ftrace_call; + tramp_addr = (unsigned long)ftrace_caller; + + /* Now place tramp_addr to the location within the trampoline ip is at */ + offset = ip - ops->trampoline; + tramp_addr += offset; /* Prevent unlikely recursion */ - if (ip == caller) + if (ip == tramp_addr) return NULL; - return orc_find(caller); + return orc_find(tramp_addr); } #else static struct orc_entry *orc_ftrace_find(unsigned long ip) From cfbd76d5c9c449739bb74288d982bccf9ff822f4 Mon Sep 17 00:00:00 2001 From: Marcus Folkesson Date: Fri, 22 Jul 2022 15:07:19 +0200 Subject: [PATCH 157/654] iio: adc: mcp3911: correct "microchip,device-addr" property Go for the right property name that is documented in the bindings. Fixes: 3a89b289df5d ("iio: adc: add support for mcp3911") Signed-off-by: Marcus Folkesson Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220722130726.7627-3-marcus.folkesson@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/mcp3911.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/mcp3911.c b/drivers/iio/adc/mcp3911.c index f581cefb67195..f8875076ae801 100644 --- a/drivers/iio/adc/mcp3911.c +++ b/drivers/iio/adc/mcp3911.c @@ -210,7 +210,14 @@ static int mcp3911_config(struct mcp3911 *adc) u32 configreg; int ret; - device_property_read_u32(dev, "device-addr", &adc->dev_addr); + ret = device_property_read_u32(dev, "microchip,device-addr", &adc->dev_addr); + + /* + * Fallback to "device-addr" due to historical mismatch between + * dt-bindings and implementation + */ + if (ret) + device_property_read_u32(dev, "device-addr", &adc->dev_addr); if (adc->dev_addr > 3) { dev_err(&adc->spi->dev, "invalid device address (%i). Must be in range 0-3.\n", From 9e2238e3ae40d371a1130226e0e740aa1601efa6 Mon Sep 17 00:00:00 2001 From: Marcus Folkesson Date: Fri, 22 Jul 2022 15:07:20 +0200 Subject: [PATCH 158/654] iio: adc: mcp3911: use correct formula for AD conversion The ADC conversion is actually not rail-to-rail but with a factor 1.5. Make use of this factor when calculating actual voltage. Fixes: 3a89b289df5d ("iio: adc: add support for mcp3911") Signed-off-by: Marcus Folkesson Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220722130726.7627-4-marcus.folkesson@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/mcp3911.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/iio/adc/mcp3911.c b/drivers/iio/adc/mcp3911.c index f8875076ae801..890af7dca62de 100644 --- a/drivers/iio/adc/mcp3911.c +++ b/drivers/iio/adc/mcp3911.c @@ -40,8 +40,8 @@ #define MCP3911_CHANNEL(x) (MCP3911_REG_CHANNEL0 + x * 3) #define MCP3911_OFFCAL(x) (MCP3911_REG_OFFCAL_CH0 + x * 6) -/* Internal voltage reference in uV */ -#define MCP3911_INT_VREF_UV 1200000 +/* Internal voltage reference in mV */ +#define MCP3911_INT_VREF_MV 1200 #define MCP3911_REG_READ(reg, id) ((((reg) << 1) | ((id) << 5) | (1 << 0)) & 0xff) #define MCP3911_REG_WRITE(reg, id) ((((reg) << 1) | ((id) << 5) | (0 << 0)) & 0xff) @@ -139,11 +139,18 @@ static int mcp3911_read_raw(struct iio_dev *indio_dev, *val = ret / 1000; } else { - *val = MCP3911_INT_VREF_UV; + *val = MCP3911_INT_VREF_MV; } - *val2 = 24; - ret = IIO_VAL_FRACTIONAL_LOG2; + /* + * For 24bit Conversion + * Raw = ((Voltage)/(Vref) * 2^23 * Gain * 1.5 + * Voltage = Raw * (Vref)/(2^23 * Gain * 1.5) + */ + + /* val2 = (2^23 * 1.5) */ + *val2 = 12582912; + ret = IIO_VAL_FRACTIONAL; break; } From 767470209cedbe2cc72ba38d77c9f096d2c7694c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 5 Aug 2022 09:55:03 +0200 Subject: [PATCH 159/654] dt-bindings: iio: gyroscope: bosch,bmg160: correct number of pins BMG160 has two interrupt pins to which interrupts can be freely mapped. Correct the schema to express such case and fix warnings like: qcom/msm8916-alcatel-idol347.dtb: gyroscope@68: interrupts: [[97, 1], [98, 1]] is too long However the basic issue still persists - the interrupts should come in a defined order. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220805075503.16983-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/gyroscope/bosch,bmg160.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/iio/gyroscope/bosch,bmg160.yaml b/Documentation/devicetree/bindings/iio/gyroscope/bosch,bmg160.yaml index b6bbc312a7cf7..1414ba9977c16 100644 --- a/Documentation/devicetree/bindings/iio/gyroscope/bosch,bmg160.yaml +++ b/Documentation/devicetree/bindings/iio/gyroscope/bosch,bmg160.yaml @@ -24,8 +24,10 @@ properties: interrupts: minItems: 1 + maxItems: 2 description: Should be configured with type IRQ_TYPE_EDGE_RISING. + If two interrupts are provided, expected order is INT1 and INT2. required: - compatible From 22b4277641c6823ec03d5b1cd82628e5e53e75b7 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 19 Aug 2022 11:51:07 +0300 Subject: [PATCH 160/654] iio: ad7292: Prevent regulator double disable The ad7292 tries to add an devm_action for disabling a regulator at device detach using devm_add_action_or_reset(). The devm_add_action_or_reset() does call the release function should adding action fail. The driver inspects the value returned by devm_add_action_or_reset() and manually calls regulator_disable() if adding the action has failed. This leads to double disable and messes the enable count for regulator. Do not manually call disable if devm_add_action_or_reset() fails. Fixes: 506d2e317a0a ("iio: adc: Add driver support for AD7292") Signed-off-by: Matti Vaittinen Tested-by: Marcelo Schmitt Link: https://lore.kernel.org/r/Yv9O+9sxU7gAv3vM@fedora Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7292.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/iio/adc/ad7292.c b/drivers/iio/adc/ad7292.c index 92c68d467c505..a2f9fda25ff34 100644 --- a/drivers/iio/adc/ad7292.c +++ b/drivers/iio/adc/ad7292.c @@ -287,10 +287,8 @@ static int ad7292_probe(struct spi_device *spi) ret = devm_add_action_or_reset(&spi->dev, ad7292_regulator_disable, st); - if (ret) { - regulator_disable(st->reg); + if (ret) return ret; - } ret = regulator_get_voltage(st->reg); if (ret < 0) From 0096fc879358ad6b82ee7e790c07b118c515c980 Mon Sep 17 00:00:00 2001 From: sunliming Date: Mon, 15 Aug 2022 09:29:30 +0800 Subject: [PATCH 161/654] iio: light: cm32181: make cm32181_pm_ops static This symbol is not used outside of cm32181.c, so marks it static. Fixes the following sparse warnings: >> drivers/iio/light/cm32181.c:508:1: sparse: sparse: symbol 'cm32181_pm_ops' was not declared. Should it be static? Reported-by: kernel test robot Signed-off-by: sunliming Link: https://lore.kernel.org/r/20220815012930.150078-1-sunliming@kylinos.cn Signed-off-by: Jonathan Cameron --- drivers/iio/light/cm32181.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/light/cm32181.c b/drivers/iio/light/cm32181.c index edbe6a3138d0b..001055d097509 100644 --- a/drivers/iio/light/cm32181.c +++ b/drivers/iio/light/cm32181.c @@ -505,7 +505,7 @@ static int cm32181_resume(struct device *dev) cm32181->conf_regs[CM32181_REG_ADDR_CMD]); } -DEFINE_SIMPLE_DEV_PM_OPS(cm32181_pm_ops, cm32181_suspend, cm32181_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(cm32181_pm_ops, cm32181_suspend, cm32181_resume); static const struct of_device_id cm32181_of_match[] = { { .compatible = "capella,cm3218" }, From 233f56745be446b289edac2ba8184c09365c005e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Aug 2022 13:02:47 +0800 Subject: [PATCH 162/654] drm/i915/reg: Fix spelling mistake "Unsupport" -> "Unsupported" There is a spelling mistake in a gvt_vgpu_err error message. Fix it. Fixes: 695fbc08d80f ("drm/i915/gvt: replace the gvt_err with gvt_vgpu_err") Signed-off-by: Colin Ian King Signed-off-by: Zhi Wang Link: http://patchwork.freedesktop.org/patch/msgid/20220315202449.2952845-1-colin.i.king@gmail.com Reviewed-by: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 9c8dde079cb41..61423da367105 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -905,7 +905,7 @@ static int update_fdi_rx_iir_status(struct intel_vgpu *vgpu, else if (FDI_RX_IMR_TO_PIPE(offset) != INVALID_INDEX) index = FDI_RX_IMR_TO_PIPE(offset); else { - gvt_vgpu_err("Unsupport registers %x\n", offset); + gvt_vgpu_err("Unsupported registers %x\n", offset); return -EINVAL; } From b75ef35bb57791a5d675699ed4a40c870d1da12f Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 10 Aug 2022 15:55:48 -0600 Subject: [PATCH 163/654] drm/i915/gvt: Fix Comet Lake Prior to the commit below the GAMT_CHKN_BIT_REG address was setup for devices matching (D_KBL | D_CFL), where intel_gvt_get_device_type() returns D_CFL for either Coffee Lake or Comet Lake. Include the missed platform.` Link: https://lore.kernel.org/all/20220808142711.02d16782.alex.williamson@redhat.com Fixes: e0f74ed4634d ("i915/gvt: Separate the MMIO tracking table from GVT-g") Signed-off-by: Alex Williamson Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/166016852965.780835.10366587502693016900.stgit@omen Reviewed-by: Zhenyu Wang --- drivers/gpu/drm/i915/intel_gvt_mmio_table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c index 72dac1718f3e7..6163aeaee9b98 100644 --- a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c +++ b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c @@ -1074,7 +1074,8 @@ static int iterate_skl_plus_mmio(struct intel_gvt_mmio_table_iter *iter) MMIO_D(GEN8_HDC_CHICKEN1); MMIO_D(GEN9_WM_CHICKEN3); - if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) + if (IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv) || IS_COMETLAKE(dev_priv)) MMIO_D(GAMT_CHKN_BIT_REG); if (!IS_BROXTON(dev_priv)) MMIO_D(GEN9_CTX_PREEMPT_REG); From 3dcfb729b5f4a0c9b50742865cd5e6c4dbcc80dc Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 19 Aug 2022 19:30:50 +0200 Subject: [PATCH 164/654] parisc: Make CONFIG_64BIT available for ARCH=parisc64 only With this patch the ARCH= parameter decides if the CONFIG_64BIT option will be set or not. This means, the ARCH= parameter will give: ARCH=parisc -> 32-bit kernel ARCH=parisc64 -> 64-bit kernel This simplifies the usage of the other config options like randconfig, allmodconfig and allyesconfig a lot and produces the output which is expected for parisc64 (64-bit) vs. parisc (32-bit). Suggested-by: Masahiro Yamada Signed-off-by: Helge Deller Tested-by: Randy Dunlap Reviewed-by: Randy Dunlap Cc: # 5.15+ --- arch/parisc/Kconfig | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 7f059cd1196a1..9aede2447011b 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -146,10 +146,10 @@ menu "Processor type and features" choice prompt "Processor type" - default PA7000 + default PA7000 if "$(ARCH)" = "parisc" config PA7000 - bool "PA7000/PA7100" + bool "PA7000/PA7100" if "$(ARCH)" = "parisc" help This is the processor type of your CPU. This information is used for optimizing purposes. In order to compile a kernel @@ -160,21 +160,21 @@ config PA7000 which is required on some machines. config PA7100LC - bool "PA7100LC" + bool "PA7100LC" if "$(ARCH)" = "parisc" help Select this option for the PCX-L processor, as used in the 712, 715/64, 715/80, 715/100, 715/100XC, 725/100, 743, 748, D200, D210, D300, D310 and E-class config PA7200 - bool "PA7200" + bool "PA7200" if "$(ARCH)" = "parisc" help Select this option for the PCX-T' processor, as used in the C100, C110, J100, J110, J210XC, D250, D260, D350, D360, K100, K200, K210, K220, K400, K410 and K420 config PA7300LC - bool "PA7300LC" + bool "PA7300LC" if "$(ARCH)" = "parisc" help Select this option for the PCX-L2 processor, as used in the 744, A180, B132L, B160L, B180L, C132L, C160L, C180L, @@ -224,17 +224,8 @@ config MLONGCALLS Enabling this option will probably slow down your kernel. config 64BIT - bool "64-bit kernel" + def_bool "$(ARCH)" = "parisc64" depends on PA8X00 - help - Enable this if you want to support 64bit kernel on PA-RISC platform. - - At the moment, only people willing to use more than 2GB of RAM, - or having a 64bit-only capable PA-RISC machine should say Y here. - - Since there is no 64bit userland on PA-RISC, there is no point to - enable this option otherwise. The 64bit kernel is significantly bigger - and slower than the 32bit one. choice prompt "Kernel page size" From b4b18f47f4f9682fbf5827682645da7c8dde8f80 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 21 Aug 2022 08:12:19 +0200 Subject: [PATCH 165/654] Revert "parisc: Show error if wrong 32/64-bit compiler is being used" This reverts commit b160628e9ebcdc85d0db9d7f423c26b3c7c179d0. There is no need any longer to have this sanity check, because the previous commit ("parisc: Make CONFIG_64BIT available for ARCH=parisc64 only") prevents that CONFIG_64BIT is set if ARCH==parisc. Signed-off-by: Helge Deller --- arch/parisc/include/asm/bitops.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h index 56ffd260c669b..0ec9cfc5131fc 100644 --- a/arch/parisc/include/asm/bitops.h +++ b/arch/parisc/include/asm/bitops.h @@ -12,14 +12,6 @@ #include #include -/* compiler build environment sanity checks: */ -#if !defined(CONFIG_64BIT) && defined(__LP64__) -#error "Please use 'ARCH=parisc' to build the 32-bit kernel." -#endif -#if defined(CONFIG_64BIT) && !defined(__LP64__) -#error "Please use 'ARCH=parisc64' to build the 64-bit kernel." -#endif - /* See http://marc.theaimsgroup.com/?t=108826637900003 for discussion * on use of volatile and __*_bit() (set/clear/change): * *_bit() want use of volatile. From db4538ad4db280a2114e9d507d47c9f1a9f2cfd8 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 11 Aug 2022 21:59:53 +0800 Subject: [PATCH 166/654] parisc: ccio-dma: Fix typo in comment The double `was' is duplicated in the comment, remove one. Signed-off-by: Jason Wang Signed-off-by: Helge Deller --- drivers/parisc/ccio-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 9be007c9420f9..e863eb6483794 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -268,7 +268,7 @@ static int ioc_count; * Each bit can represent a number of pages. * LSbs represent lower addresses (IOVA's). * -* This was was copied from sba_iommu.c. Don't try to unify +* This was copied from sba_iommu.c. Don't try to unify * the two resource managers unless a way to have different * allocation policies is also adjusted. We'd like to avoid * I/O TLB thrashing by having resource allocation policy From 4cb2643667c26fc976a4941b92f3770da9ec06a0 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 18 Aug 2022 23:00:55 +0200 Subject: [PATCH 167/654] parisc: led: Move from strlcpy with unused retval to strscpy Follow the advice of the below link and prefer 'strscpy' in this subsystem. Conversion is 1:1 because the return value is not used. Generated by a coccinelle script. Link: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/ Signed-off-by: Wolfram Sang Signed-off-by: Helge Deller --- drivers/parisc/led.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index 1e4a5663d0112..d4be9d2ee74d9 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -646,7 +646,7 @@ int lcd_print( const char *str ) cancel_delayed_work_sync(&led_task); /* copy display string to buffer for procfs */ - strlcpy(lcd_text, str, sizeof(lcd_text)); + strscpy(lcd_text, str, sizeof(lcd_text)); /* Set LCD Cursor to 1st character */ gsc_writeb(lcd_info.reset_cmd1, LCD_CMD_REG); From d46c742f827fa2326ab1f4faa1cccadb56912341 Mon Sep 17 00:00:00 2001 From: Li Qiong Date: Fri, 19 Aug 2022 12:15:10 +0800 Subject: [PATCH 168/654] parisc: ccio-dma: Handle kmalloc failure in ccio_init_resources() As the possible failure of the kmalloc(), it should be better to fix this error path, check and return '-ENOMEM' error code. Signed-off-by: Li Qiong Signed-off-by: Helge Deller --- drivers/parisc/ccio-dma.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index e863eb6483794..f223afe47d104 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1380,15 +1380,17 @@ ccio_init_resource(struct resource *res, char *name, void __iomem *ioaddr) } } -static void __init ccio_init_resources(struct ioc *ioc) +static int __init ccio_init_resources(struct ioc *ioc) { struct resource *res = ioc->mmio_region; char *name = kmalloc(14, GFP_KERNEL); - + if (unlikely(!name)) + return -ENOMEM; snprintf(name, 14, "GSC Bus [%d/]", ioc->hw_path); ccio_init_resource(res, name, &ioc->ioc_regs->io_io_low); ccio_init_resource(res + 1, name, &ioc->ioc_regs->io_io_low_hv); + return 0; } static int new_ioc_area(struct resource *res, unsigned long size, @@ -1543,7 +1545,10 @@ static int __init ccio_probe(struct parisc_device *dev) return -ENOMEM; } ccio_ioc_init(ioc); - ccio_init_resources(ioc); + if (ccio_init_resources(ioc)) { + kfree(ioc); + return -ENOMEM; + } hppa_dma_ops = &ccio_ops; hba = kzalloc(sizeof(*hba), GFP_KERNEL); From 591d2108f3abc4db9f9073cae37cf3591fd250d6 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 21 Aug 2022 14:49:58 +0200 Subject: [PATCH 169/654] parisc: Add runtime check to prevent PA2.0 kernels on PA1.x machines If a 32-bit kernel was compiled for PA2.0 CPUs, it won't be able to run on machines with PA1.x CPUs. Add a check and bail out early if a PA1.x machine is detected. Signed-off-by: Helge Deller --- arch/parisc/kernel/head.S | 43 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index e0a9e96576221..fd15fd4bbb61b 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -22,7 +22,7 @@ #include #include - .level PA_ASM_LEVEL + .level 1.1 __INITDATA ENTRY(boot_args) @@ -70,6 +70,47 @@ $bss_loop: stw,ma %arg2,4(%r1) stw,ma %arg3,4(%r1) +#if !defined(CONFIG_64BIT) && defined(CONFIG_PA20) + /* This 32-bit kernel was compiled for PA2.0 CPUs. Check current CPU + * and halt kernel if we detect a PA1.x CPU. */ + ldi 32,%r10 + mtctl %r10,%cr11 + .level 2.0 + mfctl,w %cr11,%r10 + .level 1.1 + comib,<>,n 0,%r10,$cpu_ok + + load32 PA(msg1),%arg0 + ldi msg1_end-msg1,%arg1 +$iodc_panic: + copy %arg0, %r10 + copy %arg1, %r11 + load32 PA(init_stack),%sp +#define MEM_CONS 0x3A0 + ldw MEM_CONS+32(%r0),%arg0 // HPA + ldi ENTRY_IO_COUT,%arg1 + ldw MEM_CONS+36(%r0),%arg2 // SPA + ldw MEM_CONS+8(%r0),%arg3 // layers + load32 PA(__bss_start),%r1 + stw %r1,-52(%sp) // arg4 + stw %r0,-56(%sp) // arg5 + stw %r10,-60(%sp) // arg6 = ptr to text + stw %r11,-64(%sp) // arg7 = len + stw %r0,-68(%sp) // arg8 + load32 PA(.iodc_panic_ret), %rp + ldw MEM_CONS+40(%r0),%r1 // ENTRY_IODC + bv,n (%r1) +.iodc_panic_ret: + b . /* wait endless with ... */ + or %r10,%r10,%r10 /* qemu idle sleep */ +msg1: .ascii "Can't boot kernel which was built for PA8x00 CPUs on this machine.\r\n" +msg1_end: + +$cpu_ok: +#endif + + .level PA_ASM_LEVEL + /* Initialize startup VM. Just map first 16/32 MB of memory */ load32 PA(swapper_pg_dir),%r4 mtctl %r4,%cr24 /* Initialize kernel root pointer */ From 0c3bc7899e6dfb52df1c46118a5a670ae619645f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 20 Jul 2022 14:32:52 +0200 Subject: [PATCH 170/654] ntfs: fix acl handling While looking at our current POSIX ACL handling in the context of some overlayfs work I went through a range of other filesystems checking how they handle them currently and encountered ntfs3. The posic_acl_{from,to}_xattr() helpers always need to operate on the filesystem idmapping. Since ntfs3 can only be mounted in the initial user namespace the relevant idmapping is init_user_ns. The posix_acl_{from,to}_xattr() helpers are concerned with translating between the kernel internal struct posix_acl{_entry} and the uapi struct posix_acl_xattr_{header,entry} and the kernel internal data structure is cached filesystem wide. Additional idmappings such as the caller's idmapping or the mount's idmapping are handled higher up in the VFS. Individual filesystems usually do not need to concern themselves with these. The posix_acl_valid() helper is concerned with checking whether the values in the kernel internal struct posix_acl can be represented in the filesystem's idmapping. IOW, if they can be written to disk. So this helper too needs to take the filesystem's idmapping. Fixes: be71b5cba2e6 ("fs/ntfs3: Add attrib operations") Cc: Konstantin Komarov Cc: ntfs3@lists.linux.dev Signed-off-by: Christian Brauner (Microsoft) --- fs/ntfs3/xattr.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 5e0e0280e70de..3e9118705174f 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -478,8 +478,7 @@ static noinline int ntfs_set_ea(struct inode *inode, const char *name, } #ifdef CONFIG_NTFS3_FS_POSIX_ACL -static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns, - struct inode *inode, int type, +static struct posix_acl *ntfs_get_acl_ex(struct inode *inode, int type, int locked) { struct ntfs_inode *ni = ntfs_i(inode); @@ -514,7 +513,7 @@ static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns, /* Translate extended attribute to acl. */ if (err >= 0) { - acl = posix_acl_from_xattr(mnt_userns, buf, err); + acl = posix_acl_from_xattr(&init_user_ns, buf, err); } else if (err == -ENODATA) { acl = NULL; } else { @@ -537,8 +536,7 @@ struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu) if (rcu) return ERR_PTR(-ECHILD); - /* TODO: init_user_ns? */ - return ntfs_get_acl_ex(&init_user_ns, inode, type, 0); + return ntfs_get_acl_ex(inode, type, 0); } static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, @@ -595,7 +593,7 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, value = kmalloc(size, GFP_NOFS); if (!value) return -ENOMEM; - err = posix_acl_to_xattr(mnt_userns, acl, value, size); + err = posix_acl_to_xattr(&init_user_ns, acl, value, size); if (err < 0) goto out; flags = 0; @@ -641,7 +639,7 @@ static int ntfs_xattr_get_acl(struct user_namespace *mnt_userns, if (!acl) return -ENODATA; - err = posix_acl_to_xattr(mnt_userns, acl, buffer, size); + err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); posix_acl_release(acl); return err; @@ -665,12 +663,12 @@ static int ntfs_xattr_set_acl(struct user_namespace *mnt_userns, if (!value) { acl = NULL; } else { - acl = posix_acl_from_xattr(mnt_userns, value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { - err = posix_acl_valid(mnt_userns, acl); + err = posix_acl_valid(&init_user_ns, acl); if (err) goto release_and_out; } From 15c56208c79c340686869c31595c209d1431c5e8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 15 Aug 2022 10:33:20 +0300 Subject: [PATCH 171/654] mmc: core: Fix UHS-I SD 1.8V workaround branch When introduced, upon success, the 1.8V fixup workaround in mmc_sd_init_card() would branch to practically the end of the function, to a label named "done". Unfortunately, perhaps due to the label name, over time new code has been added that really should have come after "done" not before it. Let's fix the problem by moving the label to the correct place and rename it "cont". Fixes: 045d705dc1fb ("mmc: core: Enable the MMC host software queue for the SD card") Signed-off-by: Adrian Hunter Reviewed-by: Seunghui Lee Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220815073321.63382-2-adrian.hunter@intel.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/sd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index cee4c0b59f43d..bc84d7dfc8e19 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -1498,7 +1498,7 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, mmc_remove_card(card); goto retry; } - goto done; + goto cont; } } @@ -1534,7 +1534,7 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, mmc_set_bus_width(host, MMC_BUS_WIDTH_4); } } - +cont: if (!oldcard) { /* Read/parse the extension registers. */ err = sd_read_ext_regs(card); @@ -1566,7 +1566,7 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, err = -EINVAL; goto free_card; } -done: + host->card = card; return 0; From 63f1560930e4e1c4f6279b8ae715c9841fe1a6d3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 15 Aug 2022 10:33:21 +0300 Subject: [PATCH 172/654] mmc: core: Fix inconsistent sd3_bus_mode at UHS-I SD voltage switch failure If re-initialization results is a different signal voltage, because the voltage switch failed previously, but not this time (or vice versa), then sd3_bus_mode will be inconsistent with the card because the SD_SWITCH command is done only upon first initialization. Fix by always reading SD_SWITCH information during re-initialization, which also means it does not need to be re-read later for the 1.8V fixup workaround. Note, brief testing showed SD_SWITCH took about 1.8ms to 2ms which added about 1% to 1.5% to the re-initialization time, so it's not particularly significant. Reported-by: Seunghui Lee Signed-off-by: Adrian Hunter Reviewed-by: Seunghui Lee Tested-by: Seunghui Lee Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220815073321.63382-3-adrian.hunter@intel.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/sd.c | 42 ++++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index bc84d7dfc8e19..06aa62ce0ed1f 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -949,15 +949,16 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card, /* Erase init depends on CSD and SSR */ mmc_init_erase(card); - - /* - * Fetch switch information from card. - */ - err = mmc_read_switch(card); - if (err) - return err; } + /* + * Fetch switch information from card. Note, sd3_bus_mode can change if + * voltage switch outcome changes, so do this always. + */ + err = mmc_read_switch(card); + if (err) + return err; + /* * For SPI, enable CRC as appropriate. * This CRC enable is located AFTER the reading of the @@ -1480,26 +1481,15 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, if (!v18_fixup_failed && !mmc_host_is_spi(host) && mmc_host_uhs(host) && mmc_sd_card_using_v18(card) && host->ios.signal_voltage != MMC_SIGNAL_VOLTAGE_180) { - /* - * Re-read switch information in case it has changed since - * oldcard was initialized. - */ - if (oldcard) { - err = mmc_read_switch(card); - if (err) - goto free_card; - } - if (mmc_sd_card_using_v18(card)) { - if (mmc_host_set_uhs_voltage(host) || - mmc_sd_init_uhs_card(card)) { - v18_fixup_failed = true; - mmc_power_cycle(host, ocr); - if (!oldcard) - mmc_remove_card(card); - goto retry; - } - goto cont; + if (mmc_host_set_uhs_voltage(host) || + mmc_sd_init_uhs_card(card)) { + v18_fixup_failed = true; + mmc_power_cycle(host, ocr); + if (!oldcard) + mmc_remove_card(card); + goto retry; } + goto cont; } /* Initialization sequence for UHS-I cards */ From a8d302a0b77057568350fe0123e639d02dba0745 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 21 Aug 2022 17:59:11 +0200 Subject: [PATCH 173/654] ALSA: memalloc: Revive x86-specific WC page allocations again We dropped the x86-specific hack for WC-page allocations with a hope that the standard dma_alloc_wc() works nowadays. Alas, it doesn't, and we need to take back some workaround again, but in a different form, as the previous one was broken for some platforms. This patch re-introduces the x86-specific WC-page allocations, but it uses rather the manual page allocations instead of dma_alloc_coherent(). The use of dma_alloc_coherent() was also a potential problem in the recent addition of the fallback allocation for noncontig pages, and this patch eliminates both at once. Fixes: 9882d63bea14 ("ALSA: memalloc: Drop x86-specific hack for WC allocations") Cc: BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=216363 Link: https://lore.kernel.org/r/20220821155911.10715-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/memalloc.c | 87 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 16 deletions(-) diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index d3885cb02270e..b665ac66ccbe8 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -20,6 +20,13 @@ static const struct snd_malloc_ops *snd_dma_get_ops(struct snd_dma_buffer *dmab); +#ifdef CONFIG_SND_DMA_SGBUF +static void *do_alloc_fallback_pages(struct device *dev, size_t size, + dma_addr_t *addr, bool wc); +static void do_free_fallback_pages(void *p, size_t size, bool wc); +static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size); +#endif + /* a cast to gfp flag from the dev pointer; for CONTINUOUS and VMALLOC types */ static inline gfp_t snd_mem_get_gfp_flags(const struct snd_dma_buffer *dmab, gfp_t default_gfp) @@ -277,16 +284,21 @@ EXPORT_SYMBOL(snd_sgbuf_get_chunk_size); /* * Continuous pages allocator */ -static void *snd_dma_continuous_alloc(struct snd_dma_buffer *dmab, size_t size) +static void *do_alloc_pages(size_t size, dma_addr_t *addr, gfp_t gfp) { - gfp_t gfp = snd_mem_get_gfp_flags(dmab, GFP_KERNEL); void *p = alloc_pages_exact(size, gfp); if (p) - dmab->addr = page_to_phys(virt_to_page(p)); + *addr = page_to_phys(virt_to_page(p)); return p; } +static void *snd_dma_continuous_alloc(struct snd_dma_buffer *dmab, size_t size) +{ + return do_alloc_pages(size, &dmab->addr, + snd_mem_get_gfp_flags(dmab, GFP_KERNEL)); +} + static void snd_dma_continuous_free(struct snd_dma_buffer *dmab) { free_pages_exact(dmab->area, dmab->bytes); @@ -463,6 +475,25 @@ static const struct snd_malloc_ops snd_dma_dev_ops = { /* * Write-combined pages */ +/* x86-specific allocations */ +#ifdef CONFIG_SND_DMA_SGBUF +static void *snd_dma_wc_alloc(struct snd_dma_buffer *dmab, size_t size) +{ + return do_alloc_fallback_pages(dmab->dev.dev, size, &dmab->addr, true); +} + +static void snd_dma_wc_free(struct snd_dma_buffer *dmab) +{ + do_free_fallback_pages(dmab->area, dmab->bytes, true); +} + +static int snd_dma_wc_mmap(struct snd_dma_buffer *dmab, + struct vm_area_struct *area) +{ + area->vm_page_prot = pgprot_writecombine(area->vm_page_prot); + return snd_dma_continuous_mmap(dmab, area); +} +#else static void *snd_dma_wc_alloc(struct snd_dma_buffer *dmab, size_t size) { return dma_alloc_wc(dmab->dev.dev, size, &dmab->addr, DEFAULT_GFP); @@ -479,6 +510,7 @@ static int snd_dma_wc_mmap(struct snd_dma_buffer *dmab, return dma_mmap_wc(dmab->dev.dev, area, dmab->area, dmab->addr, dmab->bytes); } +#endif /* CONFIG_SND_DMA_SGBUF */ static const struct snd_malloc_ops snd_dma_wc_ops = { .alloc = snd_dma_wc_alloc, @@ -486,10 +518,6 @@ static const struct snd_malloc_ops snd_dma_wc_ops = { .mmap = snd_dma_wc_mmap, }; -#ifdef CONFIG_SND_DMA_SGBUF -static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size); -#endif - /* * Non-contiguous pages allocator */ @@ -669,6 +697,37 @@ static const struct snd_malloc_ops snd_dma_sg_wc_ops = { .get_chunk_size = snd_dma_noncontig_get_chunk_size, }; +/* manual page allocations with wc setup */ +static void *do_alloc_fallback_pages(struct device *dev, size_t size, + dma_addr_t *addr, bool wc) +{ + gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; + void *p; + + again: + p = do_alloc_pages(size, addr, gfp); + if (!p || (*addr + size - 1) & ~dev->coherent_dma_mask) { + if (IS_ENABLED(CONFIG_ZONE_DMA32) && !(gfp & GFP_DMA32)) { + gfp |= GFP_DMA32; + goto again; + } + if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) { + gfp = (gfp & ~GFP_DMA32) | GFP_DMA; + goto again; + } + } + if (p && wc) + set_memory_wc((unsigned long)(p), size >> PAGE_SHIFT); + return p; +} + +static void do_free_fallback_pages(void *p, size_t size, bool wc) +{ + if (wc) + set_memory_wb((unsigned long)(p), size >> PAGE_SHIFT); + free_pages_exact(p, size); +} + /* Fallback SG-buffer allocations for x86 */ struct snd_dma_sg_fallback { size_t count; @@ -679,14 +738,11 @@ struct snd_dma_sg_fallback { static void __snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab, struct snd_dma_sg_fallback *sgbuf) { + bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; size_t i; - if (sgbuf->count && dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) - set_pages_array_wb(sgbuf->pages, sgbuf->count); for (i = 0; i < sgbuf->count && sgbuf->pages[i]; i++) - dma_free_coherent(dmab->dev.dev, PAGE_SIZE, - page_address(sgbuf->pages[i]), - sgbuf->addrs[i]); + do_free_fallback_pages(page_address(sgbuf->pages[i]), PAGE_SIZE, wc); kvfree(sgbuf->pages); kvfree(sgbuf->addrs); kfree(sgbuf); @@ -698,6 +754,7 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) struct page **pages; size_t i, count; void *p; + bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; sgbuf = kzalloc(sizeof(*sgbuf), GFP_KERNEL); if (!sgbuf) @@ -712,15 +769,13 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) goto error; for (i = 0; i < count; sgbuf->count++, i++) { - p = dma_alloc_coherent(dmab->dev.dev, PAGE_SIZE, - &sgbuf->addrs[i], DEFAULT_GFP); + p = do_alloc_fallback_pages(dmab->dev.dev, PAGE_SIZE, + &sgbuf->addrs[i], wc); if (!p) goto error; sgbuf->pages[i] = virt_to_page(p); } - if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) - set_pages_array_wc(pages, count); p = vmap(pages, count, VM_MAP, PAGE_KERNEL); if (!p) goto error; From 3c53cd65dece47dd1f9d3a809f32e59d1d87b2b8 Mon Sep 17 00:00:00 2001 From: Bernard Pidoux Date: Thu, 18 Aug 2022 02:02:13 +0200 Subject: [PATCH 174/654] rose: check NULL rose_loopback_neigh->loopback Commit 3b3fd068c56e3fbea30090859216a368398e39bf added NULL check for `rose_loopback_neigh->dev` in rose_loopback_timer() but omitted to check rose_loopback_neigh->loopback. It thus prevents *all* rose connect. The reason is that a special rose_neigh loopback has a NULL device. /proc/net/rose_neigh illustrates it via rose_neigh_show() function : [...] seq_printf(seq, "%05d %-9s %-4s %3d %3d %3s %3s %3lu %3lu", rose_neigh->number, (rose_neigh->loopback) ? "RSLOOP-0" : ax2asc(buf, &rose_neigh->callsign), rose_neigh->dev ? rose_neigh->dev->name : "???", rose_neigh->count, /proc/net/rose_neigh displays special rose_loopback_neigh->loopback as callsign RSLOOP-0: addr callsign dev count use mode restart t0 tf digipeaters 00001 RSLOOP-0 ??? 1 2 DCE yes 0 0 By checking rose_loopback_neigh->loopback, rose_rx_call_request() is called even in case rose_loopback_neigh->dev is NULL. This repairs rose connections. Verification with rose client application FPAC: FPAC-Node v 4.1.3 (built Aug 5 2022) for LINUX (help = h) F6BVP-4 (Commands = ?) : u Users - AX.25 Level 2 sessions : Port Callsign Callsign AX.25 state ROSE state NetRom status axudp F6BVP-5 -> F6BVP-9 Connected Connected --------- Fixes: 3b3fd068c56e ("rose: Fix Null pointer dereference in rose_send_frame()") Signed-off-by: Bernard Pidoux Suggested-by: Francois Romieu Cc: Thomas DL9SAU Osterried Signed-off-by: David S. Miller --- net/rose/rose_loopback.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 11c45c8c6c164..036d92c0ad794 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -96,7 +96,8 @@ static void rose_loopback_timer(struct timer_list *unused) } if (frametype == ROSE_CALL_REQUEST) { - if (!rose_loopback_neigh->dev) { + if (!rose_loopback_neigh->dev && + !rose_loopback_neigh->loopback) { kfree_skb(skb); continue; } From 6dc4df12d741c0fe8f885778a43039e0619b9cd9 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Thu, 18 Aug 2022 16:06:19 +0800 Subject: [PATCH 175/654] r8152: fix the units of some registers for RTL8156A The units of PLA_RX_FIFO_FULL and PLA_RX_FIFO_EMPTY are 16 bytes. Fixes: 195aae321c82 ("r8152: support new chips") Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0f6efaabaa32b..46c7954d27629 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -6431,21 +6431,8 @@ static void r8156_fc_parameter(struct r8152 *tp) u32 pause_on = tp->fc_pause_on ? tp->fc_pause_on : fc_pause_on_auto(tp); u32 pause_off = tp->fc_pause_off ? tp->fc_pause_off : fc_pause_off_auto(tp); - switch (tp->version) { - case RTL_VER_10: - case RTL_VER_11: - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 8); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 8); - break; - case RTL_VER_12: - case RTL_VER_13: - case RTL_VER_15: - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 16); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 16); - break; - default: - break; - } + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 16); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 16); } static void rtl8156_change_mtu(struct r8152 *tp) From b75d612014447e04abdf0e37ffb8f2fd8b0b49d6 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Thu, 18 Aug 2022 16:06:20 +0800 Subject: [PATCH 176/654] r8152: fix the RX FIFO settings when suspending The RX FIFO would be changed when suspending, so the related settings have to be modified, too. Otherwise, the flow control would work abnormally. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=216333 Reported-by: Mark Blakeney Fixes: cdf0b86b250f ("r8152: fix a WOL issue") Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 46c7954d27629..d142ac8fcf6e2 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5906,6 +5906,11 @@ static void r8153_enter_oob(struct r8152 *tp) ocp_data &= ~NOW_IS_OOB; ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + /* RX FIFO settings for OOB */ + ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL0, RXFIFO_THR1_OOB); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL1, RXFIFO_THR2_OOB); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL2, RXFIFO_THR3_OOB); + rtl_disable(tp); rtl_reset_bmu(tp); @@ -6544,6 +6549,11 @@ static void rtl8156_down(struct r8152 *tp) ocp_data &= ~NOW_IS_OOB; ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + /* RX FIFO settings for OOB */ + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_FULL, 64 / 16); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, 1024 / 16); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, 4096 / 16); + rtl_disable(tp); rtl_reset_bmu(tp); From 123d6455771ec577ce65f8d1bda548fb0eb7ef21 Mon Sep 17 00:00:00 2001 From: Wang Jingjin Date: Mon, 1 Aug 2022 16:47:45 +0800 Subject: [PATCH 177/654] ftrace: Fix build warning for ops_references_rec() not used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The change that made IPMODIFY and DIRECT ops work together needed access to the ops_references_ip() function, which it pulled out of the module only code. But now if both CONFIG_MODULES and CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS is not set, we get the below warning: ‘ops_references_rec’ defined but not used. Since ops_references_rec() only calls ops_references_ip() replace the usage of ops_references_rec() with ops_references_ip() and encompass the function with an #ifdef of DIRECT_CALLS || MODULES being defined. Link: https://lkml.kernel.org/r/20220801084745.1187987-1-wangjingjin1@huawei.com Fixes: 53cd885bc5c3 ("ftrace: Allow IPMODIFY and DIRECT ops on the same function") Signed-off-by: Wang Jingjin Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 79 ++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 46 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 126c769d36c3c..439e2ab6905ee 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1861,8 +1861,6 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, ftrace_hash_rec_update_modify(ops, filter_hash, 1); } -static bool ops_references_ip(struct ftrace_ops *ops, unsigned long ip); - /* * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK * or no-needed to update, -EBUSY if it detects a conflict of the flag @@ -3118,49 +3116,6 @@ static inline int ops_traces_mod(struct ftrace_ops *ops) ftrace_hash_empty(ops->func_hash->notrace_hash); } -/* - * Check if the current ops references the given ip. - * - * If the ops traces all functions, then it was already accounted for. - * If the ops does not trace the current record function, skip it. - * If the ops ignores the function via notrace filter, skip it. - */ -static bool -ops_references_ip(struct ftrace_ops *ops, unsigned long ip) -{ - /* If ops isn't enabled, ignore it */ - if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) - return false; - - /* If ops traces all then it includes this function */ - if (ops_traces_mod(ops)) - return true; - - /* The function must be in the filter */ - if (!ftrace_hash_empty(ops->func_hash->filter_hash) && - !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip)) - return false; - - /* If in notrace hash, we ignore it too */ - if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip)) - return false; - - return true; -} - -/* - * Check if the current ops references the record. - * - * If the ops traces all functions, then it was already accounted for. - * If the ops does not trace the current record function, skip it. - * If the ops ignores the function via notrace filter, skip it. - */ -static bool -ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) -{ - return ops_references_ip(ops, rec->ip); -} - static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) { bool init_nop = ftrace_need_init_nop(); @@ -6822,6 +6777,38 @@ static int ftrace_get_trampoline_kallsym(unsigned int symnum, return -ERANGE; } +#if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) || defined(CONFIG_MODULES) +/* + * Check if the current ops references the given ip. + * + * If the ops traces all functions, then it was already accounted for. + * If the ops does not trace the current record function, skip it. + * If the ops ignores the function via notrace filter, skip it. + */ +static bool +ops_references_ip(struct ftrace_ops *ops, unsigned long ip) +{ + /* If ops isn't enabled, ignore it */ + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return false; + + /* If ops traces all then it includes this function */ + if (ops_traces_mod(ops)) + return true; + + /* The function must be in the filter */ + if (!ftrace_hash_empty(ops->func_hash->filter_hash) && + !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip)) + return false; + + /* If in notrace hash, we ignore it too */ + if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip)) + return false; + + return true; +} +#endif + #ifdef CONFIG_MODULES #define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) @@ -6834,7 +6821,7 @@ static int referenced_filters(struct dyn_ftrace *rec) int cnt = 0; for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { - if (ops_references_rec(ops, rec)) { + if (ops_references_ip(ops, rec->ip)) { if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_DIRECT)) continue; if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_IPMODIFY)) From f1e941dbf80a9b8bab0bffbc4cbe41cc7f4c6fb6 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 18 Aug 2022 17:06:21 +0800 Subject: [PATCH 178/654] nfc: pn533: Fix use-after-free bugs caused by pn532_cmd_timeout When the pn532 uart device is detaching, the pn532_uart_remove() is called. But there are no functions in pn532_uart_remove() that could delete the cmd_timeout timer, which will cause use-after-free bugs. The process is shown below: (thread 1) | (thread 2) | pn532_uart_send_frame pn532_uart_remove | mod_timer(&pn532->cmd_timeout,...) ... | (wait a time) kfree(pn532) //FREE | pn532_cmd_timeout | pn532_uart_send_frame | pn532->... //USE This patch adds del_timer_sync() in pn532_uart_remove() in order to prevent the use-after-free bugs. What's more, the pn53x_unregister_nfc() is well synchronized, it sets nfc_dev->shutting_down to true and there are no syscalls could restart the cmd_timeout timer. Fixes: c656aa4c27b1 ("nfc: pn533: add UART phy driver") Signed-off-by: Duoming Zhou Signed-off-by: David S. Miller --- drivers/nfc/pn533/uart.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nfc/pn533/uart.c b/drivers/nfc/pn533/uart.c index 2caf997f9bc94..07596bf5f7d6d 100644 --- a/drivers/nfc/pn533/uart.c +++ b/drivers/nfc/pn533/uart.c @@ -310,6 +310,7 @@ static void pn532_uart_remove(struct serdev_device *serdev) pn53x_unregister_nfc(pn532->priv); serdev_device_close(serdev); pn53x_common_clean(pn532->priv); + del_timer_sync(&pn532->cmd_timeout); kfree_skb(pn532->recv_skb); kfree(pn532); } From 5a42f112d367bb4700a8a41f5c12724fde6bfbb9 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 11 Aug 2022 20:21:48 +0200 Subject: [PATCH 179/654] ice: xsk: prohibit usage of non-balanced queue id Fix the following scenario: 1. ethtool -L $IFACE rx 8 tx 96 2. xdpsock -q 10 -t -z Above refers to a case where user would like to attach XSK socket in txonly mode at a queue id that does not have a corresponding Rx queue. At this moment ice's XSK logic is tightly bound to act on a "queue pair", e.g. both Tx and Rx queues at a given queue id are disabled/enabled and both of them will get XSK pool assigned, which is broken for the presented queue configuration. This results in the splat included at the bottom, which is basically an OOB access to Rx ring array. To fix this, allow using the ids only in scope of "combined" queues reported by ethtool. However, logic should be rewritten to allow such configurations later on, which would end up as a complete rewrite of the control path, so let us go with this temporary fix. [420160.558008] BUG: kernel NULL pointer dereference, address: 0000000000000082 [420160.566359] #PF: supervisor read access in kernel mode [420160.572657] #PF: error_code(0x0000) - not-present page [420160.579002] PGD 0 P4D 0 [420160.582756] Oops: 0000 [#1] PREEMPT SMP NOPTI [420160.588396] CPU: 10 PID: 21232 Comm: xdpsock Tainted: G OE 5.19.0-rc7+ #10 [420160.597893] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019 [420160.609894] RIP: 0010:ice_xsk_pool_setup+0x44/0x7d0 [ice] [420160.616968] Code: f3 48 83 ec 40 48 8b 4f 20 48 8b 3f 65 48 8b 04 25 28 00 00 00 48 89 44 24 38 31 c0 48 8d 04 ed 00 00 00 00 48 01 c1 48 8b 11 <0f> b7 92 82 00 00 00 48 85 d2 0f 84 2d 75 00 00 48 8d 72 ff 48 85 [420160.639421] RSP: 0018:ffffc9002d2afd48 EFLAGS: 00010282 [420160.646650] RAX: 0000000000000050 RBX: ffff88811d8bdd00 RCX: ffff888112c14ff8 [420160.655893] RDX: 0000000000000000 RSI: ffff88811d8bdd00 RDI: ffff888109861000 [420160.665166] RBP: 000000000000000a R08: 000000000000000a R09: 0000000000000000 [420160.674493] R10: 000000000000889f R11: 0000000000000000 R12: 000000000000000a [420160.683833] R13: 000000000000000a R14: 0000000000000000 R15: ffff888117611828 [420160.693211] FS: 00007fa869fc1f80(0000) GS:ffff8897e0880000(0000) knlGS:0000000000000000 [420160.703645] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [420160.711783] CR2: 0000000000000082 CR3: 00000001d076c001 CR4: 00000000007706e0 [420160.721399] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [420160.731045] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [420160.740707] PKRU: 55555554 [420160.745960] Call Trace: [420160.750962] [420160.755597] ? kmalloc_large_node+0x79/0x90 [420160.762703] ? __kmalloc_node+0x3f5/0x4b0 [420160.769341] xp_assign_dev+0xfd/0x210 [420160.775661] ? shmem_file_read_iter+0x29a/0x420 [420160.782896] xsk_bind+0x152/0x490 [420160.788943] __sys_bind+0xd0/0x100 [420160.795097] ? exit_to_user_mode_prepare+0x20/0x120 [420160.802801] __x64_sys_bind+0x16/0x20 [420160.809298] do_syscall_64+0x38/0x90 [420160.815741] entry_SYSCALL_64_after_hwframe+0x63/0xcd [420160.823731] RIP: 0033:0x7fa86a0dd2fb [420160.830264] Code: c3 66 0f 1f 44 00 00 48 8b 15 69 8b 0c 00 f7 d8 64 89 02 b8 ff ff ff ff eb bc 0f 1f 44 00 00 f3 0f 1e fa b8 31 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 3d 8b 0c 00 f7 d8 64 89 01 48 [420160.855410] RSP: 002b:00007ffc1146f618 EFLAGS: 00000246 ORIG_RAX: 0000000000000031 [420160.866366] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fa86a0dd2fb [420160.876957] RDX: 0000000000000010 RSI: 00007ffc1146f680 RDI: 0000000000000003 [420160.887604] RBP: 000055d7113a0520 R08: 00007fa868fb8000 R09: 0000000080000000 [420160.898293] R10: 0000000000008001 R11: 0000000000000246 R12: 000055d7113a04e0 [420160.909038] R13: 000055d7113a0320 R14: 000000000000000a R15: 0000000000000000 [420160.919817] [420160.925659] Modules linked in: ice(OE) af_packet binfmt_misc nls_iso8859_1 ipmi_ssif intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp mei_me coretemp ioatdma mei ipmi_si wmi ipmi_msghandler acpi_pad acpi_power_meter ip_tables x_tables autofs4 ixgbe i40e crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd ahci mdio dca libahci lpc_ich [last unloaded: ice] [420160.977576] CR2: 0000000000000082 [420160.985037] ---[ end trace 0000000000000000 ]--- [420161.097724] RIP: 0010:ice_xsk_pool_setup+0x44/0x7d0 [ice] [420161.107341] Code: f3 48 83 ec 40 48 8b 4f 20 48 8b 3f 65 48 8b 04 25 28 00 00 00 48 89 44 24 38 31 c0 48 8d 04 ed 00 00 00 00 48 01 c1 48 8b 11 <0f> b7 92 82 00 00 00 48 85 d2 0f 84 2d 75 00 00 48 8d 72 ff 48 85 [420161.134741] RSP: 0018:ffffc9002d2afd48 EFLAGS: 00010282 [420161.144274] RAX: 0000000000000050 RBX: ffff88811d8bdd00 RCX: ffff888112c14ff8 [420161.155690] RDX: 0000000000000000 RSI: ffff88811d8bdd00 RDI: ffff888109861000 [420161.168088] RBP: 000000000000000a R08: 000000000000000a R09: 0000000000000000 [420161.179295] R10: 000000000000889f R11: 0000000000000000 R12: 000000000000000a [420161.190420] R13: 000000000000000a R14: 0000000000000000 R15: ffff888117611828 [420161.201505] FS: 00007fa869fc1f80(0000) GS:ffff8897e0880000(0000) knlGS:0000000000000000 [420161.213628] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [420161.223413] CR2: 0000000000000082 CR3: 00000001d076c001 CR4: 00000000007706e0 [420161.234653] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [420161.245893] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [420161.257052] PKRU: 55555554 Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Signed-off-by: Maciej Fijalkowski Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 49ba8bfdbf047..45f88e6ec25e8 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -329,6 +329,12 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) bool if_running, pool_present = !!pool; int ret = 0, pool_failure = 0; + if (qid >= vsi->num_rxq || qid >= vsi->num_txq) { + netdev_err(vsi->netdev, "Please use queue id in scope of combined queues count\n"); + pool_failure = -EINVAL; + goto failure; + } + if (!is_power_of_2(vsi->rx_rings[qid]->count) || !is_power_of_2(vsi->tx_rings[qid]->count)) { netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n"); From 9ead7e74bfd6dd54db12ef133b8604add72511de Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 11 Aug 2022 20:21:49 +0200 Subject: [PATCH 180/654] ice: xsk: use Rx ring's XDP ring when picking NAPI context Ice driver allocates per cpu XDP queues so that redirect path can safely use smp_processor_id() as an index to the array. At the same time though, XDP rings are used to pick NAPI context to call napi_schedule() or set NAPIF_STATE_MISSED. When user reduces queue count, say to 8, and num_possible_cpus() of underlying platform is 44, then this means queue vectors with correlated NAPI contexts will carry several XDP queues. This in turn can result in a broken behavior where NAPI context of interest will never be scheduled and AF_XDP socket will not process any traffic. To fix this, let us change the way how XDP rings are assigned to Rx rings and use this information later on when setting ice_tx_ring::xsk_pool pointer. For each Rx ring, grab the associated queue vector and walk through Tx ring's linked list. Once we stumble upon XDP ring in it, assign this ring to ice_rx_ring::xdp_ring. Previous [0] approach of fixing this issue was for txonly scenario because of the described grouping of XDP rings across queue vectors. So, relying on Rx ring meant that NAPI context could be scheduled with a queue vector without XDP ring with associated XSK pool. [0]: https://lore.kernel.org/netdev/20220707161128.54215-1-maciej.fijalkowski@intel.com/ Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Fixes: 22bf877e528f ("ice: introduce XDP_TX fallback path") Signed-off-by: Maciej Fijalkowski Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 36 +++++++++++++++-------- drivers/net/ethernet/intel/ice/ice_lib.c | 4 +-- drivers/net/ethernet/intel/ice/ice_main.c | 25 +++++++++++----- drivers/net/ethernet/intel/ice/ice_xsk.c | 12 ++++---- 4 files changed, 48 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index cc5b85afd437e..841fa149c4076 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -684,8 +684,8 @@ static inline void ice_set_ring_xdp(struct ice_tx_ring *ring) * ice_xsk_pool - get XSK buffer pool bound to a ring * @ring: Rx ring to use * - * Returns a pointer to xdp_umem structure if there is a buffer pool present, - * NULL otherwise. + * Returns a pointer to xsk_buff_pool structure if there is a buffer pool + * present, NULL otherwise. */ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) { @@ -699,23 +699,33 @@ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) } /** - * ice_tx_xsk_pool - get XSK buffer pool bound to a ring - * @ring: Tx ring to use + * ice_tx_xsk_pool - assign XSK buff pool to XDP ring + * @vsi: pointer to VSI + * @qid: index of a queue to look at XSK buff pool presence * - * Returns a pointer to xdp_umem structure if there is a buffer pool present, - * NULL otherwise. Tx equivalent of ice_xsk_pool. + * Sets XSK buff pool pointer on XDP ring. + * + * XDP ring is picked from Rx ring, whereas Rx ring is picked based on provided + * queue id. Reason for doing so is that queue vectors might have assigned more + * than one XDP ring, e.g. when user reduced the queue count on netdev; Rx ring + * carries a pointer to one of these XDP rings for its own purposes, such as + * handling XDP_TX action, therefore we can piggyback here on the + * rx_ring->xdp_ring assignment that was done during XDP rings initialization. */ -static inline struct xsk_buff_pool *ice_tx_xsk_pool(struct ice_tx_ring *ring) +static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid) { - struct ice_vsi *vsi = ring->vsi; - u16 qid; + struct ice_tx_ring *ring; - qid = ring->q_index - vsi->alloc_txq; + ring = vsi->rx_rings[qid]->xdp_ring; + if (!ring) + return; - if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) - return NULL; + if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) { + ring->xsk_pool = NULL; + return; + } - return xsk_get_pool_from_qid(vsi->netdev, qid); + ring->xsk_pool = xsk_get_pool_from_qid(vsi->netdev, qid); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 733c455f65746..0c4ec92640710 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1986,8 +1986,8 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) if (ret) return ret; - ice_for_each_xdp_txq(vsi, i) - vsi->xdp_rings[i]->xsk_pool = ice_tx_xsk_pool(vsi->xdp_rings[i]); + ice_for_each_rxq(vsi, i) + ice_tx_xsk_pool(vsi, i); return ret; } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 4ecaf40cf946b..173fe6c313418 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2581,7 +2581,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) if (ice_setup_tx_ring(xdp_ring)) goto free_xdp_rings; ice_set_ring_xdp(xdp_ring); - xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); spin_lock_init(&xdp_ring->tx_lock); for (j = 0; j < xdp_ring->count; j++) { tx_desc = ICE_TX_DESC(xdp_ring, j); @@ -2589,13 +2588,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) } } - ice_for_each_rxq(vsi, i) { - if (static_key_enabled(&ice_xdp_locking_key)) - vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq]; - else - vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i]; - } - return 0; free_xdp_rings: @@ -2685,6 +2677,23 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) xdp_rings_rem -= xdp_rings_per_v; } + ice_for_each_rxq(vsi, i) { + if (static_key_enabled(&ice_xdp_locking_key)) { + vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq]; + } else { + struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector; + struct ice_tx_ring *ring; + + ice_for_each_tx_ring(ring, q_vector->tx) { + if (ice_ring_is_xdp(ring)) { + vsi->rx_rings[i]->xdp_ring = ring; + break; + } + } + } + ice_tx_xsk_pool(vsi, i); + } + /* omit the scheduler update if in reset path; XDP queues will be * taken into account at the end of ice_vsi_rebuild, where * ice_cfg_vsi_lan is being called diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 45f88e6ec25e8..e48e29258450f 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -243,7 +243,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) goto free_buf; ice_set_ring_xdp(xdp_ring); - xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); + ice_tx_xsk_pool(vsi, q_idx); } err = ice_vsi_cfg_rxq(rx_ring); @@ -359,7 +359,7 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) if (if_running) { ret = ice_qp_ena(vsi, qid); if (!ret && pool_present) - napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi); + napi_schedule(&vsi->rx_rings[qid]->xdp_ring->q_vector->napi); else if (ret) netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret); } @@ -950,13 +950,13 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, if (!ice_is_xdp_ena_vsi(vsi)) return -EINVAL; - if (queue_id >= vsi->num_txq) + if (queue_id >= vsi->num_txq || queue_id >= vsi->num_rxq) return -EINVAL; - if (!vsi->xdp_rings[queue_id]->xsk_pool) - return -EINVAL; + ring = vsi->rx_rings[queue_id]->xdp_ring; - ring = vsi->xdp_rings[queue_id]; + if (!ring->xsk_pool) + return -EINVAL; /* The idea here is that if NAPI is running, mark a miss, so * it will run again. If not, trigger an interrupt and From b51111271b0352aa596c5ae8faf06939e91b3b68 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Tue, 16 Aug 2022 16:42:56 -0500 Subject: [PATCH 181/654] btrfs: check if root is readonly while setting security xattr For a filesystem which has btrfs read-only property set to true, all write operations including xattr should be denied. However, security xattr can still be changed even if btrfs ro property is true. This happens because xattr_permission() does not have any restrictions on security.*, system.* and in some cases trusted.* from VFS and the decision is left to the underlying filesystem. See comments in xattr_permission() for more details. This patch checks if the root is read-only before performing the set xattr operation. Testcase: DEV=/dev/vdb MNT=/mnt mkfs.btrfs -f $DEV mount $DEV $MNT echo "file one" > $MNT/f1 setfattr -n "security.one" -v 2 $MNT/f1 btrfs property set /mnt ro true setfattr -n "security.one" -v 1 $MNT/f1 umount $MNT CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: Goldwyn Rodrigues Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/xattr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 7421abcf325a5..5bb8d8c863119 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -371,6 +371,9 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler, const char *name, const void *buffer, size_t size, int flags) { + if (btrfs_root_readonly(BTRFS_I(inode)->root)) + return -EROFS; + name = xattr_full_name(handler, name); return btrfs_setxattr_trans(inode, name, buffer, size, flags); } From 9ea0106a7a3d8116860712e3f17cd52ce99f6707 Mon Sep 17 00:00:00 2001 From: Zixuan Fu Date: Mon, 15 Aug 2022 23:16:06 +0800 Subject: [PATCH 182/654] btrfs: fix possible memory leak in btrfs_get_dev_args_from_path() In btrfs_get_dev_args_from_path(), btrfs_get_bdev_and_sb() can fail if the path is invalid. In this case, btrfs_get_dev_args_from_path() returns directly without freeing args->uuid and args->fsid allocated before, which causes memory leak. To fix these possible leaks, when btrfs_get_bdev_and_sb() fails, btrfs_put_dev_args_from_path() is called to clean up the memory. Reported-by: TOTE Robot Fixes: faa775c41d655 ("btrfs: add a btrfs_get_dev_args_from_path helper") CC: stable@vger.kernel.org # 5.16 Reviewed-by: Boris Burkov Signed-off-by: Zixuan Fu Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 272901514b0c1..064ab2a79c805 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2345,8 +2345,11 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0, &bdev, &disk_super); - if (ret) + if (ret) { + btrfs_put_dev_args_from_path(args); return ret; + } + args->devid = btrfs_stack_device_id(&disk_super->dev_item); memcpy(args->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE); if (btrfs_fs_incompat(fs_info, METADATA_UUID)) From e6e3dec6c3c288d556b991a85d5d8e3ee71e9046 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 8 Aug 2022 12:18:37 +0100 Subject: [PATCH 183/654] btrfs: update generation of hole file extent item when merging holes When punching a hole into a file range that is adjacent with a hole and we are not using the no-holes feature, we expand the range of the adjacent file extent item that represents a hole, to save metadata space. However we don't update the generation of hole file extent item, which means a full fsync will not log that file extent item if the fsync happens in a later transaction (since commit 7f30c07288bb9e ("btrfs: stop copying old file extents when doing a full fsync")). For example, if we do this: $ mkfs.btrfs -f -O ^no-holes /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "pwrite -S 0xab 2M 2M" /mnt/foobar $ sync We end up with 2 file extent items in our file: 1) One that represents the hole for the file range [0, 2M), with a generation of 7; 2) Another one that represents an extent covering the range [2M, 4M). After that if we do the following: $ xfs_io -c "fpunch 2M 2M" /mnt/foobar We end up with a single file extent item in the file, which represents a hole for the range [0, 4M) and with a generation of 7 - because we end dropping the data extent for range [2M, 4M) and then update the file extent item that represented the hole at [0, 2M), by increasing length from 2M to 4M. Then doing a full fsync and power failing: $ xfs_io -c "fsync" /mnt/foobar will result in the full fsync not logging the file extent item that represents the hole for the range [0, 4M), because its generation is 7, which is lower than the generation of the current transaction (8). As a consequence, after mounting again the filesystem (after log replay), the region [2M, 4M) does not have a hole, it still points to the previous data extent. So fix this by always updating the generation of existing file extent items representing holes when we merge/expand them. This solves the problem and it's the same approach as when we merge prealloc extents that got written (at btrfs_mark_extent_written()). Setting the generation to the current transaction's generation is also what we do when merging the new hole extent map with the previous one or the next one. A test case for fstests, covering both cases of hole file extent item merging (to the left and to the right), will be sent soon. Fixes: 7f30c07288bb9e ("btrfs: stop copying old file extents when doing a full fsync") CC: stable@vger.kernel.org # 5.18+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 687fb372093fa..470421955de49 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2499,6 +2499,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_offset(leaf, fi, 0); + btrfs_set_file_extent_generation(leaf, fi, trans->transid); btrfs_mark_buffer_dirty(leaf); goto out; } @@ -2515,6 +2516,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_offset(leaf, fi, 0); + btrfs_set_file_extent_generation(leaf, fi, trans->transid); btrfs_mark_buffer_dirty(leaf); goto out; } From 4a445b7b6178d88956192c0202463063f52e8667 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 13 Aug 2022 16:06:53 +0800 Subject: [PATCH 184/654] btrfs: don't merge pages into bio if their page offset is not contiguous [BUG] Zygo reported on latest development branch, he could hit ASSERT()/BUG_ON() caused crash when doing RAID5 recovery (intentionally corrupt one disk, and let btrfs to recover the data during read/scrub). And The following minimal reproducer can cause extent state leakage at rmmod time: mkfs.btrfs -f -d raid5 -m raid5 $dev1 $dev2 $dev3 -b 1G > /dev/null mount $dev1 $mnt fsstress -w -d $mnt -n 25 -s 1660807876 sync fssum -A -f -w /tmp/fssum.saved $mnt umount $mnt # Wipe the dev1 but keeps its super block xfs_io -c "pwrite -S 0x0 1m 1023m" $dev1 mount $dev1 $mnt fssum -r /tmp/fssum.saved $mnt > /dev/null umount $mnt rmmod btrfs This will lead to the following extent states leakage: BTRFS: state leak: start 499712 end 503807 state 5 in tree 1 refs 1 BTRFS: state leak: start 495616 end 499711 state 5 in tree 1 refs 1 BTRFS: state leak: start 491520 end 495615 state 5 in tree 1 refs 1 BTRFS: state leak: start 487424 end 491519 state 5 in tree 1 refs 1 BTRFS: state leak: start 483328 end 487423 state 5 in tree 1 refs 1 BTRFS: state leak: start 479232 end 483327 state 5 in tree 1 refs 1 BTRFS: state leak: start 475136 end 479231 state 5 in tree 1 refs 1 BTRFS: state leak: start 471040 end 475135 state 5 in tree 1 refs 1 [CAUSE] Since commit 7aa51232e204 ("btrfs: pass a btrfs_bio to btrfs_repair_one_sector"), we always use btrfs_bio->file_offset to determine the file offset of a page. But that usage assume that, one bio has all its page having a continuous page offsets. Unfortunately that's not true, btrfs only requires the logical bytenr contiguous when assembling its bios. From above script, we have one bio looks like this: fssum-27671 submit_one_bio: bio logical=217739264 len=36864 fssum-27671 submit_one_bio: r/i=5/261 page_offset=466944 <<< fssum-27671 submit_one_bio: r/i=5/261 page_offset=724992 <<< fssum-27671 submit_one_bio: r/i=5/261 page_offset=729088 fssum-27671 submit_one_bio: r/i=5/261 page_offset=733184 fssum-27671 submit_one_bio: r/i=5/261 page_offset=737280 fssum-27671 submit_one_bio: r/i=5/261 page_offset=741376 fssum-27671 submit_one_bio: r/i=5/261 page_offset=745472 fssum-27671 submit_one_bio: r/i=5/261 page_offset=749568 fssum-27671 submit_one_bio: r/i=5/261 page_offset=753664 Note that the 1st and the 2nd page has non-contiguous page offsets. This means, at repair time, we will have completely wrong file offset passed in: kworker/u32:2-19927 btrfs_repair_one_sector: r/i=5/261 page_off=729088 file_off=475136 bio_offset=8192 Since the file offset is incorrect, we latter incorrectly set the extent states, and no way to really release them. Thus later it causes the leakage. In fact, this can be even worse, since the file offset is incorrect, we can hit cases like the incorrect file offset belongs to a HOLE, and later cause btrfs_num_copies() to trigger error, finally hit BUG_ON()/ASSERT() later. [FIX] Add an extra condition in btrfs_bio_add_page() for uncompressed IO. Now we will have more strict requirement for bio pages: - They should all have the same mapping (the mapping check is already implied by the call chain) - Their logical bytenr should be adjacent This is the same as the old condition. - Their page_offset() (file offset) should be adjacent This is the new check. This would result a slightly increased amount of bios from btrfs (needs holes and inside the same stripe boundary to trigger). But this would greatly reduce the confusion, as it's pretty common to assume a btrfs bio would only contain continuous page cache. Later we may need extra cleanups, as we no longer needs to handle gaps between page offsets in endio functions. Currently this should be the minimal patch to fix commit 7aa51232e204 ("btrfs: pass a btrfs_bio to btrfs_repair_one_sector"). Reported-by: Zygo Blaxell Fixes: 7aa51232e204 ("btrfs: pass a btrfs_bio to btrfs_repair_one_sector") Reviewed-by: Christoph Hellwig Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6e8e936a8a1ef..eb17e5bb92351 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3233,7 +3233,7 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl, u32 bio_size = bio->bi_iter.bi_size; u32 real_size; const sector_t sector = disk_bytenr >> SECTOR_SHIFT; - bool contig; + bool contig = false; int ret; ASSERT(bio); @@ -3242,10 +3242,35 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl, if (bio_ctrl->compress_type != compress_type) return 0; - if (bio_ctrl->compress_type != BTRFS_COMPRESS_NONE) + + if (bio->bi_iter.bi_size == 0) { + /* We can always add a page into an empty bio. */ + contig = true; + } else if (bio_ctrl->compress_type == BTRFS_COMPRESS_NONE) { + struct bio_vec *bvec = bio_last_bvec_all(bio); + + /* + * The contig check requires the following conditions to be met: + * 1) The pages are belonging to the same inode + * This is implied by the call chain. + * + * 2) The range has adjacent logical bytenr + * + * 3) The range has adjacent file offset + * This is required for the usage of btrfs_bio->file_offset. + */ + if (bio_end_sector(bio) == sector && + page_offset(bvec->bv_page) + bvec->bv_offset + + bvec->bv_len == page_offset(page) + pg_offset) + contig = true; + } else { + /* + * For compression, all IO should have its logical bytenr + * set to the starting bytenr of the compressed extent. + */ contig = bio->bi_iter.bi_sector == sector; - else - contig = bio_end_sector(bio) == sector; + } + if (!contig) return 0; From 79d3d1d12e6f22c904195f9356069859e2595f00 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 19 Aug 2022 11:53:39 -0400 Subject: [PATCH 185/654] btrfs: don't allow large NOWAIT direct reads Dylan and Jens reported a problem where they had an io_uring test that was returning short reads, and bisected it to ee5b46a353af ("btrfs: increase direct io read size limit to 256 sectors"). The root cause is their test was doing larger reads via io_uring with NOWAIT and async. This was triggering a page fault during the direct read, however the first page was able to work just fine and thus we submitted a 4k read for a larger iocb. Btrfs allows for partial IO's in this case specifically because we don't allow page faults, and thus we'll attempt to do any io that we can, submit what we could, come back and fault in the rest of the range and try to do the remaining IO. However for !is_sync_kiocb() we'll call ->ki_complete() as soon as the partial dio is done, which is incorrect. In the sync case we can exit the iomap code, submit more io's, and return with the amount of IO we were able to complete successfully. We were always doing short reads in this case, but for NOWAIT we were getting saved by the fact that we were limiting direct reads to sectorsize, and if we were larger than that we would return EAGAIN. Fix the regression by simply returning EAGAIN in the NOWAIT case with larger reads, that way io_uring can retry and get the larger IO and have the fault logic handle everything properly. This still leaves the AIO short read case, but that existed before this change. The way to properly fix this would be to handle partial iocb completions, but that's a lot of work, for now deal with the regression in the most straightforward way possible. Reported-by: Dylan Yudaken Fixes: ee5b46a353af ("btrfs: increase direct io read size limit to 256 sectors") Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/inode.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ecc5fa3343fc5..bea0adf287351 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7693,6 +7693,20 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, const u64 data_alloc_len = length; bool unlock_extents = false; + /* + * We could potentially fault if we have a buffer > PAGE_SIZE, and if + * we're NOWAIT we may submit a bio for a partial range and return + * EIOCBQUEUED, which would result in an errant short read. + * + * The best way to handle this would be to allow for partial completions + * of iocb's, so we could submit the partial bio, return and fault in + * the rest of the pages, and then submit the io for the rest of the + * range. However we don't have that currently, so simply return + * -EAGAIN at this point so that the normal path is used. + */ + if (!write && (flags & IOMAP_NOWAIT) && length > PAGE_SIZE) + return -EAGAIN; + /* * Cap the size of reads to that usually seen in buffered I/O as we need * to allocate a contiguous array for the checksums. From cfd2b5c1106fa20254d9f24970232cdf24860005 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Mon, 22 Aug 2022 17:25:57 +0800 Subject: [PATCH 186/654] perf tools: Fix compile error for x86 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit a0a12c3ed057 ("asm goto: eradicate CC_HAS_ASM_GOTO") eradicates CC_HAS_ASM_GOTO, and in the process also causes the perf tool on x86 to use asm_volatile_goto when compiling __GEN_RMWcc. However, asm_volatile_goto is not declared in the perf tool headers, which causes a compilation error: In file included from tools/arch/x86/include/asm/atomic.h:7, from tools/include/asm/atomic.h:6, from tools/include/linux/atomic.h:5, from tools/include/linux/refcount.h:41, from tools/lib/perf/include/internal/cpumap.h:5, from tools/perf/util/cpumap.h:7, from tools/perf/util/env.h:7, from tools/perf/util/header.h:12, from pmu-events/pmu-events.c:9: tools/arch/x86/include/asm/atomic.h: In function ‘atomic_dec_and_test’: tools/arch/x86/include/asm/rmwcc.h:7:2: error: implicit declaration of function ‘asm_volatile_goto’ [-Werror=implicit-function-declaration] asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ ^~~~~~~~~~~~~~~~~ Define asm_volatile_goto in compiler_types.h if not declared, like the main kernel header files do. Fixes: a0a12c3ed057 ("asm goto: eradicate CC_HAS_ASM_GOTO") Signed-off-by: Yang Jihong Tested-by: Arnaldo Carvalho de Melo Tested-by: Ingo Molnar Signed-off-by: Linus Torvalds --- tools/include/linux/compiler_types.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h index 24ae3054f304f..1bdd834bdd571 100644 --- a/tools/include/linux/compiler_types.h +++ b/tools/include/linux/compiler_types.h @@ -36,4 +36,8 @@ #include #endif +#ifndef asm_volatile_goto +#define asm_volatile_goto(x...) asm goto(x) +#endif + #endif /* __LINUX_COMPILER_TYPES_H */ From f37044fd759b6bc40b6398a978e0b1acdf717372 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 15 Jul 2022 21:41:48 +0200 Subject: [PATCH 187/654] net/mlx5e: Properly disable vlan strip on non-UL reps When querying mlx5 non-uplink representors capabilities with ethtool rx-vlan-offload is marked as "off [fixed]". However, it is actually always enabled because mlx5e_params->vlan_strip_disable is 0 by default when initializing struct mlx5e_params instance. Fix the issue by explicitly setting the vlan_strip_disable to 'true' for non-uplink representors. Fixes: cb67b832921c ("net/mlx5e: Introduce SRIOV VF representors") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 0c66774a1720c..759f7d3c2cfd8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -662,6 +662,8 @@ static void mlx5e_build_rep_params(struct net_device *netdev) params->mqprio.num_tc = 1; params->tunneled_offload_en = false; + if (rep->vport != MLX5_VPORT_UPLINK) + params->vlan_strip_disable = true; mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); } From a6e675a66175869b7d87c0e1dd0ddf93e04f8098 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 2 Aug 2022 19:45:36 +0300 Subject: [PATCH 188/654] net/mlx5: LAG, fix logic over MLX5_LAG_FLAG_NDEVS_READY Only set MLX5_LAG_FLAG_NDEVS_READY if both netdevices are registered. Doing so guarantees that both ldev->pf[MLX5_LAG_P0].dev and ldev->pf[MLX5_LAG_P1].dev have valid pointers when MLX5_LAG_FLAG_NDEVS_READY is set. The core issue is asymmetry in setting MLX5_LAG_FLAG_NDEVS_READY and clearing it. Setting it is done wrongly when both ldev->pf[MLX5_LAG_P0].dev and ldev->pf[MLX5_LAG_P1].dev are set; clearing it is done right when either of ldev->pf[i].netdev is cleared. Consider the following scenario: 1. PF0 loads and sets ldev->pf[MLX5_LAG_P0].dev to a valid pointer 2. PF1 loads and sets both ldev->pf[MLX5_LAG_P1].dev and ldev->pf[MLX5_LAG_P1].netdev with valid pointers. This results in MLX5_LAG_FLAG_NDEVS_READY is set. 3. PF0 is unloaded before setting dev->pf[MLX5_LAG_P0].netdev. MLX5_LAG_FLAG_NDEVS_READY remains set. Further execution of mlx5_do_bond() will result in null pointer dereference when calling mlx5_lag_is_multipath() This patch fixes the following call trace actually encountered: [ 1293.475195] BUG: kernel NULL pointer dereference, address: 00000000000009a8 [ 1293.478756] #PF: supervisor read access in kernel mode [ 1293.481320] #PF: error_code(0x0000) - not-present page [ 1293.483686] PGD 0 P4D 0 [ 1293.484434] Oops: 0000 [#1] SMP PTI [ 1293.485377] CPU: 1 PID: 23690 Comm: kworker/u16:2 Not tainted 5.18.0-rc5_for_upstream_min_debug_2022_05_05_10_13 #1 [ 1293.488039] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 1293.490836] Workqueue: mlx5_lag mlx5_do_bond_work [mlx5_core] [ 1293.492448] RIP: 0010:mlx5_lag_is_multipath+0x5/0x50 [mlx5_core] [ 1293.494044] Code: e8 70 40 ff e0 48 8b 14 24 48 83 05 5c 1a 1b 00 01 e9 19 ff ff ff 48 83 05 47 1a 1b 00 01 eb d7 0f 1f 44 00 00 0f 1f 44 00 00 <48> 8b 87 a8 09 00 00 48 85 c0 74 26 48 83 05 a7 1b 1b 00 01 41 b8 [ 1293.498673] RSP: 0018:ffff88811b2fbe40 EFLAGS: 00010202 [ 1293.500152] RAX: ffff88818a94e1c0 RBX: ffff888165eca6c0 RCX: 0000000000000000 [ 1293.501841] RDX: 0000000000000001 RSI: ffff88818a94e1c0 RDI: 0000000000000000 [ 1293.503585] RBP: 0000000000000000 R08: ffff888119886740 R09: ffff888165eca73c [ 1293.505286] R10: 0000000000000018 R11: 0000000000000018 R12: ffff88818a94e1c0 [ 1293.506979] R13: ffff888112729800 R14: 0000000000000000 R15: ffff888112729858 [ 1293.508753] FS: 0000000000000000(0000) GS:ffff88852cc40000(0000) knlGS:0000000000000000 [ 1293.510782] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1293.512265] CR2: 00000000000009a8 CR3: 00000001032d4002 CR4: 0000000000370ea0 [ 1293.514001] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1293.515806] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 8a66e4585979 ("net/mlx5: Change ownership model for lag") Signed-off-by: Eli Cohen Reviewed-by: Maor Dickman Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 0f34e3c80d1f4..f67d29164962c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1234,7 +1234,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, mlx5_ldev_add_netdev(ldev, dev, netdev); for (i = 0; i < ldev->ports; i++) - if (!ldev->pf[i].dev) + if (!ldev->pf[i].netdev) break; if (i >= ldev->ports) From 942fca7e762be39204e5926e91a288a343a97c72 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 7 Aug 2022 08:25:28 +0300 Subject: [PATCH 189/654] net/mlx5: Eswitch, Fix forwarding decision to uplink Make sure to modify the rule for uplink forwarding only for the case where destination vport number is MLX5_VPORT_UPLINK. Fixes: 94db33177819 ("net/mlx5: Support multiport eswitch mode") Signed-off-by: Eli Cohen Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index ed73132129aae..10b0b260f02bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -427,7 +427,8 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f dest[dest_idx].vport.vhca_id = MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; - if (mlx5_lag_mpesw_is_activated(esw->dev)) + if (dest[dest_idx].vport.num == MLX5_VPORT_UPLINK && + mlx5_lag_mpesw_is_activated(esw->dev)) dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; } if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) { From 8e93f29422ffe968d7161f91acdf0d47f5323727 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 11 Aug 2022 13:46:36 +0200 Subject: [PATCH 190/654] net/mlx5: Disable irq when locking lag_lock The lag_lock is taken from both process and softirq contexts which results lockdep warning[0] about potential deadlock. However, just disabling softirqs by using *_bh spinlock API is not enough since it will cause warning in some contexts where the lock is obtained with hard irqs disabled. To fix the issue save current irq state, disable them before obtaining the lock an re-enable irqs from saved state after releasing it. [0]: [Sun Aug 7 13:12:29 2022] ================================ [Sun Aug 7 13:12:29 2022] WARNING: inconsistent lock state [Sun Aug 7 13:12:29 2022] 5.19.0_for_upstream_debug_2022_08_04_16_06 #1 Not tainted [Sun Aug 7 13:12:29 2022] -------------------------------- [Sun Aug 7 13:12:29 2022] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [Sun Aug 7 13:12:29 2022] swapper/0/0 [HC0[0]:SC1[1]:HE1:SE0] takes: [Sun Aug 7 13:12:29 2022] ffffffffa06dc0d8 (lag_lock){+.?.}-{2:2}, at: mlx5_lag_is_shared_fdb+0x1f/0x120 [mlx5_core] [Sun Aug 7 13:12:29 2022] {SOFTIRQ-ON-W} state was registered at: [Sun Aug 7 13:12:29 2022] lock_acquire+0x1c1/0x550 [Sun Aug 7 13:12:29 2022] _raw_spin_lock+0x2c/0x40 [Sun Aug 7 13:12:29 2022] mlx5_lag_add_netdev+0x13b/0x480 [mlx5_core] [Sun Aug 7 13:12:29 2022] mlx5e_nic_enable+0x114/0x470 [mlx5_core] [Sun Aug 7 13:12:29 2022] mlx5e_attach_netdev+0x30e/0x6a0 [mlx5_core] [Sun Aug 7 13:12:29 2022] mlx5e_resume+0x105/0x160 [mlx5_core] [Sun Aug 7 13:12:29 2022] mlx5e_probe+0xac3/0x14f0 [mlx5_core] [Sun Aug 7 13:12:29 2022] auxiliary_bus_probe+0x9d/0xe0 [Sun Aug 7 13:12:29 2022] really_probe+0x1e0/0xaa0 [Sun Aug 7 13:12:29 2022] __driver_probe_device+0x219/0x480 [Sun Aug 7 13:12:29 2022] driver_probe_device+0x49/0x130 [Sun Aug 7 13:12:29 2022] __driver_attach+0x1e4/0x4d0 [Sun Aug 7 13:12:29 2022] bus_for_each_dev+0x11e/0x1a0 [Sun Aug 7 13:12:29 2022] bus_add_driver+0x3f4/0x5a0 [Sun Aug 7 13:12:29 2022] driver_register+0x20f/0x390 [Sun Aug 7 13:12:29 2022] __auxiliary_driver_register+0x14e/0x260 [Sun Aug 7 13:12:29 2022] mlx5e_init+0x38/0x90 [mlx5_core] [Sun Aug 7 13:12:29 2022] vhost_iotlb_itree_augment_rotate+0xcb/0x180 [vhost_iotlb] [Sun Aug 7 13:12:29 2022] do_one_initcall+0xc4/0x400 [Sun Aug 7 13:12:29 2022] do_init_module+0x18a/0x620 [Sun Aug 7 13:12:29 2022] load_module+0x563a/0x7040 [Sun Aug 7 13:12:29 2022] __do_sys_finit_module+0x122/0x1d0 [Sun Aug 7 13:12:29 2022] do_syscall_64+0x3d/0x90 [Sun Aug 7 13:12:29 2022] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [Sun Aug 7 13:12:29 2022] irq event stamp: 3596508 [Sun Aug 7 13:12:29 2022] hardirqs last enabled at (3596508): [] __local_bh_enable_ip+0xa2/0x100 [Sun Aug 7 13:12:29 2022] hardirqs last disabled at (3596507): [] __local_bh_enable_ip+0xba/0x100 [Sun Aug 7 13:12:29 2022] softirqs last enabled at (3596488): [] irq_exit_rcu+0x11a/0x170 [Sun Aug 7 13:12:29 2022] softirqs last disabled at (3596495): [] irq_exit_rcu+0x11a/0x170 [Sun Aug 7 13:12:29 2022] other info that might help us debug this: [Sun Aug 7 13:12:29 2022] Possible unsafe locking scenario: [Sun Aug 7 13:12:29 2022] CPU0 [Sun Aug 7 13:12:29 2022] ---- [Sun Aug 7 13:12:29 2022] lock(lag_lock); [Sun Aug 7 13:12:29 2022] [Sun Aug 7 13:12:29 2022] lock(lag_lock); [Sun Aug 7 13:12:29 2022] *** DEADLOCK *** [Sun Aug 7 13:12:29 2022] 4 locks held by swapper/0/0: [Sun Aug 7 13:12:29 2022] #0: ffffffff84643260 (rcu_read_lock){....}-{1:2}, at: mlx5e_napi_poll+0x43/0x20a0 [mlx5_core] [Sun Aug 7 13:12:29 2022] #1: ffffffff84643260 (rcu_read_lock){....}-{1:2}, at: netif_receive_skb_list_internal+0x2d7/0xd60 [Sun Aug 7 13:12:29 2022] #2: ffff888144a18b58 (&br->hash_lock){+.-.}-{2:2}, at: br_fdb_update+0x301/0x570 [Sun Aug 7 13:12:29 2022] #3: ffffffff84643260 (rcu_read_lock){....}-{1:2}, at: atomic_notifier_call_chain+0x5/0x1d0 [Sun Aug 7 13:12:29 2022] stack backtrace: [Sun Aug 7 13:12:29 2022] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.19.0_for_upstream_debug_2022_08_04_16_06 #1 [Sun Aug 7 13:12:29 2022] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [Sun Aug 7 13:12:29 2022] Call Trace: [Sun Aug 7 13:12:29 2022] [Sun Aug 7 13:12:29 2022] dump_stack_lvl+0x57/0x7d [Sun Aug 7 13:12:29 2022] mark_lock.part.0.cold+0x5f/0x92 [Sun Aug 7 13:12:29 2022] ? lock_chain_count+0x20/0x20 [Sun Aug 7 13:12:29 2022] ? unwind_next_frame+0x1c4/0x1b50 [Sun Aug 7 13:12:29 2022] ? secondary_startup_64_no_verify+0xcd/0xdb [Sun Aug 7 13:12:29 2022] ? mlx5e_napi_poll+0x4e9/0x20a0 [mlx5_core] [Sun Aug 7 13:12:29 2022] ? mlx5e_napi_poll+0x4e9/0x20a0 [mlx5_core] [Sun Aug 7 13:12:29 2022] ? stack_access_ok+0x1d0/0x1d0 [Sun Aug 7 13:12:29 2022] ? start_kernel+0x3a7/0x3c5 [Sun Aug 7 13:12:29 2022] __lock_acquire+0x1260/0x6720 [Sun Aug 7 13:12:29 2022] ? lock_chain_count+0x20/0x20 [Sun Aug 7 13:12:29 2022] ? lock_chain_count+0x20/0x20 [Sun Aug 7 13:12:29 2022] ? register_lock_class+0x1880/0x1880 [Sun Aug 7 13:12:29 2022] ? mark_lock.part.0+0xed/0x3060 [Sun Aug 7 13:12:29 2022] ? stack_trace_save+0x91/0xc0 [Sun Aug 7 13:12:29 2022] lock_acquire+0x1c1/0x550 [Sun Aug 7 13:12:29 2022] ? mlx5_lag_is_shared_fdb+0x1f/0x120 [mlx5_core] [Sun Aug 7 13:12:29 2022] ? lockdep_hardirqs_on_prepare+0x400/0x400 [Sun Aug 7 13:12:29 2022] ? __lock_acquire+0xd6f/0x6720 [Sun Aug 7 13:12:29 2022] _raw_spin_lock+0x2c/0x40 [Sun Aug 7 13:12:29 2022] ? mlx5_lag_is_shared_fdb+0x1f/0x120 [mlx5_core] [Sun Aug 7 13:12:29 2022] mlx5_lag_is_shared_fdb+0x1f/0x120 [mlx5_core] [Sun Aug 7 13:12:29 2022] mlx5_esw_bridge_rep_vport_num_vhca_id_get+0x1a0/0x600 [mlx5_core] [Sun Aug 7 13:12:29 2022] ? mlx5_esw_bridge_update_work+0x90/0x90 [mlx5_core] [Sun Aug 7 13:12:29 2022] ? lock_acquire+0x1c1/0x550 [Sun Aug 7 13:12:29 2022] mlx5_esw_bridge_switchdev_event+0x185/0x8f0 [mlx5_core] [Sun Aug 7 13:12:29 2022] ? mlx5_esw_bridge_port_obj_attr_set+0x3e0/0x3e0 [mlx5_core] [Sun Aug 7 13:12:29 2022] ? check_chain_key+0x24a/0x580 [Sun Aug 7 13:12:29 2022] atomic_notifier_call_chain+0xd7/0x1d0 [Sun Aug 7 13:12:29 2022] br_switchdev_fdb_notify+0xea/0x100 [Sun Aug 7 13:12:29 2022] ? br_switchdev_set_port_flag+0x310/0x310 [Sun Aug 7 13:12:29 2022] fdb_notify+0x11b/0x150 [Sun Aug 7 13:12:29 2022] br_fdb_update+0x34c/0x570 [Sun Aug 7 13:12:29 2022] ? lock_chain_count+0x20/0x20 [Sun Aug 7 13:12:29 2022] ? br_fdb_add_local+0x50/0x50 [Sun Aug 7 13:12:29 2022] ? br_allowed_ingress+0x5f/0x1070 [Sun Aug 7 13:12:29 2022] ? check_chain_key+0x24a/0x580 [Sun Aug 7 13:12:29 2022] br_handle_frame_finish+0x786/0x18e0 [Sun Aug 7 13:12:29 2022] ? check_chain_key+0x24a/0x580 [Sun Aug 7 13:12:29 2022] ? br_handle_local_finish+0x20/0x20 [Sun Aug 7 13:12:29 2022] ? __lock_acquire+0xd6f/0x6720 [Sun Aug 7 13:12:29 2022] ? sctp_inet_bind_verify+0x4d/0x190 [Sun Aug 7 13:12:29 2022] ? xlog_unpack_data+0x2e0/0x310 [Sun Aug 7 13:12:29 2022] ? br_handle_local_finish+0x20/0x20 [Sun Aug 7 13:12:29 2022] br_nf_hook_thresh+0x227/0x380 [br_netfilter] [Sun Aug 7 13:12:29 2022] ? setup_pre_routing+0x460/0x460 [br_netfilter] [Sun Aug 7 13:12:29 2022] ? br_handle_local_finish+0x20/0x20 [Sun Aug 7 13:12:29 2022] ? br_nf_pre_routing_ipv6+0x48b/0x69c [br_netfilter] [Sun Aug 7 13:12:29 2022] br_nf_pre_routing_finish_ipv6+0x5c2/0xbf0 [br_netfilter] [Sun Aug 7 13:12:29 2022] ? br_handle_local_finish+0x20/0x20 [Sun Aug 7 13:12:29 2022] br_nf_pre_routing_ipv6+0x4c6/0x69c [br_netfilter] [Sun Aug 7 13:12:29 2022] ? br_validate_ipv6+0x9e0/0x9e0 [br_netfilter] [Sun Aug 7 13:12:29 2022] ? br_nf_forward_arp+0xb70/0xb70 [br_netfilter] [Sun Aug 7 13:12:29 2022] ? br_nf_pre_routing+0xacf/0x1160 [br_netfilter] [Sun Aug 7 13:12:29 2022] br_handle_frame+0x8a9/0x1270 [Sun Aug 7 13:12:29 2022] ? br_handle_frame_finish+0x18e0/0x18e0 [Sun Aug 7 13:12:29 2022] ? register_lock_class+0x1880/0x1880 [Sun Aug 7 13:12:29 2022] ? br_handle_local_finish+0x20/0x20 [Sun Aug 7 13:12:29 2022] ? bond_handle_frame+0xf9/0xac0 [bonding] [Sun Aug 7 13:12:29 2022] ? br_handle_frame_finish+0x18e0/0x18e0 [Sun Aug 7 13:12:29 2022] __netif_receive_skb_core+0x7c0/0x2c70 [Sun Aug 7 13:12:29 2022] ? check_chain_key+0x24a/0x580 [Sun Aug 7 13:12:29 2022] ? generic_xdp_tx+0x5b0/0x5b0 [Sun Aug 7 13:12:29 2022] ? __lock_acquire+0xd6f/0x6720 [Sun Aug 7 13:12:29 2022] ? register_lock_class+0x1880/0x1880 [Sun Aug 7 13:12:29 2022] ? check_chain_key+0x24a/0x580 [Sun Aug 7 13:12:29 2022] __netif_receive_skb_list_core+0x2d7/0x8a0 [Sun Aug 7 13:12:29 2022] ? lock_acquire+0x1c1/0x550 [Sun Aug 7 13:12:29 2022] ? process_backlog+0x960/0x960 [Sun Aug 7 13:12:29 2022] ? lockdep_hardirqs_on_prepare+0x129/0x400 [Sun Aug 7 13:12:29 2022] ? kvm_clock_get_cycles+0x14/0x20 [Sun Aug 7 13:12:29 2022] netif_receive_skb_list_internal+0x5f4/0xd60 [Sun Aug 7 13:12:29 2022] ? do_xdp_generic+0x150/0x150 [Sun Aug 7 13:12:29 2022] ? mlx5e_poll_rx_cq+0xf6b/0x2960 [mlx5_core] [Sun Aug 7 13:12:29 2022] ? mlx5e_poll_ico_cq+0x3d/0x1590 [mlx5_core] [Sun Aug 7 13:12:29 2022] napi_complete_done+0x188/0x710 [Sun Aug 7 13:12:29 2022] mlx5e_napi_poll+0x4e9/0x20a0 [mlx5_core] [Sun Aug 7 13:12:29 2022] ? __queue_work+0x53c/0xeb0 [Sun Aug 7 13:12:29 2022] __napi_poll+0x9f/0x540 [Sun Aug 7 13:12:29 2022] net_rx_action+0x420/0xb70 [Sun Aug 7 13:12:29 2022] ? napi_threaded_poll+0x470/0x470 [Sun Aug 7 13:12:29 2022] ? __common_interrupt+0x79/0x1a0 [Sun Aug 7 13:12:29 2022] __do_softirq+0x271/0x92c [Sun Aug 7 13:12:29 2022] irq_exit_rcu+0x11a/0x170 [Sun Aug 7 13:12:29 2022] common_interrupt+0x7d/0xa0 [Sun Aug 7 13:12:29 2022] [Sun Aug 7 13:12:29 2022] [Sun Aug 7 13:12:29 2022] asm_common_interrupt+0x22/0x40 [Sun Aug 7 13:12:29 2022] RIP: 0010:default_idle+0x42/0x60 [Sun Aug 7 13:12:29 2022] Code: c1 83 e0 07 48 c1 e9 03 83 c0 03 0f b6 14 11 38 d0 7c 04 84 d2 75 14 8b 05 6b f1 22 02 85 c0 7e 07 0f 00 2d 80 3b 4a 00 fb f4 48 c7 c7 e0 07 7e 85 e8 21 bd 40 fe eb de 66 66 2e 0f 1f 84 00 [Sun Aug 7 13:12:29 2022] RSP: 0018:ffffffff84407e18 EFLAGS: 00000242 [Sun Aug 7 13:12:29 2022] RAX: 0000000000000001 RBX: ffffffff84ec4a68 RCX: 1ffffffff0afc0fc [Sun Aug 7 13:12:29 2022] RDX: 0000000000000004 RSI: 0000000000000000 RDI: ffffffff835b1fac [Sun Aug 7 13:12:29 2022] RBP: 0000000000000000 R08: 0000000000000001 R09: ffff8884d2c44ac3 [Sun Aug 7 13:12:29 2022] R10: ffffed109a588958 R11: 00000000ffffffff R12: 0000000000000000 [Sun Aug 7 13:12:29 2022] R13: ffffffff84efac20 R14: 0000000000000000 R15: dffffc0000000000 [Sun Aug 7 13:12:29 2022] ? default_idle_call+0xcc/0x460 [Sun Aug 7 13:12:29 2022] default_idle_call+0xec/0x460 [Sun Aug 7 13:12:29 2022] do_idle+0x394/0x450 [Sun Aug 7 13:12:29 2022] ? arch_cpu_idle_exit+0x40/0x40 [Sun Aug 7 13:12:29 2022] cpu_startup_entry+0x19/0x20 [Sun Aug 7 13:12:29 2022] rest_init+0x156/0x250 [Sun Aug 7 13:12:29 2022] arch_call_rest_init+0xf/0x15 [Sun Aug 7 13:12:29 2022] start_kernel+0x3a7/0x3c5 [Sun Aug 7 13:12:29 2022] secondary_startup_64_no_verify+0xcd/0xdb [Sun Aug 7 13:12:29 2022] Fixes: ff9b7521468b ("net/mlx5: Bridge, support LAG") Signed-off-by: Vlad Buslov Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 55 +++++++++++-------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index f67d29164962c..065102278cb80 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1067,30 +1067,32 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, struct net_device *netdev) { unsigned int fn = mlx5_get_dev_index(dev); + unsigned long flags; if (fn >= ldev->ports) return; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev->pf[fn].netdev = netdev; ldev->tracker.netdev_state[fn].link_up = 0; ldev->tracker.netdev_state[fn].tx_enabled = 0; - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); } static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, struct net_device *netdev) { + unsigned long flags; int i; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); for (i = 0; i < ldev->ports; i++) { if (ldev->pf[i].netdev == netdev) { ldev->pf[i].netdev = NULL; break; } } - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); } static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, @@ -1246,12 +1248,13 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; + unsigned long flags; bool res; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); res = ldev && __mlx5_lag_is_roce(ldev); - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); return res; } @@ -1260,12 +1263,13 @@ EXPORT_SYMBOL(mlx5_lag_is_roce); bool mlx5_lag_is_active(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; + unsigned long flags; bool res; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); res = ldev && __mlx5_lag_is_active(ldev); - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); return res; } @@ -1274,13 +1278,14 @@ EXPORT_SYMBOL(mlx5_lag_is_active); bool mlx5_lag_is_master(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; + unsigned long flags; bool res; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); res = ldev && __mlx5_lag_is_active(ldev) && dev == ldev->pf[MLX5_LAG_P1].dev; - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); return res; } @@ -1289,12 +1294,13 @@ EXPORT_SYMBOL(mlx5_lag_is_master); bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; + unsigned long flags; bool res; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); res = ldev && __mlx5_lag_is_sriov(ldev); - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); return res; } @@ -1303,13 +1309,14 @@ EXPORT_SYMBOL(mlx5_lag_is_sriov); bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; + unsigned long flags; bool res; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); res = ldev && __mlx5_lag_is_sriov(ldev) && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); return res; } @@ -1352,9 +1359,10 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) { struct net_device *ndev = NULL; struct mlx5_lag *ldev; + unsigned long flags; int i; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); if (!(ldev && __mlx5_lag_is_roce(ldev))) @@ -1373,7 +1381,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) dev_hold(ndev); unlock: - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); return ndev; } @@ -1383,10 +1391,11 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, struct net_device *slave) { struct mlx5_lag *ldev; + unsigned long flags; u8 port = 0; int i; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); if (!(ldev && __mlx5_lag_is_roce(ldev))) goto unlock; @@ -1401,7 +1410,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, port = ldev->v2p_map[port * ldev->buckets]; unlock: - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); return port; } EXPORT_SYMBOL(mlx5_lag_get_slave_port); @@ -1422,8 +1431,9 @@ struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev) { struct mlx5_core_dev *peer_dev = NULL; struct mlx5_lag *ldev; + unsigned long flags; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); if (!ldev) goto unlock; @@ -1433,7 +1443,7 @@ struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev) ldev->pf[MLX5_LAG_P1].dev; unlock: - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); return peer_dev; } EXPORT_SYMBOL(mlx5_lag_get_peer_mdev); @@ -1446,6 +1456,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); struct mlx5_core_dev **mdev; struct mlx5_lag *ldev; + unsigned long flags; int num_ports; int ret, i, j; void *out; @@ -1462,7 +1473,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, memset(values, 0, sizeof(*values) * num_counters); - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); if (ldev && __mlx5_lag_is_active(ldev)) { num_ports = ldev->ports; @@ -1472,7 +1483,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, num_ports = 1; mdev[MLX5_LAG_P1] = dev; } - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); for (i = 0; i < num_ports; ++i) { u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {}; From 090f3e4f4089ab8041ed7d632c7851c2a42fcc10 Mon Sep 17 00:00:00 2001 From: Roy Novich Date: Wed, 30 Mar 2022 17:59:27 +0300 Subject: [PATCH 191/654] net/mlx5: Fix cmd error logging for manage pages cmd When the driver unloads, give/reclaim_pages may fail as PF driver in teardown flow, current code will lead to the following kernel log print 'failed reclaiming pages: err 0'. Fix it to get same behavior as before the cited commits, by calling mlx5_cmd_check before handling error state. mlx5_cmd_check will verify if the returned error is an actual error needed to be handled by the driver or not and will return an appropriate value. Fixes: 8d564292a166 ("net/mlx5: Remove redundant error on reclaim pages") Fixes: 4dac2f10ada0 ("net/mlx5: Remove redundant notify fail on give pages") Signed-off-by: Roy Novich Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index ec76a8b1acc1c..60596357bfc7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -376,8 +376,8 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, goto out_dropped; } } + err = mlx5_cmd_check(dev, err, in, out); if (err) { - err = mlx5_cmd_check(dev, err, in, out); mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err); goto out_dropped; @@ -524,10 +524,13 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, dev->priv.reclaim_pages_discard += npages; } /* if triggered by FW event and failed by FW then ignore */ - if (event && err == -EREMOTEIO) + if (event && err == -EREMOTEIO) { err = 0; + goto out_free; + } + + err = mlx5_cmd_check(dev, err, in, out); if (err) { - err = mlx5_cmd_check(dev, err, in, out); mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err); goto out_free; } From d59b73a66e5e0682442b6d7b4965364e57078b80 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 3 Aug 2022 10:49:23 +0300 Subject: [PATCH 192/654] net/mlx5: Avoid false positive lockdep warning by adding lock_class_key Add a lock_class_key per mlx5 device to avoid a false positive "possible circular locking dependency" warning by lockdep, on flows which lock more than one mlx5 device, such as adding SF. kernel log: ====================================================== WARNING: possible circular locking dependency detected 5.19.0-rc8+ #2 Not tainted ------------------------------------------------------ kworker/u20:0/8 is trying to acquire lock: ffff88812dfe0d98 (&dev->intf_state_mutex){+.+.}-{3:3}, at: mlx5_init_one+0x2e/0x490 [mlx5_core] but task is already holding lock: ffff888101aa7898 (&(¬ifier->n_head)->rwsem){++++}-{3:3}, at: blocking_notifier_call_chain+0x5a/0x130 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&(¬ifier->n_head)->rwsem){++++}-{3:3}: down_write+0x90/0x150 blocking_notifier_chain_register+0x53/0xa0 mlx5_sf_table_init+0x369/0x4a0 [mlx5_core] mlx5_init_one+0x261/0x490 [mlx5_core] probe_one+0x430/0x680 [mlx5_core] local_pci_probe+0xd6/0x170 work_for_cpu_fn+0x4e/0xa0 process_one_work+0x7c2/0x1340 worker_thread+0x6f6/0xec0 kthread+0x28f/0x330 ret_from_fork+0x1f/0x30 -> #0 (&dev->intf_state_mutex){+.+.}-{3:3}: __lock_acquire+0x2fc7/0x6720 lock_acquire+0x1c1/0x550 __mutex_lock+0x12c/0x14b0 mlx5_init_one+0x2e/0x490 [mlx5_core] mlx5_sf_dev_probe+0x29c/0x370 [mlx5_core] auxiliary_bus_probe+0x9d/0xe0 really_probe+0x1e0/0xaa0 __driver_probe_device+0x219/0x480 driver_probe_device+0x49/0x130 __device_attach_driver+0x1b8/0x280 bus_for_each_drv+0x123/0x1a0 __device_attach+0x1a3/0x460 bus_probe_device+0x1a2/0x260 device_add+0x9b1/0x1b40 __auxiliary_device_add+0x88/0xc0 mlx5_sf_dev_state_change_handler+0x67e/0x9d0 [mlx5_core] blocking_notifier_call_chain+0xd5/0x130 mlx5_vhca_state_work_handler+0x2b0/0x3f0 [mlx5_core] process_one_work+0x7c2/0x1340 worker_thread+0x59d/0xec0 kthread+0x28f/0x330 ret_from_fork+0x1f/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&(¬ifier->n_head)->rwsem); lock(&dev->intf_state_mutex); lock(&(¬ifier->n_head)->rwsem); lock(&dev->intf_state_mutex); *** DEADLOCK *** 4 locks held by kworker/u20:0/8: #0: ffff888150612938 ((wq_completion)mlx5_events){+.+.}-{0:0}, at: process_one_work+0x6e2/0x1340 #1: ffff888100cafdb8 ((work_completion)(&work->work)#3){+.+.}-{0:0}, at: process_one_work+0x70f/0x1340 #2: ffff888101aa7898 (&(¬ifier->n_head)->rwsem){++++}-{3:3}, at: blocking_notifier_call_chain+0x5a/0x130 #3: ffff88813682d0e8 (&dev->mutex){....}-{3:3}, at:__device_attach+0x76/0x460 stack backtrace: CPU: 6 PID: 8 Comm: kworker/u20:0 Not tainted 5.19.0-rc8+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5_events mlx5_vhca_state_work_handler [mlx5_core] Call Trace: dump_stack_lvl+0x57/0x7d check_noncircular+0x278/0x300 ? print_circular_bug+0x460/0x460 ? lock_chain_count+0x20/0x20 ? register_lock_class+0x1880/0x1880 __lock_acquire+0x2fc7/0x6720 ? register_lock_class+0x1880/0x1880 ? register_lock_class+0x1880/0x1880 lock_acquire+0x1c1/0x550 ? mlx5_init_one+0x2e/0x490 [mlx5_core] ? lockdep_hardirqs_on_prepare+0x400/0x400 __mutex_lock+0x12c/0x14b0 ? mlx5_init_one+0x2e/0x490 [mlx5_core] ? mlx5_init_one+0x2e/0x490 [mlx5_core] ? _raw_read_unlock+0x1f/0x30 ? mutex_lock_io_nested+0x1320/0x1320 ? __ioremap_caller.constprop.0+0x306/0x490 ? mlx5_sf_dev_probe+0x269/0x370 [mlx5_core] ? iounmap+0x160/0x160 mlx5_init_one+0x2e/0x490 [mlx5_core] mlx5_sf_dev_probe+0x29c/0x370 [mlx5_core] ? mlx5_sf_dev_remove+0x130/0x130 [mlx5_core] auxiliary_bus_probe+0x9d/0xe0 really_probe+0x1e0/0xaa0 __driver_probe_device+0x219/0x480 ? auxiliary_match_id+0xe9/0x140 driver_probe_device+0x49/0x130 __device_attach_driver+0x1b8/0x280 ? driver_allows_async_probing+0x140/0x140 bus_for_each_drv+0x123/0x1a0 ? bus_for_each_dev+0x1a0/0x1a0 ? lockdep_hardirqs_on_prepare+0x286/0x400 ? trace_hardirqs_on+0x2d/0x100 __device_attach+0x1a3/0x460 ? device_driver_attach+0x1e0/0x1e0 ? kobject_uevent_env+0x22d/0xf10 bus_probe_device+0x1a2/0x260 device_add+0x9b1/0x1b40 ? dev_set_name+0xab/0xe0 ? __fw_devlink_link_to_suppliers+0x260/0x260 ? memset+0x20/0x40 ? lockdep_init_map_type+0x21a/0x7d0 __auxiliary_device_add+0x88/0xc0 ? auxiliary_device_init+0x86/0xa0 mlx5_sf_dev_state_change_handler+0x67e/0x9d0 [mlx5_core] blocking_notifier_call_chain+0xd5/0x130 mlx5_vhca_state_work_handler+0x2b0/0x3f0 [mlx5_core] ? mlx5_vhca_event_arm+0x100/0x100 [mlx5_core] ? lock_downgrade+0x6e0/0x6e0 ? lockdep_hardirqs_on_prepare+0x286/0x400 process_one_work+0x7c2/0x1340 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? pwq_dec_nr_in_flight+0x230/0x230 ? rwlock_bug.part.0+0x90/0x90 worker_thread+0x59d/0xec0 ? process_one_work+0x1340/0x1340 kthread+0x28f/0x330 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Fixes: 6a3273217469 ("net/mlx5: SF, Port function state change support") Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ++++ include/linux/mlx5/driver.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index bec8d6d0b5f67..c085b031abfc1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1530,7 +1530,9 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile)); INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); + lockdep_register_key(&dev->lock_key); mutex_init(&dev->intf_state_mutex); + lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key); mutex_init(&priv->bfregs.reg_head.lock); mutex_init(&priv->bfregs.wc_head.lock); @@ -1597,6 +1599,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) mutex_destroy(&priv->bfregs.wc_head.lock); mutex_destroy(&priv->bfregs.reg_head.lock); mutex_destroy(&dev->intf_state_mutex); + lockdep_unregister_key(&dev->lock_key); return err; } @@ -1618,6 +1621,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) mutex_destroy(&priv->bfregs.wc_head.lock); mutex_destroy(&priv->bfregs.reg_head.lock); mutex_destroy(&dev->intf_state_mutex); + lockdep_unregister_key(&dev->lock_key); } static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 96b16fbe1aa45..7b7ce602c8080 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -779,6 +779,7 @@ struct mlx5_core_dev { enum mlx5_device_state state; /* sync interface state */ struct mutex intf_state_mutex; + struct lock_class_key lock_key; unsigned long intf_state; struct mlx5_priv priv; struct mlx5_profile profile; From 7b3707fc79044871ab8f3d5fa5e9603155bb5577 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 8 Jun 2022 18:38:37 +0300 Subject: [PATCH 193/654] net/mlx5e: Fix wrong application of the LRO state Driver caches packet merge type in mlx5e_params instance which must be in perfect sync with the netdev_feature's bit. Prior to this patch, in certain conditions (*) LRO state was set in mlx5e_params, while netdev_feature's bit was off. Causing the LRO to be applied on the RQs (HW level). (*) This can happen only on profile init (mlx5e_build_nic_params()), when RQ expect non-linear SKB and PCI is fast enough in comparison to link width. Solution: remove setting of packet merge type from mlx5e_build_nic_params() as netdev features are not updated. Fixes: 619a8f2a42f1 ("net/mlx5e: Use linear SKB in Striding RQ") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d858667736a32..c65b6a2883d3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4769,14 +4769,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 /* RQ */ mlx5e_build_rq_params(mdev, params); - /* HW LRO */ - if (MLX5_CAP_ETH(mdev, lro_cap) && - params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { - /* No XSK params: checking the availability of striding RQ in general. */ - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) - params->packet_merge.type = slow_pci_heuristic(mdev) ? - MLX5E_PACKET_MERGE_NONE : MLX5E_PACKET_MERGE_LRO; - } params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); /* CQ moderation params */ From f7a4e867f48c2e03ab6a237c06ab923d80aeeeb1 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 1 Aug 2022 18:02:49 +0300 Subject: [PATCH 194/654] net/mlx5e: TC, Add missing policer validation There is a missing policer validation when offloading police action with tc action api. Add it. Fixes: 7d1a5ce46e47 ("net/mlx5e: TC, Support tc action api for police") Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c index 37522352e4b23..c8e5ca65bb6ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c @@ -79,6 +79,10 @@ tc_act_police_offload(struct mlx5e_priv *priv, struct mlx5e_flow_meter_handle *meter; int err = 0; + err = mlx5e_policer_validate(&fl_act->action, act, fl_act->extack); + if (err) + return err; + err = fill_meter_params_from_act(act, ¶ms); if (err) return err; From 550f96432e6f6770efdaee0e65239d61431062a1 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Thu, 4 Aug 2022 15:28:42 +0300 Subject: [PATCH 195/654] net/mlx5e: Fix wrong tc flag used when set hw-tc-offload off The cited commit reintroduced the ability to set hw-tc-offload in switchdev mode by reusing NIC mode calls without modifying it to support both modes, this can cause an illegal memory access when trying to turn hw-tc-offload off. Fix this by using the right TC_FLAG when checking if tc rules are installed while disabling hw-tc-offload. Fixes: d3cbd4254df8 ("net/mlx5e: Add ndo_set_feature for uplink representor") Signed-off-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c65b6a2883d3e..02eb2f0fa2ae7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3682,7 +3682,9 @@ static int set_feature_hw_tc(struct net_device *netdev, bool enable) int err = 0; #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) - if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) { + int tc_flag = mlx5e_is_uplink_rep(priv) ? MLX5_TC_FLAG(ESW_OFFLOAD) : + MLX5_TC_FLAG(NIC_OFFLOAD); + if (!enable && mlx5e_tc_num_filters(priv, tc_flag)) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; From b868c8fe37bd15def1a8dd0b1f30fca9087e499a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 15 Jul 2022 11:27:03 +0300 Subject: [PATCH 196/654] net/mlx5: unlock on error path in esw_vfs_changed_event_handler() Unlock before returning on this error path. Fixes: f1bc646c9a06 ("net/mlx5: Use devl_ API in mlx5_esw_offloads_devlink_port_register") Signed-off-by: Dan Carpenter Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 10b0b260f02bf..a9f4c652f859c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3116,8 +3116,10 @@ esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) err = mlx5_eswitch_load_vf_vports(esw, new_num_vfs, MLX5_VPORT_UC_ADDR_CHANGE); - if (err) + if (err) { + devl_unlock(devlink); return; + } } esw->esw_funcs.num_vfs = new_num_vfs; devl_unlock(devlink); From 6514210b6d0dc36352fda86b71f80f9a9ed4f677 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 Aug 2022 17:44:07 +0300 Subject: [PATCH 197/654] net/mlx5e: kTLS, Use _safe() iterator in mlx5e_tls_priv_tx_list_cleanup() Use the list_for_each_entry_safe() macro to prevent dereferencing "obj" after it has been freed. Fixes: c4dfe704f53f ("net/mlx5e: kTLS, Recycle objects of device-offloaded TLS TX connections") Signed-off-by: Dan Carpenter Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 0aef695272264..3a1f76eac542d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -246,7 +246,7 @@ static void mlx5e_tls_priv_tx_cleanup(struct mlx5e_ktls_offload_context_tx *priv static void mlx5e_tls_priv_tx_list_cleanup(struct mlx5_core_dev *mdev, struct list_head *list, int size) { - struct mlx5e_ktls_offload_context_tx *obj; + struct mlx5e_ktls_offload_context_tx *obj, *n; struct mlx5e_async_ctx *bulk_async; int i; @@ -255,7 +255,7 @@ static void mlx5e_tls_priv_tx_list_cleanup(struct mlx5_core_dev *mdev, return; i = 0; - list_for_each_entry(obj, list, list_node) { + list_for_each_entry_safe(obj, n, list, list_node) { mlx5e_tls_priv_tx_cleanup(obj, &bulk_async[i]); i++; } From 21234e3a84c70f27ea106411bdd5ef7af17508a6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 Aug 2022 17:43:21 +0300 Subject: [PATCH 198/654] net/mlx5e: Fix use after free in mlx5e_fs_init() Call mlx5e_fs_vlan_free(fs) before kvfree(fs). Fixes: af8bbf730068 ("net/mlx5e: Convert mlx5e_flow_steering member of mlx5e_priv to pointer") Signed-off-by: Dan Carpenter Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index e2a9b9be5c1fb..e0ce5a233d0b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -1395,10 +1395,11 @@ struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile, } return fs; -err_free_fs: - kvfree(fs); + err_free_vlan: mlx5e_fs_vlan_free(fs); +err_free_fs: + kvfree(fs); err: return NULL; } From 35419025cb1ee40f8b4c10ab7dbe567ef70b8da4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 Aug 2022 17:39:31 +0300 Subject: [PATCH 199/654] net/mlx5: Unlock on error in mlx5_sriov_enable() Unlock before returning if mlx5_device_enable_sriov() fails. Fixes: 84a433a40d0e ("net/mlx5: Lock mlx5 devlink reload callbacks") Signed-off-by: Dan Carpenter Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index ee2e1b7c1310d..c0e6c487c63c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -159,11 +159,11 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) devl_lock(devlink); err = mlx5_device_enable_sriov(dev, num_vfs); + devl_unlock(devlink); if (err) { mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err); return err; } - devl_unlock(devlink); err = pci_enable_sriov(pdev, num_vfs); if (err) { From ad982c3be4e60c7d39c03f782733503cbd88fd2a Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 22 Aug 2022 10:29:05 +0800 Subject: [PATCH 200/654] audit: fix potential double free on error path from fsnotify_add_inode_mark Audit_alloc_mark() assign pathname to audit_mark->path, on error path from fsnotify_add_inode_mark(), fsnotify_put_mark will free memory of audit_mark->path, but the caller of audit_alloc_mark will free the pathname again, so there will be double free problem. Fix this by resetting audit_mark->path to NULL pointer on error path from fsnotify_add_inode_mark(). Cc: stable@vger.kernel.org Fixes: 7b1293234084d ("fsnotify: Add group pointer in fsnotify_init_mark()") Signed-off-by: Gaosheng Cui Reviewed-by: Jan Kara Signed-off-by: Paul Moore --- kernel/audit_fsnotify.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index 6432a37ac1c94..c565fbf66ac87 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -102,6 +102,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, 0); if (ret < 0) { + audit_mark->path = NULL; fsnotify_put_mark(&audit_mark->mark); audit_mark = ERR_PTR(ret); } From 35b0fac808b95eea1212f8860baf6ad25b88b087 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 22 Aug 2022 16:14:23 +0800 Subject: [PATCH 201/654] clk: core: Honor CLK_OPS_PARENT_ENABLE for clk gate ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the previous commits that added CLK_OPS_PARENT_ENABLE, support for this flag was only added to rate change operations (rate setting and reparent) and disabling unused subtree. It was not added to the clock gate related operations. Any hardware driver that needs it for these operations will either see bogus results, or worse, hang. This has been seen on MT8192 and MT8195, where the imp_ii2_* clk drivers set this, but dumping debugfs clk_summary would cause it to hang. Fixes: fc8726a2c021 ("clk: core: support clocks which requires parents enable (part 2)") Fixes: a4b3518d146f ("clk: core: support clocks which requires parents enable (part 1)") Signed-off-by: Chen-Yu Tsai Reviewed-by: Nícolas F. R. A. Prado Tested-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20220822081424.1310926-2-wenst@chromium.org Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 7fc191c155073..9b365cd6d14b2 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -196,6 +196,9 @@ static bool clk_core_rate_is_protected(struct clk_core *core) return core->protect_count; } +static int clk_core_prepare_enable(struct clk_core *core); +static void clk_core_disable_unprepare(struct clk_core *core); + static bool clk_core_is_prepared(struct clk_core *core) { bool ret = false; @@ -208,7 +211,11 @@ static bool clk_core_is_prepared(struct clk_core *core) return core->prepare_count; if (!clk_pm_runtime_get(core)) { + if (core->flags & CLK_OPS_PARENT_ENABLE) + clk_core_prepare_enable(core->parent); ret = core->ops->is_prepared(core->hw); + if (core->flags & CLK_OPS_PARENT_ENABLE) + clk_core_disable_unprepare(core->parent); clk_pm_runtime_put(core); } @@ -244,7 +251,13 @@ static bool clk_core_is_enabled(struct clk_core *core) } } + if (core->flags & CLK_OPS_PARENT_ENABLE) + clk_core_prepare_enable(core->parent); + ret = core->ops->is_enabled(core->hw); + + if (core->flags & CLK_OPS_PARENT_ENABLE) + clk_core_disable_unprepare(core->parent); done: if (core->rpm_enabled) pm_runtime_put(core->dev); @@ -812,6 +825,9 @@ int clk_rate_exclusive_get(struct clk *clk) } EXPORT_SYMBOL_GPL(clk_rate_exclusive_get); +static int clk_core_enable_lock(struct clk_core *core); +static void clk_core_disable_lock(struct clk_core *core); + static void clk_core_unprepare(struct clk_core *core) { lockdep_assert_held(&prepare_lock); @@ -835,6 +851,9 @@ static void clk_core_unprepare(struct clk_core *core) WARN(core->enable_count > 0, "Unpreparing enabled %s\n", core->name); + if (core->flags & CLK_OPS_PARENT_ENABLE) + clk_core_enable_lock(core->parent); + trace_clk_unprepare(core); if (core->ops->unprepare) @@ -843,6 +862,9 @@ static void clk_core_unprepare(struct clk_core *core) clk_pm_runtime_put(core); trace_clk_unprepare_complete(core); + + if (core->flags & CLK_OPS_PARENT_ENABLE) + clk_core_disable_lock(core->parent); clk_core_unprepare(core->parent); } @@ -891,6 +913,9 @@ static int clk_core_prepare(struct clk_core *core) if (ret) goto runtime_put; + if (core->flags & CLK_OPS_PARENT_ENABLE) + clk_core_enable_lock(core->parent); + trace_clk_prepare(core); if (core->ops->prepare) @@ -898,6 +923,9 @@ static int clk_core_prepare(struct clk_core *core) trace_clk_prepare_complete(core); + if (core->flags & CLK_OPS_PARENT_ENABLE) + clk_core_disable_lock(core->parent); + if (ret) goto unprepare; } From 4b592061f7b3971c70e8b72fc42aaead47c24701 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 22 Aug 2022 16:14:24 +0800 Subject: [PATCH 202/654] clk: core: Fix runtime PM sequence in clk_core_unprepare() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the original commit 9a34b45397e5 ("clk: Add support for runtime PM"), the commit message mentioned that pm_runtime_put_sync() would be done at the end of clk_core_unprepare(). This mirrors the operations in clk_core_prepare() in the opposite order. However, the actual code that was added wasn't in the order the commit message described. Move clk_pm_runtime_put() to the end of clk_core_unprepare() so that it is in the correct order. Fixes: 9a34b45397e5 ("clk: Add support for runtime PM") Signed-off-by: Chen-Yu Tsai Reviewed-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20220822081424.1310926-3-wenst@chromium.org Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 9b365cd6d14b2..2e29a72c68e1b 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -859,13 +859,12 @@ static void clk_core_unprepare(struct clk_core *core) if (core->ops->unprepare) core->ops->unprepare(core->hw); - clk_pm_runtime_put(core); - trace_clk_unprepare_complete(core); if (core->flags & CLK_OPS_PARENT_ENABLE) clk_core_disable_lock(core->parent); clk_core_unprepare(core->parent); + clk_pm_runtime_put(core); } static void clk_core_unprepare_lock(struct clk_core *core) From 99077ad668ddd9b4823cc8ce3f3c7a3fc56f6fd9 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Mon, 22 Aug 2022 16:33:18 -0700 Subject: [PATCH 203/654] Input: rk805-pwrkey - fix module autoloading Add the module alias so the rk805-pwrkey driver will autoload when built as a module. Fixes: 5a35b85c2d92 ("Input: add power key driver for Rockchip RK805 PMIC") Signed-off-by: Peter Robinson Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20220612225437.3628788-1-pbrobinson@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/rk805-pwrkey.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/misc/rk805-pwrkey.c b/drivers/input/misc/rk805-pwrkey.c index 3fb64dbda1a21..76873aa005b41 100644 --- a/drivers/input/misc/rk805-pwrkey.c +++ b/drivers/input/misc/rk805-pwrkey.c @@ -98,6 +98,7 @@ static struct platform_driver rk805_pwrkey_driver = { }; module_platform_driver(rk805_pwrkey_driver); +MODULE_ALIAS("platform:rk805-pwrkey"); MODULE_AUTHOR("Joseph Chen "); MODULE_DESCRIPTION("RK805 PMIC Power Key driver"); MODULE_LICENSE("GPL"); From 5fbb08eb7f945c7e8896ea39f03143ce66dfa4c7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 18 Aug 2022 17:32:50 +0300 Subject: [PATCH 204/654] net: dsa: microchip: keep compatibility with device tree blobs with no phy-mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DSA has multiple ways of specifying a MAC connection to an internal PHY. One requires a DT description like this: port@0 { reg = <0>; phy-handle = <&internal_phy>; phy-mode = "internal"; }; (which is IMO the recommended approach, as it is the clearest description) but it is also possible to leave the specification as just: port@0 { reg = <0>; } and if the driver implements ds->ops->phy_read and ds->ops->phy_write, the DSA framework "knows" it should create a ds->slave_mii_bus, and it should connect to a non-OF-based internal PHY on this MDIO bus, at an MDIO address equal to the port address. There is also an intermediary way of describing things: port@0 { reg = <0>; phy-handle = <&internal_phy>; }; In case 2, DSA calls phylink_connect_phy() and in case 3, it calls phylink_of_phy_connect(). In both cases, phylink_create() has been called with a phy_interface_t of PHY_INTERFACE_MODE_NA, and in both cases, PHY_INTERFACE_MODE_NA is translated into phy->interface. It is important to note that phy_device_create() initializes dev->interface = PHY_INTERFACE_MODE_GMII, and so, when we use phylink_create(PHY_INTERFACE_MODE_NA), no one will override this, and we will end up with a PHY_INTERFACE_MODE_GMII interface inherited from the PHY. All this means that in order to maintain compatibility with device tree blobs where the phy-mode property is missing, we need to allow the "gmii" phy-mode and treat it as "internal". Fixes: 2c709e0bdad4 ("net: dsa: microchip: ksz8795: add phylink support") Link: https://bugzilla.kernel.org/show_bug.cgi?id=216320 Reported-by: Craig McQueen Signed-off-by: Vladimir Oltean Reviewed-by: Alvin Šipraga Tested-by: Rasmus Villemoes Link: https://lore.kernel.org/r/20220818143250.2797111-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index ed7d137cba994..7461272a6d410 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -803,9 +803,15 @@ static void ksz_phylink_get_caps(struct dsa_switch *ds, int port, if (dev->info->supports_rgmii[port]) phy_interface_set_rgmii(config->supported_interfaces); - if (dev->info->internal_phy[port]) + if (dev->info->internal_phy[port]) { __set_bit(PHY_INTERFACE_MODE_INTERNAL, config->supported_interfaces); + /* Compatibility for phylib's default interface type when the + * phy-mode property is absent + */ + __set_bit(PHY_INTERFACE_MODE_GMII, + config->supported_interfaces); + } if (dev->dev_ops->get_caps) dev->dev_ops->get_caps(dev, port, config); From b8d4380365c515d8e0351f2f46d371738dd19be1 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 18 Aug 2022 08:42:05 -0500 Subject: [PATCH 205/654] net: ipa: don't assume SMEM is page-aligned In ipa_smem_init(), a Qualcomm SMEM region is allocated (if needed) and then its virtual address is fetched using qcom_smem_get(). The physical address associated with that region is also fetched. The physical address is adjusted so that it is page-aligned, and an attempt is made to update the size of the region to compensate for any non-zero adjustment. But that adjustment isn't done properly. The physical address is aligned twice, and as a result the size is never actually adjusted. Fix this by *not* aligning the "addr" local variable, and instead making the "phys" local variable be the adjusted "addr" value. Fixes: a0036bb413d5b ("net: ipa: define SMEM memory region for IPA") Signed-off-by: Alex Elder Link: https://lore.kernel.org/r/20220818134206.567618-1-elder@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c index 1e9eae208e44f..53a1dbeaffa6d 100644 --- a/drivers/net/ipa/ipa_mem.c +++ b/drivers/net/ipa/ipa_mem.c @@ -568,7 +568,7 @@ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size) } /* Align the address down and the size up to a page boundary */ - addr = qcom_smem_virt_to_phys(virt) & PAGE_MASK; + addr = qcom_smem_virt_to_phys(virt); phys = addr & PAGE_MASK; size = PAGE_ALIGN(size + addr - phys); iova = phys; /* We just want a direct mapping */ From 6dbe852c379ff032a70a6b13a91914918c82cb07 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Fri, 19 Aug 2022 16:24:51 +0800 Subject: [PATCH 206/654] net: phy: Don't WARN for PHY_READY state in mdio_bus_phy_resume() For some MAC drivers, they set the mac_managed_pm to true in its ->ndo_open() callback. So before the mac_managed_pm is set to true, we still want to leverage the mdio_bus_phy_suspend()/resume() for the phy device suspend and resume. In this case, the phy device is in PHY_READY, and we shouldn't warn about this. It also seems that the check of mac_managed_pm in WARN_ON is redundant since we already check this in the entry of mdio_bus_phy_resume(), so drop it. Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") Signed-off-by: Xiaolei Wang Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20220819082451.1992102-1-xiaolei.wang@windriver.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0c6efd7926907..12ff276b80aed 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -316,11 +316,11 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev) phydev->suspended_by_mdio_bus = 0; - /* If we managed to get here with the PHY state machine in a state other - * than PHY_HALTED this is an indication that something went wrong and - * we should most likely be using MAC managed PM and we are not. + /* If we manged to get here with the PHY state machine in a state neither + * PHY_HALTED nor PHY_READY this is an indication that something went wrong + * and we should most likely be using MAC managed PM and we are not. */ - WARN_ON(phydev->state != PHY_HALTED && !phydev->mac_managed_pm); + WARN_ON(phydev->state != PHY_HALTED && phydev->state != PHY_READY); ret = phy_init_hw(phydev); if (ret < 0) From 0ee7828dfc56e97d71e51e6374dc7b4eb2b6e081 Mon Sep 17 00:00:00 2001 From: Sergei Antonov Date: Fri, 19 Aug 2022 14:05:19 +0300 Subject: [PATCH 207/654] net: moxa: get rid of asymmetry in DMA mapping/unmapping Since priv->rx_mapping[i] is maped in moxart_mac_open(), we should unmap it from moxart_mac_stop(). Fixes 2 warnings. 1. During error unwinding in moxart_mac_probe(): "goto init_fail;", then moxart_mac_free_memory() calls dma_unmap_single() with priv->rx_mapping[i] pointers zeroed. WARNING: CPU: 0 PID: 1 at kernel/dma/debug.c:963 check_unmap+0x704/0x980 DMA-API: moxart-ethernet 92000000.mac: device driver tries to free DMA memory it has not allocated [device address=0x0000000000000000] [size=1600 bytes] CPU: 0 PID: 1 Comm: swapper Not tainted 5.19.0+ #60 Hardware name: Generic DT based system unwind_backtrace from show_stack+0x10/0x14 show_stack from dump_stack_lvl+0x34/0x44 dump_stack_lvl from __warn+0xbc/0x1f0 __warn from warn_slowpath_fmt+0x94/0xc8 warn_slowpath_fmt from check_unmap+0x704/0x980 check_unmap from debug_dma_unmap_page+0x8c/0x9c debug_dma_unmap_page from moxart_mac_free_memory+0x3c/0xa8 moxart_mac_free_memory from moxart_mac_probe+0x190/0x218 moxart_mac_probe from platform_probe+0x48/0x88 platform_probe from really_probe+0xc0/0x2e4 2. After commands: ip link set dev eth0 down ip link set dev eth0 up WARNING: CPU: 0 PID: 55 at kernel/dma/debug.c:570 add_dma_entry+0x204/0x2ec DMA-API: moxart-ethernet 92000000.mac: cacheline tracking EEXIST, overlapping mappings aren't supported CPU: 0 PID: 55 Comm: ip Not tainted 5.19.0+ #57 Hardware name: Generic DT based system unwind_backtrace from show_stack+0x10/0x14 show_stack from dump_stack_lvl+0x34/0x44 dump_stack_lvl from __warn+0xbc/0x1f0 __warn from warn_slowpath_fmt+0x94/0xc8 warn_slowpath_fmt from add_dma_entry+0x204/0x2ec add_dma_entry from dma_map_page_attrs+0x110/0x328 dma_map_page_attrs from moxart_mac_open+0x134/0x320 moxart_mac_open from __dev_open+0x11c/0x1ec __dev_open from __dev_change_flags+0x194/0x22c __dev_change_flags from dev_change_flags+0x14/0x44 dev_change_flags from devinet_ioctl+0x6d4/0x93c devinet_ioctl from inet_ioctl+0x1ac/0x25c v1 -> v2: Extraneous change removed. Fixes: 6c821bd9edc9 ("net: Add MOXA ART SoCs ethernet driver") Signed-off-by: Sergei Antonov Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220819110519.1230877-1-saproj@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/moxa/moxart_ether.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index 19009a6bd33ae..9e57d23e57bf4 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -71,11 +71,6 @@ static int moxart_set_mac_address(struct net_device *ndev, void *addr) static void moxart_mac_free_memory(struct net_device *ndev) { struct moxart_mac_priv_t *priv = netdev_priv(ndev); - int i; - - for (i = 0; i < RX_DESC_NUM; i++) - dma_unmap_single(&priv->pdev->dev, priv->rx_mapping[i], - priv->rx_buf_size, DMA_FROM_DEVICE); if (priv->tx_desc_base) dma_free_coherent(&priv->pdev->dev, @@ -187,6 +182,7 @@ static int moxart_mac_open(struct net_device *ndev) static int moxart_mac_stop(struct net_device *ndev) { struct moxart_mac_priv_t *priv = netdev_priv(ndev); + int i; napi_disable(&priv->napi); @@ -198,6 +194,11 @@ static int moxart_mac_stop(struct net_device *ndev) /* disable all functions */ writel(0, priv->base + REG_MAC_CTRL); + /* unmap areas mapped in moxart_mac_setup_desc_ring() */ + for (i = 0; i < RX_DESC_NUM; i++) + dma_unmap_single(&priv->pdev->dev, priv->rx_mapping[i], + priv->rx_buf_size, DMA_FROM_DEVICE); + return 0; } From c078290a2b7618473a7d0a05334cc91fe0ac2949 Mon Sep 17 00:00:00 2001 From: Jonathan Toppins Date: Fri, 19 Aug 2022 11:15:12 -0400 Subject: [PATCH 208/654] selftests: include bonding tests into the kselftest infra This creates a test collection in drivers/net/bonding for bonding specific kernel selftests. The first test is a reproducer that provisions a bond and given the specific order in how the ip-link(8) commands are issued the bond never transmits an LACPDU frame on any of its slaves. Signed-off-by: Jonathan Toppins Acked-by: Jay Vosburgh Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + tools/testing/selftests/Makefile | 1 + .../selftests/drivers/net/bonding/Makefile | 6 ++ .../net/bonding/bond-break-lacpdu-tx.sh | 81 +++++++++++++++++++ .../selftests/drivers/net/bonding/config | 1 + .../selftests/drivers/net/bonding/settings | 1 + 6 files changed, 91 insertions(+) create mode 100644 tools/testing/selftests/drivers/net/bonding/Makefile create mode 100755 tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh create mode 100644 tools/testing/selftests/drivers/net/bonding/config create mode 100644 tools/testing/selftests/drivers/net/bonding/settings diff --git a/MAINTAINERS b/MAINTAINERS index f512b430c7cbb..274b2c1e506e0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3679,6 +3679,7 @@ F: Documentation/networking/bonding.rst F: drivers/net/bonding/ F: include/net/bond* F: include/uapi/linux/if_bonding.h +F: tools/testing/selftests/net/bonding/ BOSCH SENSORTEC BMA400 ACCELEROMETER IIO DRIVER M: Dan Robertson diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 10b34bb03bc1b..c2064a35688b0 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -12,6 +12,7 @@ TARGETS += cpu-hotplug TARGETS += damon TARGETS += drivers/dma-buf TARGETS += drivers/s390x/uvdevice +TARGETS += drivers/net/bonding TARGETS += efivarfs TARGETS += exec TARGETS += filesystems diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile new file mode 100644 index 0000000000000..ab6c54b120981 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for net selftests + +TEST_PROGS := bond-break-lacpdu-tx.sh + +include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh new file mode 100755 index 0000000000000..47ab90596acb2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh @@ -0,0 +1,81 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Regression Test: +# Verify LACPDUs get transmitted after setting the MAC address of +# the bond. +# +# https://bugzilla.redhat.com/show_bug.cgi?id=2020773 +# +# +---------+ +# | fab-br0 | +# +---------+ +# | +# +---------+ +# | fbond | +# +---------+ +# | | +# +------+ +------+ +# |veth1 | |veth2 | +# +------+ +------+ +# +# We use veths instead of physical interfaces + +set -e +tmp=$(mktemp -q dump.XXXXXX) +cleanup() { + ip link del fab-br0 >/dev/null 2>&1 || : + ip link del fbond >/dev/null 2>&1 || : + ip link del veth1-bond >/dev/null 2>&1 || : + ip link del veth2-bond >/dev/null 2>&1 || : + modprobe -r bonding >/dev/null 2>&1 || : + rm -f -- ${tmp} +} + +trap cleanup 0 1 2 +cleanup +sleep 1 + +# create the bridge +ip link add fab-br0 address 52:54:00:3B:7C:A6 mtu 1500 type bridge \ + forward_delay 15 + +# create the bond +ip link add fbond type bond mode 4 miimon 200 xmit_hash_policy 1 \ + ad_actor_sys_prio 65535 lacp_rate fast + +# set bond address +ip link set fbond address 52:54:00:3B:7C:A6 +ip link set fbond up + +# set again bond sysfs parameters +ip link set fbond type bond ad_actor_sys_prio 65535 + +# create veths +ip link add name veth1-bond type veth peer name veth1-end +ip link add name veth2-bond type veth peer name veth2-end + +# add ports +ip link set fbond master fab-br0 +ip link set veth1-bond down master fbond +ip link set veth2-bond down master fbond + +# bring up +ip link set veth1-end up +ip link set veth2-end up +ip link set fab-br0 up +ip link set fbond up +ip addr add dev fab-br0 10.0.0.3 + +tcpdump -n -i veth1-end -e ether proto 0x8809 >${tmp} 2>&1 & +sleep 15 +pkill tcpdump >/dev/null 2>&1 +rc=0 +num=$(grep "packets captured" ${tmp} | awk '{print $1}') +if test "$num" -gt 0; then + echo "PASS, captured ${num}" +else + echo "FAIL" + rc=1 +fi +exit $rc diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config new file mode 100644 index 0000000000000..dc1c22de3c923 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -0,0 +1 @@ +CONFIG_BONDING=y diff --git a/tools/testing/selftests/drivers/net/bonding/settings b/tools/testing/selftests/drivers/net/bonding/settings new file mode 100644 index 0000000000000..867e118223cd6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/settings @@ -0,0 +1 @@ +timeout=60 From d745b5062ad2b5da90a5e728d7ca884fc07315fd Mon Sep 17 00:00:00 2001 From: Jonathan Toppins Date: Fri, 19 Aug 2022 11:15:13 -0400 Subject: [PATCH 209/654] bonding: 802.3ad: fix no transmission of LACPDUs This is caused by the global variable ad_ticks_per_sec being zero as demonstrated by the reproducer script discussed below. This causes all timer values in __ad_timer_to_ticks to be zero, resulting in the periodic timer to never fire. To reproduce: Run the script in `tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh` which puts bonding into a state where it never transmits LACPDUs. line 44: ip link add fbond type bond mode 4 miimon 200 \ xmit_hash_policy 1 ad_actor_sys_prio 65535 lacp_rate fast setting bond param: ad_actor_sys_prio given: params.ad_actor_system = 0 call stack: bond_option_ad_actor_sys_prio() -> bond_3ad_update_ad_actor_settings() -> set ad.system.sys_priority = bond->params.ad_actor_sys_prio -> ad.system.sys_mac_addr = bond->dev->dev_addr; because params.ad_actor_system == 0 results: ad.system.sys_mac_addr = bond->dev->dev_addr line 48: ip link set fbond address 52:54:00:3B:7C:A6 setting bond MAC addr call stack: bond->dev->dev_addr = new_mac line 52: ip link set fbond type bond ad_actor_sys_prio 65535 setting bond param: ad_actor_sys_prio given: params.ad_actor_system = 0 call stack: bond_option_ad_actor_sys_prio() -> bond_3ad_update_ad_actor_settings() -> set ad.system.sys_priority = bond->params.ad_actor_sys_prio -> ad.system.sys_mac_addr = bond->dev->dev_addr; because params.ad_actor_system == 0 results: ad.system.sys_mac_addr = bond->dev->dev_addr line 60: ip link set veth1-bond down master fbond given: params.ad_actor_system = 0 params.mode = BOND_MODE_8023AD ad.system.sys_mac_addr == bond->dev->dev_addr call stack: bond_enslave -> bond_3ad_initialize(); because first slave -> if ad.system.sys_mac_addr != bond->dev->dev_addr return results: Nothing is run in bond_3ad_initialize() because dev_addr equals sys_mac_addr leaving the global ad_ticks_per_sec zero as it is never initialized anywhere else. The if check around the contents of bond_3ad_initialize() is no longer needed due to commit 5ee14e6d336f ("bonding: 3ad: apply ad_actor settings changes immediately") which sets ad.system.sys_mac_addr if any one of the bonding parameters whos set function calls bond_3ad_update_ad_actor_settings(). This is because if ad.system.sys_mac_addr is zero it will be set to the current bond mac address, this causes the if check to never be true. Fixes: 5ee14e6d336f ("bonding: 3ad: apply ad_actor settings changes immediately") Signed-off-by: Jonathan Toppins Acked-by: Jay Vosburgh Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_3ad.c | 38 ++++++++++++++-------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index d7fb33c078e81..1f0120cbe9e80 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2007,30 +2007,24 @@ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout) */ void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution) { - /* check that the bond is not initialized yet */ - if (!MAC_ADDRESS_EQUAL(&(BOND_AD_INFO(bond).system.sys_mac_addr), - bond->dev->dev_addr)) { - - BOND_AD_INFO(bond).aggregator_identifier = 0; - - BOND_AD_INFO(bond).system.sys_priority = - bond->params.ad_actor_sys_prio; - if (is_zero_ether_addr(bond->params.ad_actor_system)) - BOND_AD_INFO(bond).system.sys_mac_addr = - *((struct mac_addr *)bond->dev->dev_addr); - else - BOND_AD_INFO(bond).system.sys_mac_addr = - *((struct mac_addr *)bond->params.ad_actor_system); + BOND_AD_INFO(bond).aggregator_identifier = 0; + BOND_AD_INFO(bond).system.sys_priority = + bond->params.ad_actor_sys_prio; + if (is_zero_ether_addr(bond->params.ad_actor_system)) + BOND_AD_INFO(bond).system.sys_mac_addr = + *((struct mac_addr *)bond->dev->dev_addr); + else + BOND_AD_INFO(bond).system.sys_mac_addr = + *((struct mac_addr *)bond->params.ad_actor_system); - /* initialize how many times this module is called in one - * second (should be about every 100ms) - */ - ad_ticks_per_sec = tick_resolution; + /* initialize how many times this module is called in one + * second (should be about every 100ms) + */ + ad_ticks_per_sec = tick_resolution; - bond_3ad_initiate_agg_selection(bond, - AD_AGGREGATOR_SELECTION_TIMER * - ad_ticks_per_sec); - } + bond_3ad_initiate_agg_selection(bond, + AD_AGGREGATOR_SELECTION_TIMER * + ad_ticks_per_sec); } /** From f2e44dffa97f2e1c222a959ea5b6e604548b891b Mon Sep 17 00:00:00 2001 From: Jonathan Toppins Date: Fri, 19 Aug 2022 11:15:14 -0400 Subject: [PATCH 210/654] bonding: 3ad: make ad_ticks_per_sec a const The value is only ever set once in bond_3ad_initialize and only ever read otherwise. There seems to be no reason to set the variable via bond_3ad_initialize when setting the global variable will do. Change ad_ticks_per_sec to a const to enforce its read-only usage. Signed-off-by: Jonathan Toppins Acked-by: Jay Vosburgh Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_3ad.c | 11 +++-------- drivers/net/bonding/bond_main.c | 2 +- include/net/bond_3ad.h | 2 +- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 1f0120cbe9e80..184608bd89999 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -84,7 +84,8 @@ enum ad_link_speed_type { static const u8 null_mac_addr[ETH_ALEN + 2] __long_aligned = { 0, 0, 0, 0, 0, 0 }; -static u16 ad_ticks_per_sec; + +static const u16 ad_ticks_per_sec = 1000 / AD_TIMER_INTERVAL; static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000; static const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned = @@ -2001,11 +2002,10 @@ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout) /** * bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures * @bond: bonding struct to work on - * @tick_resolution: tick duration (millisecond resolution) * * Can be called only after the mac address of the bond is set. */ -void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution) +void bond_3ad_initialize(struct bonding *bond) { BOND_AD_INFO(bond).aggregator_identifier = 0; BOND_AD_INFO(bond).system.sys_priority = @@ -2017,11 +2017,6 @@ void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution) BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->params.ad_actor_system); - /* initialize how many times this module is called in one - * second (should be about every 100ms) - */ - ad_ticks_per_sec = tick_resolution; - bond_3ad_initiate_agg_selection(bond, AD_AGGREGATOR_SELECTION_TIMER * ad_ticks_per_sec); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 50e60843020ce..2f4da2c13c0af 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2081,7 +2081,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, /* Initialize AD with the number of times that the AD timer is called in 1 second * can be called only after the mac address of the bond is set */ - bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL); + bond_3ad_initialize(bond); } else { SLAVE_AD_INFO(new_slave)->id = SLAVE_AD_INFO(prev_slave)->id + 1; diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index 184105d682942..be2992e6de5d5 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -290,7 +290,7 @@ static inline const char *bond_3ad_churn_desc(churn_state_t state) } /* ========== AD Exported functions to the main bonding code ========== */ -void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution); +void bond_3ad_initialize(struct bonding *bond); void bond_3ad_bind_slave(struct slave *slave); void bond_3ad_unbind_slave(struct slave *slave); void bond_3ad_state_machine_handler(struct work_struct *); From 9b1ac04698a4bfec146322502cdcd9904c1777fa Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Thu, 4 Aug 2022 13:18:52 +0530 Subject: [PATCH 211/654] powerpc/papr_scm: Fix nvdimm event mappings Commit 4c08d4bbc089 ("powerpc/papr_scm: Add perf interface support") added performance monitoring support for papr-scm nvdimm devices via perf interface. Commit also added an array in papr_scm_priv structure called "nvdimm_events_map", which got filled based on the result of H_SCM_PERFORMANCE_STATS hcall. Currently there is an assumption that the order of events in the stats buffer, returned by the hypervisor is same. And order also happens to matches with the events specified in nvdimm driver code. But this assumption is not documented in Power Architecture Platform Requirements (PAPR) document. Although the order of events happens to be same on current generation od system, but it might not be true in future generation systems. Fix the issue, by adding a static mapping for nvdimm events to corresponding stat-id, and removing the dynamic map from papr_scm_priv structure. Also remove the function papr_scm_pmu_check_events from papr_scm.c file, as we no longer need to copy stat-ids dynamically. Fixes: 4c08d4bbc089 ("powerpc/papr_scm: Add perf interface support") Reported-by: Aneesh Kumar K.V Signed-off-by: Kajol Jain Reviewed-by: Vaibhav Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220804074852.55157-1-kjain@linux.ibm.com --- arch/powerpc/platforms/pseries/papr_scm.c | 88 +++++++---------------- 1 file changed, 27 insertions(+), 61 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 20f6ed813bffb..54740af21557c 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -124,9 +124,6 @@ struct papr_scm_priv { /* The bits which needs to be overridden */ u64 health_bitmap_inject_mask; - - /* array to have event_code and stat_id mappings */ - u8 *nvdimm_events_map; }; static int papr_scm_pmem_flush(struct nd_region *nd_region, @@ -350,6 +347,25 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, #ifdef CONFIG_PERF_EVENTS #define to_nvdimm_pmu(_pmu) container_of(_pmu, struct nvdimm_pmu, pmu) +static const char * const nvdimm_events_map[] = { + [1] = "CtlResCt", + [2] = "CtlResTm", + [3] = "PonSecs ", + [4] = "MemLife ", + [5] = "CritRscU", + [6] = "HostLCnt", + [7] = "HostSCnt", + [8] = "HostSDur", + [9] = "HostLDur", + [10] = "MedRCnt ", + [11] = "MedWCnt ", + [12] = "MedRDur ", + [13] = "MedWDur ", + [14] = "CchRHCnt", + [15] = "CchWHCnt", + [16] = "FastWCnt", +}; + static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count) { struct papr_scm_perf_stat *stat; @@ -357,11 +373,15 @@ static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, struct papr_scm_priv *p = dev_get_drvdata(dev); int rc, size; + /* Invalid eventcode */ + if (event->attr.config == 0 || event->attr.config >= ARRAY_SIZE(nvdimm_events_map)) + return -EINVAL; + /* Allocate request buffer enough to hold single performance stat */ size = sizeof(struct papr_scm_perf_stats) + sizeof(struct papr_scm_perf_stat); - if (!p || !p->nvdimm_events_map) + if (!p) return -EINVAL; stats = kzalloc(size, GFP_KERNEL); @@ -370,7 +390,7 @@ static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, stat = &stats->scm_statistic[0]; memcpy(&stat->stat_id, - &p->nvdimm_events_map[event->attr.config * sizeof(stat->stat_id)], + nvdimm_events_map[event->attr.config], sizeof(stat->stat_id)); stat->stat_val = 0; @@ -458,56 +478,6 @@ static void papr_scm_pmu_del(struct perf_event *event, int flags) papr_scm_pmu_read(event); } -static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu *nd_pmu) -{ - struct papr_scm_perf_stat *stat; - struct papr_scm_perf_stats *stats; - u32 available_events; - int index, rc = 0; - - if (!p->stat_buffer_len) - return -ENOENT; - - available_events = (p->stat_buffer_len - sizeof(struct papr_scm_perf_stats)) - / sizeof(struct papr_scm_perf_stat); - if (available_events == 0) - return -EOPNOTSUPP; - - /* Allocate the buffer for phyp where stats are written */ - stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); - if (!stats) { - rc = -ENOMEM; - return rc; - } - - /* Called to get list of events supported */ - rc = drc_pmem_query_stats(p, stats, 0); - if (rc) - goto out; - - /* - * Allocate memory and populate nvdimm_event_map. - * Allocate an extra element for NULL entry - */ - p->nvdimm_events_map = kcalloc(available_events + 1, - sizeof(stat->stat_id), - GFP_KERNEL); - if (!p->nvdimm_events_map) { - rc = -ENOMEM; - goto out; - } - - /* Copy all stat_ids to event map */ - for (index = 0, stat = stats->scm_statistic; - index < available_events; index++, ++stat) { - memcpy(&p->nvdimm_events_map[index * sizeof(stat->stat_id)], - &stat->stat_id, sizeof(stat->stat_id)); - } -out: - kfree(stats); - return rc; -} - static void papr_scm_pmu_register(struct papr_scm_priv *p) { struct nvdimm_pmu *nd_pmu; @@ -519,8 +489,7 @@ static void papr_scm_pmu_register(struct papr_scm_priv *p) goto pmu_err_print; } - rc = papr_scm_pmu_check_events(p, nd_pmu); - if (rc) + if (!p->stat_buffer_len) goto pmu_check_events_err; nd_pmu->pmu.task_ctx_nr = perf_invalid_context; @@ -539,7 +508,7 @@ static void papr_scm_pmu_register(struct papr_scm_priv *p) rc = register_nvdimm_pmu(nd_pmu, p->pdev); if (rc) - goto pmu_register_err; + goto pmu_check_events_err; /* * Set archdata.priv value to nvdimm_pmu structure, to handle the @@ -548,8 +517,6 @@ static void papr_scm_pmu_register(struct papr_scm_priv *p) p->pdev->archdata.priv = nd_pmu; return; -pmu_register_err: - kfree(p->nvdimm_events_map); pmu_check_events_err: kfree(nd_pmu); pmu_err_print: @@ -1560,7 +1527,6 @@ static int papr_scm_remove(struct platform_device *pdev) unregister_nvdimm_pmu(pdev->archdata.priv); pdev->archdata.priv = NULL; - kfree(p->nvdimm_events_map); kfree(p->bus_desc.provider_name); kfree(p); From 35f73cca1cecda0c1f8bb7d8be4ce5cd2d46ae8c Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 25 Jun 2022 10:36:43 +0200 Subject: [PATCH 212/654] clk: bcm: rpi: Fix error handling of raspberrypi_fw_get_rate The function raspberrypi_fw_get_rate (e.g. used for the recalc_rate hook) can fail to get the clock rate from the firmware. In this case we cannot return a signed error value, which would be casted to unsigned long. Fix this by returning 0 instead. Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20220625083643.4012-1-stefan.wahren@i2se.com Fixes: 4e85e535e6cc ("clk: bcm283x: add driver interfacing with Raspberry Pi's firmware") Acked-by: Florian Fainelli Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-raspberrypi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/bcm/clk-raspberrypi.c b/drivers/clk/bcm/clk-raspberrypi.c index 73518009a0f20..39d63c983d62c 100644 --- a/drivers/clk/bcm/clk-raspberrypi.c +++ b/drivers/clk/bcm/clk-raspberrypi.c @@ -203,7 +203,7 @@ static unsigned long raspberrypi_fw_get_rate(struct clk_hw *hw, ret = raspberrypi_clock_property(rpi->firmware, data, RPI_FIRMWARE_GET_CLOCK_RATE, &val); if (ret) - return ret; + return 0; return val; } From 785538bfdd682c8e962341d585f9b88262a0475e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 16 Aug 2022 10:26:38 -0700 Subject: [PATCH 213/654] scsi: sd: Revert "Rework asynchronous resume support" Although commit 88f1669019bd ("scsi: sd: Rework asynchronous resume support") eliminates a delay for some ATA disks after resume, it causes resume of ATA disks to fail on other setups. See also: * "Resume process hangs for 5-6 seconds starting sometime in 5.16" (https://bugzilla.kernel.org/show_bug.cgi?id=215880). * Geert's regression report (https://lore.kernel.org/linux-scsi/alpine.DEB.2.22.394.2207191125130.1006766@ramsan.of.borg/). This is what I understand about this issue: * During resume, ata_port_pm_resume() starts the SCSI error handler. This changes the SCSI host state into SHOST_RECOVERY and causes scsi_queue_rq() to return BLK_STS_RESOURCE. * sd_resume() calls sd_start_stop_device() for ATA devices. That function in turn calls sd_submit_start() which tries to submit a START STOP UNIT command. That command can only be submitted after the SCSI error handler has changed the SCSI host state back to SHOST_RUNNING. * The SCSI error handler runs on its own thread and calls schedule_work(&(ap->scsi_rescan_task)). That causes ata_scsi_dev_rescan() to be called from the context of a kernel workqueue. That call hangs in blk_mq_get_tag(). I'm not sure why - maybe because all available tags have been allocated by sd_submit_start() calls (this is a guess). Link: https://lore.kernel.org/r/20220816172638.538734-1-bvanassche@acm.org Fixes: 88f1669019bd ("scsi: sd: Rework asynchronous resume support") Cc: Damien Le Moal Cc: Hannes Reinecke Cc: Geert Uytterhoeven Cc: gzhqyz@gmail.com Reported-by: Geert Uytterhoeven Reported-by: gzhqyz@gmail.com Reported-and-tested-by: Vlastimil Babka Tested-by: John Garry Tested-by: Hans de Goede Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 84 ++++++++++------------------------------------- drivers/scsi/sd.h | 5 --- 2 files changed, 18 insertions(+), 71 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 8f79fa6318fec..eb76ba0550216 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -103,7 +103,6 @@ static void sd_config_discard(struct scsi_disk *, unsigned int); static void sd_config_write_same(struct scsi_disk *); static int sd_revalidate_disk(struct gendisk *); static void sd_unlock_native_capacity(struct gendisk *disk); -static void sd_start_done_work(struct work_struct *work); static int sd_probe(struct device *); static int sd_remove(struct device *); static void sd_shutdown(struct device *); @@ -3471,7 +3470,6 @@ static int sd_probe(struct device *dev) sdkp->max_retries = SD_MAX_RETRIES; atomic_set(&sdkp->openers, 0); atomic_set(&sdkp->device->ioerr_cnt, 0); - INIT_WORK(&sdkp->start_done_work, sd_start_done_work); if (!sdp->request_queue->rq_timeout) { if (sdp->type != TYPE_MOD) @@ -3594,69 +3592,12 @@ static void scsi_disk_release(struct device *dev) kfree(sdkp); } -/* Process sense data after a START command finished. */ -static void sd_start_done_work(struct work_struct *work) -{ - struct scsi_disk *sdkp = container_of(work, typeof(*sdkp), - start_done_work); - struct scsi_sense_hdr sshdr; - int res = sdkp->start_result; - - if (res == 0) - return; - - sd_print_result(sdkp, "Start/Stop Unit failed", res); - - if (res < 0) - return; - - if (scsi_normalize_sense(sdkp->start_sense_buffer, - sdkp->start_sense_len, &sshdr)) - sd_print_sense_hdr(sdkp, &sshdr); -} - -/* A START command finished. May be called from interrupt context. */ -static void sd_start_done(struct request *req, blk_status_t status) -{ - const struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); - struct scsi_disk *sdkp = scsi_disk(req->q->disk); - - sdkp->start_result = scmd->result; - WARN_ON_ONCE(scmd->sense_len > SCSI_SENSE_BUFFERSIZE); - sdkp->start_sense_len = scmd->sense_len; - memcpy(sdkp->start_sense_buffer, scmd->sense_buffer, - ARRAY_SIZE(sdkp->start_sense_buffer)); - WARN_ON_ONCE(!schedule_work(&sdkp->start_done_work)); -} - -/* Submit a START command asynchronously. */ -static int sd_submit_start(struct scsi_disk *sdkp, u8 cmd[], u8 cmd_len) -{ - struct scsi_device *sdev = sdkp->device; - struct request_queue *q = sdev->request_queue; - struct request *req; - struct scsi_cmnd *scmd; - - req = scsi_alloc_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_PM); - if (IS_ERR(req)) - return PTR_ERR(req); - - scmd = blk_mq_rq_to_pdu(req); - scmd->cmd_len = cmd_len; - memcpy(scmd->cmnd, cmd, cmd_len); - scmd->allowed = sdkp->max_retries; - req->timeout = SD_TIMEOUT; - req->rq_flags |= RQF_PM | RQF_QUIET; - req->end_io = sd_start_done; - blk_execute_rq_nowait(req, /*at_head=*/true); - - return 0; -} - static int sd_start_stop_device(struct scsi_disk *sdkp, int start) { unsigned char cmd[6] = { START_STOP }; /* START_VALID */ + struct scsi_sense_hdr sshdr; struct scsi_device *sdp = sdkp->device; + int res; if (start) cmd[4] |= 1; /* START */ @@ -3667,10 +3608,23 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start) if (!scsi_device_online(sdp)) return -ENODEV; - /* Wait until processing of sense data has finished. */ - flush_work(&sdkp->start_done_work); + res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, + SD_TIMEOUT, sdkp->max_retries, 0, RQF_PM, NULL); + if (res) { + sd_print_result(sdkp, "Start/Stop Unit failed", res); + if (res > 0 && scsi_sense_valid(&sshdr)) { + sd_print_sense_hdr(sdkp, &sshdr); + /* 0x3a is medium not present */ + if (sshdr.asc == 0x3a) + res = 0; + } + } - return sd_submit_start(sdkp, cmd, sizeof(cmd)); + /* SCSI error codes must not go to the generic layer */ + if (res) + return -EIO; + + return 0; } /* @@ -3697,8 +3651,6 @@ static void sd_shutdown(struct device *dev) sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); sd_start_stop_device(sdkp, 0); } - - flush_work(&sdkp->start_done_work); } static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index b89187761d61f..5eea762f84d18 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -150,11 +150,6 @@ struct scsi_disk { unsigned urswrz : 1; unsigned security : 1; unsigned ignore_medium_access_errors : 1; - - int start_result; - u32 start_sense_len; - u8 start_sense_buffer[SCSI_SENSE_BUFFERSIZE]; - struct work_struct start_done_work; }; #define to_scsi_disk(obj) container_of(obj, struct scsi_disk, disk_dev) From 4f3e509996a877dca1ddffc2abdbb5857727bfc4 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 22 Aug 2022 21:44:50 -0700 Subject: [PATCH 214/654] MAINTAINERS: add include/dt-bindings/input to INPUT DRIVERS Maintainers of the directory Documentation/devicetree/bindings/input are also the maintainers of the corresponding directory include/dt-bindings/input. Add the file entry for include/dt-bindings/input to the appropriate section in MAINTAINERS. Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20220613115654.28117-1-lukas.bulwahn@gmail.com Signed-off-by: Dmitry Torokhov --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 66bffb24a348a..18477041d70d1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9798,6 +9798,7 @@ F: Documentation/devicetree/bindings/input/ F: Documentation/devicetree/bindings/serio/ F: Documentation/input/ F: drivers/input/ +F: include/dt-bindings/input/ F: include/linux/input.h F: include/linux/input/ F: include/uapi/linux/input-event-codes.h From c919c164fc87bcca8e80b3b9224492fa5b6455ba Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Aug 2022 01:01:36 -0500 Subject: [PATCH 215/654] smb3: missing inode locks in zero range smb3 fallocate zero range was not grabbing the inode or filemap_invalidate locks so could have race with pagemap reinstantiating the page. Cc: stable@vger.kernel.org Signed-off-by: David Howells Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 55 ++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 96f3b0573606e..a6fe54281fd3e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3307,26 +3307,43 @@ get_smb2_acl(struct cifs_sb_info *cifs_sb, return pntsd; } +static long smb3_zero_data(struct file *file, struct cifs_tcon *tcon, + loff_t offset, loff_t len, unsigned int xid) +{ + struct cifsFileInfo *cfile = file->private_data; + struct file_zero_data_information fsctl_buf; + + cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len); + + fsctl_buf.FileOffset = cpu_to_le64(offset); + fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); + + return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, + (char *)&fsctl_buf, + sizeof(struct file_zero_data_information), + 0, NULL, NULL); +} + static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, loff_t offset, loff_t len, bool keep_size) { struct cifs_ses *ses = tcon->ses; - struct inode *inode; - struct cifsInodeInfo *cifsi; + struct inode *inode = file_inode(file); + struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifsFileInfo *cfile = file->private_data; - struct file_zero_data_information fsctl_buf; long rc; unsigned int xid; __le64 eof; xid = get_xid(); - inode = d_inode(cfile->dentry); - cifsi = CIFS_I(inode); - trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid, ses->Suid, offset, len); + inode_lock(inode); + filemap_invalidate_lock(inode->i_mapping); + /* * We zero the range through ioctl, so we need remove the page caches * first, otherwise the data may be inconsistent with the server. @@ -3334,26 +3351,12 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, truncate_pagecache_range(inode, offset, offset + len - 1); /* if file not oplocked can't be sure whether asking to extend size */ - if (!CIFS_CACHE_READ(cifsi)) - if (keep_size == false) { - rc = -EOPNOTSUPP; - trace_smb3_zero_err(xid, cfile->fid.persistent_fid, - tcon->tid, ses->Suid, offset, len, rc); - free_xid(xid); - return rc; - } - - cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len); - - fsctl_buf.FileOffset = cpu_to_le64(offset); - fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); + rc = -EOPNOTSUPP; + if (keep_size == false && !CIFS_CACHE_READ(cifsi)) + goto zero_range_exit; - rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, - cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, - (char *)&fsctl_buf, - sizeof(struct file_zero_data_information), - 0, NULL, NULL); - if (rc) + rc = smb3_zero_data(file, tcon, offset, len, xid); + if (rc < 0) goto zero_range_exit; /* @@ -3366,6 +3369,8 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, } zero_range_exit: + filemap_invalidate_unlock(inode->i_mapping); + inode_unlock(inode); free_xid(xid); if (rc) trace_smb3_zero_err(xid, cfile->fid.persistent_fid, tcon->tid, From e41a88f38d87f730647bb14df9211d32241d1176 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Aug 2022 12:05:50 +0200 Subject: [PATCH 216/654] usb: dwc3: qcom: suppress unused-variable warning The dwc3_qcom_read_usb2_speed() helper is now only called when the controller is acting as host, but the compiler will warn that the hcd variable is unused in gadget-only W=1 builds. Fixes: c06795f114a6 ("usb: dwc3: qcom: fix gadget-only builds") Reported-by: kernel test robot Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220822100550.3039-1-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 197583ff3f3d8..d3f3937d70052 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -310,7 +310,7 @@ static enum usb_device_speed dwc3_qcom_read_usb2_speed(struct dwc3_qcom *qcom) { struct dwc3 *dwc = platform_get_drvdata(qcom->dwc3); struct usb_device *udev; - struct usb_hcd *hcd; + struct usb_hcd __maybe_unused *hcd; /* * FIXME: Fix this layering violation. From 5f73aa2cf8bef4a39baa1591c3144ede4788826e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Aug 2022 08:54:55 +0200 Subject: [PATCH 217/654] Revert "usb: typec: ucsi: add a common function ucsi_unregister_connectors()" The recent commit 87d0e2f41b8c ("usb: typec: ucsi: add a common function ucsi_unregister_connectors()") introduced a regression that caused NULL dereference at reading the power supply sysfs. It's a stale sysfs entry that should have been removed but remains with NULL ops. The commit changed the error handling to skip the entries after a NULL con->wq, and this leaves the power device unreleased. For addressing the regression, the straight revert is applied here. Further code improvements can be done from the scratch again. Link: https://bugzilla.suse.com/show_bug.cgi?id=1202386 Link: https://lore.kernel.org/r/87r11cmbx0.wl-tiwai@suse.de Fixes: 87d0e2f41b8c ("usb: typec: ucsi: add a common function ucsi_unregister_connectors()") Cc: Acked-by: Heikki Krogerus Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20220823065455.32579-1-tiwai@suse.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 53 ++++++++++++++++------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 1aea46493b852..7f2624f427241 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1200,32 +1200,6 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) return ret; } -static void ucsi_unregister_connectors(struct ucsi *ucsi) -{ - struct ucsi_connector *con; - int i; - - if (!ucsi->connector) - return; - - for (i = 0; i < ucsi->cap.num_connectors; i++) { - con = &ucsi->connector[i]; - - if (!con->wq) - break; - - cancel_work_sync(&con->work); - ucsi_unregister_partner(con); - ucsi_unregister_altmodes(con, UCSI_RECIPIENT_CON); - ucsi_unregister_port_psy(con); - destroy_workqueue(con->wq); - typec_unregister_port(con->port); - } - - kfree(ucsi->connector); - ucsi->connector = NULL; -} - /** * ucsi_init - Initialize UCSI interface * @ucsi: UCSI to be initialized @@ -1234,6 +1208,7 @@ static void ucsi_unregister_connectors(struct ucsi *ucsi) */ static int ucsi_init(struct ucsi *ucsi) { + struct ucsi_connector *con; u64 command; int ret; int i; @@ -1264,7 +1239,7 @@ static int ucsi_init(struct ucsi *ucsi) } /* Allocate the connectors. Released in ucsi_unregister() */ - ucsi->connector = kcalloc(ucsi->cap.num_connectors, + ucsi->connector = kcalloc(ucsi->cap.num_connectors + 1, sizeof(*ucsi->connector), GFP_KERNEL); if (!ucsi->connector) { ret = -ENOMEM; @@ -1288,7 +1263,15 @@ static int ucsi_init(struct ucsi *ucsi) return 0; err_unregister: - ucsi_unregister_connectors(ucsi); + for (con = ucsi->connector; con->port; con++) { + ucsi_unregister_partner(con); + ucsi_unregister_altmodes(con, UCSI_RECIPIENT_CON); + ucsi_unregister_port_psy(con); + if (con->wq) + destroy_workqueue(con->wq); + typec_unregister_port(con->port); + con->port = NULL; + } err_reset: memset(&ucsi->cap, 0, sizeof(ucsi->cap)); @@ -1402,6 +1385,7 @@ EXPORT_SYMBOL_GPL(ucsi_register); void ucsi_unregister(struct ucsi *ucsi) { u64 cmd = UCSI_SET_NOTIFICATION_ENABLE; + int i; /* Make sure that we are not in the middle of driver initialization */ cancel_delayed_work_sync(&ucsi->work); @@ -1409,7 +1393,18 @@ void ucsi_unregister(struct ucsi *ucsi) /* Disable notifications */ ucsi->ops->async_write(ucsi, UCSI_CONTROL, &cmd, sizeof(cmd)); - ucsi_unregister_connectors(ucsi); + for (i = 0; i < ucsi->cap.num_connectors; i++) { + cancel_work_sync(&ucsi->connector[i].work); + ucsi_unregister_partner(&ucsi->connector[i]); + ucsi_unregister_altmodes(&ucsi->connector[i], + UCSI_RECIPIENT_CON); + ucsi_unregister_port_psy(&ucsi->connector[i]); + if (ucsi->connector[i].wq) + destroy_workqueue(ucsi->connector[i].wq); + typec_unregister_port(ucsi->connector[i].port); + } + + kfree(ucsi->connector); } EXPORT_SYMBOL_GPL(ucsi_unregister); From afe7116f6d3b888778ed6d95e3cf724767b9aedf Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Mon, 8 Aug 2022 11:42:24 +0800 Subject: [PATCH 218/654] ieee802154/adf7242: defer destroy_workqueue call There is a possible race condition (use-after-free) like below (FREE) | (USE) adf7242_remove | adf7242_channel cancel_delayed_work_sync | destroy_workqueue (1) | adf7242_cmd_rx | mod_delayed_work (2) | The root cause for this race is that the upper layer (ieee802154) is unaware of this detaching event and the function adf7242_channel can be called without any checks. To fix this, we can add a flag write at the beginning of adf7242_remove and add flag check in adf7242_channel. Or we can just defer the destructive operation like other commit 3e0588c291d6 ("hamradio: defer ax25 kfree after unregister_netdev") which let the ieee802154_unregister_hw() to handle the synchronization. This patch takes the second option. Fixes: 58e9683d1475 ("net: ieee802154: adf7242: Fix OCL calibration runs") Signed-off-by: Lin Ma Acked-by: Michael Hennerich Link: https://lore.kernel.org/r/20220808034224.12642-1-linma@zju.edu.cn Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/adf7242.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index 6afdf1622944e..5cf218c674a5a 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -1310,10 +1310,11 @@ static void adf7242_remove(struct spi_device *spi) debugfs_remove_recursive(lp->debugfs_root); + ieee802154_unregister_hw(lp->hw); + cancel_delayed_work_sync(&lp->work); destroy_workqueue(lp->wqueue); - ieee802154_unregister_hw(lp->hw); mutex_destroy(&lp->bmux); ieee802154_free_hw(lp->hw); } From b5a990209d72615e3cac2b3b0d8ddd445c020cf5 Mon Sep 17 00:00:00 2001 From: Jilin Yuan Date: Fri, 8 Jul 2022 23:15:38 +0800 Subject: [PATCH 219/654] net/ieee802154: fix repeated words in comments Delete the redundant word 'was'. Signed-off-by: Jilin Yuan Link: https://lore.kernel.org/r/20220708151538.51483-1-yuanjilin@cdjrlc.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 42c0b451088dc..450b16ad40a41 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -2293,7 +2293,7 @@ static int ca8210_set_csma_params( * @retries: Number of retries * * Sets the number of times to retry a transmission if no acknowledgment was - * was received from the other end when one was requested. + * received from the other end when one was requested. * * Return: 0 or linux error code */ From ba0803050d610d5072666be727bca5e03e55b242 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Aug 2022 02:10:56 -0500 Subject: [PATCH 220/654] smb3: missing inode locks in punch hole smb3 fallocate punch hole was not grabbing the inode or filemap_invalidate locks so could have race with pagemap reinstantiating the page. Cc: stable@vger.kernel.org Signed-off-by: David Howells Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index a6fe54281fd3e..4810bd62266a5 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3384,7 +3384,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, loff_t offset, loff_t len) { - struct inode *inode; + struct inode *inode = file_inode(file); struct cifsFileInfo *cfile = file->private_data; struct file_zero_data_information fsctl_buf; long rc; @@ -3393,14 +3393,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, xid = get_xid(); - inode = d_inode(cfile->dentry); - + inode_lock(inode); /* Need to make file sparse, if not already, before freeing range. */ /* Consider adding equivalent for compressed since it could also work */ if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) { rc = -EOPNOTSUPP; - free_xid(xid); - return rc; + goto out; } filemap_invalidate_lock(inode->i_mapping); @@ -3420,8 +3418,10 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, (char *)&fsctl_buf, sizeof(struct file_zero_data_information), CIFSMaxBufSize, NULL, NULL); - free_xid(xid); filemap_invalidate_unlock(inode->i_mapping); +out: + inode_unlock(inode); + free_xid(xid); return rc; } From 0b52f76351bf87f35b62195fca874b6055125bc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Li=C5=A1ka?= Date: Thu, 18 Aug 2022 22:31:57 +0200 Subject: [PATCH 221/654] docs/arm64: elf_hwcaps: unify newlines in HWCAP lists Unify horizontal spacing (remove extra newlines) which are sensitive to visual presentation by Sphinx. Fixes: 5e64b862c482 ("arm64/sme: Basic enumeration support") Signed-off-by: Martin Liska Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/84e3d6cc-75cf-d6f3-9bb8-be02075aaf6d@suse.cz Signed-off-by: Will Deacon --- Documentation/arm64/elf_hwcaps.rst | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index 52b75a25c2054..311021f2e5600 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -242,44 +242,34 @@ HWCAP2_MTE3 by Documentation/arm64/memory-tagging-extension.rst. HWCAP2_SME - Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described by Documentation/arm64/sme.rst. HWCAP2_SME_I16I64 - Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111. HWCAP2_SME_F64F64 - Functionality implied by ID_AA64SMFR0_EL1.F64F64 == 0b1. HWCAP2_SME_I8I32 - Functionality implied by ID_AA64SMFR0_EL1.I8I32 == 0b1111. HWCAP2_SME_F16F32 - Functionality implied by ID_AA64SMFR0_EL1.F16F32 == 0b1. HWCAP2_SME_B16F32 - Functionality implied by ID_AA64SMFR0_EL1.B16F32 == 0b1. HWCAP2_SME_F32F32 - Functionality implied by ID_AA64SMFR0_EL1.F32F32 == 0b1. HWCAP2_SME_FA64 - Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1. HWCAP2_WFXT - Functionality implied by ID_AA64ISAR2_EL1.WFXT == 0b0010. HWCAP2_EBF16 - Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0010. 4. Unused AT_HWCAP bits From 729a916599eaf13df66ae5404f5489f38518ea8c Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Thu, 21 Jul 2022 11:05:31 +0800 Subject: [PATCH 222/654] arm64: Fix comment typo Replace wrong 'FIQ EL1h' comment with 'FIQ EL1t'. Signed-off-by: Kuan-Ying Lee Link: https://lore.kernel.org/r/20220721030531.21234-1-Kuan-Ying.Lee@mediatek.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 254fe31c03a07..2d73b3e793b2b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -502,7 +502,7 @@ tsk .req x28 // current thread_info SYM_CODE_START(vectors) kernel_ventry 1, t, 64, sync // Synchronous EL1t kernel_ventry 1, t, 64, irq // IRQ EL1t - kernel_ventry 1, t, 64, fiq // FIQ EL1h + kernel_ventry 1, t, 64, fiq // FIQ EL1t kernel_ventry 1, t, 64, error // Error EL1t kernel_ventry 1, h, 64, sync // Synchronous EL1h From 2e8cff0a0eee87b27f0cf87ad8310eb41b5886ab Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 17 Aug 2022 16:40:22 +0100 Subject: [PATCH 223/654] arm64: fix rodata=full On arm64, "rodata=full" has been suppored (but not documented) since commit: c55191e96caa9d78 ("arm64: mm: apply r/o permissions of VM areas to its linear alias as well") As it's necessary to determine the rodata configuration early during boot, arm64 has an early_param() handler for this, whereas init/main.c has a __setup() handler which is run later. Unfortunately, this split meant that since commit: f9a40b0890658330 ("init/main.c: return 1 from handled __setup() functions") ... passing "rodata=full" would result in a spurious warning from the __setup() handler (though RO permissions would be configured appropriately). Further, "rodata=full" has been broken since commit: 0d6ea3ac94ca77c5 ("lib/kstrtox.c: add "false"/"true" support to kstrtobool()") ... which caused strtobool() to parse "full" as false (in addition to many other values not documented for the "rodata=" kernel parameter. This patch fixes this breakage by: * Moving the core parameter parser to an __early_param(), such that it is available early. * Adding an (optional) arch hook which arm64 can use to parse "full". * Updating the documentation to mention that "full" is valid for arm64. * Having the core parameter parser handle "on" and "off" explicitly, such that any undocumented values (e.g. typos such as "ful") are reported as errors rather than being silently accepted. Note that __setup() and early_param() have opposite conventions for their return values, where __setup() uses 1 to indicate a parameter was handled and early_param() uses 0 to indicate a parameter was handled. Fixes: f9a40b089065 ("init/main.c: return 1 from handled __setup() functions") Fixes: 0d6ea3ac94ca ("lib/kstrtox.c: add "false"/"true" support to kstrtobool()") Signed-off-by: Mark Rutland Cc: Andy Shevchenko Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Jagdish Gediya Cc: Matthew Wilcox Cc: Randy Dunlap Cc: Will Deacon Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220817154022.3974645-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- .../admin-guide/kernel-parameters.txt | 2 ++ arch/arm64/include/asm/setup.h | 17 +++++++++++++++++ arch/arm64/mm/mmu.c | 18 ------------------ init/main.c | 18 +++++++++++++++--- 4 files changed, 34 insertions(+), 21 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d7f30902fda02..426fa892d311a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5331,6 +5331,8 @@ rodata= [KNL] on Mark read-only kernel memory as read-only (default). off Leave read-only kernel memory writable for debugging. + full Mark read-only kernel memory and aliases as read-only + [arm64] rockchip.usb_uart Enable the uart passthrough on the designated usb port diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h index 6437df6617009..f4af547ef54ca 100644 --- a/arch/arm64/include/asm/setup.h +++ b/arch/arm64/include/asm/setup.h @@ -3,6 +3,8 @@ #ifndef __ARM64_ASM_SETUP_H #define __ARM64_ASM_SETUP_H +#include + #include void *get_early_fdt_ptr(void); @@ -14,4 +16,19 @@ void early_fdt_map(u64 dt_phys); extern phys_addr_t __fdt_pointer __initdata; extern u64 __cacheline_aligned boot_args[4]; +static inline bool arch_parse_debug_rodata(char *arg) +{ + extern bool rodata_enabled; + extern bool rodata_full; + + if (arg && !strcmp(arg, "full")) { + rodata_enabled = true; + rodata_full = true; + return true; + } + + return false; +} +#define arch_parse_debug_rodata arch_parse_debug_rodata + #endif diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index db7c4e6ae57bb..e7ad44585f40a 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -642,24 +642,6 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end, vm_area_add_early(vma); } -static int __init parse_rodata(char *arg) -{ - int ret = strtobool(arg, &rodata_enabled); - if (!ret) { - rodata_full = false; - return 0; - } - - /* permit 'full' in addition to boolean options */ - if (strcmp(arg, "full")) - return -EINVAL; - - rodata_enabled = true; - rodata_full = true; - return 0; -} -early_param("rodata", parse_rodata); - #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __init map_entry_trampoline(void) { diff --git a/init/main.c b/init/main.c index 91642a4e69be6..1fe7942f5d4a8 100644 --- a/init/main.c +++ b/init/main.c @@ -1446,13 +1446,25 @@ static noinline void __init kernel_init_freeable(void); #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX) bool rodata_enabled __ro_after_init = true; + +#ifndef arch_parse_debug_rodata +static inline bool arch_parse_debug_rodata(char *str) { return false; } +#endif + static int __init set_debug_rodata(char *str) { - if (strtobool(str, &rodata_enabled)) + if (arch_parse_debug_rodata(str)) + return 0; + + if (str && !strcmp(str, "on")) + rodata_enabled = true; + else if (str && !strcmp(str, "off")) + rodata_enabled = false; + else pr_warn("Invalid option string for rodata: '%s'\n", str); - return 1; + return 0; } -__setup("rodata=", set_debug_rodata); +early_param("rodata", set_debug_rodata); #endif #ifdef CONFIG_STRICT_KERNEL_RWX From e89d120c4b720e232cc6a94f0fcbd59c15d41489 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Fri, 19 Aug 2022 11:30:50 +0100 Subject: [PATCH 224/654] arm64: errata: add detection for AMEVCNTR01 incrementing incorrectly The AMU counter AMEVCNTR01 (constant counter) should increment at the same rate as the system counter. On affected Cortex-A510 cores, AMEVCNTR01 increments incorrectly giving a significantly higher output value. This results in inaccurate task scheduler utilization tracking and incorrect feedback on CPU frequency. Work around this problem by returning 0 when reading the affected counter in key locations that results in disabling all users of this counter from using it either for frequency invariance or as FFH reference counter. This effect is the same to firmware disabling affected counters. Details on how the two features are affected by this erratum: - AMU counters will not be used for frequency invariance for affected CPUs and CPUs in the same cpufreq policy. AMUs can still be used for frequency invariance for unaffected CPUs in the system. Although unlikely, if no alternative method can be found to support frequency invariance for affected CPUs (cpufreq based or solution based on platform counters) frequency invariance will be disabled. Please check the chapter on frequency invariance at Documentation/scheduler/sched-capacity.rst for details of its effect. - Given that FFH can be used to fetch either the core or constant counter values, restrictions are lifted regarding any of these counters returning a valid (!0) value. Therefore FFH is considered supported if there is a least one CPU that support AMUs, independent of any counters being disabled or affected by this erratum. Clarifying comments are now added to the cpc_ffh_supported(), cpu_read_constcnt() and cpu_read_corecnt() functions. The above is achieved through adding a new erratum: ARM64_ERRATUM_2457168. Signed-off-by: Ionela Voinescu Reviewed-by: Catalin Marinas Cc: Catalin Marinas Cc: Will Deacon Cc: James Morse Link: https://lore.kernel.org/r/20220819103050.24211-1-ionela.voinescu@arm.com Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 2 ++ arch/arm64/Kconfig | 17 ++++++++++++++ arch/arm64/kernel/cpu_errata.c | 10 ++++++++ arch/arm64/kernel/cpufeature.c | 5 +++- arch/arm64/kernel/topology.c | 32 ++++++++++++++++++++++++-- arch/arm64/tools/cpucaps | 1 + 6 files changed, 64 insertions(+), 3 deletions(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 33b04db8408f9..fda97b3fcf018 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -52,6 +52,8 @@ stable kernels. | Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2038923 | ARM64_ERRATUM_2038923 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 571cc234d0b3f..9fb9fff08c94d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -917,6 +917,23 @@ config ARM64_ERRATUM_1902691 If unsure, say Y. +config ARM64_ERRATUM_2457168 + bool "Cortex-A510: 2457168: workaround for AMEVCNTR01 incrementing incorrectly" + depends on ARM64_AMU_EXTN + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 2457168. + + The AMU counter AMEVCNTR01 (constant counter) should increment at the same rate + as the system counter. On affected Cortex-A510 cores AMEVCNTR01 increments + incorrectly giving a significantly higher output value. + + Work around this problem by returning 0 when reading the affected counter in + key locations that results in disabling all users of this counter. This effect + is the same to firmware disabling affected counters. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 0f7e9087d900d..53b973b6059f7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -656,6 +656,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2) }, #endif +#ifdef CONFIG_ARM64_ERRATUM_2457168 + { + .desc = "ARM erratum 2457168", + .capability = ARM64_WORKAROUND_2457168, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + + /* Cortex-A510 r0p0-r1p1 */ + CAP_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1) + }, +#endif #ifdef CONFIG_ARM64_ERRATUM_2038923 { .desc = "ARM erratum 2038923", diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 907401e4fffb1..af4de817d7123 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1870,7 +1870,10 @@ static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap) pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n", smp_processor_id()); cpumask_set_cpu(smp_processor_id(), &amu_cpus); - update_freq_counters_refs(); + + /* 0 reference values signal broken/disabled counters */ + if (!this_cpu_has_cap(ARM64_WORKAROUND_2457168)) + update_freq_counters_refs(); } } diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 869ffc4d44847..ad2bfc794257d 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -296,12 +296,25 @@ core_initcall(init_amu_fie); static void cpu_read_corecnt(void *val) { + /* + * A value of 0 can be returned if the current CPU does not support AMUs + * or if the counter is disabled for this CPU. A return value of 0 at + * counter read is properly handled as an error case by the users of the + * counter. + */ *(u64 *)val = read_corecnt(); } static void cpu_read_constcnt(void *val) { - *(u64 *)val = read_constcnt(); + /* + * Return 0 if the current CPU is affected by erratum 2457168. A value + * of 0 is also returned if the current CPU does not support AMUs or if + * the counter is disabled. A return value of 0 at counter read is + * properly handled as an error case by the users of the counter. + */ + *(u64 *)val = this_cpu_has_cap(ARM64_WORKAROUND_2457168) ? + 0UL : read_constcnt(); } static inline @@ -328,7 +341,22 @@ int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val) */ bool cpc_ffh_supported(void) { - return freq_counters_valid(get_cpu_with_amu_feat()); + int cpu = get_cpu_with_amu_feat(); + + /* + * FFH is considered supported if there is at least one present CPU that + * supports AMUs. Using FFH to read core and reference counters for CPUs + * that do not support AMUs, have counters disabled or that are affected + * by errata, will result in a return value of 0. + * + * This is done to allow any enabled and valid counters to be read + * through FFH, knowing that potentially returning 0 as counter value is + * properly handled by the users of these counters. + */ + if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask)) + return false; + + return true; } int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val) diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 779653771507a..63b2484ce6c3d 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -67,6 +67,7 @@ WORKAROUND_1902691 WORKAROUND_2038923 WORKAROUND_2064142 WORKAROUND_2077057 +WORKAROUND_2457168 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE WORKAROUND_TRBE_WRITE_OUT_OF_RANGE From e75d18cecbb3805895d8ed64da4f78575ec96043 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 8 Aug 2022 09:46:40 +0100 Subject: [PATCH 225/654] arm64: cacheinfo: Fix incorrect assignment of signed error value to unsigned fw_level Though acpi_find_last_cache_level() always returned signed value and the document states it will return any errors caused by lack of a PPTT table, it never returned negative values before. Commit 0c80f9e165f8 ("ACPI: PPTT: Leave the table mapped for the runtime usage") however changed it by returning -ENOENT if no PPTT was found. The value returned from acpi_find_last_cache_level() is then assigned to unsigned fw_level. It will result in the number of cache leaves calculated incorrectly as a huge value which will then cause the following warning from __alloc_pages as the order would be great than MAX_ORDER because of incorrect and huge cache leaves value. | WARNING: CPU: 0 PID: 1 at mm/page_alloc.c:5407 __alloc_pages+0x74/0x314 | Modules linked in: | CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-10393-g7c2a8d3ac4c0 #73 | pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : __alloc_pages+0x74/0x314 | lr : alloc_pages+0xe8/0x318 | Call trace: | __alloc_pages+0x74/0x314 | alloc_pages+0xe8/0x318 | kmalloc_order_trace+0x68/0x1dc | __kmalloc+0x240/0x338 | detect_cache_attributes+0xe0/0x56c | update_siblings_masks+0x38/0x284 | store_cpu_topology+0x78/0x84 | smp_prepare_cpus+0x48/0x134 | kernel_init_freeable+0xc4/0x14c | kernel_init+0x2c/0x1b4 | ret_from_fork+0x10/0x20 Fix the same by changing fw_level to be signed integer and return the error from init_cache_level() early in case of error. Reported-and-Tested-by: Bruno Goncalves Signed-off-by: Sudeep Holla Link: https://lore.kernel.org/r/20220808084640.3165368-1-sudeep.holla@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cacheinfo.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 587543c6c51cb..97c42be71338a 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -45,7 +45,8 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, int init_cache_level(unsigned int cpu) { - unsigned int ctype, level, leaves, fw_level; + unsigned int ctype, level, leaves; + int fw_level; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) { @@ -63,6 +64,9 @@ int init_cache_level(unsigned int cpu) else fw_level = acpi_find_last_cache_level(cpu); + if (fw_level < 0) + return fw_level; + if (level < fw_level) { /* * some external caches not specified in CLIDR_EL1 From 02e483f8d414cc4f467389843d61b63bbbbcd892 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 18 Aug 2022 22:36:11 +0100 Subject: [PATCH 226/654] arm64/sysreg: Directly include bitfield.h The SYS_FIELD_ macros in sysreg.h use definitions from bitfield.h but there is no direct inclusion of it, add one to ensure that sysreg.h is directly usable. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220818213613.733091-2-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7c71358d44c4a..c670026770156 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1116,6 +1116,7 @@ #else +#include #include #include #include From a10edea4efbba4e8756f493781e953dd3592f24a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 18 Aug 2022 22:36:12 +0100 Subject: [PATCH 227/654] arm64/sysreg: Guard SYS_FIELD_ macros for asm The SYS_FIELD_ macros are not safe for assembly contexts, move them inside the guarded section. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220818213613.733091-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c670026770156..818df938a7ad0 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1210,8 +1210,6 @@ par; \ }) -#endif - #define SYS_FIELD_GET(reg, field, val) \ FIELD_GET(reg##_##field##_MASK, val) @@ -1221,4 +1219,6 @@ #define SYS_FIELD_PREP_ENUM(reg, field, val) \ FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val) +#endif + #endif /* __ASM_SYSREG_H */ From 53d2d84a1f6d68e036adab71df8e0f37cd2724f6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 18 Aug 2022 22:36:13 +0100 Subject: [PATCH 228/654] arm64/cache: Fix cache_type_cwg() for register generation Ard noticed that since we converted CTR_EL0 to automatic generation we have been seeing errors on some systems handling the value of cache_type_cwg() such as CPU features: No Cache Writeback Granule information, assuming 128 This is because the manual definition of CTR_EL0_CWG_MASK was done without a shift while our convention is to define the mask after shifting. This means that the user in cache_type_cwg() was broken as it was written for the manually written shift then mask. Fix this by converting to use SYS_FIELD_GET(). The only other field where the _MASK for this register is used is IminLine which is at offset 0 so unaffected. Fixes: 9a3634d02301 ("arm64/sysreg: Convert CTR_EL0 to automatic generation") Reported-by: Ard Biesheuvel Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220818213613.733091-4-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index ca9b487112ccb..34256bda0da9d 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -71,7 +71,7 @@ static __always_inline int icache_is_vpipt(void) static inline u32 cache_type_cwg(void) { - return (read_cpuid_cachetype() >> CTR_EL0_CWG_SHIFT) & CTR_EL0_CWG_MASK; + return SYS_FIELD_GET(CTR_EL0, CWG, read_cpuid_cachetype()); } #define __read_mostly __section(".data..read_mostly") From 7ddcaf78e93c9282b4d92184f511b4d5bee75355 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 17 Aug 2022 19:23:21 +0100 Subject: [PATCH 229/654] arm64/signal: Raise limit on stack frames The signal code has a limit of 64K on the size of a stack frame that it will generate, if this limit is exceeded then a process will be killed if it receives a signal. Unfortunately with the advent of SME this limit is too small - the maximum possible size of the ZA register alone is 64K. This is not an issue for practical systems at present but is easily seen using virtual platforms. Raise the limit to 256K, this is substantially more than could be used by any current architecture extension. Signed-off-by: Mark Brown Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220817182324.638214-2-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 3e6d0352d7d36..0685b87849275 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -91,7 +91,7 @@ static size_t sigframe_size(struct rt_sigframe_user_layout const *user) * not taken into account. This limit is not a guarantee and is * NOT ABI. */ -#define SIGFRAME_MAXSZ SZ_64K +#define SIGFRAME_MAXSZ SZ_256K static int __sigframe_alloc(struct rt_sigframe_user_layout *user, unsigned long *offset, size_t size, bool extend) From ea64baacbc36a0d552aec0d87107182f40211131 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 17 Aug 2022 19:23:22 +0100 Subject: [PATCH 230/654] arm64/signal: Flush FPSIMD register state when disabling streaming mode When handling a signal delivered to a context with streaming mode enabled we will disable streaming mode for the signal handler, when doing so we should also flush the saved FPSIMD register state like exiting streaming mode in the hardware would do so that if that state is reloaded we get the same behaviour. Without this we will reload whatever the last FPSIMD state that was saved for the task was. Fixes: 40a8e87bb328 ("arm64/sme: Disable ZA and streaming mode when handling signals") Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220817182324.638214-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 0685b87849275..0d28a783a69a1 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -926,6 +926,16 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, /* Signal handlers are invoked with ZA and streaming mode disabled */ if (system_supports_sme()) { + /* + * If we were in streaming mode the saved register + * state was SVE but we will exit SM and use the + * FPSIMD register state - flush the saved FPSIMD + * register state in case it gets loaded. + */ + if (current->thread.svcr & SVCR_SM_MASK) + memset(¤t->thread.uw.fpsimd_state, 0, + sizeof(current->thread.uw.fpsimd_state)); + current->thread.svcr &= ~(SVCR_ZA_MASK | SVCR_SM_MASK); sme_smstop(); From 826a4fdd2ada9e5923c58bdd168f31a42e958ffc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 17 Aug 2022 19:23:23 +0100 Subject: [PATCH 231/654] arm64/sme: Don't flush SVE register state when allocating SME storage Currently when taking a SME access trap we allocate storage for the SVE register state in order to be able to handle storage of streaming mode SVE. Due to the original usage in a purely SVE context the SVE register state allocation this also flushes the register state for SVE if storage was already allocated but in the SME context this is not desirable. For a SME access trap to be taken the task must not be in streaming mode so either there already is SVE register state present for regular SVE mode which would be corrupted or the task does not have TIF_SVE and the flush is redundant. Fix this by adding a flag to sve_alloc() indicating if we are in a SVE context and need to flush the state. Freshly allocated storage is always zeroed either way. Fixes: 8bd7f91c03d8 ("arm64/sme: Implement traps and syscall handling for SME") Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220817182324.638214-4-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 4 ++-- arch/arm64/kernel/fpsimd.c | 10 ++++++---- arch/arm64/kernel/ptrace.c | 6 +++--- arch/arm64/kernel/signal.c | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 9bb1873f52951..6f86b7ab6c28f 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -153,7 +153,7 @@ struct vl_info { #ifdef CONFIG_ARM64_SVE -extern void sve_alloc(struct task_struct *task); +extern void sve_alloc(struct task_struct *task, bool flush); extern void fpsimd_release_task(struct task_struct *task); extern void fpsimd_sync_to_sve(struct task_struct *task); extern void fpsimd_force_sync_to_sve(struct task_struct *task); @@ -256,7 +256,7 @@ size_t sve_state_size(struct task_struct const *task); #else /* ! CONFIG_ARM64_SVE */ -static inline void sve_alloc(struct task_struct *task) { } +static inline void sve_alloc(struct task_struct *task, bool flush) { } static inline void fpsimd_release_task(struct task_struct *task) { } static inline void sve_sync_to_fpsimd(struct task_struct *task) { } static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { } diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index dd63ffc3a2fa2..b9ae9827e6e8e 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -715,10 +715,12 @@ size_t sve_state_size(struct task_struct const *task) * do_sve_acc() case, there is no ABI requirement to hide stale data * written previously be task. */ -void sve_alloc(struct task_struct *task) +void sve_alloc(struct task_struct *task, bool flush) { if (task->thread.sve_state) { - memset(task->thread.sve_state, 0, sve_state_size(task)); + if (flush) + memset(task->thread.sve_state, 0, + sve_state_size(task)); return; } @@ -1388,7 +1390,7 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs) return; } - sve_alloc(current); + sve_alloc(current, true); if (!current->thread.sve_state) { force_sig(SIGKILL); return; @@ -1439,7 +1441,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) return; } - sve_alloc(current); + sve_alloc(current, false); sme_alloc(current); if (!current->thread.sve_state || !current->thread.za_state) { force_sig(SIGKILL); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 21da83187a602..eb7c08dfb8348 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -882,7 +882,7 @@ static int sve_set_common(struct task_struct *target, * state and ensure there's storage. */ if (target->thread.svcr != old_svcr) - sve_alloc(target); + sve_alloc(target, true); } /* Registers: FPSIMD-only case */ @@ -912,7 +912,7 @@ static int sve_set_common(struct task_struct *target, goto out; } - sve_alloc(target); + sve_alloc(target, true); if (!target->thread.sve_state) { ret = -ENOMEM; clear_tsk_thread_flag(target, TIF_SVE); @@ -1082,7 +1082,7 @@ static int za_set(struct task_struct *target, /* Ensure there is some SVE storage for streaming mode */ if (!target->thread.sve_state) { - sve_alloc(target); + sve_alloc(target, false); if (!target->thread.sve_state) { clear_thread_flag(TIF_SME); ret = -ENOMEM; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 0d28a783a69a1..9ad911f1647c8 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -310,7 +310,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) fpsimd_flush_task_state(current); /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ - sve_alloc(current); + sve_alloc(current, true); if (!current->thread.sve_state) { clear_thread_flag(TIF_SVE); return -ENOMEM; From 714f3cbd70a4db9f9b7fe5b8a032896ed33fb824 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 17 Aug 2022 19:23:24 +0100 Subject: [PATCH 232/654] arm64/sme: Don't flush SVE register state when handling SME traps Currently as part of handling a SME access trap we flush the SVE register state. This is not needed and would corrupt register state if the task has access to the SVE registers already. For non-streaming mode accesses the required flushing will be done in the SVE access trap. For streaming mode SVE register accesses the architecture guarantees that the register state will be flushed when streaming mode is entered or exited so there is no need for us to do so. Simply remove the register initialisation. Fixes: 8bd7f91c03d8 ("arm64/sme: Implement traps and syscall handling for SME") Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220817182324.638214-5-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index b9ae9827e6e8e..23834d96d1e78 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1462,17 +1462,6 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) fpsimd_bind_task_to_cpu(); } - /* - * If SVE was not already active initialise the SVE registers, - * any non-shared state between the streaming and regular SVE - * registers is architecturally guaranteed to be zeroed when - * we enter streaming mode. We do not need to initialize ZA - * since ZA must be disabled at this point and enabling ZA is - * architecturally defined to zero ZA. - */ - if (system_supports_sve() && !test_thread_flag(TIF_SVE)) - sve_init_regs(); - put_cpu_fpsimd_context(); } From 13a8e0f6b01b14b2e28ba144e112c883f03a3db2 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 19 Aug 2022 15:16:11 -0700 Subject: [PATCH 233/654] Revert "driver core: Delete driver_deferred_probe_check_state()" This reverts commit 9cbffc7a59561be950ecc675d19a3d2b45202b2b. There are a few more issues to fix that have been reported in the thread for the original series [1]. We'll need to fix those before this will work. So, revert it for now. [1] - https://lore.kernel.org/lkml/20220601070707.3946847-1-saravanak@google.com/ Fixes: 9cbffc7a5956 ("driver core: Delete driver_deferred_probe_check_state()") Tested-by: Tony Lindgren Tested-by: Peng Fan Tested-by: Douglas Anderson Tested-by: Alexander Stein Reviewed-by: Tony Lindgren Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20220819221616.2107893-2-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 30 ++++++++++++++++++++++++++++++ include/linux/device/driver.h | 1 + 2 files changed, 31 insertions(+) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 70f79fc715394..a8916d1bfdcba 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -274,12 +274,42 @@ static int __init deferred_probe_timeout_setup(char *str) } __setup("deferred_probe_timeout=", deferred_probe_timeout_setup); +/** + * driver_deferred_probe_check_state() - Check deferred probe state + * @dev: device to check + * + * Return: + * * -ENODEV if initcalls have completed and modules are disabled. + * * -ETIMEDOUT if the deferred probe timeout was set and has expired + * and modules are enabled. + * * -EPROBE_DEFER in other cases. + * + * Drivers or subsystems can opt-in to calling this function instead of directly + * returning -EPROBE_DEFER. + */ +int driver_deferred_probe_check_state(struct device *dev) +{ + if (!IS_ENABLED(CONFIG_MODULES) && initcalls_done) { + dev_warn(dev, "ignoring dependency for device, assuming no driver\n"); + return -ENODEV; + } + + if (!driver_deferred_probe_timeout && initcalls_done) { + dev_warn(dev, "deferred probe timeout, ignoring dependency\n"); + return -ETIMEDOUT; + } + + return -EPROBE_DEFER; +} +EXPORT_SYMBOL_GPL(driver_deferred_probe_check_state); + static void deferred_probe_timeout_work_func(struct work_struct *work) { struct device_private *p; fw_devlink_drivers_done(); + driver_deferred_probe_timeout = 0; driver_deferred_probe_trigger(); flush_work(&deferred_probe_work); diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 7acaabde5396d..2114d65b862fc 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -242,6 +242,7 @@ driver_find_device_by_acpi_dev(struct device_driver *drv, const void *adev) extern int driver_deferred_probe_timeout; void driver_deferred_probe_add(struct device *dev); +int driver_deferred_probe_check_state(struct device *dev); void driver_init(void); /** From cffaf9721f755b46370e7be0592dc41168c95019 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 19 Aug 2022 15:16:12 -0700 Subject: [PATCH 234/654] Revert "net: mdio: Delete usage of driver_deferred_probe_check_state()" This reverts commit f8217275b57aa48d98cc42051c2aac34152718d6. There are a few more issues to fix that have been reported in the thread for the original series [1]. We'll need to fix those before this will work. So, revert it for now. [1] - https://lore.kernel.org/lkml/CAMuHMdWo_wRwV-i_iyTxVnEsf3Th9GBAG+wxUQMQGnw1t2ijTg@mail.gmail.com/ Fixes: f8217275b57a ("net: mdio: Delete usage of driver_deferred_probe_check_state()") Reported-by: Geert Uytterhoeven Tested-by: Alexander Stein Reviewed-by: Tony Lindgren Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20220819221616.2107893-3-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/mdio/fwnode_mdio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index 3e79c2c519298..1c1584fca6327 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -47,7 +47,9 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio, * just fall back to poll mode */ if (rc == -EPROBE_DEFER) - rc = -ENODEV; + rc = driver_deferred_probe_check_state(&phy->mdio.dev); + if (rc == -EPROBE_DEFER) + return rc; if (rc > 0) { phy->irq = rc; From e20813dcdc05aa295ef586b518142d0fab4c5692 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 19 Aug 2022 15:16:13 -0700 Subject: [PATCH 235/654] Revert "PM: domains: Delete usage of driver_deferred_probe_check_state()" This reverts commit 5a46079a96451cfb15e4f5f01f73f7ba24ef851a. Quite a few issues have been reported [1][2][3][4][5][6] on the original commit. While about half of them have been fixed, I'll need to fix the rest before driver_deferred_probe_check_state() can be deleted. So, revert the deletion for now. [1] - https://lore.kernel.org/all/DU0PR04MB941735271F45C716342D0410886B9@DU0PR04MB9417.eurprd04.prod.outlook.com/ [2] - https://lore.kernel.org/all/CM6REZS9Z8AC.2KCR9N3EFLNQR@otso/ [3] - https://lore.kernel.org/all/CAD=FV=XYVwaXZxqUKAuM5c7NiVjFz5C6m6gAHSJ7rBXBF94_Tg@mail.gmail.com/ [4] - https://lore.kernel.org/all/Yvpd2pwUJGp7R+YE@euler/ [5] - https://lore.kernel.org/lkml/20220601070707.3946847-2-saravanak@google.com/ [6] - https://lore.kernel.org/all/CA+G9fYt_cc5SiNv1Vbse=HYY_+uc+9OYPZuJ-x59bROSaLN6fw@mail.gmail.com/ Fixes: 5a46079a9645 ("PM: domains: Delete usage of driver_deferred_probe_check_state()") Reported-by: Peng Fan Reported-by: Luca Weiss Reported-by: Doug Anderson Reported-by: Colin Foster Reported-by: Tony Lindgren Reported-by: Alexander Stein Reported-by: Naresh Kamboju Tested-by: Tony Lindgren Tested-by: Peng Fan Tested-by: Douglas Anderson Tested-by: Alexander Stein Reviewed-by: Tony Lindgren Acked-by: Ulf Hansson Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20220819221616.2107893-4-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/domain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 5a2e0232862e0..55a10e6d4e2a7 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2733,7 +2733,7 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, mutex_unlock(&gpd_list_lock); dev_dbg(dev, "%s() failed to find PM domain: %ld\n", __func__, PTR_ERR(pd)); - return -ENODEV; + return driver_deferred_probe_check_state(base_dev); } dev_dbg(dev, "adding to PM domain %s\n", pd->name); From a4f124908617553ea3929a17293cea4960c92ba3 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 19 Aug 2022 15:16:14 -0700 Subject: [PATCH 236/654] Revert "iommu/of: Delete usage of driver_deferred_probe_check_state()" This reverts commit b09796d528bbf06e3e10a4a8f78038719da7ebc6. An issue was reported[1] on the original commit. I'll need to address that before I can delete the use of driver_deferred_probe_check_state(). So, bring it back for now. [1] - https://lore.kernel.org/lkml/4799738.LvFx2qVVIh@steina-w/ Fixes: b09796d528bb ("iommu/of: Delete usage of driver_deferred_probe_check_state()") Reported-by: Jean-Philippe Brucker Tested-by: Jean-Philippe Brucker Tested-by: Alexander Stein Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20220819221616.2107893-5-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/of_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 41f4eb0052192..5696314ae69e7 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -40,7 +40,7 @@ static int of_iommu_xlate(struct device *dev, * a proper probe-ordering dependency mechanism in future. */ if (!ops) - return -ENODEV; + return driver_deferred_probe_check_state(dev); if (!try_module_get(ops->owner)) return -ENODEV; From c2fa700c5f7667800b6cfedcb9994eb5eb69e6cc Mon Sep 17 00:00:00 2001 From: Ammar Faizi Date: Tue, 23 Aug 2022 18:45:48 +0700 Subject: [PATCH 237/654] MAINTAINERS: Add `include/linux/io_uring_types.h` File include/linux/io_uring_types.h doesn't have a maintainer, add it to the io_uring section. Signed-off-by: Ammar Faizi Signed-off-by: Jens Axboe --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8a5012ba6ff98..6dc55be414202 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10657,6 +10657,7 @@ T: git git://git.kernel.dk/linux-block T: git git://git.kernel.dk/liburing F: io_uring/ F: include/linux/io_uring.h +F: include/linux/io_uring_types.h F: include/uapi/linux/io_uring.h F: tools/io_uring/ From e1d0c6d05afdcff01ace698edb3b8808db1dc066 Mon Sep 17 00:00:00 2001 From: Ammar Faizi Date: Tue, 23 Aug 2022 18:45:49 +0700 Subject: [PATCH 238/654] io_uring: uapi: Add `extern "C"` in io_uring.h for liburing Make it easy for liburing to integrate uapi header with the kernel. Previously, when this header changes, the liburing side can't directly copy this header file due to some small differences. Sync them. Link: https://lore.kernel.org/io-uring/f1feef16-6ea2-0653-238f-4aaee35060b6@kernel.dk Cc: Bart Van Assche Cc: Dylan Yudaken Cc: Facebook Kernel Team Signed-off-by: Ammar Faizi Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1463cfecb56b0..9e0b5c8d92cea 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -12,6 +12,10 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif + /* * IO submission data structure (Submission Queue Entry) */ @@ -661,4 +665,8 @@ struct io_uring_recvmsg_out { __u32 flags; }; +#ifdef __cplusplus +} +#endif + #endif From 47abea041f897d64dbd5777f0cf7745148f85d75 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 23 Aug 2022 07:26:08 -0600 Subject: [PATCH 239/654] io_uring: fix off-by-one in sync cancelation file check The passed in index should be validated against the number of registered files we have, it needs to be smaller than the index value to avoid going one beyond the end. Fixes: 78a861b94959 ("io_uring: add sync cancelation API through io_uring_register()") Reported-by: Luo Likang Signed-off-by: Jens Axboe --- io_uring/cancel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/cancel.c b/io_uring/cancel.c index e4e1dc0325f0c..5fc5d3e80fcb3 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -218,7 +218,7 @@ static int __io_sync_cancel(struct io_uring_task *tctx, (cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) { unsigned long file_ptr; - if (unlikely(fd > ctx->nr_user_files)) + if (unlikely(fd >= ctx->nr_user_files)) return -EBADF; fd = array_index_nospec(fd, ctx->nr_user_files); file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr; From 4b2e3a17e9f279325712b79fb01d1493f9e3e005 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Sun, 21 Aug 2022 06:08:08 -0700 Subject: [PATCH 240/654] net: ipvtap - add __init/__exit annotations to module init/exit funcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks to have been left out in an oversight. Cc: Mahesh Bandewar Cc: Sainath Grandhi Fixes: 235a9d89da97 ('ipvtap: IP-VLAN based tap driver') Signed-off-by: Maciej Żenczykowski Link: https://lore.kernel.org/r/20220821130808.12143-1-zenczykowski@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ipvlan/ipvtap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c index ef02f2cf5ce13..cbabca167a078 100644 --- a/drivers/net/ipvlan/ipvtap.c +++ b/drivers/net/ipvlan/ipvtap.c @@ -194,7 +194,7 @@ static struct notifier_block ipvtap_notifier_block __read_mostly = { .notifier_call = ipvtap_device_event, }; -static int ipvtap_init(void) +static int __init ipvtap_init(void) { int err; @@ -228,7 +228,7 @@ static int ipvtap_init(void) } module_init(ipvtap_init); -static void ipvtap_exit(void) +static void __exit ipvtap_exit(void) { rtnl_link_unregister(&ipvtap_link_ops); unregister_netdevice_notifier(&ipvtap_notifier_block); From 855a28f9c96c80e6cbd2d986a857235e34868064 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 19 Aug 2022 20:39:25 +0300 Subject: [PATCH 241/654] net: dsa: don't dereference NULL extack in dsa_slave_changeupper() When a driver returns -EOPNOTSUPP in dsa_port_bridge_join() but failed to provide a reason for it, DSA attempts to set the extack to say that software fallback will kick in. The problem is, when we use brctl and the legacy bridge ioctls, the extack will be NULL, and DSA dereferences it in the process of setting it. Sergei Antonov proves this using the following stack trace: Unable to handle kernel NULL pointer dereference at virtual address 00000000 PC is at dsa_slave_changeupper+0x5c/0x158 dsa_slave_changeupper from raw_notifier_call_chain+0x38/0x6c raw_notifier_call_chain from __netdev_upper_dev_link+0x198/0x3b4 __netdev_upper_dev_link from netdev_master_upper_dev_link+0x50/0x78 netdev_master_upper_dev_link from br_add_if+0x430/0x7f4 br_add_if from br_ioctl_stub+0x170/0x530 br_ioctl_stub from br_ioctl_call+0x54/0x7c br_ioctl_call from dev_ifsioc+0x4e0/0x6bc dev_ifsioc from dev_ioctl+0x2f8/0x758 dev_ioctl from sock_ioctl+0x5f0/0x674 sock_ioctl from sys_ioctl+0x518/0xe40 sys_ioctl from ret_fast_syscall+0x0/0x1c Fix the problem by only overriding the extack if non-NULL. Fixes: 1c6e8088d9a7 ("net: dsa: allow port_bridge_join() to override extack message") Link: https://lore.kernel.org/netdev/CABikg9wx7vB5eRDAYtvAm7fprJ09Ta27a4ZazC=NX5K4wn6pWA@mail.gmail.com/ Reported-by: Sergei Antonov Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Tested-by: Sergei Antonov Link: https://lore.kernel.org/r/20220819173925.3581871-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/slave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index ad6a6663feeb5..1291c2431d440 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -2484,7 +2484,7 @@ static int dsa_slave_changeupper(struct net_device *dev, if (!err) dsa_bridge_mtu_normalization(dp); if (err == -EOPNOTSUPP) { - if (!extack->_msg) + if (extack && !extack->_msg) NL_SET_ERR_MSG_MOD(extack, "Offloading not supported"); err = 0; From 22dec134dbfa825b963f8a1807ad19b943e46a56 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Aug 2022 09:27:16 +0200 Subject: [PATCH 242/654] ALSA: seq: oss: Fix data-race for max_midi_devs access ALSA OSS sequencer refers to a global variable max_midi_devs at creating a new port, storing it to its own field. Meanwhile this variable may be changed by other sequencer events at snd_seq_oss_midi_check_exit_port() in parallel, which may cause a data race. OTOH, this data race itself is almost harmless, as the access to the MIDI device is done via get_mdev() and it's protected with a refcount, hence its presence is guaranteed. Though, it's sill better to address the data-race from the code sanity POV, and this patch adds the proper spinlock for the protection. Reported-by: Abhishek Shah Cc: Link: https://lore.kernel.org/r/CAEHB2493pZRXs863w58QWnUTtv3HHfg85aYhLn5HJHCwxqtHQg@mail.gmail.com Link: https://lore.kernel.org/r/20220823072717.1706-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/oss/seq_oss_midi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c index 1e3bf086f8671..07efb38f58ac1 100644 --- a/sound/core/seq/oss/seq_oss_midi.c +++ b/sound/core/seq/oss/seq_oss_midi.c @@ -270,7 +270,9 @@ snd_seq_oss_midi_clear_all(void) void snd_seq_oss_midi_setup(struct seq_oss_devinfo *dp) { + spin_lock_irq(®ister_lock); dp->max_mididev = max_midi_devs; + spin_unlock_irq(®ister_lock); } /* From a9c3eda7eada94e8cf29cb102aa80e1370d8fa2e Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Tue, 23 Aug 2022 20:40:22 +0530 Subject: [PATCH 243/654] io_uring: fix submission-failure handling for uring-cmd If ->uring_cmd returned an error value different from -EAGAIN or -EIOCBQUEUED, it gets overridden with IOU_OK. This invites trouble as caller (io_uring core code) handles IOU_OK differently than other error codes. Fix this by returning the actual error code. Signed-off-by: Kanchan Joshi Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 8e0cc2d9205ea..b9989ae7b957b 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -112,7 +112,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); - return IOU_OK; + return ret; } return IOU_ISSUE_SKIP_COMPLETE; From c6e50787889c8d55db65c769dccee98c7fcd1732 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Sun, 14 Aug 2022 13:38:00 +0100 Subject: [PATCH 244/654] soundwire: qcom: remove duplicate reset control get Looks like adding clock gate flag patch forgot to remove the old code that gets reset control. This causes below crash on platforms that do not need reset. [ 15.653501] reset_control_reset+0x124/0x170 [ 15.653508] qcom_swrm_init+0x50/0x1a0 [ 15.653514] qcom_swrm_probe+0x320/0x668 [ 15.653519] platform_probe+0x68/0xe0 [ 15.653529] really_probe+0xbc/0x2a8 [ 15.653535] __driver_probe_device+0x7c/0xe8 [ 15.653541] driver_probe_device+0x40/0x110 [ 15.653547] __device_attach_driver+0x98/0xd0 [ 15.653553] bus_for_each_drv+0x68/0xd0 [ 15.653559] __device_attach+0xf4/0x188 [ 15.653565] device_initial_probe+0x14/0x20 Fix this by removing old code. Reported-by: Amit Pundir Fixes: 1fd0d85affe4 ("soundwire: qcom: Add flag for software clock gating check") Signed-off-by: Srinivas Kandagatla Tested-by: Amit Pundir Link: https://lore.kernel.org/r/20220814123800.31200-1-srinivas.kandagatla@linaro.org Signed-off-by: Vinod Koul --- drivers/soundwire/qcom.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index 9df970eeca454..a43961ad4614c 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -1356,10 +1356,6 @@ static int qcom_swrm_probe(struct platform_device *pdev) ctrl->bus.compute_params = &qcom_swrm_compute_params; ctrl->bus.clk_stop_timeout = 300; - ctrl->audio_cgcr = devm_reset_control_get_exclusive(dev, "swr_audio_cgcr"); - if (IS_ERR(ctrl->audio_cgcr)) - dev_err(dev, "Failed to get audio_cgcr reset required for soundwire-v1.6.0\n"); - ret = qcom_swrm_get_port_config(ctrl); if (ret) goto err_clk; From 6b04ce966a738ecdd9294c9593e48513c0dc90aa Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Fri, 19 Aug 2022 22:09:28 +0200 Subject: [PATCH 245/654] nouveau: explicitly wait on the fence in nouveau_bo_move_m2mf It is a bit unlcear to us why that's helping, but it does and unbreaks suspend/resume on a lot of GPUs without any known drawbacks. Cc: stable@vger.kernel.org # v5.15+ Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/156 Signed-off-by: Karol Herbst Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20220819200928.401416-1-kherbst@redhat.com --- drivers/gpu/drm/nouveau/nouveau_bo.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 05076e530e7d4..e29175e4b44ce 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -820,6 +820,15 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, if (ret == 0) { ret = nouveau_fence_new(chan, false, &fence); if (ret == 0) { + /* TODO: figure out a better solution here + * + * wait on the fence here explicitly as going through + * ttm_bo_move_accel_cleanup somehow doesn't seem to do it. + * + * Without this the operation can timeout and we'll fallback to a + * software copy, which might take several minutes to finish. + */ + nouveau_fence_wait(fence, false, false); ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false, From 36527b9d882362567ceb4eea8666813280f30e6f Mon Sep 17 00:00:00 2001 From: Riwen Lu Date: Tue, 23 Aug 2022 15:43:42 +0800 Subject: [PATCH 246/654] ACPI: processor: Remove freq Qos request for all CPUs The freq Qos request would be removed repeatedly if the cpufreq policy relates to more than one CPU. Then, it would cause the "called for unknown object" warning. Remove the freq Qos request for each CPU relates to the cpufreq policy, instead of removing repeatedly for the last CPU of it. Fixes: a1bb46c36ce3 ("ACPI: processor: Add QoS requests for all CPUs") Reported-by: Jeremy Linton Tested-by: Jeremy Linton Signed-off-by: Riwen Lu Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index db6ac540e924a..e534fd49a67e5 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -151,7 +151,7 @@ void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy) unsigned int cpu; for_each_cpu(cpu, policy->related_cpus) { - struct acpi_processor *pr = per_cpu(processors, policy->cpu); + struct acpi_processor *pr = per_cpu(processors, cpu); if (pr) freq_qos_remove_request(&pr->thermal_req); From 7997eff82828304b780dc0a39707e1946d6f1ebf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 20 Aug 2022 17:38:37 +0200 Subject: [PATCH 247/654] netfilter: ebtables: reject blobs that don't provide all entry points Harshit Mogalapalli says: In ebt_do_table() function dereferencing 'private->hook_entry[hook]' can lead to NULL pointer dereference. [..] Kernel panic: general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f] [..] RIP: 0010:ebt_do_table+0x1dc/0x1ce0 Code: 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 5c 16 00 00 48 b8 00 00 00 00 00 fc ff df 49 8b 6c df 08 48 8d 7d 2c 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 88 [..] Call Trace: nf_hook_slow+0xb1/0x170 __br_forward+0x289/0x730 maybe_deliver+0x24b/0x380 br_flood+0xc6/0x390 br_dev_xmit+0xa2e/0x12c0 For some reason ebtables rejects blobs that provide entry points that are not supported by the table, but what it should instead reject is the opposite: blobs that DO NOT provide an entry point supported by the table. t->valid_hooks is the bitmask of hooks (input, forward ...) that will see packets. Providing an entry point that is not support is harmless (never called/used), but the inverse isn't: it results in a crash because the ebtables traverser doesn't expect a NULL blob for a location its receiving packets for. Instead of fixing all the individual checks, do what iptables is doing and reject all blobs that differ from the expected hooks. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Harshit Mogalapalli Reported-by: syzkaller Signed-off-by: Florian Westphal --- include/linux/netfilter_bridge/ebtables.h | 4 ---- net/bridge/netfilter/ebtable_broute.c | 8 -------- net/bridge/netfilter/ebtable_filter.c | 8 -------- net/bridge/netfilter/ebtable_nat.c | 8 -------- net/bridge/netfilter/ebtables.c | 8 +------- 5 files changed, 1 insertion(+), 35 deletions(-) diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index a13296d6c7ceb..fd533552a062c 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -94,10 +94,6 @@ struct ebt_table { struct ebt_replace_kernel *table; unsigned int valid_hooks; rwlock_t lock; - /* e.g. could be the table explicitly only allows certain - * matches, targets, ... 0 == let it in */ - int (*check)(const struct ebt_table_info *info, - unsigned int valid_hooks); /* the data used by the kernel */ struct ebt_table_info *private; struct nf_hook_ops *ops; diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 1a11064f99907..8f19253024b0a 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -36,18 +36,10 @@ static struct ebt_replace_kernel initial_table = { .entries = (char *)&initial_chain, }; -static int check(const struct ebt_table_info *info, unsigned int valid_hooks) -{ - if (valid_hooks & ~(1 << NF_BR_BROUTING)) - return -EINVAL; - return 0; -} - static const struct ebt_table broute_table = { .name = "broute", .table = &initial_table, .valid_hooks = 1 << NF_BR_BROUTING, - .check = check, .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index cb949436bc0e3..278f324e67524 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = { .entries = (char *)initial_chains, }; -static int check(const struct ebt_table_info *info, unsigned int valid_hooks) -{ - if (valid_hooks & ~FILTER_VALID_HOOKS) - return -EINVAL; - return 0; -} - static const struct ebt_table frame_filter = { .name = "filter", .table = &initial_table, .valid_hooks = FILTER_VALID_HOOKS, - .check = check, .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 5ee0531ae5061..9066f7f376d57 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = { .entries = (char *)initial_chains, }; -static int check(const struct ebt_table_info *info, unsigned int valid_hooks) -{ - if (valid_hooks & ~NAT_VALID_HOOKS) - return -EINVAL; - return 0; -} - static const struct ebt_table frame_nat = { .name = "nat", .table = &initial_table, .valid_hooks = NAT_VALID_HOOKS, - .check = check, .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index f2dbefb61ce83..9a0ae59cdc500 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1040,8 +1040,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, goto free_iterate; } - /* the table doesn't like it */ - if (t->check && (ret = t->check(newinfo, repl->valid_hooks))) + if (repl->valid_hooks != t->valid_hooks) goto free_unlock; if (repl->num_counters && repl->num_counters != t->private->nentries) { @@ -1231,11 +1230,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, if (ret != 0) goto free_chainstack; - if (table->check && table->check(newinfo, table->valid_hooks)) { - ret = -EINVAL; - goto free_chainstack; - } - table->private = newinfo; rwlock_init(&table->lock); mutex_lock(&ebt_mutex); From cf97769c761abfeac8931b35fe0e1a8d5fabc9d8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 19 Aug 2022 00:42:31 +0200 Subject: [PATCH 248/654] netfilter: conntrack: work around exceeded receive window When a TCP sends more bytes than allowed by the receive window, all future packets can be marked as invalid. This can clog up the conntrack table because of 5-day default timeout. Sequence of packets: 01 initiator > responder: [S], seq 171, win 5840, options [mss 1330,sackOK,TS val 63 ecr 0,nop,wscale 1] 02 responder > initiator: [S.], seq 33211, ack 172, win 65535, options [mss 1460,sackOK,TS val 010 ecr 63,nop,wscale 8] 03 initiator > responder: [.], ack 33212, win 2920, options [nop,nop,TS val 068 ecr 010], length 0 04 initiator > responder: [P.], seq 172:240, ack 33212, win 2920, options [nop,nop,TS val 279 ecr 010], length 68 Window is 5840 starting from 33212 -> 39052. 05 responder > initiator: [.], ack 240, win 256, options [nop,nop,TS val 872 ecr 279], length 0 06 responder > initiator: [.], seq 33212:34530, ack 240, win 256, options [nop,nop,TS val 892 ecr 279], length 1318 This is fine, conntrack will flag the connection as having outstanding data (UNACKED), which lowers the conntrack timeout to 300s. 07 responder > initiator: [.], seq 34530:35848, ack 240, win 256, options [nop,nop,TS val 892 ecr 279], length 1318 08 responder > initiator: [.], seq 35848:37166, ack 240, win 256, options [nop,nop,TS val 892 ecr 279], length 1318 09 responder > initiator: [.], seq 37166:38484, ack 240, win 256, options [nop,nop,TS val 892 ecr 279], length 1318 10 responder > initiator: [.], seq 38484:39802, ack 240, win 256, options [nop,nop,TS val 892 ecr 279], length 1318 Packet 10 is already sending more than permitted, but conntrack doesn't validate this (only seq is tested vs. maxend, not 'seq+len'). 38484 is acceptable, but only up to 39052, so this packet should not have been sent (or only 568 bytes, not 1318). At this point, connection is still in '300s' mode. Next packet however will get flagged: 11 responder > initiator: [P.], seq 39802:40128, ack 240, win 256, options [nop,nop,TS val 892 ecr 279], length 326 nf_ct_proto_6: SEQ is over the upper bound (over the window of the receiver) .. LEN=378 .. SEQ=39802 ACK=240 ACK PSH .. Now, a couple of replies/acks comes in: 12 initiator > responder: [.], ack 34530, win 4368, [.. irrelevant acks removed ] 16 initiator > responder: [.], ack 39802, win 8712, options [nop,nop,TS val 296201291 ecr 2982371892], length 0 This ack is significant -- this acks the last packet send by the responder that conntrack considered valid. This means that ack == td_end. This will withdraw the 'unacked data' flag, the connection moves back to the 5-day timeout of established conntracks. 17 initiator > responder: ack 40128, win 10030, ... This packet is also flagged as invalid. Because conntrack only updates state based on packets that are considered valid, packet 11 'did not exist' and that gets us: nf_ct_proto_6: ACK is over upper bound 39803 (ACKed data not seen yet) .. SEQ=240 ACK=40128 WINDOW=10030 RES=0x00 ACK URG Because this received and processed by the endpoints, the conntrack entry remains in a bad state, no packets will ever be considered valid again: 30 responder > initiator: [F.], seq 40432, ack 2045, win 391, .. 31 initiator > responder: [.], ack 40433, win 11348, .. 32 initiator > responder: [F.], seq 2045, ack 40433, win 11348 .. ... all trigger 'ACK is over bound' test and we end up with non-early-evictable 5-day default timeout. NB: This patch triggers a bunch of checkpatch warnings because of silly indent. I will resend the cleanup series linked below to reduce the indent level once this change has propagated to net-next. I could route the cleanup via nf but that causes extra backport work for stable maintainers. Link: https://lore.kernel.org/netfilter-devel/20220720175228.17880-1-fw@strlen.de/T/#mb1d7147d36294573cc4f81d00f9f8dadfdd06cd8 Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_proto_tcp.c | 31 ++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index a63b51dceaf2c..a634c72b1ffcf 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -655,6 +655,37 @@ static bool tcp_in_window(struct nf_conn *ct, tn->tcp_be_liberal) res = true; if (!res) { + bool seq_ok = before(seq, sender->td_maxend + 1); + + if (!seq_ok) { + u32 overshot = end - sender->td_maxend + 1; + bool ack_ok; + + ack_ok = after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1); + + if (in_recv_win && + ack_ok && + overshot <= receiver->td_maxwin && + before(sack, receiver->td_end + 1)) { + /* Work around TCPs that send more bytes than allowed by + * the receive window. + * + * If the (marked as invalid) packet is allowed to pass by + * the ruleset and the peer acks this data, then its possible + * all future packets will trigger 'ACK is over upper bound' check. + * + * Thus if only the sequence check fails then do update td_end so + * possible ACK for this data can update internal state. + */ + sender->td_end = end; + sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; + + nf_ct_l4proto_log_invalid(skb, ct, hook_state, + "%u bytes more than expected", overshot); + return res; + } + } + nf_ct_l4proto_log_invalid(skb, ct, hook_state, "%s", before(seq, sender->td_maxend + 1) ? From 80b9ebd3e478cd41526cbf84f80c3e0eb885d1d3 Mon Sep 17 00:00:00 2001 From: Jarrah Gosbell Date: Tue, 23 Aug 2022 10:00:37 -0700 Subject: [PATCH 249/654] Input: goodix - add compatible string for GT1158 Add compatible string for GT1158 missing from the previous patch. Fixes: 425fe4709c76 ("Input: goodix - add support for GT1158") Signed-off-by: Jarrah Gosbell Link: https://lore.kernel.org/r/20220813043821.9981-1-kernel@undef.tools Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index ab03619d6b508..21c0dddbe41d4 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -1509,6 +1509,7 @@ MODULE_DEVICE_TABLE(acpi, goodix_acpi_match); #ifdef CONFIG_OF static const struct of_device_id goodix_of_match[] = { { .compatible = "goodix,gt1151" }, + { .compatible = "goodix,gt1158" }, { .compatible = "goodix,gt5663" }, { .compatible = "goodix,gt5688" }, { .compatible = "goodix,gt911" }, From 7931e28098a4c1a2a6802510b0cbe57546d2049d Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Mon, 8 Aug 2022 21:21:58 +0800 Subject: [PATCH 250/654] thermal/int340x_thermal: handle data_vault when the value is ZERO_SIZE_PTR In some case, the GDDV returns a package with a buffer which has zero length. It causes that kmemdup() returns ZERO_SIZE_PTR (0x10). Then the data_vault_read() got NULL point dereference problem when accessing the 0x10 value in data_vault. [ 71.024560] BUG: kernel NULL pointer dereference, address: 0000000000000010 This patch uses ZERO_OR_NULL_PTR() for checking ZERO_SIZE_PTR or NULL value in data_vault. Signed-off-by: "Lee, Chun-Yi" Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/int340x_thermal/int3400_thermal.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 80d4e0676083a..365489bf4b8c1 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -527,7 +527,7 @@ static void int3400_setup_gddv(struct int3400_thermal_priv *priv) priv->data_vault = kmemdup(obj->package.elements[0].buffer.pointer, obj->package.elements[0].buffer.length, GFP_KERNEL); - if (!priv->data_vault) + if (ZERO_OR_NULL_PTR(priv->data_vault)) goto out_free; bin_attr_data_vault.private = priv->data_vault; @@ -597,7 +597,7 @@ static int int3400_thermal_probe(struct platform_device *pdev) goto free_imok; } - if (priv->data_vault) { + if (!ZERO_OR_NULL_PTR(priv->data_vault)) { result = sysfs_create_group(&pdev->dev.kobj, &data_attribute_group); if (result) @@ -615,7 +615,8 @@ static int int3400_thermal_probe(struct platform_device *pdev) free_sysfs: cleanup_odvp(priv); if (priv->data_vault) { - sysfs_remove_group(&pdev->dev.kobj, &data_attribute_group); + if (!ZERO_OR_NULL_PTR(priv->data_vault)) + sysfs_remove_group(&pdev->dev.kobj, &data_attribute_group); kfree(priv->data_vault); } free_uuid: @@ -647,7 +648,7 @@ static int int3400_thermal_remove(struct platform_device *pdev) if (!priv->rel_misc_dev_res) acpi_thermal_rel_misc_device_remove(priv->adev->handle); - if (priv->data_vault) + if (!ZERO_OR_NULL_PTR(priv->data_vault)) sysfs_remove_group(&pdev->dev.kobj, &data_attribute_group); sysfs_remove_group(&pdev->dev.kobj, &uuid_attribute_group); sysfs_remove_group(&pdev->dev.kobj, &imok_attribute_group); From ea902bcc1943f7539200ec464de3f54335588774 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 23 Aug 2022 10:48:19 -0700 Subject: [PATCH 251/654] x86/cpu: Add new Raptor Lake CPU model number Note1: Model 0xB7 already claimed the "no suffix" #define for a regular client part, so add (yet another) suffix "S" to distinguish this new part from the earlier one. Note2: the RAPTORLAKE* and ALDERLAKE* processors are very similar from a software enabling point of view. There are no known features that have model-specific enabling and also differ between the two. In other words, every single place that list *one* or more RAPTORLAKE* or ALDERLAKE* processors should list all of them. Note3: This is being merged before there is an in-tree user. Merging this provides an "anchor" so that the different folks can update their subsystems (like perf) in parallel to use this define and test it. [ dhansen: add a note about why this has no in-tree users yet ] Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Link: https://lkml.kernel.org/r/20220823174819.223941-1-tony.luck@intel.com --- arch/x86/include/asm/intel-family.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index def6ca121111c..aeb38023a7039 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -27,6 +27,7 @@ * _X - regular server parts * _D - micro server parts * _N,_P - other mobile parts + * _S - other client parts * * Historical OPTDIFFs: * @@ -112,6 +113,7 @@ #define INTEL_FAM6_RAPTORLAKE 0xB7 #define INTEL_FAM6_RAPTORLAKE_P 0xBA +#define INTEL_FAM6_RAPTORLAKE_S 0xBF /* "Small Core" Processors (Atom) */ From 6ca7076fbfaeccce173aeab832d76b9e49e1034b Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 16 Aug 2022 13:01:57 +0100 Subject: [PATCH 252/654] cpufreq: check only freq_table in __resolve_freq() There is no need to check if the cpufreq driver implements callback cpufreq_driver::target_index. The logic in the __resolve_freq uses the frequency table available in the policy. It doesn't matter if the driver provides 'target_index' or 'target' callback. It just has to populate the 'policy->freq_table'. Thus, check only frequency table during the frequency resolving call. Acked-by: Viresh Kumar Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7820c4e742893..69b3d61852ac6 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -532,7 +532,7 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy, target_freq = clamp_val(target_freq, policy->min, policy->max); - if (!cpufreq_driver->target_index) + if (!policy->freq_table) return target_freq; idx = cpufreq_frequency_table_target(policy, target_freq, relation); From 763f4fb76e24959c370cdaa889b2492ba6175580 Mon Sep 17 00:00:00 2001 From: Jing-Ting Wu Date: Tue, 23 Aug 2022 13:41:46 +0800 Subject: [PATCH 253/654] cgroup: Fix race condition at rebind_subsystems() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: The rebind_subsystems() is no lock held when move css object from A list to B list,then let B's head be treated as css node at list_for_each_entry_rcu(). Solution: Add grace period before invalidating the removed rstat_css_node. Reported-by: Jing-Ting Wu Suggested-by: Michal Koutný Signed-off-by: Jing-Ting Wu Tested-by: Jing-Ting Wu Link: https://lore.kernel.org/linux-arm-kernel/d8f0bc5e2fb6ed259f9334c83279b4c011283c41.camel@mediatek.com/T/ Acked-by: Mukesh Ojha Fixes: a7df69b81aac ("cgroup: rstat: support cgroup1") Cc: stable@vger.kernel.org # v5.13+ Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e1387499b3361..e4bb5d57f4d1d 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1820,6 +1820,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) if (ss->css_rstat_flush) { list_del_rcu(&css->rstat_css_node); + synchronize_rcu(); list_add_rcu(&css->rstat_css_node, &dcgrp->rstat_css_list); } From 18bbc3213383a82b05383827f4b1b882e3f0a5a5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 20 Aug 2022 17:54:06 +0200 Subject: [PATCH 254/654] netfilter: nft_tproxy: restrict to prerouting hook TPROXY is only allowed from prerouting, but nft_tproxy doesn't check this. This fixes a crash (null dereference) when using tproxy from e.g. output. Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support") Reported-by: Shell Chen Signed-off-by: Florian Westphal --- net/netfilter/nft_tproxy.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index 68b2eed742df9..62da25ad264bc 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -312,6 +312,13 @@ static int nft_tproxy_dump(struct sk_buff *skb, return 0; } +static int nft_tproxy_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING); +} + static struct nft_expr_type nft_tproxy_type; static const struct nft_expr_ops nft_tproxy_ops = { .type = &nft_tproxy_type, @@ -321,6 +328,7 @@ static const struct nft_expr_ops nft_tproxy_ops = { .destroy = nft_tproxy_destroy, .dump = nft_tproxy_dump, .reduce = NFT_REDUCE_READONLY, + .validate = nft_tproxy_validate, }; static struct nft_expr_type nft_tproxy_type __read_mostly = { From ced8ecf026fd8084cf175530ff85c76d6085d715 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 23 Aug 2022 11:28:13 -0700 Subject: [PATCH 255/654] btrfs: fix space cache corruption and potential double allocations When testing space_cache v2 on a large set of machines, we encountered a few symptoms: 1. "unable to add free space :-17" (EEXIST) errors. 2. Missing free space info items, sometimes caught with a "missing free space info for X" error. 3. Double-accounted space: ranges that were allocated in the extent tree and also marked as free in the free space tree, ranges that were marked as allocated twice in the extent tree, or ranges that were marked as free twice in the free space tree. If the latter made it onto disk, the next reboot would hit the BUG_ON() in add_new_free_space(). 4. On some hosts with no on-disk corruption or error messages, the in-memory space cache (dumped with drgn) disagreed with the free space tree. All of these symptoms have the same underlying cause: a race between caching the free space for a block group and returning free space to the in-memory space cache for pinned extents causes us to double-add a free range to the space cache. This race exists when free space is cached from the free space tree (space_cache=v2) or the extent tree (nospace_cache, or space_cache=v1 if the cache needs to be regenerated). struct btrfs_block_group::last_byte_to_unpin and struct btrfs_block_group::progress are supposed to protect against this race, but commit d0c2f4fa555e ("btrfs: make concurrent fsyncs wait less when waiting for a transaction commit") subtly broke this by allowing multiple transactions to be unpinning extents at the same time. Specifically, the race is as follows: 1. An extent is deleted from an uncached block group in transaction A. 2. btrfs_commit_transaction() is called for transaction A. 3. btrfs_run_delayed_refs() -> __btrfs_free_extent() runs the delayed ref for the deleted extent. 4. __btrfs_free_extent() -> do_free_extent_accounting() -> add_to_free_space_tree() adds the deleted extent back to the free space tree. 5. do_free_extent_accounting() -> btrfs_update_block_group() -> btrfs_cache_block_group() queues up the block group to get cached. block_group->progress is set to block_group->start. 6. btrfs_commit_transaction() for transaction A calls switch_commit_roots(). It sets block_group->last_byte_to_unpin to block_group->progress, which is block_group->start because the block group hasn't been cached yet. 7. The caching thread gets to our block group. Since the commit roots were already switched, load_free_space_tree() sees the deleted extent as free and adds it to the space cache. It finishes caching and sets block_group->progress to U64_MAX. 8. btrfs_commit_transaction() advances transaction A to TRANS_STATE_SUPER_COMMITTED. 9. fsync calls btrfs_commit_transaction() for transaction B. Since transaction A is already in TRANS_STATE_SUPER_COMMITTED and the commit is for fsync, it advances. 10. btrfs_commit_transaction() for transaction B calls switch_commit_roots(). This time, the block group has already been cached, so it sets block_group->last_byte_to_unpin to U64_MAX. 11. btrfs_commit_transaction() for transaction A calls btrfs_finish_extent_commit(), which calls unpin_extent_range() for the deleted extent. It sees last_byte_to_unpin set to U64_MAX (by transaction B!), so it adds the deleted extent to the space cache again! This explains all of our symptoms above: * If the sequence of events is exactly as described above, when the free space is re-added in step 11, it will fail with EEXIST. * If another thread reallocates the deleted extent in between steps 7 and 11, then step 11 will silently re-add that space to the space cache as free even though it is actually allocated. Then, if that space is allocated *again*, the free space tree will be corrupted (namely, the wrong item will be deleted). * If we don't catch this free space tree corruption, it will continue to get worse as extents are deleted and reallocated. The v1 space_cache is synchronously loaded when an extent is deleted (btrfs_update_block_group() with alloc=0 calls btrfs_cache_block_group() with load_cache_only=1), so it is not normally affected by this bug. However, as noted above, if we fail to load the space cache, we will fall back to caching from the extent tree and may hit this bug. The easiest fix for this race is to also make caching from the free space tree or extent tree synchronous. Josef tested this and found no performance regressions. A few extra changes fall out of this change. Namely, this fix does the following, with step 2 being the crucial fix: 1. Factor btrfs_caching_ctl_wait_done() out of btrfs_wait_block_group_cache_done() to allow waiting on a caching_ctl that we already hold a reference to. 2. Change the call in btrfs_cache_block_group() of btrfs_wait_space_cache_v1_finished() to btrfs_caching_ctl_wait_done(), which makes us wait regardless of the space_cache option. 3. Delete the now unused btrfs_wait_space_cache_v1_finished() and space_cache_v1_done(). 4. Change btrfs_cache_block_group()'s `int load_cache_only` parameter to `bool wait` to more accurately describe its new meaning. 5. Change a few callers which had a separate call to btrfs_wait_block_group_cache_done() to use wait = true instead. 6. Make btrfs_wait_block_group_cache_done() static now that it's not used outside of block-group.c anymore. Fixes: d0c2f4fa555e ("btrfs: make concurrent fsyncs wait less when waiting for a transaction commit") CC: stable@vger.kernel.org # 5.12+ Reviewed-by: Filipe Manana Signed-off-by: Omar Sandoval Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 47 ++++++++++++++---------------------------- fs/btrfs/block-group.h | 4 +--- fs/btrfs/ctree.h | 1 - fs/btrfs/extent-tree.c | 30 ++++++--------------------- 4 files changed, 22 insertions(+), 60 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 993aca2f1e181..e0375ba9d0fed 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -440,39 +440,26 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, btrfs_put_caching_control(caching_ctl); } -int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache) +static int btrfs_caching_ctl_wait_done(struct btrfs_block_group *cache, + struct btrfs_caching_control *caching_ctl) +{ + wait_event(caching_ctl->wait, btrfs_block_group_done(cache)); + return cache->cached == BTRFS_CACHE_ERROR ? -EIO : 0; +} + +static int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache) { struct btrfs_caching_control *caching_ctl; - int ret = 0; + int ret; caching_ctl = btrfs_get_caching_control(cache); if (!caching_ctl) return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0; - - wait_event(caching_ctl->wait, btrfs_block_group_done(cache)); - if (cache->cached == BTRFS_CACHE_ERROR) - ret = -EIO; + ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); btrfs_put_caching_control(caching_ctl); return ret; } -static bool space_cache_v1_done(struct btrfs_block_group *cache) -{ - bool ret; - - spin_lock(&cache->lock); - ret = cache->cached != BTRFS_CACHE_FAST; - spin_unlock(&cache->lock); - - return ret; -} - -void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache, - struct btrfs_caching_control *caching_ctl) -{ - wait_event(caching_ctl->wait, space_cache_v1_done(cache)); -} - #ifdef CONFIG_BTRFS_DEBUG static void fragment_free_space(struct btrfs_block_group *block_group) { @@ -750,9 +737,8 @@ static noinline void caching_thread(struct btrfs_work *work) btrfs_put_block_group(block_group); } -int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only) +int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait) { - DEFINE_WAIT(wait); struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_caching_control *caching_ctl = NULL; int ret = 0; @@ -785,10 +771,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only } WARN_ON(cache->caching_ctl); cache->caching_ctl = caching_ctl; - if (btrfs_test_opt(fs_info, SPACE_CACHE)) - cache->cached = BTRFS_CACHE_FAST; - else - cache->cached = BTRFS_CACHE_STARTED; + cache->cached = BTRFS_CACHE_STARTED; cache->has_caching_ctl = 1; spin_unlock(&cache->lock); @@ -801,8 +784,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); out: - if (load_cache_only && caching_ctl) - btrfs_wait_space_cache_v1_finished(cache, caching_ctl); + if (wait && caching_ctl) + ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); if (caching_ctl) btrfs_put_caching_control(caching_ctl); @@ -3312,7 +3295,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, * space back to the block group, otherwise we will leak space. */ if (!alloc && !btrfs_block_group_done(cache)) - btrfs_cache_block_group(cache, 1); + btrfs_cache_block_group(cache, true); byte_in_group = bytenr - cache->start; WARN_ON(byte_in_group > cache->length); diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 35e0e860cc0bf..6b3cdc4cbc41e 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -263,9 +263,7 @@ void btrfs_dec_nocow_writers(struct btrfs_block_group *bg); void btrfs_wait_nocow_writers(struct btrfs_block_group *bg); void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, u64 num_bytes); -int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache); -int btrfs_cache_block_group(struct btrfs_block_group *cache, - int load_cache_only); +int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait); void btrfs_put_caching_control(struct btrfs_caching_control *ctl); struct btrfs_caching_control *btrfs_get_caching_control( struct btrfs_block_group *cache); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4edb4bfb21664..9ef162dbd4bc1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -505,7 +505,6 @@ struct btrfs_free_cluster { enum btrfs_caching_type { BTRFS_CACHE_NO, BTRFS_CACHE_STARTED, - BTRFS_CACHE_FAST, BTRFS_CACHE_FINISHED, BTRFS_CACHE_ERROR, }; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ab944d1f94ef0..6914cd8024ba0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2551,17 +2551,10 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, return -EINVAL; /* - * pull in the free space cache (if any) so that our pin - * removes the free space from the cache. We have load_only set - * to one because the slow code to read in the free extents does check - * the pinned extents. + * Fully cache the free space first so that our pin removes the free space + * from the cache. */ - btrfs_cache_block_group(cache, 1); - /* - * Make sure we wait until the cache is completely built in case it is - * missing or is invalid and therefore needs to be rebuilt. - */ - ret = btrfs_wait_block_group_cache_done(cache); + ret = btrfs_cache_block_group(cache, true); if (ret) goto out; @@ -2584,12 +2577,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info, if (!block_group) return -EINVAL; - btrfs_cache_block_group(block_group, 1); - /* - * Make sure we wait until the cache is completely built in case it is - * missing or is invalid and therefore needs to be rebuilt. - */ - ret = btrfs_wait_block_group_cache_done(block_group); + ret = btrfs_cache_block_group(block_group, true); if (ret) goto out; @@ -4399,7 +4387,7 @@ static noinline int find_free_extent(struct btrfs_root *root, ffe_ctl->cached = btrfs_block_group_done(block_group); if (unlikely(!ffe_ctl->cached)) { ffe_ctl->have_caching_bg = true; - ret = btrfs_cache_block_group(block_group, 0); + ret = btrfs_cache_block_group(block_group, false); /* * If we get ENOMEM here or something else we want to @@ -6169,13 +6157,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) if (end - start >= range->minlen) { if (!btrfs_block_group_done(cache)) { - ret = btrfs_cache_block_group(cache, 0); - if (ret) { - bg_failed++; - bg_ret = ret; - continue; - } - ret = btrfs_wait_block_group_cache_done(cache); + ret = btrfs_cache_block_group(cache, true); if (ret) { bg_failed++; bg_ret = ret; From 47bf225a8d2cccb15f7e8d4a1ed9b757dd86afd7 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 22 Aug 2022 15:47:09 +0100 Subject: [PATCH 256/654] btrfs: fix silent failure when deleting root reference At btrfs_del_root_ref(), if btrfs_search_slot() returns an error, we end up returning from the function with a value of 0 (success). This happens because the function returns the value stored in the variable 'err', which is 0, while the error value we got from btrfs_search_slot() is stored in the 'ret' variable. So fix it by setting 'err' with the error value. Fixes: 8289ed9f93bef2 ("btrfs: replace the BUG_ON in btrfs_del_root_ref with proper error handling") CC: stable@vger.kernel.org # 5.16+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/root-tree.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a64b26b169040..d647cb2938c01 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -349,9 +349,10 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, key.offset = ref_id; again: ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); - if (ret < 0) + if (ret < 0) { + err = ret; goto out; - if (ret == 0) { + } else if (ret == 0) { leaf = path->nodes[0]; ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); From 59a3991984dbc1fc47e5651a265c5200bd85464e Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 12 Aug 2022 18:32:18 +0800 Subject: [PATCH 257/654] btrfs: replace: drop assert for suspended replace If the filesystem mounts with the replace-operation in a suspended state and try to cancel the suspended replace-operation, we hit the assert. The assert came from the commit fe97e2e173af ("btrfs: dev-replace: replace's scrub must not be running in suspended state") that was actually not required. So just remove it. $ mount /dev/sda5 /btrfs BTRFS info (device sda5): cannot continue dev_replace, tgtdev is missing BTRFS info (device sda5): you may cancel the operation after 'mount -o degraded' $ mount -o degraded /dev/sda5 /btrfs <-- success. $ btrfs replace cancel /btrfs kernel: assertion failed: ret != -ENOTCONN, in fs/btrfs/dev-replace.c:1131 kernel: ------------[ cut here ]------------ kernel: kernel BUG at fs/btrfs/ctree.h:3750! After the patch: $ btrfs replace cancel /btrfs BTRFS info (device sda5): suspended dev_replace from /dev/sda5 (devid 1) to canceled Fixes: fe97e2e173af ("btrfs: dev-replace: replace's scrub must not be running in suspended state") CC: stable@vger.kernel.org # 5.0+ Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index f43196a893ca3..b1cddef87cd3b 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -1129,8 +1129,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) up_write(&dev_replace->rwsem); /* Scrub for replace must not be running in suspended state */ - ret = btrfs_scrub_cancel(fs_info); - ASSERT(ret != -ENOTCONN); + btrfs_scrub_cancel(fs_info); trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { From f2c3bec215694fb8bc0ef5010f2a758d1906fc2d Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 12 Aug 2022 18:32:19 +0800 Subject: [PATCH 258/654] btrfs: add info when mount fails due to stale replace target If the replace target device reappears after the suspended replace is cancelled, it blocks the mount operation as it can't find the matching replace-item in the metadata. As shown below, BTRFS error (device sda5): replace devid present without an active replace item To overcome this situation, the user can run the command btrfs device scan --forget and try the mount command again. And also, to avoid repeating the issue, superblock on the devid=0 must be wiped. wipefs -a device-path-to-devid=0. This patch adds some info when this situation occurs. Reported-by: Samuel Greiner Link: https://lore.kernel.org/linux-btrfs/b4f62b10-b295-26ea-71f9-9a5c9299d42c@balkonien.org/T/ CC: stable@vger.kernel.org # 5.0+ Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index b1cddef87cd3b..41cddd3ff0593 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -165,7 +165,7 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) */ if (btrfs_find_device(fs_info->fs_devices, &args)) { btrfs_err(fs_info, - "replace devid present without an active replace item"); +"replace without active item, run 'device scan --forget' on the target device"); ret = -EUCLEAN; } else { dev_replace->srcdev = NULL; From 3f67e69976035352db110443916bcce32c7f64ac Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sat, 20 Aug 2022 00:14:13 +0100 Subject: [PATCH 259/654] riscv: dts: microchip: mpfs: fix incorrect pcie child node name Recent versions of dt-schema complain about the PCIe controller's child node name: arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dtb: pcie@2000000000: Unevaluated properties are not allowed ('clock-names', 'clocks', 'legacy-interrupt-controller', 'microchip,axi-m-atr0' were unexpected) From schema: Documentation/devicetree/bindings/pci/microchip,pcie-host.yaml Make the dts match the correct property name in the dts. Fixes: 528a5b1f2556 ("riscv: dts: microchip: add new peripherals to icicle kit device tree") Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/microchip/mpfs.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index 0a6ad5b9ff8dd..b26fc7886e5d5 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -487,7 +487,7 @@ msi-controller; microchip,axi-m-atr0 = <0x10 0x0>; status = "disabled"; - pcie_intc: legacy-interrupt-controller { + pcie_intc: interrupt-controller { #address-cells = <0>; #interrupt-cells = <1>; interrupt-controller; From 72a05748cbd285567d69f173f8694e3471b79f20 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sat, 20 Aug 2022 00:14:14 +0100 Subject: [PATCH 260/654] riscv: dts: microchip: mpfs: remove ti,fifo-depth property Recent versions of dt-schema warn about a previously undetected undocument property on the icicle & polarberry devicetrees: arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dtb: ethernet@20112000: ethernet-phy@8: Unevaluated properties are not allowed ('ti,fifo-depth' was unexpected) From schema: Documentation/devicetree/bindings/net/cdns,macb.yaml I know what you're thinking, the binding doesn't look to be the problem and I agree. I am not sure why a TI vendor property was ever actually added since it has no meaning... just get rid of it. Fixes: bc47b2217f24 ("riscv: dts: microchip: add the sundance polarberry") Fixes: 0fa6107eca41 ("RISC-V: Initial DTS for Microchip ICICLE board") Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts | 2 -- arch/riscv/boot/dts/microchip/mpfs-polarberry.dts | 2 -- 2 files changed, 4 deletions(-) diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts index 044982a11df50..ee548ab61a2a5 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts @@ -84,12 +84,10 @@ phy1: ethernet-phy@9 { reg = <9>; - ti,fifo-depth = <0x1>; }; phy0: ethernet-phy@8 { reg = <8>; - ti,fifo-depth = <0x1>; }; }; diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts index 82c93c8f5c17e..dc11bb8fc8338 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts @@ -54,12 +54,10 @@ phy1: ethernet-phy@5 { reg = <5>; - ti,fifo-depth = <0x01>; }; phy0: ethernet-phy@4 { reg = <4>; - ti,fifo-depth = <0x01>; }; }; From 2b55915d27dcaa35f54bad7925af0a76001079bc Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sat, 20 Aug 2022 00:14:15 +0100 Subject: [PATCH 261/654] riscv: dts: microchip: mpfs: remove bogus card-detect-delay Recent versions of dt-schema warn about a previously undetected undocumented property: arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dtb: mmc@20008000: Unevaluated properties are not allowed ('card-detect-delay' was unexpected) From schema: Documentation/devicetree/bindings/mmc/cdns,sdhci.yaml There are no GPIOs connected to MSSIO6B4 pin K3 so adding the common cd-debounce-delay-ms property makes no sense. The Cadence IP has a register that sets the card detect delay as "DP * tclk". On MPFS, this clock frequency is not configurable (it must be 200 MHz) & the FPGA comes out of reset with this register already set. Fixes: bc47b2217f24 ("riscv: dts: microchip: add the sundance polarberry") Fixes: 0fa6107eca41 ("RISC-V: Initial DTS for Microchip ICICLE board") Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts | 1 - arch/riscv/boot/dts/microchip/mpfs-polarberry.dts | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts index ee548ab61a2a5..f3f87ed2007f3 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts @@ -100,7 +100,6 @@ disable-wp; cap-sd-highspeed; cap-mmc-highspeed; - card-detect-delay = <200>; mmc-ddr-1_8v; mmc-hs200-1_8v; sd-uhs-sdr12; diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts index dc11bb8fc8338..c87cc2d8fe29f 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts @@ -70,7 +70,6 @@ disable-wp; cap-sd-highspeed; cap-mmc-highspeed; - card-detect-delay = <200>; mmc-ddr-1_8v; mmc-hs200-1_8v; sd-uhs-sdr12; From e4009c5fa77b4356aa37ce002e9f9952dfd7a615 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sat, 20 Aug 2022 00:14:16 +0100 Subject: [PATCH 262/654] riscv: dts: microchip: mpfs: remove pci axi address translation property An AXI master address translation table property was inadvertently added to the device tree & this was not caught by dtbs_check at the time. Remove the property - it should not be in mpfs.dtsi anyway as it would be more suitable in -fabric.dtsi nor does it actually apply to the version of the reference design we are using for upstream. Link: https://www.microsemi.com/document-portal/doc_download/1245812-polarfire-fpga-and-polarfire-soc-fpga-pci-express-user-guide # Section 1.3.3 Fixes: 528a5b1f2556 ("riscv: dts: microchip: add new peripherals to icicle kit device tree") Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/microchip/mpfs.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index b26fc7886e5d5..74493344ea41b 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -485,7 +485,6 @@ ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>; msi-parent = <&pcie>; msi-controller; - microchip,axi-m-atr0 = <0x10 0x0>; status = "disabled"; pcie_intc: interrupt-controller { #address-cells = <0>; From 15f7cfae912ea1739c8844b7edf3621ba981a37a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 18 Aug 2022 19:48:09 +0300 Subject: [PATCH 263/654] net: dsa: microchip: make learning configurable and keep it off while standalone Address learning should initially be turned off by the driver for port operation in standalone mode, then the DSA core handles changes to it via ds->ops->port_bridge_flags(). Leaving address learning enabled while ports are standalone breaks any kind of communication which involves port B receiving what port A has sent. Notably it breaks the ksz9477 driver used with a (non offloaded, ports act as if standalone) bonding interface in active-backup mode, when the ports are connected together through external switches, for redundancy purposes. This fixes a major design flaw in the ksz9477 and ksz8795 drivers, which unconditionally leave address learning enabled even while ports operate as standalone. Fixes: b987e98e50ab ("dsa: add DSA switch driver for Microchip KSZ9477") Link: https://lore.kernel.org/netdev/CAFZh4h-JVWt80CrQWkFji7tZJahMfOToUJQgKS5s0_=9zzpvYQ@mail.gmail.com/ Reported-by: Brian Hutchinson Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220818164809.3198039-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 45 +++++++++++++++++++++++++- drivers/net/dsa/microchip/ksz_common.h | 1 + 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 7461272a6d410..6bd69a7e6809d 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -968,6 +968,7 @@ static void ksz_update_port_member(struct ksz_device *dev, int port) static int ksz_setup(struct dsa_switch *ds) { struct ksz_device *dev = ds->priv; + struct ksz_port *p; const u16 *regs; int ret; @@ -1007,6 +1008,14 @@ static int ksz_setup(struct dsa_switch *ds) return ret; } + /* Start with learning disabled on standalone user ports, and enabled + * on the CPU port. In lack of other finer mechanisms, learning on the + * CPU port will avoid flooding bridge local addresses on the network + * in some cases. + */ + p = &dev->ports[dev->cpu_port]; + p->learning = true; + /* start switch */ regmap_update_bits(dev->regmap[0], regs[S_START_CTRL], SW_START, SW_START); @@ -1283,6 +1292,8 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) ksz_pread8(dev, port, regs[P_STP_CTRL], &data); data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE); + p = &dev->ports[port]; + switch (state) { case BR_STATE_DISABLED: data |= PORT_LEARN_DISABLE; @@ -1292,9 +1303,13 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) break; case BR_STATE_LEARNING: data |= PORT_RX_ENABLE; + if (!p->learning) + data |= PORT_LEARN_DISABLE; break; case BR_STATE_FORWARDING: data |= (PORT_TX_ENABLE | PORT_RX_ENABLE); + if (!p->learning) + data |= PORT_LEARN_DISABLE; break; case BR_STATE_BLOCKING: data |= PORT_LEARN_DISABLE; @@ -1306,12 +1321,38 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) ksz_pwrite8(dev, port, regs[P_STP_CTRL], data); - p = &dev->ports[port]; p->stp_state = state; ksz_update_port_member(dev, port); } +static int ksz_port_pre_bridge_flags(struct dsa_switch *ds, int port, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) +{ + if (flags.mask & ~BR_LEARNING) + return -EINVAL; + + return 0; +} + +static int ksz_port_bridge_flags(struct dsa_switch *ds, int port, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) +{ + struct ksz_device *dev = ds->priv; + struct ksz_port *p = &dev->ports[port]; + + if (flags.mask & BR_LEARNING) { + p->learning = !!(flags.val & BR_LEARNING); + + /* Make the change take effect immediately */ + ksz_port_stp_state_set(ds, port, p->stp_state); + } + + return 0; +} + static enum dsa_tag_protocol ksz_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol mp) @@ -1725,6 +1766,8 @@ static const struct dsa_switch_ops ksz_switch_ops = { .port_bridge_join = ksz_port_bridge_join, .port_bridge_leave = ksz_port_bridge_leave, .port_stp_state_set = ksz_port_stp_state_set, + .port_pre_bridge_flags = ksz_port_pre_bridge_flags, + .port_bridge_flags = ksz_port_bridge_flags, .port_fast_age = ksz_port_fast_age, .port_vlan_filtering = ksz_port_vlan_filtering, .port_vlan_add = ksz_port_vlan_add, diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index 764ada3a0f42a..0d9520dc6d2db 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -65,6 +65,7 @@ struct ksz_chip_data { struct ksz_port { bool remove_tag; /* Remove Tag flag set, for ksz8795 only */ + bool learning; int stp_state; struct phy_device phydev; From 0947ae1121083d363d522ff7518ee72b55bd8d29 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 14:58:04 -0700 Subject: [PATCH 264/654] bpf: Fix a data-race around bpf_jit_limit. While reading bpf_jit_limit, it can be changed concurrently via sysctl, WRITE_ONCE() in __do_proc_doulongvec_minmax(). The size of bpf_jit_limit is long, so we need to add a paired READ_ONCE() to avoid load-tearing. Fixes: ede95a63b5e8 ("bpf: add bpf_jit_limit knob to restrict unpriv allocations") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220823215804.2177-1-kuniyu@amazon.com --- kernel/bpf/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c1e10d088dbb7..3d9eb3ae334ce 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -971,7 +971,7 @@ pure_initcall(bpf_jit_charge_init); int bpf_jit_charge_modmem(u32 size) { - if (atomic_long_add_return(size, &bpf_jit_current) > bpf_jit_limit) { + if (atomic_long_add_return(size, &bpf_jit_current) > READ_ONCE(bpf_jit_limit)) { if (!bpf_capable()) { atomic_long_sub(size, &bpf_jit_current); return -EPERM; From 7dd3de7cb1d657a918c6b2bc673c71e318aa0c05 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 22 Aug 2022 11:06:51 -0400 Subject: [PATCH 265/654] bnxt_en: Use PAGE_SIZE to init buffer when multi buffer XDP is not in use Using BNXT_PAGE_MODE_BUF_SIZE + offset as buffer length value is not sufficient when running single buffer XDP programs doing redirect operations. The stack will complain on missing skb tail room. Fix it by using PAGE_SIZE when calling xdp_init_buff() for single buffer programs. Fixes: b231c3f3414c ("bnxt: refactor bnxt_rx_xdp to separate xdp_init_buff/xdp_prepare_buff") Reviewed-by: Somnath Kotur Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 075c6206325ce..b1b17f9113006 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2130,6 +2130,7 @@ struct bnxt { #define BNXT_DUMP_CRASH 1 struct bpf_prog *xdp_prog; + u8 xdp_has_frags; struct bnxt_ptp_cfg *ptp_cfg; u8 ptp_all_rx_tstamp; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index f53387ed0167b..c3065ec0a4798 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -181,6 +181,7 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, struct xdp_buff *xdp) { struct bnxt_sw_rx_bd *rx_buf; + u32 buflen = PAGE_SIZE; struct pci_dev *pdev; dma_addr_t mapping; u32 offset; @@ -192,7 +193,10 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, mapping = rx_buf->mapping - bp->rx_dma_offset; dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir); - xdp_init_buff(xdp, BNXT_PAGE_MODE_BUF_SIZE + offset, &rxr->xdp_rxq); + if (bp->xdp_has_frags) + buflen = BNXT_PAGE_MODE_BUF_SIZE + offset; + + xdp_init_buff(xdp, buflen, &rxr->xdp_rxq); xdp_prepare_buff(xdp, *data_ptr - offset, offset, *len, false); } @@ -397,8 +401,10 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n"); return -EOPNOTSUPP; } - if (prog) + if (prog) { tx_xdp = bp->rx_nr_rings; + bp->xdp_has_frags = prog->aux->xdp_has_frags; + } tc = netdev_get_num_tc(dev); if (!tc) From 574b2bb9692fd3d45ed631ac447176d4679f3010 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Mon, 22 Aug 2022 11:06:52 -0400 Subject: [PATCH 266/654] bnxt_en: set missing reload flag in devlink features Add missing devlink_set_features() API for callbacks reload_down and reload_up to function. Fixes: 228ea8c187d8 ("bnxt_en: implement devlink dev reload driver_reinit") Reviewed-by: Somnath Kotur Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 059f96f7a96f6..a36803e79e92e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -1306,6 +1306,7 @@ int bnxt_dl_register(struct bnxt *bp) if (rc) goto err_dl_port_unreg; + devlink_set_features(dl, DEVLINK_F_RELOAD); out: devlink_register(dl); return 0; From 09a89cc59ad67794a11e1d3dd13c5b3172adcc51 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Mon, 22 Aug 2022 11:06:53 -0400 Subject: [PATCH 267/654] bnxt_en: fix NQ resource accounting during vf creation on 57500 chips There are 2 issues: 1. We should decrement hw_resc->max_nqs instead of hw_resc->max_irqs with the number of NQs assigned to the VFs. The IRQs are fixed on each function and cannot be re-assigned. Only the NQs are being assigned to the VFs. 2. vf_msix is the total number of NQs to be assigned to the VFs. So we should decrement vf_msix from hw_resc->max_nqs. Fixes: b16b68918674 ("bnxt_en: Add SR-IOV support for 57500 chips.") Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 730febd19330a..a4cba7cb2783e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -623,7 +623,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset) hw_resc->max_stat_ctxs -= le16_to_cpu(req->min_stat_ctx) * n; hw_resc->max_vnics -= le16_to_cpu(req->min_vnics) * n; if (bp->flags & BNXT_FLAG_CHIP_P5) - hw_resc->max_irqs -= vf_msix * n; + hw_resc->max_nqs -= vf_msix; rc = pf->active_vfs; } From 366c304741729e64d778c80555d9eb422cf5cc89 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Mon, 22 Aug 2022 11:06:54 -0400 Subject: [PATCH 268/654] bnxt_en: fix LRO/GRO_HW features in ndo_fix_features callback LRO/GRO_HW should be disabled if there is an attached XDP program. BNXT_FLAG_TPA is the current setting of the LRO/GRO_HW. Using BNXT_FLAG_TPA to disable LRO/GRO_HW will cause these features to be permanently disabled once they are disabled. Fixes: 1dc4c557bfed ("bnxt: adding bnxt_xdp_build_skb to build skb from multibuffer xdp_buff") Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ba0f1ffac5071..f46eefb5a0292 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11178,10 +11178,7 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev, if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp)) features &= ~NETIF_F_NTUPLE; - if (bp->flags & BNXT_FLAG_NO_AGG_RINGS) - features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW); - - if (!(bp->flags & BNXT_FLAG_TPA)) + if ((bp->flags & BNXT_FLAG_NO_AGG_RINGS) || bp->xdp_prog) features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW); if (!(features & NETIF_F_GRO)) From bc163555603e4ae9c817675ad80d618a4cdbfa2d Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 13 Jul 2022 17:49:51 +0200 Subject: [PATCH 269/654] clk: bcm: rpi: Prevent out-of-bounds access The while loop in raspberrypi_discover_clocks() relies on the assumption that the id of the last clock element is zero. Because this data comes from the Videocore firmware and it doesn't guarantuee such a behavior this could lead to out-of-bounds access. So fix this by providing a sentinel element. Fixes: 93d2725affd6 ("clk: bcm: rpi: Discover the firmware clocks") Link: https://github.com/raspberrypi/firmware/issues/1688 Suggested-by: Phil Elwell Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20220713154953.3336-2-stefan.wahren@i2se.com Acked-by: Florian Fainelli Reviewed-by: Ivan T. Ivanov Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-raspberrypi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/clk/bcm/clk-raspberrypi.c b/drivers/clk/bcm/clk-raspberrypi.c index 39d63c983d62c..e495f5f382ab9 100644 --- a/drivers/clk/bcm/clk-raspberrypi.c +++ b/drivers/clk/bcm/clk-raspberrypi.c @@ -344,8 +344,13 @@ static int raspberrypi_discover_clocks(struct raspberrypi_clk *rpi, struct rpi_firmware_get_clocks_response *clks; int ret; + /* + * The firmware doesn't guarantee that the last element of + * RPI_FIRMWARE_GET_CLOCKS is zeroed. So allocate an additional + * zero element as sentinel. + */ clks = devm_kcalloc(rpi->dev, - RPI_FIRMWARE_NUM_CLK_ID, sizeof(*clks), + RPI_FIRMWARE_NUM_CLK_ID + 1, sizeof(*clks), GFP_KERNEL); if (!clks) return -ENOMEM; From 13b5cf8d6a0d4a5d289e1ed046cadc63b416db85 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 13 Jul 2022 17:49:52 +0200 Subject: [PATCH 270/654] clk: bcm: rpi: Add missing newline Some log messages lacks the final newline. So add them. Fixes: 93d2725affd6 ("clk: bcm: rpi: Discover the firmware clocks") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20220713154953.3336-3-stefan.wahren@i2se.com Acked-by: Florian Fainelli Reviewed-by: Ivan T. Ivanov Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-raspberrypi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/bcm/clk-raspberrypi.c b/drivers/clk/bcm/clk-raspberrypi.c index e495f5f382ab9..4df921d1e21ca 100644 --- a/drivers/clk/bcm/clk-raspberrypi.c +++ b/drivers/clk/bcm/clk-raspberrypi.c @@ -220,7 +220,7 @@ static int raspberrypi_fw_set_rate(struct clk_hw *hw, unsigned long rate, ret = raspberrypi_clock_property(rpi->firmware, data, RPI_FIRMWARE_SET_CLOCK_RATE, &_rate); if (ret) - dev_err_ratelimited(rpi->dev, "Failed to change %s frequency: %d", + dev_err_ratelimited(rpi->dev, "Failed to change %s frequency: %d\n", clk_hw_get_name(hw), ret); return ret; @@ -288,7 +288,7 @@ static struct clk_hw *raspberrypi_clk_register(struct raspberrypi_clk *rpi, RPI_FIRMWARE_GET_MIN_CLOCK_RATE, &min_rate); if (ret) { - dev_err(rpi->dev, "Failed to get clock %d min freq: %d", + dev_err(rpi->dev, "Failed to get clock %d min freq: %d\n", id, ret); return ERR_PTR(ret); } @@ -365,7 +365,7 @@ static int raspberrypi_discover_clocks(struct raspberrypi_clk *rpi, struct raspberrypi_clk_variant *variant; if (clks->id > RPI_FIRMWARE_NUM_CLK_ID) { - dev_err(rpi->dev, "Unknown clock id: %u", clks->id); + dev_err(rpi->dev, "Unknown clock id: %u\n", clks->id); return -EINVAL; } From 1a6052e1483f28808845e51a1c82c96e0e7cd419 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 13 Jul 2022 17:49:53 +0200 Subject: [PATCH 271/654] clk: bcm: rpi: Show clock id limit in error case The clock id limit will be extended in the future, so it would be helpful to see the actual clock id limit in case the firmware response has been rejected. Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20220713154953.3336-4-stefan.wahren@i2se.com Acked-by: Florian Fainelli Reviewed-by: Ivan T. Ivanov Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-raspberrypi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/bcm/clk-raspberrypi.c b/drivers/clk/bcm/clk-raspberrypi.c index 4df921d1e21ca..876b37b8683c5 100644 --- a/drivers/clk/bcm/clk-raspberrypi.c +++ b/drivers/clk/bcm/clk-raspberrypi.c @@ -365,7 +365,8 @@ static int raspberrypi_discover_clocks(struct raspberrypi_clk *rpi, struct raspberrypi_clk_variant *variant; if (clks->id > RPI_FIRMWARE_NUM_CLK_ID) { - dev_err(rpi->dev, "Unknown clock id: %u\n", clks->id); + dev_err(rpi->dev, "Unknown clock id: %u (max: %u)\n", + clks->id, RPI_FIRMWARE_NUM_CLK_ID); return -EINVAL; } From 8d905254162965c8e6be697d82c7dbf5d08f574d Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 5 Aug 2022 14:22:40 +0100 Subject: [PATCH 272/654] drm/i915/ttm: fix CCS handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Crucible + recent Mesa seems to sometimes hit: GEM_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER) And it looks like we can also trigger this with gem_lmem_swapping, if we modify the test to use slightly larger object sizes. Looking closer it looks like we have the following issues in migrate_copy(): - We are using plain integer in various places, which we can easily overflow with a large object. - We pass the entire object size (when the src is lmem) into emit_pte() and then try to copy it, which doesn't work, since we only have a few fixed sized windows in which to map the pages and perform the copy. With an object > 8M we therefore aren't properly copying the pages. And then with an object > 64M we trigger the GEM_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER). So it looks like our copy handling for any object > 8M (which is our CHUNK_SZ) is currently broken on DG2. Fixes: da0595ae91da ("drm/i915/migrate: Evict and restore the flatccs capable lmem obj") Testcase: igt@gem_lmem_swapping Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Ramalingam C Reviewed-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20220805132240.442747-2-matthew.auld@intel.com (cherry picked from commit 8676145eb2f53a9940ff70910caf0125bd8a4bc2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_migrate.c | 44 ++++++++++++------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 2b10b96b17b5b..933648cc90ff9 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -638,9 +638,9 @@ static int emit_copy(struct i915_request *rq, return 0; } -static int scatter_list_length(struct scatterlist *sg) +static u64 scatter_list_length(struct scatterlist *sg) { - int len = 0; + u64 len = 0; while (sg && sg_dma_len(sg)) { len += sg_dma_len(sg); @@ -650,28 +650,26 @@ static int scatter_list_length(struct scatterlist *sg) return len; } -static void +static int calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem, - int *src_sz, u32 bytes_to_cpy, u32 ccs_bytes_to_cpy) + u64 bytes_to_cpy, u64 ccs_bytes_to_cpy) { - if (ccs_bytes_to_cpy) { - if (!src_is_lmem) - /* - * When CHUNK_SZ is passed all the pages upto CHUNK_SZ - * will be taken for the blt. in Flat-ccs supported - * platform Smem obj will have more pages than required - * for main meory hence limit it to the required size - * for main memory - */ - *src_sz = min_t(int, bytes_to_cpy, CHUNK_SZ); - } else { /* ccs handling is not required */ - *src_sz = CHUNK_SZ; - } + if (ccs_bytes_to_cpy && !src_is_lmem) + /* + * When CHUNK_SZ is passed all the pages upto CHUNK_SZ + * will be taken for the blt. in Flat-ccs supported + * platform Smem obj will have more pages than required + * for main meory hence limit it to the required size + * for main memory + */ + return min_t(u64, bytes_to_cpy, CHUNK_SZ); + else + return CHUNK_SZ; } -static void get_ccs_sg_sgt(struct sgt_dma *it, u32 bytes_to_cpy) +static void get_ccs_sg_sgt(struct sgt_dma *it, u64 bytes_to_cpy) { - u32 len; + u64 len; do { GEM_BUG_ON(!it->sg || !sg_dma_len(it->sg)); @@ -702,12 +700,12 @@ intel_context_migrate_copy(struct intel_context *ce, { struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst), it_ccs; struct drm_i915_private *i915 = ce->engine->i915; - u32 ccs_bytes_to_cpy = 0, bytes_to_cpy; + u64 ccs_bytes_to_cpy = 0, bytes_to_cpy; enum i915_cache_level ccs_cache_level; u32 src_offset, dst_offset; u8 src_access, dst_access; struct i915_request *rq; - int src_sz, dst_sz; + u64 src_sz, dst_sz; bool ccs_is_src, overwrite_ccs; int err; @@ -790,8 +788,8 @@ intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; - calculate_chunk_sz(i915, src_is_lmem, &src_sz, - bytes_to_cpy, ccs_bytes_to_cpy); + src_sz = calculate_chunk_sz(i915, src_is_lmem, + bytes_to_cpy, ccs_bytes_to_cpy); len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, src_offset, src_sz); From de2228c04150df8632ad22ee490de2ed579f64e8 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 11 Aug 2022 14:08:12 -0700 Subject: [PATCH 273/654] drm/i915/guc: clear stalled request after a reset If the GuC CTs are full and we need to stall the request submission while waiting for space, we save the stalled request and where the stall occurred; when the CTs have space again we pick up the request submission from where we left off. If a full GT reset occurs, the state of all contexts is cleared and all non-guilty requests are unsubmitted, therefore we need to restart the stalled request submission from scratch. To make sure that we do so, clear the saved request after a reset. Fixes note: the patch that introduced the bug is in 5.15, but no officially supported platform had GuC submission enabled by default in that kernel, so the backport to that particular version (and only that one) can potentially be skipped. Fixes: 925dc1cf58ed ("drm/i915/guc: Implement GuC submission tasklet") Signed-off-by: Daniele Ceraolo Spurio Cc: Matthew Brost Cc: John Harrison Cc: # v5.15+ Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220811210812.3239621-1-daniele.ceraolospurio@intel.com (cherry picked from commit f922fbb0f2ad1fd3e3186f39c46673419e6d9281) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 76916aed897ad..834c707d1877b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -4026,6 +4026,13 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc) /* make sure all descriptors are clean... */ xa_destroy(&guc->context_lookup); + /* + * A reset might have occurred while we had a pending stalled request, + * so make sure we clean that up. + */ + guc->stalled_request = NULL; + guc->submission_stall_reason = STALL_NONE; + /* * Some contexts might have been pinned before we enabled GuC * submission, so we need to add them to the GuC bookeeping. From 607f41768a1ef9c7721866b00fbdeeea5359bc07 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 16 Aug 2022 18:37:20 +0300 Subject: [PATCH 274/654] drm/i915/dsi: filter invalid backlight and CABC ports Avoid using ports that aren't initialized in case the VBT backlight or CABC ports have invalid values. This fixes a NULL pointer dereference of intel_dsi->dsi_hosts[port] in such cases. Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula Reviewed-by: Stanislav Lisovskiy Link: https://patchwork.freedesktop.org/patch/msgid/b0f4f087866257d280eb97d6bcfcefd109cc5fa2.1660664162.git.jani.nikula@intel.com (cherry picked from commit f4a6c7a454a6e71c5ccf25af82694213a9784013) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/icl_dsi.c | 7 +++++++ drivers/gpu/drm/i915/display/vlv_dsi.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 5dcfa7feffa9f..885c74f60366b 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -2070,7 +2070,14 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) else intel_dsi->ports = BIT(port); + if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.bl_ports & ~intel_dsi->ports)) + intel_connector->panel.vbt.dsi.bl_ports &= intel_dsi->ports; + intel_dsi->dcs_backlight_ports = intel_connector->panel.vbt.dsi.bl_ports; + + if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.cabc_ports & ~intel_dsi->ports)) + intel_connector->panel.vbt.dsi.cabc_ports &= intel_dsi->ports; + intel_dsi->dcs_cabc_ports = intel_connector->panel.vbt.dsi.cabc_ports; for_each_dsi_port(port, intel_dsi->ports) { diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index b9b1fed998740..35136d26e5177 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1933,7 +1933,14 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) else intel_dsi->ports = BIT(port); + if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.bl_ports & ~intel_dsi->ports)) + intel_connector->panel.vbt.dsi.bl_ports &= intel_dsi->ports; + intel_dsi->dcs_backlight_ports = intel_connector->panel.vbt.dsi.bl_ports; + + if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.cabc_ports & ~intel_dsi->ports)) + intel_connector->panel.vbt.dsi.cabc_ports &= intel_dsi->ports; + intel_dsi->dcs_cabc_ports = intel_connector->panel.vbt.dsi.cabc_ports; /* Create a DSI host (and a device) for each port. */ From 13393f65b77445d8b0f99c7b605cc9ccc936586f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 16 Aug 2022 18:37:21 +0300 Subject: [PATCH 275/654] drm/i915/dsi: fix dual-link DSI backlight and CABC ports for display 11+ The VBT dual-link DSI backlight and CABC still use ports A and C, both in Bspec and code, while display 11+ DSI only supports ports A and B. Assume port C actually means port B for display 11+ when parsing VBT. Bspec: 20154 Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6476 Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula Reviewed-by: Stanislav Lisovskiy Link: https://patchwork.freedesktop.org/patch/msgid/8c462718bcc7b36a83e09d0a5eef058b6bc8b1a2.1660664162.git.jani.nikula@intel.com (cherry picked from commit ab55165d73a444606af1530cd0d6448b04370f68) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_bios.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 51dde5bfd9565..198a2f4920cc4 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1596,6 +1596,8 @@ static void parse_dsi_backlight_ports(struct drm_i915_private *i915, struct intel_panel *panel, enum port port) { + enum port port_bc = DISPLAY_VER(i915) >= 11 ? PORT_B : PORT_C; + if (!panel->vbt.dsi.config->dual_link || i915->vbt.version < 197) { panel->vbt.dsi.bl_ports = BIT(port); if (panel->vbt.dsi.config->cabc_supported) @@ -1609,11 +1611,11 @@ static void parse_dsi_backlight_ports(struct drm_i915_private *i915, panel->vbt.dsi.bl_ports = BIT(PORT_A); break; case DL_DCS_PORT_C: - panel->vbt.dsi.bl_ports = BIT(PORT_C); + panel->vbt.dsi.bl_ports = BIT(port_bc); break; default: case DL_DCS_PORT_A_AND_C: - panel->vbt.dsi.bl_ports = BIT(PORT_A) | BIT(PORT_C); + panel->vbt.dsi.bl_ports = BIT(PORT_A) | BIT(port_bc); break; } @@ -1625,12 +1627,12 @@ static void parse_dsi_backlight_ports(struct drm_i915_private *i915, panel->vbt.dsi.cabc_ports = BIT(PORT_A); break; case DL_DCS_PORT_C: - panel->vbt.dsi.cabc_ports = BIT(PORT_C); + panel->vbt.dsi.cabc_ports = BIT(port_bc); break; default: case DL_DCS_PORT_A_AND_C: panel->vbt.dsi.cabc_ports = - BIT(PORT_A) | BIT(PORT_C); + BIT(PORT_A) | BIT(port_bc); break; } } From 868e8e5156a1f8d92ca83fdbac6fd52798650792 Mon Sep 17 00:00:00 2001 From: Arun R Murthy Date: Mon, 8 Aug 2022 09:27:50 +0530 Subject: [PATCH 276/654] drm/i915/display: avoid warnings when registering dual panel backlight Commit 20f85ef89d94 ("drm/i915/backlight: use unique backlight device names") added support for multiple backlight devices on dual panel systems, but did so with error handling on -EEXIST from backlight_device_register(). Unfortunately, that triggered a warning in dmesg all the way down from sysfs_add_file_mode_ns() and sysfs_warn_dup(). Instead of optimistically always attempting to register with the default name ("intel_backlight", which we have to retain for backward compatibility), check if a backlight device with the name exists first, and, if so, use the card and connector based name. v2: reworked on top of the patch commit 20f85ef89d94 ("drm/i915/backlight: use unique backlight device names") v3: fixed the ref count leak(Jani N) Fixes: 20f85ef89d94 ("drm/i915/backlight: use unique backlight device names") Signed-off-by: Arun R Murthy Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220808035750.3111046-1-arun.r.murthy@intel.com (cherry picked from commit 4234ea30051200fc6016de10e4d58369e60b38f1) Signed-off-by: Rodrigo Vivi --- .../gpu/drm/i915/display/intel_backlight.c | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 110fc98ec280d..9314464133729 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -971,26 +971,24 @@ int intel_backlight_device_register(struct intel_connector *connector) if (!name) return -ENOMEM; - bd = backlight_device_register(name, connector->base.kdev, connector, - &intel_backlight_device_ops, &props); - - /* - * Using the same name independent of the drm device or connector - * prevents registration of multiple backlight devices in the - * driver. However, we need to use the default name for backward - * compatibility. Use unique names for subsequent backlight devices as a - * fallback when the default name already exists. - */ - if (IS_ERR(bd) && PTR_ERR(bd) == -EEXIST) { + bd = backlight_device_get_by_name(name); + if (bd) { + put_device(&bd->dev); + /* + * Using the same name independent of the drm device or connector + * prevents registration of multiple backlight devices in the + * driver. However, we need to use the default name for backward + * compatibility. Use unique names for subsequent backlight devices as a + * fallback when the default name already exists. + */ kfree(name); name = kasprintf(GFP_KERNEL, "card%d-%s-backlight", i915->drm.primary->index, connector->base.name); if (!name) return -ENOMEM; - - bd = backlight_device_register(name, connector->base.kdev, connector, - &intel_backlight_device_ops, &props); } + bd = backlight_device_register(name, connector->base.kdev, connector, + &intel_backlight_device_ops, &props); if (IS_ERR(bd)) { drm_err(&i915->drm, From 6067c82c576af13a6b1c892b42ac4a189aced8ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Mon, 22 Aug 2022 17:08:36 +0300 Subject: [PATCH 277/654] drm/i915/backlight: Disable pps power hook for aux based backlight MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pps power hook seems to be problematic for backlight controlled via aux channel. Disable it for such cases. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3657 Cc: stable@vger.kernel.org Signed-off-by: Jouni Högander Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220822140836.534432-1-jouni.hogander@intel.com (cherry picked from commit 869e3bb7acb59d88c1226892136661810e8223a4) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_backlight.c | 11 ++++++++--- drivers/gpu/drm/i915/display/intel_dp.c | 2 -- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 9314464133729..f5e1d692976e7 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -16,6 +16,7 @@ #include "intel_dsi_dcs_backlight.h" #include "intel_panel.h" #include "intel_pci_config.h" +#include "intel_pps.h" /** * scale - scale values from one range to another @@ -1771,9 +1772,13 @@ void intel_backlight_init_funcs(struct intel_panel *panel) panel->backlight.pwm_funcs = &i9xx_pwm_funcs; } - if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP && - intel_dp_aux_init_backlight_funcs(connector) == 0) - return; + if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) { + if (intel_dp_aux_init_backlight_funcs(connector) == 0) + return; + + if (!(dev_priv->quirks & QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK)) + connector->panel.backlight.power = intel_pps_backlight_power; + } /* We're using a standard PWM backlight interface */ panel->backlight.funcs = &pwm_bl_funcs; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 32292c0be2bd0..ac90d455a7c73 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5293,8 +5293,6 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, intel_panel_init(intel_connector); - if (!(dev_priv->quirks & QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK)) - intel_connector->panel.backlight.power = intel_pps_backlight_power; intel_backlight_setup(intel_connector, pipe); intel_edp_add_properties(intel_dp); From 5dc52d83baac30decf5f3b371d5eb41dfa1d1412 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 21 Aug 2022 10:28:25 +0200 Subject: [PATCH 278/654] netfilter: nf_tables: disallow updates of implicit chain Updates on existing implicit chain make no sense, disallow this. Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 62cfb0e31c40e..dff2b5851bbb0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2574,6 +2574,9 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info, nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla); if (chain != NULL) { + if (chain->flags & NFT_CHAIN_BINDING) + return -EINVAL; + if (info->nlh->nlmsg_flags & NLM_F_EXCL) { NL_SET_BAD_ATTR(extack, attr); return -EEXIST; From ab482c6b66a4a8c0a8c0b0f577a785cf9ff1c2e2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 21 Aug 2022 10:52:48 +0200 Subject: [PATCH 279/654] netfilter: nf_tables: make table handle allocation per-netns friendly mutex is per-netns, move table_netns to the pernet area. *read-write* to 0xffffffff883a01e8 of 8 bytes by task 6542 on cpu 0: nf_tables_newtable+0x6dc/0xc00 net/netfilter/nf_tables_api.c:1221 nfnetlink_rcv_batch net/netfilter/nfnetlink.c:513 [inline] nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:634 [inline] nfnetlink_rcv+0xa6a/0x13a0 net/netfilter/nfnetlink.c:652 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x652/0x730 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x643/0x740 net/netlink/af_netlink.c:1921 Fixes: f102d66b335a ("netfilter: nf_tables: use dedicated mutex to guard transactions") Reported-by: Abhishek Shah Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 99aae36c04b97..cdb7db9b0e252 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1652,6 +1652,7 @@ struct nftables_pernet { struct list_head module_list; struct list_head notify_list; struct mutex commit_mutex; + u64 table_handle; unsigned int base_seq; u8 validate_state; }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dff2b5851bbb0..0951f31caabe5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -32,7 +32,6 @@ static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); static LIST_HEAD(nf_tables_destroy_list); static DEFINE_SPINLOCK(nf_tables_destroy_list_lock); -static u64 table_handle; enum { NFT_VALIDATE_SKIP = 0, @@ -1235,7 +1234,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info, INIT_LIST_HEAD(&table->flowtables); table->family = family; table->flags = flags; - table->handle = ++table_handle; + table->handle = ++nft_net->table_handle; if (table->flags & NFT_TABLE_F_OWNER) table->nlpid = NETLINK_CB(skb).portid; From 94254f990c07e9ddf1634e0b727fab821c3b5bf9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 21 Aug 2022 11:47:04 +0200 Subject: [PATCH 280/654] netfilter: nft_payload: report ERANGE for too long offset and length Instead of offset and length are truncation to u8, report ERANGE. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_payload.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 2e7ac007cb30f..4fee67abfe2c5 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -833,6 +833,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx, { enum nft_payload_bases base; unsigned int offset, len; + int err; if (tb[NFTA_PAYLOAD_BASE] == NULL || tb[NFTA_PAYLOAD_OFFSET] == NULL || @@ -859,8 +860,13 @@ nft_payload_select_ops(const struct nft_ctx *ctx, if (tb[NFTA_PAYLOAD_DREG] == NULL) return ERR_PTR(-EINVAL); - offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); - len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U8_MAX, &offset); + if (err < 0) + return ERR_PTR(err); + + err = nft_parse_u32_check(tb[NFTA_PAYLOAD_LEN], U8_MAX, &len); + if (err < 0) + return ERR_PTR(err); if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) && base != NFT_PAYLOAD_LL_HEADER && base != NFT_PAYLOAD_INNER_HEADER) From 7044ab281febae9e2fa9b0b247693d6026166293 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 21 Aug 2022 11:55:19 +0200 Subject: [PATCH 281/654] netfilter: nft_payload: do not truncate csum_offset and csum_type Instead report ERANGE if csum_offset is too long, and EOPNOTSUPP if type is not support. Fixes: 7ec3f7b47b8d ("netfilter: nft_payload: add packet mangling support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_payload.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 4fee67abfe2c5..eb0e40c297121 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -740,17 +740,23 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_payload_set *priv = nft_expr_priv(expr); + u32 csum_offset, csum_type = NFT_PAYLOAD_CSUM_NONE; + int err; priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); if (tb[NFTA_PAYLOAD_CSUM_TYPE]) - priv->csum_type = - ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE])); - if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) - priv->csum_offset = - ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET])); + csum_type = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE])); + if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) { + err = nft_parse_u32_check(tb[NFTA_PAYLOAD_CSUM_OFFSET], U8_MAX, + &csum_offset); + if (err < 0) + return err; + + priv->csum_offset = csum_offset; + } if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) { u32 flags; @@ -761,7 +767,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, priv->csum_flags = flags; } - switch (priv->csum_type) { + switch (csum_type) { case NFT_PAYLOAD_CSUM_NONE: case NFT_PAYLOAD_CSUM_INET: break; @@ -775,6 +781,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, default: return -EOPNOTSUPP; } + priv->csum_type = csum_type; return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg, priv->len); From 43eb8949cfdffa764b92bc6c54b87cbe5b0003fe Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 21 Aug 2022 12:41:33 +0200 Subject: [PATCH 282/654] netfilter: nf_tables: do not leave chain stats enabled on error Error might occur later in the nf_tables_addchain() codepath, enable static key only after transaction has been created. Fixes: 9f08ea848117 ("netfilter: nf_tables: keep chain counters away from hot path") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0951f31caabe5..72c066a334163 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2195,9 +2195,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct netlink_ext_ack *extack) { const struct nlattr * const *nla = ctx->nla; + struct nft_stats __percpu *stats = NULL; struct nft_table *table = ctx->table; struct nft_base_chain *basechain; - struct nft_stats __percpu *stats; struct net *net = ctx->net; char name[NFT_NAME_MAXLEN]; struct nft_rule_blob *blob; @@ -2235,7 +2235,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, return PTR_ERR(stats); } rcu_assign_pointer(basechain->stats, stats); - static_branch_inc(&nft_counters_enabled); } err = nft_basechain_init(basechain, family, &hook, flags); @@ -2318,6 +2317,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, goto err_unregister_hook; } + if (stats) + static_branch_inc(&nft_counters_enabled); + table->use++; return 0; From 5f3b7aae14a706d0d7da9f9e39def52ff5fc3d39 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 21 Aug 2022 16:25:07 +0200 Subject: [PATCH 283/654] netfilter: nft_osf: restrict osf to ipv4, ipv6 and inet families As it was originally intended, restrict extension to supported families. Fixes: b96af92d6eaf ("netfilter: nf_tables: implement Passive OS fingerprint module in nft_osf") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_osf.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 0053a697c9316..89342ccccdccf 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -115,9 +115,21 @@ static int nft_osf_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { - return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) | - (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_FORWARD)); + unsigned int hooks; + + switch (ctx->family) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + case NFPROTO_INET: + hooks = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_FORWARD); + break; + default: + return -EOPNOTSUPP; + } + + return nft_chain_validate_hooks(ctx->chain, hooks); } static bool nft_osf_reduce(struct nft_regs_track *track, From 01e4092d53bc4fe122a6e4b6d664adbd57528ca3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 21 Aug 2022 16:32:44 +0200 Subject: [PATCH 284/654] netfilter: nft_tunnel: restrict it to netdev family Only allow to use this expression from NFPROTO_NETDEV family. Fixes: af308b94a2a4 ("netfilter: nf_tables: add tunnel support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_tunnel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 5edaaded706d9..983ade4be3b39 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -161,6 +161,7 @@ static const struct nft_expr_ops nft_tunnel_get_ops = { static struct nft_expr_type nft_tunnel_type __read_mostly = { .name = "tunnel", + .family = NFPROTO_NETDEV, .ops = &nft_tunnel_get_ops, .policy = nft_tunnel_policy, .maxattr = NFTA_TUNNEL_MAX, From e02f0d3970404bfea385b6edb86f2d936db0ea2b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 22 Aug 2022 11:06:39 +0200 Subject: [PATCH 285/654] netfilter: nf_tables: disallow binding to already bound chain Update nft_data_init() to report EINVAL if chain is already bound. Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") Reported-by: Gwangun Jung Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 72c066a334163..2ee50e23c9b71 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9711,6 +9711,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, return PTR_ERR(chain); if (nft_is_base_chain(chain)) return -EOPNOTSUPP; + if (nft_chain_is_bound(chain)) + return -EINVAL; if (desc->flags & NFT_DATA_DESC_SETELEM && chain->flags & NFT_CHAIN_BINDING) return -EINVAL; From 759eebbcfafcefa23b59e912396306543764bd3c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 22 Aug 2022 23:13:00 +0200 Subject: [PATCH 286/654] netfilter: flowtable: add function to invoke garbage collection immediately Expose nf_flow_table_gc_run() to force a garbage collector run from the offload infrastructure. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 1 + net/netfilter/nf_flow_table_core.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d5326c44b4535..476cc4423a901 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -270,6 +270,7 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, struct flow_offload_tuple *tuple); +void nf_flow_table_gc_run(struct nf_flowtable *flow_table); void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable, struct net_device *dev); void nf_flow_table_cleanup(struct net_device *dev); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 765ac779bfc8f..60fc1e1b71821 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -437,12 +437,17 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, } } +void nf_flow_table_gc_run(struct nf_flowtable *flow_table) +{ + nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); +} + static void nf_flow_offload_work_gc(struct work_struct *work) { struct nf_flowtable *flow_table; flow_table = container_of(work, struct nf_flowtable, gc_work.work); - nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); + nf_flow_table_gc_run(flow_table); queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); } @@ -601,10 +606,11 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) cancel_delayed_work_sync(&flow_table->gc_work); nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); - nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); + nf_flow_table_gc_run(flow_table); nf_flow_table_offload_flush(flow_table); if (nf_flowtable_hw_offload(flow_table)) - nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); + nf_flow_table_gc_run(flow_table); + rhashtable_destroy(&flow_table->rhashtable); } EXPORT_SYMBOL_GPL(nf_flow_table_free); From 9afb4b27349a499483ae0134282cefd0c90f480f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 18 Nov 2021 22:24:15 +0100 Subject: [PATCH 287/654] netfilter: flowtable: fix stuck flows on cleanup due to pending work To clear the flow table on flow table free, the following sequence normally happens in order: 1) gc_step work is stopped to disable any further stats/del requests. 2) All flow table entries are set to teardown state. 3) Run gc_step which will queue HW del work for each flow table entry. 4) Waiting for the above del work to finish (flush). 5) Run gc_step again, deleting all entries from the flow table. 6) Flow table is freed. But if a flow table entry already has pending HW stats or HW add work step 3 will not queue HW del work (it will be skipped), step 4 will wait for the pending add/stats to finish, and step 5 will queue HW del work which might execute after freeing of the flow table. To fix the above, this patch flushes the pending work, then it sets the teardown flag to all flows in the flowtable and it forces a garbage collector run to queue work to remove the flows from hardware, then it flushes this new pending work and (finally) it forces another garbage collector run to remove the entry from the software flowtable. Stack trace: [47773.882335] BUG: KASAN: use-after-free in down_read+0x99/0x460 [47773.883634] Write of size 8 at addr ffff888103b45aa8 by task kworker/u20:6/543704 [47773.885634] CPU: 3 PID: 543704 Comm: kworker/u20:6 Not tainted 5.12.0-rc7+ #2 [47773.886745] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009) [47773.888438] Workqueue: nf_ft_offload_del flow_offload_work_handler [nf_flow_table] [47773.889727] Call Trace: [47773.890214] dump_stack+0xbb/0x107 [47773.890818] print_address_description.constprop.0+0x18/0x140 [47773.892990] kasan_report.cold+0x7c/0xd8 [47773.894459] kasan_check_range+0x145/0x1a0 [47773.895174] down_read+0x99/0x460 [47773.899706] nf_flow_offload_tuple+0x24f/0x3c0 [nf_flow_table] [47773.907137] flow_offload_work_handler+0x72d/0xbe0 [nf_flow_table] [47773.913372] process_one_work+0x8ac/0x14e0 [47773.921325] [47773.921325] Allocated by task 592159: [47773.922031] kasan_save_stack+0x1b/0x40 [47773.922730] __kasan_kmalloc+0x7a/0x90 [47773.923411] tcf_ct_flow_table_get+0x3cb/0x1230 [act_ct] [47773.924363] tcf_ct_init+0x71c/0x1156 [act_ct] [47773.925207] tcf_action_init_1+0x45b/0x700 [47773.925987] tcf_action_init+0x453/0x6b0 [47773.926692] tcf_exts_validate+0x3d0/0x600 [47773.927419] fl_change+0x757/0x4a51 [cls_flower] [47773.928227] tc_new_tfilter+0x89a/0x2070 [47773.936652] [47773.936652] Freed by task 543704: [47773.937303] kasan_save_stack+0x1b/0x40 [47773.938039] kasan_set_track+0x1c/0x30 [47773.938731] kasan_set_free_info+0x20/0x30 [47773.939467] __kasan_slab_free+0xe7/0x120 [47773.940194] slab_free_freelist_hook+0x86/0x190 [47773.941038] kfree+0xce/0x3a0 [47773.941644] tcf_ct_flow_table_cleanup_work Original patch description and stack trace by Paul Blakey. Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Reported-by: Paul Blakey Tested-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 2 ++ net/netfilter/nf_flow_table_core.c | 7 +++---- net/netfilter/nf_flow_table_offload.c | 8 ++++++++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 476cc4423a901..cd982f4a0f50c 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -307,6 +307,8 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable, struct flow_offload *flow); void nf_flow_table_offload_flush(struct nf_flowtable *flowtable); +void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable); + int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, struct net_device *dev, enum flow_block_command cmd); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 60fc1e1b71821..81c26a96c30bb 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -605,12 +605,11 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) mutex_unlock(&flowtable_lock); cancel_delayed_work_sync(&flow_table->gc_work); + nf_flow_table_offload_flush(flow_table); + /* ... no more pending work after this stage ... */ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); nf_flow_table_gc_run(flow_table); - nf_flow_table_offload_flush(flow_table); - if (nf_flowtable_hw_offload(flow_table)) - nf_flow_table_gc_run(flow_table); - + nf_flow_table_offload_flush_cleanup(flow_table); rhashtable_destroy(&flow_table->rhashtable); } EXPORT_SYMBOL_GPL(nf_flow_table_free); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 103b6cbf257f2..b04645ced89ba 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -1074,6 +1074,14 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable, flow_offload_queue_work(offload); } +void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable) +{ + if (nf_flowtable_hw_offload(flowtable)) { + flush_workqueue(nf_flow_offload_del_wq); + nf_flow_table_gc_run(flowtable); + } +} + void nf_flow_table_offload_flush(struct nf_flowtable *flowtable) { if (nf_flowtable_hw_offload(flowtable)) { From 3e7e04b747adea36f349715d9f0998eeebf15d72 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Aug 2022 09:27:17 +0200 Subject: [PATCH 288/654] ALSA: seq: Fix data-race at module auto-loading It's been reported that there is a possible data-race accessing to the global card_requested[] array at ALSA sequencer core, which is used for determining whether to call request_module() for the card or not. This data race itself is almost harmless, as it might end up with one extra request_module() call for the already loaded module at most. But it's still better to fix. This patch addresses the possible data race of card_requested[] and client_requested[] arrays by replacing them with bitmask. It's an atomic operation and can work without locks. Reported-by: Abhishek Shah Cc: Link: https://lore.kernel.org/r/CAEHB24_ay6YzARpA1zgCsE7=H9CSJJzux618E=Ka4h0YdKn=qA@mail.gmail.com Link: https://lore.kernel.org/r/20220823072717.1706-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 2e9d695d336c9..2d707afa1ef1c 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -121,13 +121,13 @@ struct snd_seq_client *snd_seq_client_use_ptr(int clientid) spin_unlock_irqrestore(&clients_lock, flags); #ifdef CONFIG_MODULES if (!in_interrupt()) { - static char client_requested[SNDRV_SEQ_GLOBAL_CLIENTS]; - static char card_requested[SNDRV_CARDS]; + static DECLARE_BITMAP(client_requested, SNDRV_SEQ_GLOBAL_CLIENTS); + static DECLARE_BITMAP(card_requested, SNDRV_CARDS); + if (clientid < SNDRV_SEQ_GLOBAL_CLIENTS) { int idx; - if (!client_requested[clientid]) { - client_requested[clientid] = 1; + if (!test_and_set_bit(clientid, client_requested)) { for (idx = 0; idx < 15; idx++) { if (seq_client_load[idx] < 0) break; @@ -142,10 +142,8 @@ struct snd_seq_client *snd_seq_client_use_ptr(int clientid) int card = (clientid - SNDRV_SEQ_GLOBAL_CLIENTS) / SNDRV_SEQ_CLIENTS_PER_CARD; if (card < snd_ecards_limit) { - if (! card_requested[card]) { - card_requested[card] = 1; + if (!test_and_set_bit(card, card_requested)) snd_request_card(card); - } snd_seq_device_load_drivers(); } } From 2e6481a3f3ee6234ce577454e1d88aca55f51d47 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 23 Aug 2022 15:24:05 +0300 Subject: [PATCH 289/654] ALSA: hda: intel-nhlt: Correct the handling of fmt_config flexible array The struct nhlt_format's fmt_config is a flexible array, it must not be used as normal array. When moving to the next nhlt_fmt_cfg we need to take into account the data behind the ->config.caps (indicated by ->config.size). Fixes: a864e8f159b13 ("ALSA: hda: intel-nhlt: verify config type") Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Jaska Uimonen Link: https://lore.kernel.org/r/20220823122405.18464-1-peter.ujfalusi@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-nhlt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c index 9db5ccd9aa2db..13bb0ccfb36c0 100644 --- a/sound/hda/intel-nhlt.c +++ b/sound/hda/intel-nhlt.c @@ -55,16 +55,22 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt) /* find max number of channels based on format_configuration */ if (fmt_configs->fmt_count) { + struct nhlt_fmt_cfg *fmt_cfg = fmt_configs->fmt_config; + dev_dbg(dev, "found %d format definitions\n", fmt_configs->fmt_count); for (i = 0; i < fmt_configs->fmt_count; i++) { struct wav_fmt_ext *fmt_ext; - fmt_ext = &fmt_configs->fmt_config[i].fmt_ext; + fmt_ext = &fmt_cfg->fmt_ext; if (fmt_ext->fmt.channels > max_ch) max_ch = fmt_ext->fmt.channels; + + /* Move to the next nhlt_fmt_cfg */ + fmt_cfg = (struct nhlt_fmt_cfg *)(fmt_cfg->config.caps + + fmt_cfg->config.size); } dev_dbg(dev, "max channels found %d\n", max_ch); } else { From 00cd7bf9f9e06769ef84d5102774c8becd6a498a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Aug 2022 16:38:48 -0700 Subject: [PATCH 290/654] netfilter: nf_defrag_ipv6: allow nf_conntrack_frag6_high_thresh increases Currently, net.netfilter.nf_conntrack_frag6_high_thresh can only be lowered. I found this issue while investigating a probable kernel issue causing flakes in tools/testing/selftests/net/ip_defrag.sh In particular, these sysctl changes were ignored: ip netns exec "${NETNS}" sysctl -w net.netfilter.nf_conntrack_frag6_high_thresh=9000000 >/dev/null 2>&1 ip netns exec "${NETNS}" sysctl -w net.netfilter.nf_conntrack_frag6_low_thresh=7000000 >/dev/null 2>&1 This change is inline with commit 836196239298 ("net/ipfrag: let ip[6]frag_high_thresh in ns be higher than in init_net") Fixes: 8db3d41569bb ("netfilter: nf_defrag_ipv6: use net_generic infra") Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 7dd3629dd19e7..38db0064d6613 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -86,7 +86,6 @@ static int nf_ct_frag6_sysctl_register(struct net *net) table[1].extra2 = &nf_frag->fqdir->high_thresh; table[2].data = &nf_frag->fqdir->high_thresh; table[2].extra1 = &nf_frag->fqdir->low_thresh; - table[2].extra2 = &nf_frag->fqdir->high_thresh; hdr = register_net_sysctl(net, "net/netfilter", table); if (hdr == NULL) From 4b1c742407571eff58b6de9881889f7ca7c4b4dc Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 23 Aug 2022 11:07:34 -0500 Subject: [PATCH 291/654] x86/boot: Don't propagate uninitialized boot_params->cc_blob_address In some cases, bootloaders will leave boot_params->cc_blob_address uninitialized rather than zeroing it out. This field is only meant to be set by the boot/compressed kernel in order to pass information to the uncompressed kernel when SEV-SNP support is enabled. Therefore, there are no cases where the bootloader-provided values should be treated as anything other than garbage. Otherwise, the uncompressed kernel may attempt to access this bogus address, leading to a crash during early boot. Normally, sanitize_boot_params() would be used to clear out such fields but that happens too late: sev_enable() may have already initialized it to a valid value that should not be zeroed out. Instead, have sev_enable() zero it out unconditionally beforehand. Also ensure this happens for !CONFIG_AMD_MEM_ENCRYPT as well by also including this handling in the sev_enable() stub function. [ bp: Massage commit message and comments. ] Fixes: b190a043c49a ("x86/sev: Add SEV-SNP feature detection/setup") Reported-by: Jeremi Piotrowski Reported-by: watnuss@gmx.de Signed-off-by: Michael Roth Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=216387 Link: https://lore.kernel.org/r/20220823160734.89036-1-michael.roth@amd.com --- arch/x86/boot/compressed/misc.h | 12 +++++++++++- arch/x86/boot/compressed/sev.c | 8 ++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 4910bf230d7b4..62208ec04ca4b 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -132,7 +132,17 @@ void snp_set_page_private(unsigned long paddr); void snp_set_page_shared(unsigned long paddr); void sev_prep_identity_maps(unsigned long top_level_pgt); #else -static inline void sev_enable(struct boot_params *bp) { } +static inline void sev_enable(struct boot_params *bp) +{ + /* + * bp->cc_blob_address should only be set by boot/compressed kernel. + * Initialize it to 0 unconditionally (thus here in this stub too) to + * ensure that uninitialized values from buggy bootloaders aren't + * propagated. + */ + if (bp) + bp->cc_blob_address = 0; +} static inline void sev_es_shutdown_ghcb(void) { } static inline bool sev_es_check_ghcb_fault(unsigned long address) { diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 52f989f6acc28..c93930d5ccbd0 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -276,6 +276,14 @@ void sev_enable(struct boot_params *bp) struct msr m; bool snp; + /* + * bp->cc_blob_address should only be set by boot/compressed kernel. + * Initialize it to 0 to ensure that uninitialized values from + * buggy bootloaders aren't propagated. + */ + if (bp) + bp->cc_blob_address = 0; + /* * Setup/preliminary detection of SNP. This will be sanity-checked * against CPUID/MSR values later. From cdaa0a407f1acd3a44861e3aea6e3c7349e668f1 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 23 Aug 2022 16:55:51 -0500 Subject: [PATCH 292/654] x86/sev: Don't use cc_platform_has() for early SEV-SNP calls When running identity-mapped and depending on the kernel configuration, it is possible that the compiler uses jump tables when generating code for cc_platform_has(). This causes a boot failure because the jump table uses un-mapped kernel virtual addresses, not identity-mapped addresses. This has been seen with CONFIG_RETPOLINE=n. Similar to sme_encrypt_kernel(), use an open-coded direct check for the status of SNP rather than trying to eliminate the jump table. This preserves any code optimization in cc_platform_has() that can be useful post boot. It also limits the changes to SEV-specific files so that future compiler features won't necessarily require possible build changes just because they are not compatible with running identity-mapped. [ bp: Massage commit message. ] Fixes: 5e5ccff60a29 ("x86/sev: Add helper for validating pages in early enc attribute changes") Reported-by: Sean Christopherson Suggested-by: Sean Christopherson Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov Cc: # 5.19.x Link: https://lore.kernel.org/all/YqfabnTRxFSM+LoX@google.com/ --- arch/x86/kernel/sev.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 63dc626627a03..4f84c3f11af5b 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -701,7 +701,13 @@ static void __init early_set_pages_state(unsigned long paddr, unsigned int npage void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages) { - if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + /* + * This can be invoked in early boot while running identity mapped, so + * use an open coded check for SNP instead of using cc_platform_has(). + * This eliminates worries about jump tables or checking boot_cpu_data + * in the cc_platform_has() function. + */ + if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) return; /* @@ -717,7 +723,13 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages) { - if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + /* + * This can be invoked in early boot while running identity mapped, so + * use an open coded check for SNP instead of using cc_platform_has(). + * This eliminates worries about jump tables or checking boot_cpu_data + * in the cc_platform_has() function. + */ + if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) return; /* Invalidate the memory pages before they are marked shared in the RMP table. */ From d5485d9dd24e1d04e5509916515260186eb1455c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 22 Aug 2022 10:53:46 +0800 Subject: [PATCH 293/654] net: neigh: don't call kfree_skb() under spin_lock_irqsave() It is not allowed to call kfree_skb() from hardware interrupt context or with interrupts being disabled. So add all skb to a tmp list, then free them after spin_unlock_irqrestore() at once. Fixes: 66ba215cb513 ("neigh: fix possible DoS due to net iface start/stop loop") Suggested-by: Denis V. Lunev Signed-off-by: Yang Yingliang Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/core/neighbour.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5b669eb802708..78cc8fb688140 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -309,14 +309,17 @@ static int neigh_del_timer(struct neighbour *n) static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) { + struct sk_buff_head tmp; unsigned long flags; struct sk_buff *skb; + skb_queue_head_init(&tmp); spin_lock_irqsave(&list->lock, flags); skb = skb_peek(list); while (skb != NULL) { struct sk_buff *skb_next = skb_peek_next(skb, list); struct net_device *dev = skb->dev; + if (net == NULL || net_eq(dev_net(dev), net)) { struct in_device *in_dev; @@ -326,13 +329,16 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) in_dev->arp_parms->qlen--; rcu_read_unlock(); __skb_unlink(skb, list); - - dev_put(dev); - kfree_skb(skb); + __skb_queue_tail(&tmp, skb); } skb = skb_next; } spin_unlock_irqrestore(&list->lock, flags); + + while ((skb = __skb_dequeue(&tmp))) { + dev_put(skb->dev); + kfree_skb(skb); + } } static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, From f79959220fa5fbda939592bf91c7a9ea90419040 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cs=C3=B3k=C3=A1s=20Bence?= Date: Mon, 22 Aug 2022 10:10:52 +0200 Subject: [PATCH 294/654] fec: Restart PPS after link state change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On link state change, the controller gets reset, causing PPS to drop out and the PHC to lose its time and calibration. So we restart it if needed, restoring calibration and time registers. Changes since v2: * Add `fec_ptp_save_state()`/`fec_ptp_restore_state()` * Use `ktime_get_real_ns()` * Use `BIT()` macro Changes since v1: * More ECR #define's * Stop PPS in `fec_ptp_stop()` Signed-off-by: Csókás Bence Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec.h | 10 ++++++ drivers/net/ethernet/freescale/fec_main.c | 42 ++++++++++++++++++++--- drivers/net/ethernet/freescale/fec_ptp.c | 29 ++++++++++++++++ 3 files changed, 77 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index ed7301b691694..0cebe4b63adb7 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -634,6 +634,13 @@ struct fec_enet_private { int pps_enable; unsigned int next_counter; + struct { + struct timespec64 ts_phc; + u64 ns_sys; + u32 at_corr; + u8 at_inc_corr; + } ptp_saved_state; + u64 ethtool_stats[]; }; @@ -644,5 +651,8 @@ void fec_ptp_disable_hwts(struct net_device *ndev); int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr); int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr); +void fec_ptp_save_state(struct fec_enet_private *fep); +int fec_ptp_restore_state(struct fec_enet_private *fep); + /****************************************************************************/ #endif /* FEC_H */ diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index e8e2aa1e7f01b..b0d60f898249b 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -285,8 +285,11 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); #define FEC_MMFR_TA (2 << 16) #define FEC_MMFR_DATA(v) (v & 0xffff) /* FEC ECR bits definition */ -#define FEC_ECR_MAGICEN (1 << 2) -#define FEC_ECR_SLEEP (1 << 3) +#define FEC_ECR_RESET BIT(0) +#define FEC_ECR_ETHEREN BIT(1) +#define FEC_ECR_MAGICEN BIT(2) +#define FEC_ECR_SLEEP BIT(3) +#define FEC_ECR_EN1588 BIT(4) #define FEC_MII_TIMEOUT 30000 /* us */ @@ -982,6 +985,9 @@ fec_restart(struct net_device *ndev) u32 temp_mac[2]; u32 rcntl = OPT_FRAME_SIZE | 0x04; u32 ecntl = 0x2; /* ETHEREN */ + struct ptp_clock_request ptp_rq = { .type = PTP_CLK_REQ_PPS }; + + fec_ptp_save_state(fep); /* Whack a reset. We should wait for this. * For i.MX6SX SOC, enet use AXI bus, we use disable MAC @@ -1135,7 +1141,7 @@ fec_restart(struct net_device *ndev) } if (fep->bufdesc_ex) - ecntl |= (1 << 4); + ecntl |= FEC_ECR_EN1588; if (fep->quirks & FEC_QUIRK_DELAYED_CLKS_SUPPORT && fep->rgmii_txc_dly) @@ -1156,6 +1162,14 @@ fec_restart(struct net_device *ndev) if (fep->bufdesc_ex) fec_ptp_start_cyclecounter(ndev); + /* Restart PPS if needed */ + if (fep->pps_enable) { + /* Clear flag so fec_ptp_enable_pps() doesn't return immediately */ + fep->pps_enable = 0; + fec_ptp_restore_state(fep); + fep->ptp_caps.enable(&fep->ptp_caps, &ptp_rq, 1); + } + /* Enable interrupts we wish to service */ if (fep->link) writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); @@ -1206,6 +1220,8 @@ fec_stop(struct net_device *ndev) struct fec_enet_private *fep = netdev_priv(ndev); u32 rmii_mode = readl(fep->hwp + FEC_R_CNTRL) & (1 << 8); u32 val; + struct ptp_clock_request ptp_rq = { .type = PTP_CLK_REQ_PPS }; + u32 ecntl = 0; /* We cannot expect a graceful transmit stop without link !!! */ if (fep->link) { @@ -1215,6 +1231,8 @@ fec_stop(struct net_device *ndev) netdev_err(ndev, "Graceful transmit stop did not complete!\n"); } + fec_ptp_save_state(fep); + /* Whack a reset. We should wait for this. * For i.MX6SX SOC, enet use AXI bus, we use disable MAC * instead of reset MAC itself. @@ -1234,12 +1252,28 @@ fec_stop(struct net_device *ndev) writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); + if (fep->bufdesc_ex) + ecntl |= FEC_ECR_EN1588; + /* We have to keep ENET enabled to have MII interrupt stay working */ if (fep->quirks & FEC_QUIRK_ENET_MAC && !(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) { - writel(2, fep->hwp + FEC_ECNTRL); + ecntl |= FEC_ECR_ETHEREN; writel(rmii_mode, fep->hwp + FEC_R_CNTRL); } + + writel(ecntl, fep->hwp + FEC_ECNTRL); + + if (fep->bufdesc_ex) + fec_ptp_start_cyclecounter(ndev); + + /* Restart PPS if needed */ + if (fep->pps_enable) { + /* Clear flag so fec_ptp_enable_pps() doesn't return immediately */ + fep->pps_enable = 0; + fec_ptp_restore_state(fep); + fep->ptp_caps.enable(&fep->ptp_caps, &ptp_rq, 1); + } } diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 3dc3c0b626c21..c74d04f4b2fd2 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -633,7 +633,36 @@ void fec_ptp_stop(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); + if (fep->pps_enable) + fec_ptp_enable_pps(fep, 0); + cancel_delayed_work_sync(&fep->time_keep); if (fep->ptp_clock) ptp_clock_unregister(fep->ptp_clock); } + +void fec_ptp_save_state(struct fec_enet_private *fep) +{ + u32 atime_inc_corr; + + fec_ptp_gettime(&fep->ptp_caps, &fep->ptp_saved_state.ts_phc); + fep->ptp_saved_state.ns_sys = ktime_get_ns(); + + fep->ptp_saved_state.at_corr = readl(fep->hwp + FEC_ATIME_CORR); + atime_inc_corr = readl(fep->hwp + FEC_ATIME_INC) & FEC_T_INC_CORR_MASK; + fep->ptp_saved_state.at_inc_corr = (u8)(atime_inc_corr >> FEC_T_INC_CORR_OFFSET); +} + +int fec_ptp_restore_state(struct fec_enet_private *fep) +{ + u32 atime_inc = readl(fep->hwp + FEC_ATIME_INC) & FEC_T_INC_MASK; + u64 ns_sys; + + writel(fep->ptp_saved_state.at_corr, fep->hwp + FEC_ATIME_CORR); + atime_inc |= ((u32)fep->ptp_saved_state.at_inc_corr) << FEC_T_INC_CORR_OFFSET; + writel(atime_inc, fep->hwp + FEC_ATIME_INC); + + ns_sys = ktime_get_ns() - fep->ptp_saved_state.ns_sys; + timespec64_add_ns(&fep->ptp_saved_state.ts_phc, ns_sys); + return fec_ptp_settime(&fep->ptp_caps, &fep->ptp_saved_state.ts_phc); +} From 6ab55ec0a938c7f943a4edba3d6514f775983887 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Wed, 24 Aug 2022 16:16:54 +0800 Subject: [PATCH 295/654] ALSA: control: Fix an out-of-bounds bug in get_ctl_id_hash() Since the user can control the arguments provided to the kernel by the ioctl() system call, an out-of-bounds bug occurs when the 'id->name' provided by the user does not end with '\0'. The following log can reveal it: [ 10.002313] BUG: KASAN: stack-out-of-bounds in snd_ctl_find_id+0x36c/0x3a0 [ 10.002895] Read of size 1 at addr ffff888109f5fe28 by task snd/439 [ 10.004934] Call Trace: [ 10.007140] snd_ctl_find_id+0x36c/0x3a0 [ 10.007489] snd_ctl_ioctl+0x6cf/0x10e0 Fix this by checking the bound of 'id->name' in the loop. Fixes: c27e1efb61c5 ("ALSA: control: Use xarray for faster lookups") Signed-off-by: Zheyu Ma Link: https://lore.kernel.org/r/20220824081654.3767739-1-zheyuma97@gmail.com Signed-off-by: Takashi Iwai --- sound/core/control.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/core/control.c b/sound/core/control.c index f3e893715369f..e8fc4c511e5ff 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -385,14 +385,14 @@ static bool elem_id_matches(const struct snd_kcontrol *kctl, #define MULTIPLIER 37 static unsigned long get_ctl_id_hash(const struct snd_ctl_elem_id *id) { + int i; unsigned long h; - const unsigned char *p; h = id->iface; h = MULTIPLIER * h + id->device; h = MULTIPLIER * h + id->subdevice; - for (p = id->name; *p; p++) - h = MULTIPLIER * h + *p; + for (i = 0; id->name[i] && i < SNDRV_CTL_ELEM_ID_NAME_MAXLEN; i++) + h = MULTIPLIER * h + id->name[i]; h = MULTIPLIER * h + id->index; h &= LONG_MAX; return h; From c624c58e08b15105662b9ab9be23d14a6b945a49 Mon Sep 17 00:00:00 2001 From: lily Date: Mon, 22 Aug 2022 22:44:11 -0700 Subject: [PATCH 296/654] net/core/skbuff: Check the return value of skb_copy_bits() skb_copy_bits() could fail, which requires a check on the return value. Signed-off-by: Li Zhong Signed-off-by: David S. Miller --- net/core/skbuff.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 974bbbbe7138a..5ea1d074a920b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4205,9 +4205,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, SKB_GSO_CB(nskb)->csum_start = skb_headroom(nskb) + doffset; } else { - skb_copy_bits(head_skb, offset, - skb_put(nskb, len), - len); + if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len)) + goto err; } continue; } From 1227c1771dd2ad44318aa3ab9e3a293b3f34ff2a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:44 -0700 Subject: [PATCH 297/654] net: Fix data-races around sysctl_[rw]mem_(max|default). While reading sysctl_[rw]mem_(max|default), they can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/core/filter.c | 4 ++-- net/core/sock.c | 8 ++++---- net/ipv4/ip_output.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index e8508aaafd27d..c4f14ad820294 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5034,14 +5034,14 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname, /* Only some socketops are supported */ switch (optname) { case SO_RCVBUF: - val = min_t(u32, val, sysctl_rmem_max); + val = min_t(u32, val, READ_ONCE(sysctl_rmem_max)); val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF)); break; case SO_SNDBUF: - val = min_t(u32, val, sysctl_wmem_max); + val = min_t(u32, val, READ_ONCE(sysctl_wmem_max)); val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; WRITE_ONCE(sk->sk_sndbuf, diff --git a/net/core/sock.c b/net/core/sock.c index 4cb957d934a25..303af52f3b793 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1101,7 +1101,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, * play 'guess the biggest size' games. RCVBUF/SNDBUF * are treated in BSD as hints */ - val = min_t(u32, val, sysctl_wmem_max); + val = min_t(u32, val, READ_ONCE(sysctl_wmem_max)); set_sndbuf: /* Ensure val * 2 fits into an int, to prevent max_t() * from treating it as a negative value. @@ -1133,7 +1133,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, * play 'guess the biggest size' games. RCVBUF/SNDBUF * are treated in BSD as hints */ - __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max)); + __sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max))); break; case SO_RCVBUFFORCE: @@ -3309,8 +3309,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) timer_setup(&sk->sk_timer, NULL, 0); sk->sk_allocation = GFP_KERNEL; - sk->sk_rcvbuf = sysctl_rmem_default; - sk->sk_sndbuf = sysctl_wmem_default; + sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default); + sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default); sk->sk_state = TCP_CLOSE; sk_set_socket(sk, sock); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d7bd1daf022b5..04e2034f2f8ed 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1730,7 +1730,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; - sk->sk_sndbuf = sysctl_wmem_default; + sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default); ipc.sockc.mark = fl4.flowi4_mark; err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, &ipc, &rt, MSG_DONTWAIT); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 78b654ff421b1..290019de766dc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -239,7 +239,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, if (wscale_ok) { /* Set window scaling on max possible window */ space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); - space = max_t(u32, space, sysctl_rmem_max); + space = max_t(u32, space, READ_ONCE(sysctl_rmem_max)); space = min_t(u32, space, *window_clamp); *rcv_wscale = clamp_t(int, ilog2(space) - 15, 0, TCP_MAX_WSCALE); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 9d43277b8b4fe..a56fd0b5a430a 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1280,12 +1280,12 @@ static void set_sock_size(struct sock *sk, int mode, int val) lock_sock(sk); if (mode) { val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2, - sysctl_wmem_max); + READ_ONCE(sysctl_wmem_max)); sk->sk_sndbuf = val * 2; sk->sk_userlocks |= SOCK_SNDBUF_LOCK; } else { val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2, - sysctl_rmem_max); + READ_ONCE(sysctl_rmem_max)); sk->sk_rcvbuf = val * 2; sk->sk_userlocks |= SOCK_RCVBUF_LOCK; } From bf955b5ab8f6f7b0632cdef8e36b14e4f6e77829 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:45 -0700 Subject: [PATCH 298/654] net: Fix data-races around weight_p and dev_weight_[rt]x_bias. While reading weight_p, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Also, dev_[rt]x_weight can be read/written at the same time. So, we need to use READ_ONCE() and WRITE_ONCE() for its access. Moreover, to use the same weight_p while changing dev_[rt]x_weight, we add a mutex in proc_do_dev_weight(). Fixes: 3d48b53fb2ae ("net: dev_weight: TX/RX orthogonality") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- net/core/sysctl_net_core.c | 15 +++++++++------ net/sched/sch_generic.c | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 716df64fcfa57..b5b92dcd5eea4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5918,7 +5918,7 @@ static int process_backlog(struct napi_struct *napi, int quota) net_rps_action_and_irq_enable(sd); } - napi->weight = dev_rx_weight; + napi->weight = READ_ONCE(dev_rx_weight); while (again) { struct sk_buff *skb; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 71a13596ea2bf..725891527814c 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -234,14 +234,17 @@ static int set_default_qdisc(struct ctl_table *table, int write, static int proc_do_dev_weight(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - int ret; + static DEFINE_MUTEX(dev_weight_mutex); + int ret, weight; + mutex_lock(&dev_weight_mutex); ret = proc_dointvec(table, write, buffer, lenp, ppos); - if (ret != 0) - return ret; - - dev_rx_weight = weight_p * dev_weight_rx_bias; - dev_tx_weight = weight_p * dev_weight_tx_bias; + if (!ret && write) { + weight = READ_ONCE(weight_p); + WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias); + WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias); + } + mutex_unlock(&dev_weight_mutex); return ret; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index d47b9689eba6a..99b697ad2b983 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -409,7 +409,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets) void __qdisc_run(struct Qdisc *q) { - int quota = dev_tx_weight; + int quota = READ_ONCE(dev_tx_weight); int packets; while (qdisc_restart(q, &packets)) { From 5dcd08cd19912892586c6082d56718333e2d19db Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:46 -0700 Subject: [PATCH 299/654] net: Fix data-races around netdev_max_backlog. While reading netdev_max_backlog, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. While at it, we remove the unnecessary spaces in the doc. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- Documentation/admin-guide/sysctl/net.rst | 2 +- net/core/dev.c | 4 ++-- net/core/gro_cells.c | 2 +- net/xfrm/espintcp.c | 2 +- net/xfrm/xfrm_input.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 805f2281e000a..60d44165fba76 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -271,7 +271,7 @@ poll cycle or the number of packets processed reaches netdev_budget. netdev_max_backlog ------------------ -Maximum number of packets, queued on the INPUT side, when the interface +Maximum number of packets, queued on the INPUT side, when the interface receives packets faster than kernel can process them. netdev_rss_key diff --git a/net/core/dev.c b/net/core/dev.c index b5b92dcd5eea4..07da69c1ac0a4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4624,7 +4624,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) struct softnet_data *sd; unsigned int old_flow, new_flow; - if (qlen < (netdev_max_backlog >> 1)) + if (qlen < (READ_ONCE(netdev_max_backlog) >> 1)) return false; sd = this_cpu_ptr(&softnet_data); @@ -4672,7 +4672,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, if (!netif_running(skb->dev)) goto drop; qlen = skb_queue_len(&sd->input_pkt_queue); - if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { + if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) { if (qlen) { enqueue: __skb_queue_tail(&sd->input_pkt_queue, skb); diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index 541c7a72a28a4..21619c70a82b7 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -26,7 +26,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) cell = this_cpu_ptr(gcells->cells); - if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { + if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(netdev_max_backlog)) { drop: dev_core_stats_rx_dropped_inc(dev); kfree_skb(skb); diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 82d14eea1b5ad..974eb97b77d22 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -168,7 +168,7 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb) { struct espintcp_ctx *ctx = espintcp_getctx(sk); - if (skb_queue_len(&ctx->out_queue) >= netdev_max_backlog) + if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog)) return -ENOBUFS; __skb_queue_tail(&ctx->out_queue, skb); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 70a8c36f0ba6e..b2f4ec9c537f0 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -782,7 +782,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, trans = this_cpu_ptr(&xfrm_trans_tasklet); - if (skb_queue_len(&trans->queue) >= netdev_max_backlog) + if (skb_queue_len(&trans->queue) >= READ_ONCE(netdev_max_backlog)) return -ENOBUFS; BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb)); From 61adf447e38664447526698872e21c04623afb8e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:47 -0700 Subject: [PATCH 300/654] net: Fix data-races around netdev_tstamp_prequeue. While reading netdev_tstamp_prequeue, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 3b098e2d7c69 ("net: Consistent skb timestamping") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 07da69c1ac0a4..4705e6630efa3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4928,7 +4928,7 @@ static int netif_rx_internal(struct sk_buff *skb) { int ret; - net_timestamp_check(netdev_tstamp_prequeue, skb); + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); trace_netif_rx(skb); @@ -5281,7 +5281,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, int ret = NET_RX_DROP; __be16 type; - net_timestamp_check(!netdev_tstamp_prequeue, skb); + net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb); trace_netif_receive_skb(skb); @@ -5664,7 +5664,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb) { int ret; - net_timestamp_check(netdev_tstamp_prequeue, skb); + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; @@ -5694,7 +5694,7 @@ void netif_receive_skb_list_internal(struct list_head *head) INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { - net_timestamp_check(netdev_tstamp_prequeue, skb); + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); skb_list_del_init(skb); if (!skb_defer_rx_timestamp(skb)) list_add_tail(&skb->list, &sublist); From 6bae8ceb90ba76cdba39496db936164fa672b9be Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:48 -0700 Subject: [PATCH 301/654] ratelimit: Fix data-races in ___ratelimit(). While reading rs->interval and rs->burst, they can be changed concurrently via sysctl (e.g. net_ratelimit_state). Thus, we need to add READ_ONCE() to their readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- lib/ratelimit.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/ratelimit.c b/lib/ratelimit.c index e01a93f46f833..ce945c17980b9 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -26,10 +26,16 @@ */ int ___ratelimit(struct ratelimit_state *rs, const char *func) { + /* Paired with WRITE_ONCE() in .proc_handler(). + * Changing two values seperately could be inconsistent + * and some message could be lost. (See: net_ratelimit_state). + */ + int interval = READ_ONCE(rs->interval); + int burst = READ_ONCE(rs->burst); unsigned long flags; int ret; - if (!rs->interval) + if (!interval) return 1; /* @@ -44,7 +50,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func) if (!rs->begin) rs->begin = jiffies; - if (time_is_before_jiffies(rs->begin + rs->interval)) { + if (time_is_before_jiffies(rs->begin + interval)) { if (rs->missed) { if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) { printk_deferred(KERN_WARNING @@ -56,7 +62,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func) rs->begin = jiffies; rs->printed = 0; } - if (rs->burst && rs->burst > rs->printed) { + if (burst && burst > rs->printed) { rs->printed++; ret = 1; } else { From 7de6d09f51917c829af2b835aba8bb5040f8e86a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:49 -0700 Subject: [PATCH 302/654] net: Fix data-races around sysctl_optmem_max. While reading sysctl_optmem_max, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/core/bpf_sk_storage.c | 5 +++-- net/core/filter.c | 9 +++++---- net/core/sock.c | 8 +++++--- net/ipv4/ip_sockglue.c | 6 +++--- net/ipv6/ipv6_sockglue.c | 4 ++-- 5 files changed, 18 insertions(+), 14 deletions(-) diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 1b7f385643b4c..94374d529ea42 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -310,11 +310,12 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap, void *owner, u32 size) { + int optmem_max = READ_ONCE(sysctl_optmem_max); struct sock *sk = (struct sock *)owner; /* same check as in sock_kmalloc() */ - if (size <= sysctl_optmem_max && - atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { + if (size <= optmem_max && + atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { atomic_add(size, &sk->sk_omem_alloc); return 0; } diff --git a/net/core/filter.c b/net/core/filter.c index c4f14ad820294..c191db80ce93c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1214,10 +1214,11 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp) { u32 filter_size = bpf_prog_size(fp->prog->len); + int optmem_max = READ_ONCE(sysctl_optmem_max); /* same check as in sock_kmalloc() */ - if (filter_size <= sysctl_optmem_max && - atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) { + if (filter_size <= optmem_max && + atomic_read(&sk->sk_omem_alloc) + filter_size < optmem_max) { atomic_add(filter_size, &sk->sk_omem_alloc); return true; } @@ -1548,7 +1549,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - if (bpf_prog_size(prog->len) > sysctl_optmem_max) + if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) err = -ENOMEM; else err = reuseport_attach_prog(sk, prog); @@ -1615,7 +1616,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) } } else { /* BPF_PROG_TYPE_SOCKET_FILTER */ - if (bpf_prog_size(prog->len) > sysctl_optmem_max) { + if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) { err = -ENOMEM; goto err_prog_put; } diff --git a/net/core/sock.c b/net/core/sock.c index 303af52f3b793..95abf4604d884 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2536,7 +2536,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */ if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) > - sysctl_optmem_max) + READ_ONCE(sysctl_optmem_max)) return NULL; skb = alloc_skb(size, priority); @@ -2554,8 +2554,10 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, */ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) { - if ((unsigned int)size <= sysctl_optmem_max && - atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { + int optmem_max = READ_ONCE(sysctl_optmem_max); + + if ((unsigned int)size <= optmem_max && + atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { void *mem; /* First do the add, to avoid the race if kmalloc * might sleep. diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index a8a323ecbb54b..e49a61a053a68 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -772,7 +772,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) if (optlen < GROUP_FILTER_SIZE(0)) return -EINVAL; - if (optlen > sysctl_optmem_max) + if (optlen > READ_ONCE(sysctl_optmem_max)) return -ENOBUFS; gsf = memdup_sockptr(optval, optlen); @@ -808,7 +808,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (optlen < size0) return -EINVAL; - if (optlen > sysctl_optmem_max - 4) + if (optlen > READ_ONCE(sysctl_optmem_max) - 4) return -ENOBUFS; p = kmalloc(optlen + 4, GFP_KERNEL); @@ -1233,7 +1233,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, if (optlen < IP_MSFILTER_SIZE(0)) goto e_inval; - if (optlen > sysctl_optmem_max) { + if (optlen > READ_ONCE(sysctl_optmem_max)) { err = -ENOBUFS; break; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 222f6bf220ba0..e0dcc7a193df2 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -210,7 +210,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (optlen < GROUP_FILTER_SIZE(0)) return -EINVAL; - if (optlen > sysctl_optmem_max) + if (optlen > READ_ONCE(sysctl_optmem_max)) return -ENOBUFS; gsf = memdup_sockptr(optval, optlen); @@ -244,7 +244,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (optlen < size0) return -EINVAL; - if (optlen > sysctl_optmem_max - 4) + if (optlen > READ_ONCE(sysctl_optmem_max) - 4) return -ENOBUFS; p = kmalloc(optlen + 4, GFP_KERNEL); From d2154b0afa73c0159b2856f875c6b4fe7cf6a95e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:50 -0700 Subject: [PATCH 303/654] net: Fix a data-race around sysctl_tstamp_allow_data. While reading sysctl_tstamp_allow_data, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: b245be1f4db1 ("net-timestamp: no-payload only sysctl") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5ea1d074a920b..84bb5e188d0d4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4797,7 +4797,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) { bool ret; - if (likely(sysctl_tstamp_allow_data || tsonly)) + if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly)) return true; read_lock_bh(&sk->sk_callback_lock); From c42b7cddea47503411bfb5f2f93a4154aaffa2d9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:51 -0700 Subject: [PATCH 304/654] net: Fix a data-race around sysctl_net_busy_poll. While reading sysctl_net_busy_poll, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 060212928670 ("net: add low latency socket poll") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/net/busy_poll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index c4898fcbf923b..f90f0021f5f2d 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -33,7 +33,7 @@ extern unsigned int sysctl_net_busy_poll __read_mostly; static inline bool net_busy_loop_on(void) { - return sysctl_net_busy_poll; + return READ_ONCE(sysctl_net_busy_poll); } static inline bool sk_can_busy_loop(const struct sock *sk) From e59ef36f0795696ab229569c153936bfd068d21c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:52 -0700 Subject: [PATCH 305/654] net: Fix a data-race around sysctl_net_busy_read. While reading sysctl_net_busy_read, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 2d48d67fa8cd ("net: poll/select low latency socket support") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index 95abf4604d884..788c1372663cb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3367,7 +3367,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) #ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; - sk->sk_ll_usec = sysctl_net_busy_read; + sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read); #endif sk->sk_max_pacing_rate = ~0UL; From 2e0c42374ee32e72948559d2ae2f7ba3dc6b977c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:53 -0700 Subject: [PATCH 306/654] net: Fix a data-race around netdev_budget. While reading netdev_budget, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 51b0bdedb8e7 ("[NET]: Separate two usages of netdev_max_backlog.") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 4705e6630efa3..c83e23cfc57d7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6666,7 +6666,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + usecs_to_jiffies(netdev_budget_usecs); - int budget = netdev_budget; + int budget = READ_ONCE(netdev_budget); LIST_HEAD(list); LIST_HEAD(repoll); From 657b991afb89d25fe6c4783b1b75a8ad4563670d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:54 -0700 Subject: [PATCH 307/654] net: Fix data-races around sysctl_max_skb_frags. While reading sysctl_max_skb_frags, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 5f74f82ea34c ("net:Add sysctl_max_skb_frags") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 ++-- net/mptcp/protocol.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bbe2187536620..e5011c136fdb7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1000,7 +1000,7 @@ static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, i = skb_shinfo(skb)->nr_frags; can_coalesce = skb_can_coalesce(skb, i, page, offset); - if (!can_coalesce && i >= sysctl_max_skb_frags) { + if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) { tcp_mark_push(tp, skb); goto new_segment; } @@ -1354,7 +1354,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i >= sysctl_max_skb_frags) { + if (i >= READ_ONCE(sysctl_max_skb_frags)) { tcp_mark_push(tp, skb); goto new_segment; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index da4257504fad0..d398f3810662b 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1263,7 +1263,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, i = skb_shinfo(skb)->nr_frags; can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset); - if (!can_coalesce && i >= sysctl_max_skb_frags) { + if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) { tcp_mark_push(tcp_sk(ssk), skb); goto alloc_skb; } From fa45d484c52c73f79db2c23b0cdfc6c6455093ad Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:55 -0700 Subject: [PATCH 308/654] net: Fix a data-race around netdev_budget_usecs. While reading netdev_budget_usecs, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 7acf8a1e8a28 ("Replace 2 jiffies with sysctl netdev_budget_usecs to enable softirq tuning") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index c83e23cfc57d7..8221322d86db9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6665,7 +6665,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + - usecs_to_jiffies(netdev_budget_usecs); + usecs_to_jiffies(READ_ONCE(netdev_budget_usecs)); int budget = READ_ONCE(netdev_budget); LIST_HEAD(list); LIST_HEAD(repoll); From af67508ea6cbf0e4ea27f8120056fa2efce127dd Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:56 -0700 Subject: [PATCH 309/654] net: Fix data-races around sysctl_fb_tunnels_only_for_init_net. While reading sysctl_fb_tunnels_only_for_init_net, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 79134e6ce2c9 ("net: do not create fallback tunnels for non-default namespaces") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/linux/netdevice.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1a3cb93c3dcce..6d3a33fd0cdb3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -640,9 +640,14 @@ extern int sysctl_devconf_inherit_init_net; */ static inline bool net_has_fallback_tunnels(const struct net *net) { - return !IS_ENABLED(CONFIG_SYSCTL) || - !sysctl_fb_tunnels_only_for_init_net || - (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1); +#if IS_ENABLED(CONFIG_SYSCTL) + int fb_tunnels_only_for_init_net = READ_ONCE(sysctl_fb_tunnels_only_for_init_net); + + return !fb_tunnels_only_for_init_net || + (net_eq(net, &init_net) && fb_tunnels_only_for_init_net == 1); +#else + return true; +#endif } static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) From a5612ca10d1aa05624ebe72633e0c8c792970833 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:57 -0700 Subject: [PATCH 310/654] net: Fix data-races around sysctl_devconf_inherit_init_net. While reading sysctl_devconf_inherit_init_net, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 856c395cfa63 ("net: introduce a knob to control whether to inherit devconf config") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 +++++++++ net/ipv4/devinet.c | 16 ++++++++++------ net/ipv6/addrconf.c | 5 ++--- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6d3a33fd0cdb3..05d6f3facd5a5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -650,6 +650,15 @@ static inline bool net_has_fallback_tunnels(const struct net *net) #endif } +static inline int net_inherit_devconf(void) +{ +#if IS_ENABLED(CONFIG_SYSCTL) + return READ_ONCE(sysctl_devconf_inherit_init_net); +#else + return 0; +#endif +} + static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) { #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 92b778e423df8..e8b9a9202fecd 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2682,23 +2682,27 @@ static __net_init int devinet_init_net(struct net *net) #endif if (!net_eq(net, &init_net)) { - if (IS_ENABLED(CONFIG_SYSCTL) && - sysctl_devconf_inherit_init_net == 3) { + switch (net_inherit_devconf()) { + case 3: /* copy from the current netns */ memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all, sizeof(ipv4_devconf)); memcpy(dflt, current->nsproxy->net_ns->ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt)); - } else if (!IS_ENABLED(CONFIG_SYSCTL) || - sysctl_devconf_inherit_init_net != 2) { - /* inherit == 0 or 1: copy from init_net */ + break; + case 0: + case 1: + /* copy from init_net */ memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf)); memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt)); + break; + case 2: + /* use compiled values */ + break; } - /* else inherit == 2: use compiled values */ } #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b624e3d8c5f0a..e15f64f22fa83 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7162,9 +7162,8 @@ static int __net_init addrconf_init_net(struct net *net) if (!dflt) goto err_alloc_dflt; - if (IS_ENABLED(CONFIG_SYSCTL) && - !net_eq(net, &init_net)) { - switch (sysctl_devconf_inherit_init_net) { + if (!net_eq(net, &init_net)) { + switch (net_inherit_devconf()) { case 1: /* copy from init_net */ memcpy(all, init_net.ipv6.devconf_all, sizeof(ipv6_devconf)); From 8db24af3f02ebdbf302196006ebb270c4c3a2706 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:58 -0700 Subject: [PATCH 311/654] net: Fix a data-race around gro_normal_batch. While reading gro_normal_batch, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 323ebb61e32b ("net: use listified RX for handling GRO_NORMAL skbs") Signed-off-by: Kuniyuki Iwashima Acked-by: Edward Cree Signed-off-by: David S. Miller --- include/net/gro.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/gro.h b/include/net/gro.h index 867656b0739c0..24003dea8fa4d 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -439,7 +439,7 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, { list_add_tail(&skb->list, &napi->rx_list); napi->rx_count += segs; - if (napi->rx_count >= gro_normal_batch) + if (napi->rx_count >= READ_ONCE(gro_normal_batch)) gro_normal_list(napi); } From 05e49cfc89e4f325eebbc62d24dd122e55f94c23 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:59 -0700 Subject: [PATCH 312/654] net: Fix a data-race around netdev_unregister_timeout_secs. While reading netdev_unregister_timeout_secs, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 5aa3afe107d9 ("net: make unregister netdev warning timeout configurable") Signed-off-by: Kuniyuki Iwashima Acked-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 8221322d86db9..56c8b0921c9fd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10284,7 +10284,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list) return dev; if (time_after(jiffies, warning_time + - netdev_unregister_timeout_secs * HZ)) { + READ_ONCE(netdev_unregister_timeout_secs) * HZ)) { list_for_each_entry(dev, list, todo_list) { pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", dev->name, netdev_refcnt_read(dev)); From 3c9ba81d72047f2e81bb535d42856517b613aba7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:47:00 -0700 Subject: [PATCH 313/654] net: Fix a data-race around sysctl_somaxconn. While reading sysctl_somaxconn, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index 9b27c5e4e5ba8..7378375d3a5b6 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1801,7 +1801,7 @@ int __sys_listen(int fd, int backlog) sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock) { - somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; + somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn); if ((unsigned int)backlog > somaxconn) backlog = somaxconn; From c490a0b5a4f36da3918181a8acdc6991d967c5f3 Mon Sep 17 00:00:00 2001 From: Siddh Raman Pant Date: Tue, 23 Aug 2022 21:38:10 +0530 Subject: [PATCH 314/654] loop: Check for overflow while configuring loop The userspace can configure a loop using an ioctl call, wherein a configuration of type loop_config is passed (see lo_ioctl()'s case on line 1550 of drivers/block/loop.c). This proceeds to call loop_configure() which in turn calls loop_set_status_from_info() (see line 1050 of loop.c), passing &config->info which is of type loop_info64*. This function then sets the appropriate values, like the offset. loop_device has lo_offset of type loff_t (see line 52 of loop.c), which is typdef-chained to long long, whereas loop_info64 has lo_offset of type __u64 (see line 56 of include/uapi/linux/loop.h). The function directly copies offset from info to the device as follows (See line 980 of loop.c): lo->lo_offset = info->lo_offset; This results in an overflow, which triggers a warning in iomap_iter() due to a call to iomap_iter_done() which has: WARN_ON_ONCE(iter->iomap.offset > iter->pos); Thus, check for negative value during loop_set_status_from_info(). Bug report: https://syzkaller.appspot.com/bug?id=c620fe14aac810396d3c3edc9ad73848bf69a29e Reported-and-tested-by: syzbot+a8e049cd3abd342936b6@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Siddh Raman Pant Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220823160810.181275-1-code@siddh.me Signed-off-by: Jens Axboe --- drivers/block/loop.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e3c0ba93c1a34..ad92192c7d617 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -979,6 +979,11 @@ loop_set_status_from_info(struct loop_device *lo, lo->lo_offset = info->lo_offset; lo->lo_sizelimit = info->lo_sizelimit; + + /* loff_t vars have been assigned __u64 */ + if (lo->lo_offset < 0 || lo->lo_sizelimit < 0) + return -EOVERFLOW; + memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); lo->lo_file_name[LO_NAME_SIZE-1] = 0; lo->lo_flags = info->lo_flags; From 2cacedc873ab5f5945d8f1b71804b0bcea0383ff Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 24 Aug 2022 13:07:38 +0100 Subject: [PATCH 315/654] io_uring/net: fix must_hold annotation Fix up the io_alloc_notif()'s __must_hold as we don't have a ctx argument there but should get it from the slot instead. Reported-by: Stefan Metzmacher Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/cbb0a920f18e0aed590bf58300af817b9befb8a3.1661342812.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/notif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/notif.c b/io_uring/notif.c index 977736e82c1aa..568ff17dc5527 100644 --- a/io_uring/notif.c +++ b/io_uring/notif.c @@ -73,7 +73,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, } void io_notif_slot_flush(struct io_notif_slot *slot) - __must_hold(&ctx->uring_lock) + __must_hold(&slot->notif->ctx->uring_lock) { struct io_kiocb *notif = slot->notif; struct io_notif_data *nd = io_notif_to_data(notif); From 5a848b7c9e5e4d94390fbc391ccb81d40f3ccfb5 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 24 Aug 2022 13:07:39 +0100 Subject: [PATCH 316/654] io_uring/net: fix zc send link failing Failed requests should be marked with req_set_fail(), so links and cqe skipping work correctly, which is missing in io_sendzc(). Note, io_sendzc() return IOU_OK on failure, so the core code won't do the cleanup for us. Fixes: 06a5464be84e4 ("io_uring: wire send zc request type") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/e47d46fda9db30154ce66a549bb0d3380b780520.1661342812.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/net.c b/io_uring/net.c index f8cdf1dc3863b..d6310c655a0f6 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1023,6 +1023,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) } if (ret == -ERESTARTSYS) ret = -EINTR; + req_set_fail(req); } else if (zc->flags & IORING_RECVSEND_NOTIF_FLUSH) { io_notif_slot_flush_submit(notif_slot, 0); } From 986e263def32eec89153babf469859d837507d34 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 24 Aug 2022 13:07:40 +0100 Subject: [PATCH 317/654] io_uring/net: fix indentation Fix up indentation before we get complaints from tooling. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/bd5754e3764215ccd7fb04cd636ea9167aaa275d.1661342812.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/net.c b/io_uring/net.c index d6310c655a0f6..3adcb09ae2643 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -989,7 +989,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu, (u64)(uintptr_t)zc->buf, zc->len); if (unlikely(ret)) - return ret; + return ret; } else { ret = import_single_range(WRITE, zc->buf, zc->len, &iov, &msg.msg_iter); From 53bdc88aac9a21aae937452724fa4738cd843795 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 24 Aug 2022 13:07:41 +0100 Subject: [PATCH 318/654] io_uring/notif: order notif vs send CQEs Currently, there is no ordering between notification CQEs and completions of the send flushing it, this quite complicates the userspace, especially since we don't flush notification when the send(+flush) request fails, i.e. there will be only one CQE. What we can do is to make sure that notification completions come only after sends. The easiest way to achieve this is to not try to complete a notification inline from io_sendzc() but defer it to task_work, considering that io-wq sendzc is disallowed CQEs will be naturally ordered because task_works will only be executed after we're done with submission and so inline completion. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/cddfd1c2bf91f22b9fe08e13b7dffdd8f858a151.1661342812.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/notif.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/io_uring/notif.c b/io_uring/notif.c index 568ff17dc5527..96f076b175e07 100644 --- a/io_uring/notif.c +++ b/io_uring/notif.c @@ -81,8 +81,10 @@ void io_notif_slot_flush(struct io_notif_slot *slot) slot->notif = NULL; /* drop slot's master ref */ - if (refcount_dec_and_test(&nd->uarg.refcnt)) - io_notif_complete(notif); + if (refcount_dec_and_test(&nd->uarg.refcnt)) { + notif->io_task_work.func = __io_notif_complete_tw; + io_req_task_work_add(notif); + } } __cold int io_notif_unregister(struct io_ring_ctx *ctx) From 5916943943d19a854238d50d1fe2047467cbeb3c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 24 Aug 2022 13:07:42 +0100 Subject: [PATCH 319/654] io_uring: conditional ->async_data allocation There are opcodes that need ->async_data only in some cases and allocation it unconditionally may hurt performance. Add an option to opdef to make move the allocation part from the core io_uring to opcode specific code. Note, we can't just set opdef->async_size to zero because there are other helpers that rely on it, e.g. io_alloc_async_data(). Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9dc62be9e88dd0ed63c48365340e8922d2498293.1661342812.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 7 ++++--- io_uring/opdef.h | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ebfdb2212ec25..77616279000b0 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1450,9 +1450,10 @@ int io_req_prep_async(struct io_kiocb *req) return 0; if (WARN_ON_ONCE(req_has_async_data(req))) return -EFAULT; - if (io_alloc_async_data(req)) - return -EAGAIN; - + if (!io_op_defs[req->opcode].manual_alloc) { + if (io_alloc_async_data(req)) + return -EAGAIN; + } return def->prep_async(req); } diff --git a/io_uring/opdef.h b/io_uring/opdef.h index ece8ed4f96c43..763c6e54e2ee5 100644 --- a/io_uring/opdef.h +++ b/io_uring/opdef.h @@ -25,6 +25,8 @@ struct io_op_def { unsigned ioprio : 1; /* supports iopoll */ unsigned iopoll : 1; + /* opcode specific path will handle ->async_data allocation if needed */ + unsigned manual_alloc : 1; /* size of async data needed, if any */ unsigned short async_size; From 6afd9db630b037c7f0bc939368216512568de607 Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Tue, 23 Aug 2022 08:12:19 +0200 Subject: [PATCH 320/654] lib/test_cpumask: drop cpu_possible_mask full test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the number of CPUs that can possibly be brought online is known at boot time, e.g. when HOTPLUG is disabled, nr_cpu_ids may be smaller than NR_CPUS. In that case, cpu_possible_mask would not be completely filled, and cpumask_full(cpu_possible_mask) can return false for valid system configurations. Without this test, cpu_possible_mask contents are still constrained by a check on cpumask_weight(), as well as tests in test_cpumask_first(), test_cpumask_last(), test_cpumask_next(), and test_cpumask_iterators(). Fixes: c41e8866c28c ("lib/test: introduce cpumask KUnit test suite") Link: https://lore.kernel.org/lkml/346cb279-8e75-24b0-7d12-9803f2b41c73@riseup.net/ Reported-by: Maíra Canal Signed-off-by: Sander Vanheule Tested-by: Maíra Canal Reviewed-by: David Gow Signed-off-by: Yury Norov --- lib/test_cpumask.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/test_cpumask.c b/lib/test_cpumask.c index a31a1622f1f6e..4ebf9f5805f32 100644 --- a/lib/test_cpumask.c +++ b/lib/test_cpumask.c @@ -54,7 +54,6 @@ static cpumask_t mask_all; static void test_cpumask_weight(struct kunit *test) { KUNIT_EXPECT_TRUE(test, cpumask_empty(&mask_empty)); - KUNIT_EXPECT_TRUE(test, cpumask_full(cpu_possible_mask)); KUNIT_EXPECT_TRUE(test, cpumask_full(&mask_all)); KUNIT_EXPECT_EQ(test, 0, cpumask_weight(&mask_empty)); From fbbc94d8486c6d683650269154c9a7d8897f35d7 Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Tue, 23 Aug 2022 08:12:20 +0200 Subject: [PATCH 321/654] lib/test_cpumask: fix cpu_possible_mask last test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since cpumask_first() on the cpu_possible_mask must return at most nr_cpu_ids - 1 for a valid result, cpumask_last() cannot return anything larger than this value. As test_cpumask_weight() also verifies that the total weight of cpu_possible_mask must equal nr_cpu_ids, the last bit set in this mask must be at nr_cpu_ids - 1. Fixes: c41e8866c28c ("lib/test: introduce cpumask KUnit test suite") Link: https://lore.kernel.org/lkml/346cb279-8e75-24b0-7d12-9803f2b41c73@riseup.net/ Reported-by: Maíra Canal Signed-off-by: Sander Vanheule Tested-by: Maíra Canal Reviewed-by: David Gow Acked-by: Yury Norov Signed-off-by: Yury Norov --- lib/test_cpumask.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_cpumask.c b/lib/test_cpumask.c index 4ebf9f5805f32..4d353614d853e 100644 --- a/lib/test_cpumask.c +++ b/lib/test_cpumask.c @@ -73,7 +73,7 @@ static void test_cpumask_first(struct kunit *test) static void test_cpumask_last(struct kunit *test) { KUNIT_EXPECT_LE(test, nr_cpumask_bits, cpumask_last(&mask_empty)); - KUNIT_EXPECT_EQ(test, nr_cpumask_bits - 1, cpumask_last(cpu_possible_mask)); + KUNIT_EXPECT_EQ(test, nr_cpu_ids - 1, cpumask_last(cpu_possible_mask)); } static void test_cpumask_next(struct kunit *test) From d3c0ca4992cc21b0e1d7d8e530faa4ab16f8ec41 Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Tue, 23 Aug 2022 08:12:21 +0200 Subject: [PATCH 322/654] lib/test_cpumask: follow KUnit style guidelines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cpumask test suite doesn't follow the KUnit style guidelines, as laid out in Documentation/dev-tools/kunit/style.rst. The file is renamed to lib/cpumask_kunit.c to clearly distinguish it from other, non-KUnit, tests. Link: https://lore.kernel.org/lkml/346cb279-8e75-24b0-7d12-9803f2b41c73@riseup.net/ Suggested-by: Maíra Canal Signed-off-by: Sander Vanheule Reviewed-by: Maíra Canal Reviewed-by: David Gow Acked-by: Yury Norov Signed-off-by: Yury Norov --- lib/Kconfig.debug | 7 +++++-- lib/Makefile | 2 +- lib/{test_cpumask.c => cpumask_kunit.c} | 0 3 files changed, 6 insertions(+), 3 deletions(-) rename lib/{test_cpumask.c => cpumask_kunit.c} (100%) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 072e4b289c13e..bcbe60d6c80c1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2029,13 +2029,16 @@ config LKDTM Documentation on how to use the module can be found in Documentation/fault-injection/provoke-crashes.rst -config TEST_CPUMASK - tristate "cpumask tests" if !KUNIT_ALL_TESTS +config CPUMASK_KUNIT_TEST + tristate "KUnit test for cpumask" if !KUNIT_ALL_TESTS depends on KUNIT default KUNIT_ALL_TESTS help Enable to turn on cpumask tests, running at boot or module load time. + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + If unsure, say N. config TEST_LIST_SORT diff --git a/lib/Makefile b/lib/Makefile index 5927d7fa08063..ffabc30a27d4e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -60,6 +60,7 @@ obj-$(CONFIG_TEST_BPF) += test_bpf.o obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o obj-$(CONFIG_TEST_BITOPS) += test_bitops.o CFLAGS_test_bitops.o += -Werror +obj-$(CONFIG_CPUMASK_KUNIT_TEST) += cpumask_kunit.o obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o obj-$(CONFIG_TEST_SIPHASH) += test_siphash.o obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o @@ -100,7 +101,6 @@ obj-$(CONFIG_TEST_HMM) += test_hmm.o obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o obj-$(CONFIG_TEST_REF_TRACKER) += test_ref_tracker.o -obj-$(CONFIG_TEST_CPUMASK) += test_cpumask.o CFLAGS_test_fprobe.o += $(CC_FLAGS_FTRACE) obj-$(CONFIG_FPROBE_SANITY_TEST) += test_fprobe.o # diff --git a/lib/test_cpumask.c b/lib/cpumask_kunit.c similarity index 100% rename from lib/test_cpumask.c rename to lib/cpumask_kunit.c From bf5413586b8dbdb8eedee41cfd1450a4e587e845 Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Tue, 23 Aug 2022 08:12:22 +0200 Subject: [PATCH 323/654] lib/cpumask_kunit: log mask contents For extra context, log the contents of the masks under test. This should help with finding out why a certain test fails. Link: https://lore.kernel.org/lkml/CABVgOSkPXBc-PWk1zBZRQ_Tt+Sz1ruFHBj3ixojymZF=Vi4tpQ@mail.gmail.com/ Suggested-by: David Gow Signed-off-by: Sander Vanheule Signed-off-by: Yury Norov --- lib/cpumask_kunit.c | 51 +++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/lib/cpumask_kunit.c b/lib/cpumask_kunit.c index 4d353614d853e..ecbeec72221ea 100644 --- a/lib/cpumask_kunit.c +++ b/lib/cpumask_kunit.c @@ -9,6 +9,10 @@ #include #include +#define MASK_MSG(m) \ + "%s contains %sCPUs %*pbl", #m, (cpumask_weight(m) ? "" : "no "), \ + nr_cpumask_bits, cpumask_bits(m) + #define EXPECT_FOR_EACH_CPU_EQ(test, mask) \ do { \ const cpumask_t *m = (mask); \ @@ -16,7 +20,7 @@ int cpu, iter = 0; \ for_each_cpu(cpu, m) \ iter++; \ - KUNIT_EXPECT_EQ((test), mask_weight, iter); \ + KUNIT_EXPECT_EQ_MSG((test), mask_weight, iter, MASK_MSG(mask)); \ } while (0) #define EXPECT_FOR_EACH_CPU_NOT_EQ(test, mask) \ @@ -26,7 +30,7 @@ int cpu, iter = 0; \ for_each_cpu_not(cpu, m) \ iter++; \ - KUNIT_EXPECT_EQ((test), nr_cpu_ids - mask_weight, iter); \ + KUNIT_EXPECT_EQ_MSG((test), nr_cpu_ids - mask_weight, iter, MASK_MSG(mask)); \ } while (0) #define EXPECT_FOR_EACH_CPU_WRAP_EQ(test, mask) \ @@ -36,7 +40,7 @@ int cpu, iter = 0; \ for_each_cpu_wrap(cpu, m, nr_cpu_ids / 2) \ iter++; \ - KUNIT_EXPECT_EQ((test), mask_weight, iter); \ + KUNIT_EXPECT_EQ_MSG((test), mask_weight, iter, MASK_MSG(mask)); \ } while (0) #define EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, name) \ @@ -45,7 +49,7 @@ int cpu, iter = 0; \ for_each_##name##_cpu(cpu) \ iter++; \ - KUNIT_EXPECT_EQ((test), mask_weight, iter); \ + KUNIT_EXPECT_EQ_MSG((test), mask_weight, iter, MASK_MSG(cpu_##name##_mask)); \ } while (0) static cpumask_t mask_empty; @@ -53,36 +57,43 @@ static cpumask_t mask_all; static void test_cpumask_weight(struct kunit *test) { - KUNIT_EXPECT_TRUE(test, cpumask_empty(&mask_empty)); - KUNIT_EXPECT_TRUE(test, cpumask_full(&mask_all)); + KUNIT_EXPECT_TRUE_MSG(test, cpumask_empty(&mask_empty), MASK_MSG(&mask_empty)); + KUNIT_EXPECT_TRUE_MSG(test, cpumask_full(&mask_all), MASK_MSG(&mask_all)); - KUNIT_EXPECT_EQ(test, 0, cpumask_weight(&mask_empty)); - KUNIT_EXPECT_EQ(test, nr_cpu_ids, cpumask_weight(cpu_possible_mask)); - KUNIT_EXPECT_EQ(test, nr_cpumask_bits, cpumask_weight(&mask_all)); + KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_weight(&mask_empty), MASK_MSG(&mask_empty)); + KUNIT_EXPECT_EQ_MSG(test, nr_cpu_ids, cpumask_weight(cpu_possible_mask), + MASK_MSG(cpu_possible_mask)); + KUNIT_EXPECT_EQ_MSG(test, nr_cpumask_bits, cpumask_weight(&mask_all), MASK_MSG(&mask_all)); } static void test_cpumask_first(struct kunit *test) { - KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first(&mask_empty)); - KUNIT_EXPECT_EQ(test, 0, cpumask_first(cpu_possible_mask)); + KUNIT_EXPECT_LE_MSG(test, nr_cpu_ids, cpumask_first(&mask_empty), MASK_MSG(&mask_empty)); + KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_first(cpu_possible_mask), MASK_MSG(cpu_possible_mask)); - KUNIT_EXPECT_EQ(test, 0, cpumask_first_zero(&mask_empty)); - KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first_zero(cpu_possible_mask)); + KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_first_zero(&mask_empty), MASK_MSG(&mask_empty)); + KUNIT_EXPECT_LE_MSG(test, nr_cpu_ids, cpumask_first_zero(cpu_possible_mask), + MASK_MSG(cpu_possible_mask)); } static void test_cpumask_last(struct kunit *test) { - KUNIT_EXPECT_LE(test, nr_cpumask_bits, cpumask_last(&mask_empty)); - KUNIT_EXPECT_EQ(test, nr_cpu_ids - 1, cpumask_last(cpu_possible_mask)); + KUNIT_EXPECT_LE_MSG(test, nr_cpumask_bits, cpumask_last(&mask_empty), + MASK_MSG(&mask_empty)); + KUNIT_EXPECT_EQ_MSG(test, nr_cpu_ids - 1, cpumask_last(cpu_possible_mask), + MASK_MSG(cpu_possible_mask)); } static void test_cpumask_next(struct kunit *test) { - KUNIT_EXPECT_EQ(test, 0, cpumask_next_zero(-1, &mask_empty)); - KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next_zero(-1, cpu_possible_mask)); - - KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next(-1, &mask_empty)); - KUNIT_EXPECT_EQ(test, 0, cpumask_next(-1, cpu_possible_mask)); + KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_next_zero(-1, &mask_empty), MASK_MSG(&mask_empty)); + KUNIT_EXPECT_LE_MSG(test, nr_cpu_ids, cpumask_next_zero(-1, cpu_possible_mask), + MASK_MSG(cpu_possible_mask)); + + KUNIT_EXPECT_LE_MSG(test, nr_cpu_ids, cpumask_next(-1, &mask_empty), + MASK_MSG(&mask_empty)); + KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_next(-1, cpu_possible_mask), + MASK_MSG(cpu_possible_mask)); } static void test_cpumask_iterators(struct kunit *test) From 5d7fef0804b0a72a7efe196cd23b438edf84726c Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Tue, 23 Aug 2022 08:12:23 +0200 Subject: [PATCH 324/654] lib/cpumask_kunit: add tests file to MAINTAINERS cpumask related files are listed under the BITMAP API section, so the file with tests for cpumask should be added to that list. Signed-off-by: Sander Vanheule Reviewed-by: David Gow Acked-by: Yury Norov Signed-off-by: Yury Norov --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 9d7f64dc0efe8..59b3718e66d19 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3612,6 +3612,7 @@ F: include/linux/find.h F: include/linux/nodemask.h F: lib/bitmap.c F: lib/cpumask.c +F: lib/cpumask_kunit.c F: lib/find_bit.c F: lib/find_bit_benchmark.c F: lib/test_bitmap.c From a1d2eb51f0a33c28f5399a1610e66b3fbd24e884 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 19 Aug 2022 17:00:19 -0300 Subject: [PATCH 325/654] cifs: skip extra NULL byte in filenames Since commit: cifs: alloc_path_with_tree_prefix: do not append sep. if the path is empty alloc_path_with_tree_prefix() function was no longer including the trailing separator when @path is empty, although @out_len was still assuming a path separator thus adding an extra byte to the final filename. This has caused mount issues in some Synology servers due to the extra NULL byte in filenames when sending SMB2_CREATE requests with SMB2_FLAGS_DFS_OPERATIONS set. Fix this by checking if @path is not empty and then add extra byte for separator. Also, do not include any trailing NULL bytes in filename as MS-SMB2 requires it to be 8-byte aligned and not NULL terminated. Cc: stable@vger.kernel.org Fixes: 7eacba3b00a3 ("cifs: alloc_path_with_tree_prefix: do not append sep. if the path is empty") Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 91cfc5b47ac7c..128e44e575280 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2572,19 +2572,15 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len, path_len = UniStrnlen((wchar_t *)path, PATH_MAX); - /* - * make room for one path separator between the treename and - * path - */ - *out_len = treename_len + 1 + path_len; + /* make room for one path separator only if @path isn't empty */ + *out_len = treename_len + (path[0] ? 1 : 0) + path_len; /* - * final path needs to be null-terminated UTF16 with a - * size aligned to 8 + * final path needs to be 8-byte aligned as specified in + * MS-SMB2 2.2.13 SMB2 CREATE Request. */ - - *out_size = roundup((*out_len+1)*2, 8); - *out_path = kzalloc(*out_size, GFP_KERNEL); + *out_size = roundup(*out_len * sizeof(__le16), 8); + *out_path = kzalloc(*out_size + sizeof(__le16) /* null */, GFP_KERNEL); if (!*out_path) return -ENOMEM; From 265ad47a40da581be77172b4a8e1fb72b2bd914a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 10 Aug 2022 11:20:12 -0700 Subject: [PATCH 326/654] md/raid10: Fix the data type of an r10_sync_page_io() argument Fix the following sparse warning: drivers/md/raid10.c:2647:60: sparse: sparse: incorrect type in argument 5 (different base types) @@ expected restricted blk_opf_t [usertype] opf @@ got int rw @@ This patch does not change any functionality since REQ_OP_READ = READ = 0 and since REQ_OP_WRITE = WRITE = 1. Cc: Rong A Chen Cc: Jens Axboe Cc: Paul Menzel Fixes: 4ce4c73f662b ("md/core: Combine two sync_page_io() arguments") Reported-by: kernel test robot Signed-off-by: Bart Van Assche Signed-off-by: Song Liu --- drivers/md/raid10.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 9117fcdee1be1..64d6e4cd8a3a0 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2639,18 +2639,18 @@ static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev) } static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector, - int sectors, struct page *page, int rw) + int sectors, struct page *page, enum req_op op) { sector_t first_bad; int bad_sectors; if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors) - && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags))) + && (op == REQ_OP_READ || test_bit(WriteErrorSeen, &rdev->flags))) return -1; - if (sync_page_io(rdev, sector, sectors << 9, page, rw, false)) + if (sync_page_io(rdev, sector, sectors << 9, page, op, false)) /* success */ return 1; - if (rw == WRITE) { + if (op == REQ_OP_WRITE) { set_bit(WriteErrorSeen, &rdev->flags); if (!test_and_set_bit(WantReplacement, &rdev->flags)) set_bit(MD_RECOVERY_NEEDED, @@ -2780,7 +2780,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 if (r10_sync_page_io(rdev, r10_bio->devs[sl].addr + sect, - s, conf->tmppage, WRITE) + s, conf->tmppage, REQ_OP_WRITE) == 0) { /* Well, this device is dead */ pr_notice("md/raid10:%s: read correction write failed (%d sectors at %llu on %pg)\n", @@ -2814,8 +2814,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 switch (r10_sync_page_io(rdev, r10_bio->devs[sl].addr + sect, - s, conf->tmppage, - READ)) { + s, conf->tmppage, REQ_OP_READ)) { case 0: /* Well, this device is dead */ pr_notice("md/raid10:%s: unable to read back corrected sectors (%d sectors at %llu on %pg)\n", From 5e8daf906f890560df430d30617c692a794acb73 Mon Sep 17 00:00:00 2001 From: David Sloan Date: Thu, 11 Aug 2022 11:14:13 -0600 Subject: [PATCH 327/654] md: Flush workqueue md_rdev_misc_wq in md_alloc() A race condition still exists when removing and re-creating md devices in test cases. However, it is only seen on some setups. The race condition was tracked down to a reference still being held to the kobject by the rdev in the md_rdev_misc_wq which will be released in rdev_delayed_delete(). md_alloc() waits for previous deletions by waiting on the md_misc_wq, but the md_rdev_misc_wq may still be holding a reference to a recently removed device. To fix this, also flush the md_rdev_misc_wq in md_alloc(). Signed-off-by: David Sloan [logang@deltatee.com: rewrote commit message] Signed-off-by: Logan Gunthorpe Signed-off-by: Song Liu --- drivers/md/md.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index afaf36b2f6ab8..71d221601bf80 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5620,6 +5620,7 @@ struct mddev *md_alloc(dev_t dev, char *name) * removed (mddev_delayed_delete). */ flush_workqueue(md_misc_wq); + flush_workqueue(md_rdev_misc_wq); mutex_lock(&disks_mutex); mddev = mddev_alloc(dev); From 1d258758cf06a0734482989911d184dd5837ed4e Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Wed, 17 Aug 2022 20:05:13 +0800 Subject: [PATCH 328/654] Revert "md-raid: destroy the bitmap after destroying the thread" This reverts commit e151db8ecfb019b7da31d076130a794574c89f6f. Because it obviously breaks clustered raid as noticed by Neil though it fixed KASAN issue for dm-raid, let's revert it and fix KASAN issue in next commit. [1]. https://lore.kernel.org/linux-raid/a6657e08-b6a7-358b-2d2a-0ac37d49d23a@linux.dev/T/#m95ac225cab7409f66c295772483d091084a6d470 Fixes: e151db8ecfb0 ("md-raid: destroy the bitmap after destroying the thread") Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 71d221601bf80..107c4c953c35a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6239,11 +6239,11 @@ static void mddev_detach(struct mddev *mddev) static void __md_stop(struct mddev *mddev) { struct md_personality *pers = mddev->pers; + md_bitmap_destroy(mddev); mddev_detach(mddev); /* Ensure ->event_work is done */ if (mddev->event_work.func) flush_workqueue(md_misc_wq); - md_bitmap_destroy(mddev); spin_lock(&mddev->lock); mddev->pers = NULL; spin_unlock(&mddev->lock); From 0dd84b319352bb8ba64752d4e45396d8b13e6018 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Wed, 17 Aug 2022 20:05:14 +0800 Subject: [PATCH 329/654] md: call __md_stop_writes in md_stop From the link [1], we can see raid1d was running even after the path raid_dtr -> md_stop -> __md_stop. Let's stop write first in destructor to align with normal md-raid to fix the KASAN issue. [1]. https://lore.kernel.org/linux-raid/CAPhsuW5gc4AakdGNdF8ubpezAuDLFOYUO_sfMZcec6hQFm8nhg@mail.gmail.com/T/#m7f12bf90481c02c6d2da68c64aeed4779b7df74a Fixes: 48df498daf62 ("md: move bitmap_destroy to the beginning of __md_stop") Reported-by: Mikulas Patocka Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu --- drivers/md/md.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 107c4c953c35a..729be2c5296c6 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6261,6 +6261,7 @@ void md_stop(struct mddev *mddev) /* stop the array and free an attached data structures. * This is called from dm-raid */ + __md_stop_writes(mddev); __md_stop(mddev); bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); From 25d7a5f5a6bb15a2dae0a3f39ea5dda215024726 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 1 Aug 2022 17:24:19 -0700 Subject: [PATCH 330/654] ixgbe: stop resetting SYSTIME in ixgbe_ptp_start_cyclecounter The ixgbe_ptp_start_cyclecounter is intended to be called whenever the cyclecounter parameters need to be changed. Since commit a9763f3cb54c ("ixgbe: Update PTP to support X550EM_x devices"), this function has cleared the SYSTIME registers and reset the TSAUXC DISABLE_SYSTIME bit. While these need to be cleared during ixgbe_ptp_reset, it is wrong to clear them during ixgbe_ptp_start_cyclecounter. This function may be called during both reset and link status change. When link changes, the SYSTIME counter is still operating normally, but the cyclecounter should be updated to account for the possibly changed parameters. Clearing SYSTIME when link changes causes the timecounter to jump because the cycle counter now reads zero. Extract the SYSTIME initialization out to a new function and call this during ixgbe_ptp_reset. This prevents the timecounter adjustment and avoids an unnecessary reset of the current time. This also restores the original SYSTIME clearing that occurred during ixgbe_ptp_reset before the commit above. Reported-by: Steve Payne Reported-by: Ilya Evenbach Fixes: a9763f3cb54c ("ixgbe: Update PTP to support X550EM_x devices") Signed-off-by: Jacob Keller Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 59 +++++++++++++++----- 1 file changed, 46 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 9f06896a049b4..f8605f57bd067 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -1214,7 +1214,6 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) struct cyclecounter cc; unsigned long flags; u32 incval = 0; - u32 tsauxc = 0; u32 fuse0 = 0; /* For some of the boards below this mask is technically incorrect. @@ -1249,18 +1248,6 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) case ixgbe_mac_x550em_a: case ixgbe_mac_X550: cc.read = ixgbe_ptp_read_X550; - - /* enable SYSTIME counter */ - IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0); - IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0); - IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0); - tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC); - IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, - tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME); - IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS); - IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC); - - IXGBE_WRITE_FLUSH(hw); break; case ixgbe_mac_X540: cc.read = ixgbe_ptp_read_82599; @@ -1292,6 +1279,50 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) spin_unlock_irqrestore(&adapter->tmreg_lock, flags); } +/** + * ixgbe_ptp_init_systime - Initialize SYSTIME registers + * @adapter: the ixgbe private board structure + * + * Initialize and start the SYSTIME registers. + */ +static void ixgbe_ptp_init_systime(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 tsauxc; + + switch (hw->mac.type) { + case ixgbe_mac_X550EM_x: + case ixgbe_mac_x550em_a: + case ixgbe_mac_X550: + tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC); + + /* Reset SYSTIME registers to 0 */ + IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0); + IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0); + IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0); + + /* Reset interrupt settings */ + IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS); + IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC); + + /* Activate the SYSTIME counter */ + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, + tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME); + break; + case ixgbe_mac_X540: + case ixgbe_mac_82599EB: + /* Reset SYSTIME registers to 0 */ + IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0); + IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0); + break; + default: + /* Other devices aren't supported */ + return; + }; + + IXGBE_WRITE_FLUSH(hw); +} + /** * ixgbe_ptp_reset * @adapter: the ixgbe private board structure @@ -1318,6 +1349,8 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter) ixgbe_ptp_start_cyclecounter(adapter); + ixgbe_ptp_init_systime(adapter); + spin_lock_irqsave(&adapter->tmreg_lock, flags); timecounter_init(&adapter->hw_tc, &adapter->hw_cc, ktime_to_ns(ktime_get_real())); From bcf3a156429306070afbfda5544f2b492d25e75b Mon Sep 17 00:00:00 2001 From: Sylwester Dziedziuch Date: Fri, 19 Aug 2022 12:45:52 +0200 Subject: [PATCH 331/654] i40e: Fix incorrect address type for IPv6 flow rules It was not possible to create 1-tuple flow director rule for IPv6 flow type. It was caused by incorrectly checking for source IP address when validating user provided destination IP address. Fix this by changing ip6src to correct ip6dst address in destination IP address validation for IPv6 flow type. Fixes: efca91e89b67 ("i40e: Add flow director support for IPv6") Signed-off-by: Sylwester Dziedziuch Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 156e92c437803..e9cd0fa6a0d2f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -4485,7 +4485,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, (struct in6_addr *)&ipv6_full_mask)) new_mask |= I40E_L3_V6_DST_MASK; else if (ipv6_addr_any((struct in6_addr *) - &usr_ip6_spec->ip6src)) + &usr_ip6_spec->ip6dst)) new_mask &= ~I40E_L3_V6_DST_MASK; else return -EOPNOTSUPP; From acf4c6205e862304681234a6a4375b478af12552 Mon Sep 17 00:00:00 2001 From: Yu Zhe Date: Fri, 12 Aug 2022 14:52:23 +0800 Subject: [PATCH 332/654] fbdev: omapfb: Fix tests for platform_get_irq() failure The platform_get_irq() returns negative error codes. It can't actually return zero. Signed-off-by: Yu Zhe Signed-off-by: Helge Deller --- drivers/video/fbdev/omap/omapfb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index dfb4ddc45701e..fbb3af883d4df 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -1642,14 +1642,14 @@ static int omapfb_do_probe(struct platform_device *pdev, goto cleanup; } fbdev->int_irq = platform_get_irq(pdev, 0); - if (!fbdev->int_irq) { + if (fbdev->int_irq < 0) { dev_err(&pdev->dev, "unable to get irq\n"); r = ENXIO; goto cleanup; } fbdev->ext_irq = platform_get_irq(pdev, 1); - if (!fbdev->ext_irq) { + if (fbdev->ext_irq < 0) { dev_err(&pdev->dev, "unable to get irq\n"); r = ENXIO; goto cleanup; From 868ce967af1ea5d946635f8f89f752319212d769 Mon Sep 17 00:00:00 2001 From: Jilin Yuan Date: Tue, 16 Aug 2022 21:07:13 +0800 Subject: [PATCH 333/654] fbdev: ssd1307fb: Fix repeated words in comments Delete the redundant word 'set'. Signed-off-by: Jilin Yuan Signed-off-by: Helge Deller --- drivers/video/fbdev/ssd1307fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/ssd1307fb.c b/drivers/video/fbdev/ssd1307fb.c index 5c765655d000a..52e4ed9da78cd 100644 --- a/drivers/video/fbdev/ssd1307fb.c +++ b/drivers/video/fbdev/ssd1307fb.c @@ -450,7 +450,7 @@ static int ssd1307fb_init(struct ssd1307fb_par *par) if (ret < 0) return ret; - /* Set Set Area Color Mode ON/OFF & Low Power Display Mode */ + /* Set Area Color Mode ON/OFF & Low Power Display Mode */ if (par->area_color_enable || par->low_power) { u32 mode; From 19f953e7435644b81332dd632ba1b2d80b1e37af Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Thu, 18 Aug 2022 18:44:24 +0800 Subject: [PATCH 334/654] fbdev: fb_pm2fb: Avoid potential divide by zero error In `do_fb_ioctl()` of fbmem.c, if cmd is FBIOPUT_VSCREENINFO, var will be copied from user, then go through `fb_set_var()` and `info->fbops->fb_check_var()` which could may be `pm2fb_check_var()`. Along the path, `var->pixclock` won't be modified. This function checks whether reciprocal of `var->pixclock` is too high. If `var->pixclock` is zero, there will be a divide by zero error. So, it is necessary to check whether denominator is zero to avoid crash. As this bug is found by Syzkaller, logs are listed below. divide error in pm2fb_check_var Call Trace: fb_set_var+0x367/0xeb0 drivers/video/fbdev/core/fbmem.c:1015 do_fb_ioctl+0x234/0x670 drivers/video/fbdev/core/fbmem.c:1110 fb_ioctl+0xdd/0x130 drivers/video/fbdev/core/fbmem.c:1189 Reported-by: Zheyu Ma Signed-off-by: Letu Ren Signed-off-by: Helge Deller --- drivers/video/fbdev/pm2fb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/video/fbdev/pm2fb.c b/drivers/video/fbdev/pm2fb.c index d3be2c64f1c08..8fd79deb1e2ae 100644 --- a/drivers/video/fbdev/pm2fb.c +++ b/drivers/video/fbdev/pm2fb.c @@ -617,6 +617,11 @@ static int pm2fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) return -EINVAL; } + if (!var->pixclock) { + DPRINTK("pixclock is zero\n"); + return -EINVAL; + } + if (PICOS2KHZ(var->pixclock) > PM2_MAX_PIXCLOCK) { DPRINTK("pixclock too high (%ldKHz)\n", PICOS2KHZ(var->pixclock)); From 3119cabcc5237c63fc5316409c5beada6304ca75 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 19 Aug 2022 19:04:14 +0800 Subject: [PATCH 335/654] fbdev: sisfb: Clean up some inconsistent indenting No functional modification involved. drivers/video/fbdev/sis/sis_main.c:6165 sisfb_probe() warn: inconsistent indenting. drivers/video/fbdev/sis/sis_main.c:4266 sisfb_post_300_rwtest() warn: inconsistent indenting. drivers/video/fbdev/sis/sis_main.c:2388 SISDoSense() warn: inconsistent indenting. drivers/video/fbdev/sis/sis_main.c:2531 SiS_Sense30x() warn: inconsistent indenting. drivers/video/fbdev/sis/sis_main.c:2382 SISDoSense() warn: inconsistent indenting. drivers/video/fbdev/sis/sis_main.c:2250 sisfb_sense_crt1() warn: inconsistent indenting. drivers/video/fbdev/sis/sis_main.c:672 sisfb_validate_mode() warn: inconsistent indenting. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=1934 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Helge Deller --- drivers/video/fbdev/sis/sis_main.c | 274 +++++++++++++++-------------- 1 file changed, 141 insertions(+), 133 deletions(-) diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c index f28fd69d5eb75..d29ded67cecb2 100644 --- a/drivers/video/fbdev/sis/sis_main.c +++ b/drivers/video/fbdev/sis/sis_main.c @@ -649,37 +649,37 @@ sisfb_validate_mode(struct sis_video_info *ivideo, int myindex, u32 vbflags) u16 xres=0, yres, myres; #ifdef CONFIG_FB_SIS_300 - if(ivideo->sisvga_engine == SIS_300_VGA) { - if(!(sisbios_mode[myindex].chipset & MD_SIS300)) + if (ivideo->sisvga_engine == SIS_300_VGA) { + if (!(sisbios_mode[myindex].chipset & MD_SIS300)) return -1 ; } #endif #ifdef CONFIG_FB_SIS_315 - if(ivideo->sisvga_engine == SIS_315_VGA) { - if(!(sisbios_mode[myindex].chipset & MD_SIS315)) + if (ivideo->sisvga_engine == SIS_315_VGA) { + if (!(sisbios_mode[myindex].chipset & MD_SIS315)) return -1; } #endif myres = sisbios_mode[myindex].yres; - switch(vbflags & VB_DISPTYPE_DISP2) { + switch (vbflags & VB_DISPTYPE_DISP2) { case CRT2_LCD: xres = ivideo->lcdxres; yres = ivideo->lcdyres; - if((ivideo->SiS_Pr.SiS_CustomT != CUT_PANEL848) && - (ivideo->SiS_Pr.SiS_CustomT != CUT_PANEL856)) { - if(sisbios_mode[myindex].xres > xres) + if ((ivideo->SiS_Pr.SiS_CustomT != CUT_PANEL848) && + (ivideo->SiS_Pr.SiS_CustomT != CUT_PANEL856)) { + if (sisbios_mode[myindex].xres > xres) return -1; - if(myres > yres) + if (myres > yres) return -1; } - if(ivideo->sisfb_fstn) { - if(sisbios_mode[myindex].xres == 320) { - if(myres == 240) { - switch(sisbios_mode[myindex].mode_no[1]) { + if (ivideo->sisfb_fstn) { + if (sisbios_mode[myindex].xres == 320) { + if (myres == 240) { + switch (sisbios_mode[myindex].mode_no[1]) { case 0x50: myindex = MODE_FSTN_8; break; case 0x56: myindex = MODE_FSTN_16; break; case 0x53: return -1; @@ -688,7 +688,7 @@ sisfb_validate_mode(struct sis_video_info *ivideo, int myindex, u32 vbflags) } } - if(SiS_GetModeID_LCD(ivideo->sisvga_engine, vbflags, sisbios_mode[myindex].xres, + if (SiS_GetModeID_LCD(ivideo->sisvga_engine, vbflags, sisbios_mode[myindex].xres, sisbios_mode[myindex].yres, 0, ivideo->sisfb_fstn, ivideo->SiS_Pr.SiS_CustomT, xres, yres, ivideo->vbflags2) < 0x14) { return -1; @@ -696,14 +696,14 @@ sisfb_validate_mode(struct sis_video_info *ivideo, int myindex, u32 vbflags) break; case CRT2_TV: - if(SiS_GetModeID_TV(ivideo->sisvga_engine, vbflags, sisbios_mode[myindex].xres, + if (SiS_GetModeID_TV(ivideo->sisvga_engine, vbflags, sisbios_mode[myindex].xres, sisbios_mode[myindex].yres, 0, ivideo->vbflags2) < 0x14) { return -1; } break; case CRT2_VGA: - if(SiS_GetModeID_VGA2(ivideo->sisvga_engine, vbflags, sisbios_mode[myindex].xres, + if (SiS_GetModeID_VGA2(ivideo->sisvga_engine, vbflags, sisbios_mode[myindex].xres, sisbios_mode[myindex].yres, 0, ivideo->vbflags2) < 0x14) { return -1; } @@ -2204,82 +2204,88 @@ static bool sisfb_test_DDC1(struct sis_video_info *ivideo) static void sisfb_sense_crt1(struct sis_video_info *ivideo) { - bool mustwait = false; - u8 sr1F, cr17; + bool mustwait = false; + u8 sr1F, cr17; #ifdef CONFIG_FB_SIS_315 - u8 cr63=0; + u8 cr63 = 0; #endif - u16 temp = 0xffff; - int i; + u16 temp = 0xffff; + int i; + + sr1F = SiS_GetReg(SISSR, 0x1F); + SiS_SetRegOR(SISSR, 0x1F, 0x04); + SiS_SetRegAND(SISSR, 0x1F, 0x3F); - sr1F = SiS_GetReg(SISSR, 0x1F); - SiS_SetRegOR(SISSR, 0x1F, 0x04); - SiS_SetRegAND(SISSR, 0x1F, 0x3F); - if(sr1F & 0xc0) mustwait = true; + if (sr1F & 0xc0) + mustwait = true; #ifdef CONFIG_FB_SIS_315 - if(ivideo->sisvga_engine == SIS_315_VGA) { - cr63 = SiS_GetReg(SISCR, ivideo->SiS_Pr.SiS_MyCR63); - cr63 &= 0x40; - SiS_SetRegAND(SISCR, ivideo->SiS_Pr.SiS_MyCR63, 0xBF); - } + if (ivideo->sisvga_engine == SIS_315_VGA) { + cr63 = SiS_GetReg(SISCR, ivideo->SiS_Pr.SiS_MyCR63); + cr63 &= 0x40; + SiS_SetRegAND(SISCR, ivideo->SiS_Pr.SiS_MyCR63, 0xBF); + } #endif - cr17 = SiS_GetReg(SISCR, 0x17); - cr17 &= 0x80; - if(!cr17) { - SiS_SetRegOR(SISCR, 0x17, 0x80); - mustwait = true; - SiS_SetReg(SISSR, 0x00, 0x01); - SiS_SetReg(SISSR, 0x00, 0x03); - } + cr17 = SiS_GetReg(SISCR, 0x17); + cr17 &= 0x80; - if(mustwait) { - for(i=0; i < 10; i++) sisfbwaitretracecrt1(ivideo); - } + if (!cr17) { + SiS_SetRegOR(SISCR, 0x17, 0x80); + mustwait = true; + SiS_SetReg(SISSR, 0x00, 0x01); + SiS_SetReg(SISSR, 0x00, 0x03); + } + if (mustwait) { + for (i = 0; i < 10; i++) + sisfbwaitretracecrt1(ivideo); + } #ifdef CONFIG_FB_SIS_315 - if(ivideo->chip >= SIS_330) { - SiS_SetRegAND(SISCR, 0x32, ~0x20); - if(ivideo->chip >= SIS_340) { - SiS_SetReg(SISCR, 0x57, 0x4a); - } else { - SiS_SetReg(SISCR, 0x57, 0x5f); - } - SiS_SetRegOR(SISCR, 0x53, 0x02); - while ((SiS_GetRegByte(SISINPSTAT)) & 0x01) break; - while (!((SiS_GetRegByte(SISINPSTAT)) & 0x01)) break; - if ((SiS_GetRegByte(SISMISCW)) & 0x10) temp = 1; - SiS_SetRegAND(SISCR, 0x53, 0xfd); - SiS_SetRegAND(SISCR, 0x57, 0x00); - } + if (ivideo->chip >= SIS_330) { + SiS_SetRegAND(SISCR, 0x32, ~0x20); + if (ivideo->chip >= SIS_340) + SiS_SetReg(SISCR, 0x57, 0x4a); + else + SiS_SetReg(SISCR, 0x57, 0x5f); + + SiS_SetRegOR(SISCR, 0x53, 0x02); + while ((SiS_GetRegByte(SISINPSTAT)) & 0x01) + break; + while (!((SiS_GetRegByte(SISINPSTAT)) & 0x01)) + break; + if ((SiS_GetRegByte(SISMISCW)) & 0x10) + temp = 1; + + SiS_SetRegAND(SISCR, 0x53, 0xfd); + SiS_SetRegAND(SISCR, 0x57, 0x00); + } #endif - if(temp == 0xffff) { - i = 3; - do { - temp = SiS_HandleDDC(&ivideo->SiS_Pr, ivideo->vbflags, - ivideo->sisvga_engine, 0, 0, NULL, ivideo->vbflags2); - } while(((temp == 0) || (temp == 0xffff)) && i--); + if (temp == 0xffff) { + i = 3; - if((temp == 0) || (temp == 0xffff)) { - if(sisfb_test_DDC1(ivideo)) temp = 1; - } - } + do { + temp = SiS_HandleDDC(&ivideo->SiS_Pr, ivideo->vbflags, + ivideo->sisvga_engine, 0, 0, NULL, ivideo->vbflags2); + } while (((temp == 0) || (temp == 0xffff)) && i--); - if((temp) && (temp != 0xffff)) { - SiS_SetRegOR(SISCR, 0x32, 0x20); - } + if ((temp == 0) || (temp == 0xffff)) { + if (sisfb_test_DDC1(ivideo)) + temp = 1; + } + } + + if ((temp) && (temp != 0xffff)) + SiS_SetRegOR(SISCR, 0x32, 0x20); #ifdef CONFIG_FB_SIS_315 - if(ivideo->sisvga_engine == SIS_315_VGA) { - SiS_SetRegANDOR(SISCR, ivideo->SiS_Pr.SiS_MyCR63, 0xBF, cr63); - } + if (ivideo->sisvga_engine == SIS_315_VGA) + SiS_SetRegANDOR(SISCR, ivideo->SiS_Pr.SiS_MyCR63, 0xBF, cr63); #endif - SiS_SetRegANDOR(SISCR, 0x17, 0x7F, cr17); - - SiS_SetReg(SISSR, 0x1F, sr1F); + SiS_SetRegANDOR(SISCR, 0x17, 0x7F, cr17); + SiS_SetReg(SISSR, 0x1F, sr1F); } /* Determine and detect attached devices on SiS30x */ @@ -2293,25 +2299,25 @@ static void SiS_SenseLCD(struct sis_video_info *ivideo) ivideo->SiS_Pr.PanelSelfDetected = false; /* LCD detection only for TMDS bridges */ - if(!(ivideo->vbflags2 & VB2_SISTMDSBRIDGE)) + if (!(ivideo->vbflags2 & VB2_SISTMDSBRIDGE)) return; - if(ivideo->vbflags2 & VB2_30xBDH) + if (ivideo->vbflags2 & VB2_30xBDH) return; /* If LCD already set up by BIOS, skip it */ reg = SiS_GetReg(SISCR, 0x32); - if(reg & 0x08) + if (reg & 0x08) return; realcrtno = 1; - if(ivideo->SiS_Pr.DDCPortMixup) + if (ivideo->SiS_Pr.DDCPortMixup) realcrtno = 0; /* Check DDC capabilities */ temp = SiS_HandleDDC(&ivideo->SiS_Pr, ivideo->vbflags, ivideo->sisvga_engine, realcrtno, 0, &buffer[0], ivideo->vbflags2); - if((!temp) || (temp == 0xffff) || (!(temp & 0x02))) + if ((!temp) || (temp == 0xffff) || (!(temp & 0x02))) return; /* Read DDC data */ @@ -2320,17 +2326,17 @@ static void SiS_SenseLCD(struct sis_video_info *ivideo) temp = SiS_HandleDDC(&ivideo->SiS_Pr, ivideo->vbflags, ivideo->sisvga_engine, realcrtno, 1, &buffer[0], ivideo->vbflags2); - } while((temp) && i--); + } while ((temp) && i--); - if(temp) + if (temp) return; /* No digital device */ - if(!(buffer[0x14] & 0x80)) + if (!(buffer[0x14] & 0x80)) return; /* First detailed timing preferred timing? */ - if(!(buffer[0x18] & 0x02)) + if (!(buffer[0x18] & 0x02)) return; xres = buffer[0x38] | ((buffer[0x3a] & 0xf0) << 4); @@ -2338,26 +2344,26 @@ static void SiS_SenseLCD(struct sis_video_info *ivideo) switch(xres) { case 1024: - if(yres == 768) + if (yres == 768) paneltype = 0x02; break; case 1280: - if(yres == 1024) + if (yres == 1024) paneltype = 0x03; break; case 1600: - if((yres == 1200) && (ivideo->vbflags2 & VB2_30xC)) + if ((yres == 1200) && (ivideo->vbflags2 & VB2_30xC)) paneltype = 0x0b; break; } - if(!paneltype) + if (!paneltype) return; - if(buffer[0x23]) + if (buffer[0x23]) cr37 |= 0x10; - if((buffer[0x47] & 0x18) == 0x18) + if ((buffer[0x47] & 0x18) == 0x18) cr37 |= ((((buffer[0x47] & 0x06) ^ 0x06) << 5) | 0x20); else cr37 |= 0xc0; @@ -2372,31 +2378,34 @@ static void SiS_SenseLCD(struct sis_video_info *ivideo) static int SISDoSense(struct sis_video_info *ivideo, u16 type, u16 test) { - int temp, mytest, result, i, j; - - for(j = 0; j < 10; j++) { - result = 0; - for(i = 0; i < 3; i++) { - mytest = test; - SiS_SetReg(SISPART4, 0x11, (type & 0x00ff)); - temp = (type >> 8) | (mytest & 0x00ff); - SiS_SetRegANDOR(SISPART4, 0x10, 0xe0, temp); - SiS_DDC2Delay(&ivideo->SiS_Pr, 0x1500); - mytest >>= 8; - mytest &= 0x7f; - temp = SiS_GetReg(SISPART4, 0x03); - temp ^= 0x0e; - temp &= mytest; - if(temp == mytest) result++; + int temp, mytest, result, i, j; + + for (j = 0; j < 10; j++) { + result = 0; + for (i = 0; i < 3; i++) { + mytest = test; + SiS_SetReg(SISPART4, 0x11, (type & 0x00ff)); + temp = (type >> 8) | (mytest & 0x00ff); + SiS_SetRegANDOR(SISPART4, 0x10, 0xe0, temp); + SiS_DDC2Delay(&ivideo->SiS_Pr, 0x1500); + mytest >>= 8; + mytest &= 0x7f; + temp = SiS_GetReg(SISPART4, 0x03); + temp ^= 0x0e; + temp &= mytest; + if (temp == mytest) + result++; #if 1 - SiS_SetReg(SISPART4, 0x11, 0x00); - SiS_SetRegAND(SISPART4, 0x10, 0xe0); - SiS_DDC2Delay(&ivideo->SiS_Pr, 0x1000); + SiS_SetReg(SISPART4, 0x11, 0x00); + SiS_SetRegAND(SISPART4, 0x10, 0xe0); + SiS_DDC2Delay(&ivideo->SiS_Pr, 0x1000); #endif - } - if((result == 0) || (result >= 2)) break; - } - return result; + } + + if ((result == 0) || (result >= 2)) + break; + } + return result; } static void SiS_Sense30x(struct sis_video_info *ivideo) @@ -4262,18 +4271,17 @@ static int sisfb_post_300_rwtest(struct sis_video_info *ivideo, int iteration, unsigned int k, RankCapacity, PageCapacity, BankNumHigh, BankNumMid; unsigned int PhysicalAdrOtherPage, PhysicalAdrHigh, PhysicalAdrHalfPage; - for(k = 0; k < ARRAY_SIZE(SiS_DRAMType); k++) { - + for (k = 0; k < ARRAY_SIZE(SiS_DRAMType); k++) { RankCapacity = buswidth * SiS_DRAMType[k][3]; - if(RankCapacity != PseudoRankCapacity) + if (RankCapacity != PseudoRankCapacity) continue; - if((SiS_DRAMType[k][2] + SiS_DRAMType[k][0]) > PseudoAdrPinCount) + if ((SiS_DRAMType[k][2] + SiS_DRAMType[k][0]) > PseudoAdrPinCount) continue; BankNumHigh = RankCapacity * 16 * iteration - 1; - if(iteration == 3) { /* Rank No */ + if (iteration == 3) { /* Rank No */ BankNumMid = RankCapacity * 16 - 1; } else { BankNumMid = RankCapacity * 16 * iteration / 2 - 1; @@ -4287,18 +4295,22 @@ static int sisfb_post_300_rwtest(struct sis_video_info *ivideo, int iteration, SiS_SetRegAND(SISSR, 0x15, 0xFB); /* Test */ SiS_SetRegOR(SISSR, 0x15, 0x04); /* Test */ sr14 = (SiS_DRAMType[k][3] * buswidth) - 1; - if(buswidth == 4) sr14 |= 0x80; - else if(buswidth == 2) sr14 |= 0x40; + + if (buswidth == 4) + sr14 |= 0x80; + else if (buswidth == 2) + sr14 |= 0x40; + SiS_SetReg(SISSR, 0x13, SiS_DRAMType[k][4]); SiS_SetReg(SISSR, 0x14, sr14); BankNumHigh <<= 16; BankNumMid <<= 16; - if((BankNumHigh + PhysicalAdrHigh >= mapsize) || - (BankNumMid + PhysicalAdrHigh >= mapsize) || - (BankNumHigh + PhysicalAdrHalfPage >= mapsize) || - (BankNumHigh + PhysicalAdrOtherPage >= mapsize)) + if ((BankNumHigh + PhysicalAdrHigh >= mapsize) || + (BankNumMid + PhysicalAdrHigh >= mapsize) || + (BankNumHigh + PhysicalAdrHalfPage >= mapsize) || + (BankNumHigh + PhysicalAdrOtherPage >= mapsize)) continue; /* Write data */ @@ -4312,7 +4324,7 @@ static int sisfb_post_300_rwtest(struct sis_video_info *ivideo, int iteration, (FBAddr + BankNumHigh + PhysicalAdrOtherPage)); /* Read data */ - if(readw(FBAddr + BankNumHigh + PhysicalAdrHigh) == PhysicalAdrHigh) + if (readw(FBAddr + BankNumHigh + PhysicalAdrHigh) == PhysicalAdrHigh) return 1; } @@ -6150,24 +6162,20 @@ static int sisfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) #endif #ifdef CONFIG_FB_SIS_315 - if(ivideo->sisvga_engine == SIS_315_VGA) { + if (ivideo->sisvga_engine == SIS_315_VGA) { int result = 1; - /* if((ivideo->chip == SIS_315H) || - (ivideo->chip == SIS_315) || - (ivideo->chip == SIS_315PRO) || - (ivideo->chip == SIS_330)) { - sisfb_post_sis315330(pdev); - } else */ if(ivideo->chip == XGI_20) { + + if (ivideo->chip == XGI_20) { result = sisfb_post_xgi(pdev); ivideo->sisfb_can_post = 1; - } else if((ivideo->chip == XGI_40) && ivideo->haveXGIROM) { + } else if ((ivideo->chip == XGI_40) && ivideo->haveXGIROM) { result = sisfb_post_xgi(pdev); ivideo->sisfb_can_post = 1; } else { printk(KERN_INFO "sisfb: Card is not " "POSTed and sisfb can't do this either.\n"); } - if(!result) { + if (!result) { printk(KERN_ERR "sisfb: Failed to POST card\n"); ret = -ENODEV; goto error_3; From 1cd3bf3348b4a686cc955c9cd11034d7652219cd Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 19 Aug 2022 19:06:59 +0800 Subject: [PATCH 336/654] fbdev: radeon: Clean up some inconsistent indenting No functional modification involved. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=1932 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Helge Deller --- drivers/video/fbdev/aty/radeon_base.c | 46 +++++++++++++-------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/video/fbdev/aty/radeon_base.c b/drivers/video/fbdev/aty/radeon_base.c index 6851f47613e17..15933c0ea763d 100644 --- a/drivers/video/fbdev/aty/radeon_base.c +++ b/drivers/video/fbdev/aty/radeon_base.c @@ -2094,34 +2094,34 @@ static void radeon_identify_vram(struct radeonfb_info *rinfo) u32 tmp; /* framebuffer size */ - if ((rinfo->family == CHIP_FAMILY_RS100) || + if ((rinfo->family == CHIP_FAMILY_RS100) || (rinfo->family == CHIP_FAMILY_RS200) || (rinfo->family == CHIP_FAMILY_RS300) || (rinfo->family == CHIP_FAMILY_RC410) || (rinfo->family == CHIP_FAMILY_RS400) || (rinfo->family == CHIP_FAMILY_RS480) ) { - u32 tom = INREG(NB_TOM); - tmp = ((((tom >> 16) - (tom & 0xffff) + 1) << 6) * 1024); - - radeon_fifo_wait(6); - OUTREG(MC_FB_LOCATION, tom); - OUTREG(DISPLAY_BASE_ADDR, (tom & 0xffff) << 16); - OUTREG(CRTC2_DISPLAY_BASE_ADDR, (tom & 0xffff) << 16); - OUTREG(OV0_BASE_ADDR, (tom & 0xffff) << 16); - - /* This is supposed to fix the crtc2 noise problem. */ - OUTREG(GRPH2_BUFFER_CNTL, INREG(GRPH2_BUFFER_CNTL) & ~0x7f0000); - - if ((rinfo->family == CHIP_FAMILY_RS100) || - (rinfo->family == CHIP_FAMILY_RS200)) { - /* This is to workaround the asic bug for RMX, some versions - of BIOS doesn't have this register initialized correctly. - */ - OUTREGP(CRTC_MORE_CNTL, CRTC_H_CUTOFF_ACTIVE_EN, - ~CRTC_H_CUTOFF_ACTIVE_EN); - } - } else { - tmp = INREG(CNFG_MEMSIZE); + u32 tom = INREG(NB_TOM); + + tmp = ((((tom >> 16) - (tom & 0xffff) + 1) << 6) * 1024); + radeon_fifo_wait(6); + OUTREG(MC_FB_LOCATION, tom); + OUTREG(DISPLAY_BASE_ADDR, (tom & 0xffff) << 16); + OUTREG(CRTC2_DISPLAY_BASE_ADDR, (tom & 0xffff) << 16); + OUTREG(OV0_BASE_ADDR, (tom & 0xffff) << 16); + + /* This is supposed to fix the crtc2 noise problem. */ + OUTREG(GRPH2_BUFFER_CNTL, INREG(GRPH2_BUFFER_CNTL) & ~0x7f0000); + + if ((rinfo->family == CHIP_FAMILY_RS100) || + (rinfo->family == CHIP_FAMILY_RS200)) { + /* This is to workaround the asic bug for RMX, some versions + * of BIOS doesn't have this register initialized correctly. + */ + OUTREGP(CRTC_MORE_CNTL, CRTC_H_CUTOFF_ACTIVE_EN, + ~CRTC_H_CUTOFF_ACTIVE_EN); + } + } else { + tmp = INREG(CNFG_MEMSIZE); } /* mem size is bits [28:0], mask off the rest */ From 58559dfc1ebba2ae0c7627dc8f8991ae1984c6e3 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Sun, 21 Aug 2022 20:17:31 +0900 Subject: [PATCH 337/654] fbdev: fbcon: Destroy mutex on freeing struct fb_info It's needed to destroy bl_curve_mutex on freeing struct fb_info since the mutex is embedded in the structure and initialized when it's allocated. Signed-off-by: Shigeru Yoshida Signed-off-by: Helge Deller --- drivers/video/fbdev/core/fbsysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/video/fbdev/core/fbsysfs.c b/drivers/video/fbdev/core/fbsysfs.c index c2a60b187467e..4d7f63892dcc4 100644 --- a/drivers/video/fbdev/core/fbsysfs.c +++ b/drivers/video/fbdev/core/fbsysfs.c @@ -84,6 +84,10 @@ void framebuffer_release(struct fb_info *info) if (WARN_ON(refcount_read(&info->count))) return; +#if IS_ENABLED(CONFIG_FB_BACKLIGHT) + mutex_destroy(&info->bl_curve_mutex); +#endif + kfree(info->apertures); kfree(info); } From 07c55c9803dea748d17a054000cbf1913ce06399 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 19 Aug 2022 16:57:52 +0800 Subject: [PATCH 338/654] fbdev: chipsfb: Add missing pci_disable_device() in chipsfb_pci_init() Add missing pci_disable_device() in error path in chipsfb_pci_init(). Signed-off-by: Yang Yingliang Signed-off-by: Helge Deller --- drivers/video/fbdev/chipsfb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/fbdev/chipsfb.c b/drivers/video/fbdev/chipsfb.c index 393894af26f84..2b00a9d554fc0 100644 --- a/drivers/video/fbdev/chipsfb.c +++ b/drivers/video/fbdev/chipsfb.c @@ -430,6 +430,7 @@ static int chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent) err_release_fb: framebuffer_release(p); err_disable: + pci_disable_device(dp); err_out: return rc; } From 144c467398aa9fb274583590c848e6d6388e89d9 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 24 Aug 2022 19:44:55 +0800 Subject: [PATCH 339/654] fbdev: omap: Remove unnecessary print function dev_err() The print function dev_err() is redundant because platform_get_irq() already prints an error. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=1957 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Helge Deller --- drivers/video/fbdev/omap/omapfb_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index fbb3af883d4df..17cda57656838 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -1643,14 +1643,12 @@ static int omapfb_do_probe(struct platform_device *pdev, } fbdev->int_irq = platform_get_irq(pdev, 0); if (fbdev->int_irq < 0) { - dev_err(&pdev->dev, "unable to get irq\n"); r = ENXIO; goto cleanup; } fbdev->ext_irq = platform_get_irq(pdev, 1); if (fbdev->ext_irq < 0) { - dev_err(&pdev->dev, "unable to get irq\n"); r = ENXIO; goto cleanup; } From 8d0268585b9c07dd01b77b63913a4f31ad99239b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 18 Aug 2022 23:01:17 +0200 Subject: [PATCH 340/654] fbdev: Move fbdev drivers from strlcpy to strscpy Follow the advice of the below link and prefer 'strscpy' in this subsystem. Conversion is 1:1 because the return value is not used. Generated by a coccinelle script. Link: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/ Signed-off-by: Wolfram Sang Signed-off-by: Helge Deller --- drivers/video/console/sticore.c | 2 +- drivers/video/fbdev/aty/atyfb_base.c | 2 +- drivers/video/fbdev/aty/radeon_base.c | 2 +- drivers/video/fbdev/bw2.c | 2 +- drivers/video/fbdev/cirrusfb.c | 2 +- drivers/video/fbdev/clps711x-fb.c | 2 +- drivers/video/fbdev/core/fbcon.c | 2 +- drivers/video/fbdev/cyber2000fb.c | 8 ++++---- drivers/video/fbdev/ffb.c | 2 +- drivers/video/fbdev/geode/gx1fb_core.c | 6 +++--- drivers/video/fbdev/gxt4500.c | 2 +- drivers/video/fbdev/i740fb.c | 2 +- drivers/video/fbdev/imxfb.c | 2 +- drivers/video/fbdev/matrox/matroxfb_base.c | 6 +++--- drivers/video/fbdev/omap2/omapfb/omapfb-main.c | 2 +- drivers/video/fbdev/pxa168fb.c | 2 +- drivers/video/fbdev/pxafb.c | 2 +- drivers/video/fbdev/s3fb.c | 2 +- drivers/video/fbdev/simplefb.c | 2 +- drivers/video/fbdev/sis/sis_main.c | 4 ++-- drivers/video/fbdev/sm501fb.c | 2 +- drivers/video/fbdev/sstfb.c | 2 +- drivers/video/fbdev/sunxvr1000.c | 2 +- drivers/video/fbdev/sunxvr2500.c | 2 +- drivers/video/fbdev/sunxvr500.c | 2 +- drivers/video/fbdev/tcx.c | 2 +- drivers/video/fbdev/tdfxfb.c | 4 ++-- drivers/video/fbdev/tgafb.c | 2 +- drivers/video/fbdev/tridentfb.c | 2 +- 29 files changed, 38 insertions(+), 38 deletions(-) diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c index bd4dc97d4d340..db568f67e4dc2 100644 --- a/drivers/video/console/sticore.c +++ b/drivers/video/console/sticore.c @@ -290,7 +290,7 @@ static char default_sti_path[21] __read_mostly; static int __init sti_setup(char *str) { if (str) - strlcpy (default_sti_path, str, sizeof (default_sti_path)); + strscpy(default_sti_path, str, sizeof(default_sti_path)); return 1; } diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index a3e6faed7745a..14eb718bd67c7 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -3891,7 +3891,7 @@ static int __init atyfb_setup(char *options) && (!strncmp(this_opt, "Mach64:", 7))) { static unsigned char m64_num; static char mach64_str[80]; - strlcpy(mach64_str, this_opt + 7, sizeof(mach64_str)); + strscpy(mach64_str, this_opt + 7, sizeof(mach64_str)); if (!store_video_par(mach64_str, m64_num)) { m64_num++; mach64_count = m64_num; diff --git a/drivers/video/fbdev/aty/radeon_base.c b/drivers/video/fbdev/aty/radeon_base.c index 15933c0ea763d..a14a8d73035c0 100644 --- a/drivers/video/fbdev/aty/radeon_base.c +++ b/drivers/video/fbdev/aty/radeon_base.c @@ -1980,7 +1980,7 @@ static int radeon_set_fbinfo(struct radeonfb_info *rinfo) info->screen_base = rinfo->fb_base; info->screen_size = rinfo->mapped_vram; /* Fill fix common fields */ - strlcpy(info->fix.id, rinfo->name, sizeof(info->fix.id)); + strscpy(info->fix.id, rinfo->name, sizeof(info->fix.id)); info->fix.smem_start = rinfo->fb_base_phys; info->fix.smem_len = rinfo->video_ram; info->fix.type = FB_TYPE_PACKED_PIXELS; diff --git a/drivers/video/fbdev/bw2.c b/drivers/video/fbdev/bw2.c index e7702fe1fe7d7..6403ae07970d6 100644 --- a/drivers/video/fbdev/bw2.c +++ b/drivers/video/fbdev/bw2.c @@ -182,7 +182,7 @@ static int bw2_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg) static void bw2_init_fix(struct fb_info *info, int linebytes) { - strlcpy(info->fix.id, "bwtwo", sizeof(info->fix.id)); + strscpy(info->fix.id, "bwtwo", sizeof(info->fix.id)); info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.visual = FB_VISUAL_MONO01; diff --git a/drivers/video/fbdev/cirrusfb.c b/drivers/video/fbdev/cirrusfb.c index a41a75841e10e..2a9fa06881b5e 100644 --- a/drivers/video/fbdev/cirrusfb.c +++ b/drivers/video/fbdev/cirrusfb.c @@ -1999,7 +1999,7 @@ static int cirrusfb_set_fbinfo(struct fb_info *info) } /* Fill fix common fields */ - strlcpy(info->fix.id, cirrusfb_board_info[cinfo->btype].name, + strscpy(info->fix.id, cirrusfb_board_info[cinfo->btype].name, sizeof(info->fix.id)); /* monochrome: only 1 memory plane */ diff --git a/drivers/video/fbdev/clps711x-fb.c b/drivers/video/fbdev/clps711x-fb.c index 771ce1f769515..a1061c2f16406 100644 --- a/drivers/video/fbdev/clps711x-fb.c +++ b/drivers/video/fbdev/clps711x-fb.c @@ -326,7 +326,7 @@ static int clps711x_fb_probe(struct platform_device *pdev) info->var.vmode = FB_VMODE_NONINTERLACED; info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.accel = FB_ACCEL_NONE; - strlcpy(info->fix.id, CLPS711X_FB_NAME, sizeof(info->fix.id)); + strscpy(info->fix.id, CLPS711X_FB_NAME, sizeof(info->fix.id)); fb_videomode_to_var(&info->var, &cfb->mode); ret = fb_alloc_cmap(&info->cmap, BIT(CLPS711X_FB_BPP_MAX), 0); diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index cf9ac4da0a82c..4a032fcf0d14d 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -412,7 +412,7 @@ static int __init fb_console_setup(char *this_opt) while ((options = strsep(&this_opt, ",")) != NULL) { if (!strncmp(options, "font:", 5)) { - strlcpy(fontname, options + 5, sizeof(fontname)); + strscpy(fontname, options + 5, sizeof(fontname)); continue; } diff --git a/drivers/video/fbdev/cyber2000fb.c b/drivers/video/fbdev/cyber2000fb.c index d45355b9a58ca..8f041f9b14c71 100644 --- a/drivers/video/fbdev/cyber2000fb.c +++ b/drivers/video/fbdev/cyber2000fb.c @@ -1134,7 +1134,7 @@ int cyber2000fb_attach(struct cyberpro_info *info, int idx) info->fb_size = int_cfb_info->fb.fix.smem_len; info->info = int_cfb_info; - strlcpy(info->dev_name, int_cfb_info->fb.fix.id, + strscpy(info->dev_name, int_cfb_info->fb.fix.id, sizeof(info->dev_name)); } @@ -1229,7 +1229,7 @@ static int cyber2000fb_ddc_getsda(void *data) static int cyber2000fb_setup_ddc_bus(struct cfb_info *cfb) { - strlcpy(cfb->ddc_adapter.name, cfb->fb.fix.id, + strscpy(cfb->ddc_adapter.name, cfb->fb.fix.id, sizeof(cfb->ddc_adapter.name)); cfb->ddc_adapter.owner = THIS_MODULE; cfb->ddc_adapter.class = I2C_CLASS_DDC; @@ -1304,7 +1304,7 @@ static int cyber2000fb_i2c_getscl(void *data) static int cyber2000fb_i2c_register(struct cfb_info *cfb) { - strlcpy(cfb->i2c_adapter.name, cfb->fb.fix.id, + strscpy(cfb->i2c_adapter.name, cfb->fb.fix.id, sizeof(cfb->i2c_adapter.name)); cfb->i2c_adapter.owner = THIS_MODULE; cfb->i2c_adapter.algo_data = &cfb->i2c_algo; @@ -1500,7 +1500,7 @@ static int cyber2000fb_setup(char *options) if (strncmp(opt, "font:", 5) == 0) { static char default_font_storage[40]; - strlcpy(default_font_storage, opt + 5, + strscpy(default_font_storage, opt + 5, sizeof(default_font_storage)); default_font = default_font_storage; continue; diff --git a/drivers/video/fbdev/ffb.c b/drivers/video/fbdev/ffb.c index b3d580e57221e..7cba3969a9702 100644 --- a/drivers/video/fbdev/ffb.c +++ b/drivers/video/fbdev/ffb.c @@ -883,7 +883,7 @@ static void ffb_init_fix(struct fb_info *info) } else ffb_type_name = "Elite 3D"; - strlcpy(info->fix.id, ffb_type_name, sizeof(info->fix.id)); + strscpy(info->fix.id, ffb_type_name, sizeof(info->fix.id)); info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.visual = FB_VISUAL_TRUECOLOR; diff --git a/drivers/video/fbdev/geode/gx1fb_core.c b/drivers/video/fbdev/geode/gx1fb_core.c index 5d34d89fb6653..e41204ecb0e35 100644 --- a/drivers/video/fbdev/geode/gx1fb_core.c +++ b/drivers/video/fbdev/geode/gx1fb_core.c @@ -410,13 +410,13 @@ static void __init gx1fb_setup(char *options) continue; if (!strncmp(this_opt, "mode:", 5)) - strlcpy(mode_option, this_opt + 5, sizeof(mode_option)); + strscpy(mode_option, this_opt + 5, sizeof(mode_option)); else if (!strncmp(this_opt, "crt:", 4)) crt_option = !!simple_strtoul(this_opt + 4, NULL, 0); else if (!strncmp(this_opt, "panel:", 6)) - strlcpy(panel_option, this_opt + 6, sizeof(panel_option)); + strscpy(panel_option, this_opt + 6, sizeof(panel_option)); else - strlcpy(mode_option, this_opt, sizeof(mode_option)); + strscpy(mode_option, this_opt, sizeof(mode_option)); } } #endif diff --git a/drivers/video/fbdev/gxt4500.c b/drivers/video/fbdev/gxt4500.c index e5475ae1e1587..94588b809ebf8 100644 --- a/drivers/video/fbdev/gxt4500.c +++ b/drivers/video/fbdev/gxt4500.c @@ -650,7 +650,7 @@ static int gxt4500_probe(struct pci_dev *pdev, const struct pci_device_id *ent) cardtype = ent->driver_data; par->refclk_ps = cardinfo[cardtype].refclk_ps; info->fix = gxt4500_fix; - strlcpy(info->fix.id, cardinfo[cardtype].cardname, + strscpy(info->fix.id, cardinfo[cardtype].cardname, sizeof(info->fix.id)); info->pseudo_palette = par->pseudo_palette; diff --git a/drivers/video/fbdev/i740fb.c b/drivers/video/fbdev/i740fb.c index 7f09a0daaaa24..bd30d8314b687 100644 --- a/drivers/video/fbdev/i740fb.c +++ b/drivers/video/fbdev/i740fb.c @@ -159,7 +159,7 @@ static int i740fb_setup_ddc_bus(struct fb_info *info) { struct i740fb_par *par = info->par; - strlcpy(par->ddc_adapter.name, info->fix.id, + strscpy(par->ddc_adapter.name, info->fix.id, sizeof(par->ddc_adapter.name)); par->ddc_adapter.owner = THIS_MODULE; par->ddc_adapter.class = I2C_CLASS_DDC; diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c index d97d7456d15a0..94f3bc637fc88 100644 --- a/drivers/video/fbdev/imxfb.c +++ b/drivers/video/fbdev/imxfb.c @@ -681,7 +681,7 @@ static int imxfb_init_fbinfo(struct platform_device *pdev) fbi->devtype = pdev->id_entry->driver_data; - strlcpy(info->fix.id, IMX_NAME, sizeof(info->fix.id)); + strscpy(info->fix.id, IMX_NAME, sizeof(info->fix.id)); info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.type_aux = 0; diff --git a/drivers/video/fbdev/matrox/matroxfb_base.c b/drivers/video/fbdev/matrox/matroxfb_base.c index 236521b19daf7..68bba2688f4c1 100644 --- a/drivers/video/fbdev/matrox/matroxfb_base.c +++ b/drivers/video/fbdev/matrox/matroxfb_base.c @@ -2383,9 +2383,9 @@ static int __init matroxfb_setup(char *options) { else if (!strncmp(this_opt, "mem:", 4)) mem = simple_strtoul(this_opt+4, NULL, 0); else if (!strncmp(this_opt, "mode:", 5)) - strlcpy(videomode, this_opt+5, sizeof(videomode)); + strscpy(videomode, this_opt + 5, sizeof(videomode)); else if (!strncmp(this_opt, "outputs:", 8)) - strlcpy(outputs, this_opt+8, sizeof(outputs)); + strscpy(outputs, this_opt + 8, sizeof(outputs)); else if (!strncmp(this_opt, "dfp:", 4)) { dfp_type = simple_strtoul(this_opt+4, NULL, 0); dfp = 1; @@ -2455,7 +2455,7 @@ static int __init matroxfb_setup(char *options) { else if (!strcmp(this_opt, "dfp")) dfp = value; else { - strlcpy(videomode, this_opt, sizeof(videomode)); + strscpy(videomode, this_opt, sizeof(videomode)); } } } diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-main.c b/drivers/video/fbdev/omap2/omapfb/omapfb-main.c index afa688e754b95..5ccddcfce7228 100644 --- a/drivers/video/fbdev/omap2/omapfb/omapfb-main.c +++ b/drivers/video/fbdev/omap2/omapfb/omapfb-main.c @@ -1331,7 +1331,7 @@ static void clear_fb_info(struct fb_info *fbi) { memset(&fbi->var, 0, sizeof(fbi->var)); memset(&fbi->fix, 0, sizeof(fbi->fix)); - strlcpy(fbi->fix.id, MODULE_NAME, sizeof(fbi->fix.id)); + strscpy(fbi->fix.id, MODULE_NAME, sizeof(fbi->fix.id)); } static int omapfb_free_all_fbmem(struct omapfb2_device *fbdev) diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c index e943300d23e8e..d5d0bbd39213b 100644 --- a/drivers/video/fbdev/pxa168fb.c +++ b/drivers/video/fbdev/pxa168fb.c @@ -640,7 +640,7 @@ static int pxa168fb_probe(struct platform_device *pdev) info->flags = FBINFO_DEFAULT | FBINFO_PARTIAL_PAN_OK | FBINFO_HWACCEL_XPAN | FBINFO_HWACCEL_YPAN; info->node = -1; - strlcpy(info->fix.id, mi->id, 16); + strscpy(info->fix.id, mi->id, 16); info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.type_aux = 0; info->fix.xpanstep = 0; diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c index 66cfc3e9d3cfd..696ac54311809 100644 --- a/drivers/video/fbdev/pxafb.c +++ b/drivers/video/fbdev/pxafb.c @@ -2042,7 +2042,7 @@ static int __init pxafb_setup_options(void) return -ENODEV; if (options) - strlcpy(g_options, options, sizeof(g_options)); + strscpy(g_options, options, sizeof(g_options)); return 0; } diff --git a/drivers/video/fbdev/s3fb.c b/drivers/video/fbdev/s3fb.c index 5069f6f67923f..67b63a753cb27 100644 --- a/drivers/video/fbdev/s3fb.c +++ b/drivers/video/fbdev/s3fb.c @@ -248,7 +248,7 @@ static int s3fb_setup_ddc_bus(struct fb_info *info) { struct s3fb_info *par = info->par; - strlcpy(par->ddc_adapter.name, info->fix.id, + strscpy(par->ddc_adapter.name, info->fix.id, sizeof(par->ddc_adapter.name)); par->ddc_adapter.owner = THIS_MODULE; par->ddc_adapter.class = I2C_CLASS_DDC; diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c index cf2a90ecd64e0..e770b4a356b57 100644 --- a/drivers/video/fbdev/simplefb.c +++ b/drivers/video/fbdev/simplefb.c @@ -355,7 +355,7 @@ static int simplefb_regulators_get(struct simplefb_par *par, if (!p || p == prop->name) continue; - strlcpy(name, prop->name, + strscpy(name, prop->name, strlen(prop->name) - strlen(SUPPLY_SUFFIX) + 1); regulator = devm_regulator_get_optional(&pdev->dev, name); if (IS_ERR(regulator)) { diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c index d29ded67cecb2..c9e77429dfa30 100644 --- a/drivers/video/fbdev/sis/sis_main.c +++ b/drivers/video/fbdev/sis/sis_main.c @@ -1872,7 +1872,7 @@ sisfb_get_fix(struct fb_fix_screeninfo *fix, int con, struct fb_info *info) memset(fix, 0, sizeof(struct fb_fix_screeninfo)); - strlcpy(fix->id, ivideo->myid, sizeof(fix->id)); + strscpy(fix->id, ivideo->myid, sizeof(fix->id)); mutex_lock(&info->mm_lock); fix->smem_start = ivideo->video_base + ivideo->video_offset; @@ -5879,7 +5879,7 @@ static int sisfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ivideo->cardnumber++; } - strlcpy(ivideo->myid, chipinfo->chip_name, sizeof(ivideo->myid)); + strscpy(ivideo->myid, chipinfo->chip_name, sizeof(ivideo->myid)); ivideo->warncount = 0; ivideo->chip_id = pdev->device; diff --git a/drivers/video/fbdev/sm501fb.c b/drivers/video/fbdev/sm501fb.c index 6a52eba645596..fce6cfbadfd60 100644 --- a/drivers/video/fbdev/sm501fb.c +++ b/drivers/video/fbdev/sm501fb.c @@ -1719,7 +1719,7 @@ static int sm501fb_init_fb(struct fb_info *fb, enum sm501_controller head, enable = 0; } - strlcpy(fb->fix.id, fbname, sizeof(fb->fix.id)); + strscpy(fb->fix.id, fbname, sizeof(fb->fix.id)); memcpy(&par->ops, (head == HEAD_CRT) ? &sm501fb_ops_crt : &sm501fb_ops_pnl, diff --git a/drivers/video/fbdev/sstfb.c b/drivers/video/fbdev/sstfb.c index 27d4b0ace2d61..cd4d640f94779 100644 --- a/drivers/video/fbdev/sstfb.c +++ b/drivers/video/fbdev/sstfb.c @@ -1382,7 +1382,7 @@ static int sstfb_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto fail; } sst_get_memsize(info, &fix->smem_len); - strlcpy(fix->id, spec->name, sizeof(fix->id)); + strscpy(fix->id, spec->name, sizeof(fix->id)); printk(KERN_INFO "%s (revision %d) with %s dac\n", fix->id, par->revision, par->dac_sw.name); diff --git a/drivers/video/fbdev/sunxvr1000.c b/drivers/video/fbdev/sunxvr1000.c index 15b079505a000..490bd9a147638 100644 --- a/drivers/video/fbdev/sunxvr1000.c +++ b/drivers/video/fbdev/sunxvr1000.c @@ -80,7 +80,7 @@ static int gfb_set_fbinfo(struct gfb_info *gp) info->pseudo_palette = gp->pseudo_palette; /* Fill fix common fields */ - strlcpy(info->fix.id, "gfb", sizeof(info->fix.id)); + strscpy(info->fix.id, "gfb", sizeof(info->fix.id)); info->fix.smem_start = gp->fb_base_phys; info->fix.smem_len = gp->fb_size; info->fix.type = FB_TYPE_PACKED_PIXELS; diff --git a/drivers/video/fbdev/sunxvr2500.c b/drivers/video/fbdev/sunxvr2500.c index 1d3bacd9d5acd..1279b02234f87 100644 --- a/drivers/video/fbdev/sunxvr2500.c +++ b/drivers/video/fbdev/sunxvr2500.c @@ -84,7 +84,7 @@ static int s3d_set_fbinfo(struct s3d_info *sp) info->pseudo_palette = sp->pseudo_palette; /* Fill fix common fields */ - strlcpy(info->fix.id, "s3d", sizeof(info->fix.id)); + strscpy(info->fix.id, "s3d", sizeof(info->fix.id)); info->fix.smem_start = sp->fb_base_phys; info->fix.smem_len = sp->fb_size; info->fix.type = FB_TYPE_PACKED_PIXELS; diff --git a/drivers/video/fbdev/sunxvr500.c b/drivers/video/fbdev/sunxvr500.c index 9daf17b111065..f7b463633ba05 100644 --- a/drivers/video/fbdev/sunxvr500.c +++ b/drivers/video/fbdev/sunxvr500.c @@ -207,7 +207,7 @@ static int e3d_set_fbinfo(struct e3d_info *ep) info->pseudo_palette = ep->pseudo_palette; /* Fill fix common fields */ - strlcpy(info->fix.id, "e3d", sizeof(info->fix.id)); + strscpy(info->fix.id, "e3d", sizeof(info->fix.id)); info->fix.smem_start = ep->fb_base_phys; info->fix.smem_len = ep->fb_size; info->fix.type = FB_TYPE_PACKED_PIXELS; diff --git a/drivers/video/fbdev/tcx.c b/drivers/video/fbdev/tcx.c index 1638a40fed225..01d87f53324d9 100644 --- a/drivers/video/fbdev/tcx.c +++ b/drivers/video/fbdev/tcx.c @@ -333,7 +333,7 @@ tcx_init_fix(struct fb_info *info, int linebytes) else tcx_name = "TCX24"; - strlcpy(info->fix.id, tcx_name, sizeof(info->fix.id)); + strscpy(info->fix.id, tcx_name, sizeof(info->fix.id)); info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.visual = FB_VISUAL_PSEUDOCOLOR; diff --git a/drivers/video/fbdev/tdfxfb.c b/drivers/video/fbdev/tdfxfb.c index 67e37a62b07c3..8a8122f8bfeb3 100644 --- a/drivers/video/fbdev/tdfxfb.c +++ b/drivers/video/fbdev/tdfxfb.c @@ -1264,7 +1264,7 @@ static int tdfxfb_setup_ddc_bus(struct tdfxfb_i2c_chan *chan, const char *name, { int rc; - strlcpy(chan->adapter.name, name, sizeof(chan->adapter.name)); + strscpy(chan->adapter.name, name, sizeof(chan->adapter.name)); chan->adapter.owner = THIS_MODULE; chan->adapter.class = I2C_CLASS_DDC; chan->adapter.algo_data = &chan->algo; @@ -1293,7 +1293,7 @@ static int tdfxfb_setup_i2c_bus(struct tdfxfb_i2c_chan *chan, const char *name, { int rc; - strlcpy(chan->adapter.name, name, sizeof(chan->adapter.name)); + strscpy(chan->adapter.name, name, sizeof(chan->adapter.name)); chan->adapter.owner = THIS_MODULE; chan->adapter.algo_data = &chan->algo; chan->adapter.dev.parent = dev; diff --git a/drivers/video/fbdev/tgafb.c b/drivers/video/fbdev/tgafb.c index ae0cf55406369..1fff5fd7ab512 100644 --- a/drivers/video/fbdev/tgafb.c +++ b/drivers/video/fbdev/tgafb.c @@ -1344,7 +1344,7 @@ tgafb_init_fix(struct fb_info *info) memory_size = 16777216; } - strlcpy(info->fix.id, tga_type_name, sizeof(info->fix.id)); + strscpy(info->fix.id, tga_type_name, sizeof(info->fix.id)); info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.type_aux = 0; diff --git a/drivers/video/fbdev/tridentfb.c b/drivers/video/fbdev/tridentfb.c index 319131bd72cff..cda095420ee85 100644 --- a/drivers/video/fbdev/tridentfb.c +++ b/drivers/video/fbdev/tridentfb.c @@ -270,7 +270,7 @@ static int tridentfb_setup_ddc_bus(struct fb_info *info) { struct tridentfb_par *par = info->par; - strlcpy(par->ddc_adapter.name, info->fix.id, + strscpy(par->ddc_adapter.name, info->fix.id, sizeof(par->ddc_adapter.name)); par->ddc_adapter.owner = THIS_MODULE; par->ddc_adapter.class = I2C_CLASS_DDC; From b09da0126ce09bd84545e4a8e8697f2a5bdeb745 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 24 Aug 2022 09:29:45 +0200 Subject: [PATCH 341/654] MAINTAINERS: rectify file entry in BONDING DRIVER Commit c078290a2b76 ("selftests: include bonding tests into the kselftest infra") adds the bonding tests in the directory: tools/testing/selftests/drivers/net/bonding/ The file entry in MAINTAINERS for the BONDING DRIVER however refers to: tools/testing/selftests/net/bonding/ Hence, ./scripts/get_maintainer.pl --self-test=patterns complains about a broken file pattern. Repair this file entry in BONDING DRIVER. Signed-off-by: Lukas Bulwahn Acked-by: Jonathan Toppins Link: https://lore.kernel.org/r/20220824072945.28606-1-lukas.bulwahn@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 274b2c1e506e0..5a699275d9d10 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3679,7 +3679,7 @@ F: Documentation/networking/bonding.rst F: drivers/net/bonding/ F: include/net/bond* F: include/uapi/linux/if_bonding.h -F: tools/testing/selftests/net/bonding/ +F: tools/testing/selftests/drivers/net/bonding/ BOSCH SENSORTEC BMA400 ACCELEROMETER IIO DRIVER M: Dan Robertson From 9789de8bdc92a9ec95c9bc7b43e94de91acc4460 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 23 Aug 2022 20:52:00 +0800 Subject: [PATCH 342/654] cifs: Use help macro to get the header preamble size It's better to use HEADER_PREAMBLE_SIZE because the unfolded expression too long. No actual functional changes, minor readability improvement. Signed-off-by: Zhang Xiaoxu Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cifsencrypt.c | 2 +- fs/cifs/cifsglob.h | 1 + fs/cifs/connect.c | 20 ++++++++++---------- fs/cifs/transport.c | 21 ++++++++++----------- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 8f7835ccbca16..61a9fed56548e 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -32,7 +32,7 @@ int __cifs_calc_signature(struct smb_rqst *rqst, int rc; struct kvec *iov = rqst->rq_iov; int n_vec = rqst->rq_nvec; - int is_smb2 = server->vals->header_preamble_size == 0; + bool is_smb2 = HEADER_PREAMBLE_SIZE(server) == 0; /* iov[0] is actual data and not the rfc1002 length for SMB2+ */ if (is_smb2) { diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index f15d7b0c123d7..b15b84d03fb66 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -557,6 +557,7 @@ struct smb_version_values { #define HEADER_SIZE(server) (server->vals->header_size) #define MAX_HEADER_SIZE(server) (server->vals->max_header_size) +#define HEADER_PREAMBLE_SIZE(server) (server->vals->header_preamble_size) /** * CIFS superblock mount flags (mnt_cifs_flags) to consider when diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 3da5da9f16b0c..ccf7f32e0c3e2 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -871,7 +871,7 @@ smb2_get_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) /* * SMB1 does not use credits. */ - if (server->vals->header_preamble_size) + if (HEADER_PREAMBLE_SIZE(server)) return 0; return le16_to_cpu(shdr->CreditRequest); @@ -1050,7 +1050,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* make sure this will fit in a large buffer */ if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server) - - server->vals->header_preamble_size) { + HEADER_PREAMBLE_SIZE(server)) { cifs_server_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length); cifs_reconnect(server, true); return -ECONNABORTED; @@ -1065,8 +1065,8 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* now read the rest */ length = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1, - pdu_length - HEADER_SIZE(server) + 1 - + server->vals->header_preamble_size); + pdu_length - HEADER_SIZE(server) + 1 + + HEADER_PREAMBLE_SIZE(server)); if (length < 0) return length; @@ -1122,7 +1122,7 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) /* * SMB1 does not use credits. */ - if (server->vals->header_preamble_size) + if (HEADER_PREAMBLE_SIZE(server)) return; if (shdr->CreditRequest) { @@ -1180,7 +1180,7 @@ cifs_demultiplex_thread(void *p) if (length < 0) continue; - if (server->vals->header_preamble_size == 0) + if (HEADER_PREAMBLE_SIZE(server) == 0) server->total_read = 0; else server->total_read = length; @@ -1199,7 +1199,7 @@ cifs_demultiplex_thread(void *p) /* make sure we have enough to get to the MID */ if (server->pdu_size < HEADER_SIZE(server) - 1 - - server->vals->header_preamble_size) { + HEADER_PREAMBLE_SIZE(server)) { cifs_server_dbg(VFS, "SMB response too short (%u bytes)\n", server->pdu_size); cifs_reconnect(server, true); @@ -1208,9 +1208,9 @@ cifs_demultiplex_thread(void *p) /* read down to the MID */ length = cifs_read_from_socket(server, - buf + server->vals->header_preamble_size, - HEADER_SIZE(server) - 1 - - server->vals->header_preamble_size); + buf + HEADER_PREAMBLE_SIZE(server), + HEADER_SIZE(server) - 1 - + HEADER_PREAMBLE_SIZE(server)); if (length < 0) continue; server->total_read += length; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index de7aeced7e16b..bb1052dbac5bb 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -261,8 +261,8 @@ smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst) int nvec; unsigned long buflen = 0; - if (server->vals->header_preamble_size == 0 && - rqst->rq_nvec >= 2 && rqst->rq_iov[0].iov_len == 4) { + if (HEADER_PREAMBLE_SIZE(server) == 0 && rqst->rq_nvec >= 2 && + rqst->rq_iov[0].iov_len == 4) { iov = &rqst->rq_iov[1]; nvec = rqst->rq_nvec - 1; } else { @@ -346,7 +346,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, sigprocmask(SIG_BLOCK, &mask, &oldmask); /* Generate a rfc1002 marker for SMB2+ */ - if (server->vals->header_preamble_size == 0) { + if (HEADER_PREAMBLE_SIZE(server) == 0) { struct kvec hiov = { .iov_base = &rfc1002_marker, .iov_len = 4 @@ -1238,7 +1238,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, buf = (char *)midQ[i]->resp_buf; resp_iov[i].iov_base = buf; resp_iov[i].iov_len = midQ[i]->resp_buf_size + - server->vals->header_preamble_size; + HEADER_PREAMBLE_SIZE(server); if (midQ[i]->large_buf) resp_buf_type[i] = CIFS_LARGE_BUFFER; @@ -1643,7 +1643,7 @@ int cifs_discard_remaining_data(struct TCP_Server_Info *server) { unsigned int rfclen = server->pdu_size; - int remaining = rfclen + server->vals->header_preamble_size - + int remaining = rfclen + HEADER_PREAMBLE_SIZE(server) - server->total_read; while (remaining > 0) { @@ -1689,8 +1689,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) unsigned int data_offset, data_len; struct cifs_readdata *rdata = mid->callback_data; char *buf = server->smallbuf; - unsigned int buflen = server->pdu_size + - server->vals->header_preamble_size; + unsigned int buflen = server->pdu_size + HEADER_PREAMBLE_SIZE(server); bool use_rdma_mr = false; cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%u\n", @@ -1724,10 +1723,10 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* set up first two iov for signature check and to get credits */ rdata->iov[0].iov_base = buf; - rdata->iov[0].iov_len = server->vals->header_preamble_size; - rdata->iov[1].iov_base = buf + server->vals->header_preamble_size; + rdata->iov[0].iov_len = HEADER_PREAMBLE_SIZE(server); + rdata->iov[1].iov_base = buf + HEADER_PREAMBLE_SIZE(server); rdata->iov[1].iov_len = - server->total_read - server->vals->header_preamble_size; + server->total_read - HEADER_PREAMBLE_SIZE(server); cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n", rdata->iov[0].iov_base, rdata->iov[0].iov_len); cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n", @@ -1752,7 +1751,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) } data_offset = server->ops->read_data_offset(buf) + - server->vals->header_preamble_size; + HEADER_PREAMBLE_SIZE(server); if (data_offset < server->total_read) { /* * win2k8 sometimes sends an offset of 0 when the read From b6b3624d016b980f917b46e6b964f943ac5ac56e Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 23 Aug 2022 20:52:01 +0800 Subject: [PATCH 343/654] cifs: Use help macro to get the mid header size It's better to use MID_HEADER_SIZE because the unfolded expression too long. No actual functional changes, minor readability improvement. Signed-off-by: Zhang Xiaoxu Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 1 + fs/cifs/connect.c | 9 +++------ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index b15b84d03fb66..6c8293314530d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -558,6 +558,7 @@ struct smb_version_values { #define HEADER_SIZE(server) (server->vals->header_size) #define MAX_HEADER_SIZE(server) (server->vals->max_header_size) #define HEADER_PREAMBLE_SIZE(server) (server->vals->header_preamble_size) +#define MID_HEADER_SIZE(server) (HEADER_SIZE(server) - 1 - HEADER_PREAMBLE_SIZE(server)) /** * CIFS superblock mount flags (mnt_cifs_flags) to consider when diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ccf7f32e0c3e2..0123f41c26f52 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1065,8 +1065,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* now read the rest */ length = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1, - pdu_length - HEADER_SIZE(server) + 1 + - HEADER_PREAMBLE_SIZE(server)); + pdu_length - MID_HEADER_SIZE(server)); if (length < 0) return length; @@ -1198,8 +1197,7 @@ cifs_demultiplex_thread(void *p) server->pdu_size = pdu_length; /* make sure we have enough to get to the MID */ - if (server->pdu_size < HEADER_SIZE(server) - 1 - - HEADER_PREAMBLE_SIZE(server)) { + if (server->pdu_size < MID_HEADER_SIZE(server)) { cifs_server_dbg(VFS, "SMB response too short (%u bytes)\n", server->pdu_size); cifs_reconnect(server, true); @@ -1209,8 +1207,7 @@ cifs_demultiplex_thread(void *p) /* read down to the MID */ length = cifs_read_from_socket(server, buf + HEADER_PREAMBLE_SIZE(server), - HEADER_SIZE(server) - 1 - - HEADER_PREAMBLE_SIZE(server)); + MID_HEADER_SIZE(server)); if (length < 0) continue; server->total_read += length; From d291e703f420d5f8f999fe54f360d54d213bddb4 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 23 Aug 2022 20:52:02 +0800 Subject: [PATCH 344/654] cifs: Add helper function to check smb1+ server SMB1 server's header_preamble_size is not 0, add use is_smb1 function to simplify the code, no actual functional changes. Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Zhang Xiaoxu Signed-off-by: Steve French --- fs/cifs/cifsencrypt.c | 3 +-- fs/cifs/cifsglob.h | 5 +++++ fs/cifs/connect.c | 10 +++++----- fs/cifs/transport.c | 4 ++-- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 61a9fed56548e..46f5718754f94 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -32,10 +32,9 @@ int __cifs_calc_signature(struct smb_rqst *rqst, int rc; struct kvec *iov = rqst->rq_iov; int n_vec = rqst->rq_nvec; - bool is_smb2 = HEADER_PREAMBLE_SIZE(server) == 0; /* iov[0] is actual data and not the rfc1002 length for SMB2+ */ - if (is_smb2) { + if (!is_smb1(server)) { if (iov[0].iov_len <= 4) return -EIO; i = 0; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 6c8293314530d..ae7f571a7dba2 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -752,6 +752,11 @@ struct TCP_Server_Info { #endif }; +static inline bool is_smb1(struct TCP_Server_Info *server) +{ + return HEADER_PREAMBLE_SIZE(server) != 0; +} + static inline void cifs_server_lock(struct TCP_Server_Info *server) { unsigned int nofs_flag = memalloc_nofs_save(); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0123f41c26f52..a0a06b6f252be 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -871,7 +871,7 @@ smb2_get_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) /* * SMB1 does not use credits. */ - if (HEADER_PREAMBLE_SIZE(server)) + if (is_smb1(server)) return 0; return le16_to_cpu(shdr->CreditRequest); @@ -1121,7 +1121,7 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) /* * SMB1 does not use credits. */ - if (HEADER_PREAMBLE_SIZE(server)) + if (is_smb1(server)) return; if (shdr->CreditRequest) { @@ -1179,10 +1179,10 @@ cifs_demultiplex_thread(void *p) if (length < 0) continue; - if (HEADER_PREAMBLE_SIZE(server) == 0) - server->total_read = 0; - else + if (is_smb1(server)) server->total_read = length; + else + server->total_read = 0; /* * The right amount was read from socket - 4 bytes, diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index bb1052dbac5bb..c2fe035e573ba 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -261,7 +261,7 @@ smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst) int nvec; unsigned long buflen = 0; - if (HEADER_PREAMBLE_SIZE(server) == 0 && rqst->rq_nvec >= 2 && + if (!is_smb1(server) && rqst->rq_nvec >= 2 && rqst->rq_iov[0].iov_len == 4) { iov = &rqst->rq_iov[1]; nvec = rqst->rq_nvec - 1; @@ -346,7 +346,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, sigprocmask(SIG_BLOCK, &mask, &oldmask); /* Generate a rfc1002 marker for SMB2+ */ - if (HEADER_PREAMBLE_SIZE(server) == 0) { + if (!is_smb1(server)) { struct kvec hiov = { .iov_base = &rfc1002_marker, .iov_len = 4 From 0382a35bef70ecc074db67192ff8d37737d02b21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 20 Aug 2022 13:51:13 +0200 Subject: [PATCH 345/654] powerpc/pci: Enable PCI domains in /proc when PCI bus numbers are not unique MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 32-bit powerpc systems with more PCIe controllers and more PCI domains, where on more PCI domains are same PCI numbers, when kernel is compiled with CONFIG_PROC_FS=y and CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT=y options, kernel prints "proc_dir_entry 'pci/01' already registered" error message. proc_dir_entry 'pci/01' already registered WARNING: CPU: 0 PID: 1 at fs/proc/generic.c:377 proc_register+0x1a8/0x1ac ... NIP proc_register+0x1a8/0x1ac LR proc_register+0x1a8/0x1ac Call Trace: proc_register+0x1a8/0x1ac (unreliable) _proc_mkdir+0x78/0xa4 pci_proc_attach_device+0x11c/0x168 pci_proc_init+0x80/0x98 do_one_initcall+0x80/0x284 kernel_init_freeable+0x1f4/0x2a0 kernel_init+0x24/0x150 ret_from_kernel_thread+0x5c/0x64 This regression started appearing after commit 566356813082 ("powerpc/pci: Add config option for using all 256 PCI buses") in case in each mPCIe slot is connected PCIe card and therefore PCI bus 1 is populated in for every PCIe controller / PCI domain. The reason is that PCI procfs code expects that when PCI bus numbers are not unique across all PCI domains, function pci_proc_domain() returns true for domain dependent buses. Fix this issue by setting PCI_ENABLE_PROC_DOMAINS and PCI_COMPAT_DOMAIN_0 flags for 32-bit powerpc code when CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT is enabled. Same approach is already implemented for 64-bit powerpc code (where PCI bus numbers are always domain dependent). Fixes: 566356813082 ("powerpc/pci: Add config option for using all 256 PCI buses") Signed-off-by: Pali Rohár [mpe: Trim change log oops message] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220820115113.30581-1-pali@kernel.org --- arch/powerpc/kernel/pci_32.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 433965bf37b4b..855b59892c5c9 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -245,6 +245,15 @@ static int __init pcibios_init(void) printk(KERN_INFO "PCI: Probing PCI hardware\n"); +#ifdef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT + /* + * Enable PCI domains in /proc when PCI bus numbers are not unique + * across all PCI domains to prevent conflicts. And keep PCI domain 0 + * backward compatible in /proc for video cards. + */ + pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0); +#endif + if (pci_has_flag(PCI_REASSIGN_ALL_BUS)) pci_assign_all_buses = 1; From 60deb9f10eec5c6a20252ed36238b55d8b614a2c Mon Sep 17 00:00:00 2001 From: Siddh Raman Pant Date: Sat, 20 Aug 2022 01:33:40 +0530 Subject: [PATCH 346/654] wifi: mac80211: Fix UAF in ieee80211_scan_rx() ieee80211_scan_rx() tries to access scan_req->flags after a null check, but a UAF is observed when the scan is completed and __ieee80211_scan_completed() executes, which then calls cfg80211_scan_done() leading to the freeing of scan_req. Since scan_req is rcu_dereference()'d, prevent the racing in __ieee80211_scan_completed() by ensuring that from mac80211's POV it is no longer accessed from an RCU read critical section before we call cfg80211_scan_done(). Cc: stable@vger.kernel.org Link: https://syzkaller.appspot.com/bug?extid=f9acff9bf08a845f225d Reported-by: syzbot+f9acff9bf08a845f225d@syzkaller.appspotmail.com Suggested-by: Johannes Berg Signed-off-by: Siddh Raman Pant Link: https://lore.kernel.org/r/20220819200340.34826-1-code@siddh.me Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index fa8ddf576bc1c..c4f2aeb31da3a 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -469,16 +469,19 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) scan_req = rcu_dereference_protected(local->scan_req, lockdep_is_held(&local->mtx)); - if (scan_req != local->int_scan_req) { - local->scan_info.aborted = aborted; - cfg80211_scan_done(scan_req, &local->scan_info); - } RCU_INIT_POINTER(local->scan_req, NULL); RCU_INIT_POINTER(local->scan_sdata, NULL); local->scanning = 0; local->scan_chandef.chan = NULL; + synchronize_rcu(); + + if (scan_req != local->int_scan_req) { + local->scan_info.aborted = aborted; + cfg80211_scan_done(scan_req, &local->scan_info); + } + /* Set power back to normal operating levels. */ ieee80211_hw_config(local, 0); From 36fe8e4e5cb02131719612aea1e64379670d1846 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 23 Aug 2022 15:22:23 +0200 Subject: [PATCH 347/654] wifi: mac80211: always free sta in __sta_info_alloc in case of error Free sta pointer in __sta_info_alloc routine if sta_info_alloc_link() fails. Fixes: 246b39e4a1ba5 ("wifi: mac80211: refactor some sta_info link handling") Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/a3d079208684cddbc25289f7f7e0fed795b0cad4.1661260857.git.lorenzo@kernel.org Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index cb23da9aff1e6..330dab41f2fef 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -494,7 +494,7 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->sdata = sdata; if (sta_info_alloc_link(local, &sta->deflink, gfp)) - return NULL; + goto free; if (link_id >= 0) { sta_info_add_link(sta, link_id, &sta->deflink, From 62b03f45c6352410d13bf0710d24ef6290632eb1 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 18 Aug 2022 12:33:49 +0800 Subject: [PATCH 348/654] wifi: mac80211: fix possible leak in ieee80211_tx_control_port() Add missing dev_kfree_skb() in an error path in ieee80211_tx_control_port() to avoid a memory leak. Fixes: dd820ed6336a ("wifi: mac80211: return error from control port TX for drops") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220818043349.4168835-1-yangyingliang@huawei.com Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 45df9932d0ba1..594bd70ee641a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5885,6 +5885,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, rcu_read_lock(); err = ieee80211_lookup_ra_sta(sdata, skb, &sta); if (err) { + dev_kfree_skb(skb); rcu_read_unlock(); return err; } From 15bc8966b6d3a5b9bfe4c9facfa02f2b69b1e5f0 Mon Sep 17 00:00:00 2001 From: Siddh Raman Pant Date: Sun, 14 Aug 2022 20:45:12 +0530 Subject: [PATCH 349/654] wifi: mac80211: Don't finalize CSA in IBSS mode if state is disconnected When we are not connected to a channel, sending channel "switch" announcement doesn't make any sense. The BSS list is empty in that case. This causes the for loop in cfg80211_get_bss() to be bypassed, so the function returns NULL (check line 1424 of net/wireless/scan.c), causing the WARN_ON() in ieee80211_ibss_csa_beacon() to get triggered (check line 500 of net/mac80211/ibss.c), which was consequently reported on the syzkaller dashboard. Thus, check if we have an existing connection before generating the CSA beacon in ieee80211_ibss_finish_csa(). Cc: stable@vger.kernel.org Fixes: cd7760e62c2a ("mac80211: add support for CSA in IBSS mode") Link: https://syzkaller.appspot.com/bug?id=05603ef4ae8926761b678d2939a3b2ad28ab9ca6 Reported-by: syzbot+b6c9fe29aefe68e4ad34@syzkaller.appspotmail.com Signed-off-by: Siddh Raman Pant Tested-by: syzbot+b6c9fe29aefe68e4ad34@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20220814151512.9985-1-code@siddh.me Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index d56890e3fabb3..9b283bbc7bb4b 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -530,6 +530,10 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) sdata_assert_lock(sdata); + /* When not connected/joined, sending CSA doesn't make sense. */ + if (ifibss->state != IEEE80211_IBSS_MLME_JOINED) + return -ENOLINK; + /* update cfg80211 bss information with the new channel */ if (!is_zero_ether_addr(ifibss->bssid)) { cbss = cfg80211_get_bss(sdata->local->hw.wiphy, From d776763f48084926b5d9e25507a3ddb7c9243d5e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 Aug 2022 10:03:21 +0300 Subject: [PATCH 350/654] wifi: cfg80211: debugfs: fix return type in ht40allow_map_read() The return type is supposed to be ssize_t, which is signed long, but "r" was declared as unsigned int. This means that on 64 bit systems we return positive values instead of negative error codes. Fixes: 80a3511d70e8 ("cfg80211: add debugfs HT40 allow map") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/YutvOQeJm0UjLhwU@kili Signed-off-by: Johannes Berg --- net/wireless/debugfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index aab43469a2f04..0878b162890af 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -65,9 +65,10 @@ static ssize_t ht40allow_map_read(struct file *file, { struct wiphy *wiphy = file->private_data; char *buf; - unsigned int offset = 0, buf_size = PAGE_SIZE, i, r; + unsigned int offset = 0, buf_size = PAGE_SIZE, i; enum nl80211_band band; struct ieee80211_supported_band *sband; + ssize_t r; buf = kzalloc(buf_size, GFP_KERNEL); if (!buf) From 55f0a4894484e8d6ddf662f5aebbf3b4cb028541 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 28 Jul 2022 17:25:16 +0300 Subject: [PATCH 351/654] wifi: mac80211: potential NULL dereference in ieee80211_tx_control_port() The ieee80211_lookup_ra_sta() function will sometimes set "sta" to NULL so add this NULL check to prevent an Oops. Fixes: 9dd1953846c7 ("wifi: nl80211/mac80211: clarify link ID in control port TX") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/YuKcTAyO94YOy0Bu@kili Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 594bd70ee641a..bf7fe6cd9dfca 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5900,7 +5900,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, * for MLO STA, the SA should be the AP MLD address, but * the link ID has been selected already */ - if (sta->sta.mlo) + if (sta && sta->sta.mlo) memcpy(ehdr->h_source, sdata->vif.addr, ETH_ALEN); } rcu_read_unlock(); From cd11d1a6114bd4bc6450ae59f6e110ec47362126 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 8 Jul 2022 08:40:09 +0100 Subject: [PATCH 352/654] HID: steam: Prevent NULL pointer dereference in steam_{recv,send}_report It is possible for a malicious device to forgo submitting a Feature Report. The HID Steam driver presently makes no prevision for this and de-references the 'struct hid_report' pointer obtained from the HID devices without first checking its validity. Let's change that. Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: linux-input@vger.kernel.org Fixes: c164d6abf3841 ("HID: add driver for Valve Steam Controller") Signed-off-by: Lee Jones Signed-off-by: Jiri Kosina --- drivers/hid/hid-steam.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index a3b151b29bd71..fc616db4231bb 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -134,6 +134,11 @@ static int steam_recv_report(struct steam_device *steam, int ret; r = steam->hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[0]; + if (!r) { + hid_err(steam->hdev, "No HID_FEATURE_REPORT submitted - nothing to read\n"); + return -EINVAL; + } + if (hid_report_len(r) < 64) return -EINVAL; @@ -165,6 +170,11 @@ static int steam_send_report(struct steam_device *steam, int ret; r = steam->hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[0]; + if (!r) { + hid_err(steam->hdev, "No HID_FEATURE_REPORT submitted - nothing to read\n"); + return -EINVAL; + } + if (hid_report_len(r) < 64) return -EINVAL; From a5623a203cffe2d2b84d2f6c989d9017db1856af Mon Sep 17 00:00:00 2001 From: Karthik Alapati Date: Thu, 28 Jul 2022 21:13:17 +0530 Subject: [PATCH 353/654] HID: hidraw: fix memory leak in hidraw_release() Free the buffered reports before deleting the list entry. BUG: memory leak unreferenced object 0xffff88810e72f180 (size 32): comm "softirq", pid 0, jiffies 4294945143 (age 16.080s) hex dump (first 32 bytes): 64 f3 c6 6a d1 88 07 04 00 00 00 00 00 00 00 00 d..j............ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemdup+0x23/0x50 mm/util.c:128 [] kmemdup include/linux/fortify-string.h:440 [inline] [] hidraw_report_event+0xa2/0x150 drivers/hid/hidraw.c:521 [] hid_report_raw_event+0x27d/0x740 drivers/hid/hid-core.c:1992 [] hid_input_report+0x1ae/0x270 drivers/hid/hid-core.c:2065 [] hid_irq_in+0x1ff/0x250 drivers/hid/usbhid/hid-core.c:284 [] __usb_hcd_giveback_urb+0xf9/0x230 drivers/usb/core/hcd.c:1670 [] usb_hcd_giveback_urb+0x1b6/0x1d0 drivers/usb/core/hcd.c:1747 [] dummy_timer+0x8e4/0x14c0 drivers/usb/gadget/udc/dummy_hcd.c:1988 [] call_timer_fn+0x38/0x200 kernel/time/timer.c:1474 [] expire_timers kernel/time/timer.c:1519 [inline] [] __run_timers.part.0+0x316/0x430 kernel/time/timer.c:1790 [] __run_timers kernel/time/timer.c:1768 [inline] [] run_timer_softirq+0x44/0x90 kernel/time/timer.c:1803 [] __do_softirq+0xe6/0x2ea kernel/softirq.c:571 [] invoke_softirq kernel/softirq.c:445 [inline] [] __irq_exit_rcu kernel/softirq.c:650 [inline] [] irq_exit_rcu+0xc0/0x110 kernel/softirq.c:662 [] sysvec_apic_timer_interrupt+0xa2/0xd0 arch/x86/kernel/apic/apic.c:1106 [] asm_sysvec_apic_timer_interrupt+0x1b/0x20 arch/x86/include/asm/idtentry.h:649 [] native_safe_halt arch/x86/include/asm/irqflags.h:51 [inline] [] arch_safe_halt arch/x86/include/asm/irqflags.h:89 [inline] [] acpi_safe_halt drivers/acpi/processor_idle.c:111 [inline] [] acpi_idle_do_entry+0xc0/0xd0 drivers/acpi/processor_idle.c:554 Link: https://syzkaller.appspot.com/bug?id=19a04b43c75ed1092021010419b5e560a8172c4f Reported-by: syzbot+f59100a0428e6ded9443@syzkaller.appspotmail.com Signed-off-by: Karthik Alapati Signed-off-by: Jiri Kosina --- drivers/hid/hidraw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 681614a8302a5..197b1e7bf029e 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -350,6 +350,8 @@ static int hidraw_release(struct inode * inode, struct file * file) down_write(&minors_rwsem); spin_lock_irqsave(&hidraw_table[minor]->list_lock, flags); + for (int i = list->tail; i < list->head; i++) + kfree(list->buffer[i].value); list_del(&list->node); spin_unlock_irqrestore(&hidraw_table[minor]->list_lock, flags); kfree(list); From 1c0cc9d11c665020cbeb80e660fb8929164407f4 Mon Sep 17 00:00:00 2001 From: Josh Kilmer Date: Thu, 28 Jul 2022 12:51:11 -0500 Subject: [PATCH 354/654] HID: asus: ROG NKey: Ignore portion of 0x5a report On an Asus G513QY, of the 5 bytes in a 0x5a report, only the first byte is a meaningful keycode. The other bytes are zeroed out or hold garbage from the last packet sent to the keyboard. This patch fixes up the report descriptor for this event so that the general hid code will only process 1 byte for keycodes, avoiding spurious key events and unmapped Asus vendor usagepage code warnings. Signed-off-by: Josh Kilmer Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 08c9a9a60ae47..b59c3dafa6a48 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1212,6 +1212,13 @@ static __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc = new_rdesc; } + if (drvdata->quirks & QUIRK_ROG_NKEY_KEYBOARD && + *rsize == 331 && rdesc[190] == 0x85 && rdesc[191] == 0x5a && + rdesc[204] == 0x95 && rdesc[205] == 0x05) { + hid_info(hdev, "Fixing up Asus N-KEY keyb report descriptor\n"); + rdesc[205] = 0x01; + } + return rdesc; } From 94553f8a218540d676efbf3f7827ed493d1057cf Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 4 Aug 2022 08:58:14 +0800 Subject: [PATCH 355/654] HID: ishtp-hid-clientHID: ishtp-hid-client: Fix comment typo The double `like' is duplicated in the comment, remove one. Signed-off-by: Jason Wang Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp-hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.h b/drivers/hid/intel-ish-hid/ishtp-hid.h index 6a5cc11aefd89..35dddc5015b37 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid.h +++ b/drivers/hid/intel-ish-hid/ishtp-hid.h @@ -105,7 +105,7 @@ struct report_list { * @multi_packet_cnt: Count of fragmented packet count * * This structure is used to store completion flags and per client data like - * like report description, number of HID devices etc. + * report description, number of HID devices etc. */ struct ishtp_cl_data { /* completion flags */ From e1fa076706209cc447d7a2abd0843a18277e5ef7 Mon Sep 17 00:00:00 2001 From: Even Xu Date: Thu, 4 Aug 2022 08:59:19 +0800 Subject: [PATCH 356/654] hid: intel-ish-hid: ishtp: Fix ishtp client sending disordered message There is a timing issue captured during ishtp client sending stress tests. It was observed during stress tests that ISH firmware is getting out of ordered messages. This is a rare scenario as the current set of ISH client drivers don't send much data to firmware. But this may not be the case going forward. When message size is bigger than IPC MTU, ishtp splits the message into fragments and uses serialized async method to send message fragments. The call stack: ishtp_cl_send_msg_ipc->ipc_tx_callback(first fregment)-> ishtp_send_msg(with callback)->write_ipc_to_queue-> write_ipc_from_queue->callback->ipc_tx_callback(next fregment)...... When an ipc write complete interrupt is received, driver also calls write_ipc_from_queue->ipc_tx_callback in ISR to start sending of next fragment. Through ipc_tx_callback uses spin_lock to protect message splitting, as the serialized sending method will call back to ipc_tx_callback again, so it doesn't put sending under spin_lock, it causes driver cannot guarantee all fragments be sent in order. Considering this scenario: ipc_tx_callback just finished a fragment splitting, and not call ishtp_send_msg yet, there is a write complete interrupt happens, then ISR->write_ipc_from_queue ->ipc_tx_callback->ishtp_send_msg->write_ipc_to_queue...... Because ISR has higher exec priority than normal thread, this causes the new fragment be sent out before previous fragment. This disordered message causes invalid message to firmware. The solution is, to send fragments synchronously: Use ishtp_write_message writing fragments into tx queue directly one by one, instead of ishtp_send_msg only writing one fragment with completion callback. As no completion callback be used, so change ipc_tx_callback to ipc_tx_send. Signed-off-by: Even Xu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp/client.c | 68 ++++++++++++++---------- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/drivers/hid/intel-ish-hid/ishtp/client.c b/drivers/hid/intel-ish-hid/ishtp/client.c index 405e0d5212cc8..df0a825694f52 100644 --- a/drivers/hid/intel-ish-hid/ishtp/client.c +++ b/drivers/hid/intel-ish-hid/ishtp/client.c @@ -626,13 +626,14 @@ static void ishtp_cl_read_complete(struct ishtp_cl_rb *rb) } /** - * ipc_tx_callback() - IPC tx callback function + * ipc_tx_send() - IPC tx send function * @prm: Pointer to client device instance * - * Send message over IPC either first time or on callback on previous message - * completion + * Send message over IPC. Message will be split into fragments + * if message size is bigger than IPC FIFO size, and all + * fragments will be sent one by one. */ -static void ipc_tx_callback(void *prm) +static void ipc_tx_send(void *prm) { struct ishtp_cl *cl = prm; struct ishtp_cl_tx_ring *cl_msg; @@ -677,32 +678,41 @@ static void ipc_tx_callback(void *prm) list); rem = cl_msg->send_buf.size - cl->tx_offs; - ishtp_hdr.host_addr = cl->host_client_id; - ishtp_hdr.fw_addr = cl->fw_client_id; - ishtp_hdr.reserved = 0; - pmsg = cl_msg->send_buf.data + cl->tx_offs; + while (rem > 0) { + ishtp_hdr.host_addr = cl->host_client_id; + ishtp_hdr.fw_addr = cl->fw_client_id; + ishtp_hdr.reserved = 0; + pmsg = cl_msg->send_buf.data + cl->tx_offs; + + if (rem <= dev->mtu) { + /* Last fragment or only one packet */ + ishtp_hdr.length = rem; + ishtp_hdr.msg_complete = 1; + /* Submit to IPC queue with no callback */ + ishtp_write_message(dev, &ishtp_hdr, pmsg); + cl->tx_offs = 0; + cl->sending = 0; - if (rem <= dev->mtu) { - ishtp_hdr.length = rem; - ishtp_hdr.msg_complete = 1; - cl->sending = 0; - list_del_init(&cl_msg->list); /* Must be before write */ - spin_unlock_irqrestore(&cl->tx_list_spinlock, tx_flags); - /* Submit to IPC queue with no callback */ - ishtp_write_message(dev, &ishtp_hdr, pmsg); - spin_lock_irqsave(&cl->tx_free_list_spinlock, tx_free_flags); - list_add_tail(&cl_msg->list, &cl->tx_free_list.list); - ++cl->tx_ring_free_size; - spin_unlock_irqrestore(&cl->tx_free_list_spinlock, - tx_free_flags); - } else { - /* Send IPC fragment */ - spin_unlock_irqrestore(&cl->tx_list_spinlock, tx_flags); - cl->tx_offs += dev->mtu; - ishtp_hdr.length = dev->mtu; - ishtp_hdr.msg_complete = 0; - ishtp_send_msg(dev, &ishtp_hdr, pmsg, ipc_tx_callback, cl); + break; + } else { + /* Send ipc fragment */ + ishtp_hdr.length = dev->mtu; + ishtp_hdr.msg_complete = 0; + /* All fregments submitted to IPC queue with no callback */ + ishtp_write_message(dev, &ishtp_hdr, pmsg); + cl->tx_offs += dev->mtu; + rem = cl_msg->send_buf.size - cl->tx_offs; + } } + + list_del_init(&cl_msg->list); + spin_unlock_irqrestore(&cl->tx_list_spinlock, tx_flags); + + spin_lock_irqsave(&cl->tx_free_list_spinlock, tx_free_flags); + list_add_tail(&cl_msg->list, &cl->tx_free_list.list); + ++cl->tx_ring_free_size; + spin_unlock_irqrestore(&cl->tx_free_list_spinlock, + tx_free_flags); } /** @@ -720,7 +730,7 @@ static void ishtp_cl_send_msg_ipc(struct ishtp_device *dev, return; cl->tx_offs = 0; - ipc_tx_callback(cl); + ipc_tx_send(cl); ++cl->send_msg_cnt_ipc; } From d9a17651f3749e69890db57ca66e677dfee70829 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20H=C3=BCbner?= Date: Fri, 5 Aug 2022 10:05:23 +0200 Subject: [PATCH 357/654] HID: thrustmaster: Add sparco wheel and fix array length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add device id for the Sparco R383 Mod wheel. Fix wheel info array length to match actual wheel count present in the array. Signed-off-by: Michael Hübner Signed-off-by: Jiri Kosina --- drivers/hid/hid-thrustmaster.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c index c3e6d69fdfbd9..cf1679b0d4fbb 100644 --- a/drivers/hid/hid-thrustmaster.c +++ b/drivers/hid/hid-thrustmaster.c @@ -67,12 +67,13 @@ static const struct tm_wheel_info tm_wheels_infos[] = { {0x0200, 0x0005, "Thrustmaster T300RS (Missing Attachment)"}, {0x0206, 0x0005, "Thrustmaster T300RS"}, {0x0209, 0x0005, "Thrustmaster T300RS (Open Wheel Attachment)"}, + {0x020a, 0x0005, "Thrustmaster T300RS (Sparco R383 Mod)"}, {0x0204, 0x0005, "Thrustmaster T300 Ferrari Alcantara Edition"}, {0x0002, 0x0002, "Thrustmaster T500RS"} //{0x0407, 0x0001, "Thrustmaster TMX"} }; -static const uint8_t tm_wheels_infos_length = 4; +static const uint8_t tm_wheels_infos_length = 7; /* * This structs contains (in little endian) the response data From adada3f4930ac084740ea340bd8e94028eba4f22 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Tue, 16 Aug 2022 19:21:20 +0900 Subject: [PATCH 358/654] HID: AMD_SFH: Add a DMI quirk entry for Chromebooks Google Chromebooks use Chrome OS Embedded Controller Sensor Hub instead of Sensor Hub Fusion and leaves MP2 uninitialized, which disables all functionalities, even including the registers necessary for feature detections. The behavior was observed with Lenovo ThinkPad C13 Yoga. Signed-off-by: Akihiko Odaki Suggested-by: Mario Limonciello Acked-by: Basavaraj Natikar Signed-off-by: Jiri Kosina --- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index 4b90c86ee5f8f..47774b9ab3de0 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -288,11 +288,29 @@ int amd_sfh_irq_init(struct amd_mp2_dev *privdata) return 0; } +static const struct dmi_system_id dmi_nodevs[] = { + { + /* + * Google Chromebooks use Chrome OS Embedded Controller Sensor + * Hub instead of Sensor Hub Fusion and leaves MP2 + * uninitialized, which disables all functionalities, even + * including the registers necessary for feature detections. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Google"), + }, + }, + { } +}; + static int amd_mp2_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct amd_mp2_dev *privdata; int rc; + if (dmi_first_match(dmi_nodevs)) + return -ENODEV; + privdata = devm_kzalloc(&pdev->dev, sizeof(*privdata), GFP_KERNEL); if (!privdata) return -ENOMEM; From 3a47fa7b14c7d9613909a844aba27f99d3c58634 Mon Sep 17 00:00:00 2001 From: Steev Klimaszewski Date: Thu, 18 Aug 2022 21:39:24 -0500 Subject: [PATCH 359/654] HID: add Lenovo Yoga C630 battery quirk Similar to the Surface Go devices, the Elantech touchscreen/digitizer in the Lenovo Yoga C630 mistakenly reports the battery of the stylus, and always reports an empty battery. Apply the HID_BATTERY_QUIRK_IGNORE quirk to ignore this battery and prevent the erroneous low battery warnings. Signed-off-by: Steev Klimaszewski Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-input.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 0fb720a96399a..347d783da82c7 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -414,6 +414,7 @@ #define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706 #define I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN 0x261A #define I2C_DEVICE_ID_SURFACE_GO2_TOUCHSCREEN 0x2A1C +#define I2C_DEVICE_ID_LENOVO_YOGA_C630_TOUCHSCREEN 0x279F #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 48c1c02c69f4e..17a453bb09a2d 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -383,6 +383,8 @@ static const struct hid_device_id hid_battery_quirks[] = { HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO2_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, + { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_LENOVO_YOGA_C630_TOUCHSCREEN), + HID_BATTERY_QUIRK_IGNORE }, {} }; From 750ec977288d96e9a11424e3507ede097af732c4 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Sun, 21 Aug 2022 08:04:45 +0000 Subject: [PATCH 360/654] HID: Add Apple Touchbar on T2 Macs in hid_have_special_driver list The touchbar on Apple T2 Macs has 2 modes, one that shows the function keys and other that shows the media controls. The user can use the fn key on his keyboard to switch between the 2 modes. On Linux, if people were using an external keyboard or mouse, the touchbar failed to change modes on pressing the fn key with the following in dmesg :- [ 10.661445] apple-ib-als 0003:05AC:8262.0001: : USB HID v1.01 Device [Apple Inc. Ambient Light Sensor] on usb-bce-vhci-3/input0 [ 11.830992] apple-ib-touchbar 0003:05AC:8302.0007: input: USB HID v1.01 Keyboard [Apple Inc. Touch Bar Display] on usb-bce-vhci-6/input0 [ 12.139407] apple-ib-touchbar 0003:05AC:8102.0008: : USB HID v1.01 Device [Apple Inc. Touch Bar Backlight] on usb-bce-vhci-7/input0 [ 12.211824] apple-ib-touchbar 0003:05AC:8102.0009: : USB HID v1.01 Device [Apple Inc. Touch Bar Backlight] on usb-bce-vhci-7/input1 [ 14.219759] apple-ib-touchbar 0003:05AC:8302.0007: tb: Failed to set touch bar mode to 2 (-110) [ 24.395670] apple-ib-touchbar 0003:05AC:8302.0007: tb: Failed to set touch bar mode to 2 (-110) [ 34.635791] apple-ib-touchbar 0003:05AC:8302.0007: tb: Failed to set touch bar mode to 2 (-110) [ 269.579233] apple-ib-touchbar 0003:05AC:8302.0007: tb: Failed to set touch bar mode to 1 (-110) Add the USB IDs of the touchbar found in T2 Macs to HID have special driver list to fix the issue. Signed-off-by: Aditya Garg Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 2 ++ drivers/hid/hid-quirks.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 347d783da82c7..f80d6193fca6e 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -185,6 +185,8 @@ #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021 0x029c #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021 0x029a #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021 0x029f +#define USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT 0x8102 +#define USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY 0x8302 #define USB_VENDOR_ID_ASUS 0x0486 #define USB_DEVICE_ID_ASUS_T91MT 0x0185 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index dc67717d2dabc..70f602c64fd13 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -314,6 +314,8 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) }, #endif #if IS_ENABLED(CONFIG_HID_APPLEIR) { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) }, From 8db8be9cfc89935c97d791c7e6264e710a7e8a56 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 22 Aug 2022 08:22:47 +0200 Subject: [PATCH 361/654] HID: input: fix uclogic tablets commit 87562fcd1342 ("HID: input: remove the need for HID_QUIRK_INVERT") made the assumption that it was the only one handling tablets and thus kept an internal state regarding the tool. Turns out that the uclogic driver has a timer to release the in range bit, effectively making hid-input ignoring all in range information after the very first one. Fix that by having a more rationale approach which consists in forwarding every event and let the input stack filter out the duplicates. Reported-by: Stefan Hansson Fixes: 87562fcd1342 ("HID: input: remove the need for HID_QUIRK_INVERT") Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-input.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 17a453bb09a2d..859aeb07542e3 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1534,7 +1534,10 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct * assume ours */ if (!report->tool) - hid_report_set_tool(report, input, usage->code); + report->tool = usage->code; + + /* drivers may have changed the value behind our back, resend it */ + hid_report_set_tool(report, input, report->tool); } else { hid_report_release_tool(report, input, usage->code); } From a3f7c10a269d5b77dd5822ade822643ced3057f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 10 Aug 2022 19:26:17 +0200 Subject: [PATCH 362/654] dma-buf/dma-resv: check if the new fence is really later MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously when we added a fence to a dma_resv object we always assumed the the newer than all the existing fences. With Jason's work to add an UAPI to explicit export/import that's not necessary the case any more. So without this check we would allow userspace to force the kernel into an use after free error. Since the change is very small and defensive it's probably a good idea to backport this to stable kernels as well just in case others are using the dma_resv object in the same way. Signed-off-by: Christian König Reviewed-by: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20220810172617.140047-1-christian.koenig@amd.com Cc: stable@vger.kernel.org # v5.19+ --- drivers/dma-buf/dma-resv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 205acb2c744de..e3885c90a3acb 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -295,7 +295,8 @@ void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, enum dma_resv_usage old_usage; dma_resv_list_entry(fobj, i, obj, &old, &old_usage); - if ((old->context == fence->context && old_usage >= usage) || + if ((old->context == fence->context && old_usage >= usage && + dma_fence_is_later(fence, old)) || dma_fence_is_signaled(old)) { dma_resv_list_set(fobj, i, fence, usage); dma_fence_put(old); From 0cf731f9ebb5bf6f252055bebf4463a5c0bd490b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 23 Aug 2022 14:24:07 +0200 Subject: [PATCH 363/654] net: ethernet: mtk_eth_soc: fix hw hash reporting for MTK_NETSYS_V2 Properly report hw rx hash for mt7986 chipset accroding to the new dma descriptor layout. Fixes: 197c9e9b17b11 ("net: ethernet: mtk_eth_soc: introduce support for mt7986 chipset") Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/091394ea4e705fbb35f828011d98d0ba33808f69.1661257293.git.lorenzo@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 22 +++++++++++---------- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 5 +++++ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 8aff4c0c28bd7..5ace4609de47d 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1891,10 +1891,19 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, skb->dev = netdev; bytes += skb->len; - if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) + if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) { + hash = trxd.rxd5 & MTK_RXD5_FOE_ENTRY; + if (hash != MTK_RXD5_FOE_ENTRY) + skb_set_hash(skb, jhash_1word(hash, 0), + PKT_HASH_TYPE_L4); rxdcsum = &trxd.rxd3; - else + } else { + hash = trxd.rxd4 & MTK_RXD4_FOE_ENTRY; + if (hash != MTK_RXD4_FOE_ENTRY) + skb_set_hash(skb, jhash_1word(hash, 0), + PKT_HASH_TYPE_L4); rxdcsum = &trxd.rxd4; + } if (*rxdcsum & eth->soc->txrx.rx_dma_l4_valid) skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -1902,16 +1911,9 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, netdev); - hash = trxd.rxd4 & MTK_RXD4_FOE_ENTRY; - if (hash != MTK_RXD4_FOE_ENTRY) { - hash = jhash_1word(hash, 0); - skb_set_hash(skb, hash, PKT_HASH_TYPE_L4); - } - reason = FIELD_GET(MTK_RXD4_PPE_CPU_REASON, trxd.rxd4); if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED) - mtk_ppe_check_skb(eth->ppe, skb, - trxd.rxd4 & MTK_RXD4_FOE_ENTRY); + mtk_ppe_check_skb(eth->ppe, skb, hash); if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) { if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) { diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 7405c97cda660..ecf85e9ed8240 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -314,6 +314,11 @@ #define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */ #define RX_DMA_SPECIAL_TAG BIT(22) +/* PDMA descriptor rxd5 */ +#define MTK_RXD5_FOE_ENTRY GENMASK(14, 0) +#define MTK_RXD5_PPE_CPU_REASON GENMASK(22, 18) +#define MTK_RXD5_SRC_PORT GENMASK(29, 26) + #define RX_DMA_GET_SPORT(x) (((x) >> 19) & 0xf) #define RX_DMA_GET_SPORT_V2(x) (((x) >> 26) & 0x7) From 06865077b34c792181e3c4f82417a2888c6e7453 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Fri, 12 Aug 2022 14:06:45 +0100 Subject: [PATCH 364/654] ACPI: property: Fix type detection of unified integer reading functions The current code expects the type of the value to be an integer type, instead the value passed to the macro is a pointer. Ensure the size comparison uses the correct pointer type to choose the max value, instead of using the integer type. Fixes: 923044133367 ("ACPI: property: Unify integer value reading functions") Signed-off-by: Stefan Binding Reviewed-by: Sakari Ailus Tested-by: Sakari Ailus Tested-by: John Garry Acked-by: Ard Biesheuvel Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 7b3ad8ed2f4e6..b1d4a8db89df5 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1043,10 +1043,10 @@ static int acpi_data_prop_read_single(const struct acpi_device_data *data, break; \ } \ if (__items[i].integer.value > _Generic(__val, \ - u8: U8_MAX, \ - u16: U16_MAX, \ - u32: U32_MAX, \ - u64: U64_MAX, \ + u8 *: U8_MAX, \ + u16 *: U16_MAX, \ + u32 *: U32_MAX, \ + u64 *: U64_MAX, \ default: 0U)) { \ ret = -EOVERFLOW; \ break; \ From 2ea3b19792dbe32287b0c48f3ff7e866f61967c7 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 24 Aug 2022 14:59:56 +0300 Subject: [PATCH 365/654] ACPI: property: Ignore already existing data node tags ACPI node pointers are attached to data node handles, in order to resolve string references to them. _DSD guide allows the same node to be reached from multiple parent nodes, leading the node enumeration algorithm to each such nodes more than once. As attached data already already exists, attaching data with the same tag will fail. Address this problem by ignoring nodes that have been already tagged. Fixes: 1d52f10917a7 ("ACPI: property: Tie data nodes to acpi handles") Reported-by: Pierre-Louis Bossart Signed-off-by: Sakari Ailus Tested-by: Pierre-Louis Bossart Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index b1d4a8db89df5..91d0e75859d37 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -370,7 +370,7 @@ static bool acpi_tie_nondev_subnodes(struct acpi_device_data *data) bool ret; status = acpi_attach_data(dn->handle, acpi_nondev_subnode_tag, dn); - if (ACPI_FAILURE(status)) { + if (ACPI_FAILURE(status) && status != AE_ALREADY_EXISTS) { acpi_handle_err(dn->handle, "Can't tag data node\n"); return false; } From bd9594ae4c55351f134620ab890259820abf4c43 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 25 Aug 2022 14:17:15 +0300 Subject: [PATCH 366/654] ACPI: property: Remove default association from integer maximum values Remove the default association from integer maximum value checks. It is not necessary and has caused a bug in other associations being unnoticed. Fixes: 923044133367 ("ACPI: property: Unify integer value reading functions") Signed-off-by: Sakari Ailus Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 91d0e75859d37..d4c168ce428ca 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1046,8 +1046,7 @@ static int acpi_data_prop_read_single(const struct acpi_device_data *data, u8 *: U8_MAX, \ u16 *: U16_MAX, \ u32 *: U32_MAX, \ - u64 *: U64_MAX, \ - default: 0U)) { \ + u64 *: U64_MAX)) { \ ret = -EOVERFLOW; \ break; \ } \ From 7c12bb8f59d36507bf70c35dae083dfe9cafc815 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 25 Aug 2022 19:34:59 +0800 Subject: [PATCH 367/654] LoongArch: Select PCI_QUIRKS to avoid build error PCI_LOONGSON is a mandatory for LoongArch and it is selected in Kconfig unconditionally, but its dependency PCI_QUIRKS is missing and may cause a build error when "make randconfig": arch/loongarch/pci/acpi.c: In function 'pci_acpi_setup_ecam_mapping': >> arch/loongarch/pci/acpi.c:103:29: error: 'loongson_pci_ecam_ops' undeclared (first use in this function) 103 | ecam_ops = &loongson_pci_ecam_ops; | ^~~~~~~~~~~~~~~~~~~~~ arch/loongarch/pci/acpi.c:103:29: note: each undeclared identifier is reported only once for each function it appears in Kconfig warnings: (for reference only) WARNING: unmet direct dependencies detected for PCI_LOONGSON Depends on [n]: PCI [=y] && (MACH_LOONGSON64 [=y] || COMPILE_TEST [=y]) && (OF [=y] || ACPI [=y]) && PCI_QUIRKS [=n] Selected by [y]: - LOONGARCH [=y] Fix it by selecting PCI_QUIRKS unconditionally, too. Reported-by: kernel test robot Tested-by: Randy Dunlap Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 4abc9a28aba4e..26aeb1408e569 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -111,6 +111,7 @@ config LOONGARCH select PCI_ECAM if ACPI select PCI_LOONGSON select PCI_MSI_ARCH_FALLBACKS + select PCI_QUIRKS select PERF_USE_VMALLOC select RTC_LIB select SMP From 84e762060147582912581fd99cb25f3559ec8c22 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 25 Aug 2022 19:34:59 +0800 Subject: [PATCH 368/654] LoongArch: Fix build warnings in VDSO Fix build warnings in VDSO as below: arch/loongarch/vdso/vgettimeofday.c:9:5: warning: no previous prototype for '__vdso_clock_gettime' [-Wmissing-prototypes] 9 | int __vdso_clock_gettime(clockid_t clock, | ^~~~~~~~~~~~~~~~~~~~ arch/loongarch/vdso/vgettimeofday.c:15:5: warning: no previous prototype for '__vdso_gettimeofday' [-Wmissing-prototypes] 15 | int __vdso_gettimeofday(struct __kernel_old_timeval *tv, | ^~~~~~~~~~~~~~~~~~~ arch/loongarch/vdso/vgettimeofday.c:21:5: warning: no previous prototype for '__vdso_clock_getres' [-Wmissing-prototypes] 21 | int __vdso_clock_getres(clockid_t clock_id, | ^~~~~~~~~~~~~~~~~~~ arch/loongarch/vdso/vgetcpu.c:27:5: warning: no previous prototype for '__vdso_getcpu' [-Wmissing-prototypes] 27 | int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused) Reported-by: kernel test robot Signed-off-by: Huacai Chen --- arch/loongarch/vdso/vgetcpu.c | 2 ++ arch/loongarch/vdso/vgettimeofday.c | 15 +++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/loongarch/vdso/vgetcpu.c b/arch/loongarch/vdso/vgetcpu.c index 43a0078e44185..e02e775f53608 100644 --- a/arch/loongarch/vdso/vgetcpu.c +++ b/arch/loongarch/vdso/vgetcpu.c @@ -24,6 +24,8 @@ static __always_inline const struct vdso_pcpu_data *get_pcpu_data(void) return (struct vdso_pcpu_data *)(get_vdso_base() - VDSO_DATA_SIZE); } +extern +int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused); int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused) { int cpu_id; diff --git a/arch/loongarch/vdso/vgettimeofday.c b/arch/loongarch/vdso/vgettimeofday.c index b1f4548dae924..8f22863bd7ea8 100644 --- a/arch/loongarch/vdso/vgettimeofday.c +++ b/arch/loongarch/vdso/vgettimeofday.c @@ -6,20 +6,23 @@ */ #include -int __vdso_clock_gettime(clockid_t clock, - struct __kernel_timespec *ts) +extern +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) { return __cvdso_clock_gettime(clock, ts); } -int __vdso_gettimeofday(struct __kernel_old_timeval *tv, - struct timezone *tz) +extern +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { return __cvdso_gettimeofday(tv, tz); } -int __vdso_clock_getres(clockid_t clock_id, - struct __kernel_timespec *res) +extern +int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res); +int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res) { return __cvdso_clock_getres(clock_id, res); } From da48b67cfb6b4f115ae652dd5995c56fe2a2cf9b Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 25 Aug 2022 19:34:59 +0800 Subject: [PATCH 369/654] LoongArch: Cleanup reset routines with new API Cleanup reset routines by using new do_kernel_power_off() instead of old pm_power_off(), and then simplify the whole file (reset.c) organization by inlining some functions. This cleanup also fix a poweroff error if EFI runtime is disabled. Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/reboot.h | 10 ----- arch/loongarch/kernel/reset.c | 69 +++++++++-------------------- 2 files changed, 21 insertions(+), 58 deletions(-) delete mode 100644 arch/loongarch/include/asm/reboot.h diff --git a/arch/loongarch/include/asm/reboot.h b/arch/loongarch/include/asm/reboot.h deleted file mode 100644 index 51151749d8f05..0000000000000 --- a/arch/loongarch/include/asm/reboot.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2020-2022 Loongson Technology Corporation Limited - */ -#ifndef _ASM_REBOOT_H -#define _ASM_REBOOT_H - -extern void (*pm_restart)(void); - -#endif /* _ASM_REBOOT_H */ diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c index 800c965a17eaa..8c82021eb2f44 100644 --- a/arch/loongarch/kernel/reset.c +++ b/arch/loongarch/kernel/reset.c @@ -15,10 +15,16 @@ #include #include #include -#include -static void default_halt(void) +void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); + +void machine_halt(void) { +#ifdef CONFIG_SMP + preempt_disable(); + smp_send_stop(); +#endif local_irq_disable(); clear_csr_ecfg(ECFG0_IM); @@ -30,18 +36,29 @@ static void default_halt(void) } } -static void default_poweroff(void) +void machine_power_off(void) { +#ifdef CONFIG_SMP + preempt_disable(); + smp_send_stop(); +#endif + do_kernel_power_off(); #ifdef CONFIG_EFI efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL); #endif + while (true) { __arch_cpu_idle(); } } -static void default_restart(void) +void machine_restart(char *command) { +#ifdef CONFIG_SMP + preempt_disable(); + smp_send_stop(); +#endif + do_kernel_restart(command); #ifdef CONFIG_EFI if (efi_capsule_pending(NULL)) efi_reboot(REBOOT_WARM, NULL); @@ -55,47 +72,3 @@ static void default_restart(void) __arch_cpu_idle(); } } - -void (*pm_restart)(void); -EXPORT_SYMBOL(pm_restart); - -void (*pm_power_off)(void); -EXPORT_SYMBOL(pm_power_off); - -void machine_halt(void) -{ -#ifdef CONFIG_SMP - preempt_disable(); - smp_send_stop(); -#endif - default_halt(); -} - -void machine_power_off(void) -{ -#ifdef CONFIG_SMP - preempt_disable(); - smp_send_stop(); -#endif - pm_power_off(); -} - -void machine_restart(char *command) -{ -#ifdef CONFIG_SMP - preempt_disable(); - smp_send_stop(); -#endif - do_kernel_restart(command); - pm_restart(); -} - -static int __init loongarch_reboot_setup(void) -{ - pm_restart = default_restart; - pm_power_off = default_poweroff; - - return 0; -} - -arch_initcall(loongarch_reboot_setup); From 092e9ebe52a664f9f58e2d24136ae791fe71c6db Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 25 Aug 2022 19:34:59 +0800 Subject: [PATCH 370/654] LoongArch: Cleanup headers to avoid circular dependency When enable GENERIC_IOREMAP, there will be circular dependency to cause build errors. The root cause is that pgtable.h shouldn't include io.h but pgtable.h need some macros defined in io.h. So cleanup those macros and remove the unnecessary inclusions, as other architectures do. Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/addrspace.h | 16 ++++++++++++++++ arch/loongarch/include/asm/io.h | 19 ------------------- arch/loongarch/include/asm/page.h | 2 +- arch/loongarch/include/asm/pgtable.h | 7 +++---- arch/loongarch/mm/mmap.c | 11 ++--------- 5 files changed, 22 insertions(+), 33 deletions(-) diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index b91e0733b2e57..d342935e5a72d 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -109,4 +109,20 @@ extern unsigned long vm_map_base; */ #define PHYSADDR(a) ((_ACAST64_(a)) & TO_PHYS_MASK) +/* + * On LoongArch, I/O ports mappring is following: + * + * | .... | + * |-----------------------| + * | pci io ports(16K~32M) | + * |-----------------------| + * | isa io ports(0 ~16K) | + * PCI_IOBASE ->|-----------------------| + * | .... | + */ +#define PCI_IOBASE ((void __iomem *)(vm_map_base + (2 * PAGE_SIZE))) +#define PCI_IOSIZE SZ_32M +#define ISA_IOSIZE SZ_16K +#define IO_SPACE_LIMIT (PCI_IOSIZE - 1) + #endif /* _ASM_ADDRSPACE_H */ diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index 884599739b367..999944ea1cea4 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -7,34 +7,15 @@ #define ARCH_HAS_IOREMAP_WC -#include #include #include #include -#include -#include #include #include #include #include -/* - * On LoongArch, I/O ports mappring is following: - * - * | .... | - * |-----------------------| - * | pci io ports(64K~32M) | - * |-----------------------| - * | isa io ports(0 ~16K) | - * PCI_IOBASE ->|-----------------------| - * | .... | - */ -#define PCI_IOBASE ((void __iomem *)(vm_map_base + (2 * PAGE_SIZE))) -#define PCI_IOSIZE SZ_32M -#define ISA_IOSIZE SZ_16K -#define IO_SPACE_LIMIT (PCI_IOSIZE - 1) - /* * Change "struct page" to physical address. */ diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index a37324ac460b6..53f284a961823 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -95,7 +95,7 @@ static inline int pfn_valid(unsigned long pfn) #endif -#define virt_to_pfn(kaddr) PFN_DOWN(virt_to_phys((void *)(kaddr))) +#define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr)) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) extern int __virt_addr_valid(volatile void *kaddr); diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index e03443abaf7da..8ea57e2f0e04c 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -59,7 +59,6 @@ #include #include #include -#include struct mm_struct; struct vm_area_struct; @@ -145,7 +144,7 @@ static inline void set_p4d(p4d_t *p4d, p4d_t p4dval) *p4d = p4dval; } -#define p4d_phys(p4d) virt_to_phys((void *)p4d_val(p4d)) +#define p4d_phys(p4d) PHYSADDR(p4d_val(p4d)) #define p4d_page(p4d) (pfn_to_page(p4d_phys(p4d) >> PAGE_SHIFT)) #endif @@ -188,7 +187,7 @@ static inline pmd_t *pud_pgtable(pud_t pud) #define set_pud(pudptr, pudval) do { *(pudptr) = (pudval); } while (0) -#define pud_phys(pud) virt_to_phys((void *)pud_val(pud)) +#define pud_phys(pud) PHYSADDR(pud_val(pud)) #define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT)) #endif @@ -221,7 +220,7 @@ static inline void pmd_clear(pmd_t *pmdp) #define set_pmd(pmdptr, pmdval) do { *(pmdptr) = (pmdval); } while (0) -#define pmd_phys(pmd) virt_to_phys((void *)pmd_val(pmd)) +#define pmd_phys(pmd) PHYSADDR(pmd_val(pmd)) #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_page(pmd) (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT)) diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c index 52e40f0ba732d..381a569635a9d 100644 --- a/arch/loongarch/mm/mmap.c +++ b/arch/loongarch/mm/mmap.c @@ -2,16 +2,9 @@ /* * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ -#include -#include -#include +#include #include #include -#include -#include -#include -#include -#include unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); @@ -120,6 +113,6 @@ int __virt_addr_valid(volatile void *kaddr) if ((vaddr < PAGE_OFFSET) || (vaddr >= vm_map_base)) return 0; - return pfn_valid(PFN_DOWN(virt_to_phys(kaddr))); + return pfn_valid(PFN_DOWN(PHYSADDR(kaddr))); } EXPORT_SYMBOL_GPL(__virt_addr_valid); From 720dc7ab252bbdf404cab7b909e26b31e602bf7e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 25 Aug 2022 19:34:59 +0800 Subject: [PATCH 371/654] LoongArch: Add subword xchg/cmpxchg emulation LoongArch only support 32-bit/64-bit xchg/cmpxchg in native. But percpu operation, qspinlock and some drivers need 8-bit/16-bit xchg/cmpxchg. We add subword xchg/cmpxchg emulation in this patch because the emulation has better performance than the generic implementation (on NUMA system), and it can fix some build errors meanwhile [1]. LoongArch's guarantee for forward progress (avoid many ll/sc happening at the same time and no one succeeds): We have the "exclusive access (with timeout) of ll" feature to avoid simultaneous ll (which also blocks other memory load/store on the same address), and the "random delay of sc" feature to avoid simultaneous sc. It is a mandatory requirement for multi-core LoongArch processors to implement such features, only except those single-core and dual-core processors (they also don't support multi-chip interconnection). Feature bits are introduced in CPUCFG3, bit 3 and bit 4 [2]. [1] https://lore.kernel.org/loongarch/CAAhV-H6vvkuOzy8OemWdYK3taj5Jn3bFX0ZTwE=twM8ywpBUYA@mail.gmail.com/T/#t [2] https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#_cpucfg Reported-by: Sudip Mukherjee (Codethink) Suggested-by: Linus Torvalds Signed-off-by: Rui Wang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cmpxchg.h | 98 +++++++++++++++++++++++++++- arch/loongarch/include/asm/percpu.h | 8 +++ 2 files changed, 105 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h index 0a9b0fac1eeeb..ae19e33c77548 100644 --- a/arch/loongarch/include/asm/cmpxchg.h +++ b/arch/loongarch/include/asm/cmpxchg.h @@ -5,8 +5,9 @@ #ifndef __ASM_CMPXCHG_H #define __ASM_CMPXCHG_H -#include +#include #include +#include #define __xchg_asm(amswap_db, m, val) \ ({ \ @@ -21,10 +22,53 @@ __ret; \ }) +static inline unsigned int __xchg_small(volatile void *ptr, unsigned int val, + unsigned int size) +{ + unsigned int shift; + u32 old32, mask, temp; + volatile u32 *ptr32; + + /* Mask value to the correct size. */ + mask = GENMASK((size * BITS_PER_BYTE) - 1, 0); + val &= mask; + + /* + * Calculate a shift & mask that correspond to the value we wish to + * exchange within the naturally aligned 4 byte integerthat includes + * it. + */ + shift = (unsigned long)ptr & 0x3; + shift *= BITS_PER_BYTE; + mask <<= shift; + + /* + * Calculate a pointer to the naturally aligned 4 byte integer that + * includes our byte of interest, and load its value. + */ + ptr32 = (volatile u32 *)((unsigned long)ptr & ~0x3); + + asm volatile ( + "1: ll.w %0, %3 \n" + " andn %1, %0, %z4 \n" + " or %1, %1, %z5 \n" + " sc.w %1, %2 \n" + " beqz %1, 1b \n" + : "=&r" (old32), "=&r" (temp), "=ZC" (*ptr32) + : "ZC" (*ptr32), "Jr" (mask), "Jr" (val << shift) + : "memory"); + + return (old32 & mask) >> shift; +} + static inline unsigned long __xchg(volatile void *ptr, unsigned long x, int size) { switch (size) { + case 1: + case 2: + return __xchg_small(ptr, x, size); + case 4: return __xchg_asm("amswap_db.w", (volatile u32 *)ptr, (u32)x); @@ -67,10 +111,62 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, __ret; \ }) +static inline unsigned int __cmpxchg_small(volatile void *ptr, unsigned int old, + unsigned int new, unsigned int size) +{ + unsigned int shift; + u32 old32, mask, temp; + volatile u32 *ptr32; + + /* Mask inputs to the correct size. */ + mask = GENMASK((size * BITS_PER_BYTE) - 1, 0); + old &= mask; + new &= mask; + + /* + * Calculate a shift & mask that correspond to the value we wish to + * compare & exchange within the naturally aligned 4 byte integer + * that includes it. + */ + shift = (unsigned long)ptr & 0x3; + shift *= BITS_PER_BYTE; + old <<= shift; + new <<= shift; + mask <<= shift; + + /* + * Calculate a pointer to the naturally aligned 4 byte integer that + * includes our byte of interest, and load its value. + */ + ptr32 = (volatile u32 *)((unsigned long)ptr & ~0x3); + + asm volatile ( + "1: ll.w %0, %3 \n" + " and %1, %0, %z4 \n" + " bne %1, %z5, 2f \n" + " andn %1, %0, %z4 \n" + " or %1, %1, %z6 \n" + " sc.w %1, %2 \n" + " beqz %1, 1b \n" + " b 3f \n" + "2: \n" + __WEAK_LLSC_MB + "3: \n" + : "=&r" (old32), "=&r" (temp), "=ZC" (*ptr32) + : "ZC" (*ptr32), "Jr" (mask), "Jr" (old), "Jr" (new) + : "memory"); + + return (old32 & mask) >> shift; +} + static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + case 2: + return __cmpxchg_small(ptr, old, new, size); + case 4: return __cmpxchg_asm("ll.w", "sc.w", (volatile u32 *)ptr, (u32)old, new); diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index e6569f18c6ddf..0bd6b0110198f 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -123,6 +123,10 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size) { switch (size) { + case 1: + case 2: + return __xchg_small((volatile void *)ptr, val, size); + case 4: return __xchg_asm("amswap.w", (volatile u32 *)ptr, (u32)val); @@ -204,9 +208,13 @@ do { \ #define this_cpu_write_4(pcp, val) _percpu_write(pcp, val) #define this_cpu_write_8(pcp, val) _percpu_write(pcp, val) +#define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val) #define this_cpu_xchg_4(pcp, val) _percpu_xchg(pcp, val) #define this_cpu_xchg_8(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) #define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) #define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) From b83699ea1e62951857c2d8648bd93a4744899eb7 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 25 Aug 2022 19:34:59 +0800 Subject: [PATCH 372/654] LoongArch: mm: Avoid unnecessary page fault retires on shared memory types Commit d92725256b4f22d0 ("mm: avoid unnecessary page fault retires on shared memory types") modifies do_page_fault() to handle the VM_FAULT_ COMPLETED case, but forget to change for LoongArch, so fix it as other architectures does. Fixes: d92725256b4f22d0 ("mm: avoid unnecessary page fault retires on shared memory types") Reviewed-by: Guo Ren Signed-off-by: Huacai Chen --- arch/loongarch/mm/fault.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index 605579b19a002..1ccd53655cab0 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -216,6 +216,10 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, return; } + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + if (unlikely(fault & VM_FAULT_RETRY)) { flags |= FAULT_FLAG_TRIED; From ab0af755d40f2ea4ef01503e7bebf083f65c9062 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 17 Aug 2022 06:43:33 +0200 Subject: [PATCH 373/654] xen: x86: remove setting the obsolete config XEN_MAX_DOMAIN_MEMORY Commit c70727a5bc18 ("xen: allow more than 512 GB of RAM for 64 bit pv-domains") from July 2015 replaces the config XEN_MAX_DOMAIN_MEMORY with a new config XEN_512GB, but misses to adjust arch/x86/configs/xen.config. As XEN_512GB defaults to yes, there is no need to explicitly set any config in xen.config. Just remove setting the obsolete config XEN_MAX_DOMAIN_MEMORY. Signed-off-by: Lukas Bulwahn Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220817044333.22310-1-lukas.bulwahn@gmail.com Signed-off-by: Juergen Gross --- arch/x86/configs/xen.config | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/configs/xen.config b/arch/x86/configs/xen.config index d9fc7139fd46e..581296255b39e 100644 --- a/arch/x86/configs/xen.config +++ b/arch/x86/configs/xen.config @@ -14,7 +14,6 @@ CONFIG_CPU_FREQ=y # x86 xen specific config options CONFIG_XEN_PVH=y -CONFIG_XEN_MAX_DOMAIN_MEMORY=500 CONFIG_XEN_SAVE_RESTORE=y # CONFIG_XEN_DEBUG_FS is not set CONFIG_XEN_MCE_LOG=y From 6bb79f5b4c84a09cd6bed46ef3e46ef4fc21407d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 18 Aug 2022 23:01:22 +0200 Subject: [PATCH 374/654] xen: move from strlcpy with unused retval to strscpy Follow the advice of the below link and prefer 'strscpy' in this subsystem. Conversion is 1:1 because the return value is not used. Generated by a coccinelle script. Link: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/ Signed-off-by: Wolfram Sang Reviewed-by: Oleksandr Tyshchenko Link: https://lore.kernel.org/r/20220818210122.7613-1-wsa+renesas@sang-engineering.com Signed-off-by: Juergen Gross --- drivers/xen/xen-scsiback.c | 2 +- drivers/xen/xenbus/xenbus_probe_frontend.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 7a0c93acc2c57..d3dcda3449892 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1121,7 +1121,7 @@ static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op, "%s: writing %s", __func__, state); return; } - strlcpy(phy, val, VSCSI_NAMELEN); + strscpy(phy, val, VSCSI_NAMELEN); kfree(val); /* virtual SCSI device */ diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index 07b010a68fcf9..f44d5a64351e4 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -40,7 +40,7 @@ static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) return -EINVAL; } - strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); + strscpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); if (!strchr(bus_id, '/')) { pr_warn("bus_id %s no slash\n", bus_id); return -EINVAL; From 467249a7dff68451868ca79696aef69764193a8a Mon Sep 17 00:00:00 2001 From: Even Xu Date: Tue, 23 Aug 2022 09:10:59 +0800 Subject: [PATCH 375/654] HID: intel-ish-hid: ipc: Add Meteor Lake PCI device ID Add device ID of Meteor Lake P into ishtp support list. Signed-off-by: Even Xu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ipc/hw-ish.h | 1 + drivers/hid/intel-ish-hid/ipc/pci-ish.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h index e600dbf04dfc6..fc108f19a64c3 100644 --- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h +++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h @@ -32,6 +32,7 @@ #define ADL_P_DEVICE_ID 0x51FC #define ADL_N_DEVICE_ID 0x54FC #define RPL_S_DEVICE_ID 0x7A78 +#define MTL_P_DEVICE_ID 0x7E45 #define REVISION_ID_CHT_A0 0x6 #define REVISION_ID_CHT_Ax_SI 0x0 diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index 2c67ec17bec6f..7120b30ac51d0 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -43,6 +43,7 @@ static const struct pci_device_id ish_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, ADL_P_DEVICE_ID)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, ADL_N_DEVICE_ID)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, RPL_S_DEVICE_ID)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MTL_P_DEVICE_ID)}, {0, } }; MODULE_DEVICE_TABLE(pci, ish_pci_tbl); From 13cccafe0edcd03bf1c841de8ab8a1c8e34f77d9 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 16 Aug 2022 11:54:07 -0400 Subject: [PATCH 376/654] s390: fix double free of GS and RI CBs on fork() failure The pointers for guarded storage and runtime instrumentation control blocks are stored in the thread_struct of the associated task. These pointers are initially copied on fork() via arch_dup_task_struct() and then cleared via copy_thread() before fork() returns. If fork() happens to fail after the initial task dup and before copy_thread(), the newly allocated task and associated thread_struct memory are freed via free_task() -> arch_release_task_struct(). This results in a double free of the guarded storage and runtime info structs because the fields in the failed task still refer to memory associated with the source task. This problem can manifest as a BUG_ON() in set_freepointer() (with CONFIG_SLAB_FREELIST_HARDENED enabled) or KASAN splat (if enabled) when running trinity syscall fuzz tests on s390x. To avoid this problem, clear the associated pointer fields in arch_dup_task_struct() immediately after the new task is copied. Note that the RI flag is still cleared in copy_thread() because it resides in thread stack memory and that is where stack info is copied. Signed-off-by: Brian Foster Fixes: 8d9047f8b967c ("s390/runtime instrumentation: simplify task exit handling") Fixes: 7b83c6297d2fc ("s390/guarded storage: simplify task exit handling") Cc: # 4.15 Reviewed-by: Gerald Schaefer Reviewed-by: Heiko Carstens Link: https://lore.kernel.org/r/20220816155407.537372-1-bfoster@redhat.com Signed-off-by: Vasily Gorbik --- arch/s390/kernel/process.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 89949b9f3cf88..d5119e039d855 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -91,6 +91,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) memcpy(dst, src, arch_task_struct_size); dst->thread.fpu.regs = dst->thread.fpu.fprs; + + /* + * Don't transfer over the runtime instrumentation or the guarded + * storage control block pointers. These fields are cleared here instead + * of in copy_thread() to avoid premature freeing of associated memory + * on fork() failure. Wait to clear the RI flag because ->stack still + * refers to the source thread. + */ + dst->thread.ri_cb = NULL; + dst->thread.gs_cb = NULL; + dst->thread.gs_bc_cb = NULL; + return 0; } @@ -150,13 +162,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) frame->childregs.flags = 0; if (new_stackp) frame->childregs.gprs[15] = new_stackp; - - /* Don't copy runtime instrumentation info */ - p->thread.ri_cb = NULL; + /* + * Clear the runtime instrumentation flag after the above childregs + * copy. The CB pointer was already cleared in arch_dup_task_struct(). + */ frame->childregs.psw.mask &= ~PSW_MASK_RI; - /* Don't copy guarded storage control block */ - p->thread.gs_cb = NULL; - p->thread.gs_bc_cb = NULL; /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) { From 41ac42f137080bc230b5882e3c88c392ab7f2d32 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 17 Aug 2022 15:26:03 +0200 Subject: [PATCH 377/654] s390/mm: do not trigger write fault when vma does not allow VM_WRITE For non-protection pXd_none() page faults in do_dat_exception(), we call do_exception() with access == (VM_READ | VM_WRITE | VM_EXEC). In do_exception(), vma->vm_flags is checked against that before calling handle_mm_fault(). Since commit 92f842eac7ee3 ("[S390] store indication fault optimization"), we call handle_mm_fault() with FAULT_FLAG_WRITE, when recognizing that it was a write access. However, the vma flags check is still only checking against (VM_READ | VM_WRITE | VM_EXEC), and therefore also calling handle_mm_fault() with FAULT_FLAG_WRITE in cases where the vma does not allow VM_WRITE. Fix this by changing access check in do_exception() to VM_WRITE only, when recognizing write access. Link: https://lkml.kernel.org/r/20220811103435.188481-3-david@redhat.com Fixes: 92f842eac7ee3 ("[S390] store indication fault optimization") Cc: Reported-by: David Hildenbrand Reviewed-by: Heiko Carstens Signed-off-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- arch/s390/mm/fault.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 13449941516c2..09b6e756d521d 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -379,7 +379,9 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) flags = FAULT_FLAG_DEFAULT; if (user_mode(regs)) flags |= FAULT_FLAG_USER; - if (access == VM_WRITE || is_write) + if (is_write) + access = VM_WRITE; + if (access == VM_WRITE) flags |= FAULT_FLAG_WRITE; mmap_read_lock(mm); From 1ff89e06c2e5fab30274e4b02360d4241d6e605e Mon Sep 17 00:00:00 2001 From: "Daniel J. Ogorchock" Date: Wed, 13 Jul 2022 16:20:59 -0400 Subject: [PATCH 378/654] HID: nintendo: fix rumble worker null pointer deref We can dereference a null pointer trying to queue work to a destroyed workqueue. If the device is disconnected, nintendo_hid_remove is called, in which the rumble_queue is destroyed. Avoid using that queue to defer rumble work once the controller state is set to JOYCON_CTLR_STATE_REMOVED. This eliminates the null pointer dereference. Signed-off-by: Daniel J. Ogorchock Signed-off-by: Jiri Kosina --- drivers/hid/hid-nintendo.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index 92ac4f605f134..6028af3c3aae5 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -1221,6 +1221,7 @@ static void joycon_parse_report(struct joycon_ctlr *ctlr, spin_lock_irqsave(&ctlr->lock, flags); if (IS_ENABLED(CONFIG_NINTENDO_FF) && rep->vibrator_report && + ctlr->ctlr_state != JOYCON_CTLR_STATE_REMOVED && (msecs - ctlr->rumble_msecs) >= JC_RUMBLE_PERIOD_MS && (ctlr->rumble_queue_head != ctlr->rumble_queue_tail || ctlr->rumble_zero_countdown > 0)) { @@ -1545,12 +1546,13 @@ static int joycon_set_rumble(struct joycon_ctlr *ctlr, u16 amp_r, u16 amp_l, ctlr->rumble_queue_head = 0; memcpy(ctlr->rumble_data[ctlr->rumble_queue_head], data, JC_RUMBLE_DATA_SIZE); - spin_unlock_irqrestore(&ctlr->lock, flags); /* don't wait for the periodic send (reduces latency) */ - if (schedule_now) + if (schedule_now && ctlr->ctlr_state != JOYCON_CTLR_STATE_REMOVED) queue_work(ctlr->rumble_queue, &ctlr->rumble_worker); + spin_unlock_irqrestore(&ctlr->lock, flags); + return 0; } From 581711c46612c1fd7f98960f9ad53f04fdb89853 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 24 Aug 2022 13:07:43 +0100 Subject: [PATCH 379/654] io_uring/net: save address for sendzc async execution We usually copy all bits that a request needs from the userspace for async execution, so the userspace can keep them on the stack. However, send zerocopy violates this pattern for addresses and may reloads it e.g. from io-wq. Save the address if any in ->async_data as usual. Reported-by: Stefan Metzmacher Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/d7512d7aa9abcd36e9afe1a4d292a24cb2d157e5.1661342812.git.asml.silence@gmail.com [axboe: fold in incremental fix] Signed-off-by: Jens Axboe --- io_uring/net.c | 53 +++++++++++++++++++++++++++++++++++++++++------- io_uring/net.h | 1 + io_uring/opdef.c | 4 +++- 3 files changed, 50 insertions(+), 8 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 3adcb09ae2643..0af8a02df580f 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -182,6 +182,37 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req, &iomsg->free_iov); } +int io_sendzc_prep_async(struct io_kiocb *req) +{ + struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); + struct io_async_msghdr *io; + int ret; + + if (!zc->addr || req_has_async_data(req)) + return 0; + if (io_alloc_async_data(req)) + return -ENOMEM; + + io = req->async_data; + ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr); + return ret; +} + +static int io_setup_async_addr(struct io_kiocb *req, + struct sockaddr_storage *addr, + unsigned int issue_flags) +{ + struct io_async_msghdr *io; + + if (!addr || req_has_async_data(req)) + return -EAGAIN; + if (io_alloc_async_data(req)) + return -ENOMEM; + io = req->async_data; + memcpy(&io->addr, addr, sizeof(io->addr)); + return -EAGAIN; +} + int io_sendmsg_prep_async(struct io_kiocb *req) { int ret; @@ -944,7 +975,7 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) { - struct sockaddr_storage address; + struct sockaddr_storage __address, *addr = NULL; struct io_ring_ctx *ctx = req->ctx; struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); struct io_notif_slot *notif_slot; @@ -978,10 +1009,17 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) msg.msg_namelen = 0; if (zc->addr) { - ret = move_addr_to_kernel(zc->addr, zc->addr_len, &address); - if (unlikely(ret < 0)) - return ret; - msg.msg_name = (struct sockaddr *)&address; + if (req_has_async_data(req)) { + struct io_async_msghdr *io = req->async_data; + + msg.msg_name = addr = &io->addr; + } else { + ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address); + if (unlikely(ret < 0)) + return ret; + msg.msg_name = (struct sockaddr *)&__address; + addr = &__address; + } msg.msg_namelen = zc->addr_len; } @@ -1013,13 +1051,14 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(ret < min_ret)) { if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) - return -EAGAIN; + return io_setup_async_addr(req, addr, issue_flags); + if (ret > 0 && io_net_retry(sock, msg.msg_flags)) { zc->len -= ret; zc->buf += ret; zc->done_io += ret; req->flags |= REQ_F_PARTIAL_IO; - return -EAGAIN; + return io_setup_async_addr(req, addr, issue_flags); } if (ret == -ERESTARTSYS) ret = -EINTR; diff --git a/io_uring/net.h b/io_uring/net.h index 7c438d39c0899..f91f56c6eeacf 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -31,6 +31,7 @@ struct io_async_connect { int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_shutdown(struct io_kiocb *req, unsigned int issue_flags); +int io_sendzc_prep_async(struct io_kiocb *req); int io_sendmsg_prep_async(struct io_kiocb *req); void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req); int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 72dd2b2d8a9df..41410126c1c68 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -478,13 +478,15 @@ const struct io_op_def io_op_defs[] = { .pollout = 1, .audit_skip = 1, .ioprio = 1, + .manual_alloc = 1, #if defined(CONFIG_NET) + .async_size = sizeof(struct io_async_msghdr), .prep = io_sendzc_prep, .issue = io_sendzc, + .prep_async = io_sendzc_prep_async, #else .prep = io_eopnotsupp_prep, #endif - }, }; From c93c296fff6b369a7115916145047c8a3db6e27f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 24 Aug 2022 17:13:26 +0200 Subject: [PATCH 380/654] x86/sev: Mark snp_abort() noreturn Mark both the function prototype and definition as noreturn in order to prevent the compiler from doing transformations which confuse objtool like so: vmlinux.o: warning: objtool: sme_enable+0x71: unreachable instruction This triggers with gcc-12. Add it and sev_es_terminate() to the objtool noreturn tracking array too. Sort it while at it. Suggested-by: Michael Matz Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20220824152420.20547-1-bp@alien8.de --- arch/x86/include/asm/sev.h | 2 +- arch/x86/kernel/sev.c | 2 +- tools/objtool/check.c | 34 ++++++++++++++++++---------------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 4a23e52fe0ee1..ebc271bb6d8ed 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -195,7 +195,7 @@ void snp_set_memory_shared(unsigned long vaddr, unsigned int npages); void snp_set_memory_private(unsigned long vaddr, unsigned int npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); -void snp_abort(void); +void __init __noreturn snp_abort(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 4f84c3f11af5b..a428c62330d37 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -2112,7 +2112,7 @@ bool __init snp_init(struct boot_params *bp) return true; } -void __init snp_abort(void) +void __init __noreturn snp_abort(void) { sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 0cec74da7ffea..ad51689dfb415 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -162,32 +162,34 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, /* * Unfortunately these have to be hard coded because the noreturn - * attribute isn't provided in ELF data. + * attribute isn't provided in ELF data. Keep 'em sorted. */ static const char * const global_noreturns[] = { + "__invalid_creds", + "__module_put_and_kthread_exit", + "__reiserfs_panic", "__stack_chk_fail", - "panic", + "__ubsan_handle_builtin_unreachable", + "cpu_bringup_and_idle", + "cpu_startup_entry", "do_exit", + "do_group_exit", "do_task_dead", - "kthread_exit", - "make_task_dead", - "__module_put_and_kthread_exit", + "ex_handler_msr_mce", + "fortify_panic", "kthread_complete_and_exit", - "__reiserfs_panic", + "kthread_exit", + "kunit_try_catch_throw", "lbug_with_loc", - "fortify_panic", - "usercopy_abort", "machine_real_restart", + "make_task_dead", + "panic", "rewind_stack_and_make_dead", - "kunit_try_catch_throw", - "xen_start_kernel", - "cpu_bringup_and_idle", - "do_group_exit", + "sev_es_terminate", + "snp_abort", "stop_this_cpu", - "__invalid_creds", - "cpu_startup_entry", - "__ubsan_handle_builtin_unreachable", - "ex_handler_msr_mce", + "usercopy_abort", + "xen_start_kernel", }; if (!func) From 00da0cb385d05a89226e150a102eb49d8abb0359 Mon Sep 17 00:00:00 2001 From: Salvatore Bonaccorso Date: Mon, 1 Aug 2022 11:15:30 +0200 Subject: [PATCH 381/654] Documentation/ABI: Mention retbleed vulnerability info file for sysfs While reporting for the AMD retbleed vulnerability was added in 6b80b59b3555 ("x86/bugs: Report AMD retbleed vulnerability") the new sysfs file was not mentioned so far in the ABI documentation for sysfs-devices-system-cpu. Fix that. Fixes: 6b80b59b3555 ("x86/bugs: Report AMD retbleed vulnerability") Signed-off-by: Salvatore Bonaccorso Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220801091529.325327-1-carnil@debian.org --- Documentation/ABI/testing/sysfs-devices-system-cpu | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 5bf61881f0126..760c889b6cd14 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -523,6 +523,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/tsx_async_abort /sys/devices/system/cpu/vulnerabilities/itlb_multihit /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + /sys/devices/system/cpu/vulnerabilities/retbleed Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities From c5deb27895e017a0267de0a20d140ad5fcc55a54 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 25 Aug 2022 16:19:18 +0200 Subject: [PATCH 382/654] xen/privcmd: fix error exit of privcmd_ioctl_dm_op() The error exit of privcmd_ioctl_dm_op() is calling unlock_pages() potentially with pages being NULL, leading to a NULL dereference. Additionally lock_pages() doesn't check for pin_user_pages_fast() having been completely successful, resulting in potentially not locking all pages into memory. This could result in sporadic failures when using the related memory in user mode. Fix all of that by calling unlock_pages() always with the real number of pinned pages, which will be zero in case pages being NULL, and by checking the number of pages pinned by pin_user_pages_fast() matching the expected number of pages. Cc: Fixes: ab520be8cd5d ("xen/privcmd: Add IOCTL_PRIVCMD_DM_OP") Reported-by: Rustam Subkhankulov Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Oleksandr Tyshchenko Link: https://lore.kernel.org/r/20220825141918.3581-1-jgross@suse.com Signed-off-by: Juergen Gross --- drivers/xen/privcmd.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 3369734108af2..e88e8f6f0a334 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -581,27 +581,30 @@ static int lock_pages( struct privcmd_dm_op_buf kbufs[], unsigned int num, struct page *pages[], unsigned int nr_pages, unsigned int *pinned) { - unsigned int i; + unsigned int i, off = 0; - for (i = 0; i < num; i++) { + for (i = 0; i < num; ) { unsigned int requested; int page_count; requested = DIV_ROUND_UP( offset_in_page(kbufs[i].uptr) + kbufs[i].size, - PAGE_SIZE); + PAGE_SIZE) - off; if (requested > nr_pages) return -ENOSPC; page_count = pin_user_pages_fast( - (unsigned long) kbufs[i].uptr, + (unsigned long)kbufs[i].uptr + off * PAGE_SIZE, requested, FOLL_WRITE, pages); - if (page_count < 0) - return page_count; + if (page_count <= 0) + return page_count ? : -EFAULT; *pinned += page_count; nr_pages -= page_count; pages += page_count; + + off = (requested == page_count) ? 0 : off + page_count; + i += !off; } return 0; @@ -677,10 +680,8 @@ static long privcmd_ioctl_dm_op(struct file *file, void __user *udata) } rc = lock_pages(kbufs, kdata.num, pages, nr_pages, &pinned); - if (rc < 0) { - nr_pages = pinned; + if (rc < 0) goto out; - } for (i = 0; i < kdata.num; i++) { set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr); @@ -692,7 +693,7 @@ static long privcmd_ioctl_dm_op(struct file *file, void __user *udata) xen_preemptible_hcall_end(); out: - unlock_pages(pages, nr_pages); + unlock_pages(pages, pinned); kfree(xbufs); kfree(pages); kfree(kbufs); From 4a593a62a9e3a25ab4bc37f612e4edec144f7f43 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 25 Aug 2022 18:08:38 +0300 Subject: [PATCH 383/654] xhci: Fix null pointer dereference in remove if xHC has only one roothub The remove path in xhci platform driver tries to remove and put both main and shared hcds even if only a main hcd exists (one roothub) This causes a null pointer dereference in reboot for those controllers. Check that the shared_hcd exists before trying to remove it. Fixes: e0fe986972f5 ("usb: host: xhci-plat: prepare operation w/o shared hcd") Reported-by: Alexey Sheplyakov Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220825150840.132216-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 044855818cb11..a8641b6536eea 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -398,12 +398,17 @@ static int xhci_plat_remove(struct platform_device *dev) pm_runtime_get_sync(&dev->dev); xhci->xhc_state |= XHCI_STATE_REMOVING; - usb_remove_hcd(shared_hcd); - xhci->shared_hcd = NULL; + if (shared_hcd) { + usb_remove_hcd(shared_hcd); + xhci->shared_hcd = NULL; + } + usb_phy_shutdown(hcd->usb_phy); usb_remove_hcd(hcd); - usb_put_hcd(shared_hcd); + + if (shared_hcd) + usb_put_hcd(shared_hcd); clk_disable_unprepare(clk); clk_disable_unprepare(reg_clk); From 33e321586e37b642ad10594b9ef25a613555cd08 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 25 Aug 2022 18:08:39 +0300 Subject: [PATCH 384/654] xhci: Add grace period after xHC start to prevent premature runtime suspend. After xHC controller is started, either in probe or resume, it can take a while before any of the connected usb devices are visible to the roothub due to link training. It's possible xhci driver loads, sees no acivity and suspends the host before the USB device is visible. In one testcase with a hotplugged xHC controller the host finally detected the connected USB device and generated a wake 500ms after host initial start. If hosts didn't suspend the device duringe training it probablty wouldn't take up to 500ms to detect it, but looking at specs reveal USB3 link training has a couple long timeout values, such as 120ms RxDetectQuietTimeout, and 360ms PollingLFPSTimeout. So Add a 500ms grace period that keeps polling the roothub for 500ms after start, preventing runtime suspend until USB devices are detected. Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220825150840.132216-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 11 +++++++++++ drivers/usb/host/xhci.c | 4 +++- drivers/usb/host/xhci.h | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 0fdc014c94011..b30298986a698 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1648,6 +1648,17 @@ int xhci_hub_status_data(struct usb_hcd *hcd, char *buf) status = bus_state->resuming_ports; + /* + * SS devices are only visible to roothub after link training completes. + * Keep polling roothubs for a grace period after xHC start + */ + if (xhci->run_graceperiod) { + if (time_before(jiffies, xhci->run_graceperiod)) + status = 1; + else + xhci->run_graceperiod = 0; + } + mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC | PORT_CEC; /* For each port, did anything change? If so, set that bit in buf. */ diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 65858f6074377..1afd32beec99c 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -151,9 +151,11 @@ int xhci_start(struct xhci_hcd *xhci) xhci_err(xhci, "Host took too long to start, " "waited %u microseconds.\n", XHCI_MAX_HALT_USEC); - if (!ret) + if (!ret) { /* clear state flags. Including dying, halted or removing */ xhci->xhc_state = 0; + xhci->run_graceperiod = jiffies + msecs_to_jiffies(500); + } return ret; } diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 1960b47acfb28..df6f2ebaff188 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1826,7 +1826,7 @@ struct xhci_hcd { /* Host controller watchdog timer structures */ unsigned int xhc_state; - + unsigned long run_graceperiod; u32 command; struct s3_save s3; /* Host controller is dying - not responding to commands. "I'm not dead yet!" From 8531aa1659f7278d4f2ec7408cc000eaa8d85217 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 25 Aug 2022 18:08:40 +0300 Subject: [PATCH 385/654] Revert "xhci: turn off port power in shutdown" This reverts commit 83810f84ecf11dfc5a9414a8b762c3501b328185. Turning off port power in shutdown did cause issues such as a laptop not proprly powering off, and some specific usb devies failing to enumerate the subsequent boot after a warm reset. So revert this. Fixes: 83810f84ecf1 ("xhci: turn off port power in shutdown") Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220825150840.132216-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 2 +- drivers/usb/host/xhci.c | 15 ++------------- drivers/usb/host/xhci.h | 2 -- 3 files changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index b30298986a698..4619d5e89d5be 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -652,7 +652,7 @@ struct xhci_hub *xhci_get_rhub(struct usb_hcd *hcd) * It will release and re-aquire the lock while calling ACPI * method. */ -void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, +static void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, u16 index, bool on, unsigned long *flags) __must_hold(&xhci->lock) { diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 1afd32beec99c..38649284ff889 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -793,8 +793,6 @@ static void xhci_stop(struct usb_hcd *hcd) void xhci_shutdown(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); - unsigned long flags; - int i; if (xhci->quirks & XHCI_SPURIOUS_REBOOT) usb_disable_xhci_ports(to_pci_dev(hcd->self.sysdev)); @@ -810,21 +808,12 @@ void xhci_shutdown(struct usb_hcd *hcd) del_timer_sync(&xhci->shared_hcd->rh_timer); } - spin_lock_irqsave(&xhci->lock, flags); + spin_lock_irq(&xhci->lock); xhci_halt(xhci); - - /* Power off USB2 ports*/ - for (i = 0; i < xhci->usb2_rhub.num_ports; i++) - xhci_set_port_power(xhci, xhci->main_hcd, i, false, &flags); - - /* Power off USB3 ports*/ - for (i = 0; i < xhci->usb3_rhub.num_ports; i++) - xhci_set_port_power(xhci, xhci->shared_hcd, i, false, &flags); - /* Workaround for spurious wakeups at shutdown with HSW */ if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) xhci_reset(xhci, XHCI_RESET_SHORT_USEC); - spin_unlock_irqrestore(&xhci->lock, flags); + spin_unlock_irq(&xhci->lock); xhci_cleanup_msix(xhci); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index df6f2ebaff188..7caa0db5e826d 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -2196,8 +2196,6 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, int xhci_hub_status_data(struct usb_hcd *hcd, char *buf); int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1); struct xhci_hub *xhci_get_rhub(struct usb_hcd *hcd); -void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, u16 index, - bool on, unsigned long *flags); void xhci_hc_died(struct xhci_hcd *xhci); From 43626dade36fa74d3329046f4ae2d7fdefe401c6 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 25 Aug 2022 17:38:38 +0900 Subject: [PATCH 386/654] cgroup: Add missing cpus_read_lock() to cgroup_attach_task_all() syzbot is hitting percpu_rwsem_assert_held(&cpu_hotplug_lock) warning at cpuset_attach() [1], for commit 4f7e7236435ca0ab ("cgroup: Fix threadgroup_rwsem <-> cpus_read_lock() deadlock") missed that cpuset_attach() is also called from cgroup_attach_task_all(). Add cpus_read_lock() like what cgroup_procs_write_start() does. Link: https://syzkaller.appspot.com/bug?extid=29d3a3b4d86c8136ad9e [1] Reported-by: syzbot Signed-off-by: Tetsuo Handa Fixes: 4f7e7236435ca0ab ("cgroup: Fix threadgroup_rwsem <-> cpus_read_lock() deadlock") Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-v1.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 2ade21b54dc44..ff6a8099eb2a2 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -59,6 +59,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) int retval = 0; mutex_lock(&cgroup_mutex); + cpus_read_lock(); percpu_down_write(&cgroup_threadgroup_rwsem); for_each_root(root) { struct cgroup *from_cgrp; @@ -72,6 +73,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) break; } percpu_up_write(&cgroup_threadgroup_rwsem); + cpus_read_unlock(); mutex_unlock(&cgroup_mutex); return retval; From ad3b0b99113783f697579c7b09285916019865ea Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 15 Aug 2022 13:03:49 +0800 Subject: [PATCH 387/654] drm/amdgpu: add TX_POWER_CTRL_1 macro definitions for NBIO IP v7.7.0 Add the BIF0_PCIE_TX_POWER_CTRL_1 register offset and mask macro definitions for AMD_CG_SUPPORT_BIF_LS. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- .../amd/include/asic_reg/nbio/nbio_7_7_0_offset.h | 2 ++ .../amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h index 2ed95790a6006..cf8d60c4df1be 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h @@ -15243,6 +15243,8 @@ #define regBIF0_PCIE_TX_TRACKING_ADDR_HI_BASE_IDX 5 #define regBIF0_PCIE_TX_TRACKING_CTRL_STATUS 0x420186 #define regBIF0_PCIE_TX_TRACKING_CTRL_STATUS_BASE_IDX 5 +#define regBIF0_PCIE_TX_POWER_CTRL_1 0x420187 +#define regBIF0_PCIE_TX_POWER_CTRL_1_BASE_IDX 5 #define regBIF0_PCIE_TX_CTRL_4 0x42018b #define regBIF0_PCIE_TX_CTRL_4_BASE_IDX 5 #define regBIF0_PCIE_TX_STATUS 0x420194 diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h index eb62a18fcc480..3d60c9e925481 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h @@ -85627,6 +85627,19 @@ #define BIF0_PCIE_TX_TRACKING_CTRL_STATUS__TX_TRACKING_PORT_MASK 0x0000000EL #define BIF0_PCIE_TX_TRACKING_CTRL_STATUS__TX_TRACKING_UNIT_ID_MASK 0x00007F00L #define BIF0_PCIE_TX_TRACKING_CTRL_STATUS__TX_TRACKING_STATUS_VALID_MASK 0x00008000L +//BIF0_PCIE_TX_POWER_CTRL_1 +#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN__SHIFT 0x0 +#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_DS_EN__SHIFT 0x1 +#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_SD_EN__SHIFT 0x2 +#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN__SHIFT 0x3 +#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_DS_EN__SHIFT 0x4 +#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_SD_EN__SHIFT 0x5 +#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK 0x00000001L +#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_DS_EN_MASK 0x00000002L +#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_SD_EN_MASK 0x00000004L +#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK 0x00000008L +#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_DS_EN_MASK 0x00000010L +#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_SD_EN_MASK 0x00000020L //BIF0_PCIE_TX_CTRL_4 #define BIF0_PCIE_TX_CTRL_4__TX_PORT_ACCESS_TIMER_SKEW__SHIFT 0x0 #define BIF0_PCIE_TX_CTRL_4__TX_PORT_ACCESS_TIMER_SKEW_MASK 0x0000000FL From 2037769f995e45d3a368fb74983954b3ed8da178 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 15 Aug 2022 13:12:21 +0800 Subject: [PATCH 388/654] drm/amdgpu: add NBIO IP v7.7.0 Clock Gating support Add BIF Clock Gating MGCG and LS support for NBIO IP v7.7.0. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c | 78 ++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c index 01e8288d09a8f..1dc95ef21da6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c @@ -247,6 +247,81 @@ static void nbio_v7_7_init_registers(struct amdgpu_device *adev) } +static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + return; + + def = data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL); + if (enable) { + data |= (BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } else { + data &= ~(BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL, data); +} + +static void nbio_v7_7_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + return; + + def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2); + if (enable) + data |= BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + else + data &= ~BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1); + if (enable) { + data |= (BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + } else { + data &= ~(BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + } + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1, data); +} + +static void nbio_v7_7_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + uint32_t data; + + /* AMD_CG_SUPPORT_BIF_MGCG */ + data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL); + if (data & BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_BIF_MGCG; + + /* AMD_CG_SUPPORT_BIF_LS */ + data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2); + if (data & BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_BIF_LS; +} + const struct amdgpu_nbio_funcs nbio_v7_7_funcs = { .get_hdp_flush_req_offset = nbio_v7_7_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_7_get_hdp_flush_done_offset, @@ -262,6 +337,9 @@ const struct amdgpu_nbio_funcs nbio_v7_7_funcs = { .enable_doorbell_aperture = nbio_v7_7_enable_doorbell_aperture, .enable_doorbell_selfring_aperture = nbio_v7_7_enable_doorbell_selfring_aperture, .ih_doorbell_range = nbio_v7_7_ih_doorbell_range, + .update_medium_grain_clock_gating = nbio_v7_7_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbio_v7_7_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v7_7_get_clockgating_state, .ih_control = nbio_v7_7_ih_control, .init_registers = nbio_v7_7_init_registers, }; From 16c01544e30a4b4cf5f3eaacf7a4c19a3622b597 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 15 Aug 2022 13:50:46 +0800 Subject: [PATCH 389/654] drm/amdgpu: enable NBIO IP v7.7.0 Clock Gating Enable AMD_CG_SUPPORT_BIF_MGCG and AMD_CG_SUPPORT_BIF_LS support. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc21.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 1ff7fc7bb3400..982c129648791 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -603,6 +603,8 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = @@ -702,6 +704,7 @@ static int soc21_common_set_clockgating_state(void *handle, switch (adev->ip_versions[NBIO_HWIP][0]) { case IP_VERSION(4, 3, 0): case IP_VERSION(4, 3, 1): + case IP_VERSION(7, 7, 0): adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, @@ -709,10 +712,6 @@ static int soc21_common_set_clockgating_state(void *handle, adev->hdp.funcs->update_clock_gating(adev, state == AMD_CG_STATE_GATE); break; - case IP_VERSION(7, 7, 0): - adev->hdp.funcs->update_clock_gating(adev, - state == AMD_CG_STATE_GATE); - break; default: break; } From 4e3464badbeebb3528c457aefe91413f8a9070b6 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Mon, 22 Aug 2022 12:37:10 -0400 Subject: [PATCH 390/654] drm/amd/display: enable PCON support for dcn314 [Why] DCN314 supports PCON. [How] Explicitly enable it in dcn314 resources. Signed-off-by: Roman Li Reviewed-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 85f32206a7662..3a9e3870b3a95 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -1750,6 +1750,7 @@ static bool dcn314_resource_construct( dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; dc->caps.dp_hpo = true; + dc->caps.dp_hdmi21_pcon_support = true; dc->caps.edp_dsc_support = true; dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; From 00047c3d967d7ef8adf8bac3c3579294a3bc0bb1 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 22 Aug 2022 13:30:44 +0800 Subject: [PATCH 391/654] drm/amdgpu: add sdma instance check for gfx11 CGCG For some ASICs, like GFX IP v11.0.1, only have one SDMA instance, so not need to configure SDMA1_RLC_CGCG_CTRL for this case. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index f45db80810fa8..e8db772e068c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -5182,9 +5182,12 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); - data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); - data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); - WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ + if (adev->sdma.num_instances > 1) { + data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); + data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); + WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + } } else { /* Program RLC_CGCG_CGLS_CTRL */ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); @@ -5213,9 +5216,12 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); - data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); - data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ + if (adev->sdma.num_instances > 1) { + data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); + data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; + WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + } } } From da1acbb12b33cbc651d8a7e956d254f1acc5034f Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 8 Aug 2022 10:41:26 +0800 Subject: [PATCH 392/654] drm/amd/pm: update SMU 13.0.0 driver_if header To fit the latest 78.53 PMFW. Signed-off-by: Evan Quan Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- .../inc/pmfw_if/smu13_driver_if_v13_0_0.h | 31 +++++++++++-------- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h index 78620b0bd2790..f745cd8f1ab76 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h @@ -24,12 +24,8 @@ #ifndef SMU13_DRIVER_IF_V13_0_0_H #define SMU13_DRIVER_IF_V13_0_0_H -// *** IMPORTANT *** -// PMFW TEAM: Always increment the interface version on any change to this file -#define SMU13_DRIVER_IF_VERSION 0x23 - //Increment this version if SkuTable_t or BoardTable_t change -#define PPTABLE_VERSION 0x1D +#define PPTABLE_VERSION 0x22 #define NUM_GFXCLK_DPM_LEVELS 16 #define NUM_SOCCLK_DPM_LEVELS 8 @@ -1193,8 +1189,17 @@ typedef struct { // SECTION: Advanced Options uint32_t DebugOverrides; + // Section: Total Board Power idle vs active coefficients + uint8_t TotalBoardPowerSupport; + uint8_t TotalBoardPowerPadding[3]; + + int16_t TotalIdleBoardPowerM; + int16_t TotalIdleBoardPowerB; + int16_t TotalBoardPowerM; + int16_t TotalBoardPowerB; + // SECTION: Sku Reserved - uint32_t Spare[64]; + uint32_t Spare[61]; // Padding for MMHUB - do not modify this uint32_t MmHubPadding[8]; @@ -1259,7 +1264,8 @@ typedef struct { // SECTION: Clock Spread Spectrum // UCLK Spread Spectrum - uint16_t UclkSpreadPadding; + uint8_t UclkTrainingModeSpreadPercent; + uint8_t UclkSpreadPadding; uint16_t UclkSpreadFreq; // kHz // UCLK Spread Spectrum @@ -1272,11 +1278,7 @@ typedef struct { // Section: Memory Config uint8_t DramWidth; // Width of interface to the channel for each DRAM module. See DRAM_BIT_WIDTH_TYPE_e - uint8_t PaddingMem1[3]; - - // Section: Total Board Power - uint16_t TotalBoardPower; //Only needed for TCP Estimated case, where TCP = TGP+Total Board Power - uint16_t BoardPowerPadding; + uint8_t PaddingMem1[7]; // SECTION: UMC feature flags uint8_t HsrEnabled; @@ -1375,8 +1377,11 @@ typedef struct { uint16_t Vcn1ActivityPercentage ; uint32_t EnergyAccumulator; - uint16_t AverageSocketPower ; + uint16_t AverageSocketPower; + uint16_t AverageTotalBoardPower; + uint16_t AvgTemperature[TEMP_COUNT]; + uint16_t TempPadding; uint8_t PcieRate ; uint8_t PcieWidth ; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 6fe2fe92ebd75..ac308e72241a5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -30,7 +30,7 @@ #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x05 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2C +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2E #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms From 894c9c540f8315007a4752320e2399bc2e0c46b7 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 19 Aug 2022 17:15:08 -0400 Subject: [PATCH 393/654] drm/amdgpu: Fix page table setup on Arcturus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When translate_further is enabled, page table depth needs to be updated. This was missing on Arcturus MMHUB init. This was causing address translations to fail for SDMA user-mode queues. Fixes: 352e683b72e7 ("drm/amdgpu: Enable translate_further to extend UTCL2 reach") Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Mukul Joshi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 6e0145b2b408a..445cb06b9d264 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -295,9 +295,17 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev, static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + unsigned int num_level, block_size; uint32_t tmp; int i; + num_level = adev->vm_manager.num_level; + block_size = adev->vm_manager.block_size; + if (adev->gmc.translate_further) + num_level -= 1; + else + block_size -= 9; + for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i); @@ -305,7 +313,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, - adev->vm_manager.num_level); + num_level); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, @@ -323,7 +331,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, - adev->vm_manager.block_size - 9); + block_size); /* Send no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, From ee8086dbc1585d9f4020a19447388246a5cff5c8 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Wed, 24 Aug 2022 11:16:51 +0800 Subject: [PATCH 394/654] drm/amdkfd: Fix isa version for the GC 10.3.7 Correct the isa version for handling KFD test. Fixes: 7c4f4f197e0c ("drm/amdkfd: Add GC 10.3.6 and 10.3.7 KFD definitions") Signed-off-by: Prike Liang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 357298e69495f..22c0929d410b6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -382,12 +382,8 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) f2g = &gfx_v10_3_kfd2kgd; break; case IP_VERSION(10, 3, 6): - gfx_target_version = 100306; - if (!vf) - f2g = &gfx_v10_3_kfd2kgd; - break; case IP_VERSION(10, 3, 7): - gfx_target_version = 100307; + gfx_target_version = 100306; if (!vf) f2g = &gfx_v10_3_kfd2kgd; break; From 61251b2cffea8c1811bbd2dbef175b65f64aaa86 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Tue, 23 Aug 2022 15:34:10 +0800 Subject: [PATCH 395/654] drm/amdgpu: add MGCG perfmon setting for gfx11 Enable GFX11 MGCG perfmon setting. V2: set rlc to saft mode before setting. Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 +++ drivers/gpu/drm/amd/amdgpu/soc21.c | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index e8db772e068c0..f6b1bb40e5036 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -131,6 +131,8 @@ static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, bool all_hub, uint8_t dst_sel); static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev); static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev); +static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, + bool enable); static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { @@ -1139,6 +1141,7 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs, .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q, .init_spm_golden = &gfx_v11_0_init_spm_golden_registers, + .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk, }; static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 982c129648791..55284b24f1139 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -494,6 +494,20 @@ static void soc21_pre_asic_init(struct amdgpu_device *adev) { } +static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev, + bool enter) +{ + if (enter) + amdgpu_gfx_rlc_enter_safe_mode(adev); + else + amdgpu_gfx_rlc_exit_safe_mode(adev); + + if (adev->gfx.funcs->update_perfmon_mgcg) + adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); + + return 0; +} + static const struct amdgpu_asic_funcs soc21_asic_funcs = { .read_disabled_bios = &soc21_read_disabled_bios, @@ -513,6 +527,7 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs = .supports_baco = &amdgpu_dpm_is_baco_supported, .pre_asic_init = &soc21_pre_asic_init, .query_video_codecs = &soc21_query_video_codecs, + .update_umd_stable_pstate = &soc21_update_umd_stable_pstate, }; static int soc21_common_early_init(void *handle) From b8983d42524f10ac6bf35bbce6a7cc8e45f61e04 Mon Sep 17 00:00:00 2001 From: Qu Huang Date: Tue, 23 Aug 2022 14:44:06 +0800 Subject: [PATCH 396/654] drm/amdgpu: mmVM_L2_CNTL3 register not initialized correctly The mmVM_L2_CNTL3 register is not assigned an initial value Signed-off-by: Qu Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 3f44a099c52a4..3e51e773f92be 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -176,6 +176,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); + tmp = mmVM_L2_CNTL3_DEFAULT; if (adev->gmc.translate_further) { tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, From 602684adb42a04858e23248b22d4931b7ef2ad7e Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 24 Aug 2022 10:08:36 +0200 Subject: [PATCH 397/654] docs: Update version number from 5.x to 6.x in README.rst A quick 'grep "5\.x" . -R' on Documentation shows that README.rst, 2.Process.rst and applying-patches.rst all mention the version number "5.x" for kernel releases. As the next release will be version 6.0, updating the version number to 6.x in README.rst seems reasonable. The description in 2.Process.rst is just a description of recent kernel releases, it was last updated in the beginning of 2020, and can be revisited at any time on a regular basis, independent of changing the version number from 5 to 6. So, there is no need to update this document now when transitioning from 5.x to 6.x numbering. The document applying-patches.rst is probably obsolete for most users anyway, a reader will sufficiently well understand the steps, even it mentions version 5 rather than version 6. So, do not update that to a version 6.x numbering scheme. Update version number from 5.x to 6.x in README.rst only. Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20220824080836.23087-1-lukas.bulwahn@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/README.rst | 30 ++++++++++++++-------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst index caa3c09a5c3f9..9eb6b9042f75f 100644 --- a/Documentation/admin-guide/README.rst +++ b/Documentation/admin-guide/README.rst @@ -1,9 +1,9 @@ .. _readme: -Linux kernel release 5.x +Linux kernel release 6.x ============================================= -These are the release notes for Linux version 5. Read them carefully, +These are the release notes for Linux version 6. Read them carefully, as they tell you what this is all about, explain how to install the kernel, and what to do if something goes wrong. @@ -63,7 +63,7 @@ Installing the kernel source directory where you have permissions (e.g. your home directory) and unpack it:: - xz -cd linux-5.x.tar.xz | tar xvf - + xz -cd linux-6.x.tar.xz | tar xvf - Replace "X" with the version number of the latest kernel. @@ -72,12 +72,12 @@ Installing the kernel source files. They should match the library, and not get messed up by whatever the kernel-du-jour happens to be. - - You can also upgrade between 5.x releases by patching. Patches are + - You can also upgrade between 6.x releases by patching. Patches are distributed in the xz format. To install by patching, get all the newer patch files, enter the top level directory of the kernel source - (linux-5.x) and execute:: + (linux-6.x) and execute:: - xz -cd ../patch-5.x.xz | patch -p1 + xz -cd ../patch-6.x.xz | patch -p1 Replace "x" for all versions bigger than the version "x" of your current source tree, **in_order**, and you should be ok. You may want to remove @@ -85,13 +85,13 @@ Installing the kernel source that there are no failed patches (some-file-name# or some-file-name.rej). If there are, either you or I have made a mistake. - Unlike patches for the 5.x kernels, patches for the 5.x.y kernels + Unlike patches for the 6.x kernels, patches for the 6.x.y kernels (also known as the -stable kernels) are not incremental but instead apply - directly to the base 5.x kernel. For example, if your base kernel is 5.0 - and you want to apply the 5.0.3 patch, you must not first apply the 5.0.1 - and 5.0.2 patches. Similarly, if you are running kernel version 5.0.2 and - want to jump to 5.0.3, you must first reverse the 5.0.2 patch (that is, - patch -R) **before** applying the 5.0.3 patch. You can read more on this in + directly to the base 6.x kernel. For example, if your base kernel is 6.0 + and you want to apply the 6.0.3 patch, you must not first apply the 6.0.1 + and 6.0.2 patches. Similarly, if you are running kernel version 6.0.2 and + want to jump to 6.0.3, you must first reverse the 6.0.2 patch (that is, + patch -R) **before** applying the 6.0.3 patch. You can read more on this in :ref:`Documentation/process/applying-patches.rst `. Alternatively, the script patch-kernel can be used to automate this @@ -114,7 +114,7 @@ Installing the kernel source Software requirements --------------------- - Compiling and running the 5.x kernels requires up-to-date + Compiling and running the 6.x kernels requires up-to-date versions of various software packages. Consult :ref:`Documentation/process/changes.rst ` for the minimum version numbers required and how to get updates for these packages. Beware that using @@ -132,12 +132,12 @@ Build directory for the kernel place for the output files (including .config). Example:: - kernel source code: /usr/src/linux-5.x + kernel source code: /usr/src/linux-6.x build directory: /home/name/build/kernel To configure and build the kernel, use:: - cd /usr/src/linux-5.x + cd /usr/src/linux-6.x make O=/home/name/build/kernel menuconfig make O=/home/name/build/kernel sudo make O=/home/name/build/kernel modules_install install From 465d0eb0dc31ae26c05504668d3957db91e99799 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 23 Aug 2022 19:40:53 +0800 Subject: [PATCH 398/654] Docs/admin-guide/mm/damon/usage: fix the example code snip The workflow example code is not working since it got the file names wrong. So fix this. Fixes: b18402726bd1 ("Docs/admin-guide/mm/damon/usage: document DAMON sysfs interface") Reviewed-by: SeongJae Park Signed-off-by: Kairui Song Link: https://lore.kernel.org/r/20220823114053.53305-1-ryncsn@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/mm/damon/usage.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst index d52f572a90298..ca91ecc290785 100644 --- a/Documentation/admin-guide/mm/damon/usage.rst +++ b/Documentation/admin-guide/mm/damon/usage.rst @@ -50,10 +50,10 @@ For a short example, users can monitor the virtual address space of a given workload as below. :: # cd /sys/kernel/mm/damon/admin/ - # echo 1 > kdamonds/nr && echo 1 > kdamonds/0/contexts/nr + # echo 1 > kdamonds/nr_kdamonds && echo 1 > kdamonds/0/contexts/nr_contexts # echo vaddr > kdamonds/0/contexts/0/operations - # echo 1 > kdamonds/0/contexts/0/targets/nr - # echo $(pidof ) > kdamonds/0/contexts/0/targets/0/pid + # echo 1 > kdamonds/0/contexts/0/targets/nr_targets + # echo $(pidof ) > kdamonds/0/contexts/0/targets/0/pid_target # echo on > kdamonds/0/state Files Hierarchy @@ -366,12 +366,12 @@ memory rate becomes larger than 60%, or lower than 30%". :: # echo 1 > kdamonds/0/contexts/0/schemes/nr_schemes # cd kdamonds/0/contexts/0/schemes/0 # # set the basic access pattern and the action - # echo 4096 > access_patterns/sz/min - # echo 8192 > access_patterns/sz/max - # echo 0 > access_patterns/nr_accesses/min - # echo 5 > access_patterns/nr_accesses/max - # echo 10 > access_patterns/age/min - # echo 20 > access_patterns/age/max + # echo 4096 > access_pattern/sz/min + # echo 8192 > access_pattern/sz/max + # echo 0 > access_pattern/nr_accesses/min + # echo 5 > access_pattern/nr_accesses/max + # echo 10 > access_pattern/age/min + # echo 20 > access_pattern/age/max # echo pageout > action # # set quotas # echo 10 > quotas/ms From 2fc31465c5373b5ca4edf2e5238558cb62902311 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 23 Aug 2022 20:52:59 +0200 Subject: [PATCH 399/654] bpf: Do mark_chain_precision for ARG_CONST_ALLOC_SIZE_OR_ZERO Precision markers need to be propagated whenever we have an ARG_CONST_* style argument, as the verifier cannot consider imprecise scalars to be equivalent for the purposes of states_equal check when such arguments refine the return value (in this case, set mem_size for PTR_TO_MEM). The resultant mem_size for the R0 is derived from the constant value, and if the verifier incorrectly prunes states considering them equivalent where such arguments exist (by seeing that both registers have reg->precise as false in regsafe), we can end up with invalid programs passing the verifier which can do access beyond what should have been the correct mem_size in that explored state. To show a concrete example of the problem: 0000000000000000 : 0: r2 = *(u32 *)(r1 + 80) 1: r1 = *(u32 *)(r1 + 76) 2: r3 = r1 3: r3 += 4 4: if r3 > r2 goto +18 5: w2 = 0 6: *(u32 *)(r1 + 0) = r2 7: r1 = *(u32 *)(r1 + 0) 8: r2 = 1 9: if w1 == 0 goto +1 10: r2 = -1 0000000000000058 : 11: r1 = 0 ll 13: r3 = 0 14: call bpf_ringbuf_reserve 15: if r0 == 0 goto +7 16: r1 = r0 17: r1 += 16777215 18: w2 = 0 19: *(u8 *)(r1 + 0) = r2 20: r1 = r0 21: r2 = 0 22: call bpf_ringbuf_submit 00000000000000b8 : 23: w0 = 0 24: exit For the first case, the single line execution's exploration will prune the search at insn 14 for the branch insn 9's second leg as it will be verified first using r2 = -1 (UINT_MAX), while as w1 at insn 9 will always be 0 so at runtime we don't get error for being greater than UINT_MAX/4 from bpf_ringbuf_reserve. The verifier during regsafe just sees reg->precise as false for both r2 registers in both states, hence considers them equal for purposes of states_equal. If we propagated precise markers using the backtracking support, we would use the precise marking to then ensure that old r2 (UINT_MAX) was within the new r2 (1) and this would never be true, so the verification would rightfully fail. The end result is that the out of bounds access at instruction 19 would be permitted without this fix. Note that reg->precise is always set to true when user does not have CAP_BPF (or when subprog count is greater than 1 (i.e. use of any static or global functions)), hence this is only a problem when precision marks need to be explicitly propagated (i.e. privileged users with CAP_BPF). A simplified test case has been included in the next patch to prevent future regressions. Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220823185300.406-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 096fdac701654..30c6eebce1461 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6066,6 +6066,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, return -EACCES; } meta->mem_size = reg->var_off.value; + err = mark_chain_precision(env, regno); + if (err) + return err; break; case ARG_PTR_TO_INT: case ARG_PTR_TO_LONG: From 1800b2ac96d8bc4ccdddc2ea9e83ecaffd54d3f2 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 23 Aug 2022 20:55:00 +0200 Subject: [PATCH 400/654] selftests/bpf: Add regression test for pruning fix Add a test to ensure we do mark_chain_precision for the argument type ARG_CONST_ALLOC_SIZE_OR_ZERO. For other argument types, this was already done, but propagation for missing for this case. Without the fix, this test case loads successfully. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220823185500.467-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/verifier/precise.c | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 9e754423fa8b0..6c03a7d805f9d 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -192,3 +192,28 @@ .result = VERBOSE_ACCEPT, .retval = -1, }, +{ + "precise: mark_chain_precision for ARG_CONST_ALLOC_SIZE_OR_ZERO", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct xdp_md, ingress_ifindex)), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0, 1), + BPF_MOV64_IMM(BPF_REG_2, 0x1000), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 42), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 1 }, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = BPF_F_TEST_STATE_FREQ, + .errstr = "invalid access to memory, mem_size=1 off=42 size=8", + .result = REJECT, +}, From b0f571ecd7943423c25947439045f0d352ca3dbf Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 24 Aug 2022 17:35:45 +0100 Subject: [PATCH 401/654] rxrpc: Fix locking in rxrpc's sendmsg Fix three bugs in the rxrpc's sendmsg implementation: (1) rxrpc_new_client_call() should release the socket lock when returning an error from rxrpc_get_call_slot(). (2) rxrpc_wait_for_tx_window_intr() will return without the call mutex held in the event that we're interrupted by a signal whilst waiting for tx space on the socket or relocking the call mutex afterwards. Fix this by: (a) moving the unlock/lock of the call mutex up to rxrpc_send_data() such that the lock is not held around all of rxrpc_wait_for_tx_window*() and (b) indicating to higher callers whether we're return with the lock dropped. Note that this means recvmsg() will not block on this call whilst we're waiting. (3) After dropping and regaining the call mutex, rxrpc_send_data() needs to go and recheck the state of the tx_pending buffer and the tx_total_len check in case we raced with another sendmsg() on the same call. Thinking on this some more, it might make sense to have different locks for sendmsg() and recvmsg(). There's probably no need to make recvmsg() wait for sendmsg(). It does mean that recvmsg() can return MSG_EOR indicating that a call is dead before a sendmsg() to that call returns - but that can currently happen anyway. Without fix (2), something like the following can be induced: WARNING: bad unlock balance detected! 5.16.0-rc6-syzkaller #0 Not tainted ------------------------------------- syz-executor011/3597 is trying to release lock (&call->user_mutex) at: [] rxrpc_do_sendmsg+0xc13/0x1350 net/rxrpc/sendmsg.c:748 but there are no more locks to release! other info that might help us debug this: no locks held by syz-executor011/3597. ... Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_unlock_imbalance_bug include/trace/events/lock.h:58 [inline] __lock_release kernel/locking/lockdep.c:5306 [inline] lock_release.cold+0x49/0x4e kernel/locking/lockdep.c:5657 __mutex_unlock_slowpath+0x99/0x5e0 kernel/locking/mutex.c:900 rxrpc_do_sendmsg+0xc13/0x1350 net/rxrpc/sendmsg.c:748 rxrpc_sendmsg+0x420/0x630 net/rxrpc/af_rxrpc.c:561 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2409 ___sys_sendmsg+0xf3/0x170 net/socket.c:2463 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2492 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae [Thanks to Hawkins Jiawei and Khalid Masum for their attempts to fix this] Fixes: bc5e3a546d55 ("rxrpc: Use MSG_WAITALL to tell sendmsg() to temporarily ignore signals") Reported-by: syzbot+7f0483225d0c94cb3441@syzkaller.appspotmail.com Signed-off-by: David Howells Reviewed-by: Marc Dionne Tested-by: syzbot+7f0483225d0c94cb3441@syzkaller.appspotmail.com cc: Hawkins Jiawei cc: Khalid Masum cc: Dan Carpenter cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/166135894583.600315.7170979436768124075.stgit@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- net/rxrpc/call_object.c | 4 +- net/rxrpc/sendmsg.c | 92 ++++++++++++++++++++++++----------------- 2 files changed, 57 insertions(+), 39 deletions(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 84d0a41096450..6401cdf7a6246 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -285,8 +285,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, _enter("%p,%lx", rx, p->user_call_ID); limiter = rxrpc_get_call_slot(p, gfp); - if (!limiter) + if (!limiter) { + release_sock(&rx->sk); return ERR_PTR(-ERESTARTSYS); + } call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id); if (IS_ERR(call)) { diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 1d38e279e2efa..3c3a626459deb 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -51,10 +51,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, return sock_intr_errno(*timeo); trace_rxrpc_transmit(call, rxrpc_transmit_wait); - mutex_unlock(&call->user_mutex); *timeo = schedule_timeout(*timeo); - if (mutex_lock_interruptible(&call->user_mutex) < 0) - return sock_intr_errno(*timeo); } } @@ -290,37 +287,48 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, static int rxrpc_send_data(struct rxrpc_sock *rx, struct rxrpc_call *call, struct msghdr *msg, size_t len, - rxrpc_notify_end_tx_t notify_end_tx) + rxrpc_notify_end_tx_t notify_end_tx, + bool *_dropped_lock) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; struct sock *sk = &rx->sk; + enum rxrpc_call_state state; long timeo; - bool more; - int ret, copied; + bool more = msg->msg_flags & MSG_MORE; + int ret, copied = 0; timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); /* this should be in poll */ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); +reload: + ret = -EPIPE; if (sk->sk_shutdown & SEND_SHUTDOWN) - return -EPIPE; - - more = msg->msg_flags & MSG_MORE; - + goto maybe_error; + state = READ_ONCE(call->state); + ret = -ESHUTDOWN; + if (state >= RXRPC_CALL_COMPLETE) + goto maybe_error; + ret = -EPROTO; + if (state != RXRPC_CALL_CLIENT_SEND_REQUEST && + state != RXRPC_CALL_SERVER_ACK_REQUEST && + state != RXRPC_CALL_SERVER_SEND_REPLY) + goto maybe_error; + + ret = -EMSGSIZE; if (call->tx_total_len != -1) { - if (len > call->tx_total_len) - return -EMSGSIZE; - if (!more && len != call->tx_total_len) - return -EMSGSIZE; + if (len - copied > call->tx_total_len) + goto maybe_error; + if (!more && len - copied != call->tx_total_len) + goto maybe_error; } skb = call->tx_pending; call->tx_pending = NULL; rxrpc_see_skb(skb, rxrpc_skb_seen); - copied = 0; do { /* Check to see if there's a ping ACK to reply to. */ if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) @@ -331,16 +339,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, _debug("alloc"); - if (!rxrpc_check_tx_space(call, NULL)) { - ret = -EAGAIN; - if (msg->msg_flags & MSG_DONTWAIT) - goto maybe_error; - ret = rxrpc_wait_for_tx_window(rx, call, - &timeo, - msg->msg_flags & MSG_WAITALL); - if (ret < 0) - goto maybe_error; - } + if (!rxrpc_check_tx_space(call, NULL)) + goto wait_for_space; /* Work out the maximum size of a packet. Assume that * the security header is going to be in the padded @@ -468,6 +468,27 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, efault: ret = -EFAULT; goto out; + +wait_for_space: + ret = -EAGAIN; + if (msg->msg_flags & MSG_DONTWAIT) + goto maybe_error; + mutex_unlock(&call->user_mutex); + *_dropped_lock = true; + ret = rxrpc_wait_for_tx_window(rx, call, &timeo, + msg->msg_flags & MSG_WAITALL); + if (ret < 0) + goto maybe_error; + if (call->interruptibility == RXRPC_INTERRUPTIBLE) { + if (mutex_lock_interruptible(&call->user_mutex) < 0) { + ret = sock_intr_errno(timeo); + goto maybe_error; + } + } else { + mutex_lock(&call->user_mutex); + } + *_dropped_lock = false; + goto reload; } /* @@ -629,6 +650,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) enum rxrpc_call_state state; struct rxrpc_call *call; unsigned long now, j; + bool dropped_lock = false; int ret; struct rxrpc_send_params p = { @@ -737,21 +759,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = rxrpc_send_abort_packet(call); } else if (p.command != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; - } else if (rxrpc_is_client_call(call) && - state != RXRPC_CALL_CLIENT_SEND_REQUEST) { - /* request phase complete for this client call */ - ret = -EPROTO; - } else if (rxrpc_is_service_call(call) && - state != RXRPC_CALL_SERVER_ACK_REQUEST && - state != RXRPC_CALL_SERVER_SEND_REPLY) { - /* Reply phase not begun or not complete for service call. */ - ret = -EPROTO; } else { - ret = rxrpc_send_data(rx, call, msg, len, NULL); + ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock); } out_put_unlock: - mutex_unlock(&call->user_mutex); + if (!dropped_lock) + mutex_unlock(&call->user_mutex); error_put: rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); @@ -779,6 +793,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, struct msghdr *msg, size_t len, rxrpc_notify_end_tx_t notify_end_tx) { + bool dropped_lock = false; int ret; _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]); @@ -796,7 +811,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, case RXRPC_CALL_SERVER_ACK_REQUEST: case RXRPC_CALL_SERVER_SEND_REPLY: ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len, - notify_end_tx); + notify_end_tx, &dropped_lock); break; case RXRPC_CALL_COMPLETE: read_lock_bh(&call->state_lock); @@ -810,7 +825,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, break; } - mutex_unlock(&call->user_mutex); + if (!dropped_lock) + mutex_unlock(&call->user_mutex); _leave(" = %d", ret); return ret; } From 9cb9dadb8f45c67e4310e002c2f221b70312b293 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Wed, 24 Aug 2022 09:50:49 -0700 Subject: [PATCH 402/654] ionic: clear broken state on generation change There is a case found in heavy testing where a link flap happens just before a firmware Recovery event and the driver gets stuck in the BROKEN state. This comes from the driver getting interrupted by a FW generation change when coming back up from the link flap, and the call to ionic_start_queues() in ionic_link_status_check() fails. This can be addressed by having the fw_up code clear the BROKEN bit if seen, rather than waiting for a user to manually force the interface down and then back up. Fixes: 9e8eaf8427b6 ("ionic: stop watchdog when in broken state") Signed-off-by: Shannon Nelson Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 1443f788ee37c..d4226999547e8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -2963,6 +2963,9 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif) mutex_lock(&lif->queue_lock); + if (test_and_clear_bit(IONIC_LIF_F_BROKEN, lif->state)) + dev_info(ionic->dev, "FW Up: clearing broken state\n"); + err = ionic_qcqs_alloc(lif); if (err) goto err_unlock; From 0fc4dd452d6c14828eed6369155c75c0ac15bab3 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Wed, 24 Aug 2022 09:50:50 -0700 Subject: [PATCH 403/654] ionic: fix up issues with handling EAGAIN on FW cmds In looping on FW update tests we occasionally see the FW_ACTIVATE_STATUS command fail while it is in its EAGAIN loop waiting for the FW activate step to finsh inside the FW. The firmware is complaining that the done bit is set when a new dev_cmd is going to be processed. Doing a clean on the cmd registers and doorbell before exiting the wait-for-done and cleaning the done bit before the sleep prevents this from occurring. Fixes: fbfb8031533c ("ionic: Add hardware init and device commands") Signed-off-by: Shannon Nelson Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 4029b4e021f86..56f93b0305519 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -474,8 +474,8 @@ static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds, ionic_opcode_to_str(opcode), opcode, ionic_error_to_str(err), err); - msleep(1000); iowrite32(0, &idev->dev_cmd_regs->done); + msleep(1000); iowrite32(1, &idev->dev_cmd_regs->doorbell); goto try_again; } @@ -488,6 +488,8 @@ static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds, return ionic_error_to_errno(err); } + ionic_dev_cmd_clean(ionic); + return 0; } From 19058be7c48ceb3e60fa3948e24da1059bd68ee4 Mon Sep 17 00:00:00 2001 From: R Mohamed Shah Date: Wed, 24 Aug 2022 09:50:51 -0700 Subject: [PATCH 404/654] ionic: VF initial random MAC address if no assigned mac Assign a random mac address to the VF interface station address if it boots with a zero mac address in order to match similar behavior seen in other VF drivers. Handle the errors where the older firmware does not allow the VF to set its own station address. Newer firmware will allow the VF to set the station mac address if it hasn't already been set administratively through the PF. Setting it will also be allowed if the VF has trust. Fixes: fbb39807e9ae ("ionic: support sr-iov operations") Signed-off-by: R Mohamed Shah Signed-off-by: Shannon Nelson Signed-off-by: Jakub Kicinski --- .../net/ethernet/pensando/ionic/ionic_lif.c | 92 ++++++++++++++++++- 1 file changed, 87 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index d4226999547e8..0be79c5167813 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1564,8 +1564,67 @@ static int ionic_set_features(struct net_device *netdev, return err; } +static int ionic_set_attr_mac(struct ionic_lif *lif, u8 *mac) +{ + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.lif_setattr = { + .opcode = IONIC_CMD_LIF_SETATTR, + .index = cpu_to_le16(lif->index), + .attr = IONIC_LIF_ATTR_MAC, + }, + }; + + ether_addr_copy(ctx.cmd.lif_setattr.mac, mac); + return ionic_adminq_post_wait(lif, &ctx); +} + +static int ionic_get_attr_mac(struct ionic_lif *lif, u8 *mac_addr) +{ + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.lif_getattr = { + .opcode = IONIC_CMD_LIF_GETATTR, + .index = cpu_to_le16(lif->index), + .attr = IONIC_LIF_ATTR_MAC, + }, + }; + int err; + + err = ionic_adminq_post_wait(lif, &ctx); + if (err) + return err; + + ether_addr_copy(mac_addr, ctx.comp.lif_getattr.mac); + return 0; +} + +static int ionic_program_mac(struct ionic_lif *lif, u8 *mac) +{ + u8 get_mac[ETH_ALEN]; + int err; + + err = ionic_set_attr_mac(lif, mac); + if (err) + return err; + + err = ionic_get_attr_mac(lif, get_mac); + if (err) + return err; + + /* To deal with older firmware that silently ignores the set attr mac: + * doesn't actually change the mac and doesn't return an error, so we + * do the get attr to verify whether or not the set actually happened + */ + if (!ether_addr_equal(get_mac, mac)) + return 1; + + return 0; +} + static int ionic_set_mac_address(struct net_device *netdev, void *sa) { + struct ionic_lif *lif = netdev_priv(netdev); struct sockaddr *addr = sa; u8 *mac; int err; @@ -1574,6 +1633,14 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa) if (ether_addr_equal(netdev->dev_addr, mac)) return 0; + err = ionic_program_mac(lif, mac); + if (err < 0) + return err; + + if (err > 0) + netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal-due to old FW running\n", + __func__); + err = eth_prepare_mac_addr_change(netdev, addr); if (err) return err; @@ -3172,6 +3239,7 @@ static int ionic_station_set(struct ionic_lif *lif) .attr = IONIC_LIF_ATTR_MAC, }, }; + u8 mac_address[ETH_ALEN]; struct sockaddr addr; int err; @@ -3180,8 +3248,23 @@ static int ionic_station_set(struct ionic_lif *lif) return err; netdev_dbg(lif->netdev, "found initial MAC addr %pM\n", ctx.comp.lif_getattr.mac); - if (is_zero_ether_addr(ctx.comp.lif_getattr.mac)) - return 0; + ether_addr_copy(mac_address, ctx.comp.lif_getattr.mac); + + if (is_zero_ether_addr(mac_address)) { + eth_hw_addr_random(netdev); + netdev_dbg(netdev, "Random Mac generated: %pM\n", netdev->dev_addr); + ether_addr_copy(mac_address, netdev->dev_addr); + + err = ionic_program_mac(lif, mac_address); + if (err < 0) + return err; + + if (err > 0) { + netdev_dbg(netdev, "%s:SET/GET ATTR Mac are not same-due to old FW running\n", + __func__); + return 0; + } + } if (!is_zero_ether_addr(netdev->dev_addr)) { /* If the netdev mac is non-zero and doesn't match the default @@ -3189,12 +3272,11 @@ static int ionic_station_set(struct ionic_lif *lif) * likely here again after a fw-upgrade reset. We need to be * sure the netdev mac is in our filter list. */ - if (!ether_addr_equal(ctx.comp.lif_getattr.mac, - netdev->dev_addr)) + if (!ether_addr_equal(mac_address, netdev->dev_addr)) ionic_lif_addr_add(lif, netdev->dev_addr); } else { /* Update the netdev mac with the device's mac */ - memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len); + ether_addr_copy(addr.sa_data, mac_address); addr.sa_family = AF_INET; err = eth_prepare_mac_addr_change(netdev, &addr); if (err) { From a3a57bf07de23fe1ff779e0fdf710aa581c3ff73 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 24 Aug 2022 22:34:49 +0200 Subject: [PATCH 405/654] net: stmmac: work around sporadic tx issue on link-up This is a follow-up to the discussion in [0]. It seems to me that at least the IP version used on Amlogic SoC's sometimes has a problem if register MAC_CTRL_REG is written whilst the chip is still processing a previous write. But that's just a guess. Adding a delay between two writes to this register helps, but we can also simply omit the offending second write. This patch uses the second approach and is based on a suggestion from Qi Duan. Benefit of this approach is that we can save few register writes, also on not affected chip versions. [0] https://www.spinics.net/lists/netdev/msg831526.html Fixes: bfab27a146ed ("stmmac: add the experimental PCI support") Suggested-by: Qi Duan Suggested-by: Jerome Brunet Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/e99857ce-bd90-5093-ca8c-8cd480b5a0a2@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c | 8 ++++++-- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 +++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index caa4bfc4c1d62..9b6138b117766 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -258,14 +258,18 @@ EXPORT_SYMBOL_GPL(stmmac_set_mac_addr); /* Enable disable MAC RX/TX */ void stmmac_set_mac(void __iomem *ioaddr, bool enable) { - u32 value = readl(ioaddr + MAC_CTRL_REG); + u32 old_val, value; + + old_val = readl(ioaddr + MAC_CTRL_REG); + value = old_val; if (enable) value |= MAC_ENABLE_RX | MAC_ENABLE_TX; else value &= ~(MAC_ENABLE_TX | MAC_ENABLE_RX); - writel(value, ioaddr + MAC_CTRL_REG); + if (value != old_val) + writel(value, ioaddr + MAC_CTRL_REG); } void stmmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 070b5ef165eba..592d29abcb1c2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -986,10 +986,10 @@ static void stmmac_mac_link_up(struct phylink_config *config, bool tx_pause, bool rx_pause) { struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); - u32 ctrl; + u32 old_ctrl, ctrl; - ctrl = readl(priv->ioaddr + MAC_CTRL_REG); - ctrl &= ~priv->hw->link.speed_mask; + old_ctrl = readl(priv->ioaddr + MAC_CTRL_REG); + ctrl = old_ctrl & ~priv->hw->link.speed_mask; if (interface == PHY_INTERFACE_MODE_USXGMII) { switch (speed) { @@ -1064,7 +1064,8 @@ static void stmmac_mac_link_up(struct phylink_config *config, if (tx_pause && rx_pause) stmmac_mac_flow_ctrl(priv, duplex); - writel(ctrl, priv->ioaddr + MAC_CTRL_REG); + if (ctrl != old_ctrl) + writel(ctrl, priv->ioaddr + MAC_CTRL_REG); stmmac_mac_set(priv, priv->ioaddr, true); if (phy && priv->dma_cap.eee) { From c8b043702dc0894c07721c5b019096cebc8c798f Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Wed, 24 Aug 2022 23:54:06 +0200 Subject: [PATCH 406/654] net: lantiq_xrx200: confirm skb is allocated before using xrx200_hw_receive() assumes build_skb() always works and goes straight to skb_reserve(). However, build_skb() can fail under memory pressure. Add a check in case build_skb() failed to allocate and return NULL. Fixes: e015593573b3 ("net: lantiq_xrx200: convert to build_skb") Reported-by: Eric Dumazet Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/lantiq_xrx200.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 5edb68a8aab1e..89314b645c822 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -239,6 +239,12 @@ static int xrx200_hw_receive(struct xrx200_chan *ch) } skb = build_skb(buf, priv->rx_skb_size); + if (!skb) { + skb_free_frag(buf); + net_dev->stats.rx_dropped++; + return -ENOMEM; + } + skb_reserve(skb, NET_SKB_PAD); skb_put(skb, len); From c4b6e9341f930e4dd089231c0414758f5f1f9dbd Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Wed, 24 Aug 2022 23:54:07 +0200 Subject: [PATCH 407/654] net: lantiq_xrx200: fix lock under memory pressure When the xrx200_hw_receive() function returns -ENOMEM, the NAPI poll function immediately returns an error. This is incorrect for two reasons: * the function terminates without enabling interrupts or scheduling NAPI, * the error code (-ENOMEM) is returned instead of the number of received packets. After the first memory allocation failure occurs, packet reception is locked due to disabled interrupts from DMA.. Fixes: fe1a56420cf2 ("net: lantiq: Add Lantiq / Intel VRX200 Ethernet driver") Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/lantiq_xrx200.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 89314b645c822..25adce7f0c7c0 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -294,7 +294,7 @@ static int xrx200_poll_rx(struct napi_struct *napi, int budget) if (ret == XRX200_DMA_PACKET_IN_PROGRESS) continue; if (ret != XRX200_DMA_PACKET_COMPLETE) - return ret; + break; rx++; } else { break; From c9c3b1775f80fa21f5bff874027d2ccb10f5d90c Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Wed, 24 Aug 2022 23:54:08 +0200 Subject: [PATCH 408/654] net: lantiq_xrx200: restore buffer if memory allocation failed In a situation where memory allocation fails, an invalid buffer address is stored. When this descriptor is used again, the system panics in the build_skb() function when accessing memory. Fixes: 7ea6cd16f159 ("lantiq: net: fix duplicated skb in rx descriptor ring") Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/lantiq_xrx200.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 25adce7f0c7c0..57f27cc7724e7 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -193,6 +193,7 @@ static int xrx200_alloc_buf(struct xrx200_chan *ch, void *(*alloc)(unsigned int ch->rx_buff[ch->dma.desc] = alloc(priv->rx_skb_size); if (!ch->rx_buff[ch->dma.desc]) { + ch->rx_buff[ch->dma.desc] = buf; ret = -ENOMEM; goto skip; } From d4fefa4801a1c2f9c0c7a48fbb0fdf384e89a4ab Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Thu, 25 Aug 2022 15:32:40 -0400 Subject: [PATCH 409/654] audit: move audit_return_fixup before the filters The success and return_code are needed by the filters. Move audit_return_fixup() before the filters. This was causing syscall auditing events to be missed. Link: https://github.com/linux-audit/audit-kernel/issues/138 Cc: stable@vger.kernel.org Fixes: 12c5e81d3fd0 ("audit: prepare audit_context for use in calling contexts beyond syscalls") Signed-off-by: Richard Guy Briggs [PM: manual merge required] Signed-off-by: Paul Moore --- kernel/auditsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index dd8d9ab747c3e..79a5da1bc5bb6 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1940,6 +1940,7 @@ void __audit_uring_exit(int success, long code) goto out; } + audit_return_fixup(ctx, success, code); if (ctx->context == AUDIT_CTX_SYSCALL) { /* * NOTE: See the note in __audit_uring_entry() about the case @@ -1981,7 +1982,6 @@ void __audit_uring_exit(int success, long code) audit_filter_inodes(current, ctx); if (ctx->current_state != AUDIT_STATE_RECORD) goto out; - audit_return_fixup(ctx, success, code); audit_log_exit(); out: @@ -2065,13 +2065,13 @@ void __audit_syscall_exit(int success, long return_code) if (!list_empty(&context->killed_trees)) audit_kill_trees(context); + audit_return_fixup(context, success, return_code); /* run through both filters to ensure we set the filterkey properly */ audit_filter_syscall(current, context); audit_filter_inodes(current, context); if (context->current_state < AUDIT_STATE_RECORD) goto out; - audit_return_fixup(context, success, return_code); audit_log_exit(); out: From a657182a5c5150cdfacb6640aad1d2712571a409 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 Aug 2022 23:26:47 +0200 Subject: [PATCH 410/654] bpf: Don't use tnum_range on array range checking for poke descriptors Hsin-Wei reported a KASAN splat triggered by their BPF runtime fuzzer which is based on a customized syzkaller: BUG: KASAN: slab-out-of-bounds in bpf_int_jit_compile+0x1257/0x13f0 Read of size 8 at addr ffff888004e90b58 by task syz-executor.0/1489 CPU: 1 PID: 1489 Comm: syz-executor.0 Not tainted 5.19.0 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: dump_stack_lvl+0x9c/0xc9 print_address_description.constprop.0+0x1f/0x1f0 ? bpf_int_jit_compile+0x1257/0x13f0 kasan_report.cold+0xeb/0x197 ? kvmalloc_node+0x170/0x200 ? bpf_int_jit_compile+0x1257/0x13f0 bpf_int_jit_compile+0x1257/0x13f0 ? arch_prepare_bpf_dispatcher+0xd0/0xd0 ? rcu_read_lock_sched_held+0x43/0x70 bpf_prog_select_runtime+0x3e8/0x640 ? bpf_obj_name_cpy+0x149/0x1b0 bpf_prog_load+0x102f/0x2220 ? __bpf_prog_put.constprop.0+0x220/0x220 ? find_held_lock+0x2c/0x110 ? __might_fault+0xd6/0x180 ? lock_downgrade+0x6e0/0x6e0 ? lock_is_held_type+0xa6/0x120 ? __might_fault+0x147/0x180 __sys_bpf+0x137b/0x6070 ? bpf_perf_link_attach+0x530/0x530 ? new_sync_read+0x600/0x600 ? __fget_files+0x255/0x450 ? lock_downgrade+0x6e0/0x6e0 ? fput+0x30/0x1a0 ? ksys_write+0x1a8/0x260 __x64_sys_bpf+0x7a/0xc0 ? syscall_enter_from_user_mode+0x21/0x70 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f917c4e2c2d The problem here is that a range of tnum_range(0, map->max_entries - 1) has limited ability to represent the concrete tight range with the tnum as the set of resulting states from value + mask can result in a superset of the actual intended range, and as such a tnum_in(range, reg->var_off) check may yield true when it shouldn't, for example tnum_range(0, 2) would result in 00XX -> v = 0000, m = 0011 such that the intended set of {0, 1, 2} is here represented by a less precise superset of {0, 1, 2, 3}. As the register is known const scalar, really just use the concrete reg->var_off.value for the upper index check. Fixes: d2e4c1e6c294 ("bpf: Constant map key tracking for prog array pokes") Reported-by: Hsin-Wei Hung Signed-off-by: Daniel Borkmann Cc: Shung-Hsi Yu Acked-by: John Fastabend Link: https://lore.kernel.org/r/984b37f9fdf7ac36831d2137415a4a915744c1b6.1661462653.git.daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 30c6eebce1461..3eadb14e090b7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7033,8 +7033,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; struct bpf_reg_state *regs = cur_regs(env), *reg; struct bpf_map *map = meta->map_ptr; - struct tnum range; - u64 val; + u64 val, max; int err; if (func_id != BPF_FUNC_tail_call) @@ -7044,10 +7043,11 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, return -EINVAL; } - range = tnum_range(0, map->max_entries - 1); reg = ®s[BPF_REG_3]; + val = reg->var_off.value; + max = map->max_entries; - if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) { + if (!(register_is_const(reg) && val < max)) { bpf_map_key_store(aux, BPF_MAP_KEY_POISON); return 0; } @@ -7055,8 +7055,6 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, err = mark_chain_precision(env, BPF_REG_3); if (err) return err; - - val = reg->var_off.value; if (bpf_map_key_unseen(aux)) bpf_map_key_store(aux, val); else if (!bpf_map_key_poisoned(aux) && From c7acee3d2f128a38b68fb7af85dbbd91bfd0b4ad Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 21 Aug 2022 01:51:29 +0900 Subject: [PATCH 411/654] powerpc: align syscall table for ppc32 Christophe Leroy reported that commit 7b4537199a4a ("kbuild: link symbol CRCs at final link, removing CONFIG_MODULE_REL_CRCS") broke mpc85xx_defconfig + CONFIG_RELOCATABLE=y. LD vmlinux SYSMAP System.map SORTTAB vmlinux CHKREL vmlinux WARNING: 451 bad relocations c0b312a9 R_PPC_UADDR32 .head.text-0x3ff9ed54 c0b312ad R_PPC_UADDR32 .head.text-0x3ffac224 c0b312b1 R_PPC_UADDR32 .head.text-0x3ffb09f4 c0b312b5 R_PPC_UADDR32 .head.text-0x3fe184dc c0b312b9 R_PPC_UADDR32 .head.text-0x3fe183a8 ... The compiler emits a bunch of R_PPC_UADDR32, which is not supported by arch/powerpc/kernel/reloc_32.S. The reason is there exists an unaligned symbol. $ powerpc-linux-gnu-nm -n vmlinux ... c0b31258 d spe_aligninfo c0b31298 d __func__.0 c0b312a9 D sys_call_table c0b319b8 d __func__.0 Commit 7b4537199a4a is not the root cause. Even before that, I can reproduce the same issue for mpc85xx_defconfig + CONFIG_RELOCATABLE=y + CONFIG_MODVERSIONS=n. It is just that nobody noticed because when CONFIG_MODVERSIONS is enabled, a __crc_* symbol inserted before sys_call_table was hiding the unalignment issue. Adding alignment to the syscall table for ppc32 fixes the issue. Cc: stable@vger.kernel.org Reported-by: Christophe Leroy Signed-off-by: Masahiro Yamada Tested-by: Christophe Leroy [mpe: Trim change log discussion, add Cc stable] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/lkml/38605f6a-a568-f884-f06f-ea4da5b214f0@csgroup.eu/ Link: https://lore.kernel.org/r/20220820165129.1147589-1-masahiroy@kernel.org --- arch/powerpc/kernel/systbl.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index cb3358886203e..6c1db3b6de2dc 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -18,6 +18,7 @@ .p2align 3 #define __SYSCALL(nr, entry) .8byte entry #else + .p2align 2 #define __SYSCALL(nr, entry) .long entry #endif From 310d1344e3c58cc2d625aa4e52cfcb7d8a26fcbf Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 23 Aug 2022 21:59:51 +1000 Subject: [PATCH 412/654] Revert "powerpc: Remove unused FW_FEATURE_NATIVE references" This reverts commit 79b74a68486765a4fe685ac4069bc71366c538f5. It broke booting on IBM Cell machines when the kernel is also built with CONFIG_PPC_PS3=y. That's because FW_FEATURE_NATIVE_ALWAYS = 0 does have an important effect, which is to clear the PS3 ALWAYS features from FW_FEATURE_ALWAYS. Note that CONFIG_PPC_NATIVE has since been renamed CONFIG_PPC_HASH_MMU_NATIVE. Fixes: 79b74a684867 ("powerpc: Remove unused FW_FEATURE_NATIVE references") Cc: stable@vger.kernel.org # v5.17+ Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220823115952.1203106-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/firmware.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 398e0b5e485f6..ed6db13a1d7c4 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -83,6 +83,8 @@ enum { FW_FEATURE_POWERNV_ALWAYS = 0, FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, + FW_FEATURE_NATIVE_POSSIBLE = 0, + FW_FEATURE_NATIVE_ALWAYS = 0, FW_FEATURE_POSSIBLE = #ifdef CONFIG_PPC_PSERIES FW_FEATURE_PSERIES_POSSIBLE | @@ -92,6 +94,9 @@ enum { #endif #ifdef CONFIG_PPC_PS3 FW_FEATURE_PS3_POSSIBLE | +#endif +#ifdef CONFIG_PPC_HASH_MMU_NATIVE + FW_FEATURE_NATIVE_ALWAYS | #endif 0, FW_FEATURE_ALWAYS = @@ -103,6 +108,9 @@ enum { #endif #ifdef CONFIG_PPC_PS3 FW_FEATURE_PS3_ALWAYS & +#endif +#ifdef CONFIG_PPC_HASH_MMU_NATIVE + FW_FEATURE_NATIVE_ALWAYS & #endif FW_FEATURE_POSSIBLE, From 91926d8b7e71aaf5f84f0cf208fc5a8b7a761050 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 23 Aug 2022 21:59:52 +1000 Subject: [PATCH 413/654] powerpc/rtas: Fix RTAS MSR[HV] handling for Cell The semi-recent changes to MSR handling when entering RTAS (firmware) cause crashes on IBM Cell machines. An example trace: kernel tried to execute user page (2fff01a8) - exploit attempt? (uid: 0) BUG: Unable to handle kernel instruction fetch Faulting instruction address: 0x2fff01a8 Oops: Kernel access of bad area, sig: 11 [#1] BE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=4 NUMA Cell Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 6.0.0-rc2-00433-gede0a8d3307a #207 NIP: 000000002fff01a8 LR: 0000000000032608 CTR: 0000000000000000 REGS: c0000000015236b0 TRAP: 0400 Tainted: G W (6.0.0-rc2-00433-gede0a8d3307a) MSR: 0000000008001002 CR: 00000000 XER: 20000000 ... NIP 0x2fff01a8 LR 0x32608 Call Trace: 0xc00000000143c5f8 (unreliable) .rtas_call+0x224/0x320 .rtas_get_boot_time+0x70/0x150 .read_persistent_clock64+0x114/0x140 .read_persistent_wall_and_boot_offset+0x24/0x80 .timekeeping_init+0x40/0x29c .start_kernel+0x674/0x8f0 start_here_common+0x1c/0x50 Unlike PAPR platforms where RTAS is only used in guests, on the IBM Cell machines Linux runs with MSR[HV] set but also uses RTAS, provided by SLOF. Fix it by copying the MSR[HV] bit from the MSR value we've just read using mfmsr into the value used for RTAS. It seems like we could also fix it using an #ifdef CELL to set MSR[HV], but that doesn't work because it's possible to build a single kernel image that runs on both Cell native and pseries. Fixes: b6b1c3ce06ca ("powerpc/rtas: Keep MSR[RI] set when calling RTAS") Cc: stable@vger.kernel.org # v5.19+ Signed-off-by: Michael Ellerman Reviewed-by: Jordan Niethe Link: https://lore.kernel.org/r/20220823115952.1203106-2-mpe@ellerman.id.au --- arch/powerpc/kernel/rtas_entry.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/rtas_entry.S b/arch/powerpc/kernel/rtas_entry.S index 9a434d42e660a..6ce95ddadbcdb 100644 --- a/arch/powerpc/kernel/rtas_entry.S +++ b/arch/powerpc/kernel/rtas_entry.S @@ -109,8 +109,12 @@ __enter_rtas: * its critical regions (as specified in PAPR+ section 7.2.1). MSR[S] * is not impacted by RFI_TO_KERNEL (only urfid can unset it). So if * MSR[S] is set, it will remain when entering RTAS. + * If we're in HV mode, RTAS must also run in HV mode, so extract MSR_HV + * from the saved MSR value and insert into the value RTAS will use. */ + extrdi r0, r6, 1, 63 - MSR_HV_LG LOAD_REG_IMMEDIATE(r6, MSR_ME | MSR_RI) + insrdi r6, r0, 1, 63 - MSR_HV_LG li r0,0 mtmsrd r0,1 /* disable RI before using SRR0/1 */ From b82a26d8633cc89367fac75beb3ec33061bea44a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 7 Aug 2022 22:57:40 +0200 Subject: [PATCH 414/654] Bluetooth: hci_event: Fix vendor (unknown) opcode status handling Commit c8992cffbe74 ("Bluetooth: hci_event: Use of a function table to handle Command Complete") was (presumably) meant to only refactor things without any functional changes. But it does have one undesirable side-effect, before *status would always be set to skb->data[0] and it might be overridden by some of the opcode specific handling. While now it always set by the opcode specific handlers. This means that if the opcode is not known *status does not get set any more at all! This behavior change has broken bluetooth support for BCM4343A0 HCIs, the hci_bcm.c code tries to configure UART attached HCIs at a higher baudraute using vendor specific opcodes. The BCM4343A0 does not support this and this used to simply fail: [ 25.646442] Bluetooth: hci0: BCM: failed to write clock (-56) [ 25.646481] Bluetooth: hci0: Failed to set baudrate After which things would continue with the initial baudraute. But now that hci_cmd_complete_evt() no longer sets status for unknown opcodes *status is left at 0. This causes the hci_bcm.c code to think the baudraute has been changed on the HCI side and to also adjust the UART baudrate, after which communication with the HCI is broken, leading to: [ 28.579042] Bluetooth: hci0: command 0x0c03 tx timeout [ 36.961601] Bluetooth: hci0: BCM: Reset failed (-110) And non working bluetooth. Fix this by restoring the previous default "*status = skb->data[0]" handling for unknown opcodes. Fixes: c8992cffbe74 ("Bluetooth: hci_event: Use of a function table to handle Command Complete") Signed-off-by: Hans de Goede Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 485c814cf44aa..73aa9ee9d21af 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4179,6 +4179,17 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, void *data, } } + if (i == ARRAY_SIZE(hci_cc_table)) { + /* Unknown opcode, assume byte 0 contains the status, so + * that e.g. __hci_cmd_sync() properly returns errors + * for vendor specific commands send by HCI drivers. + * If a vendor doesn't actually follow this convention we may + * need to introduce a vendor CC table in order to properly set + * the status. + */ + *status = skb->data[0]; + } + handle_cmd_cnt_and_timer(hdev, ev->ncmd); hci_req_cmd_complete(hdev, *opcode, *status, req_complete, From 1fd02d56dae35b08e4cba8e6bd2c2e7ccff68ecc Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 11 Aug 2022 14:20:46 -0700 Subject: [PATCH 415/654] Bluetooth: hci_sync: Fix suspend performance regression This attempts to fix suspend performance when there is no connections by not updating the event mask. Fixes: ef61b6ea1544 ("Bluetooth: Always set event mask on suspend") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index e6d804b82b674..5fe440cdc68dc 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5288,17 +5288,21 @@ int hci_suspend_sync(struct hci_dev *hdev) /* Prevent disconnects from causing scanning to be re-enabled */ hci_pause_scan_sync(hdev); - /* Soft disconnect everything (power off) */ - err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF); - if (err) { - /* Set state to BT_RUNNING so resume doesn't notify */ - hdev->suspend_state = BT_RUNNING; - hci_resume_sync(hdev); - return err; - } + if (hci_conn_count(hdev)) { + /* Soft disconnect everything (power off) */ + err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF); + if (err) { + /* Set state to BT_RUNNING so resume doesn't notify */ + hdev->suspend_state = BT_RUNNING; + hci_resume_sync(hdev); + return err; + } - /* Update event mask so only the allowed event can wakeup the host */ - hci_set_event_mask_sync(hdev); + /* Update event mask so only the allowed event can wakeup the + * host. + */ + hci_set_event_mask_sync(hdev); + } /* Only configure accept list if disconnect succeeded and wake * isn't being prevented. From b840304fb46cdf7012722f456bce06f151b3e81b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 12 Aug 2022 15:33:57 -0700 Subject: [PATCH 416/654] Bluetooth: L2CAP: Fix build errors in some archs This attempts to fix the follow errors: In function 'memcmp', inlined from 'bacmp' at ./include/net/bluetooth/bluetooth.h:347:9, inlined from 'l2cap_global_chan_by_psm' at net/bluetooth/l2cap_core.c:2003:15: ./include/linux/fortify-string.h:44:33: error: '__builtin_memcmp' specified bound 6 exceeds source size 0 [-Werror=stringop-overread] 44 | #define __underlying_memcmp __builtin_memcmp | ^ ./include/linux/fortify-string.h:420:16: note: in expansion of macro '__underlying_memcmp' 420 | return __underlying_memcmp(p, q, size); | ^~~~~~~~~~~~~~~~~~~ In function 'memcmp', inlined from 'bacmp' at ./include/net/bluetooth/bluetooth.h:347:9, inlined from 'l2cap_global_chan_by_psm' at net/bluetooth/l2cap_core.c:2004:15: ./include/linux/fortify-string.h:44:33: error: '__builtin_memcmp' specified bound 6 exceeds source size 0 [-Werror=stringop-overread] 44 | #define __underlying_memcmp __builtin_memcmp | ^ ./include/linux/fortify-string.h:420:16: note: in expansion of macro '__underlying_memcmp' 420 | return __underlying_memcmp(p, q, size); | ^~~~~~~~~~~~~~~~~~~ Fixes: 332f1795ca20 ("Bluetooth: L2CAP: Fix l2cap_global_chan_by_psm regression") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index cbe0cae73434f..2c9de67daadcf 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1992,11 +1992,11 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, src_match = !bacmp(&c->src, src); dst_match = !bacmp(&c->dst, dst); if (src_match && dst_match) { - c = l2cap_chan_hold_unless_zero(c); - if (c) { - read_unlock(&chan_list_lock); - return c; - } + if (!l2cap_chan_hold_unless_zero(c)) + continue; + + read_unlock(&chan_list_lock); + return c; } /* Closest match */ From 23b72814da1a094b4c065e0bb598249f310c5577 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 15 Aug 2022 16:14:32 -0700 Subject: [PATCH 417/654] Bluetooth: MGMT: Fix Get Device Flags Get Device Flags don't check if device does actually use an RPA in which case it shall only set HCI_CONN_FLAG_REMOTE_WAKEUP if LL Privacy is enabled. Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 71 ++++++++++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 29 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6e31023b84f5f..566b6291b38e6 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4547,6 +4547,22 @@ static int set_exp_feature(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_NOT_SUPPORTED); } +static u32 get_params_flags(struct hci_dev *hdev, + struct hci_conn_params *params) +{ + u32 flags = hdev->conn_flags; + + /* Devices using RPAs can only be programmed in the acceptlist if + * LL Privacy has been enable otherwise they cannot mark + * HCI_CONN_FLAG_REMOTE_WAKEUP. + */ + if ((flags & HCI_CONN_FLAG_REMOTE_WAKEUP) && !use_ll_privacy(hdev) && + hci_find_irk_by_addr(hdev, ¶ms->addr, params->addr_type)) + flags &= ~HCI_CONN_FLAG_REMOTE_WAKEUP; + + return flags; +} + static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -4578,10 +4594,10 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, } else { params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, le_addr_type(cp->addr.type)); - if (!params) goto done; + supported_flags = get_params_flags(hdev, params); current_flags = params->flags; } @@ -4649,38 +4665,35 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, bt_dev_warn(hdev, "No such BR/EDR device %pMR (0x%x)", &cp->addr.bdaddr, cp->addr.type); } - } else { - params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, - le_addr_type(cp->addr.type)); - if (params) { - /* Devices using RPAs can only be programmed in the - * acceptlist LL Privacy has been enable otherwise they - * cannot mark HCI_CONN_FLAG_REMOTE_WAKEUP. - */ - if ((current_flags & HCI_CONN_FLAG_REMOTE_WAKEUP) && - !use_ll_privacy(hdev) && - hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type)) { - bt_dev_warn(hdev, - "Cannot set wakeable for RPA"); - goto unlock; - } - params->flags = current_flags; - status = MGMT_STATUS_SUCCESS; + goto unlock; + } - /* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY - * has been set. - */ - if (params->flags & HCI_CONN_FLAG_DEVICE_PRIVACY) - hci_update_passive_scan(hdev); - } else { - bt_dev_warn(hdev, "No such LE device %pMR (0x%x)", - &cp->addr.bdaddr, - le_addr_type(cp->addr.type)); - } + params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, + le_addr_type(cp->addr.type)); + if (!params) { + bt_dev_warn(hdev, "No such LE device %pMR (0x%x)", + &cp->addr.bdaddr, le_addr_type(cp->addr.type)); + goto unlock; + } + + supported_flags = get_params_flags(hdev, params); + + if ((supported_flags | current_flags) != supported_flags) { + bt_dev_warn(hdev, "Bad flag given (0x%x) vs supported (0x%0x)", + current_flags, supported_flags); + goto unlock; } + params->flags = current_flags; + status = MGMT_STATUS_SUCCESS; + + /* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY + * has been set. + */ + if (params->flags & HCI_CONN_FLAG_DEVICE_PRIVACY) + hci_update_passive_scan(hdev); + unlock: hci_dev_unlock(hdev); From 3cfbc6ac22d62d0a06be9ce2996ba9fed75436cd Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 17 Aug 2022 20:14:36 +0900 Subject: [PATCH 418/654] Bluetooth: hci_sync: fix double mgmt_pending_free() in remove_adv_monitor() syzbot is reporting double kfree() at remove_adv_monitor() [1], for commit 7cf5c2978f23fdbb ("Bluetooth: hci_sync: Refactor remove Adv Monitor") forgot to remove duplicated mgmt_pending_remove() when merging "if (err) {" path and "if (!pending) {" path. Link: https://syzkaller.appspot.com/bug?extid=915a8416bf15895b8e07 [1] Reported-by: syzbot Fixes: 7cf5c2978f23fdbb ("Bluetooth: hci_sync: Refactor remove Adv Monitor") Signed-off-by: Tetsuo Handa Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 566b6291b38e6..72e6595a71cc0 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5067,7 +5067,6 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, else status = MGMT_STATUS_FAILED; - mgmt_pending_remove(cmd); goto unlock; } From c572909376673d8c126ae2ee53ba754ff9327d98 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 18 Aug 2022 14:31:42 -0700 Subject: [PATCH 419/654] Bluetooth: ISO: Fix not handling shutdown condition In order to properly handle shutdown syscall the code shall not assume that the how argument is always SHUT_RDWR resulting in SHUTDOWN_MASK as that would result in poll to immediately report EPOLLHUP instead of properly waiting for disconnect_cfm (Disconnect Complete) which is rather important for the likes of BAP as the CIG may need to be reprogrammed. Fixes: ccf74f2390d6 ("Bluetooth: Add BTPROTO_ISO socket type") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index ced8ad4fed4fe..613039ba5dbf5 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1309,7 +1309,7 @@ static int iso_sock_shutdown(struct socket *sock, int how) struct sock *sk = sock->sk; int err = 0; - BT_DBG("sock %p, sk %p", sock, sk); + BT_DBG("sock %p, sk %p, how %d", sock, sk, how); if (!sk) return 0; @@ -1317,17 +1317,32 @@ static int iso_sock_shutdown(struct socket *sock, int how) sock_hold(sk); lock_sock(sk); - if (!sk->sk_shutdown) { - sk->sk_shutdown = SHUTDOWN_MASK; - iso_sock_clear_timer(sk); - __iso_sock_close(sk); - - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && - !(current->flags & PF_EXITING)) - err = bt_sock_wait_state(sk, BT_CLOSED, - sk->sk_lingertime); + switch (how) { + case SHUT_RD: + if (sk->sk_shutdown & RCV_SHUTDOWN) + goto unlock; + sk->sk_shutdown |= RCV_SHUTDOWN; + break; + case SHUT_WR: + if (sk->sk_shutdown & SEND_SHUTDOWN) + goto unlock; + sk->sk_shutdown |= SEND_SHUTDOWN; + break; + case SHUT_RDWR: + if (sk->sk_shutdown & SHUTDOWN_MASK) + goto unlock; + sk->sk_shutdown |= SHUTDOWN_MASK; + break; } + iso_sock_clear_timer(sk); + __iso_sock_close(sk); + + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) + err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); + +unlock: release_sock(sk); sock_put(sk); From f48735a9aaf8258f39918e13adf464ccd7dce33b Mon Sep 17 00:00:00 2001 From: Archie Pusaka Date: Tue, 23 Aug 2022 12:39:22 +0800 Subject: [PATCH 420/654] Bluetooth: hci_event: Fix checking conn for le_conn_complete_evt To prevent multiple conn complete events, we shouldn't look up the conn with hci_lookup_le_connect, since it requires the state to be BT_CONNECT. By the time the duplicate event is processed, the state might have changed, so we end up processing the new event anyway. Change the lookup function to hci_conn_hash_lookup_ba. Fixes: d5ebaa7c5f6f6 ("Bluetooth: hci_event: Ignore multiple conn complete events") Signed-off-by: Archie Pusaka Reviewed-by: Sonny Sasaka Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 73aa9ee9d21af..6643c9c20fa46 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5801,7 +5801,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, */ hci_dev_clear_flag(hdev, HCI_LE_ADV); - conn = hci_lookup_le_connect(hdev); + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, bdaddr); if (!conn) { /* In case of error status and there is no connection pending * just unlock as there is nothing to cleanup. From cb0d160f813b23df2f4fe7af3237a026c44ed1aa Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 18 Aug 2022 23:02:07 +0200 Subject: [PATCH 421/654] Bluetooth: move from strlcpy with unused retval to strscpy Follow the advice of the below link and prefer 'strscpy' in this subsystem. Conversion is 1:1 because the return value is not used. Generated by a coccinelle script. Link: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/ Signed-off-by: Wolfram Sang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hidp/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 5940744a8cd8b..cc20e706c6391 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -83,14 +83,14 @@ static void hidp_copy_session(struct hidp_session *session, struct hidp_conninfo ci->product = session->input->id.product; ci->version = session->input->id.version; if (session->input->name) - strlcpy(ci->name, session->input->name, 128); + strscpy(ci->name, session->input->name, 128); else - strlcpy(ci->name, "HID Boot Device", 128); + strscpy(ci->name, "HID Boot Device", 128); } else if (session->hid) { ci->vendor = session->hid->vendor; ci->product = session->hid->product; ci->version = session->hid->version; - strlcpy(ci->name, session->hid->name, 128); + strscpy(ci->name, session->hid->name, 128); } } From 2da8eb834b775a9d1acea6214d3e4a78ac841e6e Mon Sep 17 00:00:00 2001 From: Zhengping Jiang Date: Tue, 23 Aug 2022 10:28:08 -0700 Subject: [PATCH 422/654] Bluetooth: hci_sync: hold hdev->lock when cleanup hci_conn When disconnecting all devices, hci_conn_failed is used to cleanup hci_conn object when the hci_conn object cannot be aborted. The function hci_conn_failed requires the caller holds hdev->lock. Fixes: 9b3628d79b46f ("Bluetooth: hci_sync: Cleanup hci_conn if it cannot be aborted") Signed-off-by: Zhengping Jiang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 5fe440cdc68dc..187786454d98d 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -4773,9 +4773,11 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) /* Cleanup hci_conn object if it cannot be cancelled as it * likelly means the controller and host stack are out of sync. */ - if (err) + if (err) { + hci_dev_lock(hdev); hci_conn_failed(conn, err); - + hci_dev_unlock(hdev); + } return err; case BT_CONNECT2: return hci_reject_conn_sync(hdev, conn, reason); From ee653d2d8f7c935a00f01a7654d889e8ce55b00e Mon Sep 17 00:00:00 2001 From: PaddyKP_Yao Date: Thu, 25 Aug 2022 08:43:05 +0800 Subject: [PATCH 423/654] platform/x86: asus-wmi: Fix the name of the mic-mute LED classdev According to well-known-leds.txt, we should use "platform::micmute" instead of "asus::micmute" for the name of the mic-mute LED classdev. Signed-off-by: PaddyKP_Yao Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220825004305.709539-1-PaddyKP_Yao@asus.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/asus-wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 89b604e04d7f6..3acc75c24c793 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -1118,7 +1118,7 @@ static int asus_wmi_led_init(struct asus_wmi *asus) } if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MICMUTE_LED)) { - asus->micmute_led.name = "asus::micmute"; + asus->micmute_led.name = "platform::micmute"; asus->micmute_led.max_brightness = 1; asus->micmute_led.brightness = ledtrig_audio_get(LED_AUDIO_MICMUTE); asus->micmute_led.brightness_set_blocking = micmute_led_set; From 874b301985ef2f89b8b592ad255e03fb6fbfe605 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 25 Aug 2022 22:04:56 -0700 Subject: [PATCH 424/654] crypto: lib - remove unneeded selection of XOR_BLOCKS CRYPTO_LIB_CHACHA_GENERIC doesn't need to select XOR_BLOCKS. It perhaps was thought that it's needed for __crypto_xor, but that's not the case. Enabling XOR_BLOCKS is problematic because the XOR_BLOCKS code runs a benchmark when it is initialized. That causes a boot time regression on systems that didn't have it enabled before. Therefore, remove this unnecessary and problematic selection. Fixes: e56e18985596 ("lib/crypto: add prompts back to crypto libraries") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- lib/crypto/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 9ff549f63540f..47816af9a9d7e 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -33,7 +33,6 @@ config CRYPTO_ARCH_HAVE_LIB_CHACHA config CRYPTO_LIB_CHACHA_GENERIC tristate - select XOR_BLOCKS help This symbol can be depended upon by arch implementations of the ChaCha library interface that require the generic code as a From 343b62048ba4cec6b8b890092bcc200e1eb83424 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 19 Aug 2022 13:01:00 -0500 Subject: [PATCH 425/654] platform/x86: thinkpad_acpi: Explicitly set to balanced mode on startup It was observed that on a Thinkpad T14 Gen1 (AMD) that the platform profile is starting up in 'low-power' mode after refreshing what the firmware had. This is most likely a firmware bug, but as a harmless workaround set the default profile to 'balanced' at thinkpad_acpi startup. Reported-by: madcatx@atlas.cz Link: https://bugzilla.kernel.org/show_bug.cgi?id=216347 Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20220819180101.6383-1-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 22d4e8633e30e..2dbb9fc011a7a 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10592,10 +10592,9 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) /* Ensure initial values are correct */ dytc_profile_refresh(); - /* Set AMT correctly now we know current profile */ - if ((dytc_capabilities & BIT(DYTC_FC_PSC)) && - (dytc_capabilities & BIT(DYTC_FC_AMT))) - dytc_control_amt(dytc_current_profile == PLATFORM_PROFILE_BALANCED); + /* Workaround for https://bugzilla.kernel.org/show_bug.cgi?id=216347 */ + if (dytc_capabilities & BIT(DYTC_FC_PSC)) + dytc_profile_set(NULL, PLATFORM_PROFILE_BALANCED); return 0; } From 5934d9a0383619c14df91af8fd76261dc3de2f5f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Aug 2022 18:01:10 +0300 Subject: [PATCH 426/654] ALSA: control: Re-order bounds checking in get_ctl_id_hash() These two checks are in the reverse order so it might read one element beyond the end of the array. First check if the "i" is within bounds before using it. Fixes: 6ab55ec0a938 ("ALSA: control: Fix an out-of-bounds bug in get_ctl_id_hash()") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/YwjgNh/gkG1hH7po@kili Signed-off-by: Takashi Iwai --- sound/core/control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/control.c b/sound/core/control.c index e8fc4c511e5ff..a7271927d875f 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -391,7 +391,7 @@ static unsigned long get_ctl_id_hash(const struct snd_ctl_elem_id *id) h = id->iface; h = MULTIPLIER * h + id->device; h = MULTIPLIER * h + id->subdevice; - for (i = 0; id->name[i] && i < SNDRV_CTL_ELEM_ID_NAME_MAXLEN; i++) + for (i = 0; i < SNDRV_CTL_ELEM_ID_NAME_MAXLEN && id->name[i]; i++) h = MULTIPLIER * h + id->name[i]; h = MULTIPLIER * h + id->index; h &= LONG_MAX; From 2a5840124009f133bd09fd855963551fb2cefe22 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Jul 2022 12:16:22 -0700 Subject: [PATCH 427/654] lsm,io_uring: add LSM hooks for the new uring_cmd file op io-uring cmd support was added through ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd"), this extended the struct file_operations to allow a new command which each subsystem can use to enable command passthrough. Add an LSM specific for the command passthrough which enables LSMs to inspect the command details. This was discussed long ago without no clear pointer for something conclusive, so this enables LSMs to at least reject this new file operation. [0] https://lkml.kernel.org/r/8adf55db-7bab-f59d-d612-ed906b948d19@schaufler-ca.com Cc: stable@vger.kernel.org Fixes: ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd") Signed-off-by: Luis Chamberlain Acked-by: Jens Axboe Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 1 + include/linux/lsm_hooks.h | 3 +++ include/linux/security.h | 5 +++++ io_uring/uring_cmd.c | 5 +++++ security/security.c | 4 ++++ 5 files changed, 18 insertions(+) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 8064481730333..60fff133c0b17 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -407,4 +407,5 @@ LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) #ifdef CONFIG_IO_URING LSM_HOOK(int, 0, uring_override_creds, const struct cred *new) LSM_HOOK(int, 0, uring_sqpoll, void) +LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd) #endif /* CONFIG_IO_URING */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 84a0d7e021769..3aa6030302f5b 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1582,6 +1582,9 @@ * Check whether the current task is allowed to spawn a io_uring polling * thread (IORING_SETUP_SQPOLL). * + * @uring_cmd: + * Check whether the file_operations uring_cmd is allowed to run. + * */ union security_list_options { #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); diff --git a/include/linux/security.h b/include/linux/security.h index 1bc362cb413f2..7bd0c490703d3 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2060,6 +2060,7 @@ static inline int security_perf_event_write(struct perf_event *event) #ifdef CONFIG_SECURITY extern int security_uring_override_creds(const struct cred *new); extern int security_uring_sqpoll(void); +extern int security_uring_cmd(struct io_uring_cmd *ioucmd); #else static inline int security_uring_override_creds(const struct cred *new) { @@ -2069,6 +2070,10 @@ static inline int security_uring_sqpoll(void) { return 0; } +static inline int security_uring_cmd(struct io_uring_cmd *ioucmd) +{ + return 0; +} #endif /* CONFIG_SECURITY */ #endif /* CONFIG_IO_URING */ diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 8e0cc2d9205ea..0f7ad956ddcbb 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -88,6 +89,10 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) if (!req->file->f_op->uring_cmd) return -EOPNOTSUPP; + ret = security_uring_cmd(ioucmd); + if (ret) + return ret; + if (ctx->flags & IORING_SETUP_SQE128) issue_flags |= IO_URING_F_SQE128; if (ctx->flags & IORING_SETUP_CQE32) diff --git a/security/security.c b/security/security.c index 14d30fec8a003..4b95de24bc8dc 100644 --- a/security/security.c +++ b/security/security.c @@ -2660,4 +2660,8 @@ int security_uring_sqpoll(void) { return call_int_hook(uring_sqpoll, 0); } +int security_uring_cmd(struct io_uring_cmd *ioucmd) +{ + return call_int_hook(uring_cmd, 0, ioucmd); +} #endif /* CONFIG_IO_URING */ From f4d653dcaa4e4056e1630423e6a8ece4869b544f Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 10 Aug 2022 15:55:36 -0400 Subject: [PATCH 428/654] selinux: implement the security_uring_cmd() LSM hook Add a SELinux access control for the iouring IORING_OP_URING_CMD command. This includes the addition of a new permission in the existing "io_uring" object class: "cmd". The subject of the new permission check is the domain of the process requesting access, the object is the open file which points to the device/file that is the target of the IORING_OP_URING_CMD operation. A sample policy rule is shown below: allow :io_uring { cmd }; Cc: stable@vger.kernel.org Fixes: ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd") Signed-off-by: Paul Moore --- security/selinux/hooks.c | 24 ++++++++++++++++++++++++ security/selinux/include/classmap.h | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 79573504783bb..03bca97c8b297 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -91,6 +91,7 @@ #include #include #include +#include #include "avc.h" #include "objsec.h" @@ -6987,6 +6988,28 @@ static int selinux_uring_sqpoll(void) return avc_has_perm(&selinux_state, sid, sid, SECCLASS_IO_URING, IO_URING__SQPOLL, NULL); } + +/** + * selinux_uring_cmd - check if IORING_OP_URING_CMD is allowed + * @ioucmd: the io_uring command structure + * + * Check to see if the current domain is allowed to execute an + * IORING_OP_URING_CMD against the device/file specified in @ioucmd. + * + */ +static int selinux_uring_cmd(struct io_uring_cmd *ioucmd) +{ + struct file *file = ioucmd->file; + struct inode *inode = file_inode(file); + struct inode_security_struct *isec = selinux_inode(inode); + struct common_audit_data ad; + + ad.type = LSM_AUDIT_DATA_FILE; + ad.u.file = file; + + return avc_has_perm(&selinux_state, current_sid(), isec->sid, + SECCLASS_IO_URING, IO_URING__CMD, &ad); +} #endif /* CONFIG_IO_URING */ /* @@ -7231,6 +7254,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { #ifdef CONFIG_IO_URING LSM_HOOK_INIT(uring_override_creds, selinux_uring_override_creds), LSM_HOOK_INIT(uring_sqpoll, selinux_uring_sqpoll), + LSM_HOOK_INIT(uring_cmd, selinux_uring_cmd), #endif /* diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index ff757ae5f2537..1c2f41ff4e551 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -253,7 +253,7 @@ const struct security_class_mapping secclass_map[] = { { "anon_inode", { COMMON_FILE_PERMS, NULL } }, { "io_uring", - { "override_creds", "sqpoll", NULL } }, + { "override_creds", "sqpoll", "cmd", NULL } }, { NULL } }; From 707527956d90ea4f304188555a97144183af1e49 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 19 Aug 2022 16:20:33 -0400 Subject: [PATCH 429/654] /dev/null: add IORING_OP_URING_CMD support This patch adds support for the io_uring command pass through, aka IORING_OP_URING_CMD, to the /dev/null driver. As with all of the /dev/null functionality, the implementation is just a simple sink where commands go to die, but it should be useful for developers who need a simple IORING_OP_URING_CMD test device that doesn't require any special hardware. Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Acked-by: Jens Axboe Signed-off-by: Paul Moore --- drivers/char/mem.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 84ca98ed1dada..32a932a065a6a 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -480,6 +480,11 @@ static ssize_t splice_write_null(struct pipe_inode_info *pipe, struct file *out, return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null); } +static int uring_cmd_null(struct io_uring_cmd *ioucmd, unsigned int issue_flags) +{ + return 0; +} + static ssize_t read_iter_zero(struct kiocb *iocb, struct iov_iter *iter) { size_t written = 0; @@ -663,6 +668,7 @@ static const struct file_operations null_fops = { .read_iter = read_iter_null, .write_iter = write_iter_null, .splice_write = splice_write_null, + .uring_cmd = uring_cmd_null, }; static const struct file_operations __maybe_unused port_fops = { From 8238b4579866b7c1bb99883cfe102a43db5506ff Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 26 Aug 2022 09:17:08 -0400 Subject: [PATCH 430/654] wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. Signed-off-by: Mikulas Patocka Acked-by: Will Deacon Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- Documentation/atomic_bitops.txt | 10 ++++----- arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++ .../asm-generic/bitops/generic-non-atomic.h | 14 +++++++++++++ .../bitops/instrumented-non-atomic.h | 12 +++++++++++ include/asm-generic/bitops/non-atomic.h | 1 + .../bitops/non-instrumented-non-atomic.h | 1 + include/linux/bitops.h | 1 + include/linux/buffer_head.h | 2 +- include/linux/wait_bit.h | 8 +++---- kernel/sched/wait_bit.c | 2 +- 10 files changed, 60 insertions(+), 12 deletions(-) diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt index d8b101c97031b..edea4656c5c05 100644 --- a/Documentation/atomic_bitops.txt +++ b/Documentation/atomic_bitops.txt @@ -58,13 +58,11 @@ Like with atomic_t, the rule of thumb is: - RMW operations that have a return value are fully ordered. - - RMW operations that are conditional are unordered on FAILURE, - otherwise the above rules apply. In the case of test_and_set_bit_lock(), - if the bit in memory is unchanged by the operation then it is deemed to have - failed. + - RMW operations that are conditional are fully ordered. -Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and -clear_bit_unlock() which has RELEASE semantics. +Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics, +clear_bit_unlock() which has RELEASE semantics and test_bit_acquire which has +ACQUIRE semantics. Since a platform only has a single means of achieving atomic operations the same barriers as for atomic_t are used, see atomic_t.txt. diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 973c6bd17f98e..0fe9de58af313 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -207,6 +207,20 @@ static __always_inline bool constant_test_bit(long nr, const volatile unsigned l (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -226,6 +240,13 @@ arch_test_bit(unsigned long nr, const volatile unsigned long *addr) variable_test_bit(nr, addr); } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search diff --git a/include/asm-generic/bitops/generic-non-atomic.h b/include/asm-generic/bitops/generic-non-atomic.h index 3d5ebd24652b9..564a8c675d858 100644 --- a/include/asm-generic/bitops/generic-non-atomic.h +++ b/include/asm-generic/bitops/generic-non-atomic.h @@ -4,6 +4,7 @@ #define __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H #include +#include #ifndef _LINUX_BITOPS_H #error only can be included directly @@ -127,6 +128,18 @@ generic_test_bit(unsigned long nr, const volatile unsigned long *addr) return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * generic_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +generic_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * const_*() definitions provide good compile-time optimizations when * the passed arguments can be resolved at compile time. @@ -137,6 +150,7 @@ generic_test_bit(unsigned long nr, const volatile unsigned long *addr) #define const___test_and_set_bit generic___test_and_set_bit #define const___test_and_clear_bit generic___test_and_clear_bit #define const___test_and_change_bit generic___test_and_change_bit +#define const_test_bit_acquire generic_test_bit_acquire /** * const_test_bit - Determine whether a bit is set diff --git a/include/asm-generic/bitops/instrumented-non-atomic.h b/include/asm-generic/bitops/instrumented-non-atomic.h index 988a3bbfba34e..2b238b161a620 100644 --- a/include/asm-generic/bitops/instrumented-non-atomic.h +++ b/include/asm-generic/bitops/instrumented-non-atomic.h @@ -142,4 +142,16 @@ _test_bit(unsigned long nr, const volatile unsigned long *addr) return arch_test_bit(nr, addr); } +/** + * _test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_bit_acquire(nr, addr); +} + #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ diff --git a/include/asm-generic/bitops/non-atomic.h b/include/asm-generic/bitops/non-atomic.h index 5c37ced343aed..71f8d54a5195e 100644 --- a/include/asm-generic/bitops/non-atomic.h +++ b/include/asm-generic/bitops/non-atomic.h @@ -13,6 +13,7 @@ #define arch___test_and_change_bit generic___test_and_change_bit #define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire #include diff --git a/include/asm-generic/bitops/non-instrumented-non-atomic.h b/include/asm-generic/bitops/non-instrumented-non-atomic.h index bdb9b1ffaee90..0ddc78dfc358b 100644 --- a/include/asm-generic/bitops/non-instrumented-non-atomic.h +++ b/include/asm-generic/bitops/non-instrumented-non-atomic.h @@ -12,5 +12,6 @@ #define ___test_and_change_bit arch___test_and_change_bit #define _test_bit arch_test_bit +#define _test_bit_acquire arch_test_bit_acquire #endif /* __ASM_GENERIC_BITOPS_NON_INSTRUMENTED_NON_ATOMIC_H */ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index cf9bf65039f22..3b89c64bcfd8f 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -59,6 +59,7 @@ extern unsigned long __sw_hweight64(__u64 w); #define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) #define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) #define test_bit(nr, addr) bitop(_test_bit, nr, addr) +#define test_bit_acquire(nr, addr) bitop(_test_bit_acquire, nr, addr) /* * Include this here because some architectures need generic_ffs/fls in diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index def8b8d30ccc1..089c9ade43259 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -156,7 +156,7 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh) * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 7dec36aecbd9f..7725b7579b781 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c index d4788f810b555..0b1cd985dc274 100644 --- a/kernel/sched/wait_bit.c +++ b/kernel/sched/wait_bit.c @@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_ prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); From dfb58b1796d19c8405a38eb457f97669440c59d4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 26 Aug 2022 17:15:47 +0100 Subject: [PATCH 431/654] io_uring/net: fix overexcessive retries Length parameter of io_sg_from_iter() can be smaller than the iterator's size, as it's with TCP, so when we set from->count at the end of the function we truncate the iterator forcing TCP to return preliminary with a short send. It affects zerocopy sends with large payload sizes and leads to retries and possible request failures. Fixes: 3ff1a0d395c00 ("io_uring: enable managed frags with register buffers") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0bc0d5179c665b4ef5c328377c84c7a1f298467e.1661530037.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/net.c b/io_uring/net.c index 0af8a02df580f..7a5468cc905e7 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -956,7 +956,7 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, shinfo->nr_frags = frag; from->bvec += bi.bi_idx; from->nr_segs -= bi.bi_idx; - from->count = bi.bi_size; + from->count -= copied; from->iov_offset = bi.bi_bvec_done; skb->data_len += copied; From a5a923038d70d2d4a86cb4e3f32625a5ee6e7e24 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Fri, 19 Aug 2022 03:13:36 +0900 Subject: [PATCH 432/654] fbdev: fbcon: Properly revert changes when vc_resize() failed fbcon_do_set_font() calls vc_resize() when font size is changed. However, if if vc_resize() failed, current implementation doesn't revert changes for font size, and this causes inconsistent state. syzbot reported unable to handle page fault due to this issue [1]. syzbot's repro uses fault injection which cause failure for memory allocation, so vc_resize() failed. This patch fixes this issue by properly revert changes for font related date when vc_resize() failed. Link: https://syzkaller.appspot.com/bug?id=3443d3a1fa6d964dd7310a0cb1696d165a3e07c4 [1] Reported-by: syzbot+a168dbeaaa7778273c1b@syzkaller.appspotmail.com Signed-off-by: Shigeru Yoshida Signed-off-by: Helge Deller CC: stable@vger.kernel.org # 5.15+ --- drivers/video/fbdev/core/fbcon.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 4a032fcf0d14d..098b62f7b701e 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2401,15 +2401,21 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, struct fb_info *info = fbcon_info_from_console(vc->vc_num); struct fbcon_ops *ops = info->fbcon_par; struct fbcon_display *p = &fb_display[vc->vc_num]; - int resize; + int resize, ret, old_userfont, old_width, old_height, old_charcount; char *old_data = NULL; resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); if (p->userfont) old_data = vc->vc_font.data; vc->vc_font.data = (void *)(p->fontdata = data); + old_userfont = p->userfont; if ((p->userfont = userfont)) REFCOUNT(data)++; + + old_width = vc->vc_font.width; + old_height = vc->vc_font.height; + old_charcount = vc->vc_font.charcount; + vc->vc_font.width = w; vc->vc_font.height = h; vc->vc_font.charcount = charcount; @@ -2425,7 +2431,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres); cols /= w; rows /= h; - vc_resize(vc, cols, rows); + ret = vc_resize(vc, cols, rows); + if (ret) + goto err_out; } else if (con_is_visible(vc) && vc->vc_mode == KD_TEXT) { fbcon_clear_margins(vc, 0); @@ -2435,6 +2443,21 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, if (old_data && (--REFCOUNT(old_data) == 0)) kfree(old_data - FONT_EXTRA_WORDS * sizeof(int)); return 0; + +err_out: + p->fontdata = old_data; + vc->vc_font.data = (void *)old_data; + + if (userfont) { + p->userfont = old_userfont; + REFCOUNT(data)--; + } + + vc->vc_font.width = old_width; + vc->vc_font.height = old_height; + vc->vc_font.charcount = old_charcount; + + return ret; } /* From dd9373402280cf4715fdc8fd5070f7d039e43511 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 23 Aug 2022 16:46:18 -0700 Subject: [PATCH 433/654] Smack: Provide read control for io_uring_cmd Limit io_uring "cmd" options to files for which the caller has Smack read access. There may be cases where the cmd option may be closer to a write access than a read, but there is no way to make that determination. Cc: stable@vger.kernel.org Fixes: ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd") Signed-off-by: Casey Schaufler Signed-off-by: Paul Moore --- security/smack/smack_lsm.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 001831458fa2c..bffccdc494cbe 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "smack.h" #define TRANS_TRUE "TRUE" @@ -4732,6 +4733,36 @@ static int smack_uring_sqpoll(void) return -EPERM; } +/** + * smack_uring_cmd - check on file operations for io_uring + * @ioucmd: the command in question + * + * Make a best guess about whether a io_uring "command" should + * be allowed. Use the same logic used for determining if the + * file could be opened for read in the absence of better criteria. + */ +static int smack_uring_cmd(struct io_uring_cmd *ioucmd) +{ + struct file *file = ioucmd->file; + struct smk_audit_info ad; + struct task_smack *tsp; + struct inode *inode; + int rc; + + if (!file) + return -EINVAL; + + tsp = smack_cred(file->f_cred); + inode = file_inode(file); + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); + smk_ad_setfield_u_fs_path(&ad, file->f_path); + rc = smk_tskacc(tsp, smk_of_inode(inode), MAY_READ, &ad); + rc = smk_bu_credfile(file->f_cred, file, MAY_READ, rc); + + return rc; +} + #endif /* CONFIG_IO_URING */ struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = { @@ -4889,6 +4920,7 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { #ifdef CONFIG_IO_URING LSM_HOOK_INIT(uring_override_creds, smack_uring_override_creds), LSM_HOOK_INIT(uring_sqpoll, smack_uring_sqpoll), + LSM_HOOK_INIT(uring_cmd, smack_uring_cmd), #endif }; From 11745ecfe8fea4b4a4c322967a7605d2ecbd5080 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 3 Aug 2022 09:00:31 -0700 Subject: [PATCH 434/654] perf/x86/intel/uncore: Fix broken read_counter() for SNB IMC PMU Existing code was generating bogus counts for the SNB IMC bandwidth counters: $ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/ 1.000327813 1,024.03 MiB uncore_imc/data_reads/ 1.000327813 20.73 MiB uncore_imc/data_writes/ 2.000580153 261,120.00 MiB uncore_imc/data_reads/ 2.000580153 23.28 MiB uncore_imc/data_writes/ The problem was introduced by commit: 07ce734dd8ad ("perf/x86/intel/uncore: Clean up client IMC") Where the read_counter callback was replace to point to the generic uncore_mmio_read_counter() function. The SNB IMC counters are freerunnig 32-bit counters laid out contiguously in MMIO. But uncore_mmio_read_counter() is using a readq() call to read from MMIO therefore reading 64-bit from MMIO. Although this is okay for the uncore_perf_event_update() function because it is shifting the value based on the actual counter width to compute a delta, it is not okay for the uncore_pmu_event_start() which is simply reading the counter and therefore priming the event->prev_count with a bogus value which is responsible for causing bogus deltas in the perf stat command above. The fix is to reintroduce the custom callback for read_counter for the SNB IMC PMU and use readl() instead of readq(). With the change the output of perf stat is back to normal: $ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/ 1.000120987 296.94 MiB uncore_imc/data_reads/ 1.000120987 138.42 MiB uncore_imc/data_writes/ 2.000403144 175.91 MiB uncore_imc/data_reads/ 2.000403144 68.50 MiB uncore_imc/data_writes/ Fixes: 07ce734dd8ad ("perf/x86/intel/uncore: Clean up client IMC") Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20220803160031.1379788-1-eranian@google.com --- arch/x86/events/intel/uncore_snb.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index ce440011cc4e4..1ef4f7861e2ec 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -841,6 +841,22 @@ int snb_pci2phy_map_init(int devid) return 0; } +static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* + * SNB IMC counters are 32-bit and are laid out back to back + * in MMIO space. Therefore we must use a 32-bit accessor function + * using readq() from uncore_mmio_read_counter() causes problems + * because it is reading 64-bit at a time. This is okay for the + * uncore_perf_event_update() function because it drops the upper + * 32-bits but not okay for plain uncore_read_counter() as invoked + * in uncore_pmu_event_start(). + */ + return (u64)readl(box->io_addr + hwc->event_base); +} + static struct pmu snb_uncore_imc_pmu = { .task_ctx_nr = perf_invalid_context, .event_init = snb_uncore_imc_event_init, @@ -860,7 +876,7 @@ static struct intel_uncore_ops snb_uncore_imc_ops = { .disable_event = snb_uncore_imc_disable_event, .enable_event = snb_uncore_imc_enable_event, .hw_config = snb_uncore_imc_hw_config, - .read_counter = uncore_mmio_read_counter, + .read_counter = snb_uncore_imc_read_counter, }; static struct intel_uncore_type snb_uncore_imc = { From 5479d6d4bf122d4b137659559a7bd17784b97b7e Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 27 Aug 2022 00:01:50 +0800 Subject: [PATCH 435/654] docs/conf.py: add function attribute '__fix_address' to conf.py Stephen Rothwell reported htmldocs warning when merging net-next: Documentation/networking/kapi:26: net/core/skbuff.c:780: WARNING: Error in declarator or parameters Invalid C declaration: Expecting "(" in parameters. [error at 19] void __fix_address kfree_skb_reason (struct sk_buff *skb, enum skb_drop_reason reason) -------------------^ Add __fix_address keyword to c_id_attributes array in conf.py to fix the warning. Link: https://lore.kernel.org/linux-next/20220825154105.534d78ab@canb.auug.org.au/ Reported-by: Stephen Rothwell Signed-off-by: Menglong Dong Tested-by: Bagas Sanjaya Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/conf.py b/Documentation/conf.py index 934727e23e0eb..255384d094bf7 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -86,6 +86,7 @@ "__used", "__weak", "noinline", + "__fix_address", # include/linux/memblock.h: "__init_memblock", From 7498a457ecf7ff2c4d379360aa8f24566bb1543e Mon Sep 17 00:00:00 2001 From: Casper Andersson Date: Thu, 25 Aug 2022 10:49:55 +0200 Subject: [PATCH 436/654] net: sparx5: fix handling uneven length packets in manual extraction Packets that are not of length divisible by 4 (e.g. 77, 78, 79) would have the checksum included up to next multiple of 4 (a 77 bytes packet would have 3 bytes of ethernet checksum included). The check for the value expects it in host (Little) endian. Fixes: f3cad2611a77 ("net: sparx5: add hostmode with phylink support") Signed-off-by: Casper Andersson Reviewed-by: Steen Hegelund Link: https://lore.kernel.org/r/20220825084955.684637-1-casper.casan@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index 304f84aadc36b..21844beba72df 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -113,6 +113,8 @@ static void sparx5_xtr_grp(struct sparx5 *sparx5, u8 grp, bool byte_swap) /* This assumes STATUS_WORD_POS == 1, Status * just after last data */ + if (!byte_swap) + val = ntohl((__force __be32)val); byte_cnt -= (4 - XTR_VALID_BYTES(val)); eof_flag = true; break; From 2ca1c94ce0b65a2ce7512b718f3d8a0fe6224bca Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 26 Aug 2022 08:25:30 +0800 Subject: [PATCH 437/654] tg3: Disable tg3 device on system reboot to avoid triggering AER Commit d60cd06331a3 ("PM: ACPI: reboot: Use S5 for reboot") caused a reboot hang on one Dell servers so the commit was reverted. Someone managed to collect the AER log and it's caused by MSI: [ 148.762067] ACPI: Preparing to enter system sleep state S5 [ 148.794638] {1}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 5 [ 148.803731] {1}[Hardware Error]: event severity: recoverable [ 148.810191] {1}[Hardware Error]: Error 0, type: fatal [ 148.816088] {1}[Hardware Error]: section_type: PCIe error [ 148.822391] {1}[Hardware Error]: port_type: 0, PCIe end point [ 148.829026] {1}[Hardware Error]: version: 3.0 [ 148.834266] {1}[Hardware Error]: command: 0x0006, status: 0x0010 [ 148.841140] {1}[Hardware Error]: device_id: 0000:04:00.0 [ 148.847309] {1}[Hardware Error]: slot: 0 [ 148.852077] {1}[Hardware Error]: secondary_bus: 0x00 [ 148.857876] {1}[Hardware Error]: vendor_id: 0x14e4, device_id: 0x165f [ 148.865145] {1}[Hardware Error]: class_code: 020000 [ 148.870845] {1}[Hardware Error]: aer_uncor_status: 0x00100000, aer_uncor_mask: 0x00010000 [ 148.879842] {1}[Hardware Error]: aer_uncor_severity: 0x000ef030 [ 148.886575] {1}[Hardware Error]: TLP Header: 40000001 0000030f 90028090 00000000 [ 148.894823] tg3 0000:04:00.0: AER: aer_status: 0x00100000, aer_mask: 0x00010000 [ 148.902795] tg3 0000:04:00.0: AER: [20] UnsupReq (First) [ 148.910234] tg3 0000:04:00.0: AER: aer_layer=Transaction Layer, aer_agent=Requester ID [ 148.918806] tg3 0000:04:00.0: AER: aer_uncor_severity: 0x000ef030 [ 148.925558] tg3 0000:04:00.0: AER: TLP Header: 40000001 0000030f 90028090 00000000 The MSI is probably raised by incoming packets, so power down the device and disable bus mastering to stop the traffic, as user confirmed this approach works. In addition to that, be extra safe and cancel reset task if it's running. Cc: Josef Bacik Link: https://lore.kernel.org/all/b8db79e6857c41dab4ef08bdf826ea7c47e3bafc.1615947283.git.josef@toxicpanda.com/ BugLink: https://bugs.launchpad.net/bugs/1917471 Signed-off-by: Kai-Heng Feng Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20220826002530.1153296-1-kai.heng.feng@canonical.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/tg3.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index db1e9d810b416..89889d8150da1 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -18076,16 +18076,20 @@ static void tg3_shutdown(struct pci_dev *pdev) struct net_device *dev = pci_get_drvdata(pdev); struct tg3 *tp = netdev_priv(dev); + tg3_reset_task_cancel(tp); + rtnl_lock(); + netif_device_detach(dev); if (netif_running(dev)) dev_close(dev); - if (system_state == SYSTEM_POWER_OFF) - tg3_power_down(tp); + tg3_power_down(tp); rtnl_unlock(); + + pci_disable_device(pdev); } /** From 3ce9f2bef75528936c78a7053301f5725f622f3a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 24 Aug 2022 19:39:51 -0700 Subject: [PATCH 438/654] net: smsc911x: Stop and start PHY during suspend and resume Commit 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") unveiled that the smsc911x driver was not properly stopping and restarting the PHY during suspend/resume. Correct that by indicating that the MAC is in charge of PHY PM operations and ensure that all MDIO bus activity is quiescent during suspend. Tested-by: Geert Uytterhoeven Tested-by: Marek Szyprowski Fixes: fba863b81604 ("net: phy: make PHY PM ops a no-op if MAC driver manages PHY PM") Fixes: 2aa70f864955 ("net: smsc911x: Quieten netif during suspend") Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20220825023951.3220-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/smsc/smsc911x.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 3bf20211cceb4..3829c2805b16c 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1037,6 +1037,8 @@ static int smsc911x_mii_probe(struct net_device *dev) return ret; } + /* Indicate that the MAC is responsible for managing PHY PM */ + phydev->mac_managed_pm = true; phy_attached_info(phydev); phy_set_max_speed(phydev, SPEED_100); @@ -2587,6 +2589,8 @@ static int smsc911x_suspend(struct device *dev) if (netif_running(ndev)) { netif_stop_queue(ndev); netif_device_detach(ndev); + if (!device_may_wakeup(dev)) + phy_stop(ndev->phydev); } /* enable wake on LAN, energy detection and the external PME @@ -2628,6 +2632,8 @@ static int smsc911x_resume(struct device *dev) if (netif_running(ndev)) { netif_device_attach(ndev); netif_start_queue(ndev); + if (!device_may_wakeup(dev)) + phy_start(ndev->phydev); } return 0; From a87406f4adee9c53b311d8a1ba2849c69e29a6d0 Mon Sep 17 00:00:00 2001 From: Andrey Zhadchenko Date: Thu, 25 Aug 2022 05:03:26 +0300 Subject: [PATCH 439/654] openvswitch: fix memory leak at failed datapath creation ovs_dp_cmd_new()->ovs_dp_change()->ovs_dp_set_upcall_portids() allocates array via kmalloc. If for some reason new_vport() fails during ovs_dp_cmd_new() dp->upcall_portids must be freed. Add missing kfree. Kmemleak example: unreferenced object 0xffff88800c382500 (size 64): comm "dump_state", pid 323, jiffies 4294955418 (age 104.347s) hex dump (first 32 bytes): 5e c2 79 e4 1f 7a 38 c7 09 21 38 0c 80 88 ff ff ^.y..z8..!8..... 03 00 00 00 0a 00 00 00 14 00 00 00 28 00 00 00 ............(... backtrace: [<0000000071bebc9f>] ovs_dp_set_upcall_portids+0x38/0xa0 [<000000000187d8bd>] ovs_dp_change+0x63/0xe0 [<000000002397e446>] ovs_dp_cmd_new+0x1f0/0x380 [<00000000aa06f36e>] genl_family_rcv_msg_doit+0xea/0x150 [<000000008f583bc4>] genl_rcv_msg+0xdc/0x1e0 [<00000000fa10e377>] netlink_rcv_skb+0x50/0x100 [<000000004959cece>] genl_rcv+0x24/0x40 [<000000004699ac7f>] netlink_unicast+0x23e/0x360 [<00000000c153573e>] netlink_sendmsg+0x24e/0x4b0 [<000000006f4aa380>] sock_sendmsg+0x62/0x70 [<00000000d0068654>] ____sys_sendmsg+0x230/0x270 [<0000000012dacf7d>] ___sys_sendmsg+0x88/0xd0 [<0000000011776020>] __sys_sendmsg+0x59/0xa0 [<000000002e8f2dc1>] do_syscall_64+0x3b/0x90 [<000000003243e7cb>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: b83d23a2a38b ("openvswitch: Introduce per-cpu upcall dispatch") Acked-by: Aaron Conole Signed-off-by: Andrey Zhadchenko Link: https://lore.kernel.org/r/20220825020326.664073-1-andrey.zhadchenko@virtuozzo.com Signed-off-by: Jakub Kicinski --- net/openvswitch/datapath.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 7e8a39a356271..6c9d153afbeee 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1802,7 +1802,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_reset_user_features(skb, info); } - goto err_unlock_and_destroy_meters; + goto err_destroy_portids; } err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, @@ -1817,6 +1817,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; +err_destroy_portids: + kfree(rcu_dereference_raw(dp->upcall_portids)); err_unlock_and_destroy_meters: ovs_unlock(); ovs_meters_exit(dp); From ebe5555c2f34505cdb1ae5c3de8b24e33740b3e0 Mon Sep 17 00:00:00 2001 From: Tianyu Yuan Date: Thu, 25 Aug 2022 10:08:45 +0200 Subject: [PATCH 440/654] nfp: flower: fix ingress police using matchall filter Referenced commit introduced nfp_policer_validate in the progress installing rate limiter. This validate check the action id and will reject police with CONTINUE, which is required to support ingress police offload. Fix this issue by allowing FLOW_ACTION_CONTINUE as notexceed action id in nfp_policer_validate Fixes: d97b4b105ce7 ("flow_offload: reject offload for all drivers with invalid police parameters") Signed-off-by: Tianyu Yuan Reviewed-by: Baowen Zheng Reviewed-by: Louis Peens Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20220825080845.507534-1-simon.horman@corigine.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/flower/qos_conf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c index 4e5df9f2c3722..7b92026e1a6f8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c @@ -127,10 +127,11 @@ static int nfp_policer_validate(const struct flow_action *action, return -EOPNOTSUPP; } - if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE && + act->police.notexceed.act_id != FLOW_ACTION_PIPE && act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { NL_SET_ERR_MSG_MOD(extack, - "Offload not supported when conform action is not pipe or ok"); + "Offload not supported when conform action is not continue, pipe or ok"); return -EOPNOTSUPP; } From bc9e7fe313d5e56d4d5f34bcc04d1165f94f86fb Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 28 Jul 2022 10:39:46 +0100 Subject: [PATCH 441/654] perf python: Fix build when PYTHON_CONFIG is user supplied The previous change to Python autodetection had a small mistake where the auto value was used to determine the Python binary, rather than the user supplied value. The Python binary is only used for one part of the build process, rather than the final linking, so it was producing correct builds in most scenarios, especially when the auto detected value matched what the user wanted, or the system only had a valid set of Pythons. Change it so that the Python binary path is derived from either the PYTHON_CONFIG value or PYTHON value, depending on what is specified by the user. This was the original intention. This error was spotted in a build failure an odd cross compilation environment after commit 4c41cb46a732fe82 ("perf python: Prefer python3") was merged. Fixes: 630af16eee495f58 ("perf tools: Use Python devtools for version autodetection rather than runtime") Signed-off-by: James Clark Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20220728093946.1337642-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 0661a1cf98556..2171f02daf59d 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -265,7 +265,7 @@ endif # defined. get-executable-or-default fails with an error if the first argument is supplied but # doesn't exist. override PYTHON_CONFIG := $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON_AUTO)) -override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_AUTO))) +override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_CONFIG))) grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) From dbcfe5ec3f9a5799d8b49ad2c81549bbfa8390e7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 12:53:44 -0300 Subject: [PATCH 442/654] tools kvm headers arm64: Update KVM header from the kernel sources To pick the changes from: ae3b1da95413614f ("KVM: arm64: Fix compile error due to sign extension") That doesn't result in any changes in tooling (when built on x86), only addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/kvm.h' differs from latest version at 'arch/arm64/include/uapi/asm/kvm.h' diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Marc Zyngier Cc: Namhyung Kim Cc: Yang Yingliang Link: https://lore.kernel.org/all/YwOMCCc4E79FuvDe@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/uapi/asm/kvm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 3bb134355874c..316917b987070 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -75,9 +75,11 @@ struct kvm_regs { /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 -#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ + KVM_ARM_DEVICE_TYPE_SHIFT) #define KVM_ARM_DEVICE_ID_SHIFT 16 -#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT) +#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ + KVM_ARM_DEVICE_ID_SHIFT) /* Supported device IDs */ #define KVM_ARM_DEVICE_VGIC_V2 0 From bf515f024e4c0ca46a1b08c4f31860c01781d8a5 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 22 Aug 2022 14:33:51 -0700 Subject: [PATCH 443/654] perf stat: Clear evsel->reset_group for each stat run If a weak group is broken then the reset_group flag remains set for the next run. Having reset_group set means the counter isn't created and ultimately a segfault. A simple reproduction of this is: # perf stat -r2 -e '{cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles}:W which will be added as a test in the next patch. Fixes: 4804e0111662d7d8 ("perf stat: Use affinity for opening events") Reviewed-by: Andi Kleen Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Tested-by: Xing Zhengjun Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20220822213352.75721-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7fb81a44672d7..54cd29d07ca8d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -826,6 +826,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) } evlist__for_each_entry(evsel_list, counter) { + counter->reset_group = false; if (bpf_counter__load(counter, &target)) return -1; if (!evsel__is_bpf(counter)) From 0c361c6eaba7fe1a29391540dd8797e850e49f21 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 22 Aug 2022 14:33:52 -0700 Subject: [PATCH 444/654] perf test: Stat test for repeat with a weak group Breaking a weak group requires multiple passes of an evlist, with multiple runs this can introduce bugs ultimately leading to segfaults. Add a test to cover this. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20220822213352.75721-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh index 9313ef2739e07..26a51b48aee46 100755 --- a/tools/perf/tests/shell/stat.sh +++ b/tools/perf/tests/shell/stat.sh @@ -28,6 +28,24 @@ test_stat_record_report() { echo "stat record and report test [Success]" } +test_stat_repeat_weak_groups() { + echo "stat repeat weak groups test" + if ! perf stat -e '{cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles}' \ + true 2>&1 | grep -q 'seconds time elapsed' + then + echo "stat repeat weak groups test [Skipped event parsing failed]" + return + fi + if ! perf stat -r2 -e '{cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles}:W' \ + true > /dev/null 2>&1 + then + echo "stat repeat weak groups test [Failed]" + err=1 + return + fi + echo "stat repeat weak groups test [Success]" +} + test_topdown_groups() { # Topdown events must be grouped with the slots event first. Test that # parse-events reorders this. @@ -75,6 +93,7 @@ test_topdown_weak_groups() { test_default_stat test_stat_record_report +test_stat_repeat_weak_groups test_topdown_groups test_topdown_weak_groups exit $err From e89eaa611c7568d1288a2ccca88355a9434f2d47 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Aug 2022 03:01:27 -0700 Subject: [PATCH 445/654] perf record: Fix manpage formatting of description of support to hybrid systems The Intel hybrid description is written in a different style than the rest of the perf record man page. There were some new command line options added after it which resulted in very strange section ordering. Move the hybrid include last. Also the sub sections in the hybrid document don't fit the record manpage well (especially since it talks about all kinds of unrelated commands). I left this for now, but would be better to separate this properly in the different man pages. It would be better to use sub sections for the other sections, but these don't seem to be supported in AsciiDoc? Some of the examples are still misrendered in the manpage with an indented troff command, but I don't know how to fix that. In any case it's now better than before. Signed-off-by: Andi Kleen Cc: zhengjun.xing@intel.com Link: https://lore.kernel.org/r/20220818100127.249401-1-ak@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-hybrid.txt | 10 ---------- tools/perf/Documentation/perf-record.txt | 4 ++-- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/tools/perf/Documentation/intel-hybrid.txt b/tools/perf/Documentation/intel-hybrid.txt index c9302096dc461..e7a776ad25d71 100644 --- a/tools/perf/Documentation/intel-hybrid.txt +++ b/tools/perf/Documentation/intel-hybrid.txt @@ -21,11 +21,6 @@ cat /sys/devices/cpu_atom/cpus It indicates cpu0-cpu15 are core cpus and cpu16-cpu23 are atom cpus. -Quickstart - -List hybrid event ------------------ - As before, use perf-list to list the symbolic event. perf list @@ -40,7 +35,6 @@ the event is belong to. Same event name but with different pmu can be supported. Enable hybrid event with a specific pmu ---------------------------------------- To enable a core only event or atom only event, following syntax is supported: @@ -53,7 +47,6 @@ For example, count the 'cycles' event on core cpus. perf stat -e cpu_core/cycles/ Create two events for one hardware event automatically ------------------------------------------------------- When creating one event and the event is available on both atom and core, two events are created automatically. One is for atom, the other is for @@ -132,7 +125,6 @@ For perf-stat result, it displays two events: The first 'cycles' is core event, the second 'cycles' is atom event. Thread mode example: --------------------- perf-stat reports the scaled counts for hybrid event and with a percentage displayed. The percentage is the event's running time/enabling time. @@ -176,14 +168,12 @@ perf_event_attr: 604,097,080 cpu_atom/cycles/ (99.57%) perf-record: ------------- If there is no '-e' specified in perf record, on hybrid platform, it creates two default 'cycles' and adds them to event list. One is for core, the other is for atom. perf-stat: ----------- If there is no '-e' specified in perf stat, on hybrid platform, besides of software events, following events are created and diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 099817ef5150d..6ec6d0ba0a72f 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -757,8 +757,6 @@ events in data directory files. Option specified with no or empty value defaults to CPU layout. Masks defined or provided by the option value are filtered through the mask provided by -C option. -include::intel-hybrid.txt[] - --debuginfod[=URLs]:: Specify debuginfod URL to be used when cacheing perf.data binaries, it follows the same syntax as the DEBUGINFOD_URLS variable, like: @@ -778,6 +776,8 @@ include::intel-hybrid.txt[] only, as of now. So the applications built without the frame pointer might see bogus addresses. +include::intel-hybrid.txt[] + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] From d72e5cf3cf69d4c68d3b54aea232451b0a8b69d3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 24 Aug 2022 07:57:33 -0700 Subject: [PATCH 446/654] perf sched: Fix memory leaks in __cmd_record detected with -fsanitize=address An array of strings is passed to cmd_record but not freed. As cmd_record modifies the array, add another array as a copy that can be mutated allowing the original array contents to all be freed. Detected with -fsanitize=address. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20220824145733.409005-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 2f6cd1b8b6627..a5cf243c337f1 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3355,7 +3355,8 @@ static bool schedstat_events_exposed(void) static int __cmd_record(int argc, const char **argv) { unsigned int rec_argc, i, j; - const char **rec_argv; + char **rec_argv; + const char **rec_argv_copy; const char * const record_args[] = { "record", "-a", @@ -3384,6 +3385,7 @@ static int __cmd_record(int argc, const char **argv) ARRAY_SIZE(schedstat_args) : 0; struct tep_event *waking_event; + int ret; /* * +2 for either "-e", "sched:sched_wakeup" or @@ -3391,14 +3393,18 @@ static int __cmd_record(int argc, const char **argv) */ rec_argc = ARRAY_SIZE(record_args) + 2 + schedstat_argc + argc - 1; rec_argv = calloc(rec_argc + 1, sizeof(char *)); - if (rec_argv == NULL) return -ENOMEM; + rec_argv_copy = calloc(rec_argc + 1, sizeof(char *)); + if (rec_argv_copy == NULL) { + free(rec_argv); + return -ENOMEM; + } for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[i] = strdup(record_args[i]); - rec_argv[i++] = "-e"; + rec_argv[i++] = strdup("-e"); waking_event = trace_event__tp_format("sched", "sched_waking"); if (!IS_ERR(waking_event)) rec_argv[i++] = strdup("sched:sched_waking"); @@ -3409,11 +3415,19 @@ static int __cmd_record(int argc, const char **argv) rec_argv[i++] = strdup(schedstat_args[j]); for (j = 1; j < (unsigned int)argc; j++, i++) - rec_argv[i] = argv[j]; + rec_argv[i] = strdup(argv[j]); BUG_ON(i != rec_argc); - return cmd_record(i, rec_argv); + memcpy(rec_argv_copy, rec_argv, sizeof(char *) * rec_argc); + ret = cmd_record(rec_argc, rec_argv_copy); + + for (i = 0; i < rec_argc; i++) + free(rec_argv[i]); + free(rec_argv); + free(rec_argv_copy); + + return ret; } int cmd_sched(int argc, const char **argv) From 3126204ce3d9ab083cbdc2d61ab93746232eb89b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 16 Aug 2022 05:56:12 -0700 Subject: [PATCH 447/654] perf docs: Update the documentation for the save_type filter Update the documentation to reflect the kernel changes. Signed-off-by: Kan Liang Cc: Andi Kleen Cc: Ian Rogers Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20220816125612.2042397-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 6ec6d0ba0a72f..0228efc96686a 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -397,6 +397,9 @@ following filters are defined: - abort_tx: only when the target is a hardware transaction abort - cond: conditional branches - save_type: save branch type during sampling in case binary is not available later + For the platforms with Intel Arch LBR support (12th-Gen+ client or + 4th-Gen Xeon+ server), the save branch type is unconditionally enabled + when the taken branch stack sampling is enabled. + The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. From 48648548ef764dcb1f6ffc9c9f9057f7c610caa4 Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Thu, 25 Aug 2022 09:54:58 +0800 Subject: [PATCH 448/654] perf stat: Capitalize topdown metrics' names Capitalize topdown metrics' names to follow the intel SDM. Before: # ./perf stat -a sleep 1 Performance counter stats for 'system wide': 228,094.05 msec cpu-clock # 225.026 CPUs utilized 842 context-switches # 3.691 /sec 224 cpu-migrations # 0.982 /sec 70 page-faults # 0.307 /sec 23,164,105 cycles # 0.000 GHz 29,403,446 instructions # 1.27 insn per cycle 5,268,185 branches # 23.097 K/sec 33,239 branch-misses # 0.63% of all branches 136,248,990 slots # 597.337 K/sec 32,976,450 topdown-retiring # 24.2% retiring 4,651,918 topdown-bad-spec # 3.4% bad speculation 26,148,695 topdown-fe-bound # 19.2% frontend bound 72,515,776 topdown-be-bound # 53.2% backend bound 6,008,540 topdown-heavy-ops # 4.4% heavy operations # 19.8% light operations 3,934,049 topdown-br-mispredict # 2.9% branch mispredict # 0.5% machine clears 16,655,439 topdown-fetch-lat # 12.2% fetch latency # 7.0% fetch bandwidth 41,635,972 topdown-mem-bound # 30.5% memory bound # 22.7% Core bound 1.013634593 seconds time elapsed After: # ./perf stat -a sleep 1 Performance counter stats for 'system wide': 228,081.94 msec cpu-clock # 225.003 CPUs utilized 824 context-switches # 3.613 /sec 224 cpu-migrations # 0.982 /sec 67 page-faults # 0.294 /sec 22,647,423 cycles # 0.000 GHz 28,870,551 instructions # 1.27 insn per cycle 5,167,099 branches # 22.655 K/sec 32,383 branch-misses # 0.63% of all branches 133,411,074 slots # 584.926 K/sec 32,352,607 topdown-retiring # 24.3% Retiring 4,456,977 topdown-bad-spec # 3.3% Bad Speculation 25,626,487 topdown-fe-bound # 19.2% Frontend Bound 70,955,316 topdown-be-bound # 53.2% Backend Bound 5,834,844 topdown-heavy-ops # 4.4% Heavy Operations # 19.9% Light Operations 3,738,781 topdown-br-mispredict # 2.8% Branch Mispredict # 0.5% Machine Clears 16,286,803 topdown-fetch-lat # 12.2% Fetch Latency # 7.0% Fetch Bandwidth 40,802,069 topdown-mem-bound # 30.6% Memory Bound # 22.6% Core Bound 1.013683125 seconds time elapsed Reviewed-by: Kan Liang Signed-off-by: Xing Zhengjun Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20220825015458.3252239-1-zhengjun.xing@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 979c8cb918f72..788ce5e46470a 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -1193,7 +1193,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, &rsd); if (retiring > 0.7) color = PERF_COLOR_GREEN; - print_metric(config, ctxp, color, "%8.1f%%", "retiring", + print_metric(config, ctxp, color, "%8.1f%%", "Retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) && full_td(cpu_map_idx, st, &rsd)) { @@ -1202,7 +1202,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, &rsd); if (fe_bound > 0.2) color = PERF_COLOR_RED; - print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", + print_metric(config, ctxp, color, "%8.1f%%", "Frontend Bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) && full_td(cpu_map_idx, st, &rsd)) { @@ -1211,7 +1211,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, &rsd); if (be_bound > 0.2) color = PERF_COLOR_RED; - print_metric(config, ctxp, color, "%8.1f%%", "backend bound", + print_metric(config, ctxp, color, "%8.1f%%", "Backend Bound", be_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) && full_td(cpu_map_idx, st, &rsd)) { @@ -1220,7 +1220,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, &rsd); if (bad_spec > 0.1) color = PERF_COLOR_RED; - print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", + print_metric(config, ctxp, color, "%8.1f%%", "Bad Speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) && full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { @@ -1234,13 +1234,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (retiring > 0.7 && heavy_ops > 0.1) color = PERF_COLOR_GREEN; - print_metric(config, ctxp, color, "%8.1f%%", "heavy operations", + print_metric(config, ctxp, color, "%8.1f%%", "Heavy Operations", heavy_ops * 100.); if (retiring > 0.7 && light_ops > 0.6) color = PERF_COLOR_GREEN; else color = NULL; - print_metric(config, ctxp, color, "%8.1f%%", "light operations", + print_metric(config, ctxp, color, "%8.1f%%", "Light Operations", light_ops * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) && full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { @@ -1254,13 +1254,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (bad_spec > 0.1 && br_mis > 0.05) color = PERF_COLOR_RED; - print_metric(config, ctxp, color, "%8.1f%%", "branch mispredict", + print_metric(config, ctxp, color, "%8.1f%%", "Branch Mispredict", br_mis * 100.); if (bad_spec > 0.1 && m_clears > 0.05) color = PERF_COLOR_RED; else color = NULL; - print_metric(config, ctxp, color, "%8.1f%%", "machine clears", + print_metric(config, ctxp, color, "%8.1f%%", "Machine Clears", m_clears * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) && full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { @@ -1274,13 +1274,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (fe_bound > 0.2 && fetch_lat > 0.15) color = PERF_COLOR_RED; - print_metric(config, ctxp, color, "%8.1f%%", "fetch latency", + print_metric(config, ctxp, color, "%8.1f%%", "Fetch Latency", fetch_lat * 100.); if (fe_bound > 0.2 && fetch_bw > 0.1) color = PERF_COLOR_RED; else color = NULL; - print_metric(config, ctxp, color, "%8.1f%%", "fetch bandwidth", + print_metric(config, ctxp, color, "%8.1f%%", "Fetch Bandwidth", fetch_bw * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) && full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { @@ -1294,13 +1294,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (be_bound > 0.2 && mem_bound > 0.2) color = PERF_COLOR_RED; - print_metric(config, ctxp, color, "%8.1f%%", "memory bound", + print_metric(config, ctxp, color, "%8.1f%%", "Memory Bound", mem_bound * 100.); if (be_bound > 0.2 && core_bound > 0.1) color = PERF_COLOR_RED; else color = NULL; - print_metric(config, ctxp, color, "%8.1f%%", "Core bound", + print_metric(config, ctxp, color, "%8.1f%%", "Core Bound", core_bound * 100.); } else if (evsel->metric_expr) { generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, From 1bd3a383075c64d638e65d263c9267b08ee7733c Mon Sep 17 00:00:00 2001 From: Jean-Francois Le Fillatre Date: Wed, 24 Aug 2022 21:14:36 +0200 Subject: [PATCH 449/654] r8152: add PID for the Lenovo OneLink+ Dock The Lenovo OneLink+ Dock contains an RTL8153 controller that behaves as a broken CDC device by default. Add the custom Lenovo PID to the r8152 driver to support it properly. Also, systems compatible with this dock provide a BIOS option to enable MAC address passthrough (as per Lenovo document "ThinkPad Docking Solutions 2017"). Add the custom PID to the MAC passthrough list too. Tested on a ThinkPad 13 1st gen with the expected results: passthrough disabled: Invalid header when reading pass-thru MAC addr passthrough enabled: Using pass-thru MAC addr XX:XX:XX:XX:XX:XX Signed-off-by: Jean-Francois Le Fillatre Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 7 +++++++ drivers/net/usb/r8152.c | 3 +++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 2de09ad5bac03..e11f70911acc1 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -777,6 +777,13 @@ static const struct usb_device_id products[] = { }, #endif +/* Lenovo ThinkPad OneLink+ Dock (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3054, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* ThinkPad USB-C Dock (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3062, USB_CLASS_COMM, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d142ac8fcf6e2..688905ea0a6d3 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -770,6 +770,7 @@ enum rtl8152_flags { RX_EPROTO, }; +#define DEVICE_ID_THINKPAD_ONELINK_PLUS_DOCK 0x3054 #define DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2 0x3082 #define DEVICE_ID_THINKPAD_USB_C_DONGLE 0x720c #define DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2 0xa387 @@ -9581,6 +9582,7 @@ static bool rtl8152_supports_lenovo_macpassthru(struct usb_device *udev) if (vendor_id == VENDOR_ID_LENOVO) { switch (product_id) { + case DEVICE_ID_THINKPAD_ONELINK_PLUS_DOCK: case DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2: case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2: case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN3: @@ -9828,6 +9830,7 @@ static const struct usb_device_id rtl8152_table[] = { REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927), REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101), REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f), + REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3054), REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3062), REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3069), REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3082), From d6ffe6067a54972564552ea45d320fb98db1ac5e Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 26 Aug 2022 16:43:51 -0400 Subject: [PATCH 450/654] provide arch_test_bit_acquire for architectures that define test_bit Some architectures define their own arch_test_bit and they also need arch_test_bit_acquire, otherwise they won't compile. We also clean up the code by using the generic test_bit if that is equivalent to the arch-specific version. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Fixes: 8238b4579866 ("wait_on_bit: add an acquire memory barrier") Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/bitops.h | 7 ++----- arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 ++----- arch/m68k/include/asm/bitops.h | 7 ++----- arch/s390/include/asm/bitops.h | 10 ++-------- arch/sh/include/asm/bitops-op32.h | 12 ++---------- 6 files changed, 25 insertions(+), 33 deletions(-) diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h index 492c7713ddae6..bafb1c1f0fdc1 100644 --- a/arch/alpha/include/asm/bitops.h +++ b/arch/alpha/include/asm/bitops.h @@ -283,11 +283,8 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) return (old & mask) != 0; } -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire /* * ffz = Find First Zero in word. Undefined if no zero exists, diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h index da500471ac73c..160d8f37fa1a3 100644 --- a/arch/hexagon/include/asm/bitops.h +++ b/arch/hexagon/include/asm/bitops.h @@ -179,6 +179,21 @@ arch_test_bit(unsigned long nr, const volatile unsigned long *addr) return retval; } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + /* * ffz - find first zero in word. * @word: The word to search diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h index 9f62af7fd7c42..1accb7842f588 100644 --- a/arch/ia64/include/asm/bitops.h +++ b/arch/ia64/include/asm/bitops.h @@ -331,11 +331,8 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) return (old & bit) != 0; } -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire /** * ffz - find the first zero bit in a long word diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h index 470aed9785903..e984af71df6be 100644 --- a/arch/m68k/include/asm/bitops.h +++ b/arch/m68k/include/asm/bitops.h @@ -157,11 +157,8 @@ arch___change_bit(unsigned long nr, volatile unsigned long *addr) change_bit(nr, addr); } -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - return (addr[nr >> 5] & (1UL << (nr & 31))) != 0; -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 9a7d15da966e3..2de74fcd0578f 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -176,14 +176,8 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) return old & mask; } -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - const volatile unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - - return *p & mask; -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire static inline bool arch_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) diff --git a/arch/sh/include/asm/bitops-op32.h b/arch/sh/include/asm/bitops-op32.h index 565a85d8b7fb0..5ace89b465079 100644 --- a/arch/sh/include/asm/bitops-op32.h +++ b/arch/sh/include/asm/bitops-op32.h @@ -135,16 +135,8 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) return (old & mask) != 0; } -/** - * arch_test_bit - Determine whether a bit is set - * @nr: bit number to test - * @addr: Address to start counting from - */ -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire #include From 98e01215708b6d416345465c09dce2bd4868c67a Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 27 Aug 2022 20:36:27 -0700 Subject: [PATCH 451/654] Input: iforce - wake up after clearing IFORCE_XMIT_RUNNING flag syzbot is reporting hung task at __input_unregister_device() [1], for iforce_close() waiting at wait_event_interruptible() with dev->mutex held is blocking input_disconnect_device() from __input_unregister_device(). It seems that the cause is simply that commit c2b27ef672992a20 ("Input: iforce - wait for command completion when closing the device") forgot to call wake_up() after clear_bit(). Fix this problem by introducing a helper that calls clear_bit() followed by wake_up_all(). Reported-by: syzbot Fixes: c2b27ef672992a20 ("Input: iforce - wait for command completion when closing the device") Tested-by: syzbot Suggested-by: Fabio M. De Francesco Co-developed-by: Hillf Danton Signed-off-by: Hillf Danton Signed-off-by: Tetsuo Handa Link: https://lore.kernel.org/r/887021c3-4f13-40ce-c8b9-aa6e09faa3a7@I-love.SAKURA.ne.jp Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/iforce/iforce-serio.c | 6 +++--- drivers/input/joystick/iforce/iforce-usb.c | 8 ++++---- drivers/input/joystick/iforce/iforce.h | 6 ++++++ 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/input/joystick/iforce/iforce-serio.c b/drivers/input/joystick/iforce/iforce-serio.c index f95a81b9fac72..2380546d79782 100644 --- a/drivers/input/joystick/iforce/iforce-serio.c +++ b/drivers/input/joystick/iforce/iforce-serio.c @@ -39,7 +39,7 @@ static void iforce_serio_xmit(struct iforce *iforce) again: if (iforce->xmit.head == iforce->xmit.tail) { - clear_bit(IFORCE_XMIT_RUNNING, iforce->xmit_flags); + iforce_clear_xmit_and_wake(iforce); spin_unlock_irqrestore(&iforce->xmit_lock, flags); return; } @@ -64,7 +64,7 @@ static void iforce_serio_xmit(struct iforce *iforce) if (test_and_clear_bit(IFORCE_XMIT_AGAIN, iforce->xmit_flags)) goto again; - clear_bit(IFORCE_XMIT_RUNNING, iforce->xmit_flags); + iforce_clear_xmit_and_wake(iforce); spin_unlock_irqrestore(&iforce->xmit_lock, flags); } @@ -169,7 +169,7 @@ static irqreturn_t iforce_serio_irq(struct serio *serio, iforce_serio->cmd_response_len = iforce_serio->len; /* Signal that command is done */ - wake_up(&iforce->wait); + wake_up_all(&iforce->wait); } else if (likely(iforce->type)) { iforce_process_packet(iforce, iforce_serio->id, iforce_serio->data_in, diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c index ea58805c480fa..cba92bd590a8d 100644 --- a/drivers/input/joystick/iforce/iforce-usb.c +++ b/drivers/input/joystick/iforce/iforce-usb.c @@ -30,7 +30,7 @@ static void __iforce_usb_xmit(struct iforce *iforce) spin_lock_irqsave(&iforce->xmit_lock, flags); if (iforce->xmit.head == iforce->xmit.tail) { - clear_bit(IFORCE_XMIT_RUNNING, iforce->xmit_flags); + iforce_clear_xmit_and_wake(iforce); spin_unlock_irqrestore(&iforce->xmit_lock, flags); return; } @@ -58,9 +58,9 @@ static void __iforce_usb_xmit(struct iforce *iforce) XMIT_INC(iforce->xmit.tail, n); if ( (n=usb_submit_urb(iforce_usb->out, GFP_ATOMIC)) ) { - clear_bit(IFORCE_XMIT_RUNNING, iforce->xmit_flags); dev_warn(&iforce_usb->intf->dev, "usb_submit_urb failed %d\n", n); + iforce_clear_xmit_and_wake(iforce); } /* The IFORCE_XMIT_RUNNING bit is not cleared here. That's intended. @@ -175,15 +175,15 @@ static void iforce_usb_out(struct urb *urb) struct iforce *iforce = &iforce_usb->iforce; if (urb->status) { - clear_bit(IFORCE_XMIT_RUNNING, iforce->xmit_flags); dev_dbg(&iforce_usb->intf->dev, "urb->status %d, exiting\n", urb->status); + iforce_clear_xmit_and_wake(iforce); return; } __iforce_usb_xmit(iforce); - wake_up(&iforce->wait); + wake_up_all(&iforce->wait); } static int iforce_usb_probe(struct usb_interface *intf, diff --git a/drivers/input/joystick/iforce/iforce.h b/drivers/input/joystick/iforce/iforce.h index 6aa761ebbdf77..9ccb9107ccbef 100644 --- a/drivers/input/joystick/iforce/iforce.h +++ b/drivers/input/joystick/iforce/iforce.h @@ -119,6 +119,12 @@ static inline int iforce_get_id_packet(struct iforce *iforce, u8 id, response_data, response_len); } +static inline void iforce_clear_xmit_and_wake(struct iforce *iforce) +{ + clear_bit(IFORCE_XMIT_RUNNING, iforce->xmit_flags); + wake_up_all(&iforce->wait); +} + /* Public functions */ /* iforce-main.c */ int iforce_init_device(struct device *parent, u16 bustype, From a2d57ebec1e15f0ac256eb8397e82b07adfaaacc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= Date: Sat, 27 Aug 2022 22:33:28 +0200 Subject: [PATCH 452/654] ALSA: hda/realtek: Add speaker AMP init for Samsung laptops with ALC298 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Magic initialization sequence was extracted from Windows driver and cleaned up manually. Fixes internal speakers output. Link: https://bugzilla.kernel.org/show_bug.cgi?id=207423 Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1851518 Signed-off-by: Kacper Michajłow Cc: Link: https://lore.kernel.org/r/20220827203328.30363-1-kasper93@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 63 +++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 7 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 47e72cf76608e..38930cf5aace0 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4700,6 +4700,48 @@ static void alc236_fixup_hp_mute_led_micmute_vref(struct hda_codec *codec, alc236_fixup_hp_micmute_led_vref(codec, fix, action); } +static inline void alc298_samsung_write_coef_pack(struct hda_codec *codec, + const unsigned short coefs[2]) +{ + alc_write_coef_idx(codec, 0x23, coefs[0]); + alc_write_coef_idx(codec, 0x25, coefs[1]); + alc_write_coef_idx(codec, 0x26, 0xb011); +} + +struct alc298_samsung_amp_desc { + unsigned char nid; + unsigned short init_seq[2][2]; +}; + +static void alc298_fixup_samsung_amp(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + int i, j; + static const unsigned short init_seq[][2] = { + { 0x19, 0x00 }, { 0x20, 0xc0 }, { 0x22, 0x44 }, { 0x23, 0x08 }, + { 0x24, 0x85 }, { 0x25, 0x41 }, { 0x35, 0x40 }, { 0x36, 0x01 }, + { 0x38, 0x81 }, { 0x3a, 0x03 }, { 0x3b, 0x81 }, { 0x40, 0x3e }, + { 0x41, 0x07 }, { 0x400, 0x1 } + }; + static const struct alc298_samsung_amp_desc amps[] = { + { 0x3a, { { 0x18, 0x1 }, { 0x26, 0x0 } } }, + { 0x39, { { 0x18, 0x2 }, { 0x26, 0x1 } } } + }; + + if (action != HDA_FIXUP_ACT_INIT) + return; + + for (i = 0; i < ARRAY_SIZE(amps); i++) { + alc_write_coef_idx(codec, 0x22, amps[i].nid); + + for (j = 0; j < ARRAY_SIZE(amps[i].init_seq); j++) + alc298_samsung_write_coef_pack(codec, amps[i].init_seq[j]); + + for (j = 0; j < ARRAY_SIZE(init_seq); j++) + alc298_samsung_write_coef_pack(codec, init_seq[j]); + } +} + #if IS_REACHABLE(CONFIG_INPUT) static void gpio2_mic_hotkey_event(struct hda_codec *codec, struct hda_jack_callback *event) @@ -7030,6 +7072,7 @@ enum { ALC236_FIXUP_HP_GPIO_LED, ALC236_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF, + ALC298_FIXUP_SAMSUNG_AMP, ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, @@ -8396,6 +8439,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc236_fixup_hp_mute_led_micmute_vref, }, + [ALC298_FIXUP_SAMSUNG_AMP] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc298_fixup_samsung_amp, + .chained = true, + .chain_id = ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET + }, [ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -9342,13 +9391,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), - SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), - SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), - SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), - SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), + SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), - SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), - SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), + SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), @@ -9716,7 +9765,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC299_FIXUP_PREDATOR_SPK, .name = "predator-spk"}, {.id = ALC298_FIXUP_HUAWEI_MBX_STEREO, .name = "huawei-mbx-stereo"}, {.id = ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, .name = "alc256-medion-headset"}, - {.id = ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = "alc298-samsung-headphone"}, + {.id = ALC298_FIXUP_SAMSUNG_AMP, .name = "alc298-samsung-amp"}, {.id = ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = "alc256-samsung-headphone"}, {.id = ALC255_FIXUP_XIAOMI_HEADSET_MIC, .name = "alc255-xiaomi-headset"}, {.id = ALC274_FIXUP_HP_MIC, .name = "alc274-hp-mic-detect"}, From 5f3d9e8161bb8cb23ab3b4678cd13f6e90a06186 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 28 Aug 2022 09:41:43 +0200 Subject: [PATCH 453/654] ALSA: usb-audio: Add quirk for LH Labs Geek Out HD Audio 1V5 The USB DAC from LH Labs (2522:0007) seems requiring the same quirk as Sony Walkman to set up the interface like UAC1; otherwise it gets the constant errors "usb_set_interface failed (-71)". This patch adds a quirk entry for addressing the buggy behavior. Reported-by: Lennert Van Alboom Cc: Link: https://lore.kernel.org/r/T3VPXtCc4uFws9Gfh2RjX6OdwM1RqfC6VqQr--_LMDyB2x5N3p9_q6AtPna17IXhHwBtcJVdXuS80ZZSCMjh_BafIbnzJPhbrkmhmWS6DlI=@vanalboom.org Link: https://lore.kernel.org/r/20220828074143.14736-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 168fd802d70bd..9bfead5efc4c1 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1903,6 +1903,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x21b4, 0x0081, /* AudioQuest DragonFly */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x2522, 0x0007, /* LH Labs Geek Out HD Audio 1V5 */ + QUIRK_FLAG_SET_IFACE_FIRST), DEVICE_FLG(0x2708, 0x0002, /* Audient iD14 */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x2912, 0x30c8, /* Audioengine D1 */ From 303e6da99429510b1e4edf833afe90ac8542e747 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 22 Aug 2022 04:10:25 +0000 Subject: [PATCH 454/654] gpio: mockup: remove gpio debugfs when remove device GPIO mockup debugfs is created in gpio_mockup_probe() but forgot to remove when remove device. This patch add a devm managed callback for removing them. Signed-off-by: Wei Yongjun Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mockup.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c index 8943cea927642..a2e505a7545cd 100644 --- a/drivers/gpio/gpio-mockup.c +++ b/drivers/gpio/gpio-mockup.c @@ -373,6 +373,13 @@ static void gpio_mockup_debugfs_setup(struct device *dev, } } +static void gpio_mockup_debugfs_cleanup(void *data) +{ + struct gpio_mockup_chip *chip = data; + + debugfs_remove_recursive(chip->dbg_dir); +} + static void gpio_mockup_dispose_mappings(void *data) { struct gpio_mockup_chip *chip = data; @@ -455,7 +462,7 @@ static int gpio_mockup_probe(struct platform_device *pdev) gpio_mockup_debugfs_setup(dev, chip); - return 0; + return devm_add_action_or_reset(dev, gpio_mockup_debugfs_cleanup, chip); } static const struct of_device_id gpio_mockup_of_match[] = { From ab74ef708dc51df7cf2b8a890b9c6990fac5c0c6 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 12 Jul 2022 21:05:42 +0800 Subject: [PATCH 455/654] mm/hugetlb: avoid corrupting page->mapping in hugetlb_mcopy_atomic_pte In MCOPY_ATOMIC_CONTINUE case with a non-shared VMA, pages in the page cache are installed in the ptes. But hugepage_add_new_anon_rmap is called for them mistakenly because they're not vm_shared. This will corrupt the page->mapping used by page cache code. Link: https://lkml.kernel.org/r/20220712130542.18836-1-linmiaohe@huawei.com Fixes: f619147104c8 ("userfaultfd: add UFFDIO_CONTINUE ioctl") Signed-off-by: Miaohe Lin Reviewed-by: Mike Kravetz Cc: Axel Rasmussen Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2480ba627aa5e..e070b8593b376 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6041,7 +6041,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, if (!huge_pte_none_mostly(huge_ptep_get(dst_pte))) goto out_release_unlock; - if (vm_shared) { + if (page_in_pagecache) { page_dup_file_rmap(page, true); } else { ClearHPageRestoreReserve(page); From 9dfb3b8d655022760ca68af11821f1c63aa547c3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 30 Jul 2022 05:25:18 +0100 Subject: [PATCH 456/654] shmem: update folio if shmem_replace_page() updates the page If we allocate a new page, we need to make sure that our folio matches that new page. If we do end up in this code path, we store the wrong page in the shmem inode's page cache, and I would rather imagine that data corruption ensues. This will be solved by changing shmem_replace_page() to shmem_replace_folio(), but this is the minimal fix. Link: https://lkml.kernel.org/r/20220730042518.1264767-1-willy@infradead.org Fixes: da08e9b79323 ("mm/shmem: convert shmem_swapin_page() to shmem_swapin_folio()") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: William Kucharski Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton --- mm/shmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/shmem.c b/mm/shmem.c index d075dd2dcc484..42e5888bf84d8 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1782,6 +1782,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, if (shmem_should_replace_folio(folio, gfp)) { error = shmem_replace_page(&page, gfp, info, index); + folio = page_folio(page); if (error) goto failed; } From f87904c075515f3e1d8f4a7115869d3b914674fd Mon Sep 17 00:00:00 2001 From: Khazhismel Kumykov Date: Mon, 1 Aug 2022 08:50:34 -0700 Subject: [PATCH 457/654] writeback: avoid use-after-free after removing device When a disk is removed, bdi_unregister gets called to stop further writeback and wait for associated delayed work to complete. However, wb_inode_writeback_end() may schedule bandwidth estimation dwork after this has completed, which can result in the timer attempting to access the just freed bdi_writeback. Fix this by checking if the bdi_writeback is alive, similar to when scheduling writeback work. Since this requires wb->work_lock, and wb_inode_writeback_end() may get called from interrupt, switch wb->work_lock to an irqsafe lock. Link: https://lkml.kernel.org/r/20220801155034.3772543-1-khazhy@google.com Fixes: 45a2966fd641 ("writeback: fix bandwidth estimate for spiky workload") Signed-off-by: Khazhismel Kumykov Reviewed-by: Jan Kara Cc: Michael Stapelberg Cc: Wu Fengguang Cc: Alexander Viro Cc: Signed-off-by: Andrew Morton --- fs/fs-writeback.c | 12 ++++++------ mm/backing-dev.c | 10 +++++----- mm/page-writeback.c | 6 +++++- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 05221366a16dc..08a1993ab7fd3 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -134,10 +134,10 @@ static bool inode_io_list_move_locked(struct inode *inode, static void wb_wakeup(struct bdi_writeback *wb) { - spin_lock_bh(&wb->work_lock); + spin_lock_irq(&wb->work_lock); if (test_bit(WB_registered, &wb->state)) mod_delayed_work(bdi_wq, &wb->dwork, 0); - spin_unlock_bh(&wb->work_lock); + spin_unlock_irq(&wb->work_lock); } static void finish_writeback_work(struct bdi_writeback *wb, @@ -164,7 +164,7 @@ static void wb_queue_work(struct bdi_writeback *wb, if (work->done) atomic_inc(&work->done->cnt); - spin_lock_bh(&wb->work_lock); + spin_lock_irq(&wb->work_lock); if (test_bit(WB_registered, &wb->state)) { list_add_tail(&work->list, &wb->work_list); @@ -172,7 +172,7 @@ static void wb_queue_work(struct bdi_writeback *wb, } else finish_writeback_work(wb, work); - spin_unlock_bh(&wb->work_lock); + spin_unlock_irq(&wb->work_lock); } /** @@ -2082,13 +2082,13 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb) { struct wb_writeback_work *work = NULL; - spin_lock_bh(&wb->work_lock); + spin_lock_irq(&wb->work_lock); if (!list_empty(&wb->work_list)) { work = list_entry(wb->work_list.next, struct wb_writeback_work, list); list_del_init(&work->list); } - spin_unlock_bh(&wb->work_lock); + spin_unlock_irq(&wb->work_lock); return work; } diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 95550b8fa7fe2..de65cb1e5f761 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -260,10 +260,10 @@ void wb_wakeup_delayed(struct bdi_writeback *wb) unsigned long timeout; timeout = msecs_to_jiffies(dirty_writeback_interval * 10); - spin_lock_bh(&wb->work_lock); + spin_lock_irq(&wb->work_lock); if (test_bit(WB_registered, &wb->state)) queue_delayed_work(bdi_wq, &wb->dwork, timeout); - spin_unlock_bh(&wb->work_lock); + spin_unlock_irq(&wb->work_lock); } static void wb_update_bandwidth_workfn(struct work_struct *work) @@ -334,12 +334,12 @@ static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb); static void wb_shutdown(struct bdi_writeback *wb) { /* Make sure nobody queues further work */ - spin_lock_bh(&wb->work_lock); + spin_lock_irq(&wb->work_lock); if (!test_and_clear_bit(WB_registered, &wb->state)) { - spin_unlock_bh(&wb->work_lock); + spin_unlock_irq(&wb->work_lock); return; } - spin_unlock_bh(&wb->work_lock); + spin_unlock_irq(&wb->work_lock); cgwb_remove_from_bdi_list(wb); /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d0d466a5c804c..032a7bf8d2593 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2892,6 +2892,7 @@ static void wb_inode_writeback_start(struct bdi_writeback *wb) static void wb_inode_writeback_end(struct bdi_writeback *wb) { + unsigned long flags; atomic_dec(&wb->writeback_inodes); /* * Make sure estimate of writeback throughput gets updated after @@ -2900,7 +2901,10 @@ static void wb_inode_writeback_end(struct bdi_writeback *wb) * that if multiple inodes end writeback at a similar time, they get * batched into one bandwidth update. */ - queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); + spin_lock_irqsave(&wb->work_lock, flags); + if (test_bit(WB_registered, &wb->state)) + queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); + spin_unlock_irqrestore(&wb->work_lock, flags); } bool __folio_end_writeback(struct folio *folio) From 6c26d17eea01477f9223c4d5da8ebc2099e4f027 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Thu, 4 Aug 2022 17:22:07 -0300 Subject: [PATCH 458/654] mailmap: update Guilherme G. Piccoli's email addresses Both @canonical and @ibm email addresses are invalid now; use my personal address instead. Link: https://lkml.kernel.org/r/20220804202207.439427-1-gpiccoli@igalia.com Signed-off-by: Guilherme G. Piccoli Signed-off-by: Andrew Morton --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index 38255d412f0b3..23241db2647f7 100644 --- a/.mailmap +++ b/.mailmap @@ -150,6 +150,8 @@ Greg Kroah-Hartman Greg Kroah-Hartman Greg Kurz Gregory CLEMENT +Guilherme G. Piccoli +Guilherme G. Piccoli Guo Ren Guo Ren Gustavo Padovan From f09bddbd86619bf6213c96142a3b6b6a84818798 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Mon, 8 Aug 2022 13:54:10 -0700 Subject: [PATCH 459/654] vmcoreinfo: add kallsyms_num_syms symbol The rest of the kallsyms symbols are useless without knowing the number of symbols in the table. In an earlier patch, I somehow dropped the kallsyms_num_syms symbol, so add it back in. Link: https://lkml.kernel.org/r/20220808205410.18590-1-stephen.s.brennan@oracle.com Fixes: 5fd8fea935a1 ("vmcoreinfo: include kallsyms symbols") Signed-off-by: Stephen Brennan Cc: Baoquan He Cc: Dave Young Cc: Vivek Goyal Signed-off-by: Andrew Morton --- kernel/crash_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 07b26df453a97..a0eb4d5cf5577 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -494,6 +494,7 @@ static int __init crash_save_vmcoreinfo_init(void) #ifdef CONFIG_KALLSYMS VMCOREINFO_SYMBOL(kallsyms_names); + VMCOREINFO_SYMBOL(kallsyms_num_syms); VMCOREINFO_SYMBOL(kallsyms_token_table); VMCOREINFO_SYMBOL(kallsyms_token_index); #ifdef CONFIG_KALLSYMS_BASE_RELATIVE From fcab34b433e2c13e333b2f53c4a8409eadc432c7 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 10 Aug 2022 10:53:59 -0600 Subject: [PATCH 460/654] mm: re-allow pinning of zero pfns (again) The below referenced commit makes the same error as 1c563432588d ("mm: fix is_pinnable_page against a cma page"), re-interpreting the logic to exclude pinning of the zero page, which breaks device assignment with vfio. To avoid further subtle mistakes, split the logic into discrete tests. [akpm@linux-foundation.org: simplify comment, per John] Link: https://lkml.kernel.org/r/166015037385.760108.16881097713975517242.stgit@omen Link: https://lore.kernel.org/all/165490039431.944052.12458624139225785964.stgit@omen Fixes: f25cbb7a95a2 ("mm: add zone device coherent type memory support") Signed-off-by: Alex Williamson Suggested-by: Matthew Wilcox Suggested-by: Felix Kuehling Tested-by: Slawomir Laba Reviewed-by: John Hubbard Cc: Alex Sierra Cc: Christoph Hellwig Cc: Alistair Popple Signed-off-by: Andrew Morton --- include/linux/mm.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 982f2607180b8..21f8b27bd9fd3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1544,9 +1544,16 @@ static inline bool is_longterm_pinnable_page(struct page *page) if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE) return false; #endif - return !(is_device_coherent_page(page) || - is_zone_movable_page(page) || - is_zero_pfn(page_to_pfn(page))); + /* The zero page may always be pinned */ + if (is_zero_pfn(page_to_pfn(page))) + return true; + + /* Coherent device memory must always allow eviction. */ + if (is_device_coherent_page(page)) + return false; + + /* Otherwise, non-movable zone pages can be pinned. */ + return !is_zone_movable_page(page); } #else static inline bool is_longterm_pinnable_page(struct page *page) From 44e602b4e52f70f04620bbbf4fe46ecb40170bde Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Wed, 10 Aug 2022 16:02:25 +0000 Subject: [PATCH 461/654] binder_alloc: add missing mmap_lock calls when using the VMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take the mmap_read_lock() when using the VMA in binder_alloc_print_pages() and when checking for a VMA in binder_alloc_new_buf_locked(). It is worth noting binder_alloc_new_buf_locked() drops the VMA read lock after it verifies a VMA exists, but may be taken again deeper in the call stack, if necessary. Link: https://lkml.kernel.org/r/20220810160209.1630707-1-Liam.Howlett@oracle.com Fixes: a43cfc87caaf (android: binder: stop saving a pointer to the VMA) Signed-off-by: Liam R. Howlett Reported-by: Ondrej Mosnacek Reported-by: Acked-by: Carlos Llamas Tested-by: Ondrej Mosnacek Cc: Minchan Kim Cc: Christian Brauner (Microsoft) Cc: Greg Kroah-Hartman Cc: Hridya Valsaraju Cc: Joel Fernandes Cc: Martijn Coenen Cc: Suren Baghdasaryan Cc: Todd Kjos Cc: Matthew Wilcox (Oracle) Cc: "Arve Hjønnevåg" Signed-off-by: Andrew Morton --- drivers/android/binder_alloc.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 1014beb128025..51f4e1c5cd019 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -402,12 +402,15 @@ static struct binder_buffer *binder_alloc_new_buf_locked( size_t size, data_offsets_size; int ret; + mmap_read_lock(alloc->vma_vm_mm); if (!binder_alloc_get_vma(alloc)) { + mmap_read_unlock(alloc->vma_vm_mm); binder_alloc_debug(BINDER_DEBUG_USER_ERROR, "%d: binder_alloc_buf, no vma\n", alloc->pid); return ERR_PTR(-ESRCH); } + mmap_read_unlock(alloc->vma_vm_mm); data_offsets_size = ALIGN(data_size, sizeof(void *)) + ALIGN(offsets_size, sizeof(void *)); @@ -929,17 +932,25 @@ void binder_alloc_print_pages(struct seq_file *m, * Make sure the binder_alloc is fully initialized, otherwise we might * read inconsistent state. */ - if (binder_alloc_get_vma(alloc) != NULL) { - for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { - page = &alloc->pages[i]; - if (!page->page_ptr) - free++; - else if (list_empty(&page->lru)) - active++; - else - lru++; - } + + mmap_read_lock(alloc->vma_vm_mm); + if (binder_alloc_get_vma(alloc) == NULL) { + mmap_read_unlock(alloc->vma_vm_mm); + goto uninitialized; } + + mmap_read_unlock(alloc->vma_vm_mm); + for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { + page = &alloc->pages[i]; + if (!page->page_ptr) + free++; + else if (list_empty(&page->lru)) + active++; + else + lru++; + } + +uninitialized: mutex_unlock(&alloc->mutex); seq_printf(m, " pages: %d:%d:%d\n", active, lru, free); seq_printf(m, " pages high watermark: %zu\n", alloc->pages_high); From a5d2172180e8f94a8cfc7a7fa0243035629bf8d0 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 16 Aug 2022 14:09:06 +0900 Subject: [PATCH 462/654] mm/zsmalloc: do not attempt to free IS_ERR handle zsmalloc() now returns ERR_PTR values as handles, which zram accidentally can pass to zs_free(). Another bad scenario is when zcomp_compress() fails - handle has default -ENOMEM value, and zs_free() will try to free that "pointer value". Add the missing check and make sure that zs_free() bails out when ERR_PTR() is passed to it. Link: https://lkml.kernel.org/r/20220816050906.2583956-1-senozhatsky@chromium.org Fixes: c7e6f17b52e9 ("zsmalloc: zs_malloc: return ERR_PTR on failure") Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Cc: Nitin Gupta , Signed-off-by: Andrew Morton --- mm/zsmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 34f784a1604b7..907c9b1e1e614 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1487,7 +1487,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle) struct size_class *class; enum fullness_group fullness; - if (unlikely(!handle)) + if (IS_ERR_OR_NULL((void *)handle)) return; /* From dbb16df6443c59e8a1ef21c2272fcf387d600ddf Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 17 Aug 2022 17:21:39 +0000 Subject: [PATCH 463/654] Revert "memcg: cleanup racy sum avoidance code" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 96e51ccf1af33e82f429a0d6baebba29c6448d0f. Recently we started running the kernel with rstat infrastructure on production traffic and begin to see negative memcg stats values. Particularly the 'sock' stat is the one which we observed having negative value. $ grep "sock " /mnt/memory/job/memory.stat sock 253952 total_sock 18446744073708724224 Re-run after couple of seconds $ grep "sock " /mnt/memory/job/memory.stat sock 253952 total_sock 53248 For now we are only seeing this issue on large machines (256 CPUs) and only with 'sock' stat. I think the networking stack increase the stat on one cpu and decrease it on another cpu much more often. So, this negative sock is due to rstat flusher flushing the stats on the CPU that has seen the decrement of sock but missed the CPU that has increments. A typical race condition. For easy stable backport, revert is the most simple solution. For long term solution, I am thinking of two directions. First is just reduce the race window by optimizing the rstat flusher. Second is if the reader sees a negative stat value, force flush and restart the stat collection. Basically retry but limited. Link: https://lkml.kernel.org/r/20220817172139.3141101-1-shakeelb@google.com Fixes: 96e51ccf1af33e8 ("memcg: cleanup racy sum avoidance code") Signed-off-by: Shakeel Butt Cc: "Michal Koutný" Cc: Johannes Weiner Cc: Michal Hocko Cc: Roman Gushchin Cc: Muchun Song Cc: David Hildenbrand Cc: Yosry Ahmed Cc: Greg Thelen Cc: [5.15] Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 4d31ce55b1c0d..6257867fbf953 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -987,19 +987,30 @@ static inline void mod_memcg_page_state(struct page *page, static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) { - return READ_ONCE(memcg->vmstats.state[idx]); + long x = READ_ONCE(memcg->vmstats.state[idx]); +#ifdef CONFIG_SMP + if (x < 0) + x = 0; +#endif + return x; } static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) { struct mem_cgroup_per_node *pn; + long x; if (mem_cgroup_disabled()) return node_page_state(lruvec_pgdat(lruvec), idx); pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - return READ_ONCE(pn->lruvec_stats.state[idx]); + x = READ_ONCE(pn->lruvec_stats.state[idx]); +#ifdef CONFIG_SMP + if (x < 0) + x = 0; +#endif + return x; } static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, From 550842cc60987b269e31b222283ade3e1b6c7fc8 Mon Sep 17 00:00:00 2001 From: Heming Zhao Date: Mon, 15 Aug 2022 16:57:54 +0800 Subject: [PATCH 464/654] ocfs2: fix freeing uninitialized resource on ocfs2_dlm_shutdown After commit 0737e01de9c4 ("ocfs2: ocfs2_mount_volume does cleanup job before return error"), any procedure after ocfs2_dlm_init() fails will trigger crash when calling ocfs2_dlm_shutdown(). ie: On local mount mode, no dlm resource is initialized. If ocfs2_mount_volume() fails in ocfs2_find_slot(), error handling will call ocfs2_dlm_shutdown(), then does dlm resource cleanup job, which will trigger kernel crash. This solution should bypass uninitialized resources in ocfs2_dlm_shutdown(). Link: https://lkml.kernel.org/r/20220815085754.20417-1-heming.zhao@suse.com Fixes: 0737e01de9c4 ("ocfs2: ocfs2_mount_volume does cleanup job before return error") Signed-off-by: Heming Zhao Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/dlmglue.c | 8 +++++--- fs/ocfs2/super.c | 3 +-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 801e60bab9555..c28bc983a7b1c 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3403,10 +3403,12 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb, ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); - ocfs2_cluster_disconnect(osb->cconn, hangup_pending); - osb->cconn = NULL; + if (osb->cconn) { + ocfs2_cluster_disconnect(osb->cconn, hangup_pending); + osb->cconn = NULL; - ocfs2_dlm_shutdown_debug(osb); + ocfs2_dlm_shutdown_debug(osb); + } } static int ocfs2_drop_lock(struct ocfs2_super *osb, diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 013a727bd7c82..e2cc9eec287c9 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1914,8 +1914,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) !ocfs2_is_hard_readonly(osb)) hangup_needed = 1; - if (osb->cconn) - ocfs2_dlm_shutdown(osb, hangup_needed); + ocfs2_dlm_shutdown(osb, hangup_needed); ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats); debugfs_remove_recursive(osb->osb_debug_root); From dd0ff4d12dd284c334f7e9b07f8f335af856ac78 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 19 Aug 2022 17:40:05 +0800 Subject: [PATCH 465/654] bootmem: remove the vmemmap pages from kmemleak in put_page_bootmem The vmemmap pages is marked by kmemleak when allocated from memblock. Remove it from kmemleak when freeing the page. Otherwise, when we reuse the page, kmemleak may report such an error and then stop working. kmemleak: Cannot insert 0xffff98fb6eab3d40 into the object search tree (overlaps existing) kmemleak: Kernel memory leak detector disabled kmemleak: Object 0xffff98fb6be00000 (size 335544320): kmemleak: comm "swapper", pid 0, jiffies 4294892296 kmemleak: min_count = 0 kmemleak: count = 0 kmemleak: flags = 0x1 kmemleak: checksum = 0 kmemleak: backtrace: Link: https://lkml.kernel.org/r/20220819094005.2928241-1-liushixin2@huawei.com Fixes: f41f2ed43ca5 (mm: hugetlb: free the vmemmap pages associated with each HugeTLB page) Signed-off-by: Liu Shixin Reviewed-by: Muchun Song Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- mm/bootmem_info.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/bootmem_info.c b/mm/bootmem_info.c index f18a631e74797..b1efebfcf94bb 100644 --- a/mm/bootmem_info.c +++ b/mm/bootmem_info.c @@ -12,6 +12,7 @@ #include #include #include +#include void get_page_bootmem(unsigned long info, struct page *page, unsigned long type) { @@ -33,6 +34,7 @@ void put_page_bootmem(struct page *page) ClearPagePrivate(page); set_page_private(page, 0); INIT_LIST_HEAD(&page->lru); + kmemleak_free_part(page_to_virt(page), PAGE_SIZE); free_reserved_page(page); } } From 0c7d7cc2b4fe2e74ef8728f030f0f1674f9f6aee Mon Sep 17 00:00:00 2001 From: Quanyang Wang Date: Fri, 19 Aug 2022 16:11:45 +0800 Subject: [PATCH 466/654] asm-generic: sections: refactor memory_intersects There are two problems with the current code of memory_intersects: First, it doesn't check whether the region (begin, end) falls inside the region (virt, vend), that is (virt < begin && vend > end). The second problem is if vend is equal to begin, it will return true but this is wrong since vend (virt + size) is not the last address of the memory region but (virt + size -1) is. The wrong determination will trigger the misreporting when the function check_for_illegal_area calls memory_intersects to check if the dma region intersects with stext region. The misreporting is as below (stext is at 0x80100000): WARNING: CPU: 0 PID: 77 at kernel/dma/debug.c:1073 check_for_illegal_area+0x130/0x168 DMA-API: chipidea-usb2 e0002000.usb: device driver maps memory from kernel text or rodata [addr=800f0000] [len=65536] Modules linked in: CPU: 1 PID: 77 Comm: usb-storage Not tainted 5.19.0-yocto-standard #5 Hardware name: Xilinx Zynq Platform unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x58/0x70 dump_stack_lvl from __warn+0xb0/0x198 __warn from warn_slowpath_fmt+0x80/0xb4 warn_slowpath_fmt from check_for_illegal_area+0x130/0x168 check_for_illegal_area from debug_dma_map_sg+0x94/0x368 debug_dma_map_sg from __dma_map_sg_attrs+0x114/0x128 __dma_map_sg_attrs from dma_map_sg_attrs+0x18/0x24 dma_map_sg_attrs from usb_hcd_map_urb_for_dma+0x250/0x3b4 usb_hcd_map_urb_for_dma from usb_hcd_submit_urb+0x194/0x214 usb_hcd_submit_urb from usb_sg_wait+0xa4/0x118 usb_sg_wait from usb_stor_bulk_transfer_sglist+0xa0/0xec usb_stor_bulk_transfer_sglist from usb_stor_bulk_srb+0x38/0x70 usb_stor_bulk_srb from usb_stor_Bulk_transport+0x150/0x360 usb_stor_Bulk_transport from usb_stor_invoke_transport+0x38/0x440 usb_stor_invoke_transport from usb_stor_control_thread+0x1e0/0x238 usb_stor_control_thread from kthread+0xf8/0x104 kthread from ret_from_fork+0x14/0x2c Refactor memory_intersects to fix the two problems above. Before the 1d7db834a027e ("dma-debug: use memory_intersects() directly"), memory_intersects is called only by printk_late_init: printk_late_init -> init_section_intersects ->memory_intersects. There were few places where memory_intersects was called. When commit 1d7db834a027e ("dma-debug: use memory_intersects() directly") was merged and CONFIG_DMA_API_DEBUG is enabled, the DMA subsystem uses it to check for an illegal area and the calltrace above is triggered. [akpm@linux-foundation.org: fix nearby comment typo] Link: https://lkml.kernel.org/r/20220819081145.948016-1-quanyang.wang@windriver.com Fixes: 979559362516 ("asm/sections: add helpers to check for section data") Signed-off-by: Quanyang Wang Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Thierry Reding Cc: Signed-off-by: Andrew Morton --- include/asm-generic/sections.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index d0f7bdd2fdf23..db13bb620f527 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -97,7 +97,7 @@ static inline bool memory_contains(void *begin, void *end, void *virt, /** * memory_intersects - checks if the region occupied by an object intersects * with another memory region - * @begin: virtual address of the beginning of the memory regien + * @begin: virtual address of the beginning of the memory region * @end: virtual address of the end of the memory region * @virt: virtual address of the memory object * @size: size of the memory object @@ -110,7 +110,10 @@ static inline bool memory_intersects(void *begin, void *end, void *virt, { void *vend = virt + size; - return (virt >= begin && virt < end) || (vend >= begin && vend < end); + if (virt < end && vend > begin) + return true; + + return false; } /** From ac733f6558ec86938e40433d88ec4e6148bac485 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 17 Aug 2022 22:27:53 +0100 Subject: [PATCH 467/654] mailmap: update email address for Colin King Colin King is working on kernel janitorial fixes in his spare time and using his Intel email is confusing. Use his gmail account as the default email address. Link: https://lkml.kernel.org/r/20220817212753.101109-1-colin.i.king@gmail.com Signed-off-by: Colin Ian King Signed-off-by: Andrew Morton --- .mailmap | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index 23241db2647f7..984ddd1f6ad36 100644 --- a/.mailmap +++ b/.mailmap @@ -98,8 +98,7 @@ Christian Brauner Christian Marangi Christophe Ricard Christoph Hellwig -Colin Ian King -Colin Ian King +Colin Ian King Corey Minyard Damian Hobson-Garcia Daniel Borkmann From d26f60703606ab425eee9882b32a1781a8bed74d Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 21 Aug 2022 18:08:53 +0000 Subject: [PATCH 468/654] mm/damon/dbgfs: avoid duplicate context directory creation When user tries to create a DAMON context via the DAMON debugfs interface with a name of an already existing context, the context directory creation fails but a new context is created and added in the internal data structure, due to absence of the directory creation success check. As a result, memory could leak and DAMON cannot be turned on. An example test case is as below: # cd /sys/kernel/debug/damon/ # echo "off" > monitor_on # echo paddr > target_ids # echo "abc" > mk_context # echo "abc" > mk_context # echo $$ > abc/target_ids # echo "on" > monitor_on <<< fails Return value of 'debugfs_create_dir()' is expected to be ignored in general, but this is an exceptional case as DAMON feature is depending on the debugfs functionality and it has the potential duplicate name issue. This commit therefore fixes the issue by checking the directory creation failure and immediately return the error in the case. Link: https://lkml.kernel.org/r/20220821180853.2400-1-sj@kernel.org Fixes: 75c1c2b53c78 ("mm/damon/dbgfs: support multiple contexts") Signed-off-by: Badari Pulavarty Signed-off-by: SeongJae Park Cc: [ 5.15.x] Signed-off-by: Andrew Morton --- mm/damon/dbgfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index cb8a7e9926a40..cfdf63132d5ad 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -818,6 +818,9 @@ static int dbgfs_mk_context(char *name) return -ENOENT; new_dir = debugfs_create_dir(name, root); + /* Below check is required for a potential duplicated name case */ + if (IS_ERR(new_dir)) + return PTR_ERR(new_dir); dbgfs_dirs[dbgfs_nr_ctxs] = new_dir; new_ctx = dbgfs_new_ctx(); From 1f13dff09ffc8bcf6aa20639b638d813379c7f6b Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Mon, 22 Aug 2022 22:54:30 +0100 Subject: [PATCH 469/654] squashfs: don't call kmalloc in decompressors The decompressors may be called while in an atomic section. So move the kmalloc() out of this path, and into the "page actor" init function. This fixes a regression introduced by commit f268eedddf35 ("squashfs: extend "page actor" to handle missing pages") Link: https://lkml.kernel.org/r/20220822215430.15933-1-phillip@squashfs.org.uk Fixes: f268eedddf35 ("squashfs: extend "page actor" to handle missing pages") Reported-by: Chris Murphy Signed-off-by: Phillip Lougher Cc: Signed-off-by: Andrew Morton --- fs/squashfs/file.c | 2 +- fs/squashfs/file_direct.c | 2 +- fs/squashfs/page_actor.c | 34 +++++++++++++++------------------- fs/squashfs/page_actor.h | 5 +++++ 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 98e64fec75b77..e56510964b229 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -593,7 +593,7 @@ static void squashfs_readahead(struct readahead_control *ractl) res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); - kfree(actor); + squashfs_page_actor_free(actor); if (res == expected) { int bytes; diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index be4b12d31e0c3..f1ccad519e28c 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -74,7 +74,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, /* Decompress directly into the page cache buffers */ res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); - kfree(actor); + squashfs_page_actor_free(actor); if (res < 0) goto mark_errored; diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c index b23b780d8f42e..54b93bf4a25c1 100644 --- a/fs/squashfs/page_actor.c +++ b/fs/squashfs/page_actor.c @@ -52,6 +52,7 @@ struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, actor->buffer = buffer; actor->pages = pages; actor->next_page = 0; + actor->tmp_buffer = NULL; actor->squashfs_first_page = cache_first_page; actor->squashfs_next_page = cache_next_page; actor->squashfs_finish_page = cache_finish_page; @@ -68,20 +69,9 @@ static void *handle_next_page(struct squashfs_page_actor *actor) if ((actor->next_page == actor->pages) || (actor->next_index != actor->page[actor->next_page]->index)) { - if (actor->alloc_buffer) { - void *tmp_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); - - if (tmp_buffer) { - actor->tmp_buffer = tmp_buffer; - actor->next_index++; - actor->returned_pages++; - return tmp_buffer; - } - } - actor->next_index++; actor->returned_pages++; - return ERR_PTR(-ENOMEM); + return actor->alloc_buffer ? actor->tmp_buffer : ERR_PTR(-ENOMEM); } actor->next_index++; @@ -96,11 +86,10 @@ static void *direct_first_page(struct squashfs_page_actor *actor) static void *direct_next_page(struct squashfs_page_actor *actor) { - if (actor->pageaddr) + if (actor->pageaddr) { kunmap_local(actor->pageaddr); - - kfree(actor->tmp_buffer); - actor->pageaddr = actor->tmp_buffer = NULL; + actor->pageaddr = NULL; + } return handle_next_page(actor); } @@ -109,8 +98,6 @@ static void direct_finish_page(struct squashfs_page_actor *actor) { if (actor->pageaddr) kunmap_local(actor->pageaddr); - - kfree(actor->tmp_buffer); } struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_info *msblk, @@ -121,6 +108,16 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_ if (actor == NULL) return NULL; + if (msblk->decompressor->alloc_buffer) { + actor->tmp_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + + if (actor->tmp_buffer == NULL) { + kfree(actor); + return NULL; + } + } else + actor->tmp_buffer = NULL; + actor->length = length ? : pages * PAGE_SIZE; actor->page = page; actor->pages = pages; @@ -128,7 +125,6 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_ actor->returned_pages = 0; actor->next_index = page[0]->index & ~((1 << (msblk->block_log - PAGE_SHIFT)) - 1); actor->pageaddr = NULL; - actor->tmp_buffer = NULL; actor->alloc_buffer = msblk->decompressor->alloc_buffer; actor->squashfs_first_page = direct_first_page; actor->squashfs_next_page = direct_next_page; diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h index 24841d28bc0fb..95ffbb543d913 100644 --- a/fs/squashfs/page_actor.h +++ b/fs/squashfs/page_actor.h @@ -29,6 +29,11 @@ extern struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, extern struct squashfs_page_actor *squashfs_page_actor_init_special( struct squashfs_sb_info *msblk, struct page **page, int pages, int length); +static inline void squashfs_page_actor_free(struct squashfs_page_actor *actor) +{ + kfree(actor->tmp_buffer); + kfree(actor); +} static inline void *squashfs_first_page(struct squashfs_page_actor *actor) { return actor->squashfs_first_page(actor); From 3d2f78f08cd8388035ac375e731ec1ac1b79b09d Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 23 Aug 2022 18:11:38 -0400 Subject: [PATCH 470/654] mm/mprotect: only reference swap pfn page if type match Yu Zhao reported a bug after the commit "mm/swap: Add swp_offset_pfn() to fetch PFN from swap entry" added a check in swp_offset_pfn() for swap type [1]: kernel BUG at include/linux/swapops.h:117! CPU: 46 PID: 5245 Comm: EventManager_De Tainted: G S O L 6.0.0-dbg-DEV #2 RIP: 0010:pfn_swap_entry_to_page+0x72/0xf0 Code: c6 48 8b 36 48 83 fe ff 74 53 48 01 d1 48 83 c1 08 48 8b 09 f6 c1 01 75 7b 66 90 48 89 c1 48 8b 09 f6 c1 01 74 74 5d c3 eb 9e <0f> 0b 48 ba ff ff ff ff 03 00 00 00 eb ae a9 ff 0f 00 00 75 13 48 RSP: 0018:ffffa59e73fabb80 EFLAGS: 00010282 RAX: 00000000ffffffe8 RBX: 0c00000000000000 RCX: ffffcd5440000000 RDX: 1ffffffffff7a80a RSI: 0000000000000000 RDI: 0c0000000000042b RBP: ffffa59e73fabb80 R08: ffff9965ca6e8bb8 R09: 0000000000000000 R10: ffffffffa5a2f62d R11: 0000030b372e9fff R12: ffff997b79db5738 R13: 000000000000042b R14: 0c0000000000042b R15: 1ffffffffff7a80a FS: 00007f549d1bb700(0000) GS:ffff99d3cf680000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000440d035b3180 CR3: 0000002243176004 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: change_pte_range+0x36e/0x880 change_p4d_range+0x2e8/0x670 change_protection_range+0x14e/0x2c0 mprotect_fixup+0x1ee/0x330 do_mprotect_pkey+0x34c/0x440 __x64_sys_mprotect+0x1d/0x30 It triggers because pfn_swap_entry_to_page() could be called upon e.g. a genuine swap entry. Fix it by only calling it when it's a write migration entry where the page* is used. [1] https://lore.kernel.org/lkml/CAOUHufaVC2Za-p8m0aiHw6YkheDcrO-C3wRGixwDS32VTS+k1w@mail.gmail.com/ Link: https://lkml.kernel.org/r/20220823221138.45602-1-peterx@redhat.com Fixes: 6c287605fd56 ("mm: remember exclusively mapped anonymous pages with PG_anon_exclusive") Signed-off-by: Peter Xu Reported-by: Yu Zhao Tested-by: Yu Zhao Reviewed-by: David Hildenbrand Cc: "Huang, Ying" Cc: Signed-off-by: Andrew Morton --- mm/mprotect.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index 3a23dde73723b..bc6bddd156ca6 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -196,10 +196,11 @@ static unsigned long change_pte_range(struct mmu_gather *tlb, pages++; } else if (is_swap_pte(oldpte)) { swp_entry_t entry = pte_to_swp_entry(oldpte); - struct page *page = pfn_swap_entry_to_page(entry); pte_t newpte; if (is_writable_migration_entry(entry)) { + struct page *page = pfn_swap_entry_to_page(entry); + /* * A protection check is difficult so * just be safe and disable write From 0ebafe2ea879e97fef6bd97f72303a6ccf39f092 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Fri, 26 Aug 2022 15:05:15 +0200 Subject: [PATCH 471/654] .mailmap: update Luca Ceresoli's e-mail address My Bootlin address is preferred from now on. Link: https://lkml.kernel.org/r/20220826130515.3011951-1-luca.ceresoli@bootlin.com Signed-off-by: Luca Ceresoli Cc: Arnd Bergmann Cc: Atish Patra Cc: Hans Verkuil Cc: Thomas Petazzoni Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 984ddd1f6ad36..8ded2e7c2906f 100644 --- a/.mailmap +++ b/.mailmap @@ -254,6 +254,7 @@ Linus Lüssing Li Yang Li Yang Lorenzo Pieralisi +Luca Ceresoli Lukasz Luba Maciej W. Rozycki Maciej W. Rozycki From b90cb1053190353cc30f0fef0ef1f378ccc063c5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Aug 2022 15:05:29 -0700 Subject: [PATCH 472/654] Linux 6.0-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c7705f7496012..952d354069a43 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From c3a72bb213209adfe981a4a92ea5746a778323e4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Aug 2022 14:07:28 +0100 Subject: [PATCH 473/654] smb3: Move the flush out of smb2_copychunk_range() into its callers Move the flush out of smb2_copychunk_range() into its callers. This will allow the pagecache to be invalidated between the flush and the operation in smb3_collapse_range() and smb3_insert_range(). Signed-off-by: David Howells cc: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 2 ++ fs/cifs/smb2ops.c | 20 ++++++++------------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f54d8bf2732a5..e9fb338b8e7e7 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1219,6 +1219,8 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, cifs_dbg(FYI, "copychunk range\n"); + filemap_write_and_wait(src_inode->i_mapping); + if (!src_file->private_data || !dst_file->private_data) { rc = -EBADF; cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 4810bd62266a5..674cf187fb0fd 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1600,17 +1600,8 @@ smb2_copychunk_range(const unsigned int xid, int chunks_copied = 0; bool chunk_sizes_updated = false; ssize_t bytes_written, total_bytes_written = 0; - struct inode *inode; pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL); - - /* - * We need to flush all unwritten data before we can send the - * copychunk ioctl to the server. - */ - inode = d_inode(trgtfile->dentry); - filemap_write_and_wait(inode->i_mapping); - if (pcchunk == NULL) return -ENOMEM; @@ -3694,6 +3685,8 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, goto out; } + filemap_write_and_wait(inode->i_mapping); + rc = smb2_copychunk_range(xid, cfile, cfile, off + len, i_size_read(inode) - off - len, off); if (rc < 0) @@ -3721,18 +3714,21 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon, int rc; unsigned int xid; struct cifsFileInfo *cfile = file->private_data; + struct inode *inode = file_inode(file); __le64 eof; __u64 count; xid = get_xid(); - if (off >= i_size_read(file->f_inode)) { + if (off >= i_size_read(inode)) { rc = -EINVAL; goto out; } - count = i_size_read(file->f_inode) - off; - eof = cpu_to_le64(i_size_read(file->f_inode) + len); + count = i_size_read(inode) - off; + eof = cpu_to_le64(i_size_read(inode) + len); + + filemap_write_and_wait(inode->i_mapping); rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, cfile->pid, &eof); From fa30a81f255a56cccd89552cd6ce7ea6e8d8acc4 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 23 Aug 2022 14:07:41 +0100 Subject: [PATCH 474/654] smb3: fix temporary data corruption in collapse range collapse range doesn't discard the affected cached region so can risk temporarily corrupting the file data. This fixes xfstest generic/031 I also decided to merge a minor cleanup to this into the same patch (avoiding rereading inode size repeatedly unnecessarily) to make it clearer. Cc: stable@vger.kernel.org Fixes: 5476b5dd82c8b ("cifs: add support for FALLOC_FL_COLLAPSE_RANGE") Reported-by: David Howells Tested-by: David Howells Reviewed-by: David Howells cc: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 674cf187fb0fd..5b5ddc1b4638f 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3669,41 +3669,47 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, { int rc; unsigned int xid; - struct inode *inode; + struct inode *inode = file_inode(file); struct cifsFileInfo *cfile = file->private_data; - struct cifsInodeInfo *cifsi; + struct cifsInodeInfo *cifsi = CIFS_I(inode); __le64 eof; + loff_t old_eof; xid = get_xid(); - inode = d_inode(cfile->dentry); - cifsi = CIFS_I(inode); + inode_lock(inode); - if (off >= i_size_read(inode) || - off + len >= i_size_read(inode)) { + old_eof = i_size_read(inode); + if ((off >= old_eof) || + off + len >= old_eof) { rc = -EINVAL; goto out; } + filemap_invalidate_lock(inode->i_mapping); filemap_write_and_wait(inode->i_mapping); + truncate_pagecache_range(inode, off, old_eof); rc = smb2_copychunk_range(xid, cfile, cfile, off + len, - i_size_read(inode) - off - len, off); + old_eof - off - len, off); if (rc < 0) - goto out; + goto out_2; - eof = cpu_to_le64(i_size_read(inode) - len); + eof = cpu_to_le64(old_eof - len); rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, cfile->pid, &eof); if (rc < 0) - goto out; + goto out_2; rc = 0; cifsi->server_eof = i_size_read(inode) - len; truncate_setsize(inode, cifsi->server_eof); fscache_resize_cookie(cifs_inode_cookie(inode), cifsi->server_eof); +out_2: + filemap_invalidate_unlock(inode->i_mapping); out: + inode_unlock(inode); free_xid(xid); return rc; } From 9c8b7a293f50253e694f19161c045817a938e551 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Aug 2022 14:07:55 +0100 Subject: [PATCH 475/654] smb3: fix temporary data corruption in insert range insert range doesn't discard the affected cached region so can risk temporarily corrupting file data. Also includes some minor cleanup (avoiding rereading inode size repeatedly unnecessarily) to make it clearer. Cc: stable@vger.kernel.org Fixes: 7fe6fe95b936 ("cifs: add FALLOC_FL_INSERT_RANGE support") Signed-off-by: David Howells cc: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 5b5ddc1b4638f..7c941ce1e7a9f 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3722,35 +3722,43 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon, struct cifsFileInfo *cfile = file->private_data; struct inode *inode = file_inode(file); __le64 eof; - __u64 count; + __u64 count, old_eof; xid = get_xid(); - if (off >= i_size_read(inode)) { + inode_lock(inode); + + old_eof = i_size_read(inode); + if (off >= old_eof) { rc = -EINVAL; goto out; } - count = i_size_read(inode) - off; - eof = cpu_to_le64(i_size_read(inode) + len); + count = old_eof - off; + eof = cpu_to_le64(old_eof + len); + filemap_invalidate_lock(inode->i_mapping); filemap_write_and_wait(inode->i_mapping); + truncate_pagecache_range(inode, off, old_eof); rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, cfile->pid, &eof); if (rc < 0) - goto out; + goto out_2; rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len); if (rc < 0) - goto out; + goto out_2; - rc = smb3_zero_range(file, tcon, off, len, 1); + rc = smb3_zero_data(file, tcon, off, len, xid); if (rc < 0) - goto out; + goto out_2; rc = 0; +out_2: + filemap_invalidate_unlock(inode->i_mapping); out: + inode_unlock(inode); free_xid(xid); return rc; } From f0da47118c7e93cdbbc6fb403dd729a5f2c90ee3 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 26 Aug 2022 16:29:54 +0200 Subject: [PATCH 476/654] net: mac802154: Fix a condition in the receive path Upon reception, a packet must be categorized, either it's destination is the host, or it is another host. A packet with no destination addressing fields may be valid in two situations: - the packet has no source field: only ACKs are built like that, we consider the host as the destination. - the packet has a valid source field: it is directed to the PAN coordinator, as for know we don't have this information we consider we are not the PAN coordinator. There was likely a copy/paste error made during a previous cleanup because the if clause is now containing exactly the same condition as in the switch case, which can never be true. In the past the destination address was used in the switch and the source address was used in the if, which matches what the spec says. Cc: stable@vger.kernel.org Fixes: ae531b9475f6 ("ieee802154: use ieee802154_addr instead of *_sa variants") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/r/20220826142954.254853-1-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- net/mac802154/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index b8ce84618a55b..c439125ef2b91 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -44,7 +44,7 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, switch (mac_cb(skb)->dest.mode) { case IEEE802154_ADDR_NONE: - if (mac_cb(skb)->dest.mode != IEEE802154_ADDR_NONE) + if (hdr->source.mode != IEEE802154_ADDR_NONE) /* FIXME: check if we are PAN coordinator */ skb->pkt_type = PACKET_OTHERHOST; else From ffd7bdddaab193c38416fd5dd416d065517d266e Mon Sep 17 00:00:00 2001 From: Li Qiong Date: Mon, 29 Aug 2022 15:12:59 +0800 Subject: [PATCH 477/654] ieee802154: cc2520: add rc code in cc2520_tx() The rc code is 0 at the error path "status & CC2520_STATUS_TX_UNDERFLOW". Assign rc code with '-EINVAL' at this error path to fix it. Signed-off-by: Li Qiong Link: https://lore.kernel.org/r/20220829071259.18330-1-liqiong@nfschina.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/cc2520.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index 1e1f40f628a02..c69b87d3837da 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -504,6 +504,7 @@ cc2520_tx(struct ieee802154_hw *hw, struct sk_buff *skb) goto err_tx; if (status & CC2520_STATUS_TX_UNDERFLOW) { + rc = -EINVAL; dev_err(&priv->spi->dev, "cc2520 tx underflow exception\n"); goto err_tx; } From ca922fecda6caa5162409406dc3b663062d75089 Mon Sep 17 00:00:00 2001 From: Pierre Morel Date: Fri, 19 Aug 2022 14:29:45 +0200 Subject: [PATCH 478/654] KVM: s390: pci: Hook to access KVM lowlevel from VFIO We have a cross dependency between KVM and VFIO when using s390 vfio_pci_zdev extensions for PCI passthrough To be able to keep both subsystem modular we add a registering hook inside the S390 core code. This fixes a build problem when VFIO is built-in and KVM is built as a module. Reported-by: Randy Dunlap Reported-by: kernel test robot Reviewed-by: Matthew Rosato Reviewed-by: Niklas Schnelle Signed-off-by: Pierre Morel Fixes: 09340b2fca007 ("KVM: s390: pci: add routines to start/stop interpretive execution") Cc: Acked-by: Janosch Frank Acked-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/20220819122945.9309-1-pmorel@linux.ibm.com Message-Id: <20220819122945.9309-1-pmorel@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/include/asm/kvm_host.h | 17 ++++++----------- arch/s390/kvm/pci.c | 12 ++++++++---- arch/s390/pci/Makefile | 2 +- arch/s390/pci/pci_kvm_hook.c | 11 +++++++++++ drivers/vfio/pci/vfio_pci_zdev.c | 8 ++++++-- 5 files changed, 32 insertions(+), 18 deletions(-) create mode 100644 arch/s390/pci/pci_kvm_hook.c diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index f39092e0ceaad..b1e98a9ed152b 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -1038,16 +1038,11 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #define __KVM_HAVE_ARCH_VM_FREE void kvm_arch_free_vm(struct kvm *kvm); -#ifdef CONFIG_VFIO_PCI_ZDEV_KVM -int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm); -void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev); -#else -static inline int kvm_s390_pci_register_kvm(struct zpci_dev *dev, - struct kvm *kvm) -{ - return -EPERM; -} -static inline void kvm_s390_pci_unregister_kvm(struct zpci_dev *dev) {} -#endif +struct zpci_kvm_hook { + int (*kvm_register)(void *opaque, struct kvm *kvm); + void (*kvm_unregister)(void *opaque); +}; + +extern struct zpci_kvm_hook zpci_kvm_hook; #endif diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index 4946fb7757d6b..bb8c335d17b92 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -431,8 +431,9 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev) * available, enable them and let userspace indicate whether or not they will * be used (specify SHM bit to disable). */ -int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm) +static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) { + struct zpci_dev *zdev = opaque; int rc; if (!zdev) @@ -510,10 +511,10 @@ int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm) kvm_put_kvm(kvm); return rc; } -EXPORT_SYMBOL_GPL(kvm_s390_pci_register_kvm); -void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev) +static void kvm_s390_pci_unregister_kvm(void *opaque) { + struct zpci_dev *zdev = opaque; struct kvm *kvm; if (!zdev) @@ -566,7 +567,6 @@ void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev) kvm_put_kvm(kvm); } -EXPORT_SYMBOL_GPL(kvm_s390_pci_unregister_kvm); void kvm_s390_pci_init_list(struct kvm *kvm) { @@ -678,6 +678,8 @@ int kvm_s390_pci_init(void) spin_lock_init(&aift->gait_lock); mutex_init(&aift->aift_lock); + zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm; + zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm; return 0; } @@ -685,6 +687,8 @@ int kvm_s390_pci_init(void) void kvm_s390_pci_exit(void) { mutex_destroy(&aift->aift_lock); + zpci_kvm_hook.kvm_register = NULL; + zpci_kvm_hook.kvm_unregister = NULL; kfree(aift); } diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index bf557a1b789c7..5ae31ca9dd441 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -5,5 +5,5 @@ obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ - pci_bus.o + pci_bus.o pci_kvm_hook.o obj-$(CONFIG_PCI_IOV) += pci_iov.o diff --git a/arch/s390/pci/pci_kvm_hook.c b/arch/s390/pci/pci_kvm_hook.c new file mode 100644 index 0000000000000..ff34baf50a3e6 --- /dev/null +++ b/arch/s390/pci/pci_kvm_hook.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VFIO ZPCI devices support + * + * Copyright (C) IBM Corp. 2022. All rights reserved. + * Author(s): Pierre Morel + */ +#include + +struct zpci_kvm_hook zpci_kvm_hook; +EXPORT_SYMBOL_GPL(zpci_kvm_hook); diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c index e163aa9f61444..0cbdcd14f1c8b 100644 --- a/drivers/vfio/pci/vfio_pci_zdev.c +++ b/drivers/vfio/pci/vfio_pci_zdev.c @@ -151,7 +151,10 @@ int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev) if (!vdev->vdev.kvm) return 0; - return kvm_s390_pci_register_kvm(zdev, vdev->vdev.kvm); + if (zpci_kvm_hook.kvm_register) + return zpci_kvm_hook.kvm_register(zdev, vdev->vdev.kvm); + + return -ENOENT; } void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev) @@ -161,5 +164,6 @@ void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev) if (!zdev || !vdev->vdev.kvm) return; - kvm_s390_pci_unregister_kvm(zdev); + if (zpci_kvm_hook.kvm_unregister) + zpci_kvm_hook.kvm_unregister(zdev); } From 8d5fc280392735e4441b35de14f2f4860fa8d83c Mon Sep 17 00:00:00 2001 From: Yan Xinyu Date: Thu, 14 Jul 2022 18:20:37 +0800 Subject: [PATCH 479/654] USB: serial: option: add support for OPPO R11 diag port Add support for OPPO R11 USB diag serial port to option driver. This phone uses Qualcomm Snapdragon 660 SoC. usb-devices output: T: Bus=03 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 10 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=22d9 ProdID=276c Rev=04.04 S: Manufacturer=OPPO S: Product=SDM660-MTP _SN:09C6BCA7 S: SerialNumber=beb2c403 C: #Ifs= 2 Cfg#= 1 Atr=80 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option I: If#=0x1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs Signed-off-by: Yan Xinyu Link: https://lore.kernel.org/r/20220714102037.4113889-1-sdlyyxy@bupt.edu.cn Link: https://lore.kernel.org/r/Yt1WfSZk03Plpnan@hovoldconsulting.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index de59fa919540a..cf65cb84c3cae 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -573,6 +573,10 @@ static void option_instat_callback(struct urb *urb); #define WETELECOM_PRODUCT_6802 0x6802 #define WETELECOM_PRODUCT_WMD300 0x6803 +/* OPPO products */ +#define OPPO_VENDOR_ID 0x22d9 +#define OPPO_PRODUCT_R11 0x276c + /* Device flags */ @@ -2155,6 +2159,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ + { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); From 3f8ae9fe0409698799e173f698b714f34570b64b Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 25 Aug 2022 13:36:44 +0200 Subject: [PATCH 480/654] net: dsa: xrs700x: Use irqsave variant for u64 stats update xrs700x_read_port_counters() updates the stats from a worker using the u64_stats_update_begin() version. This is okay on 32-UP since on the reader side preemption is disabled. On 32bit-SMP the writer can be preempted by the reader at which point the reader will spin on the seqcount until writer continues and completes the update. Assigning the mib_mutex mutex to the underlying seqcount would ensure proper synchronisation. The API for that on the u64_stats_init() side isn't available. Since it is the only user, just use disable interrupts during the update. Use u64_stats_update_begin_irqsave() on the writer side to ensure an uninterrupted update. Fixes: ee00b24f32eb8 ("net: dsa: add Arrow SpeedChips XRS700x driver") Cc: Andrew Lunn Cc: Florian Fainelli Cc: George McCollister Cc: Vivien Didelot Cc: Vladimir Oltean Signed-off-by: Sebastian Andrzej Siewior Acked-by: George McCollister Signed-off-by: David S. Miller --- drivers/net/dsa/xrs700x/xrs700x.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c index 3887ed33c5fe2..fa622639d6401 100644 --- a/drivers/net/dsa/xrs700x/xrs700x.c +++ b/drivers/net/dsa/xrs700x/xrs700x.c @@ -109,6 +109,7 @@ static void xrs700x_read_port_counters(struct xrs700x *priv, int port) { struct xrs700x_port *p = &priv->ports[port]; struct rtnl_link_stats64 stats; + unsigned long flags; int i; memset(&stats, 0, sizeof(stats)); @@ -138,9 +139,9 @@ static void xrs700x_read_port_counters(struct xrs700x *priv, int port) */ stats.rx_packets += stats.multicast; - u64_stats_update_begin(&p->syncp); + flags = u64_stats_update_begin_irqsave(&p->syncp); p->stats64 = stats; - u64_stats_update_end(&p->syncp); + u64_stats_update_end_irqrestore(&p->syncp, flags); mutex_unlock(&p->mib_mutex); } From 278d3ba61563ceed3cb248383ced19e14ec7bc1f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 25 Aug 2022 13:36:45 +0200 Subject: [PATCH 481/654] net: Use u64_stats_fetch_begin_irq() for stats fetch. On 32bit-UP u64_stats_fetch_begin() disables only preemption. If the reader is in preemptible context and the writer side (u64_stats_update_begin*()) runs in an interrupt context (IRQ or softirq) then the writer can update the stats during the read operation. This update remains undetected. Use u64_stats_fetch_begin_irq() to ensure the stats fetch on 32bit-UP are not interrupted by a writer. 32bit-SMP remains unaffected by this change. Cc: "David S. Miller" Cc: Catherine Sullivan Cc: David Awogbemila Cc: Dimitris Michailidis Cc: Eric Dumazet Cc: Hans Ulli Kroll Cc: Jakub Kicinski Cc: Jeroen de Borst Cc: Johannes Berg Cc: Linus Walleij Cc: Paolo Abeni Cc: Simon Horman Cc: linux-arm-kernel@lists.infradead.org Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Cc: oss-drivers@corigine.com Cc: stable@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/cortina/gemini.c | 24 +++++++++---------- .../ethernet/fungible/funeth/funeth_txrx.h | 4 ++-- drivers/net/ethernet/google/gve/gve_ethtool.c | 16 ++++++------- drivers/net/ethernet/google/gve/gve_main.c | 12 +++++----- drivers/net/ethernet/huawei/hinic/hinic_rx.c | 4 ++-- drivers/net/ethernet/huawei/hinic/hinic_tx.c | 4 ++-- .../ethernet/netronome/nfp/nfp_net_common.c | 8 +++---- .../ethernet/netronome/nfp/nfp_net_ethtool.c | 8 +++---- drivers/net/netdevsim/netdev.c | 4 ++-- net/mac80211/sta_info.c | 8 +++---- net/mpls/af_mpls.c | 4 ++-- 11 files changed, 48 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 9e6de2f968fa3..6dae768671e3d 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -1919,7 +1919,7 @@ static void gmac_get_stats64(struct net_device *netdev, /* Racing with RX NAPI */ do { - start = u64_stats_fetch_begin(&port->rx_stats_syncp); + start = u64_stats_fetch_begin_irq(&port->rx_stats_syncp); stats->rx_packets = port->stats.rx_packets; stats->rx_bytes = port->stats.rx_bytes; @@ -1931,11 +1931,11 @@ static void gmac_get_stats64(struct net_device *netdev, stats->rx_crc_errors = port->stats.rx_crc_errors; stats->rx_frame_errors = port->stats.rx_frame_errors; - } while (u64_stats_fetch_retry(&port->rx_stats_syncp, start)); + } while (u64_stats_fetch_retry_irq(&port->rx_stats_syncp, start)); /* Racing with MIB and TX completion interrupts */ do { - start = u64_stats_fetch_begin(&port->ir_stats_syncp); + start = u64_stats_fetch_begin_irq(&port->ir_stats_syncp); stats->tx_errors = port->stats.tx_errors; stats->tx_packets = port->stats.tx_packets; @@ -1945,15 +1945,15 @@ static void gmac_get_stats64(struct net_device *netdev, stats->rx_missed_errors = port->stats.rx_missed_errors; stats->rx_fifo_errors = port->stats.rx_fifo_errors; - } while (u64_stats_fetch_retry(&port->ir_stats_syncp, start)); + } while (u64_stats_fetch_retry_irq(&port->ir_stats_syncp, start)); /* Racing with hard_start_xmit */ do { - start = u64_stats_fetch_begin(&port->tx_stats_syncp); + start = u64_stats_fetch_begin_irq(&port->tx_stats_syncp); stats->tx_dropped = port->stats.tx_dropped; - } while (u64_stats_fetch_retry(&port->tx_stats_syncp, start)); + } while (u64_stats_fetch_retry_irq(&port->tx_stats_syncp, start)); stats->rx_dropped += stats->rx_missed_errors; } @@ -2031,18 +2031,18 @@ static void gmac_get_ethtool_stats(struct net_device *netdev, /* Racing with MIB interrupt */ do { p = values; - start = u64_stats_fetch_begin(&port->ir_stats_syncp); + start = u64_stats_fetch_begin_irq(&port->ir_stats_syncp); for (i = 0; i < RX_STATS_NUM; i++) *p++ = port->hw_stats[i]; - } while (u64_stats_fetch_retry(&port->ir_stats_syncp, start)); + } while (u64_stats_fetch_retry_irq(&port->ir_stats_syncp, start)); values = p; /* Racing with RX NAPI */ do { p = values; - start = u64_stats_fetch_begin(&port->rx_stats_syncp); + start = u64_stats_fetch_begin_irq(&port->rx_stats_syncp); for (i = 0; i < RX_STATUS_NUM; i++) *p++ = port->rx_stats[i]; @@ -2050,13 +2050,13 @@ static void gmac_get_ethtool_stats(struct net_device *netdev, *p++ = port->rx_csum_stats[i]; *p++ = port->rx_napi_exits; - } while (u64_stats_fetch_retry(&port->rx_stats_syncp, start)); + } while (u64_stats_fetch_retry_irq(&port->rx_stats_syncp, start)); values = p; /* Racing with TX start_xmit */ do { p = values; - start = u64_stats_fetch_begin(&port->tx_stats_syncp); + start = u64_stats_fetch_begin_irq(&port->tx_stats_syncp); for (i = 0; i < TX_MAX_FRAGS; i++) { *values++ = port->tx_frag_stats[i]; @@ -2065,7 +2065,7 @@ static void gmac_get_ethtool_stats(struct net_device *netdev, *values++ = port->tx_frags_linearized; *values++ = port->tx_hw_csummed; - } while (u64_stats_fetch_retry(&port->tx_stats_syncp, start)); + } while (u64_stats_fetch_retry_irq(&port->tx_stats_syncp, start)); } static int gmac_get_ksettings(struct net_device *netdev, diff --git a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h index 53b7e95213a85..671f51135c269 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h +++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h @@ -206,9 +206,9 @@ struct funeth_rxq { #define FUN_QSTAT_READ(q, seq, stats_copy) \ do { \ - seq = u64_stats_fetch_begin(&(q)->syncp); \ + seq = u64_stats_fetch_begin_irq(&(q)->syncp); \ stats_copy = (q)->stats; \ - } while (u64_stats_fetch_retry(&(q)->syncp, (seq))) + } while (u64_stats_fetch_retry_irq(&(q)->syncp, (seq))) #define FUN_INT_NAME_LEN (IFNAMSIZ + 16) diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 50b384910c839..7b9a2d9d96243 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -177,14 +177,14 @@ gve_get_ethtool_stats(struct net_device *netdev, struct gve_rx_ring *rx = &priv->rx[ring]; start = - u64_stats_fetch_begin(&priv->rx[ring].statss); + u64_stats_fetch_begin_irq(&priv->rx[ring].statss); tmp_rx_pkts = rx->rpackets; tmp_rx_bytes = rx->rbytes; tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = rx->rx_desc_err_dropped_pkt; - } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, start)); rx_pkts += tmp_rx_pkts; rx_bytes += tmp_rx_bytes; @@ -198,10 +198,10 @@ gve_get_ethtool_stats(struct net_device *netdev, if (priv->tx) { do { start = - u64_stats_fetch_begin(&priv->tx[ring].statss); + u64_stats_fetch_begin_irq(&priv->tx[ring].statss); tmp_tx_pkts = priv->tx[ring].pkt_done; tmp_tx_bytes = priv->tx[ring].bytes_done; - } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, start)); tx_pkts += tmp_tx_pkts; tx_bytes += tmp_tx_bytes; @@ -259,13 +259,13 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = rx->fill_cnt - rx->cnt; do { start = - u64_stats_fetch_begin(&priv->rx[ring].statss); + u64_stats_fetch_begin_irq(&priv->rx[ring].statss); tmp_rx_bytes = rx->rbytes; tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = rx->rx_desc_err_dropped_pkt; - } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, start)); data[i++] = tmp_rx_bytes; data[i++] = rx->rx_cont_packet_cnt; @@ -331,9 +331,9 @@ gve_get_ethtool_stats(struct net_device *netdev, } do { start = - u64_stats_fetch_begin(&priv->tx[ring].statss); + u64_stats_fetch_begin_irq(&priv->tx[ring].statss); tmp_tx_bytes = tx->bytes_done; - } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, start)); data[i++] = tmp_tx_bytes; data[i++] = tx->wake_queue; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 6cafee55efc32..044db3ebb071c 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -51,10 +51,10 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { do { start = - u64_stats_fetch_begin(&priv->rx[ring].statss); + u64_stats_fetch_begin_irq(&priv->rx[ring].statss); packets = priv->rx[ring].rpackets; bytes = priv->rx[ring].rbytes; - } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, start)); s->rx_packets += packets; s->rx_bytes += bytes; @@ -64,10 +64,10 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) { do { start = - u64_stats_fetch_begin(&priv->tx[ring].statss); + u64_stats_fetch_begin_irq(&priv->tx[ring].statss); packets = priv->tx[ring].pkt_done; bytes = priv->tx[ring].bytes_done; - } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, start)); s->tx_packets += packets; s->tx_bytes += bytes; @@ -1274,9 +1274,9 @@ void gve_handle_report_stats(struct gve_priv *priv) } do { - start = u64_stats_fetch_begin(&priv->tx[idx].statss); + start = u64_stats_fetch_begin_irq(&priv->tx[idx].statss); tx_bytes = priv->tx[idx].bytes_done; - } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); + } while (u64_stats_fetch_retry_irq(&priv->tx[idx].statss, start)); stats[stats_idx++] = (struct stats) { .stat_name = cpu_to_be32(TX_WAKE_CNT), .value = cpu_to_be64(priv->tx[idx].wake_queue), diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index a866bea651103..e5828a658caf4 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -74,14 +74,14 @@ void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats) unsigned int start; do { - start = u64_stats_fetch_begin(&rxq_stats->syncp); + start = u64_stats_fetch_begin_irq(&rxq_stats->syncp); stats->pkts = rxq_stats->pkts; stats->bytes = rxq_stats->bytes; stats->errors = rxq_stats->csum_errors + rxq_stats->other_errors; stats->csum_errors = rxq_stats->csum_errors; stats->other_errors = rxq_stats->other_errors; - } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&rxq_stats->syncp, start)); } /** diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index 5051cdff2384b..3b6c7b5857376 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -99,14 +99,14 @@ void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats) unsigned int start; do { - start = u64_stats_fetch_begin(&txq_stats->syncp); + start = u64_stats_fetch_begin_irq(&txq_stats->syncp); stats->pkts = txq_stats->pkts; stats->bytes = txq_stats->bytes; stats->tx_busy = txq_stats->tx_busy; stats->tx_wake = txq_stats->tx_wake; stats->tx_dropped = txq_stats->tx_dropped; stats->big_frags_pkts = txq_stats->big_frags_pkts; - } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&txq_stats->syncp, start)); } /** diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index cf4d6f1129fa2..349a2b1a19a24 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1630,21 +1630,21 @@ static void nfp_net_stat64(struct net_device *netdev, unsigned int start; do { - start = u64_stats_fetch_begin(&r_vec->rx_sync); + start = u64_stats_fetch_begin_irq(&r_vec->rx_sync); data[0] = r_vec->rx_pkts; data[1] = r_vec->rx_bytes; data[2] = r_vec->rx_drops; - } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + } while (u64_stats_fetch_retry_irq(&r_vec->rx_sync, start)); stats->rx_packets += data[0]; stats->rx_bytes += data[1]; stats->rx_dropped += data[2]; do { - start = u64_stats_fetch_begin(&r_vec->tx_sync); + start = u64_stats_fetch_begin_irq(&r_vec->tx_sync); data[0] = r_vec->tx_pkts; data[1] = r_vec->tx_bytes; data[2] = r_vec->tx_errors; - } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + } while (u64_stats_fetch_retry_irq(&r_vec->tx_sync, start)); stats->tx_packets += data[0]; stats->tx_bytes += data[1]; stats->tx_errors += data[2]; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index eeb1455a4e5db..b1b1b648e40cb 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -649,7 +649,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) unsigned int start; do { - start = u64_stats_fetch_begin(&nn->r_vecs[i].rx_sync); + start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].rx_sync); data[0] = nn->r_vecs[i].rx_pkts; tmp[0] = nn->r_vecs[i].hw_csum_rx_ok; tmp[1] = nn->r_vecs[i].hw_csum_rx_inner_ok; @@ -657,10 +657,10 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) tmp[3] = nn->r_vecs[i].hw_csum_rx_error; tmp[4] = nn->r_vecs[i].rx_replace_buf_alloc_fail; tmp[5] = nn->r_vecs[i].hw_tls_rx; - } while (u64_stats_fetch_retry(&nn->r_vecs[i].rx_sync, start)); + } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].rx_sync, start)); do { - start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync); + start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].tx_sync); data[1] = nn->r_vecs[i].tx_pkts; data[2] = nn->r_vecs[i].tx_busy; tmp[6] = nn->r_vecs[i].hw_csum_tx; @@ -670,7 +670,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) tmp[10] = nn->r_vecs[i].hw_tls_tx; tmp[11] = nn->r_vecs[i].tls_tx_fallback; tmp[12] = nn->r_vecs[i].tls_tx_no_fallback; - } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start)); + } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].tx_sync, start)); data += NN_RVEC_PER_Q_STATS; diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index e470e3398abc2..9a1a5b2036240 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -67,10 +67,10 @@ nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) unsigned int start; do { - start = u64_stats_fetch_begin(&ns->syncp); + start = u64_stats_fetch_begin_irq(&ns->syncp); stats->tx_bytes = ns->tx_bytes; stats->tx_packets = ns->tx_packets; - } while (u64_stats_fetch_retry(&ns->syncp, start)); + } while (u64_stats_fetch_retry_irq(&ns->syncp, start)); } static int diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 330dab41f2fef..58998d8217784 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2316,9 +2316,9 @@ static inline u64 sta_get_tidstats_msdu(struct ieee80211_sta_rx_stats *rxstats, u64 value; do { - start = u64_stats_fetch_begin(&rxstats->syncp); + start = u64_stats_fetch_begin_irq(&rxstats->syncp); value = rxstats->msdu[tid]; - } while (u64_stats_fetch_retry(&rxstats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start)); return value; } @@ -2384,9 +2384,9 @@ static inline u64 sta_get_stats_bytes(struct ieee80211_sta_rx_stats *rxstats) u64 value; do { - start = u64_stats_fetch_begin(&rxstats->syncp); + start = u64_stats_fetch_begin_irq(&rxstats->syncp); value = rxstats->bytes; - } while (u64_stats_fetch_retry(&rxstats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start)); return value; } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 35b5f806fdda1..b52afe316dc41 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1079,9 +1079,9 @@ static void mpls_get_stats(struct mpls_dev *mdev, p = per_cpu_ptr(mdev->stats, i); do { - start = u64_stats_fetch_begin(&p->syncp); + start = u64_stats_fetch_begin_irq(&p->syncp); local = p->stats; - } while (u64_stats_fetch_retry(&p->syncp, start)); + } while (u64_stats_fetch_retry_irq(&p->syncp, start)); stats->rx_packets += local.rx_packets; stats->rx_bytes += local.rx_bytes; From f766f3abe6dbc9bf8b56a5d53c87e5a17942c154 Mon Sep 17 00:00:00 2001 From: Yonglin Tan Date: Tue, 19 Jul 2022 19:28:00 +0800 Subject: [PATCH 482/654] USB: serial: option: add Quectel EM060K modem Add usb product id entry for the Quectel EM060K module. "MBIM mode": DIAG + NMEA + AT + MODEM + MBIM + QDSS T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=030b Rev= 5.04 S: Manufacturer=Quectel S: Product=EM060K-GL S: SerialNumber=89fb57db C:* #Ifs= 7 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 8 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 8 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=88(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 9 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 9 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#=12 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Yonglin Tan [ johan: mention QDSS port and sort entries ] Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index cf65cb84c3cae..bd6d624aa3753 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -253,6 +253,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_BG96 0x0296 #define QUECTEL_PRODUCT_EP06 0x0306 #define QUECTEL_PRODUCT_EM05G 0x030a +#define QUECTEL_PRODUCT_EM060K 0x030b #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_EC200S_CN 0x6002 @@ -1142,6 +1143,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff), .driver_info = RSVD(6) | ZLP }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, From 001047ea241a9646010b2744451dfbc7289542f3 Mon Sep 17 00:00:00 2001 From: Niek Nooijens Date: Mon, 1 Aug 2022 10:39:25 +0200 Subject: [PATCH 483/654] USB: serial: ftdi_sio: add Omron CS1W-CIF31 device id works perfectly with: modprobe ftdi_sio echo "0590 00b2" | tee /sys/module/ftdi_sio/drivers/usb-serial\:ftdi_sio/new_id > /dev/null but doing this every reboot is a pain in the ass. Signed-off-by: Niek Nooijens Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 2 ++ drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index d5a3986dfee75..52d59be920342 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1045,6 +1045,8 @@ static const struct usb_device_id id_table_combined[] = { /* IDS GmbH devices */ { USB_DEVICE(IDS_VID, IDS_SI31A_PID) }, { USB_DEVICE(IDS_VID, IDS_CM31A_PID) }, + /* Omron devices */ + { USB_DEVICE(OMRON_VID, OMRON_CS1W_CIF31_PID) }, /* U-Blox devices */ { USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ZED_PID) }, { USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ODIN_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 4e92c165c86bf..31c8ccabbbb78 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -661,6 +661,12 @@ #define INFINEON_TRIBOARD_TC1798_PID 0x0028 /* DAS JTAG TriBoard TC1798 V1.0 */ #define INFINEON_TRIBOARD_TC2X7_PID 0x0043 /* DAS JTAG TriBoard TC2X7 V1.0 */ +/* + * Omron corporation (https://www.omron.com) + */ + #define OMRON_VID 0x0590 + #define OMRON_CS1W_CIF31_PID 0x00b2 + /* * Acton Research Corp. */ From 8ffe20d08f2c95d702c453020d03a4c568a988f0 Mon Sep 17 00:00:00 2001 From: Slark Xiao Date: Wed, 10 Aug 2022 11:30:50 +0800 Subject: [PATCH 484/654] USB: serial: option: add support for Cinterion MV32-WA/WB RmNet mode We added PIDs for MV32-WA/WB MBIM mode before, now we need to add support for RmNet mode. Test evidence as below: T: Bus=03 Lev=01 Prnt=01 Port=02 Cnt=03 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=1e2d ProdID=00f3 Rev=05.04 S: Manufacturer=Cinterion S: Product=Cinterion PID 0x00F3 USB Mobile Broadband S: SerialNumber=d7b4be8d C: #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan I: If#=0x1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option I: If#=0x3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option T: Bus=03 Lev=01 Prnt=01 Port=02 Cnt=03 Dev#= 10 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=1e2d ProdID=00f4 Rev=05.04 S: Manufacturer=Cinterion S: Product=Cinterion PID 0x00F4 USB Mobile Broadband S: SerialNumber=d095087d C: #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan I: If#=0x1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option I: If#=0x3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option Signed-off-by: Slark Xiao [ johan: sort entries ] Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index bd6d624aa3753..a5e8374a8d710 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -439,6 +439,8 @@ static void option_instat_callback(struct urb *urb); #define CINTERION_PRODUCT_MV31_2_RMNET 0x00b9 #define CINTERION_PRODUCT_MV32_WA 0x00f1 #define CINTERION_PRODUCT_MV32_WB 0x00f2 +#define CINTERION_PRODUCT_MV32_WA_RMNET 0x00f3 +#define CINTERION_PRODUCT_MV32_WB_RMNET 0x00f4 /* Olivetti products */ #define OLIVETTI_VENDOR_ID 0x0b3c @@ -2001,8 +2003,12 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0)}, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WA, 0xff), .driver_info = RSVD(3)}, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WA_RMNET, 0xff), + .driver_info = RSVD(0) }, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WB, 0xff), .driver_info = RSVD(3)}, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WB_RMNET, 0xff), + .driver_info = RSVD(0) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100), .driver_info = RSVD(4) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD120), From e79a7417072265a85a36775e4e0f798154d39808 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 5 Aug 2022 14:22:40 +0100 Subject: [PATCH 485/654] drm/i915/ttm: fix CCS handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Crucible + recent Mesa seems to sometimes hit: GEM_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER) And it looks like we can also trigger this with gem_lmem_swapping, if we modify the test to use slightly larger object sizes. Looking closer it looks like we have the following issues in migrate_copy(): - We are using plain integer in various places, which we can easily overflow with a large object. - We pass the entire object size (when the src is lmem) into emit_pte() and then try to copy it, which doesn't work, since we only have a few fixed sized windows in which to map the pages and perform the copy. With an object > 8M we therefore aren't properly copying the pages. And then with an object > 64M we trigger the GEM_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER). So it looks like our copy handling for any object > 8M (which is our CHUNK_SZ) is currently broken on DG2. Fixes: da0595ae91da ("drm/i915/migrate: Evict and restore the flatccs capable lmem obj") Testcase: igt@gem_lmem_swapping Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Ramalingam C Reviewed-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20220805132240.442747-2-matthew.auld@intel.com (cherry picked from commit 8676145eb2f53a9940ff70910caf0125bd8a4bc2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_migrate.c | 44 ++++++++++++------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 2b10b96b17b5b..933648cc90ff9 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -638,9 +638,9 @@ static int emit_copy(struct i915_request *rq, return 0; } -static int scatter_list_length(struct scatterlist *sg) +static u64 scatter_list_length(struct scatterlist *sg) { - int len = 0; + u64 len = 0; while (sg && sg_dma_len(sg)) { len += sg_dma_len(sg); @@ -650,28 +650,26 @@ static int scatter_list_length(struct scatterlist *sg) return len; } -static void +static int calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem, - int *src_sz, u32 bytes_to_cpy, u32 ccs_bytes_to_cpy) + u64 bytes_to_cpy, u64 ccs_bytes_to_cpy) { - if (ccs_bytes_to_cpy) { - if (!src_is_lmem) - /* - * When CHUNK_SZ is passed all the pages upto CHUNK_SZ - * will be taken for the blt. in Flat-ccs supported - * platform Smem obj will have more pages than required - * for main meory hence limit it to the required size - * for main memory - */ - *src_sz = min_t(int, bytes_to_cpy, CHUNK_SZ); - } else { /* ccs handling is not required */ - *src_sz = CHUNK_SZ; - } + if (ccs_bytes_to_cpy && !src_is_lmem) + /* + * When CHUNK_SZ is passed all the pages upto CHUNK_SZ + * will be taken for the blt. in Flat-ccs supported + * platform Smem obj will have more pages than required + * for main meory hence limit it to the required size + * for main memory + */ + return min_t(u64, bytes_to_cpy, CHUNK_SZ); + else + return CHUNK_SZ; } -static void get_ccs_sg_sgt(struct sgt_dma *it, u32 bytes_to_cpy) +static void get_ccs_sg_sgt(struct sgt_dma *it, u64 bytes_to_cpy) { - u32 len; + u64 len; do { GEM_BUG_ON(!it->sg || !sg_dma_len(it->sg)); @@ -702,12 +700,12 @@ intel_context_migrate_copy(struct intel_context *ce, { struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst), it_ccs; struct drm_i915_private *i915 = ce->engine->i915; - u32 ccs_bytes_to_cpy = 0, bytes_to_cpy; + u64 ccs_bytes_to_cpy = 0, bytes_to_cpy; enum i915_cache_level ccs_cache_level; u32 src_offset, dst_offset; u8 src_access, dst_access; struct i915_request *rq; - int src_sz, dst_sz; + u64 src_sz, dst_sz; bool ccs_is_src, overwrite_ccs; int err; @@ -790,8 +788,8 @@ intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; - calculate_chunk_sz(i915, src_is_lmem, &src_sz, - bytes_to_cpy, ccs_bytes_to_cpy); + src_sz = calculate_chunk_sz(i915, src_is_lmem, + bytes_to_cpy, ccs_bytes_to_cpy); len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, src_offset, src_sz); From 4595a25443447b9542b2a5ee7961eb290e94b496 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 11 Aug 2022 14:08:12 -0700 Subject: [PATCH 486/654] drm/i915/guc: clear stalled request after a reset If the GuC CTs are full and we need to stall the request submission while waiting for space, we save the stalled request and where the stall occurred; when the CTs have space again we pick up the request submission from where we left off. If a full GT reset occurs, the state of all contexts is cleared and all non-guilty requests are unsubmitted, therefore we need to restart the stalled request submission from scratch. To make sure that we do so, clear the saved request after a reset. Fixes note: the patch that introduced the bug is in 5.15, but no officially supported platform had GuC submission enabled by default in that kernel, so the backport to that particular version (and only that one) can potentially be skipped. Fixes: 925dc1cf58ed ("drm/i915/guc: Implement GuC submission tasklet") Signed-off-by: Daniele Ceraolo Spurio Cc: Matthew Brost Cc: John Harrison Cc: # v5.15+ Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220811210812.3239621-1-daniele.ceraolospurio@intel.com (cherry picked from commit f922fbb0f2ad1fd3e3186f39c46673419e6d9281) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 76916aed897ad..834c707d1877b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -4026,6 +4026,13 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc) /* make sure all descriptors are clean... */ xa_destroy(&guc->context_lookup); + /* + * A reset might have occurred while we had a pending stalled request, + * so make sure we clean that up. + */ + guc->stalled_request = NULL; + guc->submission_stall_reason = STALL_NONE; + /* * Some contexts might have been pinned before we enabled GuC * submission, so we need to add them to the GuC bookeeping. From e5e6886f4d7e34b6539abddb591d515271caa634 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 16 Aug 2022 18:37:20 +0300 Subject: [PATCH 487/654] drm/i915/dsi: filter invalid backlight and CABC ports Avoid using ports that aren't initialized in case the VBT backlight or CABC ports have invalid values. This fixes a NULL pointer dereference of intel_dsi->dsi_hosts[port] in such cases. Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula Reviewed-by: Stanislav Lisovskiy Link: https://patchwork.freedesktop.org/patch/msgid/b0f4f087866257d280eb97d6bcfcefd109cc5fa2.1660664162.git.jani.nikula@intel.com (cherry picked from commit f4a6c7a454a6e71c5ccf25af82694213a9784013) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/icl_dsi.c | 7 +++++++ drivers/gpu/drm/i915/display/vlv_dsi.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 5dcfa7feffa9f..885c74f60366b 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -2070,7 +2070,14 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) else intel_dsi->ports = BIT(port); + if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.bl_ports & ~intel_dsi->ports)) + intel_connector->panel.vbt.dsi.bl_ports &= intel_dsi->ports; + intel_dsi->dcs_backlight_ports = intel_connector->panel.vbt.dsi.bl_ports; + + if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.cabc_ports & ~intel_dsi->ports)) + intel_connector->panel.vbt.dsi.cabc_ports &= intel_dsi->ports; + intel_dsi->dcs_cabc_ports = intel_connector->panel.vbt.dsi.cabc_ports; for_each_dsi_port(port, intel_dsi->ports) { diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index b9b1fed998740..35136d26e5177 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1933,7 +1933,14 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) else intel_dsi->ports = BIT(port); + if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.bl_ports & ~intel_dsi->ports)) + intel_connector->panel.vbt.dsi.bl_ports &= intel_dsi->ports; + intel_dsi->dcs_backlight_ports = intel_connector->panel.vbt.dsi.bl_ports; + + if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.cabc_ports & ~intel_dsi->ports)) + intel_connector->panel.vbt.dsi.cabc_ports &= intel_dsi->ports; + intel_dsi->dcs_cabc_ports = intel_connector->panel.vbt.dsi.cabc_ports; /* Create a DSI host (and a device) for each port. */ From 875c6d2711f6c97e58c52288b4231f3072711d61 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 16 Aug 2022 18:37:21 +0300 Subject: [PATCH 488/654] drm/i915/dsi: fix dual-link DSI backlight and CABC ports for display 11+ The VBT dual-link DSI backlight and CABC still use ports A and C, both in Bspec and code, while display 11+ DSI only supports ports A and B. Assume port C actually means port B for display 11+ when parsing VBT. Bspec: 20154 Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6476 Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula Reviewed-by: Stanislav Lisovskiy Link: https://patchwork.freedesktop.org/patch/msgid/8c462718bcc7b36a83e09d0a5eef058b6bc8b1a2.1660664162.git.jani.nikula@intel.com (cherry picked from commit ab55165d73a444606af1530cd0d6448b04370f68) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_bios.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 51dde5bfd9565..198a2f4920cc4 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1596,6 +1596,8 @@ static void parse_dsi_backlight_ports(struct drm_i915_private *i915, struct intel_panel *panel, enum port port) { + enum port port_bc = DISPLAY_VER(i915) >= 11 ? PORT_B : PORT_C; + if (!panel->vbt.dsi.config->dual_link || i915->vbt.version < 197) { panel->vbt.dsi.bl_ports = BIT(port); if (panel->vbt.dsi.config->cabc_supported) @@ -1609,11 +1611,11 @@ static void parse_dsi_backlight_ports(struct drm_i915_private *i915, panel->vbt.dsi.bl_ports = BIT(PORT_A); break; case DL_DCS_PORT_C: - panel->vbt.dsi.bl_ports = BIT(PORT_C); + panel->vbt.dsi.bl_ports = BIT(port_bc); break; default: case DL_DCS_PORT_A_AND_C: - panel->vbt.dsi.bl_ports = BIT(PORT_A) | BIT(PORT_C); + panel->vbt.dsi.bl_ports = BIT(PORT_A) | BIT(port_bc); break; } @@ -1625,12 +1627,12 @@ static void parse_dsi_backlight_ports(struct drm_i915_private *i915, panel->vbt.dsi.cabc_ports = BIT(PORT_A); break; case DL_DCS_PORT_C: - panel->vbt.dsi.cabc_ports = BIT(PORT_C); + panel->vbt.dsi.cabc_ports = BIT(port_bc); break; default: case DL_DCS_PORT_A_AND_C: panel->vbt.dsi.cabc_ports = - BIT(PORT_A) | BIT(PORT_C); + BIT(PORT_A) | BIT(port_bc); break; } } From 2c4e19f873f4a389c2a557a84909cf4b78d3525a Mon Sep 17 00:00:00 2001 From: Arun R Murthy Date: Mon, 8 Aug 2022 09:27:50 +0530 Subject: [PATCH 489/654] drm/i915/display: avoid warnings when registering dual panel backlight Commit 20f85ef89d94 ("drm/i915/backlight: use unique backlight device names") added support for multiple backlight devices on dual panel systems, but did so with error handling on -EEXIST from backlight_device_register(). Unfortunately, that triggered a warning in dmesg all the way down from sysfs_add_file_mode_ns() and sysfs_warn_dup(). Instead of optimistically always attempting to register with the default name ("intel_backlight", which we have to retain for backward compatibility), check if a backlight device with the name exists first, and, if so, use the card and connector based name. v2: reworked on top of the patch commit 20f85ef89d94 ("drm/i915/backlight: use unique backlight device names") v3: fixed the ref count leak(Jani N) Fixes: 20f85ef89d94 ("drm/i915/backlight: use unique backlight device names") Signed-off-by: Arun R Murthy Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220808035750.3111046-1-arun.r.murthy@intel.com (cherry picked from commit 4234ea30051200fc6016de10e4d58369e60b38f1) Signed-off-by: Rodrigo Vivi --- .../gpu/drm/i915/display/intel_backlight.c | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 110fc98ec280d..9314464133729 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -971,26 +971,24 @@ int intel_backlight_device_register(struct intel_connector *connector) if (!name) return -ENOMEM; - bd = backlight_device_register(name, connector->base.kdev, connector, - &intel_backlight_device_ops, &props); - - /* - * Using the same name independent of the drm device or connector - * prevents registration of multiple backlight devices in the - * driver. However, we need to use the default name for backward - * compatibility. Use unique names for subsequent backlight devices as a - * fallback when the default name already exists. - */ - if (IS_ERR(bd) && PTR_ERR(bd) == -EEXIST) { + bd = backlight_device_get_by_name(name); + if (bd) { + put_device(&bd->dev); + /* + * Using the same name independent of the drm device or connector + * prevents registration of multiple backlight devices in the + * driver. However, we need to use the default name for backward + * compatibility. Use unique names for subsequent backlight devices as a + * fallback when the default name already exists. + */ kfree(name); name = kasprintf(GFP_KERNEL, "card%d-%s-backlight", i915->drm.primary->index, connector->base.name); if (!name) return -ENOMEM; - - bd = backlight_device_register(name, connector->base.kdev, connector, - &intel_backlight_device_ops, &props); } + bd = backlight_device_register(name, connector->base.kdev, connector, + &intel_backlight_device_ops, &props); if (IS_ERR(bd)) { drm_err(&i915->drm, From 51fbbe8a3f8b9dd128fa98f6ea36058dfa3f36de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Mon, 22 Aug 2022 17:08:36 +0300 Subject: [PATCH 490/654] drm/i915/backlight: Disable pps power hook for aux based backlight MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pps power hook seems to be problematic for backlight controlled via aux channel. Disable it for such cases. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3657 Cc: stable@vger.kernel.org Signed-off-by: Jouni Högander Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220822140836.534432-1-jouni.hogander@intel.com (cherry picked from commit 869e3bb7acb59d88c1226892136661810e8223a4) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_backlight.c | 11 ++++++++--- drivers/gpu/drm/i915/display/intel_dp.c | 2 -- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 9314464133729..f5e1d692976e7 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -16,6 +16,7 @@ #include "intel_dsi_dcs_backlight.h" #include "intel_panel.h" #include "intel_pci_config.h" +#include "intel_pps.h" /** * scale - scale values from one range to another @@ -1771,9 +1772,13 @@ void intel_backlight_init_funcs(struct intel_panel *panel) panel->backlight.pwm_funcs = &i9xx_pwm_funcs; } - if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP && - intel_dp_aux_init_backlight_funcs(connector) == 0) - return; + if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) { + if (intel_dp_aux_init_backlight_funcs(connector) == 0) + return; + + if (!(dev_priv->quirks & QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK)) + connector->panel.backlight.power = intel_pps_backlight_power; + } /* We're using a standard PWM backlight interface */ panel->backlight.funcs = &pwm_bl_funcs; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 32292c0be2bd0..ac90d455a7c73 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5293,8 +5293,6 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, intel_panel_init(intel_connector); - if (!(dev_priv->quirks & QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK)) - intel_connector->panel.backlight.power = intel_pps_backlight_power; intel_backlight_setup(intel_connector, pipe); intel_edp_add_properties(intel_dp); From 458ec0c8f35963626ccd51c3d50b752de5f1b9d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Bartosik?= Date: Tue, 1 Feb 2022 16:33:54 +0100 Subject: [PATCH 491/654] drm/i915: fix null pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Asus chromebook CX550 crashes during boot on v5.17-rc1 kernel. The root cause is null pointer defeference of bi_next in tgl_get_bw_info() in drivers/gpu/drm/i915/display/intel_bw.c. BUG: kernel NULL pointer dereference, address: 000000000000002e PGD 0 P4D 0 Oops: 0002 [#1] PREEMPT SMP NOPTI CPU: 0 PID: 1 Comm: swapper/0 Tainted: G U 5.17.0-rc1 Hardware name: Google Delbin/Delbin, BIOS Google_Delbin.13672.156.3 05/14/2021 RIP: 0010:tgl_get_bw_info+0x2de/0x510 ... [ 2.554467] Call Trace: [ 2.554467] [ 2.554467] intel_bw_init_hw+0x14a/0x434 [ 2.554467] ? _printk+0x59/0x73 [ 2.554467] ? _dev_err+0x77/0x91 [ 2.554467] i915_driver_hw_probe+0x329/0x33e [ 2.554467] i915_driver_probe+0x4c8/0x638 [ 2.554467] i915_pci_probe+0xf8/0x14e [ 2.554467] ? _raw_spin_unlock_irqrestore+0x12/0x2c [ 2.554467] pci_device_probe+0xaa/0x142 [ 2.554467] really_probe+0x13f/0x2f4 [ 2.554467] __driver_probe_device+0x9e/0xd3 [ 2.554467] driver_probe_device+0x24/0x7c [ 2.554467] __driver_attach+0xba/0xcf [ 2.554467] ? driver_attach+0x1f/0x1f [ 2.554467] bus_for_each_dev+0x8c/0xc0 [ 2.554467] bus_add_driver+0x11b/0x1f7 [ 2.554467] driver_register+0x60/0xea [ 2.554467] ? mipi_dsi_bus_init+0x16/0x16 [ 2.554467] i915_init+0x2c/0xb9 [ 2.554467] ? mipi_dsi_bus_init+0x16/0x16 [ 2.554467] do_one_initcall+0x12e/0x2b3 [ 2.554467] do_initcall_level+0xd6/0xf3 [ 2.554467] do_initcalls+0x4e/0x79 [ 2.554467] kernel_init_freeable+0xed/0x14d [ 2.554467] ? rest_init+0xc1/0xc1 [ 2.554467] kernel_init+0x1a/0x120 [ 2.554467] ret_from_fork+0x1f/0x30 [ 2.554467] ... Kernel panic - not syncing: Fatal exception Fixes: c64a9a7c05be ("drm/i915: Update memory bandwidth formulae") Signed-off-by: Łukasz Bartosik Reviewed-by: Radhakrishna Sripada Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220201153354.11971-1-lukasz.bartosik@semihalf.com (cherry picked from commit c247cd03898c4c43c3bce6d4014730403bc13032) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_bw.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 79269d2c476b2..3699869ab2dbc 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -404,15 +404,17 @@ static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel int clpchgroup; int j; - if (i < num_groups - 1) - bi_next = &dev_priv->max_bw[i + 1]; - clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i; - if (i < num_groups - 1 && clpchgroup < clperchgroup) - bi_next->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1; - else - bi_next->num_planes = 0; + if (i < num_groups - 1) { + bi_next = &dev_priv->max_bw[i + 1]; + + if (clpchgroup < clperchgroup) + bi_next->num_planes = (ipqdepth - clpchgroup) / + clpchgroup + 1; + else + bi_next->num_planes = 0; + } bi->num_qgv_points = qi.num_points; bi->num_psf_gv_points = qi.num_psf_points; From 919bef7a106ade2bda73681bbc2f3678198f44fc Mon Sep 17 00:00:00 2001 From: Diego Santa Cruz Date: Thu, 16 Jun 2022 15:41:37 +0300 Subject: [PATCH 492/654] drm/i915/glk: ECS Liva Q2 needs GLK HDMI port timing quirk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The quirk added in upstream commit 90c3e2198777 ("drm/i915/glk: Add Quirk for GLK NUC HDMI port issues.") is also required on the ECS Liva Q2. Note: Would be nicer to figure out the extra delay required for the retimer without quirks, however don't know how to check for that. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1326 Signed-off-by: Diego Santa Cruz Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220616124137.3184371-1-jani.nikula@intel.com (cherry picked from commit 08e9505fa8f9aa00072a47b6f234d89b6b27a89c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c index c8488f5ebd044..e415cd7c0b84b 100644 --- a/drivers/gpu/drm/i915/display/intel_quirks.c +++ b/drivers/gpu/drm/i915/display/intel_quirks.c @@ -191,6 +191,9 @@ static struct intel_quirk intel_quirks[] = { /* ASRock ITX*/ { 0x3185, 0x1849, 0x2212, quirk_increase_ddi_disabled_time }, { 0x3184, 0x1849, 0x2212, quirk_increase_ddi_disabled_time }, + /* ECS Liva Q2 */ + { 0x3185, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time }, + { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time }, }; void intel_init_quirks(struct drm_i915_private *i915) From 0211c2a0ea600e25db3044daaeff4fe41c3ed6d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 17 Jun 2022 22:59:48 +0300 Subject: [PATCH 493/654] drm/i915: Skip wm/ddb readout for disabled pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The stuff programmed into the wm/ddb registers of planes on disabled pipes doesn't matter. So during readout just leave our software state tracking for those zeroed. This should avoid us trying too hard to clean up after whatever mess the VBIOS/GOP left in there. The actual hardware state will get cleaned up if/when we enable the pipe anyway. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5711 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220617195948.24007-1-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy (cherry picked from commit b183db8f4783ca2efc9b47734f15aad9477a108a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_pm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f06babdb3a8cd..9fe4b583cc28a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6561,7 +6561,10 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv) enum plane_id plane_id; u8 slices; - skl_pipe_wm_get_hw_state(crtc, &crtc_state->wm.skl.optimal); + memset(&crtc_state->wm.skl.optimal, 0, + sizeof(crtc_state->wm.skl.optimal)); + if (crtc_state->hw.active) + skl_pipe_wm_get_hw_state(crtc, &crtc_state->wm.skl.optimal); crtc_state->wm.skl.raw = crtc_state->wm.skl.optimal; memset(&dbuf_state->ddb[pipe], 0, sizeof(dbuf_state->ddb[pipe])); @@ -6572,6 +6575,9 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv) struct skl_ddb_entry *ddb_y = &crtc_state->wm.skl.plane_ddb_y[plane_id]; + if (!crtc_state->hw.active) + continue; + skl_ddb_get_hw_plane_state(dev_priv, crtc->pipe, plane_id, ddb, ddb_y); From acc5495bf764e7e441193f972d96c5d2887f6b8b Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Fri, 22 Jul 2022 11:17:18 +0800 Subject: [PATCH 494/654] bus: mhi: host: Fix up null pointer access in mhi_irq_handler The irq handler for a shared IRQ ought to be prepared for running even now it's being freed. So let's check the pointer used by mhi_irq_handler to avoid null pointer access since it is probably released before freeing IRQ. Fixes: 1227d2a20cd7 ("bus: mhi: host: Move IRQ allocation to controller registration phase") Signed-off-by: Qiang Yu Reviewed-by: Manivannan Sadhasivam Tested-by: Kalle Valo Link: https://lore.kernel.org/r/1658459838-30802-1-git-send-email-quic_qianyu@quicinc.com [mani: added fixes tag] Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/host/main.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/bus/mhi/host/main.c b/drivers/bus/mhi/host/main.c index f3aef77a6a4a7..df0fbfee7b78b 100644 --- a/drivers/bus/mhi/host/main.c +++ b/drivers/bus/mhi/host/main.c @@ -430,12 +430,25 @@ irqreturn_t mhi_irq_handler(int irq_number, void *dev) { struct mhi_event *mhi_event = dev; struct mhi_controller *mhi_cntrl = mhi_event->mhi_cntrl; - struct mhi_event_ctxt *er_ctxt = - &mhi_cntrl->mhi_ctxt->er_ctxt[mhi_event->er_index]; + struct mhi_event_ctxt *er_ctxt; struct mhi_ring *ev_ring = &mhi_event->ring; - dma_addr_t ptr = le64_to_cpu(er_ctxt->rp); + dma_addr_t ptr; void *dev_rp; + /* + * If CONFIG_DEBUG_SHIRQ is set, the IRQ handler will get invoked during __free_irq() + * and by that time mhi_ctxt() would've freed. So check for the existence of mhi_ctxt + * before handling the IRQs. + */ + if (!mhi_cntrl->mhi_ctxt) { + dev_dbg(&mhi_cntrl->mhi_dev->dev, + "mhi_ctxt has been freed\n"); + return IRQ_HANDLED; + } + + er_ctxt = &mhi_cntrl->mhi_ctxt->er_ctxt[mhi_event->er_index]; + ptr = le64_to_cpu(er_ctxt->rp); + if (!is_valid_ring_ptr(ev_ring, ptr)) { dev_err(&mhi_cntrl->mhi_dev->dev, "Event ring rp points outside of the event ring\n"); From 9c9c71168f7979f3798b61c65b4530fbfbcf19d1 Mon Sep 17 00:00:00 2001 From: Greg Tulli Date: Mon, 29 Aug 2022 11:21:03 -0700 Subject: [PATCH 495/654] Input: iforce - add support for Boeder Force Feedback Wheel Add a new iforce_device entry to support the Boeder Force Feedback Wheel device. Signed-off-by: Greg Tulli Link: https://lore.kernel.org/r/3256420-c8ac-31b-8499-3c488a9880fd@gmail.com Signed-off-by: Dmitry Torokhov --- Documentation/input/joydev/joystick.rst | 1 + drivers/input/joystick/iforce/iforce-main.c | 1 + 2 files changed, 2 insertions(+) diff --git a/Documentation/input/joydev/joystick.rst b/Documentation/input/joydev/joystick.rst index f615906a0821b..6d721396717a2 100644 --- a/Documentation/input/joydev/joystick.rst +++ b/Documentation/input/joydev/joystick.rst @@ -517,6 +517,7 @@ All I-Force devices are supported by the iforce module. This includes: * AVB Mag Turbo Force * AVB Top Shot Pegasus * AVB Top Shot Force Feedback Racing Wheel +* Boeder Force Feedback Wheel * Logitech WingMan Force * Logitech WingMan Force Wheel * Guillemot Race Leader Force Feedback diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c index b2a68bc9f0b4d..b86de1312512b 100644 --- a/drivers/input/joystick/iforce/iforce-main.c +++ b/drivers/input/joystick/iforce/iforce-main.c @@ -50,6 +50,7 @@ static struct iforce_device iforce_device[] = { { 0x046d, 0xc291, "Logitech WingMan Formula Force", btn_wheel, abs_wheel, ff_iforce }, { 0x05ef, 0x020a, "AVB Top Shot Pegasus", btn_joystick_avb, abs_avb_pegasus, ff_iforce }, { 0x05ef, 0x8884, "AVB Mag Turbo Force", btn_wheel, abs_wheel, ff_iforce }, + { 0x05ef, 0x8886, "Boeder Force Feedback Wheel", btn_wheel, abs_wheel, ff_iforce }, { 0x05ef, 0x8888, "AVB Top Shot Force Feedback Racing Wheel", btn_wheel, abs_wheel, ff_iforce }, //? { 0x061c, 0xc0a4, "ACT LABS Force RS", btn_wheel, abs_wheel, ff_iforce }, //? { 0x061c, 0xc084, "ACT LABS Force RS", btn_wheel, abs_wheel, ff_iforce }, From dcf8e5633e2e69ad60b730ab5905608b756a032f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 23 Aug 2022 12:59:25 -0700 Subject: [PATCH 496/654] tracing: Define the is_signed_type() macro once There are two definitions of the is_signed_type() macro: one in and a second definition in . As suggested by Linus, move the definition of the is_signed_type() macro into the header file. Change the definition of the is_signed_type() macro to make sure that it does not trigger any sparse warnings with future versions of sparse for bitwise types. Link: https://lore.kernel.org/all/CAHk-=whjH6p+qzwUdx5SOVVHjS3WvzJQr6mDUwhEyTf6pJWzaQ@mail.gmail.com/ Link: https://lore.kernel.org/all/CAHk-=wjQGnVfb4jehFR0XyZikdQvCZouE96xR_nnf5kqaM5qqQ@mail.gmail.com/ Cc: Rasmus Villemoes Cc: Steven Rostedt Acked-by: Kees Cook Signed-off-by: Bart Van Assche Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 6 ++++++ include/linux/overflow.h | 1 - include/linux/trace_events.h | 2 -- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 01ce94b58b423..7713d7bcdaea9 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -239,6 +239,12 @@ static inline void *offset_to_ptr(const int *off) /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +/* + * Whether 'type' is a signed type or an unsigned type. Supports scalar types, + * bool and also pointer types. + */ +#define is_signed_type(type) (((type)(-1)) < (__force type)1) + /* * This is needed in functions which generate the stack canary, see * arch/x86/kernel/smpboot.c::start_secondary() for an example. diff --git a/include/linux/overflow.h b/include/linux/overflow.h index f1221d11f8e57..0eb3b192f07ab 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -30,7 +30,6 @@ * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html - * credit to Christian Biere. */ -#define is_signed_type(type) (((type)(-1)) < (type)1) #define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) #define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) #define type_min(T) ((T)((T)-type_max(T)-(T)1)) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index b18759a673c66..8401dec93c155 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -814,8 +814,6 @@ extern int trace_add_event_call(struct trace_event_call *call); extern int trace_remove_event_call(struct trace_event_call *call); extern int trace_event_get_offsets(struct trace_event_call *call); -#define is_signed_type(type) (((type)(-1)) < (type)1) - int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); int trace_set_clr_event(const char *system, const char *event, int set); int trace_array_set_clr_event(struct trace_array *tr, const char *system, From f233d2be38dbbb22299192292983037f01ab363c Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 30 Aug 2022 03:11:01 +0200 Subject: [PATCH 497/654] hwmon: (gpio-fan) Fix array out of bounds access The driver does not check if the cooling state passed to gpio_fan_set_cur_state() exceeds the maximum cooling state as stored in fan_data->num_speeds. Since the cooling state is later used as an array index in set_fan_speed(), an array out of bounds access can occur. This can be exploited by setting the state of the thermal cooling device to arbitrary values, causing for example a kernel oops when unavailable memory is accessed this way. Example kernel oops: [ 807.987276] Unable to handle kernel paging request at virtual address ffffff80d0588064 [ 807.987369] Mem abort info: [ 807.987398] ESR = 0x96000005 [ 807.987428] EC = 0x25: DABT (current EL), IL = 32 bits [ 807.987477] SET = 0, FnV = 0 [ 807.987507] EA = 0, S1PTW = 0 [ 807.987536] FSC = 0x05: level 1 translation fault [ 807.987570] Data abort info: [ 807.987763] ISV = 0, ISS = 0x00000005 [ 807.987801] CM = 0, WnR = 0 [ 807.987832] swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000001165000 [ 807.987872] [ffffff80d0588064] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 [ 807.987961] Internal error: Oops: 96000005 [#1] PREEMPT SMP [ 807.987992] Modules linked in: cmac algif_hash aes_arm64 algif_skcipher af_alg bnep hci_uart btbcm bluetooth ecdh_generic ecc 8021q garp stp llc snd_soc_hdmi_codec brcmfmac vc4 brcmutil cec drm_kms_helper snd_soc_core cfg80211 snd_compress bcm2835_codec(C) snd_pcm_dmaengine syscopyarea bcm2835_isp(C) bcm2835_v4l2(C) sysfillrect v4l2_mem2mem bcm2835_mmal_vchiq(C) raspberrypi_hwmon sysimgblt videobuf2_dma_contig videobuf2_vmalloc fb_sys_fops videobuf2_memops rfkill videobuf2_v4l2 videobuf2_common i2c_bcm2835 snd_bcm2835(C) videodev snd_pcm snd_timer snd mc vc_sm_cma(C) gpio_fan uio_pdrv_genirq uio drm fuse drm_panel_orientation_quirks backlight ip_tables x_tables ipv6 [ 807.988508] CPU: 0 PID: 1321 Comm: bash Tainted: G C 5.15.56-v8+ #1575 [ 807.988548] Hardware name: Raspberry Pi 3 Model B Rev 1.2 (DT) [ 807.988574] pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 807.988608] pc : set_fan_speed.part.5+0x34/0x80 [gpio_fan] [ 807.988654] lr : gpio_fan_set_cur_state+0x34/0x50 [gpio_fan] [ 807.988691] sp : ffffffc008cf3bd0 [ 807.988710] x29: ffffffc008cf3bd0 x28: ffffff80019edac0 x27: 0000000000000000 [ 807.988762] x26: 0000000000000000 x25: 0000000000000000 x24: ffffff800747c920 [ 807.988787] x23: 000000000000000a x22: ffffff800369f000 x21: 000000001999997c [ 807.988854] x20: ffffff800369f2e8 x19: ffffff8002ae8080 x18: 0000000000000000 [ 807.988877] x17: 0000000000000000 x16: 0000000000000000 x15: 000000559e271b70 [ 807.988938] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 [ 807.988960] x11: 0000000000000000 x10: ffffffc008cf3c20 x9 : ffffffcfb60c741c [ 807.989018] x8 : 000000000000000a x7 : 00000000ffffffc9 x6 : 0000000000000009 [ 807.989040] x5 : 000000000000002a x4 : 0000000000000000 x3 : ffffff800369f2e8 [ 807.989062] x2 : 000000000000e780 x1 : 0000000000000001 x0 : ffffff80d0588060 [ 807.989084] Call trace: [ 807.989091] set_fan_speed.part.5+0x34/0x80 [gpio_fan] [ 807.989113] gpio_fan_set_cur_state+0x34/0x50 [gpio_fan] [ 807.989199] cur_state_store+0x84/0xd0 [ 807.989221] dev_attr_store+0x20/0x38 [ 807.989262] sysfs_kf_write+0x4c/0x60 [ 807.989282] kernfs_fop_write_iter+0x130/0x1c0 [ 807.989298] new_sync_write+0x10c/0x190 [ 807.989315] vfs_write+0x254/0x378 [ 807.989362] ksys_write+0x70/0xf8 [ 807.989379] __arm64_sys_write+0x24/0x30 [ 807.989424] invoke_syscall+0x4c/0x110 [ 807.989442] el0_svc_common.constprop.3+0xfc/0x120 [ 807.989458] do_el0_svc+0x2c/0x90 [ 807.989473] el0_svc+0x24/0x60 [ 807.989544] el0t_64_sync_handler+0x90/0xb8 [ 807.989558] el0t_64_sync+0x1a0/0x1a4 [ 807.989579] Code: b9403801 f9402800 7100003f 8b35cc00 (b9400416) [ 807.989627] ---[ end trace 8ded4c918658445b ]--- Fix this by checking the cooling state and return an error if it exceeds the maximum cooling state. Tested on a Raspberry Pi 3. Fixes: b5cf88e46bad ("(gpio-fan): Add thermal control hooks") Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20220830011101.178843-1-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- drivers/hwmon/gpio-fan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hwmon/gpio-fan.c b/drivers/hwmon/gpio-fan.c index befe989ca7b94..fbf3f5a4ecb67 100644 --- a/drivers/hwmon/gpio-fan.c +++ b/drivers/hwmon/gpio-fan.c @@ -391,6 +391,9 @@ static int gpio_fan_set_cur_state(struct thermal_cooling_device *cdev, if (!fan_data) return -EINVAL; + if (state >= fan_data->num_speed) + return -EINVAL; + set_fan_speed(fan_data, state); return 0; } From b05972f01e7d30419987a1f221b5593668fd6448 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 26 Aug 2022 09:39:30 +0800 Subject: [PATCH 498/654] net: sched: tbf: don't call qdisc_put() while holding tree lock The issue is the same to commit c2999f7fb05b ("net: sched: multiq: don't call qdisc_put() while holding tree lock"). Qdiscs call qdisc_put() while holding sch tree spinlock, which results sleeping-while-atomic BUG. Fixes: c266f64dbfa2 ("net: sched: protect block state with mutex") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20220826013930.340121-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni --- net/sched/sch_tbf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 72102277449e1..36079fdde2cb5 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -356,6 +356,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, struct nlattr *tb[TCA_TBF_MAX + 1]; struct tc_tbf_qopt *qopt; struct Qdisc *child = NULL; + struct Qdisc *old = NULL; struct psched_ratecfg rate; struct psched_ratecfg peak; u64 max_size; @@ -447,7 +448,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); if (child) { qdisc_tree_flush_backlog(q->qdisc); - qdisc_put(q->qdisc); + old = q->qdisc; q->qdisc = child; } q->limit = qopt->limit; @@ -467,6 +468,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg)); sch_tree_unlock(sch); + qdisc_put(old); err = 0; tbf_offload_change(sch); From 566f9c9f89337792070b5a6062dff448b3e7977f Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 30 Jul 2022 20:50:18 +0200 Subject: [PATCH 499/654] vt: Clear selection before changing the font When changing the console font with ioctl(KDFONTOP) the new font size can be bigger than the previous font. A previous selection may thus now be outside of the new screen size and thus trigger out-of-bounds accesses to graphics memory if the selection is removed in vc_do_resize(). Prevent such out-of-memory accesses by dropping the selection before the various con_font_set() console handlers are called. Reported-by: syzbot+14b0e8f3fd1612e35350@syzkaller.appspotmail.com Cc: stable Tested-by: Khalid Masum Signed-off-by: Helge Deller Link: https://lore.kernel.org/r/YuV9apZGNmGfjcor@p100 Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index ae9c926acd6f9..0b669c82ddc92 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -4662,9 +4662,11 @@ static int con_font_set(struct vc_data *vc, struct console_font_op *op) console_lock(); if (vc->vc_mode != KD_TEXT) rc = -EINVAL; - else if (vc->vc_sw->con_font_set) + else if (vc->vc_sw->con_font_set) { + if (vc_is_sel(vc)) + clear_selection(); rc = vc->vc_sw->con_font_set(vc, &font, op->flags); - else + } else rc = -ENOSYS; console_unlock(); kfree(font.data); @@ -4691,9 +4693,11 @@ static int con_font_default(struct vc_data *vc, struct console_font_op *op) console_unlock(); return -EINVAL; } - if (vc->vc_sw->con_font_default) + if (vc->vc_sw->con_font_default) { + if (vc_is_sel(vc)) + clear_selection(); rc = vc->vc_sw->con_font_default(vc, &font, s); - else + } else rc = -ENOSYS; console_unlock(); if (!rc) { From 846651eca073e2e02e37490a4a52752415d84781 Mon Sep 17 00:00:00 2001 From: Shenwei Wang Date: Fri, 5 Aug 2022 09:45:29 -0500 Subject: [PATCH 500/654] serial: fsl_lpuart: RS485 RTS polariy is inverse The setting of RS485 RTS polarity is inverse in the current driver. When the property of 'rs485-rts-active-low' is enabled in the dts node, the RTS signal should be LOW during sending. Otherwise, if there is no such a property, the RTS should be HIGH during sending. Fixes: 03895cf41d18 ("tty: serial: fsl_lpuart: Add support for RS-485") Cc: stable Signed-off-by: Nicolas Diaz Signed-off-by: Shenwei Wang Link: https://lore.kernel.org/r/20220805144529.604856-1-shenwei.wang@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index f6c33cd228c8a..e9ba0f0bd4411 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -1394,9 +1394,9 @@ static int lpuart_config_rs485(struct uart_port *port, struct ktermios *termios, * Note: UART is assumed to be active high. */ if (rs485->flags & SER_RS485_RTS_ON_SEND) - modem &= ~UARTMODEM_TXRTSPOL; - else if (rs485->flags & SER_RS485_RTS_AFTER_SEND) modem |= UARTMODEM_TXRTSPOL; + else if (rs485->flags & SER_RS485_RTS_AFTER_SEND) + modem &= ~UARTMODEM_TXRTSPOL; } writeb(modem, sport->port.membase + UARTMODEM); From 56c14fb4086b2de6921dd70251b19b364b909ea1 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Thu, 18 Aug 2022 13:50:26 +0200 Subject: [PATCH 501/654] tty: Fix lookahead_buf crash with serdev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not follow a NULL pointer if the tty_port_client_operations does not implement the ->lookahead_buf() callback, which is the case with serdev's ttyport. Reported-by: Hans de Goede Fixes: 6bb6fa6908ebd3 ("tty: Implement lookahead to process XON/XOFF timely") Reviewed-by: Ilpo Järvinen Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20220818115026.2237893-1-vincent.whitchurch@axis.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_buffer.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 9fdecc795b6be..5e287dedce018 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -470,7 +470,6 @@ static void lookahead_bufs(struct tty_port *port, struct tty_buffer *head) while (head) { struct tty_buffer *next; - unsigned char *p, *f = NULL; unsigned int count; /* @@ -489,11 +488,16 @@ static void lookahead_bufs(struct tty_port *port, struct tty_buffer *head) continue; } - p = char_buf_ptr(head, head->lookahead); - if (~head->flags & TTYB_NORMAL) - f = flag_buf_ptr(head, head->lookahead); + if (port->client_ops->lookahead_buf) { + unsigned char *p, *f = NULL; + + p = char_buf_ptr(head, head->lookahead); + if (~head->flags & TTYB_NORMAL) + f = flag_buf_ptr(head, head->lookahead); + + port->client_ops->lookahead_buf(port, p, f, count); + } - port->client_ops->lookahead_buf(port, p, f, count); head->lookahead += count; } } From d5a2e0834364377a5d5a2fff1890a0b3f0bafd1f Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Sun, 21 Aug 2022 18:15:27 +0800 Subject: [PATCH 502/654] tty: serial: lpuart: disable flow control while waiting for the transmit engine to complete When the user initializes the uart port, and waits for the transmit engine to complete in lpuart32_set_termios(), if the UART TX fifo has dirty data and the UARTMODIR enable the flow control, the TX fifo may never be empty. So here we should disable the flow control first to make sure the transmit engin can complete. Fixes: 380c966c093e ("tty: serial: fsl_lpuart: add 32-bit register interface support") Cc: stable Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20220821101527.10066-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index e9ba0f0bd4411..b20f6f2fa51ce 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2191,6 +2191,7 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, uart_update_timeout(port, termios->c_cflag, baud); /* wait transmit engin complete */ + lpuart32_write(&sport->port, 0, UARTMODIR); lpuart32_wait_bit_set(&sport->port, UARTSTAT, UARTSTAT_TC); /* disable transmit and receive */ From 692a8ebcfc24f4a5bea0eb2967e450f584193da6 Mon Sep 17 00:00:00 2001 From: Sergiu Moga Date: Wed, 24 Aug 2022 17:29:03 +0300 Subject: [PATCH 503/654] tty: serial: atmel: Preserve previous USART mode if RS485 disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Whenever the atmel_rs485_config() driver method would be called, the USART mode is reset to normal mode before even checking if RS485 flag is set, thus resulting in losing the previous USART mode in the case where the checking fails. Some tools, such as `linux-serial-test`, lead to the driver calling this method when doing the setup of the serial port: after setting the port mode (Hardware Flow Control, Normal Mode, RS485 Mode, etc.), `linux-serial-test` tries to enable/disable RS485 depending on the commandline arguments that were passed. Example of how this issue could reveal itself: When doing a serial communication with Hardware Flow Control through `linux-serial-test`, the tool would lead to the driver roughly doing the following: - set the corresponding bit to 1 (ATMEL_US_USMODE_HWHS bit in the ATMEL_US_MR register) through the atmel_set_termios() to enable Hardware Flow Control - disable RS485 through the atmel_config_rs485() method Thus, when the latter is called, the mode will be reset and the previously set bit is unset, leaving USART in normal mode instead of the expected Hardware Flow Control mode. This fix ensures that this reset is only done if the checking for RS485 succeeds and that the previous mode is preserved otherwise. Fixes: e8faff7330a35 ("ARM: 6092/1: atmel_serial: support for RS485 communications") Cc: stable Reviewed-by: Ilpo Järvinen Signed-off-by: Sergiu Moga Link: https://lore.kernel.org/r/20220824142902.502596-1-sergiu.moga@microchip.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 30ba9eef7b395..7450d38530318 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -294,9 +294,6 @@ static int atmel_config_rs485(struct uart_port *port, struct ktermios *termios, mode = atmel_uart_readl(port, ATMEL_US_MR); - /* Resetting serial mode to RS232 (0x0) */ - mode &= ~ATMEL_US_USMODE; - if (rs485conf->flags & SER_RS485_ENABLED) { dev_dbg(port->dev, "Setting UART to RS485\n"); if (rs485conf->flags & SER_RS485_RX_DURING_TX) @@ -306,6 +303,7 @@ static int atmel_config_rs485(struct uart_port *port, struct ktermios *termios, atmel_uart_writel(port, ATMEL_US_TTGR, rs485conf->delay_rts_after_send); + mode &= ~ATMEL_US_USMODE; mode |= ATMEL_US_USMODE_RS485; } else { dev_dbg(port->dev, "Setting UART to RS232\n"); From f16c6d2e58a4c2b972efcf9eb12390ee0ba3befb Mon Sep 17 00:00:00 2001 From: Mazin Al Haddad Date: Sun, 14 Aug 2022 04:52:12 +0300 Subject: [PATCH 504/654] tty: n_gsm: add sanity check for gsm->receive in gsm_receive_buf() A null pointer dereference can happen when attempting to access the "gsm->receive()" function in gsmld_receive_buf(). Currently, the code assumes that gsm->recieve is only called after MUX activation. Since the gsmld_receive_buf() function can be accessed without the need to initialize the MUX, the gsm->receive() function will not be set and a NULL pointer dereference will occur. Fix this by avoiding the call to "gsm->receive()" in case the function is not initialized by adding a sanity check. Call Trace: gsmld_receive_buf+0x1c2/0x2f0 drivers/tty/n_gsm.c:2861 tiocsti drivers/tty/tty_io.c:2293 [inline] tty_ioctl+0xa75/0x15d0 drivers/tty/tty_io.c:2692 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Link: https://syzkaller.appspot.com/bug?id=bdf035c61447f8c6e0e6920315d577cb5cc35ac5 Fixes: 01aecd917114 ("tty: n_gsm: fix tty registration before control channel open") Cc: stable Reported-and-tested-by: syzbot+e3563f0c94e188366dbb@syzkaller.appspotmail.com Signed-off-by: Mazin Al Haddad Link: https://lore.kernel.org/r/20220814015211.84180-1-mazinalhaddad05@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index caa5c14ed57f0..38688cb16c20b 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2858,7 +2858,8 @@ static void gsmld_receive_buf(struct tty_struct *tty, const unsigned char *cp, flags = *fp++; switch (flags) { case TTY_NORMAL: - gsm->receive(gsm, *cp); + if (gsm->receive) + gsm->receive(gsm, *cp); break; case TTY_OVERRUN: case TTY_BREAK: From 4bb1a53be85fcb1e24c14860e326a00cdd362c28 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 27 Aug 2022 22:47:19 +0900 Subject: [PATCH 505/654] tty: n_gsm: initialize more members at gsm_alloc_mux() syzbot is reporting use of uninitialized spinlock at gsmld_write() [1], for commit 32dd59f96924f45e ("tty: n_gsm: fix race condition in gsmld_write()") allows accessing gsm->tx_lock before gsm_activate_mux() initializes it. Since object initialization should be done right after allocation in order to avoid accessing uninitialized memory, move initialization of timer/work/waitqueue/spinlock from gsmld_open()/gsm_activate_mux() to gsm_alloc_mux(). Link: https://syzkaller.appspot.com/bug?extid=cf155def4e717db68a12 [1] Fixes: 32dd59f96924f45e ("tty: n_gsm: fix race condition in gsmld_write()") Reported-by: syzbot Tested-by: syzbot Cc: stable Acked-by: Jiri Slaby Signed-off-by: Tetsuo Handa Link: https://lore.kernel.org/r/2110618e-57f0-c1ce-b2ad-b6cacef3f60e@I-love.SAKURA.ne.jp Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 38688cb16c20b..d6598ca3640f1 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2501,13 +2501,6 @@ static int gsm_activate_mux(struct gsm_mux *gsm) if (dlci == NULL) return -ENOMEM; - timer_setup(&gsm->kick_timer, gsm_kick_timer, 0); - timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0); - INIT_WORK(&gsm->tx_work, gsmld_write_task); - init_waitqueue_head(&gsm->event); - spin_lock_init(&gsm->control_lock); - spin_lock_init(&gsm->tx_lock); - if (gsm->encoding == 0) gsm->receive = gsm0_receive; else @@ -2612,6 +2605,12 @@ static struct gsm_mux *gsm_alloc_mux(void) kref_init(&gsm->ref); INIT_LIST_HEAD(&gsm->tx_ctrl_list); INIT_LIST_HEAD(&gsm->tx_data_list); + timer_setup(&gsm->kick_timer, gsm_kick_timer, 0); + timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0); + INIT_WORK(&gsm->tx_work, gsmld_write_task); + init_waitqueue_head(&gsm->event); + spin_lock_init(&gsm->control_lock); + spin_lock_init(&gsm->tx_lock); gsm->t1 = T1; gsm->t2 = T2; @@ -2947,10 +2946,6 @@ static int gsmld_open(struct tty_struct *tty) gsmld_attach_gsm(tty, gsm); - timer_setup(&gsm->kick_timer, gsm_kick_timer, 0); - timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0); - INIT_WORK(&gsm->tx_work, gsmld_write_task); - return 0; } From c9ab053e56ce13a949977398c8edc12e6c02fc95 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Mon, 29 Aug 2022 16:16:39 +0300 Subject: [PATCH 506/654] tty: n_gsm: replace kicktimer with delayed_work A kick_timer timer_list is replaced with kick_timeout delayed_work to be able to synchronize with mutexes as a prerequisite for the introduction of tx_mutex. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: c568f7086c6e ("tty: n_gsm: fix missing timer to handle stalled links") Cc: stable Reviewed-by: Jiri Slaby Suggested-by: Hillf Danton Signed-off-by: Fedor Pchelkin Signed-off-by: Alexey Khoroshilov Link: https://lore.kernel.org/r/20220829131640.69254-2-pchelkin@ispras.ru Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index d6598ca3640f1..e23225aff5d9c 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -256,7 +256,7 @@ struct gsm_mux { struct list_head tx_data_list; /* Pending data packets */ /* Control messages */ - struct timer_list kick_timer; /* Kick TX queuing on timeout */ + struct delayed_work kick_timeout; /* Kick TX queuing on timeout */ struct timer_list t2_timer; /* Retransmit timer for commands */ int cretries; /* Command retry counter */ struct gsm_control *pending_cmd;/* Our current pending command */ @@ -1009,7 +1009,7 @@ static void __gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) gsm->tx_bytes += msg->len; gsmld_write_trigger(gsm); - mod_timer(&gsm->kick_timer, jiffies + 10 * gsm->t1 * HZ / 100); + schedule_delayed_work(&gsm->kick_timeout, 10 * gsm->t1 * HZ / 100); } /** @@ -1984,16 +1984,16 @@ static void gsm_dlci_command(struct gsm_dlci *dlci, const u8 *data, int len) } /** - * gsm_kick_timer - transmit if possible - * @t: timer contained in our gsm object + * gsm_kick_timeout - transmit if possible + * @work: work contained in our gsm object * * Transmit data from DLCIs if the queue is empty. We can't rely on * a tty wakeup except when we filled the pipe so we need to fire off * new data ourselves in other cases. */ -static void gsm_kick_timer(struct timer_list *t) +static void gsm_kick_timeout(struct work_struct *work) { - struct gsm_mux *gsm = from_timer(gsm, t, kick_timer); + struct gsm_mux *gsm = container_of(work, struct gsm_mux, kick_timeout.work); unsigned long flags; int sent = 0; @@ -2458,7 +2458,7 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc) } /* Finish outstanding timers, making sure they are done */ - del_timer_sync(&gsm->kick_timer); + cancel_delayed_work_sync(&gsm->kick_timeout); del_timer_sync(&gsm->t2_timer); /* Finish writing to ldisc */ @@ -2605,7 +2605,7 @@ static struct gsm_mux *gsm_alloc_mux(void) kref_init(&gsm->ref); INIT_LIST_HEAD(&gsm->tx_ctrl_list); INIT_LIST_HEAD(&gsm->tx_data_list); - timer_setup(&gsm->kick_timer, gsm_kick_timer, 0); + INIT_DELAYED_WORK(&gsm->kick_timeout, gsm_kick_timeout); timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0); INIT_WORK(&gsm->tx_work, gsmld_write_task); init_waitqueue_head(&gsm->event); From 902e02ea9385373ce4b142576eef41c642703955 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Mon, 29 Aug 2022 16:16:40 +0300 Subject: [PATCH 507/654] tty: n_gsm: avoid call of sleeping functions from atomic context Syzkaller reports the following problem: BUG: sleeping function called from invalid context at kernel/printk/printk.c:2347 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 1105, name: syz-executor423 3 locks held by syz-executor423/1105: #0: ffff8881468b9098 (&tty->ldisc_sem){++++}-{0:0}, at: tty_ldisc_ref_wait+0x22/0x90 drivers/tty/tty_ldisc.c:266 #1: ffff8881468b9130 (&tty->atomic_write_lock){+.+.}-{3:3}, at: tty_write_lock drivers/tty/tty_io.c:952 [inline] #1: ffff8881468b9130 (&tty->atomic_write_lock){+.+.}-{3:3}, at: do_tty_write drivers/tty/tty_io.c:975 [inline] #1: ffff8881468b9130 (&tty->atomic_write_lock){+.+.}-{3:3}, at: file_tty_write.constprop.0+0x2a8/0x8e0 drivers/tty/tty_io.c:1118 #2: ffff88801b06c398 (&gsm->tx_lock){....}-{2:2}, at: gsmld_write+0x5e/0x150 drivers/tty/n_gsm.c:2717 irq event stamp: 3482 hardirqs last enabled at (3481): [] __get_reqs_available+0x143/0x2f0 fs/aio.c:946 hardirqs last disabled at (3482): [] __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:108 [inline] hardirqs last disabled at (3482): [] _raw_spin_lock_irqsave+0x52/0x60 kernel/locking/spinlock.c:159 softirqs last enabled at (3408): [] asm_call_irq_on_stack+0x12/0x20 softirqs last disabled at (3401): [] asm_call_irq_on_stack+0x12/0x20 Preemption disabled at: [<0000000000000000>] 0x0 CPU: 2 PID: 1105 Comm: syz-executor423 Not tainted 5.10.137-syzkaller #0 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x107/0x167 lib/dump_stack.c:118 ___might_sleep.cold+0x1e8/0x22e kernel/sched/core.c:7304 console_lock+0x19/0x80 kernel/printk/printk.c:2347 do_con_write+0x113/0x1de0 drivers/tty/vt/vt.c:2909 con_write+0x22/0xc0 drivers/tty/vt/vt.c:3296 gsmld_write+0xd0/0x150 drivers/tty/n_gsm.c:2720 do_tty_write drivers/tty/tty_io.c:1028 [inline] file_tty_write.constprop.0+0x502/0x8e0 drivers/tty/tty_io.c:1118 call_write_iter include/linux/fs.h:1903 [inline] aio_write+0x355/0x7b0 fs/aio.c:1580 __io_submit_one fs/aio.c:1952 [inline] io_submit_one+0xf45/0x1a90 fs/aio.c:1999 __do_sys_io_submit fs/aio.c:2058 [inline] __se_sys_io_submit fs/aio.c:2028 [inline] __x64_sys_io_submit+0x18c/0x2f0 fs/aio.c:2028 do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x61/0xc6 The problem happens in the following control flow: gsmld_write(...) spin_lock_irqsave(&gsm->tx_lock, flags) // taken a spinlock on TX data con_write(...) do_con_write(...) console_lock() might_sleep() // -> bug As far as console_lock() might sleep it should not be called with spinlock held. The patch replaces tx_lock spinlock with mutex in order to avoid the problem. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 32dd59f96924 ("tty: n_gsm: fix race condition in gsmld_write()") Cc: stable Signed-off-by: Fedor Pchelkin Signed-off-by: Alexey Khoroshilov Link: https://lore.kernel.org/r/20220829131640.69254-3-pchelkin@ispras.ru Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 53 ++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index e23225aff5d9c..01c112e2e2142 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -248,7 +248,7 @@ struct gsm_mux { bool constipated; /* Asked by remote to shut up */ bool has_devices; /* Devices were registered */ - spinlock_t tx_lock; + struct mutex tx_mutex; unsigned int tx_bytes; /* TX data outstanding */ #define TX_THRESH_HI 8192 #define TX_THRESH_LO 2048 @@ -680,7 +680,6 @@ static int gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) struct gsm_msg *msg; u8 *dp; int ocr; - unsigned long flags; msg = gsm_data_alloc(gsm, addr, 0, control); if (!msg) @@ -702,10 +701,10 @@ static int gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) gsm_print_packet("Q->", addr, cr, control, NULL, 0); - spin_lock_irqsave(&gsm->tx_lock, flags); + mutex_lock(&gsm->tx_mutex); list_add_tail(&msg->list, &gsm->tx_ctrl_list); gsm->tx_bytes += msg->len; - spin_unlock_irqrestore(&gsm->tx_lock, flags); + mutex_unlock(&gsm->tx_mutex); gsmld_write_trigger(gsm); return 0; @@ -730,7 +729,7 @@ static void gsm_dlci_clear_queues(struct gsm_mux *gsm, struct gsm_dlci *dlci) spin_unlock_irqrestore(&dlci->lock, flags); /* Clear data packets in MUX write queue */ - spin_lock_irqsave(&gsm->tx_lock, flags); + mutex_lock(&gsm->tx_mutex); list_for_each_entry_safe(msg, nmsg, &gsm->tx_data_list, list) { if (msg->addr != addr) continue; @@ -738,7 +737,7 @@ static void gsm_dlci_clear_queues(struct gsm_mux *gsm, struct gsm_dlci *dlci) list_del(&msg->list); kfree(msg); } - spin_unlock_irqrestore(&gsm->tx_lock, flags); + mutex_unlock(&gsm->tx_mutex); } /** @@ -1024,10 +1023,9 @@ static void __gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) static void gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) { - unsigned long flags; - spin_lock_irqsave(&dlci->gsm->tx_lock, flags); + mutex_lock(&dlci->gsm->tx_mutex); __gsm_data_queue(dlci, msg); - spin_unlock_irqrestore(&dlci->gsm->tx_lock, flags); + mutex_unlock(&dlci->gsm->tx_mutex); } /** @@ -1039,7 +1037,7 @@ static void gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) * is data. Keep to the MRU of the mux. This path handles the usual tty * interface which is a byte stream with optional modem data. * - * Caller must hold the tx_lock of the mux. + * Caller must hold the tx_mutex of the mux. */ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci) @@ -1099,7 +1097,7 @@ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci) * is data. Keep to the MRU of the mux. This path handles framed data * queued as skbuffs to the DLCI. * - * Caller must hold the tx_lock of the mux. + * Caller must hold the tx_mutex of the mux. */ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, @@ -1115,7 +1113,7 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, if (dlci->adaption == 4) overhead = 1; - /* dlci->skb is locked by tx_lock */ + /* dlci->skb is locked by tx_mutex */ if (dlci->skb == NULL) { dlci->skb = skb_dequeue_tail(&dlci->skb_list); if (dlci->skb == NULL) @@ -1169,7 +1167,7 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, * Push an empty frame in to the transmit queue to update the modem status * bits and to transmit an optional break. * - * Caller must hold the tx_lock of the mux. + * Caller must hold the tx_mutex of the mux. */ static int gsm_dlci_modem_output(struct gsm_mux *gsm, struct gsm_dlci *dlci, @@ -1283,13 +1281,12 @@ static int gsm_dlci_data_sweep(struct gsm_mux *gsm) static void gsm_dlci_data_kick(struct gsm_dlci *dlci) { - unsigned long flags; int sweep; if (dlci->constipated) return; - spin_lock_irqsave(&dlci->gsm->tx_lock, flags); + mutex_lock(&dlci->gsm->tx_mutex); /* If we have nothing running then we need to fire up */ sweep = (dlci->gsm->tx_bytes < TX_THRESH_LO); if (dlci->gsm->tx_bytes == 0) { @@ -1300,7 +1297,7 @@ static void gsm_dlci_data_kick(struct gsm_dlci *dlci) } if (sweep) gsm_dlci_data_sweep(dlci->gsm); - spin_unlock_irqrestore(&dlci->gsm->tx_lock, flags); + mutex_unlock(&dlci->gsm->tx_mutex); } /* @@ -1994,14 +1991,13 @@ static void gsm_dlci_command(struct gsm_dlci *dlci, const u8 *data, int len) static void gsm_kick_timeout(struct work_struct *work) { struct gsm_mux *gsm = container_of(work, struct gsm_mux, kick_timeout.work); - unsigned long flags; int sent = 0; - spin_lock_irqsave(&gsm->tx_lock, flags); + mutex_lock(&gsm->tx_mutex); /* If we have nothing running then we need to fire up */ if (gsm->tx_bytes < TX_THRESH_LO) sent = gsm_dlci_data_sweep(gsm); - spin_unlock_irqrestore(&gsm->tx_lock, flags); + mutex_unlock(&gsm->tx_mutex); if (sent && debug & 4) pr_info("%s TX queue stalled\n", __func__); @@ -2531,6 +2527,7 @@ static void gsm_free_mux(struct gsm_mux *gsm) break; } } + mutex_destroy(&gsm->tx_mutex); mutex_destroy(&gsm->mutex); kfree(gsm->txframe); kfree(gsm->buf); @@ -2602,6 +2599,7 @@ static struct gsm_mux *gsm_alloc_mux(void) } spin_lock_init(&gsm->lock); mutex_init(&gsm->mutex); + mutex_init(&gsm->tx_mutex); kref_init(&gsm->ref); INIT_LIST_HEAD(&gsm->tx_ctrl_list); INIT_LIST_HEAD(&gsm->tx_data_list); @@ -2610,7 +2608,6 @@ static struct gsm_mux *gsm_alloc_mux(void) INIT_WORK(&gsm->tx_work, gsmld_write_task); init_waitqueue_head(&gsm->event); spin_lock_init(&gsm->control_lock); - spin_lock_init(&gsm->tx_lock); gsm->t1 = T1; gsm->t2 = T2; @@ -2635,6 +2632,7 @@ static struct gsm_mux *gsm_alloc_mux(void) } spin_unlock(&gsm_mux_lock); if (i == MAX_MUX) { + mutex_destroy(&gsm->tx_mutex); mutex_destroy(&gsm->mutex); kfree(gsm->txframe); kfree(gsm->buf); @@ -2790,17 +2788,16 @@ static void gsmld_write_trigger(struct gsm_mux *gsm) static void gsmld_write_task(struct work_struct *work) { struct gsm_mux *gsm = container_of(work, struct gsm_mux, tx_work); - unsigned long flags; int i, ret; /* All outstanding control channel and control messages and one data * frame is sent. */ ret = -ENODEV; - spin_lock_irqsave(&gsm->tx_lock, flags); + mutex_lock(&gsm->tx_mutex); if (gsm->tty) ret = gsm_data_kick(gsm); - spin_unlock_irqrestore(&gsm->tx_lock, flags); + mutex_unlock(&gsm->tx_mutex); if (ret >= 0) for (i = 0; i < NUM_DLCI; i++) @@ -3008,7 +3005,6 @@ static ssize_t gsmld_write(struct tty_struct *tty, struct file *file, const unsigned char *buf, size_t nr) { struct gsm_mux *gsm = tty->disc_data; - unsigned long flags; int space; int ret; @@ -3016,13 +3012,13 @@ static ssize_t gsmld_write(struct tty_struct *tty, struct file *file, return -ENODEV; ret = -ENOBUFS; - spin_lock_irqsave(&gsm->tx_lock, flags); + mutex_lock(&gsm->tx_mutex); space = tty_write_room(tty); if (space >= nr) ret = tty->ops->write(tty, buf, nr); else set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - spin_unlock_irqrestore(&gsm->tx_lock, flags); + mutex_unlock(&gsm->tx_mutex); return ret; } @@ -3319,14 +3315,13 @@ static struct tty_ldisc_ops tty_ldisc_packet = { static void gsm_modem_upd_via_data(struct gsm_dlci *dlci, u8 brk) { struct gsm_mux *gsm = dlci->gsm; - unsigned long flags; if (dlci->state != DLCI_OPEN || dlci->adaption != 2) return; - spin_lock_irqsave(&gsm->tx_lock, flags); + mutex_lock(&gsm->tx_mutex); gsm_dlci_modem_output(gsm, dlci, brk); - spin_unlock_irqrestore(&gsm->tx_lock, flags); + mutex_unlock(&gsm->tx_mutex); } /** From f612466ebecb12a00d9152344ddda6f6345f04dc Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 26 Aug 2022 17:00:55 +0800 Subject: [PATCH 508/654] net/sched: fix netdevice reference leaks in attach_default_qdiscs() In attach_default_qdiscs(), if a dev has multiple queues and queue 0 fails to attach qdisc because there is no memory in attach_one_default_qdisc(). Then dev->qdisc will be noop_qdisc by default. But the other queues may be able to successfully attach to default qdisc. In this case, the fallback to noqueue process will be triggered. If the original attached qdisc is not released and a new one is directly attached, this will cause netdevice reference leaks. The following is the bug log: veth0: default qdisc (fq_codel) fail, fallback to noqueue unregister_netdevice: waiting for veth0 to become free. Usage count = 32 leaked reference. qdisc_alloc+0x12e/0x210 qdisc_create_dflt+0x62/0x140 attach_one_default_qdisc.constprop.41+0x44/0x70 dev_activate+0x128/0x290 __dev_open+0x12a/0x190 __dev_change_flags+0x1a2/0x1f0 dev_change_flags+0x23/0x60 do_setlink+0x332/0x1150 __rtnl_newlink+0x52f/0x8e0 rtnl_newlink+0x43/0x70 rtnetlink_rcv_msg+0x140/0x3b0 netlink_rcv_skb+0x50/0x100 netlink_unicast+0x1bb/0x290 netlink_sendmsg+0x37c/0x4e0 sock_sendmsg+0x5f/0x70 ____sys_sendmsg+0x208/0x280 Fix this bug by clearing any non-noop qdiscs that may have been assigned before trying to re-attach. Fixes: bf6dba76d278 ("net: sched: fallback to qdisc noqueue if default qdisc setup fail") Signed-off-by: Wang Hai Link: https://lore.kernel.org/r/20220826090055.24424-1-wanghai38@huawei.com Signed-off-by: Paolo Abeni --- net/sched/sch_generic.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 99b697ad2b983..7a8ea03f673d5 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1122,6 +1122,21 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, } EXPORT_SYMBOL(dev_graft_qdisc); +static void shutdown_scheduler_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_qdisc_default) +{ + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *qdisc_default = _qdisc_default; + + if (qdisc) { + rcu_assign_pointer(dev_queue->qdisc, qdisc_default); + dev_queue->qdisc_sleeping = qdisc_default; + + qdisc_put(qdisc); + } +} + static void attach_one_default_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_unused) @@ -1169,6 +1184,7 @@ static void attach_default_qdiscs(struct net_device *dev) if (qdisc == &noop_qdisc) { netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n", default_qdisc_ops->id, noqueue_qdisc_ops.id); + netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); dev->priv_flags |= IFF_NO_QUEUE; netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); qdisc = txq->qdisc_sleeping; @@ -1447,21 +1463,6 @@ void dev_init_scheduler(struct net_device *dev) timer_setup(&dev->watchdog_timer, dev_watchdog, 0); } -static void shutdown_scheduler_queue(struct net_device *dev, - struct netdev_queue *dev_queue, - void *_qdisc_default) -{ - struct Qdisc *qdisc = dev_queue->qdisc_sleeping; - struct Qdisc *qdisc_default = _qdisc_default; - - if (qdisc) { - rcu_assign_pointer(dev_queue->qdisc, qdisc_default); - dev_queue->qdisc_sleeping = qdisc_default; - - qdisc_put(qdisc); - } -} - void dev_shutdown(struct net_device *dev) { netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); From 3d5f70949f1b1168fbb17d06eb5c57e984c56c58 Mon Sep 17 00:00:00 2001 From: Jean-Francois Le Fillatre Date: Wed, 24 Aug 2022 21:13:21 +0200 Subject: [PATCH 509/654] usb: add quirks for Lenovo OneLink+ Dock The Lenovo OneLink+ Dock contains two VL812 USB3.0 controllers: 17ef:1018 upstream 17ef:1019 downstream Those two controllers both have problems with some USB3.0 devices, particularly self-powered ones. Typical error messages include: Timeout while waiting for setup device command device not accepting address X, error -62 unable to enumerate USB device By process of elimination the controllers themselves were identified as the cause of the problem. Through trial and error the issue was solved by using USB_QUIRK_RESET_RESUME for both chips. Signed-off-by: Jean-Francois Le Fillatre Cc: stable Link: https://lore.kernel.org/r/20220824191320.17883-1-jflf_kernel@gmx.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index f99a65a64588f..999b7c9697fcd 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -437,6 +437,10 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1532, 0x0116), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Lenovo ThinkPad OneLink+ Dock twin hub controllers (VIA Labs VL812) */ + { USB_DEVICE(0x17ef, 0x1018), .driver_info = USB_QUIRK_RESET_RESUME }, + { USB_DEVICE(0x17ef, 0x1019), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Lenovo USB-C to Ethernet Adapter RTL8153-04 */ { USB_DEVICE(0x17ef, 0x720c), .driver_info = USB_QUIRK_NO_LPM }, From 6000b8d900cd5f52fbcd0776d0cc396e88c8c2ea Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 25 Aug 2022 15:18:36 +0200 Subject: [PATCH 510/654] usb: dwc3: disable USB core PHY management The dwc3 driver manages its PHYs itself so the USB core PHY management needs to be disabled. Use the struct xhci_plat_priv hack added by commits 46034a999c07 ("usb: host: xhci-plat: add platform data support") and f768e718911e ("usb: host: xhci-plat: add priv quirk for skip PHY initialization") to propagate the setting for now. Fixes: 4e88d4c08301 ("usb: add a flag to skip PHY initialization to struct usb_hcd") Fixes: 178a0bce05cb ("usb: core: hcd: integrate the PHY wrapper into the HCD core") Tested-by: Matthias Kaehlcke Cc: stable Reviewed-by: Matthias Kaehlcke Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220825131836.19769-1-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/host.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index f6f13e7f1ba14..a7154fe8206d1 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -11,8 +11,13 @@ #include #include +#include "../host/xhci-plat.h" #include "core.h" +static const struct xhci_plat_priv dwc3_xhci_plat_priv = { + .quirks = XHCI_SKIP_PHY_INIT, +}; + static void dwc3_host_fill_xhci_irq_res(struct dwc3 *dwc, int irq, char *name) { @@ -92,6 +97,11 @@ int dwc3_host_init(struct dwc3 *dwc) goto err; } + ret = platform_device_add_data(xhci, &dwc3_xhci_plat_priv, + sizeof(dwc3_xhci_plat_priv)); + if (ret) + goto err; + memset(props, 0, sizeof(struct property_entry) * ARRAY_SIZE(props)); if (dwc->usb3_lpm_capable) From b7cafb8b06a05c146bf75e5f666f8d84191d1ad4 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 25 Aug 2022 17:04:11 +0300 Subject: [PATCH 511/654] usb: typec: Remove retimers properly Retimer device class is left dangling when the typec module is unloaded. Attempts to reload the module failed with warning: "sysfs: cannot create duplicate filename '/class/retimer'" Fixing the issue by unregistering the class properly. Fixes: ddaf8d96f93b ("usb: typec: Add support for retimers") Reviewed-by: Prashant Malani Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20220825140411.10743-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/class.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index ebc29ec20e3fb..bd5e5dd704313 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -2346,6 +2346,7 @@ static void __exit typec_exit(void) ida_destroy(&typec_index_ida); bus_unregister(&typec_bus); class_unregister(&typec_mux_class); + class_unregister(&retimer_class); } module_exit(typec_exit); From 8cb339f1c1f04baede9d54c1e40ac96247a6393b Mon Sep 17 00:00:00 2001 From: Piyush Mehta Date: Wed, 24 Aug 2022 12:42:53 +0530 Subject: [PATCH 512/654] usb: gadget: udc-xilinx: replace memcpy with memcpy_toio For ARM processor, unaligned access to device memory is not allowed. Method memcpy does not take care of alignment. USB detection failure with the unaligned address of memory access, with below kernel crash. To fix the unaligned address the kernel panic issue, replace memcpy with memcpy_toio method. Kernel crash: Unable to handle kernel paging request at virtual address ffff80000c05008a Mem abort info: ESR = 0x96000061 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x21: alignment fault Data abort info: ISV = 0, ISS = 0x00000061 CM = 0, WnR = 1 swapper pgtable: 4k pages, 48-bit VAs, pgdp=000000000143b000 [ffff80000c05008a] pgd=100000087ffff003, p4d=100000087ffff003, pud=100000087fffe003, pmd=1000000800bcc003, pte=00680000a0010713 Internal error: Oops: 96000061 [#1] SMP Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.15.19-xilinx-v2022.1 #1 Hardware name: ZynqMP ZCU102 Rev1.0 (DT) pstate: 200000c5 (nzCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __memcpy+0x30/0x260 lr : __xudc_ep0_queue+0xf0/0x110 sp : ffff800008003d00 x29: ffff800008003d00 x28: ffff800009474e80 x27: 00000000000000a0 x26: 0000000000000100 x25: 0000000000000012 x24: ffff000800bc8080 x23: 0000000000000001 x22: 0000000000000012 x21: ffff000800bc8080 x20: 0000000000000012 x19: ffff000800bc8080 x18: 0000000000000000 x17: ffff800876482000 x16: ffff800008004000 x15: 0000000000004000 x14: 00001f09785d0400 x13: 0103020101005567 x12: 0781400000000200 x11: 00000000c5672a10 x10: 00000000000008d0 x9 : ffff800009463cf0 x8 : ffff8000094757b0 x7 : 0201010055670781 x6 : 4000000002000112 x5 : ffff80000c05009a x4 : ffff000800a15012 x3 : ffff00080362ad80 x2 : 0000000000000012 x1 : ffff000800a15000 x0 : ffff80000c050088 Call trace: __memcpy+0x30/0x260 xudc_ep0_queue+0x3c/0x60 usb_ep_queue+0x38/0x44 composite_ep0_queue.constprop.0+0x2c/0xc0 composite_setup+0x8d0/0x185c configfs_composite_setup+0x74/0xb0 xudc_irq+0x570/0xa40 __handle_irq_event_percpu+0x58/0x170 handle_irq_event+0x60/0x120 handle_fasteoi_irq+0xc0/0x220 handle_domain_irq+0x60/0x90 gic_handle_irq+0x74/0xa0 call_on_irq_stack+0x2c/0x60 do_interrupt_handler+0x54/0x60 el1_interrupt+0x30/0x50 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x78/0x7c arch_cpu_idle+0x18/0x2c do_idle+0xdc/0x15c cpu_startup_entry+0x28/0x60 rest_init+0xc8/0xe0 arch_call_rest_init+0x10/0x1c start_kernel+0x694/0x6d4 __primary_switched+0xa4/0xac Fixes: 1f7c51660034 ("usb: gadget: Add xilinx usb2 device support") Cc: stable@vger.kernel.org Reviewed-by: Linus Walleij Signed-off-by: Piyush Mehta Link: https://lore.kernel.org/r/20220824071253.1261096-1-piyush.mehta@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/udc-xilinx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/udc/udc-xilinx.c b/drivers/usb/gadget/udc/udc-xilinx.c index 4827e3cd38340..054b69dc2f0c9 100644 --- a/drivers/usb/gadget/udc/udc-xilinx.c +++ b/drivers/usb/gadget/udc/udc-xilinx.c @@ -499,11 +499,11 @@ static int xudc_eptxrx(struct xusb_ep *ep, struct xusb_req *req, /* Get the Buffer address and copy the transmit data.*/ eprambase = (u32 __force *)(udc->addr + ep->rambase); if (ep->is_in) { - memcpy(eprambase, bufferptr, bytestosend); + memcpy_toio(eprambase, bufferptr, bytestosend); udc->write_fn(udc->addr, ep->offset + XUSB_EP_BUF0COUNT_OFFSET, bufferlen); } else { - memcpy(bufferptr, eprambase, bytestosend); + memcpy_toio(bufferptr, eprambase, bytestosend); } /* * Enable the buffer for transmission. @@ -517,11 +517,11 @@ static int xudc_eptxrx(struct xusb_ep *ep, struct xusb_req *req, eprambase = (u32 __force *)(udc->addr + ep->rambase + ep->ep_usb.maxpacket); if (ep->is_in) { - memcpy(eprambase, bufferptr, bytestosend); + memcpy_toio(eprambase, bufferptr, bytestosend); udc->write_fn(udc->addr, ep->offset + XUSB_EP_BUF1COUNT_OFFSET, bufferlen); } else { - memcpy(bufferptr, eprambase, bytestosend); + memcpy_toio(bufferptr, eprambase, bytestosend); } /* * Enable the buffer for transmission. @@ -1023,7 +1023,7 @@ static int __xudc_ep0_queue(struct xusb_ep *ep0, struct xusb_req *req) udc->addr); length = req->usb_req.actual = min_t(u32, length, EP0_MAX_PACKET); - memcpy(corebuf, req->usb_req.buf, length); + memcpy_toio(corebuf, req->usb_req.buf, length); udc->write_fn(udc->addr, XUSB_EP_BUF0COUNT_OFFSET, length); udc->write_fn(udc->addr, XUSB_BUFFREADY_OFFSET, 1); } else { @@ -1752,7 +1752,7 @@ static void xudc_handle_setup(struct xusb_udc *udc) /* Load up the chapter 9 command buffer.*/ ep0rambase = (u32 __force *) (udc->addr + XUSB_SETUP_PKT_ADDR_OFFSET); - memcpy(&setup, ep0rambase, 8); + memcpy_toio(&setup, ep0rambase, 8); udc->setup = setup; udc->setup.wValue = cpu_to_le16(setup.wValue); @@ -1839,7 +1839,7 @@ static void xudc_ep0_out(struct xusb_udc *udc) (ep0->rambase << 2)); buffer = req->usb_req.buf + req->usb_req.actual; req->usb_req.actual = req->usb_req.actual + bytes_to_rx; - memcpy(buffer, ep0rambase, bytes_to_rx); + memcpy_toio(buffer, ep0rambase, bytes_to_rx); if (req->usb_req.length == req->usb_req.actual) { /* Data transfer completed get ready for Status stage */ @@ -1915,7 +1915,7 @@ static void xudc_ep0_in(struct xusb_udc *udc) (ep0->rambase << 2)); buffer = req->usb_req.buf + req->usb_req.actual; req->usb_req.actual = req->usb_req.actual + length; - memcpy(ep0rambase, buffer, length); + memcpy_toio(ep0rambase, buffer, length); } udc->write_fn(udc->addr, XUSB_EP_BUF0COUNT_OFFSET, count); udc->write_fn(udc->addr, XUSB_BUFFREADY_OFFSET, 1); From f9b995b49a07bd0d43b0e490f59be84415c745ae Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 23 Aug 2022 19:58:42 +0200 Subject: [PATCH 513/654] usb: dwc2: fix wrong order of phy_power_on and phy_init Since 1599069a62c6 ("phy: core: Warn when phy_power_on is called before phy_init") the driver complains. In my case (Amlogic SoC) the warning is: phy phy-fe03e000.phy.2: phy_power_on was called before phy_init So change the order of the two calls. The same change has to be done to the order of phy_exit() and phy_power_off(). Fixes: 09a75e857790 ("usb: dwc2: refactor common low-level hw code to platform.c") Cc: stable@vger.kernel.org Acked-by: Minas Harutyunyan Acked-by: Marek Szyprowski Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/dfcc6b40-2274-4e86-e73c-5c5e6aa3e046@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/platform.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index c8ba87df7abef..fd0ccf6f3ec5a 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -154,9 +154,9 @@ static int __dwc2_lowlevel_hw_enable(struct dwc2_hsotg *hsotg) } else if (hsotg->plat && hsotg->plat->phy_init) { ret = hsotg->plat->phy_init(pdev, hsotg->plat->phy_type); } else { - ret = phy_power_on(hsotg->phy); + ret = phy_init(hsotg->phy); if (ret == 0) - ret = phy_init(hsotg->phy); + ret = phy_power_on(hsotg->phy); } return ret; @@ -188,9 +188,9 @@ static int __dwc2_lowlevel_hw_disable(struct dwc2_hsotg *hsotg) } else if (hsotg->plat && hsotg->plat->phy_exit) { ret = hsotg->plat->phy_exit(pdev, hsotg->plat->phy_type); } else { - ret = phy_exit(hsotg->phy); + ret = phy_power_off(hsotg->phy); if (ret == 0) - ret = phy_power_off(hsotg->phy); + ret = phy_exit(hsotg->phy); } if (ret) return ret; From 1016fc0c096c92dd0e6e0541daac7a7868169903 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 26 Aug 2022 15:31:17 -0400 Subject: [PATCH 514/654] USB: gadget: Fix obscure lockdep violation for udc_mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A recent commit expanding the scope of the udc_lock mutex in the gadget core managed to cause an obscure and slightly bizarre lockdep violation. In abbreviated form: ====================================================== WARNING: possible circular locking dependency detected 5.19.0-rc7+ #12510 Not tainted ------------------------------------------------------ udevadm/312 is trying to acquire lock: ffff80000aae1058 (udc_lock){+.+.}-{3:3}, at: usb_udc_uevent+0x54/0xe0 but task is already holding lock: ffff000002277548 (kn->active#4){++++}-{0:0}, at: kernfs_seq_start+0x34/0xe0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (kn->active#4){++++}-{0:0}:        lock_acquire+0x68/0x84        __kernfs_remove+0x268/0x380        kernfs_remove_by_name_ns+0x58/0xac        sysfs_remove_file_ns+0x18/0x24        device_del+0x15c/0x440 -> #2 (device_links_lock){+.+.}-{3:3}:        lock_acquire+0x68/0x84        __mutex_lock+0x9c/0x430        mutex_lock_nested+0x38/0x64        device_link_remove+0x3c/0xa0        _regulator_put.part.0+0x168/0x190        regulator_put+0x3c/0x54        devm_regulator_release+0x14/0x20 -> #1 (regulator_list_mutex){+.+.}-{3:3}:        lock_acquire+0x68/0x84        __mutex_lock+0x9c/0x430        mutex_lock_nested+0x38/0x64        regulator_lock_dependent+0x54/0x284        regulator_enable+0x34/0x80        phy_power_on+0x24/0x130        __dwc2_lowlevel_hw_enable+0x100/0x130        dwc2_lowlevel_hw_enable+0x18/0x40        dwc2_hsotg_udc_start+0x6c/0x2f0        gadget_bind_driver+0x124/0x1f4 -> #0 (udc_lock){+.+.}-{3:3}:        __lock_acquire+0x1298/0x20cc        lock_acquire.part.0+0xe0/0x230        lock_acquire+0x68/0x84        __mutex_lock+0x9c/0x430        mutex_lock_nested+0x38/0x64        usb_udc_uevent+0x54/0xe0 Evidently this was caused by the scope of udc_mutex being too large. The mutex is only meant to protect udc->driver along with a few other things. As far as I can tell, there's no reason for the mutex to be held while the gadget core calls a gadget driver's ->bind or ->unbind routine, or while a UDC is being started or stopped. (This accounts for link #1 in the chain above, where the mutex is held while the dwc2_hsotg_udc is started as part of driver probing.) Gadget drivers' ->disconnect callbacks are problematic. Even though usb_gadget_disconnect() will now acquire the udc_mutex, there's a window in usb_gadget_bind_driver() between the times when the mutex is released and the ->bind callback is invoked. If a disconnect occurred during that window, we could call the driver's ->disconnect routine before its ->bind routine. To prevent this from happening, it will be necessary to prevent a UDC from connecting while it has no gadget driver. This should be done already but it doesn't seem to be; currently usb_gadget_connect() has no check for this. Such a check will have to be added later. Some degree of mutual exclusion is required in soft_connect_store(), which can dereference udc->driver at arbitrary times since it is a sysfs callback. The solution here is to acquire the gadget's device lock rather than the udc_mutex. Since the driver core guarantees that the device lock is always held during driver binding and unbinding, this will make the accesses in soft_connect_store() mutually exclusive with any changes to udc->driver. Lastly, it turns out there is one place which should hold the udc_mutex but currently does not: The function_show() routine needs protection while it dereferences udc->driver. The missing lock and unlock calls are added. Link: https://lore.kernel.org/all/b2ba4245-9917-e399-94c8-03a383e7070e@samsung.com/ Fixes: 2191c00855b0 ("USB: gadget: Fix use-after-free Read in usb_udc_uevent()") Cc: Felipe Balbi Cc: stable@vger.kernel.org Reported-by: Marek Szyprowski Tested-by: Marek Szyprowski Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/YwkfhdxA/I2nOcK7@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index cafcf260394cd..c63c0c2cf649d 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -736,7 +736,10 @@ int usb_gadget_disconnect(struct usb_gadget *gadget) ret = gadget->ops->pullup(gadget, 0); if (!ret) { gadget->connected = 0; - gadget->udc->driver->disconnect(gadget); + mutex_lock(&udc_lock); + if (gadget->udc->driver) + gadget->udc->driver->disconnect(gadget); + mutex_unlock(&udc_lock); } out: @@ -1489,7 +1492,6 @@ static int gadget_bind_driver(struct device *dev) usb_gadget_udc_set_speed(udc, driver->max_speed); - mutex_lock(&udc_lock); ret = driver->bind(udc->gadget, driver); if (ret) goto err_bind; @@ -1499,7 +1501,6 @@ static int gadget_bind_driver(struct device *dev) goto err_start; usb_gadget_enable_async_callbacks(udc); usb_udc_connect_control(udc); - mutex_unlock(&udc_lock); kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE); return 0; @@ -1512,6 +1513,7 @@ static int gadget_bind_driver(struct device *dev) dev_err(&udc->dev, "failed to start %s: %d\n", driver->function, ret); + mutex_lock(&udc_lock); udc->driver = NULL; driver->is_bound = false; mutex_unlock(&udc_lock); @@ -1529,7 +1531,6 @@ static void gadget_unbind_driver(struct device *dev) kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE); - mutex_lock(&udc_lock); usb_gadget_disconnect(gadget); usb_gadget_disable_async_callbacks(udc); if (gadget->irq) @@ -1537,6 +1538,7 @@ static void gadget_unbind_driver(struct device *dev) udc->driver->unbind(gadget); usb_gadget_udc_stop(udc); + mutex_lock(&udc_lock); driver->is_bound = false; udc->driver = NULL; mutex_unlock(&udc_lock); @@ -1612,7 +1614,7 @@ static ssize_t soft_connect_store(struct device *dev, struct usb_udc *udc = container_of(dev, struct usb_udc, dev); ssize_t ret; - mutex_lock(&udc_lock); + device_lock(&udc->gadget->dev); if (!udc->driver) { dev_err(dev, "soft-connect without a gadget driver\n"); ret = -EOPNOTSUPP; @@ -1633,7 +1635,7 @@ static ssize_t soft_connect_store(struct device *dev, ret = n; out: - mutex_unlock(&udc_lock); + device_unlock(&udc->gadget->dev); return ret; } static DEVICE_ATTR_WO(soft_connect); @@ -1652,11 +1654,15 @@ static ssize_t function_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_udc *udc = container_of(dev, struct usb_udc, dev); - struct usb_gadget_driver *drv = udc->driver; + struct usb_gadget_driver *drv; + int rc = 0; - if (!drv || !drv->function) - return 0; - return scnprintf(buf, PAGE_SIZE, "%s\n", drv->function); + mutex_lock(&udc_lock); + drv = udc->driver; + if (drv && drv->function) + rc = scnprintf(buf, PAGE_SIZE, "%s\n", drv->function); + mutex_unlock(&udc_lock); + return rc; } static DEVICE_ATTR_RO(function); From 9c6d778800b921bde3bff3cff5003d1650f942d1 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 26 Aug 2022 15:31:32 -0400 Subject: [PATCH 515/654] USB: core: Prevent nested device-reset calls Automatic kernel fuzzing revealed a recursive locking violation in usb-storage: ============================================ WARNING: possible recursive locking detected 5.18.0 #3 Not tainted -------------------------------------------- kworker/1:3/1205 is trying to acquire lock: ffff888018638db8 (&us_interface_key[i]){+.+.}-{3:3}, at: usb_stor_pre_reset+0x35/0x40 drivers/usb/storage/usb.c:230 but task is already holding lock: ffff888018638db8 (&us_interface_key[i]){+.+.}-{3:3}, at: usb_stor_pre_reset+0x35/0x40 drivers/usb/storage/usb.c:230 ... stack backtrace: CPU: 1 PID: 1205 Comm: kworker/1:3 Not tainted 5.18.0 #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_deadlock_bug kernel/locking/lockdep.c:2988 [inline] check_deadlock kernel/locking/lockdep.c:3031 [inline] validate_chain kernel/locking/lockdep.c:3816 [inline] __lock_acquire.cold+0x152/0x3ca kernel/locking/lockdep.c:5053 lock_acquire kernel/locking/lockdep.c:5665 [inline] lock_acquire+0x1ab/0x520 kernel/locking/lockdep.c:5630 __mutex_lock_common kernel/locking/mutex.c:603 [inline] __mutex_lock+0x14f/0x1610 kernel/locking/mutex.c:747 usb_stor_pre_reset+0x35/0x40 drivers/usb/storage/usb.c:230 usb_reset_device+0x37d/0x9a0 drivers/usb/core/hub.c:6109 r871xu_dev_remove+0x21a/0x270 drivers/staging/rtl8712/usb_intf.c:622 usb_unbind_interface+0x1bd/0x890 drivers/usb/core/driver.c:458 device_remove drivers/base/dd.c:545 [inline] device_remove+0x11f/0x170 drivers/base/dd.c:537 __device_release_driver drivers/base/dd.c:1222 [inline] device_release_driver_internal+0x1a7/0x2f0 drivers/base/dd.c:1248 usb_driver_release_interface+0x102/0x180 drivers/usb/core/driver.c:627 usb_forced_unbind_intf+0x4d/0xa0 drivers/usb/core/driver.c:1118 usb_reset_device+0x39b/0x9a0 drivers/usb/core/hub.c:6114 This turned out not to be an error in usb-storage but rather a nested device reset attempt. That is, as the rtl8712 driver was being unbound from a composite device in preparation for an unrelated USB reset (that driver does not have pre_reset or post_reset callbacks), its ->remove routine called usb_reset_device() -- thus nesting one reset call within another. Performing a reset as part of disconnect processing is a questionable practice at best. However, the bug report points out that the USB core does not have any protection against nested resets. Adding a reset_in_progress flag and testing it will prevent such errors in the future. Link: https://lore.kernel.org/all/CAB7eexKUpvX-JNiLzhXBDWgfg2T9e9_0Tw4HQ6keN==voRbP0g@mail.gmail.com/ Cc: stable@vger.kernel.org Reported-and-tested-by: Rondreis Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/YwkflDxvg0KWqyZK@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 10 ++++++++++ include/linux/usb.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 2633acde7ac1d..d4b1e70d14982 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -6038,6 +6038,11 @@ static int usb_reset_and_verify_device(struct usb_device *udev) * the reset is over (using their post_reset method). * * Return: The same as for usb_reset_and_verify_device(). + * However, if a reset is already in progress (for instance, if a + * driver doesn't have pre_ or post_reset() callbacks, and while + * being unbound or re-bound during the ongoing reset its disconnect() + * or probe() routine tries to perform a second, nested reset), the + * routine returns -EINPROGRESS. * * Note: * The caller must own the device lock. For example, it's safe to use @@ -6071,6 +6076,10 @@ int usb_reset_device(struct usb_device *udev) return -EISDIR; } + if (udev->reset_in_progress) + return -EINPROGRESS; + udev->reset_in_progress = 1; + port_dev = hub->ports[udev->portnum - 1]; /* @@ -6135,6 +6144,7 @@ int usb_reset_device(struct usb_device *udev) usb_autosuspend_device(udev); memalloc_noio_restore(noio_flag); + udev->reset_in_progress = 0; return ret; } EXPORT_SYMBOL_GPL(usb_reset_device); diff --git a/include/linux/usb.h b/include/linux/usb.h index f7a9914fc97f2..9ff1ad4dfad12 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -575,6 +575,7 @@ struct usb3_lpm_parameters { * @devaddr: device address, XHCI: assigned by HW, others: same as devnum * @can_submit: URBs may be submitted * @persist_enabled: USB_PERSIST enabled for this device + * @reset_in_progress: the device is being reset * @have_langid: whether string_langid is valid * @authorized: policy has said we can use it; * (user space) policy determines if we authorize this device to be @@ -662,6 +663,7 @@ struct usb_device { unsigned can_submit:1; unsigned persist_enabled:1; + unsigned reset_in_progress:1; unsigned have_langid:1; unsigned authorized:1; unsigned authenticated:1; From 608e58a0f4617977178131f5f68a3fce1d3f5316 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 26 Aug 2022 15:31:40 -0400 Subject: [PATCH 516/654] media: mceusb: Use new usb_control_msg_*() routines Automatic kernel fuzzing led to a WARN about invalid pipe direction in the mceusb driver: ------------[ cut here ]------------ usb 6-1: BOGUS control dir, pipe 80000380 doesn't match bRequestType 40 WARNING: CPU: 0 PID: 2465 at drivers/usb/core/urb.c:410 usb_submit_urb+0x1326/0x1820 drivers/usb/core/urb.c:410 Modules linked in: CPU: 0 PID: 2465 Comm: kworker/0:2 Not tainted 5.19.0-rc4-00208-g69cb6c6556ad #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Workqueue: usb_hub_wq hub_event RIP: 0010:usb_submit_urb+0x1326/0x1820 drivers/usb/core/urb.c:410 Code: 7c 24 40 e8 ac 23 91 fd 48 8b 7c 24 40 e8 b2 70 1b ff 45 89 e8 44 89 f1 4c 89 e2 48 89 c6 48 c7 c7 a0 30 a9 86 e8 48 07 11 02 <0f> 0b e9 1c f0 ff ff e8 7e 23 91 fd 0f b6 1d 63 22 83 05 31 ff 41 RSP: 0018:ffffc900032becf0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff8881100f3058 RCX: 0000000000000000 RDX: ffffc90004961000 RSI: ffff888114c6d580 RDI: fffff52000657d90 RBP: ffff888105ad90f0 R08: ffffffff812c3638 R09: 0000000000000000 R10: 0000000000000005 R11: ffffed1023504ef1 R12: ffff888105ad9000 R13: 0000000000000040 R14: 0000000080000380 R15: ffff88810ba96500 FS: 0000000000000000(0000) GS:ffff88811a800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffe810bda58 CR3: 000000010b720000 CR4: 0000000000350ef0 Call Trace: usb_start_wait_urb+0x101/0x4c0 drivers/usb/core/message.c:58 usb_internal_control_msg drivers/usb/core/message.c:102 [inline] usb_control_msg+0x31c/0x4a0 drivers/usb/core/message.c:153 mceusb_gen1_init drivers/media/rc/mceusb.c:1431 [inline] mceusb_dev_probe+0x258e/0x33f0 drivers/media/rc/mceusb.c:1807 The reason for the warning is clear enough; the driver sends an unusual read request on endpoint 0 but does not set the USB_DIR_IN bit in the bRequestType field. More importantly, the whole situation can be avoided and the driver simplified by converting it over to the relatively new usb_control_msg_recv() and usb_control_msg_send() routines. That's what this fix does. Link: https://lore.kernel.org/all/CAB7eexLLApHJwZfMQ=X-PtRhw0BgO+5KcSMS05FNUYejJXqtSA@mail.gmail.com/ Cc: Mauro Carvalho Chehab Cc: stable@vger.kernel.org Reported-and-tested-by: Rondreis Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/YwkfnBFCSEVC6XZu@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/mceusb.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c index 0834d5f866fd8..39d2b03e26317 100644 --- a/drivers/media/rc/mceusb.c +++ b/drivers/media/rc/mceusb.c @@ -1416,42 +1416,37 @@ static void mceusb_gen1_init(struct mceusb_dev *ir) { int ret; struct device *dev = ir->dev; - char *data; - - data = kzalloc(USB_CTRL_MSG_SZ, GFP_KERNEL); - if (!data) { - dev_err(dev, "%s: memory allocation failed!", __func__); - return; - } + char data[USB_CTRL_MSG_SZ]; /* * This is a strange one. Windows issues a set address to the device * on the receive control pipe and expect a certain value pair back */ - ret = usb_control_msg(ir->usbdev, usb_rcvctrlpipe(ir->usbdev, 0), - USB_REQ_SET_ADDRESS, USB_TYPE_VENDOR, 0, 0, - data, USB_CTRL_MSG_SZ, 3000); + ret = usb_control_msg_recv(ir->usbdev, 0, USB_REQ_SET_ADDRESS, + USB_DIR_IN | USB_TYPE_VENDOR, + 0, 0, data, USB_CTRL_MSG_SZ, 3000, + GFP_KERNEL); dev_dbg(dev, "set address - ret = %d", ret); dev_dbg(dev, "set address - data[0] = %d, data[1] = %d", data[0], data[1]); /* set feature: bit rate 38400 bps */ - ret = usb_control_msg(ir->usbdev, usb_sndctrlpipe(ir->usbdev, 0), - USB_REQ_SET_FEATURE, USB_TYPE_VENDOR, - 0xc04e, 0x0000, NULL, 0, 3000); + ret = usb_control_msg_send(ir->usbdev, 0, + USB_REQ_SET_FEATURE, USB_TYPE_VENDOR, + 0xc04e, 0x0000, NULL, 0, 3000, GFP_KERNEL); dev_dbg(dev, "set feature - ret = %d", ret); /* bRequest 4: set char length to 8 bits */ - ret = usb_control_msg(ir->usbdev, usb_sndctrlpipe(ir->usbdev, 0), - 4, USB_TYPE_VENDOR, - 0x0808, 0x0000, NULL, 0, 3000); + ret = usb_control_msg_send(ir->usbdev, 0, + 4, USB_TYPE_VENDOR, + 0x0808, 0x0000, NULL, 0, 3000, GFP_KERNEL); dev_dbg(dev, "set char length - retB = %d", ret); /* bRequest 2: set handshaking to use DTR/DSR */ - ret = usb_control_msg(ir->usbdev, usb_sndctrlpipe(ir->usbdev, 0), - 2, USB_TYPE_VENDOR, - 0x0000, 0x0100, NULL, 0, 3000); + ret = usb_control_msg_send(ir->usbdev, 0, + 2, USB_TYPE_VENDOR, + 0x0000, 0x0100, NULL, 0, 3000, GFP_KERNEL); dev_dbg(dev, "set handshake - retC = %d", ret); /* device resume */ @@ -1459,8 +1454,6 @@ static void mceusb_gen1_init(struct mceusb_dev *ir) /* get hw/sw revision? */ mce_command_out(ir, GET_REVISION, sizeof(GET_REVISION)); - - kfree(data); } static void mceusb_gen2_init(struct mceusb_dev *ir) From 9d4dc16ec71bd6368548e9743223e449b4377fc7 Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Sat, 27 Aug 2022 08:45:10 +0530 Subject: [PATCH 517/654] usb: gadget: mass_storage: Fix cdrom data transfers on MAC-OS During cdrom emulation, the response to read_toc command must contain the cdrom address as the number of sectors (2048 byte sized blocks) represented either as an absolute value (when MSF bit is '0') or in terms of PMin/PSec/PFrame (when MSF bit is set to '1'). Incase of cdrom, the fsg_lun_open call sets the sector size to 2048 bytes. When MAC OS sends a read_toc request with MSF set to '1', the store_cdrom_address assumes that the address being provided is the LUN size represented in 512 byte sized blocks instead of 2048. It tries to modify the address further to convert it to 2048 byte sized blocks and store it in MSF format. This results in data transfer failures as the cdrom address being provided in the read_toc response is incorrect. Fixes: 3f565a363cee ("usb: gadget: storage: adapt logic block size to bound block devices") Cc: stable@vger.kernel.org Acked-by: Alan Stern Signed-off-by: Krishna Kurapati Link: https://lore.kernel.org/r/1661570110-19127-1-git-send-email-quic_kriskura@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/storage_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c index 03035dbbe97b8..208c6a92780ab 100644 --- a/drivers/usb/gadget/function/storage_common.c +++ b/drivers/usb/gadget/function/storage_common.c @@ -294,8 +294,10 @@ EXPORT_SYMBOL_GPL(fsg_lun_fsync_sub); void store_cdrom_address(u8 *dest, int msf, u32 addr) { if (msf) { - /* Convert to Minutes-Seconds-Frames */ - addr >>= 2; /* Convert to 2048-byte frames */ + /* + * Convert to Minutes-Seconds-Frames. + * Sector size is already set to 2048 bytes. + */ addr += 2*75; /* Lead-in occupies 2 seconds */ dest[3] = addr % 75; /* Frames */ addr /= 75; From d5dcc33677d7415c5f23b3c052f9e80cbab9ea4e Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Thu, 25 Aug 2022 08:22:07 +0200 Subject: [PATCH 518/654] usb: cdns3: fix incorrect handling TRB_SMM flag for ISOC transfer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TRB_SMM flag indicates that DMA has completed the TD service with this TRB. Usually it’s a last TRB in TD. In case of ISOC transfer for bInterval > 1 each ISOC transfer contains more than one TD associated with usb request (one TD per ITP). In such case the TRB_SMM flag will be set in every TD and driver will recognize the end of transfer after processing the first TD with TRB_SMM. In result driver stops updating request->actual and returns incorrect actual length. To fix this issue driver additionally must check TRB_CHAIN which is not used for isochronous transfers. Fixes: 249f0a25e8be ("usb: cdns3: gadget: handle sg list use case at completion correctly") cc: Acked-by: Peter Chen Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20220825062207.5824-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index d21b69997e750..4f11c311acafe 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -1530,7 +1530,8 @@ static void cdns3_transfer_completed(struct cdns3_device *priv_dev, TRB_LEN(le32_to_cpu(trb->length)); if (priv_req->num_of_trb > 1 && - le32_to_cpu(trb->control) & TRB_SMM) + le32_to_cpu(trb->control) & TRB_SMM && + le32_to_cpu(trb->control) & TRB_CHAIN) transfer_end = true; cdns3_ep_inc_deq(priv_ep); From b46a6b09fa056042a302b181a1941f0056944603 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Thu, 25 Aug 2022 08:21:37 +0200 Subject: [PATCH 519/654] usb: cdns3: fix issue with rearming ISO OUT endpoint ISO OUT endpoint is enabled during queuing first usb request in transfer ring and disabled when TRBERR is reported by controller. After TRBERR and before next transfer added to TR driver must again reenable endpoint but does not. To solve this issue during processing TRBERR event driver must set the flag EP_UPDATE_EP_TRBADDR in priv_ep->flags field. Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") cc: Acked-by: Peter Chen Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20220825062137.5766-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index 4f11c311acafe..5adcb349718c3 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -1691,6 +1691,7 @@ static int cdns3_check_ep_interrupt_proceed(struct cdns3_endpoint *priv_ep) ep_cfg &= ~EP_CFG_ENABLE; writel(ep_cfg, &priv_dev->regs->ep_cfg); priv_ep->flags &= ~EP_QUIRK_ISO_OUT_EN; + priv_ep->flags |= EP_UPDATE_EP_TRBADDR; } cdns3_transfer_completed(priv_dev, priv_ep); } else if (!(priv_ep->flags & EP_STALLED) && From e230a4455ac3e9b112f0367d1b8e255e141afae0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Aug 2022 17:55:07 +0300 Subject: [PATCH 520/654] staging: rtl8712: fix use after free bugs _Read/Write_MACREG callbacks are NULL so the read/write_macreg_hdl() functions don't do anything except free the "pcmd" pointer. It results in a use after free. Delete them. Fixes: 2865d42c78a9 ("staging: r8712u: Add the new driver to the mainline kernel") Cc: stable Reported-by: Zheng Wang Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/Yw4ASqkYcUhUfoY2@kili Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/rtl8712_cmd.c | 36 --------------------------- 1 file changed, 36 deletions(-) diff --git a/drivers/staging/rtl8712/rtl8712_cmd.c b/drivers/staging/rtl8712/rtl8712_cmd.c index 2326aae6709e2..bb7db96ed8219 100644 --- a/drivers/staging/rtl8712/rtl8712_cmd.c +++ b/drivers/staging/rtl8712/rtl8712_cmd.c @@ -117,34 +117,6 @@ static void r871x_internal_cmd_hdl(struct _adapter *padapter, u8 *pbuf) kfree(pdrvcmd->pbuf); } -static u8 read_macreg_hdl(struct _adapter *padapter, u8 *pbuf) -{ - void (*pcmd_callback)(struct _adapter *dev, struct cmd_obj *pcmd); - struct cmd_obj *pcmd = (struct cmd_obj *)pbuf; - - /* invoke cmd->callback function */ - pcmd_callback = cmd_callback[pcmd->cmdcode].callback; - if (!pcmd_callback) - r8712_free_cmd_obj(pcmd); - else - pcmd_callback(padapter, pcmd); - return H2C_SUCCESS; -} - -static u8 write_macreg_hdl(struct _adapter *padapter, u8 *pbuf) -{ - void (*pcmd_callback)(struct _adapter *dev, struct cmd_obj *pcmd); - struct cmd_obj *pcmd = (struct cmd_obj *)pbuf; - - /* invoke cmd->callback function */ - pcmd_callback = cmd_callback[pcmd->cmdcode].callback; - if (!pcmd_callback) - r8712_free_cmd_obj(pcmd); - else - pcmd_callback(padapter, pcmd); - return H2C_SUCCESS; -} - static u8 read_bbreg_hdl(struct _adapter *padapter, u8 *pbuf) { struct cmd_obj *pcmd = (struct cmd_obj *)pbuf; @@ -213,14 +185,6 @@ static struct cmd_obj *cmd_hdl_filter(struct _adapter *padapter, pcmd_r = NULL; switch (pcmd->cmdcode) { - case GEN_CMD_CODE(_Read_MACREG): - read_macreg_hdl(padapter, (u8 *)pcmd); - pcmd_r = pcmd; - break; - case GEN_CMD_CODE(_Write_MACREG): - write_macreg_hdl(padapter, (u8 *)pcmd); - pcmd_r = pcmd; - break; case GEN_CMD_CODE(_Read_BBREG): read_bbreg_hdl(padapter, (u8 *)pcmd); break; From ff03b8846705e517f878585cf66c5d8fca1c38b2 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 19 Aug 2022 19:40:11 +0200 Subject: [PATCH 521/654] s390/mm: remove useless hugepage address alignment The failing address alignment to HPAGE_MASK in do_exception(), for hugetlb faults, was useless from the beginning. With 2 GB hugepage support it became wrong, but w/o further negative impact. Now it could have negative performance impact because it breaks the cacheline optimization for process_huge_page(). Therefore, remove it. Note that we still have failing address alignment by HW to PAGE_SIZE, for all page faults, not just hugetlb faults. So this patch will not fix UFFD_FEATURE_EXACT_ADDRESS for userfaultfd handling. It will just move the failing address for hugetlb faults a bit closer to the real address, at 4K page granularity, similar to normal page faults. Reviewed-by: Heiko Carstens Signed-off-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- arch/s390/mm/fault.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 09b6e756d521d..9ab6ca6f7f590 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -421,8 +421,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) if (unlikely(!(vma->vm_flags & access))) goto out_up; - if (is_vm_hugetlb_page(vma)) - address &= HPAGE_MASK; /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo From c9305b6c1f52060377c72aebe3a701389e9f3172 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 26 Aug 2022 16:55:44 -0700 Subject: [PATCH 522/654] s390: fix nospec table alignments Add proper alignment for .nospec_call_table and .nospec_return_table in vmlinux. [hca@linux.ibm.com]: The problem with the missing alignment of the nospec tables exist since a long time, however only since commit e6ed91fd0768 ("s390/alternatives: remove padding generation code") and with CONFIG_RELOCATABLE=n the kernel may also crash at boot time. The above named commit reduced the size of struct alt_instr by one byte, so its new size is 11 bytes. Therefore depending on the number of cpu alternatives the size of the __alt_instructions array maybe odd, which again also causes that the addresses of the nospec tables will be odd. If the address of __nospec_call_start is odd and the kernel is compiled With CONFIG_RELOCATABLE=n the compiler may generate code that loads the address of __nospec_call_start with a 'larl' instruction. This will generate incorrect code since the 'larl' instruction only works with even addresses. In result the members of the nospec tables will be accessed with an off-by-one offset, which subsequently may lead to addressing exceptions within __nospec_revert(). Fixes: f19fbd5ed642 ("s390: introduce execute-trampolines for branches") Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/8719bf1ce4a72ebdeb575200290094e9ce047bcc.1661557333.git.jpoimboe@kernel.org Cc: # 4.16 Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 2e526f11b91e2..5ea3830af0ccf 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -131,6 +131,7 @@ SECTIONS /* * Table with the patch locations to undo expolines */ + . = ALIGN(4); .nospec_call_table : { __nospec_call_start = . ; *(.s390_indirect*) From bdbf57bca6bf0b76a0f2681014552b25917c26e1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 29 Aug 2022 13:17:07 +0200 Subject: [PATCH 523/654] s390: update defconfigs Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 53 +++++++++++++++------------- arch/s390/configs/defconfig | 49 +++++++++++++------------ arch/s390/configs/zfcpdump_defconfig | 6 ++-- 3 files changed, 60 insertions(+), 48 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index f6dfde577ce83..2a827002934bc 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -40,8 +40,6 @@ CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set -CONFIG_USERFAULTFD=y -# CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_LIVEPATCH=y CONFIG_MARCH_ZEC12=y @@ -74,6 +72,7 @@ CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_MODULE_SIG_SHA256=y @@ -93,6 +92,10 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BFQ_GROUP_IOSCHED=y CONFIG_BINFMT_MISC=m +CONFIG_ZSWAP=y +CONFIG_ZSMALLOC_STAT=y +CONFIG_SLUB_STATS=y +# CONFIG_COMPAT_BRK is not set CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -102,14 +105,12 @@ CONFIG_CMA_DEBUGFS=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y -CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=y -CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y CONFIG_GUP_TEST=y CONFIG_ANON_VMA_NAME=y +CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -167,6 +168,7 @@ CONFIG_BRIDGE_NETFILTER=m CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_TIMEOUT=y CONFIG_NF_CONNTRACK_TIMESTAMP=y @@ -493,7 +495,6 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_ASIX is not set # CONFIG_NET_VENDOR_ATHEROS is not set # CONFIG_NET_VENDOR_BROADCOM is not set -# CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_CADENCE is not set # CONFIG_NET_VENDOR_CAVIUM is not set # CONFIG_NET_VENDOR_CHELSIO is not set @@ -509,7 +510,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set -# CONFIG_NET_VENDOR_MICROSOFT is not set +# CONFIG_NET_VENDOR_WANGXUN is not set # CONFIG_NET_VENDOR_LITEX is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m @@ -518,16 +519,18 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set +# CONFIG_NET_VENDOR_MICROSOFT is not set # CONFIG_NET_VENDOR_MYRI is not set +# CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_NATSEMI is not set # CONFIG_NET_VENDOR_NETERION is not set # CONFIG_NET_VENDOR_NETRONOME is not set -# CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_NVIDIA is not set # CONFIG_NET_VENDOR_OKI is not set # CONFIG_NET_VENDOR_PACKET_ENGINES is not set # CONFIG_NET_VENDOR_PENSANDO is not set # CONFIG_NET_VENDOR_QLOGIC is not set +# CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RDC is not set # CONFIG_NET_VENDOR_REALTEK is not set @@ -535,9 +538,9 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_ROCKER is not set # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set -# CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SILAN is not set # CONFIG_NET_VENDOR_SIS is not set +# CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_SOCIONEXT is not set # CONFIG_NET_VENDOR_STMICRO is not set @@ -570,6 +573,8 @@ CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m CONFIG_HANGCHECK_TIMER=m CONFIG_TN3270_FS=y +# CONFIG_RANDOM_TRUST_CPU is not set +# CONFIG_RANDOM_TRUST_BOOTLOADER is not set CONFIG_PPS=m # CONFIG_PTP_1588_CLOCK is not set # CONFIG_HWMON is not set @@ -727,18 +732,26 @@ CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_BLAKE2S=m +CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m +CONFIG_CRYPTO_SHA3_256_S390=m +CONFIG_CRYPTO_SHA3_512_S390=m +CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_TI=m +CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_BLOWFISH=m @@ -746,11 +759,14 @@ CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m +CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_842=m @@ -766,16 +782,6 @@ CONFIG_CRYPTO_STATS=y CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_SHA3_256_S390=m -CONFIG_CRYPTO_SHA3_512_S390=m -CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_AES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m -CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m CONFIG_CRYPTO_LIB_CURVE25519=m @@ -797,6 +803,7 @@ CONFIG_HEADERS_INSTALL=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_PAGEALLOC=y +CONFIG_SLUB_DEBUG_ON=y CONFIG_PAGE_OWNER=y CONFIG_DEBUG_RODATA_TEST=y CONFIG_DEBUG_WX=y @@ -808,8 +815,6 @@ CONFIG_DEBUG_OBJECTS_TIMERS=y CONFIG_DEBUG_OBJECTS_WORK=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=y CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y -CONFIG_SLUB_DEBUG_ON=y -CONFIG_SLUB_STATS=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_PGFLAGS=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 706df3a4a867f..fb780e80e4c8f 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -38,8 +38,6 @@ CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set -CONFIG_USERFAULTFD=y -# CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_LIVEPATCH=y CONFIG_MARCH_ZEC12=y @@ -69,6 +67,7 @@ CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_MODULE_SIG_SHA256=y @@ -88,6 +87,9 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BFQ_GROUP_IOSCHED=y CONFIG_BINFMT_MISC=m +CONFIG_ZSWAP=y +CONFIG_ZSMALLOC_STAT=y +# CONFIG_COMPAT_BRK is not set CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -95,13 +97,11 @@ CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y -CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=y -CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y CONFIG_ANON_VMA_NAME=y +CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -159,6 +159,7 @@ CONFIG_BRIDGE_NETFILTER=m CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_TIMEOUT=y CONFIG_NF_CONNTRACK_TIMESTAMP=y @@ -484,7 +485,6 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_ASIX is not set # CONFIG_NET_VENDOR_ATHEROS is not set # CONFIG_NET_VENDOR_BROADCOM is not set -# CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_CADENCE is not set # CONFIG_NET_VENDOR_CAVIUM is not set # CONFIG_NET_VENDOR_CHELSIO is not set @@ -500,7 +500,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set -# CONFIG_NET_VENDOR_MICROSOFT is not set +# CONFIG_NET_VENDOR_WANGXUN is not set # CONFIG_NET_VENDOR_LITEX is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m @@ -509,16 +509,18 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set +# CONFIG_NET_VENDOR_MICROSOFT is not set # CONFIG_NET_VENDOR_MYRI is not set +# CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_NATSEMI is not set # CONFIG_NET_VENDOR_NETERION is not set # CONFIG_NET_VENDOR_NETRONOME is not set -# CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_NVIDIA is not set # CONFIG_NET_VENDOR_OKI is not set # CONFIG_NET_VENDOR_PACKET_ENGINES is not set # CONFIG_NET_VENDOR_PENSANDO is not set # CONFIG_NET_VENDOR_QLOGIC is not set +# CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RDC is not set # CONFIG_NET_VENDOR_REALTEK is not set @@ -526,9 +528,9 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_ROCKER is not set # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set -# CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SILAN is not set # CONFIG_NET_VENDOR_SIS is not set +# CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_SOCIONEXT is not set # CONFIG_NET_VENDOR_STMICRO is not set @@ -561,6 +563,8 @@ CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m CONFIG_HANGCHECK_TIMER=m CONFIG_TN3270_FS=y +# CONFIG_RANDOM_TRUST_CPU is not set +# CONFIG_RANDOM_TRUST_BOOTLOADER is not set # CONFIG_PTP_1588_CLOCK is not set # CONFIG_HWMON is not set CONFIG_WATCHDOG=y @@ -713,18 +717,26 @@ CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_BLAKE2S=m +CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m +CONFIG_CRYPTO_SHA3_256_S390=m +CONFIG_CRYPTO_SHA3_512_S390=m +CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_TI=m +CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_BLOWFISH=m @@ -732,11 +744,14 @@ CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m +CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_842=m @@ -752,16 +767,6 @@ CONFIG_CRYPTO_STATS=y CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_SHA3_256_S390=m -CONFIG_CRYPTO_SHA3_512_S390=m -CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_AES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m -CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index f4976f611b944..a5576b8d4081e 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -1,4 +1,3 @@ -# CONFIG_SWAP is not set CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BPF_SYSCALL=y @@ -9,7 +8,6 @@ CONFIG_BPF_SYSCALL=y # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_COMPAT_BRK is not set CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y # CONFIG_COMPAT is not set @@ -28,6 +26,8 @@ CONFIG_CRASH_DUMP=y # CONFIG_BLOCK_LEGACY_AUTOLOAD is not set CONFIG_PARTITION_ADVANCED=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_SWAP is not set +# CONFIG_COMPAT_BRK is not set # CONFIG_COMPACTION is not set # CONFIG_MIGRATION is not set CONFIG_NET=y @@ -53,10 +53,12 @@ CONFIG_ZFCP=y # CONFIG_HVC_IUCV is not set # CONFIG_HW_RANDOM_S390 is not set # CONFIG_HMC_DRV is not set +# CONFIG_S390_UV_UAPI is not set # CONFIG_S390_TAPE is not set # CONFIG_VMCP is not set # CONFIG_MONWRITER is not set # CONFIG_S390_VMUR is not set +# CONFIG_RANDOM_TRUST_BOOTLOADER is not set # CONFIG_HID is not set # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set From 7c8d42fdf1a84b1a0dd60d6528309c8ec127e87c Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 19 Aug 2022 18:53:43 +0200 Subject: [PATCH 524/654] s390/hugetlb: fix prepare_hugepage_range() check for 2 GB hugepages The alignment check in prepare_hugepage_range() is wrong for 2 GB hugepages, it only checks for 1 MB hugepage alignment. This can result in kernel crash in __unmap_hugepage_range() at the BUG_ON(start & ~huge_page_mask(h)) alignment check, for mappings created with MAP_FIXED at unaligned address. Fix this by correctly handling multiple hugepage sizes, similar to the generic version of prepare_hugepage_range(). Fixes: d08de8e2d867 ("s390/mm: add support for 2GB hugepages") Cc: # 4.8+ Acked-by: Alexander Gordeev Signed-off-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/hugetlb.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index f22beda9e6d5c..ccdbccfde148c 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -28,9 +28,11 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, static inline int prepare_hugepage_range(struct file *file, unsigned long addr, unsigned long len) { - if (len & ~HPAGE_MASK) + struct hstate *h = hstate_file(file); + + if (len & ~huge_page_mask(h)) return -EINVAL; - if (addr & ~HPAGE_MASK) + if (addr & ~huge_page_mask(h)) return -EINVAL; return 0; } From c8fea9273fd1be308668496badfcbd55183e0dd3 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Wed, 24 Aug 2022 23:00:02 +0800 Subject: [PATCH 525/654] drm/amdgpu: disable FRU access on special SIENNA CICHLID card Below driver load error will be printed, not friendly to end user. amdgpu: ATOM BIOS: 113-D603GLXE-077 [drm] FRU: Failed to get size field [drm:amdgpu_fru_get_product_info [amdgpu]] *ERROR* Failed to read FRU Manufacturer, ret:-5 Signed-off-by: Guchun Chen Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index ecada5eadfe35..e325150879df7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -66,10 +66,15 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev) return true; case CHIP_SIENNA_CICHLID: if (strnstr(atom_ctx->vbios_version, "D603", + sizeof(atom_ctx->vbios_version))) { + if (strnstr(atom_ctx->vbios_version, "D603GLXE", sizeof(atom_ctx->vbios_version))) - return true; - else + return false; + else + return true; + } else { return false; + } default: return false; } From 47e04eed84bb07cc5b54462752a4bc7286ab8197 Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Mon, 15 Aug 2022 13:28:19 -0400 Subject: [PATCH 526/654] drm/amdgpu: Update mes_v11_api_def.h New GFX11 MES FW adds the trap_en bit. For now hardcode to 1 (traps enabled). Signed-off-by: Graham Sider Acked-by: Felix Kuehling Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 1 + drivers/gpu/drm/amd/include/mes_v11_api_def.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 120ea294abefb..cc3fdbbcd3140 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -183,6 +183,7 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.trap_handler_addr = input->tba_addr; mes_add_queue_pkt.tma_addr = input->tma_addr; mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; + mes_add_queue_pkt.trap_en = 1; return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index 80dab1146439e..50bfa513cb35a 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -268,7 +268,8 @@ union MESAPI__ADD_QUEUE { uint32_t is_tmz_queue : 1; uint32_t map_kiq_utility_queue : 1; uint32_t is_kfd_process : 1; - uint32_t reserved : 22; + uint32_t trap_en : 1; + uint32_t reserved : 21; }; struct MES_API_STATUS api_status; uint64_t tma_addr; From 507fd7c400032b126747a5ae8cca2816d73f009a Mon Sep 17 00:00:00 2001 From: George Shen Date: Wed, 10 Aug 2022 22:06:17 -0400 Subject: [PATCH 527/654] drm/amd/display: Fix DCN32 DPSTREAMCLK_CNTL programming [Why] Each index in the DPSTREAMCLK_CNTL register phyiscally maps 1-to-1 with HPO stream encoder instance. On the other hand, each index in DTBCLK_P_CNTL physically maps 1-to-1 with OTG instance. Current DCN32 DPSTREAMCLK_CLK programing assumes that OTG instance always maps 1-to-1 with HPO stream encoder instance. This is not always guaranteed and can result in blackscreen. [How] Program the correct dpstreamclk instance with the correct dtbclk_p source. Reviewed-by: Ariel Bernstein Acked-by: Brian Chang Signed-off-by: George Shen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c | 8 ++++---- drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c index a31c64b50410b..0d5e8a4415121 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c @@ -225,19 +225,19 @@ void dccg32_set_dpstreamclk( case 0: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, - (src == REFCLK) ? 0 : 1, DPSTREAMCLK0_SRC_SEL, 0); + (src == REFCLK) ? 0 : 1, DPSTREAMCLK0_SRC_SEL, otg_inst); break; case 1: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN, - (src == REFCLK) ? 0 : 1, DPSTREAMCLK1_SRC_SEL, 1); + (src == REFCLK) ? 0 : 1, DPSTREAMCLK1_SRC_SEL, otg_inst); break; case 2: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN, - (src == REFCLK) ? 0 : 1, DPSTREAMCLK2_SRC_SEL, 2); + (src == REFCLK) ? 0 : 1, DPSTREAMCLK2_SRC_SEL, otg_inst); break; case 3: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN, - (src == REFCLK) ? 0 : 1, DPSTREAMCLK3_SRC_SEL, 3); + (src == REFCLK) ? 0 : 1, DPSTREAMCLK3_SRC_SEL, otg_inst); break; default: BREAK_TO_DEBUGGER(); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c index db7b0b155374a..226af06278ce4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c @@ -116,7 +116,7 @@ static void setup_hpo_dp_stream_encoder(struct pipe_ctx *pipe_ctx) dto_params.timing = &pipe_ctx->stream->timing; dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr); - dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, link_enc->inst); + dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, stream_enc->inst); dccg->funcs->enable_symclk32_se(dccg, stream_enc->inst, phyd32clk); dccg->funcs->set_dtbclk_dto(dccg, &dto_params); stream_enc->funcs->enable_stream(stream_enc); @@ -137,7 +137,7 @@ static void reset_hpo_dp_stream_encoder(struct pipe_ctx *pipe_ctx) stream_enc->funcs->disable(stream_enc); dccg->funcs->set_dtbclk_dto(dccg, &dto_params); dccg->funcs->disable_symclk32_se(dccg, stream_enc->inst); - dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, pipe_ctx->link_res.hpo_dp_link_enc->inst); + dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, stream_enc->inst); } static void setup_hpo_dp_stream_attribute(struct pipe_ctx *pipe_ctx) From d1b4a51a4ca8954f30cf4671b25c4f8637c45600 Mon Sep 17 00:00:00 2001 From: Duncan Ma Date: Mon, 15 Aug 2022 17:37:32 -0400 Subject: [PATCH 528/654] drm/amd/display: Fix OTG H timing reset for dcn314 [Why] When ODM is enabled, H timing control register reset to 0. Div mode manual field get overwritten causing no display on certain modes for dcn314. [How] Use REG_UPDATE instead of REG_SET to set div_mode field. Reviewed-by: Charlene Liu Reviewed-by: Nicholas Kazlauskas Acked-by: Brian Chang Signed-off-by: Duncan Ma Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c index 0c7980266b853..38aa28ec6b130 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c @@ -98,7 +98,8 @@ static void optc314_set_odm_combine(struct timing_generator *optc, int *opp_id, REG_UPDATE(OPTC_WIDTH_CONTROL, OPTC_SEGMENT_WIDTH, mpcc_hactive); - REG_SET(OTG_H_TIMING_CNTL, 0, OTG_H_TIMING_DIV_MODE, opp_cnt - 1); + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE, opp_cnt - 1); optc1->opp_count = opp_cnt; } From 3c93603d9568c7c4b20ff1712ddc60e997d78df7 Mon Sep 17 00:00:00 2001 From: YuBiao Wang Date: Wed, 24 Aug 2022 15:56:04 +0800 Subject: [PATCH 529/654] drm/amdgpu: Fix use-after-free in amdgpu_cs_ioctl [Why] In amdgpu_cs_ioctl, amdgpu_job_free could be performed ealier if there is -ERESTARTSYS error. In this case, job->hw_fence could be not initialized yet. Putting hw_fence during amdgpu_job_free could lead to a use-after-free warning. [How] Check if drm_sched_job_init is performed before job_free by checking s_fence. v2: Check hw_fence.ops instead since it could be NULL if fence is not initialized. Reverse the condition since !=NULL check is discouraged in kernel. Signed-off-by: YuBiao Wang Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index b1099ee79c50b..c2fd6f3076a6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -159,7 +159,10 @@ void amdgpu_job_free(struct amdgpu_job *job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - dma_fence_put(&job->hw_fence); + if (!job->hw_fence.ops) + kfree(job); + else + dma_fence_put(&job->hw_fence); } int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, From 3e834a17a22cb8e9169c0b018d1a2df126977db3 Mon Sep 17 00:00:00 2001 From: Wang Fudong Date: Wed, 17 Aug 2022 17:47:50 +0800 Subject: [PATCH 530/654] drm/amd/display: set dig fifo read start level to 7 before dig fifo reset [Why] DIG_FIFO_ERROR = 1 caused mst daisy chain 2nd monitor black. [How] We need to set dig fifo read start level = 7 before dig fifo reset during dig fifo enable according to hardware designer's suggestion. If it is zero, it will cause underflow or overflow and DIG_FIFO_ERROR = 1. Reviewed-by: Alvin Lee Reviewed-by: Aric Cyr Acked-by: Brian Chang Signed-off-by: Wang Fudong Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c index 26648ce772dab..38a48983f663b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c @@ -310,6 +310,11 @@ static void enc32_stream_encoder_dp_unblank( // TODO: Confirm if we need to wait for DIG_SYMCLK_FE_ON REG_WAIT(DIG_FE_CNTL, DIG_SYMCLK_FE_ON, 1, 10, 5000); + /* read start level = 0 will bring underflow / overflow and DIG_FIFO_ERROR = 1 + * so set it to 1/2 full = 7 before reset as suggested by hardware team. + */ + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7); + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 1); REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 1, 10, 5000); From 6783e6bbc2457dbed351fb0d2477aa6060a7d32b Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Thu, 18 Aug 2022 13:21:37 -0400 Subject: [PATCH 531/654] drm/amd/display: Missing HPO instance added [Why & How] Number of encoder is set to 4 but only 3 instances are created. Reviewed-by: Charlene Liu Acked-by: Brian Chang Signed-off-by: Leo Chen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 3a9e3870b3a95..2a2a4a9cc1173 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -454,6 +454,7 @@ static const struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs hpo_dp_stream_encoder_reg_list(0), hpo_dp_stream_encoder_reg_list(1), hpo_dp_stream_encoder_reg_list(2), + hpo_dp_stream_encoder_reg_list(3) }; static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = { From 4fd7f14b56b2e727dd66a62e217e57015da4e9fd Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 18 Aug 2022 17:25:05 -0400 Subject: [PATCH 532/654] drm/amd/display: Fix CAB cursor size allocation for DCN32/321 For calculating cursor size allocation, surface size was used, resulting in over allocation Reviewed-by: Alvin Lee Reviewed-by: Nicholas Kazlauskas Acked-by: Brian Chang Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index ebd3945c71f1b..a3e8648a319f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -312,7 +312,7 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c cursor_size *= 8; break; } - cache_lines_used += dcn32_cache_lines_for_surface(dc, surface_size, + cache_lines_used += dcn32_cache_lines_for_surface(dc, cursor_size, plane->address.grph.cursor_cache_addr.quad_part); } } From 94a82c9e3dffb88182a4ed0464dc0266ad0d7b45 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 18 Aug 2022 12:34:06 -0400 Subject: [PATCH 533/654] drm/amd/display: disable display fresh from MALL on an edge case for DCN321 [Why&How] When using a 4k monitor when cursor caching is not supported due to framebuffer being on an uncacheable address, enabling display refresh from MALL would trigger corruption if SS is enabled. Prevent entering SS if we are on the edge case and cursor caching is not possible. Do this only if cursor size larger than a 64x64@4bpp. Pull the cursor size calculation out of if condition since cursor address may not be set on all platforms Reviewed-by: Alvin Lee Reviewed-by: Nicholas Kazlauskas Acked-by: Brian Chang Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_hwseq.c | 43 ++++++++++++------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index a3e8648a319f8..dc296aac89f48 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -295,23 +295,24 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c } // Include cursor size for CAB allocation + cursor_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size; + switch (stream->cursor_attributes.color_format) { + case CURSOR_MODE_MONO: + cursor_size /= 2; + break; + case CURSOR_MODE_COLOR_1BIT_AND: + case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA: + case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA: + cursor_size *= 4; + break; + + case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED: + case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED: + cursor_size *= 8; + break; + } + if (stream->cursor_position.enable && plane->address.grph.cursor_cache_addr.quad_part) { - cursor_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size; - switch (stream->cursor_attributes.color_format) { - case CURSOR_MODE_MONO: - cursor_size /= 2; - break; - case CURSOR_MODE_COLOR_1BIT_AND: - case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA: - case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA: - cursor_size *= 4; - break; - - case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED: - case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED: - cursor_size *= 8; - break; - } cache_lines_used += dcn32_cache_lines_for_surface(dc, cursor_size, plane->address.grph.cursor_cache_addr.quad_part); } @@ -325,6 +326,16 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c if (cache_lines_used % lines_per_way > 0) num_ways++; + if (stream->cursor_position.enable && + !plane->address.grph.cursor_cache_addr.quad_part && + cursor_size > 16384) + /* Cursor caching is not supported since it won't be on the same line. + * So we need an extra line to accommodate it. With large cursors and a single 4k monitor + * this case triggers corruption. If we're at the edge, then dont trigger display refresh + * from MALL. We only need to cache cursor if its greater that 64x64 at 4 bpp. + */ + num_ways++; + return num_ways; } From 595091c6ba35ba48c1f8186116bbbae2b208dce6 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 18 Aug 2022 17:55:01 -0400 Subject: [PATCH 534/654] drm/amd/display: use actual cursor size instead of max for CAB allocation [Why&How] When calculating allocation for cursor size, get the real cursor through the HUBP instead of using the maximum cursor size for more optimal allocation Reviewed-by: Alvin Lee Reviewed-by: Nicholas Kazlauskas Acked-by: Brian Chang Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index dc296aac89f48..18287743add55 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -295,7 +295,20 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c } // Include cursor size for CAB allocation - cursor_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &ctx->res_ctx.pipe_ctx[i]; + struct hubp *hubp = pipe->plane_res.hubp; + + if (pipe->stream && pipe->plane_state && hubp) + /* Find the cursor plane and use the exact size instead of + * using the max for calculation + */ + if (hubp->curs_attr.width > 0) { + cursor_size = hubp->curs_attr.width * hubp->curs_attr.height; + break; + } + } + switch (stream->cursor_attributes.color_format) { case CURSOR_MODE_MONO: cursor_size /= 2; From d0629cea1f97acc5e8d95ca2a42ddc72ed4ffa3b Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Thu, 18 Aug 2022 20:24:26 -0400 Subject: [PATCH 535/654] drm/amd/display: fix wrong register access [why] fw version check was for release branch. for staging, it has a chance to enter wrong code path. Reviewed-by: Hansen Dsouza Acked-by: Brian Chang Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c | 3 +++ .../gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c index a788d160953b5..ab70ebd8f223d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c @@ -104,6 +104,9 @@ static bool has_query_dp_alt(struct link_encoder *enc) { struct dc_dmub_srv *dc_dmub_srv = enc->ctx->dmub_srv; + if (enc->ctx->dce_version >= DCN_VERSION_3_15) + return true; + /* Supports development firmware and firmware >= 4.0.11 */ return dc_dmub_srv && !(dc_dmub_srv->dmub->fw_version >= DMUB_FW_VERSION(4, 0, 0) && diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c index b384f30395d39..e3351ddc566cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c @@ -317,6 +317,7 @@ static void enc314_stream_encoder_dp_unblank( /* switch DP encoder to CRTC data, but reset it the fifo first. It may happen * that it overflows during mode transition, and sometimes doesn't recover. */ + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7); REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 1); udelay(10); From d7e7546886eebf626569e38ff06d0a67b8b82757 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 23 Aug 2022 15:45:36 +0800 Subject: [PATCH 536/654] drm/amd/pm: use vbios carried pptable for those supported SKUs For some SMU13.0.0 SKUs, the vbios carried pptable is ready to go. Use that one instead of hardcoded softpptable. Signed-off-by: Evan Quan Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 6 ++ .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 23 +++--- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 72 ++++++++++++++++--- 3 files changed, 77 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index ac308e72241a5..cf28af9ee0cb8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -291,5 +291,11 @@ int smu_v13_0_set_default_dpm_tables(struct smu_context *smu); void smu_v13_0_set_smu_mailbox_registers(struct smu_context *smu); int smu_v13_0_mode1_reset(struct smu_context *smu); + +int smu_v13_0_get_pptable_from_firmware(struct smu_context *smu, + void **table, + uint32_t *size, + uint32_t pptable_id); + #endif #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 18ee3b5e64c50..24488f4cb78cc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -84,9 +84,6 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_7.bin"); static const int link_width[] = {0, 1, 2, 4, 8, 12, 16}; static const int link_speed[] = {25, 50, 80, 160}; -static int smu_v13_0_get_pptable_from_firmware(struct smu_context *smu, void **table, uint32_t *size, - uint32_t pptable_id); - int smu_v13_0_init_microcode(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; @@ -224,23 +221,19 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu) /* * Temporary solution for SMU V13.0.0 with SCPM enabled: - * - use 36831 signed pptable when pp_table_id is 3683 - * - use 37151 signed pptable when pp_table_id is 3715 - * - use 36641 signed pptable when pp_table_id is 3664 or 0 - * TODO: drop these when the pptable carried in vbios is ready. + * - use vbios carried pptable when pptable_id is 3664, 3715 or 3795 + * - use 36831 soft pptable when pptable_id is 3683 */ if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) { switch (pptable_id) { - case 0: case 3664: - pptable_id = 36641; + case 3715: + case 3795: + pptable_id = 0; break; case 3683: pptable_id = 36831; break; - case 3715: - pptable_id = 37151; - break; default: dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id); return -EINVAL; @@ -425,8 +418,10 @@ static int smu_v13_0_get_pptable_from_vbios(struct smu_context *smu, void **tabl return 0; } -static int smu_v13_0_get_pptable_from_firmware(struct smu_context *smu, void **table, uint32_t *size, - uint32_t pptable_id) +int smu_v13_0_get_pptable_from_firmware(struct smu_context *smu, + void **table, + uint32_t *size, + uint32_t pptable_id) { const struct smc_firmware_header_v1_0 *hdr; struct amdgpu_device *adev = smu->adev; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index df4a47acd7247..7db2fd9ea74ad 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -388,11 +388,29 @@ static int smu_v13_0_0_append_powerplay_table(struct smu_context *smu) return 0; } -static int smu_v13_0_0_setup_pptable(struct smu_context *smu) +static int smu_v13_0_0_get_pptable_from_pmfw(struct smu_context *smu, + void **table, + uint32_t *size) { struct smu_table_context *smu_table = &smu->smu_table; void *combo_pptable = smu_table->combo_pptable; + int ret = 0; + + ret = smu_cmn_get_combo_pptable(smu); + if (ret) + return ret; + + *table = combo_pptable; + *size = sizeof(struct smu_13_0_0_powerplay_table); + + return 0; +} + +static int smu_v13_0_0_setup_pptable(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; struct amdgpu_device *adev = smu->adev; + uint32_t pptable_id; int ret = 0; /* @@ -401,17 +419,51 @@ static int smu_v13_0_0_setup_pptable(struct smu_context *smu) * rely on the combo pptable(and its revelant SMU message). */ if (adev->scpm_enabled) { - ret = smu_cmn_get_combo_pptable(smu); - if (ret) - return ret; - - smu->smu_table.power_play_table = combo_pptable; - smu->smu_table.power_play_table_size = sizeof(struct smu_13_0_0_powerplay_table); + ret = smu_v13_0_0_get_pptable_from_pmfw(smu, + &smu_table->power_play_table, + &smu_table->power_play_table_size); } else { - ret = smu_v13_0_setup_pptable(smu); - if (ret) - return ret; + /* override pptable_id from driver parameter */ + if (amdgpu_smu_pptable_id >= 0) { + pptable_id = amdgpu_smu_pptable_id; + dev_info(adev->dev, "override pptable id %d\n", pptable_id); + } else { + pptable_id = smu_table->boot_values.pp_table_id; + } + + /* + * Temporary solution for SMU V13.0.0 with SCPM disabled: + * - use vbios carried pptable when pptable_id is 3664, 3715 or 3795 + * - use soft pptable when pptable_id is 3683 + */ + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) { + switch (pptable_id) { + case 3664: + case 3715: + case 3795: + pptable_id = 0; + break; + case 3683: + break; + default: + dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id); + return -EINVAL; + } + } + + /* force using vbios pptable in sriov mode */ + if ((amdgpu_sriov_vf(adev) || !pptable_id) && (amdgpu_emu_mode != 1)) + ret = smu_v13_0_0_get_pptable_from_pmfw(smu, + &smu_table->power_play_table, + &smu_table->power_play_table_size); + else + ret = smu_v13_0_get_pptable_from_firmware(smu, + &smu_table->power_play_table, + &smu_table->power_play_table_size, + pptable_id); } + if (ret) + return ret; ret = smu_v13_0_0_store_powerplay_table(smu); if (ret) From b023053592646b1da9477b0b598f2cdd5d3f89d8 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 23 Aug 2022 16:07:18 +0800 Subject: [PATCH 537/654] drm/amd/pm: use vbios carried pptable for all SMU13.0.7 SKUs For those SMU13.0.7 unsecure SKUs, the vbios carried pptable is ready to go. Use that one instead of hardcoded softpptable. Signed-off-by: Evan Quan Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 1016d1c216d8c..fcf24c5408592 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -400,11 +400,27 @@ static int smu_v13_0_7_append_powerplay_table(struct smu_context *smu) return 0; } +static int smu_v13_0_7_get_pptable_from_pmfw(struct smu_context *smu, + void **table, + uint32_t *size) +{ + struct smu_table_context *smu_table = &smu->smu_table; + void *combo_pptable = smu_table->combo_pptable; + int ret = 0; + + ret = smu_cmn_get_combo_pptable(smu); + if (ret) + return ret; + + *table = combo_pptable; + *size = sizeof(struct smu_13_0_7_powerplay_table); + + return 0; +} static int smu_v13_0_7_setup_pptable(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; - void *combo_pptable = smu_table->combo_pptable; struct amdgpu_device *adev = smu->adev; int ret = 0; @@ -413,18 +429,11 @@ static int smu_v13_0_7_setup_pptable(struct smu_context *smu) * be used directly by driver. To get the raw pptable, we need to * rely on the combo pptable(and its revelant SMU message). */ - if (adev->scpm_enabled) { - ret = smu_cmn_get_combo_pptable(smu); - if (ret) - return ret; - - smu->smu_table.power_play_table = combo_pptable; - smu->smu_table.power_play_table_size = sizeof(struct smu_13_0_7_powerplay_table); - } else { - ret = smu_v13_0_setup_pptable(smu); - if (ret) - return ret; - } + ret = smu_v13_0_7_get_pptable_from_pmfw(smu, + &smu_table->power_play_table, + &smu_table->power_play_table_size); + if (ret) + return ret; ret = smu_v13_0_7_store_powerplay_table(smu); if (ret) From 2640174f4a139502f603f7bedf3a7011db714e1d Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 23 Aug 2022 17:37:26 +0800 Subject: [PATCH 538/654] drm/amd/pm: bump SMU 13.0.0 driver_if header version To suppress the warning about version mismatch with the latest 78.54.0 PMFW. Signed-off-by: Evan Quan Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h index f745cd8f1ab76..063f4a7376056 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h @@ -25,7 +25,7 @@ #define SMU13_DRIVER_IF_V13_0_0_H //Increment this version if SkuTable_t or BoardTable_t change -#define PPTABLE_VERSION 0x22 +#define PPTABLE_VERSION 0x24 #define NUM_GFXCLK_DPM_LEVELS 16 #define NUM_SOCCLK_DPM_LEVELS 8 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index cf28af9ee0cb8..f442bf085a318 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -30,7 +30,7 @@ #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x05 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2E +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x30 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms From b97e914552c3fcea71ce03f899e285f2178ec38b Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Thu, 25 Aug 2022 15:42:08 -0500 Subject: [PATCH 539/654] drm/amdgpu: ensure no PCIe peer access for CPU XGMI iolinks [Why] Devices with CPU XGMI iolink do not support PCIe peer access. Signed-off-by: Alex Sierra Acked-by: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f095a2513affc..1400abee9f402 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5524,7 +5524,8 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); resource_size_t aper_limit = adev->gmc.aper_base + adev->gmc.aper_size - 1; - bool p2p_access = !(pci_p2pdma_distance_many(adev->pdev, + bool p2p_access = !adev->gmc.xgmi.connected_to_cpu && + !(pci_p2pdma_distance_many(adev->pdev, &peer_adev->dev, 1, true) < 0); return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size && From 6ffc967c36b42f864955cb2c5e8b3fded0baa918 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 19 Aug 2022 15:11:19 -0400 Subject: [PATCH 540/654] drm/amd/display: Use correct plane for CAB cursor size allocation [Why&How] plane and stream variables used for cursor size allocation calculation were stale from previous iteration. Redo the iteration to find the correct cursor plane for the calculation. Reviewed-by: Alvin Lee Acked-by: Brian Chang Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_hwseq.c | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index 18287743add55..8d9d96c398085 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -295,8 +295,8 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c } // Include cursor size for CAB allocation - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &ctx->res_ctx.pipe_ctx[i]; + for (j = 0; j < dc->res_pool->pipe_count; j++) { + struct pipe_ctx *pipe = &ctx->res_ctx.pipe_ctx[j]; struct hubp *hubp = pipe->plane_res.hubp; if (pipe->stream && pipe->plane_state && hubp) @@ -339,15 +339,25 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c if (cache_lines_used % lines_per_way > 0) num_ways++; - if (stream->cursor_position.enable && - !plane->address.grph.cursor_cache_addr.quad_part && - cursor_size > 16384) - /* Cursor caching is not supported since it won't be on the same line. - * So we need an extra line to accommodate it. With large cursors and a single 4k monitor - * this case triggers corruption. If we're at the edge, then dont trigger display refresh - * from MALL. We only need to cache cursor if its greater that 64x64 at 4 bpp. - */ - num_ways++; + for (i = 0; i < ctx->stream_count; i++) { + stream = ctx->streams[i]; + for (j = 0; j < ctx->stream_status[i].plane_count; j++) { + plane = ctx->stream_status[i].plane_states[j]; + + if (stream->cursor_position.enable && plane && + !plane->address.grph.cursor_cache_addr.quad_part && + cursor_size > 16384) { + /* Cursor caching is not supported since it won't be on the same line. + * So we need an extra line to accommodate it. With large cursors and a single 4k monitor + * this case triggers corruption. If we're at the edge, then dont trigger display refresh + * from MALL. We only need to cache cursor if its greater that 64x64 at 4 bpp. + */ + num_ways++; + /* We only expect one cursor plane */ + break; + } + } + } return num_ways; } From f5b9c1ffabce5f4acbeabd3a03fd57b3970a13fe Mon Sep 17 00:00:00 2001 From: Ethan Wellenreiter Date: Mon, 22 Aug 2022 14:33:23 -0400 Subject: [PATCH 541/654] drm/amd/display: Re-initialize viewport after pipe merge [Why] Pipes get merged in preparation for SubVP but if they don't get used, and are in ODM or some other multi pipe config, it would calculate the voltage level with a viewport of just one pipe from when they were split resulting in too low of a voltage level. [How] Made it so that the viewport and other timing settings get rebuilt and re- initialized after the pipe merge, before calculating the voltage level so it would calculate it correctly. Reviewed-by: Alvin Lee Reviewed-by: Jun Lei Acked-by: Brian Chang Signed-off-by: Ethan Wellenreiter Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 8118cfc5b4056..8e4c9d0887ceb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1014,6 +1014,15 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, dc->debug.force_subvp_mclk_switch)) { dcn32_merge_pipes_for_subvp(dc, context); + // to re-initialize viewport after the pipe merge + for (int i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (!pipe_ctx->plane_state || !pipe_ctx->stream) + continue; + + resource_build_scaling_params(pipe_ctx); + } while (!found_supported_config && dcn32_enough_pipes_for_subvp(dc, context) && dcn32_assign_subvp_pipe(dc, context, &dc_pipe_idx)) { From d6f84bab48745ea68814d596eb476a9c11ce76ae Mon Sep 17 00:00:00 2001 From: Ethan Wellenreiter Date: Fri, 19 Aug 2022 18:30:44 -0400 Subject: [PATCH 542/654] drm/amd/display: Fix check for stream and plane [WHY] Function wasn't returning false when it had a no stream [HOW] Made it return false when it had no stream. Reviewed-by: Alvin Lee Acked-by: Brian Chang Signed-off-by: Ethan Wellenreiter Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index 955f52e6064df..ab918fe38f6a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -144,7 +144,7 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (!pipe->stream) - continue; + return false; if (!pipe->plane_state) return false; From 7b471c32e4cbfdd7a673b79321f6a26abecbf33b Mon Sep 17 00:00:00 2001 From: Vladimir Stempen Date: Fri, 19 Aug 2022 18:32:01 -0400 Subject: [PATCH 543/654] drm/amd/display: Fix black flash when switching from ODM2to1 to ODMBypass [Why] On secondary display hotplug we switch primary stream from ODM2to1 to ODMBypass mode. Current logic will trigger disabling front end for this stream. [How] We need to check if prev_odm_pipe is equal to NULL in order to disable dangling planes in this scenario. Reviewed-by: Ariel Bernstein Acked-by: Brian Chang Signed-off-by: Vladimir Stempen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index aeecca68dea73..fb22c3d70528e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1094,7 +1094,8 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) dc->current_state->stream_count != context->stream_count) should_disable = true; - if (old_stream && !dc->current_state->res_ctx.pipe_ctx[i].top_pipe) { + if (old_stream && !dc->current_state->res_ctx.pipe_ctx[i].top_pipe && + !dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe) { struct pipe_ctx *old_pipe, *new_pipe; old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; From 910ab9eee0f61a243126d70e932e1301b5437583 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Tue, 30 Aug 2022 15:47:47 +0800 Subject: [PATCH 544/654] drm/amdgpu: only init tap_delay ucode when it's included in ucode binary Not all the gfx10 variants need to integrate global tap_delay and per se tap_delay firmwares Only init tap_delay ucode when it does include in rlc ucode binary so driver doesn't send a null buffer to psp for firmware loading Signed-off-by: Hawking Zhang Reviewed-by: Jack Gui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 60 +++++++++++++++----------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index a2a4dc1844c0a..a3cd5c1e85292 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4274,35 +4274,45 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) } - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS]; - info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE); + if (adev->gfx.rlc.global_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE); + } - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS]; - info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE); + if (adev->gfx.rlc.se0_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE); + } - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS]; - info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE); + if (adev->gfx.rlc.se1_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE); + } - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS]; - info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE); + if (adev->gfx.rlc.se2_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE); + } - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS]; - info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE); + if (adev->gfx.rlc.se3_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE); + } info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; From 3e3761f1ec7df67d88cfca5e2ea98538f529e645 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 29 Aug 2022 11:53:41 -0500 Subject: [PATCH 545/654] smb3: use filemap_write_and_wait_range instead of filemap_write_and_wait When doing insert range and collapse range we should be writing out the cached pages for the ranges affected but not the whole file. Fixes: c3a72bb21320 ("smb3: Move the flush out of smb2_copychunk_range() into its callers") Cc: stable@vger.kernel.org Reviewed-by: Paulo Alcantara (SUSE) Reviewed-by: David Howells Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 8 ++++++-- fs/cifs/smb2ops.c | 9 +++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index e9fb338b8e7e7..8042d7280dec1 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1219,8 +1219,6 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, cifs_dbg(FYI, "copychunk range\n"); - filemap_write_and_wait(src_inode->i_mapping); - if (!src_file->private_data || !dst_file->private_data) { rc = -EBADF; cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); @@ -1250,6 +1248,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, lock_two_nondirectories(target_inode, src_inode); cifs_dbg(FYI, "about to flush pages\n"); + + rc = filemap_write_and_wait_range(src_inode->i_mapping, off, + off + len - 1); + if (rc) + goto out; + /* should we flush first and last page first */ truncate_inode_pages(&target_inode->i_data, 0); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 7c941ce1e7a9f..421be43af4253 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3687,7 +3687,10 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, } filemap_invalidate_lock(inode->i_mapping); - filemap_write_and_wait(inode->i_mapping); + rc = filemap_write_and_wait_range(inode->i_mapping, off, old_eof - 1); + if (rc < 0) + goto out_2; + truncate_pagecache_range(inode, off, old_eof); rc = smb2_copychunk_range(xid, cfile, cfile, off + len, @@ -3738,7 +3741,9 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon, eof = cpu_to_le64(old_eof + len); filemap_invalidate_lock(inode->i_mapping); - filemap_write_and_wait(inode->i_mapping); + rc = filemap_write_and_wait_range(inode->i_mapping, off, old_eof + len - 1); + if (rc < 0) + goto out_2; truncate_pagecache_range(inode, off, old_eof); rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, From 27893dfc1285f80f80f46b3b8c95f5d15d2e66d0 Mon Sep 17 00:00:00 2001 From: Enzo Matsumiya Date: Tue, 30 Aug 2022 19:51:51 -0300 Subject: [PATCH 546/654] cifs: fix small mempool leak in SMB2_negotiate() In some cases of failure (dialect mismatches) in SMB2_negotiate(), after the request is sent, the checks would return -EIO when they should be rather setting rc = -EIO and jumping to neg_exit to free the response buffer from mempool. Signed-off-by: Enzo Matsumiya Cc: stable@vger.kernel.org Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 128e44e575280..6352ab32c7e7a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -965,16 +965,17 @@ SMB2_negotiate(const unsigned int xid, } else if (rc != 0) goto neg_exit; + rc = -EIO; if (strcmp(server->vals->version_string, SMB3ANY_VERSION_STRING) == 0) { if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) { cifs_server_dbg(VFS, "SMB2 dialect returned but not requested\n"); - return -EIO; + goto neg_exit; } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { cifs_server_dbg(VFS, "SMB2.1 dialect returned but not requested\n"); - return -EIO; + goto neg_exit; } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) { /* ops set to 3.0 by default for default so update */ server->ops = &smb311_operations; @@ -985,7 +986,7 @@ SMB2_negotiate(const unsigned int xid, if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) { cifs_server_dbg(VFS, "SMB2 dialect returned but not requested\n"); - return -EIO; + goto neg_exit; } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { /* ops set to 3.0 by default for default so update */ server->ops = &smb21_operations; @@ -999,7 +1000,7 @@ SMB2_negotiate(const unsigned int xid, /* if requested single dialect ensure returned dialect matched */ cifs_server_dbg(VFS, "Invalid 0x%x dialect returned: not requested\n", le16_to_cpu(rsp->DialectRevision)); - return -EIO; + goto neg_exit; } cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode); @@ -1017,9 +1018,10 @@ SMB2_negotiate(const unsigned int xid, else { cifs_server_dbg(VFS, "Invalid dialect returned by server 0x%x\n", le16_to_cpu(rsp->DialectRevision)); - rc = -EIO; goto neg_exit; } + + rc = 0; server->dialect = le16_to_cpu(rsp->DialectRevision); /* From 200dccd07df21b504a2168960059f0a971bf415d Mon Sep 17 00:00:00 2001 From: Shyamin Ayesh Date: Fri, 26 Aug 2022 09:51:40 -0700 Subject: [PATCH 547/654] nvme-pci: add NVME_QUIRK_BOGUS_NID for Lexar NM610 Lexar NM610 reports bogus eui64 values that appear to be the same across all drives. Quirk them out so they are not marked as "non globally unique" duplicates. Signed-off-by: Shyamin Ayesh [patch formatting] Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a222caa1ab002..ca15602401235 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3522,6 +3522,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0xc0a9, 0x540a), /* Crucial P2 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1d97, 0x2263), /* Lexar NM610 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061), .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065), From da0342a3aa0357795224e6283df86444e1117168 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 24 Aug 2022 09:23:16 +0200 Subject: [PATCH 548/654] nvmet-auth: add missing goto in nvmet_setup_auth() There's a goto missing in nvmet_setup_auth(), causing a kernel oops when nvme_auth_extract_key() fails. Reported-by: Tal Lossos Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/auth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index cf690df347758..c4113b43dbfee 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -196,6 +196,7 @@ int nvmet_setup_auth(struct nvmet_ctrl *ctrl) if (IS_ERR(ctrl->ctrl_key)) { ret = PTR_ERR(ctrl->ctrl_key); ctrl->ctrl_key = NULL; + goto out_free_hash; } pr_debug("%s: using ctrl hash %s key %*ph\n", __func__, ctrl->ctrl_key->hash > 0 ? From 478814a5584197fa1fb18377653626e3416e7cd6 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 29 Aug 2022 14:40:30 +0200 Subject: [PATCH 549/654] nvmet-tcp: fix unhandled tcp states in nvmet_tcp_state_change() TCP_FIN_WAIT2 and TCP_LAST_ACK were not handled, the connection is closing so we can ignore them and avoid printing the "unhandled state" warning message. [ 1298.852386] nvmet_tcp: queue 2 unhandled state 5 [ 1298.879112] nvmet_tcp: queue 7 unhandled state 5 [ 1298.884253] nvmet_tcp: queue 8 unhandled state 5 [ 1298.889475] nvmet_tcp: queue 9 unhandled state 5 v2: Do not call nvmet_tcp_schedule_release_queue(), just ignore the fin_wait2 and last_ack states. Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index dc3b4dc8fe08b..a3694a32f6d52 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1506,6 +1506,9 @@ static void nvmet_tcp_state_change(struct sock *sk) goto done; switch (sk->sk_state) { + case TCP_FIN_WAIT2: + case TCP_LAST_ACK: + break; case TCP_FIN_WAIT1: case TCP_CLOSE_WAIT: case TCP_CLOSE: From fce1c23f629173e0db78b79a74f2052044a00e65 Mon Sep 17 00:00:00 2001 From: Alvaro Karsz Date: Tue, 23 Aug 2022 10:39:47 +0300 Subject: [PATCH 550/654] net: virtio_net: fix notification coalescing comments Fix wording in comments for the notifications coalescing feature. Signed-off-by: Alvaro Karsz Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20220823073947.14774-1-alvaro.karsz@solid-run.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/virtio_net.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 29ced55514d41..6cb842ea8979a 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -56,7 +56,7 @@ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ -#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */ +#define VIRTIO_NET_F_NOTF_COAL 53 /* Device supports notifications coalescing */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ @@ -364,24 +364,24 @@ struct virtio_net_hash_config { */ #define VIRTIO_NET_CTRL_NOTF_COAL 6 /* - * Set the tx-usecs/tx-max-packets patameters. - * tx-usecs - Maximum number of usecs to delay a TX notification. - * tx-max-packets - Maximum number of packets to send before a TX notification. + * Set the tx-usecs/tx-max-packets parameters. */ struct virtio_net_ctrl_coal_tx { + /* Maximum number of packets to send before a TX notification */ __le32 tx_max_packets; + /* Maximum number of usecs to delay a TX notification */ __le32 tx_usecs; }; #define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0 /* - * Set the rx-usecs/rx-max-packets patameters. - * rx-usecs - Maximum number of usecs to delay a RX notification. - * rx-max-frames - Maximum number of packets to receive before a RX notification. + * Set the rx-usecs/rx-max-packets parameters. */ struct virtio_net_ctrl_coal_rx { + /* Maximum number of packets to receive before a RX notification */ __le32 rx_max_packets; + /* Maximum number of usecs to delay a RX notification */ __le32 rx_usecs; }; From 4a4ce82212ef014d70f486a427005b2b5bab8e34 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 30 Aug 2022 08:40:55 +0200 Subject: [PATCH 551/654] net: phy: micrel: Make the GPIO to be non-exclusive The same GPIO line can be shared by multiple phys for the coma mode pin. If that is the case then, all the other phys that share the same line will failed to be probed because the access to the gpio line is not non-exclusive. Fix this by making access to the gpio line to be nonexclusive using flag GPIOD_FLAGS_BIT_NONEXCLUSIVE. This allows all the other PHYs to be probed. Fixes: 738871b09250ee ("net: phy: micrel: add coma mode GPIO") Reviewed-by: Andrew Lunn Signed-off-by: Horatiu Vultur Link: https://lore.kernel.org/r/20220830064055.2340403-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index e78d0bf69bc3e..6f52b4fb68885 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2873,12 +2873,18 @@ static int lan8814_config_init(struct phy_device *phydev) return 0; } +/* It is expected that there will not be any 'lan8814_take_coma_mode' + * function called in suspend. Because the GPIO line can be shared, so if one of + * the phys goes back in coma mode, then all the other PHYs will go, which is + * wrong. + */ static int lan8814_release_coma_mode(struct phy_device *phydev) { struct gpio_desc *gpiod; gpiod = devm_gpiod_get_optional(&phydev->mdio.dev, "coma-mode", - GPIOD_OUT_HIGH_OPEN_DRAIN); + GPIOD_OUT_HIGH_OPEN_DRAIN | + GPIOD_FLAGS_BIT_NONEXCLUSIVE); if (IS_ERR(gpiod)) return PTR_ERR(gpiod); From 642b2122c5df332a6df52dd1f91e552bc4356951 Mon Sep 17 00:00:00 2001 From: Gao Xiao Date: Mon, 29 Aug 2022 12:16:51 +0200 Subject: [PATCH 552/654] nfp: fix the access to management firmware hanging When running `ethtool -p` with the old management firmware, the management firmware resource is not correctly released, which causes firmware related malfunction: all the access to management firmware hangs. It releases the management firmware resource when set id mode operation is not supported. Fixes: ccb9bc1dfa44 ("nfp: add 'ethtool --identify' support") Signed-off-by: Gao Xiao Reviewed-by: Louis Peens Signed-off-by: Simon Horman Reviewed-by: Jesse Brandeburg Link: https://lore.kernel.org/r/20220829101651.633840-1-simon.horman@corigine.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c index edd3000337352..4cc38799eabc7 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c @@ -507,6 +507,7 @@ int nfp_eth_set_idmode(struct nfp_cpp *cpp, unsigned int idx, bool state) if (nfp_nsp_get_abi_ver_minor(nsp) < 32) { nfp_err(nfp_nsp_cpp(nsp), "set id mode operation not supported, please update flash\n"); + nfp_eth_config_cleanup_end(nsp); return -EOPNOTSUPP; } From 13a9d08c296228d18289de60b83792c586e1d073 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Aug 2022 18:00:30 +0300 Subject: [PATCH 553/654] net: lan966x: improve error handle in lan966x_fdma_rx_get_frame() Don't just print a warning. Clean up and return an error as well. Fixes: c8349639324a ("net: lan966x: Add FDMA functionality") Signed-off-by: Dan Carpenter Reviewed-by: Horatiu Vultur Link: https://lore.kernel.org/r/YwjgDm/SVd5c1tQU@kili Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c index 6dea7f8c14814..51f8a08163777 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c @@ -425,7 +425,8 @@ static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx) lan966x_ifh_get_src_port(skb->data, &src_port); lan966x_ifh_get_timestamp(skb->data, ×tamp); - WARN_ON(src_port >= lan966x->num_phys_ports); + if (WARN_ON(src_port >= lan966x->num_phys_ports)) + goto free_skb; skb->dev = lan966x->ports[src_port]->dev; skb_pull(skb, IFH_LEN * sizeof(u32)); @@ -449,6 +450,8 @@ static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx) return skb; +free_skb: + kfree_skb(skb); unmap_page: dma_unmap_page(lan966x->dev, (dma_addr_t)db->dataptr, FDMA_DCB_STATUS_BLOCKL(db->status), From 58bfe7d8e31014d7ce246788df99c56e3cfe6c68 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 31 Aug 2022 10:34:25 +0200 Subject: [PATCH 554/654] Revert "usb: add quirks for Lenovo OneLink+ Dock" This reverts commit 3d5f70949f1b1168fbb17d06eb5c57e984c56c58. The quirk does not work properly, more work is needed to determine what should be done here. Reported-by: Oliver Neukum Cc: Jean-Francois Le Fillatre Cc: stable Fixes: 3d5f70949f1b ("usb: add quirks for Lenovo OneLink+ Dock") Link: https://lore.kernel.org/r/9a17ea86-079f-510d-e919-01bc53a6d09f@gmx.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 999b7c9697fcd..f99a65a64588f 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -437,10 +437,6 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1532, 0x0116), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, - /* Lenovo ThinkPad OneLink+ Dock twin hub controllers (VIA Labs VL812) */ - { USB_DEVICE(0x17ef, 0x1018), .driver_info = USB_QUIRK_RESET_RESUME }, - { USB_DEVICE(0x17ef, 0x1019), .driver_info = USB_QUIRK_RESET_RESUME }, - /* Lenovo USB-C to Ethernet Adapter RTL8153-04 */ { USB_DEVICE(0x17ef, 0x720c), .driver_info = USB_QUIRK_NO_LPM }, From 518e26f11af2fe4f5bebf9a0351595d508c7077f Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 31 Aug 2022 18:37:35 +0800 Subject: [PATCH 555/654] gpio: pca953x: Add mutex_lock for regcache sync in PM The regcache sync will set the cache_bypass = true, at that time, when there is regmap write operation, it will bypass the regmap cache, then the regcache sync will write back the value from cache to register, which is not as our expectation. Though regmap already use its internal lock to avoid such issue, but this driver force disable the regmap internal lock in its regmap config: disable_locking = true To avoid this issue, use the driver's own lock to do the protect in system PM. Fixes: b76574300504 ("gpio: pca953x: Restore registers after suspend/resume cycle") Signed-off-by: Haibo Chen Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index ecd7d169470b0..2925f4d8cef36 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -1175,7 +1175,9 @@ static int pca953x_suspend(struct device *dev) { struct pca953x_chip *chip = dev_get_drvdata(dev); + mutex_lock(&chip->i2c_lock); regcache_cache_only(chip->regmap, true); + mutex_unlock(&chip->i2c_lock); if (atomic_read(&chip->wakeup_path)) device_set_wakeup_path(dev); @@ -1198,13 +1200,17 @@ static int pca953x_resume(struct device *dev) } } + mutex_lock(&chip->i2c_lock); regcache_cache_only(chip->regmap, false); regcache_mark_dirty(chip->regmap); ret = pca953x_regcache_sync(dev); - if (ret) + if (ret) { + mutex_unlock(&chip->i2c_lock); return ret; + } ret = regcache_sync(chip->regmap); + mutex_unlock(&chip->i2c_lock); if (ret) { dev_err(dev, "Failed to restore register map: %d\n", ret); return ret; From ceb4038472a4803e7046ed488b03d11551991514 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 29 Aug 2022 16:25:50 +0200 Subject: [PATCH 556/654] USB: serial: cp210x: add Decagon UCA device id Add the device id for Decagon Devices USB Cable Adapter. Link: https://lore.kernel.org/r/trinity-819f9db2-d3e1-40e9-a669-9c245817c046-1661523546680@msvc-mesg-web108 Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index c374620a486f0..a34957c4b64c0 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -130,6 +130,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x83AA) }, /* Mark-10 Digital Force Gauge */ { USB_DEVICE(0x10C4, 0x83D8) }, /* DekTec DTA Plus VHF/UHF Booster/Attenuator */ { USB_DEVICE(0x10C4, 0x8411) }, /* Kyocera GPS Module */ + { USB_DEVICE(0x10C4, 0x8414) }, /* Decagon USB Cable Adapter */ { USB_DEVICE(0x10C4, 0x8418) }, /* IRZ Automation Teleport SG-10 GSM/GPRS Modem */ { USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */ { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */ From c0955bf957be4bead01fae1d791476260da7325d Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Sat, 27 Aug 2022 23:38:15 +0800 Subject: [PATCH 557/654] ethernet: rocker: fix sleep in atomic context bug in neigh_timer_handler The function neigh_timer_handler() is a timer handler that runs in an atomic context. When used by rocker, neigh_timer_handler() calls "kzalloc(.., GFP_KERNEL)" that may sleep. As a result, the sleep in atomic context bug will happen. One of the processes is shown below: ofdpa_fib4_add() ... neigh_add_timer() (wait a timer) neigh_timer_handler() neigh_release() neigh_destroy() rocker_port_neigh_destroy() rocker_world_port_neigh_destroy() ofdpa_port_neigh_destroy() ofdpa_port_ipv4_neigh() kzalloc(sizeof(.., GFP_KERNEL) //may sleep This patch changes the gfp_t parameter of kzalloc() from GFP_KERNEL to GFP_ATOMIC in order to mitigate the bug. Fixes: 00fc0c51e35b ("rocker: Change world_ops API and implementation to be switchdev independant") Signed-off-by: Duoming Zhou Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_ofdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index bc70c6abd6a5b..58cf7cc54f408 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -1273,7 +1273,7 @@ static int ofdpa_port_ipv4_neigh(struct ofdpa_port *ofdpa_port, bool removing; int err = 0; - entry = kzalloc(sizeof(*entry), GFP_KERNEL); + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) return -ENOMEM; From 814816d71e29934d0a76ee259b54c0b80c3b0e4a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 23 Aug 2022 18:36:35 +0200 Subject: [PATCH 558/654] powerpc: Fix hard_irq_disable() with sanitizer As reported by Zhouyi Zhou, WRITE_ONCE() is not atomic as expected when KASAN or KCSAN are compiled in. Fix it by re-implementing it using inline assembly. Fixes: 077fc62b2b66 ("powerpc/irq: remove inline assembly in hard_irq_disable macro") Reported-by: Zhouyi Zhou Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a8298991b3df049a54ee8e558838e34265812014.1661272586.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/hw_irq.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 26ede09c521df..3c8cb48f88aee 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -282,7 +282,8 @@ static inline bool pmi_irq_pending(void) flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED); \ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ if (!arch_irqs_disabled_flags(flags)) { \ - WRITE_ONCE(local_paca->saved_r1, current_stack_pointer);\ + asm volatile("std%X0 %1,%0" : "=m" (local_paca->saved_r1) \ + : "r" (current_stack_pointer)); \ trace_hardirqs_off(); \ } \ } while(0) From 8e83622ae7ca481c76c8fd9579877f6abae64ca2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 31 Aug 2022 10:15:24 +0200 Subject: [PATCH 559/654] USB: serial: ch341: fix lost character on LCR updates Disable LCR updates for pre-0x30 devices which use a different (unknown) protocol for line control and where the current register write causes the next received character to be lost. Note that updating LCR using the INIT command has no effect on these devices either. Reported-by: Jonathan Woithe Tested-by: Jonathan Woithe Link: https://lore.kernel.org/r/Ys1iPTfiZRWj2gXs@marvin.atrad.com.au Fixes: 4e46c410e050 ("USB: serial: ch341: reinitialize chip on reconfiguration") Fixes: 55fa15b5987d ("USB: serial: ch341: fix baud rate and line-control handling") Cc: stable@vger.kernel.org # 4.10 Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 2798fca712612..2bcce172355b5 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -97,7 +97,10 @@ struct ch341_private { u8 mcr; u8 msr; u8 lcr; + unsigned long quirks; + u8 version; + unsigned long break_end; }; @@ -265,6 +268,9 @@ static int ch341_set_baudrate_lcr(struct usb_device *dev, * (stop bits, parity and word length). Version 0x30 and above use * CH341_REG_LCR only and CH341_REG_LCR2 is always set to zero. */ + if (priv->version < 0x30) + return 0; + r = ch341_control_out(dev, CH341_REQ_WRITE_REG, CH341_REG_LCR2 << 8 | CH341_REG_LCR, lcr); if (r) @@ -308,7 +314,9 @@ static int ch341_configure(struct usb_device *dev, struct ch341_private *priv) r = ch341_control_in(dev, CH341_REQ_READ_VERSION, 0, 0, buffer, size); if (r) return r; - dev_dbg(&dev->dev, "Chip version: 0x%02x\n", buffer[0]); + + priv->version = buffer[0]; + dev_dbg(&dev->dev, "Chip version: 0x%02x\n", priv->version); r = ch341_control_out(dev, CH341_REQ_SERIAL_INIT, 0, 0); if (r < 0) From 41ca302a697b64a3dab4676e01d0d11bb184737d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 31 Aug 2022 10:15:25 +0200 Subject: [PATCH 560/654] USB: serial: ch341: fix disabled rx timer on older devices At least one older CH341 appears to have the RX timer enable bit inverted so that setting it disables the RX timer and prevents the FIFO from emptying until it is full. Only set the RX timer enable bit for devices with version newer than 0x27 (even though this probably affects all pre-0x30 devices). Reported-by: Jonathan Woithe Tested-by: Jonathan Woithe Link: https://lore.kernel.org/r/Ys1iPTfiZRWj2gXs@marvin.atrad.com.au Fixes: 4e46c410e050 ("USB: serial: ch341: reinitialize chip on reconfiguration") Cc: stable@vger.kernel.org # 4.10 Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 2bcce172355b5..af01a462cc43c 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -253,8 +253,12 @@ static int ch341_set_baudrate_lcr(struct usb_device *dev, /* * CH341A buffers data until a full endpoint-size packet (32 bytes) * has been received unless bit 7 is set. + * + * At least one device with version 0x27 appears to have this bit + * inverted. */ - val |= BIT(7); + if (priv->version > 0x27) + val |= BIT(7); r = ch341_control_out(dev, CH341_REQ_WRITE_REG, CH341_REG_DIVISOR << 8 | CH341_REG_PRESCALER, From ec1bd37123c607ca6485beb4542a792a4db765aa Mon Sep 17 00:00:00 2001 From: Khalid Masum Date: Thu, 18 Aug 2022 10:07:38 +0600 Subject: [PATCH 561/654] fscache: fix misdocumented parameter This patch fixes two warnings generated by make docs. The functions fscache_use_cookie and fscache_unuse_cookie, both have a parameter named cookie. But they are documented with the name "object" with unclear description. Which generates the warning when creating docs. This commit will replace the currently misdocumented parameter names with the correct ones while adding proper descriptions. CC: Randy Dunlap Signed-off-by: Khalid Masum Signed-off-by: David Howells Link: https://lore.kernel.org/r/20220521142446.4746-1-khalid.masum.92@gmail.com/ # v1 Link: https://lore.kernel.org/r/20220818040738.12036-1-khalid.masum.92@gmail.com/ # v2 Link: https://lore.kernel.org/r/880d7d25753fb326ee17ac08005952112fcf9bdb.1657360984.git.mchehab@kernel.org/ # Mauro's version --- include/linux/fscache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 720874e6ee947..36e5dd84cf599 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -258,7 +258,7 @@ struct fscache_cookie *fscache_acquire_cookie(struct fscache_volume *volume, /** * fscache_use_cookie - Request usage of cookie attached to an object - * @object: Object description + * @cookie: The cookie representing the cache object * @will_modify: If cache is expected to be modified locally * * Request usage of the cookie attached to an object. The caller should tell @@ -274,7 +274,7 @@ static inline void fscache_use_cookie(struct fscache_cookie *cookie, /** * fscache_unuse_cookie - Cease usage of cookie attached to an object - * @object: Object description + * @cookie: The cookie representing the cache object * @aux_data: Updated auxiliary data (or NULL) * @object_size: Revised size of the object (or NULL) * From c93ccd63b18c8d108c57b2bb0e5f3b058b9d2029 Mon Sep 17 00:00:00 2001 From: Sun Ke Date: Fri, 26 Aug 2022 10:35:15 +0800 Subject: [PATCH 562/654] cachefiles: fix error return code in cachefiles_ondemand_copen() The cache_size field of copen is specified by the user daemon. If cache_size < 0, then the OPEN request is expected to fail, while copen itself shall succeed. However, returning 0 is indeed unexpected when cache_size is an invalid error code. Fix this by returning error when cache_size is an invalid error code. Changes ======= v4: update the code suggested by Dan v3: update the commit log suggested by Jingbo. Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") Signed-off-by: Sun Ke Suggested-by: Jeffle Xu Suggested-by: Dan Carpenter Signed-off-by: David Howells Reviewed-by: Gao Xiang Reviewed-by: Jingbo Xu Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20220818111935.1683062-1-sunke32@huawei.com/ # v2 Link: https://lore.kernel.org/r/20220818125038.2247720-1-sunke32@huawei.com/ # v3 Link: https://lore.kernel.org/r/20220826023515.3437469-1-sunke32@huawei.com/ # v4 --- fs/cachefiles/ondemand.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 1fee702d55293..7e1586bd5cf34 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -158,9 +158,13 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) /* fail OPEN request if daemon reports an error */ if (size < 0) { - if (!IS_ERR_VALUE(size)) - size = -EINVAL; - req->error = size; + if (!IS_ERR_VALUE(size)) { + req->error = -EINVAL; + ret = -EINVAL; + } else { + req->error = size; + ret = 0; + } goto out; } From 1122f40072731525c06b1371cfa30112b9b54d27 Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Thu, 25 Aug 2022 10:09:45 +0800 Subject: [PATCH 563/654] cachefiles: make on-demand request distribution fairer For now, enqueuing and dequeuing on-demand requests all start from idx 0, this makes request distribution unfair. In the weighty concurrent I/O scenario, the request stored in higher idx will starve. Searching requests cyclically in cachefiles_ondemand_daemon_read, makes distribution fairer. Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") Reported-by: Yongqing Li Signed-off-by: Xin Yin Signed-off-by: David Howells Reviewed-by: Jeffle Xu Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20220817065200.11543-1-yinxin.x@bytedance.com/ # v1 Link: https://lore.kernel.org/r/20220825020945.2293-1-yinxin.x@bytedance.com/ # v2 --- fs/cachefiles/internal.h | 1 + fs/cachefiles/ondemand.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 6cba2c6de2f96..2ad58c4652084 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -111,6 +111,7 @@ struct cachefiles_cache { char *tag; /* cache binding tag */ refcount_t unbind_pincount;/* refcount to do daemon unbind */ struct xarray reqs; /* xarray of pending on-demand requests */ + unsigned long req_id_next; struct xarray ondemand_ids; /* xarray for ondemand_id allocation */ u32 ondemand_id_next; }; diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 7e1586bd5cf34..0254ed39f68ce 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -242,14 +242,19 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, unsigned long id = 0; size_t n; int ret = 0; - XA_STATE(xas, &cache->reqs, 0); + XA_STATE(xas, &cache->reqs, cache->req_id_next); /* - * Search for a request that has not ever been processed, to prevent - * requests from being processed repeatedly. + * Cyclically search for a request that has not ever been processed, + * to prevent requests from being processed repeatedly, and make + * request distribution fair. */ xa_lock(&cache->reqs); req = xas_find_marked(&xas, UINT_MAX, CACHEFILES_REQ_NEW); + if (!req && cache->req_id_next > 0) { + xas_set(&xas, 0); + req = xas_find_marked(&xas, cache->req_id_next - 1, CACHEFILES_REQ_NEW); + } if (!req) { xa_unlock(&cache->reqs); return 0; @@ -264,6 +269,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, } xas_clear_mark(&xas, CACHEFILES_REQ_NEW); + cache->req_id_next = xas.xa_index + 1; xa_unlock(&cache->reqs); id = xas.xa_index; From ee0175b3b44288c74d5292c2a9c2c154f6c0317e Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Sun, 7 Aug 2022 21:21:15 +0200 Subject: [PATCH 564/654] gpio: realtek-otto: switch to 32-bit I/O By using 16-bit I/O on the GPIO peripheral, which is apparently not safe on MIPS, the IMR can end up containing garbage. This then results in interrupt triggers for lines that don't have an interrupt handler associated. The irq_desc lookup fails, and the ISR will not be cleared, keeping the CPU busy until reboot, or until another IMR operation restores the correct value. This situation appears to happen very rarely, for < 0.5% of IMR writes. Instead of using 8-bit or 16-bit I/O operations on the 32-bit memory mapped peripheral registers, switch to using 32-bit I/O only, operating on the entire bank for all single bit line settings. For 2-bit line settings, with 16-bit port values, stick to manual (un)packing. This issue has been seen on RTL8382M (HPE 1920-16G), RTL8391M (Netgear GS728TP v2), and RTL8393M (D-Link DGS-1210-52 F3, Zyxel GS1900-48). Reported-by: Luiz Angelo Daros de Luca # DGS-1210-52 Reported-by: Birger Koblitz # GS728TP Reported-by: Jan Hoffmann # 1920-16G Fixes: 0d82fb1127fb ("gpio: Add Realtek Otto GPIO support") Signed-off-by: Sander Vanheule Cc: Paul Cercueil Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-realtek-otto.c | 166 ++++++++++++++++--------------- 1 file changed, 85 insertions(+), 81 deletions(-) diff --git a/drivers/gpio/gpio-realtek-otto.c b/drivers/gpio/gpio-realtek-otto.c index 63dcf42f7c206..d6418f89d3f63 100644 --- a/drivers/gpio/gpio-realtek-otto.c +++ b/drivers/gpio/gpio-realtek-otto.c @@ -46,10 +46,20 @@ * @lock: Lock for accessing the IRQ registers and values * @intr_mask: Mask for interrupts lines * @intr_type: Interrupt type selection + * @bank_read: Read a bank setting as a single 32-bit value + * @bank_write: Write a bank setting as a single 32-bit value + * @imr_line_pos: Bit shift of an IRQ line's IMR value. + * + * The DIR, DATA, and ISR registers consist of four 8-bit port values, packed + * into a single 32-bit register. Use @bank_read (@bank_write) to get (assign) + * a value from (to) these registers. The IMR register consists of four 16-bit + * port values, packed into two 32-bit registers. Use @imr_line_pos to get the + * bit shift of the 2-bit field for a line's IMR settings. Shifts larger than + * 32 overflow into the second register. * * Because the interrupt mask register (IMR) combines the function of IRQ type * selection and masking, two extra values are stored. @intr_mask is used to - * mask/unmask the interrupts for a GPIO port, and @intr_type is used to store + * mask/unmask the interrupts for a GPIO line, and @intr_type is used to store * the selected interrupt types. The logical AND of these values is written to * IMR on changes. */ @@ -59,10 +69,11 @@ struct realtek_gpio_ctrl { void __iomem *cpumask_base; struct cpumask cpu_irq_maskable; raw_spinlock_t lock; - u16 intr_mask[REALTEK_GPIO_PORTS_PER_BANK]; - u16 intr_type[REALTEK_GPIO_PORTS_PER_BANK]; - unsigned int (*port_offset_u8)(unsigned int port); - unsigned int (*port_offset_u16)(unsigned int port); + u8 intr_mask[REALTEK_GPIO_MAX]; + u8 intr_type[REALTEK_GPIO_MAX]; + u32 (*bank_read)(void __iomem *reg); + void (*bank_write)(void __iomem *reg, u32 value); + unsigned int (*line_imr_pos)(unsigned int line); }; /* Expand with more flags as devices with other quirks are added */ @@ -101,14 +112,22 @@ static struct realtek_gpio_ctrl *irq_data_to_ctrl(struct irq_data *data) * port. The two interrupt mask registers store two bits per GPIO, so use u16 * values. */ -static unsigned int realtek_gpio_port_offset_u8(unsigned int port) +static u32 realtek_gpio_bank_read_swapped(void __iomem *reg) { - return port; + return ioread32be(reg); } -static unsigned int realtek_gpio_port_offset_u16(unsigned int port) +static void realtek_gpio_bank_write_swapped(void __iomem *reg, u32 value) { - return 2 * port; + iowrite32be(value, reg); +} + +static unsigned int realtek_gpio_line_imr_pos_swapped(unsigned int line) +{ + unsigned int port_pin = line % 8; + unsigned int port = line / 8; + + return 2 * (8 * (port ^ 1) + port_pin); } /* @@ -119,66 +138,67 @@ static unsigned int realtek_gpio_port_offset_u16(unsigned int port) * per GPIO, so use u16 values. The first register contains ports 1 and 0, the * second ports 3 and 2. */ -static unsigned int realtek_gpio_port_offset_u8_rev(unsigned int port) +static u32 realtek_gpio_bank_read(void __iomem *reg) { - return 3 - port; + return ioread32(reg); } -static unsigned int realtek_gpio_port_offset_u16_rev(unsigned int port) +static void realtek_gpio_bank_write(void __iomem *reg, u32 value) { - return 2 * (port ^ 1); + iowrite32(value, reg); } -static void realtek_gpio_write_imr(struct realtek_gpio_ctrl *ctrl, - unsigned int port, u16 irq_type, u16 irq_mask) +static unsigned int realtek_gpio_line_imr_pos(unsigned int line) { - iowrite16(irq_type & irq_mask, - ctrl->base + REALTEK_GPIO_REG_IMR + ctrl->port_offset_u16(port)); + return 2 * line; } -static void realtek_gpio_clear_isr(struct realtek_gpio_ctrl *ctrl, - unsigned int port, u8 mask) +static void realtek_gpio_clear_isr(struct realtek_gpio_ctrl *ctrl, u32 mask) { - iowrite8(mask, ctrl->base + REALTEK_GPIO_REG_ISR + ctrl->port_offset_u8(port)); + ctrl->bank_write(ctrl->base + REALTEK_GPIO_REG_ISR, mask); } -static u8 realtek_gpio_read_isr(struct realtek_gpio_ctrl *ctrl, unsigned int port) +static u32 realtek_gpio_read_isr(struct realtek_gpio_ctrl *ctrl) { - return ioread8(ctrl->base + REALTEK_GPIO_REG_ISR + ctrl->port_offset_u8(port)); + return ctrl->bank_read(ctrl->base + REALTEK_GPIO_REG_ISR); } -/* Set the rising and falling edge mask bits for a GPIO port pin */ -static u16 realtek_gpio_imr_bits(unsigned int pin, u16 value) +/* Set the rising and falling edge mask bits for a GPIO pin */ +static void realtek_gpio_update_line_imr(struct realtek_gpio_ctrl *ctrl, unsigned int line) { - return (value & REALTEK_GPIO_IMR_LINE_MASK) << 2 * pin; + void __iomem *reg = ctrl->base + REALTEK_GPIO_REG_IMR; + unsigned int line_shift = ctrl->line_imr_pos(line); + unsigned int shift = line_shift % 32; + u32 irq_type = ctrl->intr_type[line]; + u32 irq_mask = ctrl->intr_mask[line]; + u32 reg_val; + + reg += 4 * (line_shift / 32); + reg_val = ioread32(reg); + reg_val &= ~(REALTEK_GPIO_IMR_LINE_MASK << shift); + reg_val |= (irq_type & irq_mask & REALTEK_GPIO_IMR_LINE_MASK) << shift; + iowrite32(reg_val, reg); } static void realtek_gpio_irq_ack(struct irq_data *data) { struct realtek_gpio_ctrl *ctrl = irq_data_to_ctrl(data); irq_hw_number_t line = irqd_to_hwirq(data); - unsigned int port = line / 8; - unsigned int port_pin = line % 8; - realtek_gpio_clear_isr(ctrl, port, BIT(port_pin)); + realtek_gpio_clear_isr(ctrl, BIT(line)); } static void realtek_gpio_irq_unmask(struct irq_data *data) { struct realtek_gpio_ctrl *ctrl = irq_data_to_ctrl(data); unsigned int line = irqd_to_hwirq(data); - unsigned int port = line / 8; - unsigned int port_pin = line % 8; unsigned long flags; - u16 m; gpiochip_enable_irq(&ctrl->gc, line); raw_spin_lock_irqsave(&ctrl->lock, flags); - m = ctrl->intr_mask[port]; - m |= realtek_gpio_imr_bits(port_pin, REALTEK_GPIO_IMR_LINE_MASK); - ctrl->intr_mask[port] = m; - realtek_gpio_write_imr(ctrl, port, ctrl->intr_type[port], m); + ctrl->intr_mask[line] = REALTEK_GPIO_IMR_LINE_MASK; + realtek_gpio_update_line_imr(ctrl, line); raw_spin_unlock_irqrestore(&ctrl->lock, flags); } @@ -186,16 +206,11 @@ static void realtek_gpio_irq_mask(struct irq_data *data) { struct realtek_gpio_ctrl *ctrl = irq_data_to_ctrl(data); unsigned int line = irqd_to_hwirq(data); - unsigned int port = line / 8; - unsigned int port_pin = line % 8; unsigned long flags; - u16 m; raw_spin_lock_irqsave(&ctrl->lock, flags); - m = ctrl->intr_mask[port]; - m &= ~realtek_gpio_imr_bits(port_pin, REALTEK_GPIO_IMR_LINE_MASK); - ctrl->intr_mask[port] = m; - realtek_gpio_write_imr(ctrl, port, ctrl->intr_type[port], m); + ctrl->intr_mask[line] = 0; + realtek_gpio_update_line_imr(ctrl, line); raw_spin_unlock_irqrestore(&ctrl->lock, flags); gpiochip_disable_irq(&ctrl->gc, line); @@ -205,10 +220,8 @@ static int realtek_gpio_irq_set_type(struct irq_data *data, unsigned int flow_ty { struct realtek_gpio_ctrl *ctrl = irq_data_to_ctrl(data); unsigned int line = irqd_to_hwirq(data); - unsigned int port = line / 8; - unsigned int port_pin = line % 8; unsigned long flags; - u16 type, t; + u8 type; switch (flow_type & IRQ_TYPE_SENSE_MASK) { case IRQ_TYPE_EDGE_FALLING: @@ -227,11 +240,8 @@ static int realtek_gpio_irq_set_type(struct irq_data *data, unsigned int flow_ty irq_set_handler_locked(data, handle_edge_irq); raw_spin_lock_irqsave(&ctrl->lock, flags); - t = ctrl->intr_type[port]; - t &= ~realtek_gpio_imr_bits(port_pin, REALTEK_GPIO_IMR_LINE_MASK); - t |= realtek_gpio_imr_bits(port_pin, type); - ctrl->intr_type[port] = t; - realtek_gpio_write_imr(ctrl, port, t, ctrl->intr_mask[port]); + ctrl->intr_type[line] = type; + realtek_gpio_update_line_imr(ctrl, line); raw_spin_unlock_irqrestore(&ctrl->lock, flags); return 0; @@ -242,28 +252,21 @@ static void realtek_gpio_irq_handler(struct irq_desc *desc) struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct realtek_gpio_ctrl *ctrl = gpiochip_get_data(gc); struct irq_chip *irq_chip = irq_desc_get_chip(desc); - unsigned int lines_done; - unsigned int port_pin_count; unsigned long status; int offset; chained_irq_enter(irq_chip, desc); - for (lines_done = 0; lines_done < gc->ngpio; lines_done += 8) { - status = realtek_gpio_read_isr(ctrl, lines_done / 8); - port_pin_count = min(gc->ngpio - lines_done, 8U); - for_each_set_bit(offset, &status, port_pin_count) - generic_handle_domain_irq(gc->irq.domain, offset + lines_done); - } + status = realtek_gpio_read_isr(ctrl); + for_each_set_bit(offset, &status, gc->ngpio) + generic_handle_domain_irq(gc->irq.domain, offset); chained_irq_exit(irq_chip, desc); } -static inline void __iomem *realtek_gpio_irq_cpu_mask(struct realtek_gpio_ctrl *ctrl, - unsigned int port, int cpu) +static inline void __iomem *realtek_gpio_irq_cpu_mask(struct realtek_gpio_ctrl *ctrl, int cpu) { - return ctrl->cpumask_base + ctrl->port_offset_u8(port) + - REALTEK_GPIO_PORTS_PER_BANK * cpu; + return ctrl->cpumask_base + REALTEK_GPIO_PORTS_PER_BANK * cpu; } static int realtek_gpio_irq_set_affinity(struct irq_data *data, @@ -271,12 +274,10 @@ static int realtek_gpio_irq_set_affinity(struct irq_data *data, { struct realtek_gpio_ctrl *ctrl = irq_data_to_ctrl(data); unsigned int line = irqd_to_hwirq(data); - unsigned int port = line / 8; - unsigned int port_pin = line % 8; void __iomem *irq_cpu_mask; unsigned long flags; int cpu; - u8 v; + u32 v; if (!ctrl->cpumask_base) return -ENXIO; @@ -284,15 +285,15 @@ static int realtek_gpio_irq_set_affinity(struct irq_data *data, raw_spin_lock_irqsave(&ctrl->lock, flags); for_each_cpu(cpu, &ctrl->cpu_irq_maskable) { - irq_cpu_mask = realtek_gpio_irq_cpu_mask(ctrl, port, cpu); - v = ioread8(irq_cpu_mask); + irq_cpu_mask = realtek_gpio_irq_cpu_mask(ctrl, cpu); + v = ctrl->bank_read(irq_cpu_mask); if (cpumask_test_cpu(cpu, dest)) - v |= BIT(port_pin); + v |= BIT(line); else - v &= ~BIT(port_pin); + v &= ~BIT(line); - iowrite8(v, irq_cpu_mask); + ctrl->bank_write(irq_cpu_mask, v); } raw_spin_unlock_irqrestore(&ctrl->lock, flags); @@ -305,16 +306,17 @@ static int realtek_gpio_irq_set_affinity(struct irq_data *data, static int realtek_gpio_irq_init(struct gpio_chip *gc) { struct realtek_gpio_ctrl *ctrl = gpiochip_get_data(gc); - unsigned int port; + u32 mask_all = GENMASK(gc->ngpio - 1, 0); + unsigned int line; int cpu; - for (port = 0; (port * 8) < gc->ngpio; port++) { - realtek_gpio_write_imr(ctrl, port, 0, 0); - realtek_gpio_clear_isr(ctrl, port, GENMASK(7, 0)); + for (line = 0; line < gc->ngpio; line++) + realtek_gpio_update_line_imr(ctrl, line); - for_each_cpu(cpu, &ctrl->cpu_irq_maskable) - iowrite8(GENMASK(7, 0), realtek_gpio_irq_cpu_mask(ctrl, port, cpu)); - } + realtek_gpio_clear_isr(ctrl, mask_all); + + for_each_cpu(cpu, &ctrl->cpu_irq_maskable) + ctrl->bank_write(realtek_gpio_irq_cpu_mask(ctrl, cpu), mask_all); return 0; } @@ -387,12 +389,14 @@ static int realtek_gpio_probe(struct platform_device *pdev) if (dev_flags & GPIO_PORTS_REVERSED) { bgpio_flags = 0; - ctrl->port_offset_u8 = realtek_gpio_port_offset_u8_rev; - ctrl->port_offset_u16 = realtek_gpio_port_offset_u16_rev; + ctrl->bank_read = realtek_gpio_bank_read; + ctrl->bank_write = realtek_gpio_bank_write; + ctrl->line_imr_pos = realtek_gpio_line_imr_pos; } else { bgpio_flags = BGPIOF_BIG_ENDIAN_BYTE_ORDER; - ctrl->port_offset_u8 = realtek_gpio_port_offset_u8; - ctrl->port_offset_u16 = realtek_gpio_port_offset_u16; + ctrl->bank_read = realtek_gpio_bank_read_swapped; + ctrl->bank_write = realtek_gpio_bank_write_swapped; + ctrl->line_imr_pos = realtek_gpio_line_imr_pos_swapped; } err = bgpio_init(&ctrl->gc, dev, 4, From abb5f3f4b1f5f0ad50eb067a00051d3587dec9fb Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 31 Aug 2022 10:53:25 -0700 Subject: [PATCH 565/654] Revert "clk: core: Honor CLK_OPS_PARENT_ENABLE for clk gate ops" This reverts commit 35b0fac808b95eea1212f8860baf6ad25b88b087. Alexander reports that it causes boot failures on i.MX8M Plus based boards (specifically imx8mp-tqma8mpql-mba8mpxl.dts). Reported-by: Alexander Stein Cc: Chen-Yu Tsai Fixes: 35b0fac808b9 ("clk: core: Honor CLK_OPS_PARENT_ENABLE for clk gate ops") Link: https://lore.kernel.org/r/12115951.O9o76ZdvQC@steina-w Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20220831175326.2523912-1-sboyd@kernel.org --- drivers/clk/clk.c | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 2e29a72c68e1b..bd0b35cac83e3 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -196,9 +196,6 @@ static bool clk_core_rate_is_protected(struct clk_core *core) return core->protect_count; } -static int clk_core_prepare_enable(struct clk_core *core); -static void clk_core_disable_unprepare(struct clk_core *core); - static bool clk_core_is_prepared(struct clk_core *core) { bool ret = false; @@ -211,11 +208,7 @@ static bool clk_core_is_prepared(struct clk_core *core) return core->prepare_count; if (!clk_pm_runtime_get(core)) { - if (core->flags & CLK_OPS_PARENT_ENABLE) - clk_core_prepare_enable(core->parent); ret = core->ops->is_prepared(core->hw); - if (core->flags & CLK_OPS_PARENT_ENABLE) - clk_core_disable_unprepare(core->parent); clk_pm_runtime_put(core); } @@ -251,13 +244,7 @@ static bool clk_core_is_enabled(struct clk_core *core) } } - if (core->flags & CLK_OPS_PARENT_ENABLE) - clk_core_prepare_enable(core->parent); - ret = core->ops->is_enabled(core->hw); - - if (core->flags & CLK_OPS_PARENT_ENABLE) - clk_core_disable_unprepare(core->parent); done: if (core->rpm_enabled) pm_runtime_put(core->dev); @@ -825,9 +812,6 @@ int clk_rate_exclusive_get(struct clk *clk) } EXPORT_SYMBOL_GPL(clk_rate_exclusive_get); -static int clk_core_enable_lock(struct clk_core *core); -static void clk_core_disable_lock(struct clk_core *core); - static void clk_core_unprepare(struct clk_core *core) { lockdep_assert_held(&prepare_lock); @@ -851,18 +835,12 @@ static void clk_core_unprepare(struct clk_core *core) WARN(core->enable_count > 0, "Unpreparing enabled %s\n", core->name); - if (core->flags & CLK_OPS_PARENT_ENABLE) - clk_core_enable_lock(core->parent); - trace_clk_unprepare(core); if (core->ops->unprepare) core->ops->unprepare(core->hw); trace_clk_unprepare_complete(core); - - if (core->flags & CLK_OPS_PARENT_ENABLE) - clk_core_disable_lock(core->parent); clk_core_unprepare(core->parent); clk_pm_runtime_put(core); } @@ -912,9 +890,6 @@ static int clk_core_prepare(struct clk_core *core) if (ret) goto runtime_put; - if (core->flags & CLK_OPS_PARENT_ENABLE) - clk_core_enable_lock(core->parent); - trace_clk_prepare(core); if (core->ops->prepare) @@ -922,9 +897,6 @@ static int clk_core_prepare(struct clk_core *core) trace_clk_prepare_complete(core); - if (core->flags & CLK_OPS_PARENT_ENABLE) - clk_core_disable_lock(core->parent); - if (ret) goto unprepare; } From 3a1a274e933fca73fdc960cb1f60636cd285a265 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Fri, 26 Aug 2022 11:59:16 -0400 Subject: [PATCH 566/654] mlxbf_gige: compute MDIO period based on i1clk This patch adds logic to compute the MDIO period based on the i1clk, and thereafter write the MDIO period into the YU MDIO config register. The i1clk resource from the ACPI table is used to provide addressing to YU bootrecord PLL registers. The values in these registers are used to compute MDIO period. If the i1clk resource is not present in the ACPI table, then the current default hardcorded value of 430Mhz is used. The i1clk clock value of 430MHz is only accurate for boards with BF2 mid bin and main bin SoCs. The BF2 high bin SoCs have i1clk = 500MHz, but can support a slower MDIO period. Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver") Reviewed-by: Asmaa Mnebhi Signed-off-by: David Thompson Link: https://lore.kernel.org/r/20220826155916.12491-1-davthompson@nvidia.com Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlxbf_gige/mlxbf_gige.h | 4 +- .../mellanox/mlxbf_gige/mlxbf_gige_mdio.c | 122 +++++++++++++++--- .../mellanox/mlxbf_gige/mlxbf_gige_regs.h | 2 + 3 files changed, 110 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h index 5fdf9b7179f55..5a1027b072155 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h @@ -75,6 +75,7 @@ struct mlxbf_gige { struct net_device *netdev; struct platform_device *pdev; void __iomem *mdio_io; + void __iomem *clk_io; struct mii_bus *mdiobus; spinlock_t lock; /* for packet processing indices */ u16 rx_q_entries; @@ -137,7 +138,8 @@ enum mlxbf_gige_res { MLXBF_GIGE_RES_MDIO9, MLXBF_GIGE_RES_GPIO0, MLXBF_GIGE_RES_LLU, - MLXBF_GIGE_RES_PLU + MLXBF_GIGE_RES_PLU, + MLXBF_GIGE_RES_CLK }; /* Version of register data returned by mlxbf_gige_get_regs() */ diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c index 2e6c1b7af0964..85155cd9405c5 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c @@ -22,10 +22,23 @@ #include #include "mlxbf_gige.h" +#include "mlxbf_gige_regs.h" #define MLXBF_GIGE_MDIO_GW_OFFSET 0x0 #define MLXBF_GIGE_MDIO_CFG_OFFSET 0x4 +#define MLXBF_GIGE_MDIO_FREQ_REFERENCE 156250000ULL +#define MLXBF_GIGE_MDIO_COREPLL_CONST 16384ULL +#define MLXBF_GIGE_MDC_CLK_NS 400 +#define MLXBF_GIGE_MDIO_PLL_I1CLK_REG1 0x4 +#define MLXBF_GIGE_MDIO_PLL_I1CLK_REG2 0x8 +#define MLXBF_GIGE_MDIO_CORE_F_SHIFT 0 +#define MLXBF_GIGE_MDIO_CORE_F_MASK GENMASK(25, 0) +#define MLXBF_GIGE_MDIO_CORE_R_SHIFT 26 +#define MLXBF_GIGE_MDIO_CORE_R_MASK GENMASK(31, 26) +#define MLXBF_GIGE_MDIO_CORE_OD_SHIFT 0 +#define MLXBF_GIGE_MDIO_CORE_OD_MASK GENMASK(3, 0) + /* Support clause 22 */ #define MLXBF_GIGE_MDIO_CL22_ST1 0x1 #define MLXBF_GIGE_MDIO_CL22_WRITE 0x1 @@ -50,27 +63,76 @@ #define MLXBF_GIGE_MDIO_CFG_MDIO_IN_SAMP_MASK GENMASK(23, 16) #define MLXBF_GIGE_MDIO_CFG_MDIO_OUT_SAMP_MASK GENMASK(31, 24) +#define MLXBF_GIGE_MDIO_CFG_VAL (FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_MODE_MASK, 1) | \ + FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO3_3_MASK, 1) | \ + FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_FULL_DRIVE_MASK, 1) | \ + FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_IN_SAMP_MASK, 6) | \ + FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_OUT_SAMP_MASK, 13)) + +#define MLXBF_GIGE_BF2_COREPLL_ADDR 0x02800c30 +#define MLXBF_GIGE_BF2_COREPLL_SIZE 0x0000000c + +static struct resource corepll_params[] = { + [MLXBF_GIGE_VERSION_BF2] = { + .start = MLXBF_GIGE_BF2_COREPLL_ADDR, + .end = MLXBF_GIGE_BF2_COREPLL_ADDR + MLXBF_GIGE_BF2_COREPLL_SIZE - 1, + .name = "COREPLL_RES" + }, +}; + +/* Returns core clock i1clk in Hz */ +static u64 calculate_i1clk(struct mlxbf_gige *priv) +{ + u8 core_od, core_r; + u64 freq_output; + u32 reg1, reg2; + u32 core_f; + + reg1 = readl(priv->clk_io + MLXBF_GIGE_MDIO_PLL_I1CLK_REG1); + reg2 = readl(priv->clk_io + MLXBF_GIGE_MDIO_PLL_I1CLK_REG2); + + core_f = (reg1 & MLXBF_GIGE_MDIO_CORE_F_MASK) >> + MLXBF_GIGE_MDIO_CORE_F_SHIFT; + core_r = (reg1 & MLXBF_GIGE_MDIO_CORE_R_MASK) >> + MLXBF_GIGE_MDIO_CORE_R_SHIFT; + core_od = (reg2 & MLXBF_GIGE_MDIO_CORE_OD_MASK) >> + MLXBF_GIGE_MDIO_CORE_OD_SHIFT; + + /* Compute PLL output frequency as follow: + * + * CORE_F / 16384 + * freq_output = freq_reference * ---------------------------- + * (CORE_R + 1) * (CORE_OD + 1) + */ + freq_output = div_u64((MLXBF_GIGE_MDIO_FREQ_REFERENCE * core_f), + MLXBF_GIGE_MDIO_COREPLL_CONST); + freq_output = div_u64(freq_output, (core_r + 1) * (core_od + 1)); + + return freq_output; +} + /* Formula for encoding the MDIO period. The encoded value is * passed to the MDIO config register. * - * mdc_clk = 2*(val + 1)*i1clk + * mdc_clk = 2*(val + 1)*(core clock in sec) * - * 400 ns = 2*(val + 1)*(((1/430)*1000) ns) + * i1clk is in Hz: + * 400 ns = 2*(val + 1)*(1/i1clk) * - * val = (((400 * 430 / 1000) / 2) - 1) + * val = (((400/10^9) / (1/i1clk) / 2) - 1) + * val = (400/2 * i1clk)/10^9 - 1 */ -#define MLXBF_GIGE_I1CLK_MHZ 430 -#define MLXBF_GIGE_MDC_CLK_NS 400 +static u8 mdio_period_map(struct mlxbf_gige *priv) +{ + u8 mdio_period; + u64 i1clk; -#define MLXBF_GIGE_MDIO_PERIOD (((MLXBF_GIGE_MDC_CLK_NS * MLXBF_GIGE_I1CLK_MHZ / 1000) / 2) - 1) + i1clk = calculate_i1clk(priv); -#define MLXBF_GIGE_MDIO_CFG_VAL (FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_MODE_MASK, 1) | \ - FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO3_3_MASK, 1) | \ - FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_FULL_DRIVE_MASK, 1) | \ - FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDC_PERIOD_MASK, \ - MLXBF_GIGE_MDIO_PERIOD) | \ - FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_IN_SAMP_MASK, 6) | \ - FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_OUT_SAMP_MASK, 13)) + mdio_period = div_u64((MLXBF_GIGE_MDC_CLK_NS >> 1) * i1clk, 1000000000) - 1; + + return mdio_period; +} static u32 mlxbf_gige_mdio_create_cmd(u16 data, int phy_add, int phy_reg, u32 opcode) @@ -124,9 +186,9 @@ static int mlxbf_gige_mdio_write(struct mii_bus *bus, int phy_add, int phy_reg, u16 val) { struct mlxbf_gige *priv = bus->priv; + u32 temp; u32 cmd; int ret; - u32 temp; if (phy_reg & MII_ADDR_C45) return -EOPNOTSUPP; @@ -144,18 +206,44 @@ static int mlxbf_gige_mdio_write(struct mii_bus *bus, int phy_add, return ret; } +static void mlxbf_gige_mdio_cfg(struct mlxbf_gige *priv) +{ + u8 mdio_period; + u32 val; + + mdio_period = mdio_period_map(priv); + + val = MLXBF_GIGE_MDIO_CFG_VAL; + val |= FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDC_PERIOD_MASK, mdio_period); + writel(val, priv->mdio_io + MLXBF_GIGE_MDIO_CFG_OFFSET); +} + int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv) { struct device *dev = &pdev->dev; + struct resource *res; int ret; priv->mdio_io = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MDIO9); if (IS_ERR(priv->mdio_io)) return PTR_ERR(priv->mdio_io); - /* Configure mdio parameters */ - writel(MLXBF_GIGE_MDIO_CFG_VAL, - priv->mdio_io + MLXBF_GIGE_MDIO_CFG_OFFSET); + /* clk resource shared with other drivers so cannot use + * devm_platform_ioremap_resource + */ + res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_CLK); + if (!res) { + /* For backward compatibility with older ACPI tables, also keep + * CLK resource internal to the driver. + */ + res = &corepll_params[MLXBF_GIGE_VERSION_BF2]; + } + + priv->clk_io = devm_ioremap(dev, res->start, resource_size(res)); + if (IS_ERR(priv->clk_io)) + return PTR_ERR(priv->clk_io); + + mlxbf_gige_mdio_cfg(priv); priv->mdiobus = devm_mdiobus_alloc(dev); if (!priv->mdiobus) { diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h index 5fb33c9294bf9..7be3a793984d5 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h @@ -8,6 +8,8 @@ #ifndef __MLXBF_GIGE_REGS_H__ #define __MLXBF_GIGE_REGS_H__ +#define MLXBF_GIGE_VERSION 0x0000 +#define MLXBF_GIGE_VERSION_BF2 0x0 #define MLXBF_GIGE_STATUS 0x0010 #define MLXBF_GIGE_STATUS_READY BIT(0) #define MLXBF_GIGE_INT_STATUS 0x0028 From 8fc29ff3910f3af08a7c40a75d436b5720efe2bf Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 27 Aug 2022 11:13:14 -0700 Subject: [PATCH 567/654] kcm: fix strp_init() order and cleanup strp_init() is called just a few lines above this csk->sk_user_data check, it also initializes strp->work etc., therefore, it is unnecessary to call strp_done() to cancel the freshly initialized work. And if sk_user_data is already used by KCM, psock->strp should not be touched, particularly strp->work state, so we need to move strp_init() after the csk->sk_user_data check. This also makes a lockdep warning reported by syzbot go away. Reported-and-tested-by: syzbot+9fc084a4348493ef65d2@syzkaller.appspotmail.com Reported-by: syzbot+e696806ef96cdd2d87cd@syzkaller.appspotmail.com Fixes: e5571240236c ("kcm: Check if sk_user_data already set in kcm_attach") Fixes: dff8baa26117 ("kcm: Call strp_stop before strp_done in kcm_attach") Cc: Tom Herbert Signed-off-by: Cong Wang Link: https://lore.kernel.org/r/20220827181314.193710-1-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/kcm/kcmsock.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 71899e5a5a111..1215c863e1c41 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1412,12 +1412,6 @@ static int kcm_attach(struct socket *sock, struct socket *csock, psock->sk = csk; psock->bpf_prog = prog; - err = strp_init(&psock->strp, csk, &cb); - if (err) { - kmem_cache_free(kcm_psockp, psock); - goto out; - } - write_lock_bh(&csk->sk_callback_lock); /* Check if sk_user_data is already by KCM or someone else. @@ -1425,13 +1419,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock, */ if (csk->sk_user_data) { write_unlock_bh(&csk->sk_callback_lock); - strp_stop(&psock->strp); - strp_done(&psock->strp); kmem_cache_free(kcm_psockp, psock); err = -EALREADY; goto out; } + err = strp_init(&psock->strp, csk, &cb); + if (err) { + write_unlock_bh(&csk->sk_callback_lock); + kmem_cache_free(kcm_psockp, psock); + goto out; + } + psock->save_data_ready = csk->sk_data_ready; psock->save_write_space = csk->sk_write_space; psock->save_state_change = csk->sk_state_change; From 404a5ad72011f5bd2bb90f0a035be7635e2bd839 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 29 Aug 2022 16:54:14 -0700 Subject: [PATCH 568/654] Documentation: networking: correct possessive "its" Change occurrences of "it's" that are possessive to "its" so that they don't read as "it is". Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: Eric Dumazet Cc: Paolo Abeni Cc: Jiri Pirko Link: https://lore.kernel.org/r/20220829235414.17110-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- Documentation/networking/devlink/netdevsim.rst | 2 +- Documentation/networking/driver.rst | 2 +- Documentation/networking/ipvlan.rst | 2 +- Documentation/networking/l2tp.rst | 2 +- Documentation/networking/switchdev.rst | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/networking/devlink/netdevsim.rst b/Documentation/networking/devlink/netdevsim.rst index 8a292fb5aaea3..ec5e6d79b2e24 100644 --- a/Documentation/networking/devlink/netdevsim.rst +++ b/Documentation/networking/devlink/netdevsim.rst @@ -67,7 +67,7 @@ The ``netdevsim`` driver supports rate objects management, which includes: - setting tx_share and tx_max rate values for any rate object type; - setting parent node for any rate object type. -Rate nodes and it's parameters are exposed in ``netdevsim`` debugfs in RO mode. +Rate nodes and their parameters are exposed in ``netdevsim`` debugfs in RO mode. For example created rate node with name ``some_group``: .. code:: shell diff --git a/Documentation/networking/driver.rst b/Documentation/networking/driver.rst index c8f59dbda46f7..64f7236ff10be 100644 --- a/Documentation/networking/driver.rst +++ b/Documentation/networking/driver.rst @@ -8,7 +8,7 @@ Transmit path guidelines: 1) The ndo_start_xmit method must not return NETDEV_TX_BUSY under any normal circumstances. It is considered a hard error unless - there is no way your device can tell ahead of time when it's + there is no way your device can tell ahead of time when its transmit function will become busy. Instead it must maintain the queue properly. For example, diff --git a/Documentation/networking/ipvlan.rst b/Documentation/networking/ipvlan.rst index 694adcba36b06..0000c1d383bc0 100644 --- a/Documentation/networking/ipvlan.rst +++ b/Documentation/networking/ipvlan.rst @@ -11,7 +11,7 @@ Initial Release: ================ This is conceptually very similar to the macvlan driver with one major exception of using L3 for mux-ing /demux-ing among slaves. This property makes -the master device share the L2 with it's slave devices. I have developed this +the master device share the L2 with its slave devices. I have developed this driver in conjunction with network namespaces and not sure if there is use case outside of it. diff --git a/Documentation/networking/l2tp.rst b/Documentation/networking/l2tp.rst index 498b382d25a0b..7f383e99dbada 100644 --- a/Documentation/networking/l2tp.rst +++ b/Documentation/networking/l2tp.rst @@ -530,7 +530,7 @@ its tunnel close actions. For L2TPIP sockets, the socket's close handler initiates the same tunnel close actions. All sessions are first closed. Each session drops its tunnel ref. When the tunnel ref reaches zero, the tunnel puts its socket ref. When the socket is -eventually destroyed, it's sk_destruct finally frees the L2TP tunnel +eventually destroyed, its sk_destruct finally frees the L2TP tunnel context. Sessions diff --git a/Documentation/networking/switchdev.rst b/Documentation/networking/switchdev.rst index f1f4e6a85a297..bbf272e9d607b 100644 --- a/Documentation/networking/switchdev.rst +++ b/Documentation/networking/switchdev.rst @@ -159,7 +159,7 @@ tools such as iproute2. The switchdev driver can know a particular port's position in the topology by monitoring NETDEV_CHANGEUPPER notifications. For example, a port moved into a -bond will see it's upper master change. If that bond is moved into a bridge, +bond will see its upper master change. If that bond is moved into a bridge, the bond's upper master will change. And so on. The driver will track such movements to know what position a port is in in the overall topology by registering for netdevice events and acting on NETDEV_CHANGEUPPER. From 5a3a59981027b53ec0f729ad76a43ce2b64ad968 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Mon, 29 Aug 2022 11:47:48 -0700 Subject: [PATCH 569/654] selftests: net: sort .gitignore file This is the result of `sort tools/testing/selftests/net/.gitignore`, but preserving the comment at the top. Suggested-by: Jakub Kicinski Signed-off-by: Axel Rasmussen Link: https://lore.kernel.org/r/20220829184748.1535580-1-axelrasmussen@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/.gitignore | 50 +++++++++++++------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 0e5751af6247f..de7d5cc15f857 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,42 +1,42 @@ # SPDX-License-Identifier: GPL-2.0-only +cmsg_sender +fin_ack_lat +gro +hwtstamp_config +ioam6_parser +ip_defrag ipsec +ipv6_flowlabel +ipv6_flowlabel_mgr msg_zerocopy -socket +nettest psock_fanout psock_snd psock_tpacket -stress_reuseport_listen +reuseaddr_conflict +reuseaddr_ports_exhausted reuseport_addr_any reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa reuseport_dualstack -reuseaddr_conflict -tcp_mmap -udpgso -udpgso_bench_rx -udpgso_bench_tx -tcp_inq -tls -txring_overwrite -ip_defrag -ipv6_flowlabel -ipv6_flowlabel_mgr -so_txtime -tcp_fastopen_backup_key -nettest -fin_ack_lat -reuseaddr_ports_exhausted -hwtstamp_config rxtimestamp -timestamping -txtimestamp +socket so_netns_cookie +so_txtime +stress_reuseport_listen +tap +tcp_fastopen_backup_key +tcp_inq +tcp_mmap test_unix_oob -gro -ioam6_parser +timestamping +tls toeplitz tun -cmsg_sender +txring_overwrite +txtimestamp +udpgso +udpgso_bench_rx +udpgso_bench_tx unix_connect -tap \ No newline at end of file From 39c84b8e929dbd4f63be7e04bf1a2bcd92b44177 Mon Sep 17 00:00:00 2001 From: Chengming Gui Date: Tue, 30 Aug 2022 16:33:01 +0800 Subject: [PATCH 570/654] drm/amd/amdgpu: skip ucode loading if ucode_size == 0 Restrict the ucode loading check to avoid frontdoor loading error. Signed-off-by: Chengming Gui Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 1036446abc308..9f7a5e393f85e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2401,7 +2401,7 @@ static int psp_load_smu_fw(struct psp_context *psp) static bool fw_load_skip_check(struct psp_context *psp, struct amdgpu_firmware_info *ucode) { - if (!ucode->fw) + if (!ucode->fw || !ucode->ucode_size) return true; if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC && From 90fabae8a2c225c4e4936723c38857887edde5cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 31 Aug 2022 11:21:03 +0200 Subject: [PATCH 571/654] sch_cake: Return __NET_XMIT_STOLEN when consuming enqueued skb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the GSO splitting feature of sch_cake is enabled, GSO superpackets will be broken up and the resulting segments enqueued in place of the original skb. In this case, CAKE calls consume_skb() on the original skb, but still returns NET_XMIT_SUCCESS. This can confuse parent qdiscs into assuming the original skb still exists, when it really has been freed. Fix this by adding the __NET_XMIT_STOLEN flag to the return value in this case. Fixes: 0c850344d388 ("sch_cake: Conditionally split GSO segments") Signed-off-by: Toke Høiland-Jørgensen Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-18231 Link: https://lore.kernel.org/r/20220831092103.442868-1-toke@toke.dk Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index a43a58a73d096..a04928082e4ab 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1713,6 +1713,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } idx--; flow = &b->flows[idx]; + ret = NET_XMIT_SUCCESS; /* ensure shaper state isn't stale */ if (!b->tin_backlog) { @@ -1771,6 +1772,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen); consume_skb(skb); + ret |= __NET_XMIT_STOLEN; } else { /* not splitting */ cobalt_set_enqueue_time(skb, now); @@ -1904,7 +1906,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } b->drop_overlimit += dropped; } - return NET_XMIT_SUCCESS; + return ret; } static struct sk_buff *cake_dequeue_one(struct Qdisc *sch) From 2555283eb40df89945557273121e9393ef9b542b Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 31 Aug 2022 19:06:00 +0200 Subject: [PATCH 572/654] mm/rmap: Fix anon_vma->degree ambiguity leading to double-reuse anon_vma->degree tracks the combined number of child anon_vmas and VMAs that use the anon_vma as their ->anon_vma. anon_vma_clone() then assumes that for any anon_vma attached to src->anon_vma_chain other than src->anon_vma, it is impossible for it to be a leaf node of the VMA tree, meaning that for such VMAs ->degree is elevated by 1 because of a child anon_vma, meaning that if ->degree equals 1 there are no VMAs that use the anon_vma as their ->anon_vma. This assumption is wrong because the ->degree optimization leads to leaf nodes being abandoned on anon_vma_clone() - an existing anon_vma is reused and no new parent-child relationship is created. So it is possible to reuse an anon_vma for one VMA while it is still tied to another VMA. This is an issue because is_mergeable_anon_vma() and its callers assume that if two VMAs have the same ->anon_vma, the list of anon_vmas attached to the VMAs is guaranteed to be the same. When this assumption is violated, vma_merge() can merge pages into a VMA that is not attached to the corresponding anon_vma, leading to dangling page->mapping pointers that will be dereferenced during rmap walks. Fix it by separately tracking the number of child anon_vmas and the number of VMAs using the anon_vma as their ->anon_vma. Fixes: 7a3ef208e662 ("mm: prevent endless growth of anon_vma hierarchy") Cc: stable@kernel.org Acked-by: Michal Hocko Acked-by: Vlastimil Babka Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 7 +++++-- mm/rmap.c | 29 ++++++++++++++++------------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bf80adca980b9..b89b4b86951f8 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -41,12 +41,15 @@ struct anon_vma { atomic_t refcount; /* - * Count of child anon_vmas and VMAs which points to this anon_vma. + * Count of child anon_vmas. Equals to the count of all anon_vmas that + * have ->parent pointing to this one, including itself. * * This counter is used for making decision about reusing anon_vma * instead of forking new one. See comments in function anon_vma_clone. */ - unsigned degree; + unsigned long num_children; + /* Count of VMAs whose ->anon_vma pointer points to this object. */ + unsigned long num_active_vmas; struct anon_vma *parent; /* Parent of this anon_vma */ diff --git a/mm/rmap.c b/mm/rmap.c index edc06c52bc82e..93d5a6f793d20 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -93,7 +93,8 @@ static inline struct anon_vma *anon_vma_alloc(void) anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); if (anon_vma) { atomic_set(&anon_vma->refcount, 1); - anon_vma->degree = 1; /* Reference for first vma */ + anon_vma->num_children = 0; + anon_vma->num_active_vmas = 0; anon_vma->parent = anon_vma; /* * Initialise the anon_vma root to point to itself. If called @@ -201,6 +202,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma) anon_vma = anon_vma_alloc(); if (unlikely(!anon_vma)) goto out_enomem_free_avc; + anon_vma->num_children++; /* self-parent link for new root */ allocated = anon_vma; } @@ -210,8 +212,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma) if (likely(!vma->anon_vma)) { vma->anon_vma = anon_vma; anon_vma_chain_link(vma, avc, anon_vma); - /* vma reference or self-parent link for new root */ - anon_vma->degree++; + anon_vma->num_active_vmas++; allocated = NULL; avc = NULL; } @@ -296,19 +297,19 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) anon_vma_chain_link(dst, avc, anon_vma); /* - * Reuse existing anon_vma if its degree lower than two, - * that means it has no vma and only one anon_vma child. + * Reuse existing anon_vma if it has no vma and only one + * anon_vma child. * - * Do not choose parent anon_vma, otherwise first child - * will always reuse it. Root anon_vma is never reused: + * Root anon_vma is never reused: * it has self-parent reference and at least one child. */ if (!dst->anon_vma && src->anon_vma && - anon_vma != src->anon_vma && anon_vma->degree < 2) + anon_vma->num_children < 2 && + anon_vma->num_active_vmas == 0) dst->anon_vma = anon_vma; } if (dst->anon_vma) - dst->anon_vma->degree++; + dst->anon_vma->num_active_vmas++; unlock_anon_vma_root(root); return 0; @@ -358,6 +359,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) anon_vma = anon_vma_alloc(); if (!anon_vma) goto out_error; + anon_vma->num_active_vmas++; avc = anon_vma_chain_alloc(GFP_KERNEL); if (!avc) goto out_error_free_anon_vma; @@ -378,7 +380,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) vma->anon_vma = anon_vma; anon_vma_lock_write(anon_vma); anon_vma_chain_link(vma, avc, anon_vma); - anon_vma->parent->degree++; + anon_vma->parent->num_children++; anon_vma_unlock_write(anon_vma); return 0; @@ -410,7 +412,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma) * to free them outside the lock. */ if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) { - anon_vma->parent->degree--; + anon_vma->parent->num_children--; continue; } @@ -418,7 +420,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma) anon_vma_chain_free(avc); } if (vma->anon_vma) { - vma->anon_vma->degree--; + vma->anon_vma->num_active_vmas--; /* * vma would still be needed after unlink, and anon_vma will be prepared @@ -436,7 +438,8 @@ void unlink_anon_vmas(struct vm_area_struct *vma) list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { struct anon_vma *anon_vma = avc->anon_vma; - VM_WARN_ON(anon_vma->degree); + VM_WARN_ON(anon_vma->num_children); + VM_WARN_ON(anon_vma->num_active_vmas); put_anon_vma(anon_vma); list_del(&avc->same_vma); From eb55dc09b5dd040232d5de32812cc83001a23da6 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 29 Aug 2022 12:01:21 +0200 Subject: [PATCH 573/654] ip: fix triggering of 'icmp redirect' __mkroute_input() uses fib_validate_source() to trigger an icmp redirect. My understanding is that fib_validate_source() is used to know if the src address and the gateway address are on the same link. For that, fib_validate_source() returns 1 (same link) or 0 (not the same network). __mkroute_input() is the only user of these positive values, all other callers only look if the returned value is negative. Since the below patch, fib_validate_source() didn't return anymore 1 when both addresses are on the same network, because the route lookup returns RT_SCOPE_LINK instead of RT_SCOPE_HOST. But this is, in fact, right. Let's adapat the test to return 1 again when both addresses are on the same link. CC: stable@vger.kernel.org Fixes: 747c14307214 ("ip: fix dflt addr selection for connected nexthop") Reported-by: kernel test robot Reported-by: Heng Qi Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220829100121.3821-1-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_frontend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f361d3d56be27..943edf4ad4db0 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -389,7 +389,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, dev_match = dev_match || (res.type == RTN_LOCAL && dev == net->loopback_dev); if (dev_match) { - ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST; + ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_LINK; return ret; } if (no_addr) @@ -401,7 +401,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, ret = 0; if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) { if (res.type == RTN_UNICAST) - ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST; + ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_LINK; } return ret; From 52267ce25f60f37ae40ccbca0b21328ebae5ae75 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Tue, 30 Aug 2022 18:34:48 +0200 Subject: [PATCH 574/654] net: dsa: hellcreek: Print warning only once In case the source port cannot be decoded, print the warning only once. This still brings attention to the user and does not spam the logs at the same time. Signed-off-by: Kurt Kanzenbach Reviewed-by: Andrew Lunn Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220830163448.8921-1-kurt@linutronix.de Signed-off-by: Jakub Kicinski --- net/dsa/tag_hellcreek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c index eb204ad36eeec..846588c0070a5 100644 --- a/net/dsa/tag_hellcreek.c +++ b/net/dsa/tag_hellcreek.c @@ -45,7 +45,7 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb, skb->dev = dsa_master_find_slave(dev, 0, port); if (!skb->dev) { - netdev_warn(dev, "Failed to get source port: %d\n", port); + netdev_warn_once(dev, "Failed to get source port: %d\n", port); return NULL; } From 8c70521238b7863c2af607e20bcba20f974c969b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Aug 2022 11:56:55 -0700 Subject: [PATCH 575/654] tcp: annotate data-race around challenge_timestamp challenge_timestamp can be read an written by concurrent threads. This was expected, but we need to annotate the race to avoid potential issues. Following patch moves challenge_timestamp and challenge_count to per-netns storage to provide better isolation. Fixes: 354e4aa391ed ("tcp: RFC 5961 5.2 Blind Data Injection Attack Mitigation") Reported-by: syzbot Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ab5f0ea166f1a..c184e15397a28 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3629,11 +3629,11 @@ static void tcp_send_challenge_ack(struct sock *sk) /* Then check host-wide RFC 5961 rate limit. */ now = jiffies / HZ; - if (now != challenge_timestamp) { + if (now != READ_ONCE(challenge_timestamp)) { u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit); u32 half = (ack_limit + 1) >> 1; - challenge_timestamp = now; + WRITE_ONCE(challenge_timestamp, now); WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit)); } count = READ_ONCE(challenge_count); From 79e3602caa6f9d59c4f66a268407080496dae408 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Aug 2022 11:56:56 -0700 Subject: [PATCH 576/654] tcp: make global challenge ack rate limitation per net-ns and default disabled Because per host rate limiting has been proven problematic (side channel attacks can be based on it), per host rate limiting of challenge acks ideally should be per netns and turned off by default. This is a long due followup of following commits: 083ae308280d ("tcp: enable per-socket rate limiting of all 'challenge acks'") f2b2c582e824 ("tcp: mitigate ACK loops for connections as tcp_sock") 75ff39ccc1bd ("tcp: make challenge acks less predictable") Signed-off-by: Eric Dumazet Cc: Jason Baron Acked-by: Neal Cardwell Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 5 ++++- include/net/netns/ipv4.h | 2 ++ net/ipv4/tcp_input.c | 21 +++++++++++---------- net/ipv4/tcp_ipv4.c | 6 ++++-- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 56cd4ea059b2a..a759872a2883b 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1035,7 +1035,10 @@ tcp_limit_output_bytes - INTEGER tcp_challenge_ack_limit - INTEGER Limits number of Challenge ACK sent per second, as recommended in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks) - Default: 1000 + Note that this per netns rate limit can allow some side channel + attacks and probably should not be enabled. + TCP stack implements per TCP socket limits anyway. + Default: INT_MAX (unlimited) UDP variables ============= diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index c7320ef356d94..6320a76cefdcd 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -179,6 +179,8 @@ struct netns_ipv4 { unsigned int sysctl_tcp_fastopen_blackhole_timeout; atomic_t tfo_active_disable_times; unsigned long tfo_active_disable_stamp; + u32 tcp_challenge_timestamp; + u32 tcp_challenge_count; int sysctl_udp_wmem_min; int sysctl_udp_rmem_min; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c184e15397a28..b85a9f755da41 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3614,12 +3614,9 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, /* RFC 5961 7 [ACK Throttling] */ static void tcp_send_challenge_ack(struct sock *sk) { - /* unprotected vars, we dont care of overwrites */ - static u32 challenge_timestamp; - static unsigned int challenge_count; struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); - u32 count, now; + u32 count, now, ack_limit; /* First check our per-socket dupack rate limit. */ if (__tcp_oow_rate_limited(net, @@ -3627,18 +3624,22 @@ static void tcp_send_challenge_ack(struct sock *sk) &tp->last_oow_ack_time)) return; + ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit); + if (ack_limit == INT_MAX) + goto send_ack; + /* Then check host-wide RFC 5961 rate limit. */ now = jiffies / HZ; - if (now != READ_ONCE(challenge_timestamp)) { - u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit); + if (now != READ_ONCE(net->ipv4.tcp_challenge_timestamp)) { u32 half = (ack_limit + 1) >> 1; - WRITE_ONCE(challenge_timestamp, now); - WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit)); + WRITE_ONCE(net->ipv4.tcp_challenge_timestamp, now); + WRITE_ONCE(net->ipv4.tcp_challenge_count, half + prandom_u32_max(ack_limit)); } - count = READ_ONCE(challenge_count); + count = READ_ONCE(net->ipv4.tcp_challenge_count); if (count > 0) { - WRITE_ONCE(challenge_count, count - 1); + WRITE_ONCE(net->ipv4.tcp_challenge_count, count - 1); +send_ack: NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK); tcp_send_ack(sk); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0c83780dc9bf4..5b019ba2b9d21 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3139,8 +3139,10 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_tso_win_divisor = 3; /* Default TSQ limit of 16 TSO segments */ net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536; - /* rfc5961 challenge ack rate limiting */ - net->ipv4.sysctl_tcp_challenge_ack_limit = 1000; + + /* rfc5961 challenge ack rate limiting, per net-ns, disabled by default. */ + net->ipv4.sysctl_tcp_challenge_ack_limit = INT_MAX; + net->ipv4.sysctl_tcp_min_tso_segs = 2; net->ipv4.sysctl_tcp_tso_rtt_log = 9; /* 2^9 = 512 usec */ net->ipv4.sysctl_tcp_min_rtt_wlen = 300; From 0b4f688d53fdc2a731b9d9cdf0c96255bc024ea6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 31 Aug 2022 20:01:32 -0700 Subject: [PATCH 577/654] Revert "sch_cake: Return __NET_XMIT_STOLEN when consuming enqueued skb" This reverts commit 90fabae8a2c225c4e4936723c38857887edde5cc. Patch was applied hastily, revert and let the v2 be reviewed. Fixes: 90fabae8a2c2 ("sch_cake: Return __NET_XMIT_STOLEN when consuming enqueued skb") Link: https://lore.kernel.org/all/87wnao2ha3.fsf@toke.dk/ Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index a04928082e4ab..a43a58a73d096 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1713,7 +1713,6 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } idx--; flow = &b->flows[idx]; - ret = NET_XMIT_SUCCESS; /* ensure shaper state isn't stale */ if (!b->tin_backlog) { @@ -1772,7 +1771,6 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen); consume_skb(skb); - ret |= __NET_XMIT_STOLEN; } else { /* not splitting */ cobalt_set_enqueue_time(skb, now); @@ -1906,7 +1904,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } b->drop_overlimit += dropped; } - return ret; + return NET_XMIT_SUCCESS; } static struct sk_buff *cake_dequeue_one(struct Qdisc *sch) From a8424a9b4522a3ab9f32175ad6d848739079071f Mon Sep 17 00:00:00 2001 From: Yacan Liu Date: Tue, 30 Aug 2022 23:23:14 +0800 Subject: [PATCH 578/654] net/smc: Remove redundant refcount increase For passive connections, the refcount increment has been done in smc_clcsock_accept()-->smc_sock_alloc(). Fixes: 3b2dec2603d5 ("net/smc: restructure client and server code in af_smc") Signed-off-by: Yacan Liu Reviewed-by: Tony Lu Link: https://lore.kernel.org/r/20220830152314.838736-1-liuyacan@corp.netease.com Signed-off-by: Paolo Abeni --- net/smc/af_smc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 79c1318af1fef..0939cc3b915af 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1855,7 +1855,6 @@ static void smc_listen_out_connected(struct smc_sock *new_smc) { struct sock *newsmcsk = &new_smc->sk; - sk_refcnt_debug_inc(newsmcsk); if (newsmcsk->sk_state == SMC_INIT) newsmcsk->sk_state = SMC_ACTIVE; From 4ef3f2aff1267bfa6d5a90c42a30b927b8aa239b Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 8 Jul 2022 11:47:47 +0100 Subject: [PATCH 579/654] soundwire: qcom: fix device status array range This patch updates device status array range from 11 to 12 as we will be reading status from device number 0 to device number 11 inclusive. Without this patch we can potentially access status array out of range during auto-enumeration. Fixes: aa1262ca6695 ("soundwire: qcom: Check device status before reading devid") Reported-by: Dan Carpenter Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20220708104747.8722-1-srinivas.kandagatla@linaro.org Signed-off-by: Vinod Koul --- drivers/soundwire/qcom.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index a43961ad4614c..3a992a6478c30 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -169,7 +169,7 @@ struct qcom_swrm_ctrl { u8 wcmd_id; struct qcom_swrm_port_config pconfig[QCOM_SDW_MAX_PORTS]; struct sdw_stream_runtime *sruntime[SWRM_MAX_DAIS]; - enum sdw_slave_status status[SDW_MAX_DEVICES]; + enum sdw_slave_status status[SDW_MAX_DEVICES + 1]; int (*reg_read)(struct qcom_swrm_ctrl *ctrl, int reg, u32 *val); int (*reg_write)(struct qcom_swrm_ctrl *ctrl, int reg, int val); u32 slave_status; @@ -420,7 +420,7 @@ static int qcom_swrm_get_alert_slave_dev_num(struct qcom_swrm_ctrl *ctrl) ctrl->reg_read(ctrl, SWRM_MCP_SLV_STATUS, &val); - for (dev_num = 0; dev_num < SDW_MAX_DEVICES; dev_num++) { + for (dev_num = 0; dev_num <= SDW_MAX_DEVICES; dev_num++) { status = (val >> (dev_num * SWRM_MCP_SLV_STATUS_SZ)); if ((status & SWRM_MCP_SLV_STATUS_MASK) == SDW_SLAVE_ALERT) { @@ -440,7 +440,7 @@ static void qcom_swrm_get_device_status(struct qcom_swrm_ctrl *ctrl) ctrl->reg_read(ctrl, SWRM_MCP_SLV_STATUS, &val); ctrl->slave_status = val; - for (i = 0; i < SDW_MAX_DEVICES; i++) { + for (i = 0; i <= SDW_MAX_DEVICES; i++) { u32 s; s = (val >> (i * 2)); From 0495e337b7039191dfce6e03f5f830454b1fae6b Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 12 Aug 2022 14:30:33 -0400 Subject: [PATCH 580/654] mm/slab_common: Deleting kobject in kmem_cache_destroy() without holding slab_mutex/cpu_hotplug_lock A circular locking problem is reported by lockdep due to the following circular locking dependency. +--> cpu_hotplug_lock --> slab_mutex --> kn->active --+ | | +-----------------------------------------------------+ The forward cpu_hotplug_lock ==> slab_mutex ==> kn->active dependency happens in kmem_cache_destroy(): cpus_read_lock(); mutex_lock(&slab_mutex); ==> sysfs_slab_unlink() ==> kobject_del() ==> kernfs_remove() ==> __kernfs_remove() ==> kernfs_drain(): rwsem_acquire(&kn->dep_map, ...); The backward kn->active ==> cpu_hotplug_lock dependency happens in kernfs_fop_write_iter(): kernfs_get_active(); ==> slab_attr_store() ==> cpu_partial_store() ==> flush_all(): cpus_read_lock() One way to break this circular locking chain is to avoid holding cpu_hotplug_lock and slab_mutex while deleting the kobject in sysfs_slab_unlink() which should be equivalent to doing a write_lock and write_unlock pair of the kn->active virtual lock. Since the kobject structures are not protected by slab_mutex or the cpu_hotplug_lock, we can certainly release those locks before doing the delete operation. Move sysfs_slab_unlink() and sysfs_slab_release() to the newly created kmem_cache_release() and call it outside the slab_mutex & cpu_hotplug_lock critical sections. There will be a slight delay in the deletion of sysfs files if kmem_cache_release() is called indirectly from a work function. Fixes: 5a836bf6b09f ("mm: slub: move flush_cpu_slab() invocations __free_slab() invocations out of IRQ context") Signed-off-by: Waiman Long Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Roman Gushchin Acked-by: David Rientjes Link: https://lore.kernel.org/all/YwOImVd+nRUsSAga@hyeyoo/ Signed-off-by: Vlastimil Babka --- mm/slab_common.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 17996649cfe3e..07b948288f84d 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -392,6 +392,28 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align, } EXPORT_SYMBOL(kmem_cache_create); +#ifdef SLAB_SUPPORTS_SYSFS +/* + * For a given kmem_cache, kmem_cache_destroy() should only be called + * once or there will be a use-after-free problem. The actual deletion + * and release of the kobject does not need slab_mutex or cpu_hotplug_lock + * protection. So they are now done without holding those locks. + * + * Note that there will be a slight delay in the deletion of sysfs files + * if kmem_cache_release() is called indrectly from a work function. + */ +static void kmem_cache_release(struct kmem_cache *s) +{ + sysfs_slab_unlink(s); + sysfs_slab_release(s); +} +#else +static void kmem_cache_release(struct kmem_cache *s) +{ + slab_kmem_cache_release(s); +} +#endif + static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work) { LIST_HEAD(to_destroy); @@ -418,11 +440,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work) list_for_each_entry_safe(s, s2, &to_destroy, list) { debugfs_slab_release(s); kfence_shutdown_cache(s); -#ifdef SLAB_SUPPORTS_SYSFS - sysfs_slab_release(s); -#else - slab_kmem_cache_release(s); -#endif + kmem_cache_release(s); } } @@ -437,20 +455,11 @@ static int shutdown_cache(struct kmem_cache *s) list_del(&s->list); if (s->flags & SLAB_TYPESAFE_BY_RCU) { -#ifdef SLAB_SUPPORTS_SYSFS - sysfs_slab_unlink(s); -#endif list_add_tail(&s->list, &slab_caches_to_rcu_destroy); schedule_work(&slab_caches_to_rcu_destroy_work); } else { kfence_shutdown_cache(s); debugfs_slab_release(s); -#ifdef SLAB_SUPPORTS_SYSFS - sysfs_slab_unlink(s); - sysfs_slab_release(s); -#else - slab_kmem_cache_release(s); -#endif } return 0; @@ -465,14 +474,16 @@ void slab_kmem_cache_release(struct kmem_cache *s) void kmem_cache_destroy(struct kmem_cache *s) { + int refcnt; + if (unlikely(!s) || !kasan_check_byte(s)) return; cpus_read_lock(); mutex_lock(&slab_mutex); - s->refcount--; - if (s->refcount) + refcnt = --s->refcount; + if (refcnt) goto out_unlock; WARN(shutdown_cache(s), @@ -481,6 +492,8 @@ void kmem_cache_destroy(struct kmem_cache *s) out_unlock: mutex_unlock(&slab_mutex); cpus_read_unlock(); + if (!refcnt && !(s->flags & SLAB_TYPESAFE_BY_RCU)) + kmem_cache_release(s); } EXPORT_SYMBOL(kmem_cache_destroy); From 4831be702b95047c89b3fa5728d07091e9e9f7c9 Mon Sep 17 00:00:00 2001 From: Levi Yun Date: Wed, 31 Aug 2022 19:39:13 +0900 Subject: [PATCH 581/654] arm64/kexec: Fix missing extra range for crashkres_low. Like crashk_res, Calling crash_exclude_mem_range function with crashk_low_res area would need extra crash_mem range too. Add one more extra cmem slot in case of crashk_low_res is used. Signed-off-by: Levi Yun Fixes: 944a45abfabc ("arm64: kdump: Reimplement crashkernel=X") Cc: # 5.19.x Acked-by: Baoquan He Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220831103913.12661-1-ppbuk5246@gmail.com Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 889951291cc0f..a11a6e14ba89f 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -47,7 +47,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) u64 i; phys_addr_t start, end; - nr_ranges = 1; /* for exclusion of crashkernel region */ + nr_ranges = 2; /* for exclusion of crashkernel region */ for_each_mem_range(i, &start, &end) nr_ranges++; From e62b9e6f25fc99be07c3b7b284262ee74ed645a9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 27 Aug 2022 09:09:04 +0200 Subject: [PATCH 582/654] arm64: head: Ignore bogus KASLR displacement on non-relocatable kernels Even non-KASLR kernels can be built as relocatable, to work around broken bootloaders that violate the rules regarding physical placement of the kernel image - in this case, the physical offset modulo 2 MiB is used as the KASLR offset, and all absolute symbol references are fixed up in the usual way. This workaround is enabled by default. CONFIG_RELOCATABLE can also be disabled entirely, in which case the relocation code and the code that captures the offset are omitted from the build. However, since commit aacd149b6238 ("arm64: head: avoid relocating the kernel twice for KASLR"), this code got out of sync, and we still add the offset to the kernel virtual address before populating the page tables even though we never capture it. This means we add a bogus value instead, breaking the boot entirely. Fixes: aacd149b6238 ("arm64: head: avoid relocating the kernel twice for KASLR") Signed-off-by: Ard Biesheuvel Tested-by: Mikulas Patocka Link: https://lore.kernel.org/r/20220827070904.2216989-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index cefe6a73ee546..814b6587ccb78 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -371,7 +371,9 @@ SYM_FUNC_END(create_idmap) SYM_FUNC_START_LOCAL(create_kernel_mapping) adrp x0, init_pg_dir mov_q x5, KIMAGE_VADDR // compile time __va(_text) +#ifdef CONFIG_RELOCATABLE add x5, x5, x23 // add KASLR displacement +#endif adrp x6, _end // runtime __pa(_end) adrp x3, _text // runtime __pa(_text) sub x6, x6, x3 // _end - _text From 6bb0d64c100091e131cd16710b62fda3319cd0af Mon Sep 17 00:00:00 2001 From: Yu Zhe Date: Thu, 25 Aug 2022 09:18:44 +0800 Subject: [PATCH 583/654] perf/arm_pmu_platform: fix tests for platform_get_irq() failure The platform_get_irq() returns negative error codes. It can't actually return zero. Signed-off-by: Yu Zhe Link: https://lore.kernel.org/r/20220825011844.8536-1-yuzhe@nfschina.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmu_platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 513de1f54e2d7..933b96e243b84 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -117,7 +117,7 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) if (num_irqs == 1) { int irq = platform_get_irq(pdev, 0); - if (irq && irq_is_percpu_devid(irq)) + if ((irq > 0) && irq_is_percpu_devid(irq)) return pmu_parse_percpu_irq(pmu, irq); } From 5fbc49cef91916140a305f22f7430e9a7ea0c6b4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 26 Aug 2022 18:48:00 +0200 Subject: [PATCH 584/654] arm64: mm: Reserve enough pages for the initial ID map The logic that conditionally allocates one additional page at each swapper page table level if KASLR is enabled is also applied to the initial ID map, now that we have started using the same set of macros to allocate the space for it. However, the placement of the kernel in physical memory might result in additional pages being needed at any level, even if KASLR is disabled in the build. So account for this in the computation. Fixes: c3cee924bd85 ("arm64: head: cover entire kernel image in initial ID map") Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220826164800.2059148-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/kernel-pgtable.h | 26 ++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 02e59fa8f2930..32d14f481f0c3 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -64,28 +64,28 @@ #define EARLY_KASLR (0) #endif -#define EARLY_ENTRIES(vstart, vend, shift) \ - ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1 + EARLY_KASLR) +#define EARLY_ENTRIES(vstart, vend, shift, add) \ + ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1 + add) -#define EARLY_PGDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT)) +#define EARLY_PGDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT, add)) #if SWAPPER_PGTABLE_LEVELS > 3 -#define EARLY_PUDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT)) +#define EARLY_PUDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT, add)) #else -#define EARLY_PUDS(vstart, vend) (0) +#define EARLY_PUDS(vstart, vend, add) (0) #endif #if SWAPPER_PGTABLE_LEVELS > 2 -#define EARLY_PMDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT)) +#define EARLY_PMDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT, add)) #else -#define EARLY_PMDS(vstart, vend) (0) +#define EARLY_PMDS(vstart, vend, add) (0) #endif -#define EARLY_PAGES(vstart, vend) ( 1 /* PGDIR page */ \ - + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \ - + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ - + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ -#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end)) +#define EARLY_PAGES(vstart, vend, add) ( 1 /* PGDIR page */ \ + + EARLY_PGDS((vstart), (vend), add) /* each PGDIR needs a next level page table */ \ + + EARLY_PUDS((vstart), (vend), add) /* each PUD needs a next level page table */ \ + + EARLY_PMDS((vstart), (vend), add)) /* each PMD needs a next level page table */ +#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EARLY_KASLR)) /* the initial ID map may need two extra pages if it needs to be extended */ #if VA_BITS < 48 @@ -93,7 +93,7 @@ #else #define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE) #endif -#define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE) +#define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE, 1) /* Initial memory map size */ #if ARM64_KERNEL_USES_PMD_MAPS From c3b82d26bc85f5fc2fef5ec8cce17c89633a55a8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 29 Aug 2022 18:35:44 +0200 Subject: [PATCH 585/654] platform/x86: acer-wmi: Acer Aspire One AOD270/Packard Bell Dot keymap fixes 2 keymap fixes for the Acer Aspire One AOD270 and the same hardware rebranded as Packard Bell Dot SC: 1. The F2 key is marked with a big '?' symbol on the Packard Bell Dot SC, this sends WMID_HOTKEY_EVENTs with a scancode of 0x27 add a mapping for this. 2. Scancode 0x61 is KEY_SWITCHVIDEOMODE. Usually this is a duplicate input event with the "Video Bus" input device events. But on these devices the "Video Bus" does not send events for this key. Map 0x61 to KEY_UNKNOWN instead of using KE_IGNORE so that udev/hwdb can override it on these devs. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220829163544.5288-1-hdegoede@redhat.com --- drivers/platform/x86/acer-wmi.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index e0230ea0cb7ee..f1259d81d86da 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -99,6 +99,7 @@ static const struct key_entry acer_wmi_keymap[] __initconst = { {KE_KEY, 0x22, {KEY_PROG2} }, /* Arcade */ {KE_KEY, 0x23, {KEY_PROG3} }, /* P_Key */ {KE_KEY, 0x24, {KEY_PROG4} }, /* Social networking_Key */ + {KE_KEY, 0x27, {KEY_HELP} }, {KE_KEY, 0x29, {KEY_PROG3} }, /* P_Key for TM8372 */ {KE_IGNORE, 0x41, {KEY_MUTE} }, {KE_IGNORE, 0x42, {KEY_PREVIOUSSONG} }, @@ -112,7 +113,13 @@ static const struct key_entry acer_wmi_keymap[] __initconst = { {KE_IGNORE, 0x48, {KEY_VOLUMEUP} }, {KE_IGNORE, 0x49, {KEY_VOLUMEDOWN} }, {KE_IGNORE, 0x4a, {KEY_VOLUMEDOWN} }, - {KE_IGNORE, 0x61, {KEY_SWITCHVIDEOMODE} }, + /* + * 0x61 is KEY_SWITCHVIDEOMODE. Usually this is a duplicate input event + * with the "Video Bus" input device events. But sometimes it is not + * a dup. Map it to KEY_UNKNOWN instead of using KE_IGNORE so that + * udev/hwdb can override it on systems where it is not a dup. + */ + {KE_KEY, 0x61, {KEY_UNKNOWN} }, {KE_IGNORE, 0x62, {KEY_BRIGHTNESSUP} }, {KE_IGNORE, 0x63, {KEY_BRIGHTNESSDOWN} }, {KE_KEY, 0x64, {KEY_SWITCHVIDEOMODE} }, /* Display Switch */ From 17c2bd6bea4c32fe691c1f9ebcc20fd48d77454a Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Tue, 23 Aug 2022 23:19:34 +0300 Subject: [PATCH 586/654] platform/mellanox: mlxreg-lc: Fix coverity warning Fix smatch warning: drivers/platform/mellanox/mlxreg-lc.c:866 mlxreg_lc_probe() warn: passing zero to 'PTR_ERR' by removing 'err = PTR_ERR(regmap)'. Fixes: b4b830a34d80 ("platform/mellanox: mlxreg-lc: Fix error flow and extend verbosity") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Vadim Pasternak Link: https://lore.kernel.org/r/20220823201937.46855-2-vadimp@nvidia.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/mellanox/mlxreg-lc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/platform/mellanox/mlxreg-lc.c b/drivers/platform/mellanox/mlxreg-lc.c index 55834ccb4ac7c..9a1bfcd24317d 100644 --- a/drivers/platform/mellanox/mlxreg-lc.c +++ b/drivers/platform/mellanox/mlxreg-lc.c @@ -863,7 +863,6 @@ static int mlxreg_lc_probe(struct platform_device *pdev) if (err) { dev_err(&pdev->dev, "Failed to sync regmap for client %s at bus %d at addr 0x%02x\n", data->hpdev.brdinfo->type, data->hpdev.nr, data->hpdev.brdinfo->addr); - err = PTR_ERR(regmap); goto regcache_sync_fail; } From 1e092b7faa6b507125b69c92a51bb22c2d549d37 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Tue, 23 Aug 2022 23:19:35 +0300 Subject: [PATCH 587/654] platform/mellanox: mlxreg-lc: Fix locking issue Fix locking issues: - mlxreg_lc_state_update() takes a lock when set or clear "MLXREG_LC_POWERED". - All the devices can be deleted before MLXREG_LC_POWERED flag is cleared. To fix it: - Add lock() / unlock() at the beginning / end of mlxreg_lc_event_handler() and remove locking from mlxreg_lc_power_on_off() and mlxreg_lc_enable_disable() - Add locked version of mlxreg_lc_state_update() - mlxreg_lc_state_update_locked() for using outside mlxreg_lc_event_handler(). (2) Remove redundant NULL check for of if 'data->notifier'. Fixes: 62f9529b8d5c87b ("platform/mellanox: mlxreg-lc: Add initial support for Nvidia line card devices") Reported-by: Dan Carpenter Signed-off-by: Vadim Pasternak Link: https://lore.kernel.org/r/20220823201937.46855-3-vadimp@nvidia.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/mellanox/mlxreg-lc.c | 37 ++++++++++++++++++--------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/platform/mellanox/mlxreg-lc.c b/drivers/platform/mellanox/mlxreg-lc.c index 9a1bfcd24317d..e578c7bc060bb 100644 --- a/drivers/platform/mellanox/mlxreg-lc.c +++ b/drivers/platform/mellanox/mlxreg-lc.c @@ -460,8 +460,6 @@ static int mlxreg_lc_power_on_off(struct mlxreg_lc *mlxreg_lc, u8 action) u32 regval; int err; - mutex_lock(&mlxreg_lc->lock); - err = regmap_read(mlxreg_lc->par_regmap, mlxreg_lc->data->reg_pwr, ®val); if (err) goto regmap_read_fail; @@ -474,7 +472,6 @@ static int mlxreg_lc_power_on_off(struct mlxreg_lc *mlxreg_lc, u8 action) err = regmap_write(mlxreg_lc->par_regmap, mlxreg_lc->data->reg_pwr, regval); regmap_read_fail: - mutex_unlock(&mlxreg_lc->lock); return err; } @@ -491,8 +488,6 @@ static int mlxreg_lc_enable_disable(struct mlxreg_lc *mlxreg_lc, bool action) * line card which is already has been enabled. Disabling does not affect the disabled line * card. */ - mutex_lock(&mlxreg_lc->lock); - err = regmap_read(mlxreg_lc->par_regmap, mlxreg_lc->data->reg_ena, ®val); if (err) goto regmap_read_fail; @@ -505,7 +500,6 @@ static int mlxreg_lc_enable_disable(struct mlxreg_lc *mlxreg_lc, bool action) err = regmap_write(mlxreg_lc->par_regmap, mlxreg_lc->data->reg_ena, regval); regmap_read_fail: - mutex_unlock(&mlxreg_lc->lock); return err; } @@ -537,6 +531,15 @@ mlxreg_lc_sn4800_c16_config_init(struct mlxreg_lc *mlxreg_lc, void *regmap, static void mlxreg_lc_state_update(struct mlxreg_lc *mlxreg_lc, enum mlxreg_lc_state state, u8 action) +{ + if (action) + mlxreg_lc->state |= state; + else + mlxreg_lc->state &= ~state; +} + +static void +mlxreg_lc_state_update_locked(struct mlxreg_lc *mlxreg_lc, enum mlxreg_lc_state state, u8 action) { mutex_lock(&mlxreg_lc->lock); @@ -560,8 +563,11 @@ static int mlxreg_lc_event_handler(void *handle, enum mlxreg_hotplug_kind kind, dev_info(mlxreg_lc->dev, "linecard#%d state %d event kind %d action %d\n", mlxreg_lc->data->slot, mlxreg_lc->state, kind, action); - if (!(mlxreg_lc->state & MLXREG_LC_INITIALIZED)) + mutex_lock(&mlxreg_lc->lock); + if (!(mlxreg_lc->state & MLXREG_LC_INITIALIZED)) { + mutex_unlock(&mlxreg_lc->lock); return 0; + } switch (kind) { case MLXREG_HOTPLUG_LC_SYNCED: @@ -574,7 +580,7 @@ static int mlxreg_lc_event_handler(void *handle, enum mlxreg_hotplug_kind kind, if (!(mlxreg_lc->state & MLXREG_LC_POWERED) && action) { err = mlxreg_lc_power_on_off(mlxreg_lc, 1); if (err) - return err; + goto mlxreg_lc_power_on_off_fail; } /* In case line card is configured - enable it. */ if (mlxreg_lc->state & MLXREG_LC_CONFIGURED && action) @@ -588,12 +594,13 @@ static int mlxreg_lc_event_handler(void *handle, enum mlxreg_hotplug_kind kind, /* In case line card is configured - enable it. */ if (mlxreg_lc->state & MLXREG_LC_CONFIGURED) err = mlxreg_lc_enable_disable(mlxreg_lc, 1); + mutex_unlock(&mlxreg_lc->lock); return err; } err = mlxreg_lc_create_static_devices(mlxreg_lc, mlxreg_lc->main_devs, mlxreg_lc->main_devs_num); if (err) - return err; + goto mlxreg_lc_create_static_devices_fail; /* In case line card is already in ready state - enable it. */ if (mlxreg_lc->state & MLXREG_LC_CONFIGURED) @@ -620,6 +627,10 @@ static int mlxreg_lc_event_handler(void *handle, enum mlxreg_hotplug_kind kind, break; } +mlxreg_lc_power_on_off_fail: +mlxreg_lc_create_static_devices_fail: + mutex_unlock(&mlxreg_lc->lock); + return err; } @@ -665,7 +676,7 @@ static int mlxreg_lc_completion_notify(void *handle, struct i2c_adapter *parent, if (err) goto mlxreg_lc_create_static_devices_failed; - mlxreg_lc_state_update(mlxreg_lc, MLXREG_LC_POWERED, 1); + mlxreg_lc_state_update_locked(mlxreg_lc, MLXREG_LC_POWERED, 1); } /* Verify if line card is synchronized. */ @@ -676,7 +687,7 @@ static int mlxreg_lc_completion_notify(void *handle, struct i2c_adapter *parent, /* Power on line card if necessary. */ if (regval & mlxreg_lc->data->mask) { mlxreg_lc->state |= MLXREG_LC_SYNCED; - mlxreg_lc_state_update(mlxreg_lc, MLXREG_LC_SYNCED, 1); + mlxreg_lc_state_update_locked(mlxreg_lc, MLXREG_LC_SYNCED, 1); if (mlxreg_lc->state & ~MLXREG_LC_POWERED) { err = mlxreg_lc_power_on_off(mlxreg_lc, 1); if (err) @@ -684,7 +695,7 @@ static int mlxreg_lc_completion_notify(void *handle, struct i2c_adapter *parent, } } - mlxreg_lc_state_update(mlxreg_lc, MLXREG_LC_INITIALIZED, 1); + mlxreg_lc_state_update_locked(mlxreg_lc, MLXREG_LC_INITIALIZED, 1); return 0; @@ -904,6 +915,8 @@ static int mlxreg_lc_remove(struct platform_device *pdev) struct mlxreg_core_data *data = dev_get_platdata(&pdev->dev); struct mlxreg_lc *mlxreg_lc = platform_get_drvdata(pdev); + mlxreg_lc_state_update_locked(mlxreg_lc, MLXREG_LC_INITIALIZED, 0); + /* * Probing and removing are invoked by hotplug events raised upon line card insertion and * removing. If probing procedure fails all data is cleared. However, hotplug event still From 2f92fdd043d548a14287889742e147f8ed4ee03d Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Tue, 23 Aug 2022 23:19:36 +0300 Subject: [PATCH 588/654] platform/mellanox: Remove unnecessary code Remove redundant 'NULL' check for of if 'data->notifier'. Replace 'return err' by 'return 0' in mlxreg_lc_probe(). Fixes: 62f9529b8d5c87b ("platform/mellanox: mlxreg-lc: Add initial support for Nvidia line card devices") Reported-by: Dan Carpenter Signed-off-by: Vadim Pasternak Link: https://lore.kernel.org/r/20220823201937.46855-4-vadimp@nvidia.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/mellanox/mlxreg-lc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/platform/mellanox/mlxreg-lc.c b/drivers/platform/mellanox/mlxreg-lc.c index e578c7bc060bb..1e0c3ddc46cdc 100644 --- a/drivers/platform/mellanox/mlxreg-lc.c +++ b/drivers/platform/mellanox/mlxreg-lc.c @@ -825,10 +825,9 @@ static int mlxreg_lc_probe(struct platform_device *pdev) mutex_init(&mlxreg_lc->lock); /* Set event notification callback. */ - if (data->notifier) { - data->notifier->user_handler = mlxreg_lc_event_handler; - data->notifier->handle = mlxreg_lc; - } + data->notifier->user_handler = mlxreg_lc_event_handler; + data->notifier->handle = mlxreg_lc; + data->hpdev.adapter = i2c_get_adapter(data->hpdev.nr); if (!data->hpdev.adapter) { dev_err(&pdev->dev, "Failed to get adapter for bus %d\n", @@ -888,7 +887,7 @@ static int mlxreg_lc_probe(struct platform_device *pdev) if (err) goto mlxreg_lc_config_init_fail; - return err; + return 0; mlxreg_lc_config_init_fail: regcache_sync_fail: From 791ae8e8960efbf2e331eae7110db65b4f9f9083 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Tue, 23 Aug 2022 23:19:37 +0300 Subject: [PATCH 589/654] platform/mellanox: Remove redundant 'NULL' check Remove 'NULL' check for 'data->hpdev.client' in error flow of mlxreg_lc_probe(). It cannot be 'NULL' at this point. Fixes: b4b830a34d80 ("platform/mellanox: mlxreg-lc: Fix error flow and extend verbosity") Reported-by: Dan Carpenter Signed-off-by: Vadim Pasternak Link: https://lore.kernel.org/r/20220823201937.46855-5-vadimp@nvidia.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/mellanox/mlxreg-lc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/platform/mellanox/mlxreg-lc.c b/drivers/platform/mellanox/mlxreg-lc.c index 1e0c3ddc46cdc..1e071df4c9f5b 100644 --- a/drivers/platform/mellanox/mlxreg-lc.c +++ b/drivers/platform/mellanox/mlxreg-lc.c @@ -893,10 +893,8 @@ static int mlxreg_lc_probe(struct platform_device *pdev) regcache_sync_fail: regmap_write_fail: devm_regmap_init_i2c_fail: - if (data->hpdev.client) { - i2c_unregister_device(data->hpdev.client); - data->hpdev.client = NULL; - } + i2c_unregister_device(data->hpdev.client); + data->hpdev.client = NULL; i2c_new_device_fail: i2c_put_adapter(data->hpdev.adapter); data->hpdev.adapter = NULL; From 25e9fbf0fd38868a429feabc38abebfc6dbf6542 Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Wed, 17 Aug 2022 11:40:26 -0700 Subject: [PATCH 590/654] driver core: Don't probe devices after bus_type.match() probe deferral Both __device_attach_driver() and __driver_attach() check the return code of the bus_type.match() function to see if the device needs to be added to the deferred probe list. After adding the device to the list, the logic attempts to bind the device to the driver anyway, as if the device had matched with the driver, which is not correct. If __device_attach_driver() detects that the device in question is not ready to match with a driver on the bus, then it doesn't make sense for the device to attempt to bind with the current driver or continue attempting to match with any of the other drivers on the bus. So, update the logic in __device_attach_driver() to reflect this. If __driver_attach() detects that a driver tried to match with a device that is not ready to match yet, then the driver should not attempt to bind with the device. However, the driver can still attempt to match and bind with other devices on the bus, as drivers can be bound to multiple devices. So, update the logic in __driver_attach() to reflect this. Fixes: 656b8035b0ee ("ARM: 8524/1: driver cohandle -EPROBE_DEFER from bus_type.match()") Cc: stable@vger.kernel.org Cc: Saravana Kannan Reported-by: Guenter Roeck Tested-by: Guenter Roeck Tested-by: Linus Walleij Reviewed-by: Saravana Kannan Signed-off-by: Isaac J. Manjarres Link: https://lore.kernel.org/r/20220817184026.3468620-1-isaacmanjarres@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index a8916d1bfdcba..ec69b43f926ae 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -911,6 +911,11 @@ static int __device_attach_driver(struct device_driver *drv, void *_data) dev_dbg(dev, "Device match requests probe deferral\n"); dev->can_match = true; driver_deferred_probe_add(dev); + /* + * Device can't match with a driver right now, so don't attempt + * to match or bind with other drivers on the bus. + */ + return ret; } else if (ret < 0) { dev_dbg(dev, "Bus failed to match device: %d\n", ret); return ret; @@ -1150,6 +1155,11 @@ static int __driver_attach(struct device *dev, void *data) dev_dbg(dev, "Device match requests probe deferral\n"); dev->can_match = true; driver_deferred_probe_add(dev); + /* + * Driver could not match with device, but may match with + * another device on the bus. + */ + return 0; } else if (ret < 0) { dev_dbg(dev, "Bus failed to match device: %d\n", ret); return ret; From c61feaee68b9735be06f162bc046c7f1959efb0c Mon Sep 17 00:00:00 2001 From: Hu Xiaoying Date: Thu, 1 Sep 2022 12:57:37 +0800 Subject: [PATCH 591/654] usb: storage: Add ASUS <0x0b05:0x1932> to IGNORE_UAS USB external storage device(0x0b05:1932), use gnome-disk-utility tools to test usb write < 30MB/s. if does not to load module of uas for this device, can increase the write speed from 20MB/s to >40MB/s. Suggested-by: Matthias Kaehlcke Acked-by: Alan Stern Signed-off-by: Hu Xiaoying Link: https://lore.kernel.org/r/20220901045737.3438046-1-huxiaoying@kylinos.cn Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index 4051c8cd0cd8a..23ab3b048d9be 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -62,6 +62,13 @@ UNUSUAL_DEV(0x0984, 0x0301, 0x0128, 0x0128, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_IGNORE_UAS), +/* Reported-by: Tom Hu */ +UNUSUAL_DEV(0x0b05, 0x1932, 0x0000, 0x9999, + "ASUS", + "External HDD", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + /* Reported-by: David Webb */ UNUSUAL_DEV(0x0bc2, 0x331a, 0x0000, 0x9999, "Seagate", From 9baa1415d9abdd1e08362ea2dcfadfacee8690b5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 29 Aug 2022 10:05:29 +0200 Subject: [PATCH 592/654] misc: fastrpc: fix memory corruption on probe Add the missing sanity check on the probed-session count to avoid corrupting memory beyond the fixed-size slab-allocated session array when there are more than FASTRPC_MAX_SESSIONS sessions defined in the devicetree. Fixes: f6f9279f2bf0 ("misc: fastrpc: Add Qualcomm fastrpc basic driver model") Cc: stable@vger.kernel.org # 5.1 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220829080531.29681-2-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 93ebd174d8487..88091778c1b85 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1943,6 +1943,11 @@ static int fastrpc_cb_probe(struct platform_device *pdev) of_property_read_u32(dev->of_node, "qcom,nsessions", &sessions); spin_lock_irqsave(&cctx->lock, flags); + if (cctx->sesscount >= FASTRPC_MAX_SESSIONS) { + dev_err(&pdev->dev, "too many sessions\n"); + spin_unlock_irqrestore(&cctx->lock, flags); + return -ENOSPC; + } sess = &cctx->session[cctx->sesscount]; sess->used = false; sess->valid = true; From d245f43aab2b61195d8ebb64cef7b5a08c590ab4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 29 Aug 2022 10:05:30 +0200 Subject: [PATCH 593/654] misc: fastrpc: fix memory corruption on open The probe session-duplication overflow check incremented the session count also when there were no more available sessions so that memory beyond the fixed-size slab-allocated session array could be corrupted in fastrpc_session_alloc() on open(). Fixes: f6f9279f2bf0 ("misc: fastrpc: Add Qualcomm fastrpc basic driver model") Cc: stable@vger.kernel.org # 5.1 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220829080531.29681-3-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 88091778c1b85..6e312ac856686 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1948,7 +1948,7 @@ static int fastrpc_cb_probe(struct platform_device *pdev) spin_unlock_irqrestore(&cctx->lock, flags); return -ENOSPC; } - sess = &cctx->session[cctx->sesscount]; + sess = &cctx->session[cctx->sesscount++]; sess->used = false; sess->valid = true; sess->dev = dev; @@ -1961,13 +1961,12 @@ static int fastrpc_cb_probe(struct platform_device *pdev) struct fastrpc_session_ctx *dup_sess; for (i = 1; i < sessions; i++) { - if (cctx->sesscount++ >= FASTRPC_MAX_SESSIONS) + if (cctx->sesscount >= FASTRPC_MAX_SESSIONS) break; - dup_sess = &cctx->session[cctx->sesscount]; + dup_sess = &cctx->session[cctx->sesscount++]; memcpy(dup_sess, sess, sizeof(*dup_sess)); } } - cctx->sesscount++; spin_unlock_irqrestore(&cctx->lock, flags); rc = dma_set_mask(dev, DMA_BIT_MASK(32)); if (rc) { From 689a2d9f9332a27b1379ef230396e944f949a72b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 29 Aug 2022 10:05:31 +0200 Subject: [PATCH 594/654] misc: fastrpc: increase maximum session count The SC8280XP platform uses 14 sessions for the compute DSP so increment the maximum session count. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220829080531.29681-4-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 6e312ac856686..5d9e3483b89d7 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -25,7 +25,7 @@ #define SDSP_DOMAIN_ID (2) #define CDSP_DOMAIN_ID (3) #define FASTRPC_DEV_MAX 4 /* adsp, mdsp, slpi, cdsp*/ -#define FASTRPC_MAX_SESSIONS 13 /*12 compute, 1 cpz*/ +#define FASTRPC_MAX_SESSIONS 14 #define FASTRPC_MAX_VMIDS 16 #define FASTRPC_ALIGN 128 #define FASTRPC_MAX_FDLIST 16 From 1da52815d5f1b654c89044db0cdc6adce43da1f1 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Mon, 29 Aug 2022 20:12:48 +0000 Subject: [PATCH 595/654] binder: fix alloc->vma_vm_mm null-ptr dereference Syzbot reported a couple issues introduced by commit 44e602b4e52f ("binder_alloc: add missing mmap_lock calls when using the VMA"), in which we attempt to acquire the mmap_lock when alloc->vma_vm_mm has not been initialized yet. This can happen if a binder_proc receives a transaction without having previously called mmap() to setup the binder_proc->alloc space in [1]. Also, a similar issue occurs via binder_alloc_print_pages() when we try to dump the debugfs binder stats file in [2]. Sample of syzbot's crash report: ================================================================== KASAN: null-ptr-deref in range [0x0000000000000128-0x000000000000012f] CPU: 0 PID: 3755 Comm: syz-executor229 Not tainted 6.0.0-rc1-next-20220819-syzkaller #0 syz-executor229[3755] cmdline: ./syz-executor2294415195 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/22/2022 RIP: 0010:__lock_acquire+0xd83/0x56d0 kernel/locking/lockdep.c:4923 [...] Call Trace: lock_acquire kernel/locking/lockdep.c:5666 [inline] lock_acquire+0x1ab/0x570 kernel/locking/lockdep.c:5631 down_read+0x98/0x450 kernel/locking/rwsem.c:1499 mmap_read_lock include/linux/mmap_lock.h:117 [inline] binder_alloc_new_buf_locked drivers/android/binder_alloc.c:405 [inline] binder_alloc_new_buf+0xa5/0x19e0 drivers/android/binder_alloc.c:593 binder_transaction+0x242e/0x9a80 drivers/android/binder.c:3199 binder_thread_write+0x664/0x3220 drivers/android/binder.c:3986 binder_ioctl_write_read drivers/android/binder.c:5036 [inline] binder_ioctl+0x3470/0x6d00 drivers/android/binder.c:5323 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd [...] ================================================================== Fix these issues by setting up alloc->vma_vm_mm pointer during open() and caching directly from current->mm. This guarantees we have a valid reference to take the mmap_lock during scenarios described above. [1] https://syzkaller.appspot.com/bug?extid=f7dc54e5be28950ac459 [2] https://syzkaller.appspot.com/bug?extid=a75ebe0452711c9e56d9 Fixes: 44e602b4e52f ("binder_alloc: add missing mmap_lock calls when using the VMA") Cc: # v5.15+ Cc: Liam R. Howlett Reported-by: syzbot+f7dc54e5be28950ac459@syzkaller.appspotmail.com Reported-by: syzbot+a75ebe0452711c9e56d9@syzkaller.appspotmail.com Reviewed-by: Liam R. Howlett Acked-by: Todd Kjos Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20220829201254.1814484-2-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 1014beb128025..a61df6e1103a1 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -322,7 +322,6 @@ static inline void binder_alloc_set_vma(struct binder_alloc *alloc, */ if (vma) { vm_start = vma->vm_start; - alloc->vma_vm_mm = vma->vm_mm; mmap_assert_write_locked(alloc->vma_vm_mm); } else { mmap_assert_locked(alloc->vma_vm_mm); @@ -792,7 +791,6 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, binder_insert_free_buffer(alloc, buffer); alloc->free_async_space = alloc->buffer_size / 2; binder_alloc_set_vma(alloc, vma); - mmgrab(alloc->vma_vm_mm); return 0; @@ -1080,6 +1078,8 @@ static struct shrinker binder_shrinker = { void binder_alloc_init(struct binder_alloc *alloc) { alloc->pid = current->group_leader->pid; + alloc->vma_vm_mm = current->mm; + mmgrab(alloc->vma_vm_mm); mutex_init(&alloc->mutex); INIT_LIST_HEAD(&alloc->buffers); } From 9b03e79300100bcd36e77c8ce94ee7f47cd2f528 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 5 Aug 2022 16:07:36 -0700 Subject: [PATCH 596/654] arch_topology: Silence early cacheinfo errors when non-existent Architectures which do not have cacheinfo such as ARM 32-bit would spit out the following during boot: Early cacheinfo failed, ret = -2 Treat -ENOENT specifically to silence this error since it means that the platform does not support reporting its cache information. Fixes: 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection in the CPU hotplug path") Tested-by: Geert Uytterhoeven Tested-by: Michael Walle Reviewed-by: Sudeep Holla Reviewed-by: Conor Dooley Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20220805230736.1562801-1-f.fainelli@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/arch_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 0424b59b695ef..eaa1b8d2d39d3 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -735,7 +735,7 @@ void update_siblings_masks(unsigned int cpuid) int cpu, ret; ret = detect_cache_attributes(cpuid); - if (ret) + if (ret && ret != -ENOENT) pr_info("Early cacheinfo failed, ret = %d\n", ret); /* update core and thread sibling masks */ From 75847100c351c7a49dddd60d1d023bd3e6640682 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 11:54:00 +0100 Subject: [PATCH 597/654] selftests/net: temporarily disable io_uring zc test We're going to change API, to avoid build problems with a couple of following commits, disable io_uring testing. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/12b7507223df04fbd12aa05fc0cb544b51d7ed79.1662027856.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- tools/testing/selftests/net/io_uring_zerocopy_tx.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c index 9d64c560a2d61..7446ef364e9f0 100644 --- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c @@ -36,6 +36,8 @@ #include #include +#if 0 + #define NOTIF_TAG 0xfffffffULL #define NONZC_TAG 0 #define ZC_TAG 1 @@ -603,3 +605,10 @@ int main(int argc, char **argv) error(1, 0, "unknown cfg_test %s", cfg_test); return 0; } + +#else +int main(int argc, char **argv) +{ + return 0; +} +#endif From 23c12d5fc02fb0712c64f3e87a27fcfa78e8af9c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 11:54:01 +0100 Subject: [PATCH 598/654] Revert "io_uring: add zc notification flush requests" This reverts commit 492dddb4f6e3a5839c27d41ff1fecdbe6c3ab851. Soon we won't have the very notion of notification flushing, so remove notification flushing requests. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/8850334ca56e65b413cb34fd158db81d7b2865a3.1662027856.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 - io_uring/rsrc.c | 38 ----------------------------------- 2 files changed, 39 deletions(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 9e0b5c8d92cea..18ae5caf17731 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -301,7 +301,6 @@ enum io_uring_op { */ enum { IORING_RSRC_UPDATE_FILES, - IORING_RSRC_UPDATE_NOTIF, }; /* diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 71359a4d0bd4e..048f7483fe8ac 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -15,7 +15,6 @@ #include "io_uring.h" #include "openclose.h" #include "rsrc.h" -#include "notif.h" struct io_rsrc_update { struct file *file; @@ -741,41 +740,6 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } -static int io_notif_update(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); - struct io_ring_ctx *ctx = req->ctx; - unsigned len = up->nr_args; - unsigned idx_end, idx = up->offset; - int ret = 0; - - io_ring_submit_lock(ctx, issue_flags); - if (unlikely(check_add_overflow(idx, len, &idx_end))) { - ret = -EOVERFLOW; - goto out; - } - if (unlikely(idx_end > ctx->nr_notif_slots)) { - ret = -EINVAL; - goto out; - } - - for (; idx < idx_end; idx++) { - struct io_notif_slot *slot = &ctx->notif_slots[idx]; - - if (!slot->notif) - continue; - if (up->arg) - slot->tag = up->arg; - io_notif_slot_flush_submit(slot, issue_flags); - } -out: - io_ring_submit_unlock(ctx, issue_flags); - if (ret < 0) - req_set_fail(req); - io_req_set_res(req, ret, 0); - return IOU_OK; -} - int io_rsrc_update(struct io_kiocb *req, unsigned int issue_flags) { struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); @@ -783,8 +747,6 @@ int io_rsrc_update(struct io_kiocb *req, unsigned int issue_flags) switch (up->type) { case IORING_RSRC_UPDATE_FILES: return io_files_update(req, issue_flags); - case IORING_RSRC_UPDATE_NOTIF: - return io_notif_update(req, issue_flags); } return -EINVAL; } From d9808ceb3129b811becebdee3ec96d189c83e56c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 11:54:02 +0100 Subject: [PATCH 599/654] Revert "io_uring: rename IORING_OP_FILES_UPDATE" This reverts commit 4379d5f15b3fd4224c37841029178aa8082a242e. We removed notification flushing, also cleanup uapi preparation changes to not pollute it. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/89edc3905350f91e1b6e26d9dbf42ee44fd451a2.1662027856.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 12 +----------- io_uring/opdef.c | 9 ++++----- io_uring/rsrc.c | 17 ++--------------- io_uring/rsrc.h | 4 ++-- 4 files changed, 9 insertions(+), 33 deletions(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 18ae5caf17731..111b651366bda 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -178,8 +178,7 @@ enum io_uring_op { IORING_OP_FALLOCATE, IORING_OP_OPENAT, IORING_OP_CLOSE, - IORING_OP_RSRC_UPDATE, - IORING_OP_FILES_UPDATE = IORING_OP_RSRC_UPDATE, + IORING_OP_FILES_UPDATE, IORING_OP_STATX, IORING_OP_READ, IORING_OP_WRITE, @@ -228,7 +227,6 @@ enum io_uring_op { #define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) - /* * sqe->splice_flags * extends splice(2) flags @@ -295,14 +293,6 @@ enum io_uring_op { */ #define IORING_ACCEPT_MULTISHOT (1U << 0) - -/* - * IORING_OP_RSRC_UPDATE flags - */ -enum { - IORING_RSRC_UPDATE_FILES, -}; - /* * IORING_OP_MSG_RING command types, stored in sqe->addr */ diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 41410126c1c68..10b301ccf5cd6 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -246,13 +246,12 @@ const struct io_op_def io_op_defs[] = { .prep = io_close_prep, .issue = io_close, }, - [IORING_OP_RSRC_UPDATE] = { + [IORING_OP_FILES_UPDATE] = { .audit_skip = 1, .iopoll = 1, - .name = "RSRC_UPDATE", - .prep = io_rsrc_update_prep, - .issue = io_rsrc_update, - .ioprio = 1, + .name = "FILES_UPDATE", + .prep = io_files_update_prep, + .issue = io_files_update, }, [IORING_OP_STATX] = { .audit_skip = 1, diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 048f7483fe8ac..cf32721132141 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -21,7 +21,6 @@ struct io_rsrc_update { u64 arg; u32 nr_args; u32 offset; - int type; }; static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, @@ -654,7 +653,7 @@ __cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, return -EINVAL; } -int io_rsrc_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); @@ -668,7 +667,6 @@ int io_rsrc_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (!up->nr_args) return -EINVAL; up->arg = READ_ONCE(sqe->addr); - up->type = READ_ONCE(sqe->ioprio); return 0; } @@ -711,7 +709,7 @@ static int io_files_update_with_index_alloc(struct io_kiocb *req, return ret; } -static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) +int io_files_update(struct io_kiocb *req, unsigned int issue_flags) { struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); struct io_ring_ctx *ctx = req->ctx; @@ -740,17 +738,6 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } -int io_rsrc_update(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); - - switch (up->type) { - case IORING_RSRC_UPDATE_FILES: - return io_files_update(req, issue_flags); - } - return -EINVAL; -} - int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, struct io_rsrc_node *node, void *rsrc) { diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index f3a9a177941f8..9bce15665444e 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -167,8 +167,8 @@ static inline u64 *io_get_tag_slot(struct io_rsrc_data *data, unsigned int idx) return &data->tags[table_idx][off]; } -int io_rsrc_update(struct io_kiocb *req, unsigned int issue_flags); -int io_rsrc_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); +int io_files_update(struct io_kiocb *req, unsigned int issue_flags); +int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int __io_account_mem(struct user_struct *user, unsigned long nr_pages); From 57f332246afa5929bdf2e7a5facddedb43549be4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 11:54:03 +0100 Subject: [PATCH 600/654] io_uring/notif: remove notif registration We're going to remove the userspace exposed zerocopy notification API, remove notification registration. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/6ff00b97be99869c386958a990593c9c31cf105b.1662027856.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 ---- io_uring/io_uring.c | 10 ----- io_uring/net.c | 4 +- io_uring/notif.c | 71 ----------------------------------- io_uring/notif.h | 11 ------ 5 files changed, 1 insertion(+), 103 deletions(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 111b651366bda..b11c57b0ebb51 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -279,14 +279,10 @@ enum io_uring_op { * * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in * the buf_index field. - * - * IORING_RECVSEND_NOTIF_FLUSH Flush a notification after a successful - * successful. Only for zerocopy sends. */ #define IORING_RECVSEND_POLL_FIRST (1U << 0) #define IORING_RECV_MULTISHOT (1U << 1) #define IORING_RECVSEND_FIXED_BUF (1U << 2) -#define IORING_RECVSEND_NOTIF_FLUSH (1U << 3) /* * accept flags stored in sqe->ioprio @@ -474,10 +470,6 @@ enum { /* register a range of fixed file slots for automatic slot allocation */ IORING_REGISTER_FILE_ALLOC_RANGE = 25, - /* zerocopy notification API */ - IORING_REGISTER_NOTIFIERS = 26, - IORING_UNREGISTER_NOTIFIERS = 27, - /* this goes last */ IORING_REGISTER_LAST }; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 77616279000b0..c2e06a3aa18da 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2640,7 +2640,6 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) io_unregister_personality(ctx, index); if (ctx->rings) io_poll_remove_all(ctx, NULL, true); - io_notif_unregister(ctx); mutex_unlock(&ctx->uring_lock); /* failed during ring init, it couldn't have issued any requests */ @@ -3839,15 +3838,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, break; ret = io_register_file_alloc_range(ctx, arg); break; - case IORING_REGISTER_NOTIFIERS: - ret = io_notif_register(ctx, arg, nr_args); - break; - case IORING_UNREGISTER_NOTIFIERS: - ret = -EINVAL; - if (arg || nr_args) - break; - ret = io_notif_unregister(ctx); - break; default: ret = -EINVAL; break; diff --git a/io_uring/net.c b/io_uring/net.c index 7a5468cc905e7..aac6997b7d883 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -889,7 +889,7 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) zc->flags = READ_ONCE(sqe->ioprio); if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | - IORING_RECVSEND_FIXED_BUF | IORING_RECVSEND_NOTIF_FLUSH)) + IORING_RECVSEND_FIXED_BUF)) return -EINVAL; if (zc->flags & IORING_RECVSEND_FIXED_BUF) { unsigned idx = READ_ONCE(sqe->buf_index); @@ -1063,8 +1063,6 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) if (ret == -ERESTARTSYS) ret = -EINTR; req_set_fail(req); - } else if (zc->flags & IORING_RECVSEND_NOTIF_FLUSH) { - io_notif_slot_flush_submit(notif_slot, 0); } if (ret >= 0) diff --git a/io_uring/notif.c b/io_uring/notif.c index 96f076b175e07..11f45640684a1 100644 --- a/io_uring/notif.c +++ b/io_uring/notif.c @@ -86,74 +86,3 @@ void io_notif_slot_flush(struct io_notif_slot *slot) io_req_task_work_add(notif); } } - -__cold int io_notif_unregister(struct io_ring_ctx *ctx) - __must_hold(&ctx->uring_lock) -{ - int i; - - if (!ctx->notif_slots) - return -ENXIO; - - for (i = 0; i < ctx->nr_notif_slots; i++) { - struct io_notif_slot *slot = &ctx->notif_slots[i]; - struct io_kiocb *notif = slot->notif; - struct io_notif_data *nd; - - if (!notif) - continue; - nd = io_notif_to_data(notif); - slot->notif = NULL; - if (!refcount_dec_and_test(&nd->uarg.refcnt)) - continue; - notif->io_task_work.func = __io_notif_complete_tw; - io_req_task_work_add(notif); - } - - kvfree(ctx->notif_slots); - ctx->notif_slots = NULL; - ctx->nr_notif_slots = 0; - return 0; -} - -__cold int io_notif_register(struct io_ring_ctx *ctx, - void __user *arg, unsigned int size) - __must_hold(&ctx->uring_lock) -{ - struct io_uring_notification_slot __user *slots; - struct io_uring_notification_slot slot; - struct io_uring_notification_register reg; - unsigned i; - - if (ctx->nr_notif_slots) - return -EBUSY; - if (size != sizeof(reg)) - return -EINVAL; - if (copy_from_user(®, arg, sizeof(reg))) - return -EFAULT; - if (!reg.nr_slots || reg.nr_slots > IORING_MAX_NOTIF_SLOTS) - return -EINVAL; - if (reg.resv || reg.resv2 || reg.resv3) - return -EINVAL; - - slots = u64_to_user_ptr(reg.data); - ctx->notif_slots = kvcalloc(reg.nr_slots, sizeof(ctx->notif_slots[0]), - GFP_KERNEL_ACCOUNT); - if (!ctx->notif_slots) - return -ENOMEM; - - for (i = 0; i < reg.nr_slots; i++, ctx->nr_notif_slots++) { - struct io_notif_slot *notif_slot = &ctx->notif_slots[i]; - - if (copy_from_user(&slot, &slots[i], sizeof(slot))) { - io_notif_unregister(ctx); - return -EFAULT; - } - if (slot.resv[0] | slot.resv[1] | slot.resv[2]) { - io_notif_unregister(ctx); - return -EINVAL; - } - notif_slot->tag = slot.tag; - } - return 0; -} diff --git a/io_uring/notif.h b/io_uring/notif.h index 80f6445e0c2ba..8380eeff2f2e0 100644 --- a/io_uring/notif.h +++ b/io_uring/notif.h @@ -8,7 +8,6 @@ #include "rsrc.h" #define IO_NOTIF_SPLICE_BATCH 32 -#define IORING_MAX_NOTIF_SLOTS (1U << 15) struct io_notif_data { struct file *file; @@ -36,10 +35,6 @@ struct io_notif_slot { u32 seq; }; -int io_notif_register(struct io_ring_ctx *ctx, - void __user *arg, unsigned int size); -int io_notif_unregister(struct io_ring_ctx *ctx); - void io_notif_slot_flush(struct io_notif_slot *slot); struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, struct io_notif_slot *slot); @@ -67,12 +62,6 @@ static inline struct io_notif_slot *io_get_notif_slot(struct io_ring_ctx *ctx, return &ctx->notif_slots[idx]; } -static inline void io_notif_slot_flush_submit(struct io_notif_slot *slot, - unsigned int issue_flags) -{ - io_notif_slot_flush(slot); -} - static inline int io_notif_account_mem(struct io_kiocb *notif, unsigned len) { struct io_ring_ctx *ctx = notif->ctx; From b48c312be05e83b55a4d58bf61f80b4a3288fb7e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 11:54:04 +0100 Subject: [PATCH 601/654] io_uring/net: simplify zerocopy send user API Following user feedback, this patch simplifies zerocopy send API. One of the main complaints is that the current API is difficult with the userspace managing notification slots, and then send retries with error handling make it even worse. Instead of keeping notification slots change it to the per-request notifications model, which posts both completion and notification CQEs for each request when any data has been sent, and only one CQE if it fails. All notification CQEs will have IORING_CQE_F_NOTIF set and IORING_CQE_F_MORE in completion CQEs indicates whether to wait a notification or not. IOSQE_CQE_SKIP_SUCCESS is disallowed with zerocopy sends for now. This is less flexible, but greatly simplifies the user API and also the kernel implementation. We reuse notif helpers in this patch, but in the future there won't be need for keeping two requests. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/95287640ab98fc9417370afb16e310677c63e6ce.1662027856.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 7 +++-- io_uring/io_uring.c | 4 +-- io_uring/net.c | 53 ++++++++++++++++++++++------------- io_uring/net.h | 1 + io_uring/notif.c | 12 ++------ io_uring/notif.h | 43 ++-------------------------- io_uring/opdef.c | 3 +- 7 files changed, 47 insertions(+), 76 deletions(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b11c57b0ebb51..6b83177fd41dd 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -71,8 +71,8 @@ struct io_uring_sqe { __s32 splice_fd_in; __u32 file_index; struct { - __u16 notification_idx; __u16 addr_len; + __u16 __pad3[1]; }; }; union { @@ -205,7 +205,7 @@ enum io_uring_op { IORING_OP_GETXATTR, IORING_OP_SOCKET, IORING_OP_URING_CMD, - IORING_OP_SENDZC_NOTIF, + IORING_OP_SEND_ZC, /* this goes last, obviously */ IORING_OP_LAST, @@ -326,10 +326,13 @@ struct io_uring_cqe { * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries * IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv + * IORING_CQE_F_NOTIF Set for notification CQEs. Can be used to distinct + * them from sends. */ #define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_MORE (1U << 1) #define IORING_CQE_F_SOCK_NONEMPTY (1U << 2) +#define IORING_CQE_F_NOTIF (1U << 3) enum { IORING_CQE_BUFFER_SHIFT = 16, diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index c2e06a3aa18da..f9be9b7eb654f 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3923,8 +3923,8 @@ static int __init io_uring_init(void) BUILD_BUG_SQE_ELEM(42, __u16, personality); BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); BUILD_BUG_SQE_ELEM(44, __u32, file_index); - BUILD_BUG_SQE_ELEM(44, __u16, notification_idx); - BUILD_BUG_SQE_ELEM(46, __u16, addr_len); + BUILD_BUG_SQE_ELEM(44, __u16, addr_len); + BUILD_BUG_SQE_ELEM(46, __u16, __pad3[0]); BUILD_BUG_SQE_ELEM(48, __u64, addr3); BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd); BUILD_BUG_SQE_ELEM(56, __u64, __pad2); diff --git a/io_uring/net.c b/io_uring/net.c index aac6997b7d883..7047c13425419 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -65,12 +65,12 @@ struct io_sendzc { struct file *file; void __user *buf; size_t len; - u16 slot_idx; unsigned msg_flags; unsigned flags; unsigned addr_len; void __user *addr; size_t done_io; + struct io_kiocb *notif; }; #define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED) @@ -879,12 +879,26 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags) return ret; } +void io_sendzc_cleanup(struct io_kiocb *req) +{ + struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); + + zc->notif->flags |= REQ_F_CQE_SKIP; + io_notif_flush(zc->notif); + zc->notif = NULL; +} + int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); struct io_ring_ctx *ctx = req->ctx; + struct io_kiocb *notif; - if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)) + if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3) || + READ_ONCE(sqe->__pad3[0])) + return -EINVAL; + /* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */ + if (req->flags & REQ_F_CQE_SKIP) return -EINVAL; zc->flags = READ_ONCE(sqe->ioprio); @@ -900,11 +914,17 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->imu = READ_ONCE(ctx->user_bufs[idx]); io_req_set_rsrc_node(req, ctx, 0); } + notif = zc->notif = io_alloc_notif(ctx); + if (!notif) + return -ENOMEM; + notif->cqe.user_data = req->cqe.user_data; + notif->cqe.res = 0; + notif->cqe.flags = IORING_CQE_F_NOTIF; + req->flags |= REQ_F_NEED_CLEANUP; zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); zc->len = READ_ONCE(sqe->len); zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; - zc->slot_idx = READ_ONCE(sqe->notification_idx); if (zc->msg_flags & MSG_DONTWAIT) req->flags |= REQ_F_NOWAIT; @@ -976,33 +996,20 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) { struct sockaddr_storage __address, *addr = NULL; - struct io_ring_ctx *ctx = req->ctx; struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); - struct io_notif_slot *notif_slot; - struct io_kiocb *notif; struct msghdr msg; struct iovec iov; struct socket *sock; - unsigned msg_flags; + unsigned msg_flags, cflags; int ret, min_ret = 0; if (!(req->flags & REQ_F_POLLED) && (zc->flags & IORING_RECVSEND_POLL_FIRST)) return -EAGAIN; - - if (issue_flags & IO_URING_F_UNLOCKED) - return -EAGAIN; sock = sock_from_file(req->file); if (unlikely(!sock)) return -ENOTSOCK; - notif_slot = io_get_notif_slot(ctx, zc->slot_idx); - if (!notif_slot) - return -EINVAL; - notif = io_get_notif(ctx, notif_slot); - if (!notif) - return -ENOMEM; - msg.msg_name = NULL; msg.msg_control = NULL; msg.msg_controllen = 0; @@ -1033,7 +1040,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) &msg.msg_iter); if (unlikely(ret)) return ret; - ret = io_notif_account_mem(notif, zc->len); + ret = io_notif_account_mem(zc->notif, zc->len); if (unlikely(ret)) return ret; } @@ -1045,7 +1052,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) min_ret = iov_iter_count(&msg.msg_iter); msg.msg_flags = msg_flags; - msg.msg_ubuf = &io_notif_to_data(notif)->uarg; + msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg; msg.sg_from_iter = io_sg_from_iter; ret = sock_sendmsg(sock, &msg); @@ -1060,6 +1067,8 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) req->flags |= REQ_F_PARTIAL_IO; return io_setup_async_addr(req, addr, issue_flags); } + if (ret < 0 && !zc->done_io) + zc->notif->flags |= REQ_F_CQE_SKIP; if (ret == -ERESTARTSYS) ret = -EINTR; req_set_fail(req); @@ -1069,7 +1078,11 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) ret += zc->done_io; else if (zc->done_io) ret = zc->done_io; - io_req_set_res(req, ret, 0); + + io_notif_flush(zc->notif); + req->flags &= ~REQ_F_NEED_CLEANUP; + cflags = ret >= 0 ? IORING_CQE_F_MORE : 0; + io_req_set_res(req, ret, cflags); return IOU_OK; } diff --git a/io_uring/net.h b/io_uring/net.h index f91f56c6eeacf..d744a0a874e75 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -55,6 +55,7 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags); int io_sendzc(struct io_kiocb *req, unsigned int issue_flags); int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); +void io_sendzc_cleanup(struct io_kiocb *req); void io_netmsg_cache_free(struct io_cache_entry *entry); #else diff --git a/io_uring/notif.c b/io_uring/notif.c index 11f45640684a1..38d77165edc3a 100644 --- a/io_uring/notif.c +++ b/io_uring/notif.c @@ -42,8 +42,7 @@ static void io_uring_tx_zerocopy_callback(struct sk_buff *skb, } } -struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, - struct io_notif_slot *slot) +struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx) __must_hold(&ctx->uring_lock) { struct io_kiocb *notif; @@ -59,27 +58,20 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, io_get_task_refs(1); notif->rsrc_node = NULL; io_req_set_rsrc_node(notif, ctx, 0); - notif->cqe.user_data = slot->tag; - notif->cqe.flags = slot->seq++; - notif->cqe.res = 0; nd = io_notif_to_data(notif); nd->account_pages = 0; nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; nd->uarg.callback = io_uring_tx_zerocopy_callback; - /* master ref owned by io_notif_slot, will be dropped on flush */ refcount_set(&nd->uarg.refcnt, 1); return notif; } -void io_notif_slot_flush(struct io_notif_slot *slot) +void io_notif_flush(struct io_kiocb *notif) __must_hold(&slot->notif->ctx->uring_lock) { - struct io_kiocb *notif = slot->notif; struct io_notif_data *nd = io_notif_to_data(notif); - slot->notif = NULL; - /* drop slot's master ref */ if (refcount_dec_and_test(&nd->uarg.refcnt)) { notif->io_task_work.func = __io_notif_complete_tw; diff --git a/io_uring/notif.h b/io_uring/notif.h index 8380eeff2f2e0..5b4d710c8ca54 100644 --- a/io_uring/notif.h +++ b/io_uring/notif.h @@ -15,53 +15,14 @@ struct io_notif_data { unsigned long account_pages; }; -struct io_notif_slot { - /* - * Current/active notifier. A slot holds only one active notifier at a - * time and keeps one reference to it. Flush releases the reference and - * lazily replaces it with a new notifier. - */ - struct io_kiocb *notif; - - /* - * Default ->user_data for this slot notifiers CQEs - */ - u64 tag; - /* - * Notifiers of a slot live in generations, we create a new notifier - * only after flushing the previous one. Track the sequential number - * for all notifiers and copy it into notifiers's cqe->cflags - */ - u32 seq; -}; - -void io_notif_slot_flush(struct io_notif_slot *slot); -struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, - struct io_notif_slot *slot); +void io_notif_flush(struct io_kiocb *notif); +struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx); static inline struct io_notif_data *io_notif_to_data(struct io_kiocb *notif) { return io_kiocb_to_cmd(notif, struct io_notif_data); } -static inline struct io_kiocb *io_get_notif(struct io_ring_ctx *ctx, - struct io_notif_slot *slot) -{ - if (!slot->notif) - slot->notif = io_alloc_notif(ctx, slot); - return slot->notif; -} - -static inline struct io_notif_slot *io_get_notif_slot(struct io_ring_ctx *ctx, - unsigned idx) - __must_hold(&ctx->uring_lock) -{ - if (idx >= ctx->nr_notif_slots) - return NULL; - idx = array_index_nospec(idx, ctx->nr_notif_slots); - return &ctx->notif_slots[idx]; -} - static inline int io_notif_account_mem(struct io_kiocb *notif, unsigned len) { struct io_ring_ctx *ctx = notif->ctx; diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 10b301ccf5cd6..c61494e0a6022 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -470,7 +470,7 @@ const struct io_op_def io_op_defs[] = { .issue = io_uring_cmd, .prep_async = io_uring_cmd_prep_async, }, - [IORING_OP_SENDZC_NOTIF] = { + [IORING_OP_SEND_ZC] = { .name = "SENDZC_NOTIF", .needs_file = 1, .unbound_nonreg_file = 1, @@ -483,6 +483,7 @@ const struct io_op_def io_op_defs[] = { .prep = io_sendzc_prep, .issue = io_sendzc, .prep_async = io_sendzc_prep_async, + .cleanup = io_sendzc_cleanup, #else .prep = io_eopnotsupp_prep, #endif From 916d72c10a4ca80ea51f1421e774cb765b53f28f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 11:54:05 +0100 Subject: [PATCH 602/654] selftests/net: return back io_uring zc send tests Enable io_uring zerocopy send tests back and fix them up to follow the new inteface. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/c8e5018c516093bdad0b6e19f2f9847dea17e4d2.1662027856.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- .../selftests/net/io_uring_zerocopy_tx.c | 110 ++++++------------ .../selftests/net/io_uring_zerocopy_tx.sh | 10 +- 2 files changed, 41 insertions(+), 79 deletions(-) diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c index 7446ef364e9f0..8ce48aca83217 100644 --- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c @@ -36,8 +36,6 @@ #include #include -#if 0 - #define NOTIF_TAG 0xfffffffULL #define NONZC_TAG 0 #define ZC_TAG 1 @@ -49,7 +47,6 @@ enum { MODE_MIXED = 3, }; -static bool cfg_flush = false; static bool cfg_cork = false; static int cfg_mode = MODE_ZC_FIXED; static int cfg_nr_reqs = 8; @@ -168,21 +165,6 @@ static int io_uring_register_buffers(struct io_uring *ring, return (ret < 0) ? -errno : ret; } -static int io_uring_register_notifications(struct io_uring *ring, - unsigned nr, - struct io_uring_notification_slot *slots) -{ - int ret; - struct io_uring_notification_register r = { - .nr_slots = nr, - .data = (unsigned long)slots, - }; - - ret = syscall(__NR_io_uring_register, ring->ring_fd, - IORING_REGISTER_NOTIFIERS, &r, sizeof(r)); - return (ret < 0) ? -errno : ret; -} - static int io_uring_mmap(int fd, struct io_uring_params *p, struct io_uring_sq *sq, struct io_uring_cq *cq) { @@ -299,11 +281,10 @@ static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd, static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd, const void *buf, size_t len, int flags, - unsigned slot_idx, unsigned zc_flags) + unsigned zc_flags) { io_uring_prep_send(sqe, sockfd, buf, len, flags); - sqe->opcode = (__u8) IORING_OP_SENDZC_NOTIF; - sqe->notification_idx = slot_idx; + sqe->opcode = (__u8) IORING_OP_SEND_ZC; sqe->ioprio = zc_flags; } @@ -376,7 +357,6 @@ static int do_setup_tx(int domain, int type, int protocol) static void do_tx(int domain, int type, int protocol) { - struct io_uring_notification_slot b[1] = {{.tag = NOTIF_TAG}}; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; unsigned long packets = 0, bytes = 0; @@ -392,10 +372,6 @@ static void do_tx(int domain, int type, int protocol) if (ret) error(1, ret, "io_uring: queue init"); - ret = io_uring_register_notifications(&ring, 1, b); - if (ret) - error(1, ret, "io_uring: tx ctx registration"); - iov.iov_base = payload; iov.iov_len = cfg_payload_len; @@ -411,9 +387,8 @@ static void do_tx(int domain, int type, int protocol) for (i = 0; i < cfg_nr_reqs; i++) { unsigned zc_flags = 0; unsigned buf_idx = 0; - unsigned slot_idx = 0; unsigned mode = cfg_mode; - unsigned msg_flags = 0; + unsigned msg_flags = MSG_WAITALL; if (cfg_mode == MODE_MIXED) mode = rand() % 3; @@ -425,13 +400,10 @@ static void do_tx(int domain, int type, int protocol) cfg_payload_len, msg_flags); sqe->user_data = NONZC_TAG; } else { - if (cfg_flush) { - zc_flags |= IORING_RECVSEND_NOTIF_FLUSH; - compl_cqes++; - } + compl_cqes++; io_uring_prep_sendzc(sqe, fd, payload, cfg_payload_len, - msg_flags, slot_idx, zc_flags); + msg_flags, zc_flags); if (mode == MODE_ZC_FIXED) { sqe->ioprio |= IORING_RECVSEND_FIXED_BUF; sqe->buf_index = buf_idx; @@ -444,51 +416,57 @@ static void do_tx(int domain, int type, int protocol) if (ret != cfg_nr_reqs) error(1, ret, "submit"); + if (cfg_cork) + do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); for (i = 0; i < cfg_nr_reqs; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) error(1, ret, "wait cqe"); - if (cqe->user_data == NOTIF_TAG) { + if (cqe->user_data != NONZC_TAG && + cqe->user_data != ZC_TAG) + error(1, -EINVAL, "invalid cqe->user_data"); + + if (cqe->flags & IORING_CQE_F_NOTIF) { + if (cqe->flags & IORING_CQE_F_MORE) + error(1, -EINVAL, "invalid notif flags"); compl_cqes--; i--; - } else if (cqe->user_data != NONZC_TAG && - cqe->user_data != ZC_TAG) { - error(1, cqe->res, "invalid user_data"); - } else if (cqe->res <= 0 && cqe->res != -EAGAIN) { + } else if (cqe->res <= 0) { + if (cqe->flags & IORING_CQE_F_MORE) + error(1, cqe->res, "more with a failed send"); error(1, cqe->res, "send failed"); } else { - if (cqe->res > 0) { - packets++; - bytes += cqe->res; - } - /* failed requests don't flush */ - if (cfg_flush && - cqe->res <= 0 && - cqe->user_data == ZC_TAG) - compl_cqes--; + if (cqe->user_data == ZC_TAG && + !(cqe->flags & IORING_CQE_F_MORE)) + error(1, cqe->res, "missing more flag"); + packets++; + bytes += cqe->res; } io_uring_cqe_seen(&ring); } - if (cfg_cork) - do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); } while (gettimeofday_ms() < tstop); - if (close(fd)) - error(1, errno, "close"); - - fprintf(stderr, "tx=%lu (MB=%lu), tx/s=%lu (MB/s=%lu)\n", - packets, bytes >> 20, - packets / (cfg_runtime_ms / 1000), - (bytes >> 20) / (cfg_runtime_ms / 1000)); - while (compl_cqes) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) error(1, ret, "wait cqe"); + if (cqe->flags & IORING_CQE_F_MORE) + error(1, -EINVAL, "invalid notif flags"); + if (!(cqe->flags & IORING_CQE_F_NOTIF)) + error(1, -EINVAL, "missing notif flag"); + io_uring_cqe_seen(&ring); compl_cqes--; } + + fprintf(stderr, "tx=%lu (MB=%lu), tx/s=%lu (MB/s=%lu)\n", + packets, bytes >> 20, + packets / (cfg_runtime_ms / 1000), + (bytes >> 20) / (cfg_runtime_ms / 1000)); + + if (close(fd)) + error(1, errno, "close"); } static void do_test(int domain, int type, int protocol) @@ -502,8 +480,8 @@ static void do_test(int domain, int type, int protocol) static void usage(const char *filepath) { - error(1, 0, "Usage: %s [-f] [-n] [-z0] [-s] " - "(-4|-6) [-t