From 06e472acf964649a58b7de35fc9cdc3151acb970 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Fri, 28 Oct 2022 14:46:55 +0530 Subject: [PATCH 0001/1593] scsi: mpt3sas: Remove usage of dma_get_required_mask() API Remove the usage of dma_get_required_mask() API. Directly set the DMA mask to 63/64 if the system is a 64bit machine. Signed-off-by: Sreekanth Reddy Link: https://lore.kernel.org/r/20221028091655.17741-2-sreekanth.reddy@broadcom.com Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 4e981ccaac416..69061545d9d2f 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2992,8 +2992,7 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) struct sysinfo s; u64 coherent_dma_mask, dma_mask; - if (ioc->is_mcpu_endpoint || sizeof(dma_addr_t) == 4 || - dma_get_required_mask(&pdev->dev) <= DMA_BIT_MASK(32)) { + if (ioc->is_mcpu_endpoint || sizeof(dma_addr_t) == 4) { ioc->dma_mask = 32; coherent_dma_mask = dma_mask = DMA_BIT_MASK(32); /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */ From d347a951906b506c485ee9ffb76fe473b41f3e10 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Fri, 11 Nov 2022 15:52:46 +0530 Subject: [PATCH 0002/1593] scsi: mpi3mr: Remove usage of dma_get_required_mask() API Remove the usage of dma_get_required_mask() API. Directly set the DMA mask to 63/64 if the system is a 64bit machine. Signed-off-by: Sreekanth Reddy Link: https://lore.kernel.org/r/20221111102246.19995-2-sreekanth.reddy@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 0c4aabaefdcc4..286a44506578b 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -3633,8 +3633,7 @@ int mpi3mr_setup_resources(struct mpi3mr_ioc *mrioc) int i, retval = 0, capb = 0; u16 message_control; u64 dma_mask = mrioc->dma_mask ? mrioc->dma_mask : - (((dma_get_required_mask(&pdev->dev) > DMA_BIT_MASK(32)) && - (sizeof(dma_addr_t) > 4)) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32)); + ((sizeof(dma_addr_t) > 4) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32)); if (pci_enable_device_mem(pdev)) { ioc_err(mrioc, "pci_enable_device_mem: failed\n"); From 02228f6aa6a64d588bc31e3267d05ff184d772eb Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Sun, 27 Nov 2022 22:06:02 +0100 Subject: [PATCH 0003/1593] regulator: da9211: Use irq handler when ready If the system does not come from reset (like when it is kexec()), the regulator might have an IRQ waiting for us. If we enable the IRQ handler before its structures are ready, we crash. This patch fixes: [ 1.141839] Unable to handle kernel read from unreadable memory at virtual address 0000000000000078 [ 1.316096] Call trace: [ 1.316101] blocking_notifier_call_chain+0x20/0xa8 [ 1.322757] cpu cpu0: dummy supplies not allowed for exclusive requests [ 1.327823] regulator_notifier_call_chain+0x1c/0x2c [ 1.327825] da9211_irq_handler+0x68/0xf8 [ 1.327829] irq_thread+0x11c/0x234 [ 1.327833] kthread+0x13c/0x154 Signed-off-by: Ricardo Ribalda Reviewed-by: Adam Ward Link: https://lore.kernel.org/r/20221124-da9211-v2-0-1779e3c5d491@chromium.org Signed-off-by: Mark Brown --- drivers/regulator/da9211-regulator.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c index e01b32d1fa17d..00828f5baa972 100644 --- a/drivers/regulator/da9211-regulator.c +++ b/drivers/regulator/da9211-regulator.c @@ -498,6 +498,12 @@ static int da9211_i2c_probe(struct i2c_client *i2c) chip->chip_irq = i2c->irq; + ret = da9211_regulator_init(chip); + if (ret < 0) { + dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret); + return ret; + } + if (chip->chip_irq != 0) { ret = devm_request_threaded_irq(chip->dev, chip->chip_irq, NULL, da9211_irq_handler, @@ -512,11 +518,6 @@ static int da9211_i2c_probe(struct i2c_client *i2c) dev_warn(chip->dev, "No IRQ configured\n"); } - ret = da9211_regulator_init(chip); - - if (ret < 0) - dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret); - return ret; } From aff234839f8b80ac101e6c2f14d0e44b236efa48 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 9 Dec 2022 16:44:46 +0000 Subject: [PATCH 0004/1593] KVM: arm64: PMU: Fix PMCR_EL0 reset value ARMV8_PMU_PMCR_N_MASK is an unshifted value which results in the wrong reset value for PMCR_EL0, so shift it to fix it. This fixes the following error when running qemu: $ qemu-system-aarch64 -cpu host -machine type=virt,accel=kvm -kernel ... target/arm/helper.c:1813: pmevcntr_rawwrite: Assertion `counter < pmu_num_counters(env)' failed. Fixes: 292e8f149476 ("KVM: arm64: PMU: Simplify PMCR_EL0 reset handling") Signed-off-by: James Clark Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221209164446.1972014-2-james.clark@arm.com --- arch/arm64/kvm/sys_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d5ee52d6bf732..c6cbfe6b854b3 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -646,7 +646,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) return; /* Only preserve PMCR_EL0.N, and reset the rest to 0 */ - pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N_MASK; + pmcr = read_sysreg(pmcr_el0) & (ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT); if (!kvm_supports_32bit_el0()) pmcr |= ARMV8_PMU_PMCR_LC; From f728a5ea27c92133893590e731ce10f6561ced87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 6 Dec 2022 14:07:49 +0100 Subject: [PATCH 0005/1593] dma-buf: fix dma_buf_export init order v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The init order and resulting error handling in dma_buf_export was pretty messy. Subordinate objects like the file and the sysfs kernel objects were initializing and wiring itself up with the object in the wrong order resulting not only in complicating and partially incorrect error handling, but also in publishing only halve initialized DMA-buf objects. Clean this up thoughtfully by allocating the file independent of the DMA-buf object. Then allocate and initialize the DMA-buf object itself, before publishing it through sysfs. If everything works as expected the file is then connected with the DMA-buf object and publish it through debugfs. Also adds the missing dma_resv_fini() into the error handling. v2: add some missing changes to dma_bug_getfile() and a missing NULL check in dma_buf_file_release() Signed-off-by: Christian König Reviewed-by: Michael J. Ruhl Reviewed-by: T.J. Mercier Acked-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20221209071535.933698-1-christian.koenig@amd.com --- drivers/dma-buf/dma-buf-sysfs-stats.c | 7 +-- drivers/dma-buf/dma-buf-sysfs-stats.h | 4 +- drivers/dma-buf/dma-buf.c | 84 +++++++++++++-------------- 3 files changed, 43 insertions(+), 52 deletions(-) diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.c b/drivers/dma-buf/dma-buf-sysfs-stats.c index 2bba0babcb62b..4b680e10c15a3 100644 --- a/drivers/dma-buf/dma-buf-sysfs-stats.c +++ b/drivers/dma-buf/dma-buf-sysfs-stats.c @@ -168,14 +168,11 @@ void dma_buf_uninit_sysfs_statistics(void) kset_unregister(dma_buf_stats_kset); } -int dma_buf_stats_setup(struct dma_buf *dmabuf) +int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file) { struct dma_buf_sysfs_entry *sysfs_entry; int ret; - if (!dmabuf || !dmabuf->file) - return -EINVAL; - if (!dmabuf->exp_name) { pr_err("exporter name must not be empty if stats needed\n"); return -EINVAL; @@ -192,7 +189,7 @@ int dma_buf_stats_setup(struct dma_buf *dmabuf) /* create the directory for buffer stats */ ret = kobject_init_and_add(&sysfs_entry->kobj, &dma_buf_ktype, NULL, - "%lu", file_inode(dmabuf->file)->i_ino); + "%lu", file_inode(file)->i_ino); if (ret) goto err_sysfs_dmabuf; diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.h b/drivers/dma-buf/dma-buf-sysfs-stats.h index a49c6e2650ccc..7a8a995b75bae 100644 --- a/drivers/dma-buf/dma-buf-sysfs-stats.h +++ b/drivers/dma-buf/dma-buf-sysfs-stats.h @@ -13,7 +13,7 @@ int dma_buf_init_sysfs_statistics(void); void dma_buf_uninit_sysfs_statistics(void); -int dma_buf_stats_setup(struct dma_buf *dmabuf); +int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file); void dma_buf_stats_teardown(struct dma_buf *dmabuf); #else @@ -25,7 +25,7 @@ static inline int dma_buf_init_sysfs_statistics(void) static inline void dma_buf_uninit_sysfs_statistics(void) {} -static inline int dma_buf_stats_setup(struct dma_buf *dmabuf) +static inline int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file) { return 0; } diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index e6f36c014c4cd..eb6b59363c4f5 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -95,10 +95,11 @@ static int dma_buf_file_release(struct inode *inode, struct file *file) return -EINVAL; dmabuf = file->private_data; - - mutex_lock(&db_list.lock); - list_del(&dmabuf->list_node); - mutex_unlock(&db_list.lock); + if (dmabuf) { + mutex_lock(&db_list.lock); + list_del(&dmabuf->list_node); + mutex_unlock(&db_list.lock); + } return 0; } @@ -523,17 +524,17 @@ static inline int is_dma_buf_file(struct file *file) return file->f_op == &dma_buf_fops; } -static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) +static struct file *dma_buf_getfile(size_t size, int flags) { static atomic64_t dmabuf_inode = ATOMIC64_INIT(0); - struct file *file; struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb); + struct file *file; if (IS_ERR(inode)) return ERR_CAST(inode); - inode->i_size = dmabuf->size; - inode_set_bytes(inode, dmabuf->size); + inode->i_size = size; + inode_set_bytes(inode, size); /* * The ->i_ino acquired from get_next_ino() is not unique thus @@ -547,8 +548,6 @@ static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) flags, &dma_buf_fops); if (IS_ERR(file)) goto err_alloc_file; - file->private_data = dmabuf; - file->f_path.dentry->d_fsdata = dmabuf; return file; @@ -614,19 +613,11 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) size_t alloc_size = sizeof(struct dma_buf); int ret; - if (!exp_info->resv) - alloc_size += sizeof(struct dma_resv); - else - /* prevent &dma_buf[1] == dma_buf->resv */ - alloc_size += 1; - - if (WARN_ON(!exp_info->priv - || !exp_info->ops - || !exp_info->ops->map_dma_buf - || !exp_info->ops->unmap_dma_buf - || !exp_info->ops->release)) { + if (WARN_ON(!exp_info->priv || !exp_info->ops + || !exp_info->ops->map_dma_buf + || !exp_info->ops->unmap_dma_buf + || !exp_info->ops->release)) return ERR_PTR(-EINVAL); - } if (WARN_ON(exp_info->ops->cache_sgt_mapping && (exp_info->ops->pin || exp_info->ops->unpin))) @@ -638,10 +629,21 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) if (!try_module_get(exp_info->owner)) return ERR_PTR(-ENOENT); + file = dma_buf_getfile(exp_info->size, exp_info->flags); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto err_module; + } + + if (!exp_info->resv) + alloc_size += sizeof(struct dma_resv); + else + /* prevent &dma_buf[1] == dma_buf->resv */ + alloc_size += 1; dmabuf = kzalloc(alloc_size, GFP_KERNEL); if (!dmabuf) { ret = -ENOMEM; - goto err_module; + goto err_file; } dmabuf->priv = exp_info->priv; @@ -653,44 +655,36 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) init_waitqueue_head(&dmabuf->poll); dmabuf->cb_in.poll = dmabuf->cb_out.poll = &dmabuf->poll; dmabuf->cb_in.active = dmabuf->cb_out.active = 0; + mutex_init(&dmabuf->lock); + INIT_LIST_HEAD(&dmabuf->attachments); if (!resv) { - resv = (struct dma_resv *)&dmabuf[1]; - dma_resv_init(resv); + dmabuf->resv = (struct dma_resv *)&dmabuf[1]; + dma_resv_init(dmabuf->resv); + } else { + dmabuf->resv = resv; } - dmabuf->resv = resv; - file = dma_buf_getfile(dmabuf, exp_info->flags); - if (IS_ERR(file)) { - ret = PTR_ERR(file); + ret = dma_buf_stats_setup(dmabuf, file); + if (ret) goto err_dmabuf; - } + file->private_data = dmabuf; + file->f_path.dentry->d_fsdata = dmabuf; dmabuf->file = file; - mutex_init(&dmabuf->lock); - INIT_LIST_HEAD(&dmabuf->attachments); - mutex_lock(&db_list.lock); list_add(&dmabuf->list_node, &db_list.head); mutex_unlock(&db_list.lock); - ret = dma_buf_stats_setup(dmabuf); - if (ret) - goto err_sysfs; - return dmabuf; -err_sysfs: - /* - * Set file->f_path.dentry->d_fsdata to NULL so that when - * dma_buf_release() gets invoked by dentry_ops, it exits - * early before calling the release() dma_buf op. - */ - file->f_path.dentry->d_fsdata = NULL; - fput(file); err_dmabuf: + if (!resv) + dma_resv_fini(dmabuf->resv); kfree(dmabuf); +err_file: + fput(file); err_module: module_put(exp_info->owner); return ERR_PTR(ret); From a3be19b91ea7121d388084e8c07f5b1b982eb40c Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Sat, 26 Nov 2022 09:07:52 +0800 Subject: [PATCH 0006/1593] scsi: iscsi: Fix multiple iSCSI session unbind events sent to userspace It was observed that the kernel would potentially send ISCSI_KEVENT_UNBIND_SESSION multiple times. Introduce 'target_state' in iscsi_cls_session() to make sure session will send only one unbind session event. This introduces a regression wrt. the issue fixed in commit 13e60d3ba287 ("scsi: iscsi: Report unbind session event when the target has been removed"). If iscsid dies for any reason after sending an unbind session to kernel, once iscsid is restarted, the kernel's ISCSI_KEVENT_UNBIND_SESSION event is lost and userspace is then unable to logout. However, the session is actually in invalid state (its target_id is INVALID) so iscsid should not sync this session during restart. Consequently we need to check the session's target state during iscsid restart. If session is in unbound state, do not sync this session and perform session teardown. This is OK because once a session is unbound, we can not recover it any more (mainly because its target id is INVALID). Signed-off-by: Wenchao Hao Link: https://lore.kernel.org/r/20221126010752.231917-1-haowenchao@huawei.com Reviewed-by: Mike Christie Reviewed-by: Wu Bo Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 50 ++++++++++++++++++++++++++--- include/scsi/scsi_transport_iscsi.h | 9 ++++++ 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index c3fe5ecfee59e..3eb58db1399d8 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1676,6 +1676,13 @@ static const char *iscsi_session_state_name(int state) return name; } +static char *iscsi_session_target_state_name[] = { + [ISCSI_SESSION_TARGET_UNBOUND] = "UNBOUND", + [ISCSI_SESSION_TARGET_ALLOCATED] = "ALLOCATED", + [ISCSI_SESSION_TARGET_SCANNED] = "SCANNED", + [ISCSI_SESSION_TARGET_UNBINDING] = "UNBINDING", +}; + int iscsi_session_chkready(struct iscsi_cls_session *session) { int err; @@ -1785,9 +1792,13 @@ static int iscsi_user_scan_session(struct device *dev, void *data) if ((scan_data->channel == SCAN_WILD_CARD || scan_data->channel == 0) && (scan_data->id == SCAN_WILD_CARD || - scan_data->id == id)) + scan_data->id == id)) { scsi_scan_target(&session->dev, 0, id, scan_data->lun, scan_data->rescan); + spin_lock_irqsave(&session->lock, flags); + session->target_state = ISCSI_SESSION_TARGET_SCANNED; + spin_unlock_irqrestore(&session->lock, flags); + } } user_scan_exit: @@ -1960,31 +1971,41 @@ static void __iscsi_unbind_session(struct work_struct *work) struct iscsi_cls_host *ihost = shost->shost_data; unsigned long flags; unsigned int target_id; + bool remove_target = true; ISCSI_DBG_TRANS_SESSION(session, "Unbinding session\n"); /* Prevent new scans and make sure scanning is not in progress */ mutex_lock(&ihost->mutex); spin_lock_irqsave(&session->lock, flags); - if (session->target_id == ISCSI_MAX_TARGET) { + if (session->target_state == ISCSI_SESSION_TARGET_ALLOCATED) { + remove_target = false; + } else if (session->target_state != ISCSI_SESSION_TARGET_SCANNED) { spin_unlock_irqrestore(&session->lock, flags); mutex_unlock(&ihost->mutex); - goto unbind_session_exit; + ISCSI_DBG_TRANS_SESSION(session, + "Skipping target unbinding: Session is unbound/unbinding.\n"); + return; } + session->target_state = ISCSI_SESSION_TARGET_UNBINDING; target_id = session->target_id; session->target_id = ISCSI_MAX_TARGET; spin_unlock_irqrestore(&session->lock, flags); mutex_unlock(&ihost->mutex); - scsi_remove_target(&session->dev); + if (remove_target) + scsi_remove_target(&session->dev); if (session->ida_used) ida_free(&iscsi_sess_ida, target_id); -unbind_session_exit: iscsi_session_event(session, ISCSI_KEVENT_UNBIND_SESSION); ISCSI_DBG_TRANS_SESSION(session, "Completed target removal\n"); + + spin_lock_irqsave(&session->lock, flags); + session->target_state = ISCSI_SESSION_TARGET_UNBOUND; + spin_unlock_irqrestore(&session->lock, flags); } static void __iscsi_destroy_session(struct work_struct *work) @@ -2061,6 +2082,9 @@ int iscsi_add_session(struct iscsi_cls_session *session, unsigned int target_id) session->ida_used = true; } else session->target_id = target_id; + spin_lock_irqsave(&session->lock, flags); + session->target_state = ISCSI_SESSION_TARGET_ALLOCATED; + spin_unlock_irqrestore(&session->lock, flags); dev_set_name(&session->dev, "session%u", session->sid); err = device_add(&session->dev); @@ -4368,6 +4392,19 @@ iscsi_session_attr(def_taskmgmt_tmo, ISCSI_PARAM_DEF_TASKMGMT_TMO, 0); iscsi_session_attr(discovery_parent_idx, ISCSI_PARAM_DISCOVERY_PARENT_IDX, 0); iscsi_session_attr(discovery_parent_type, ISCSI_PARAM_DISCOVERY_PARENT_TYPE, 0); +static ssize_t +show_priv_session_target_state(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); + + return sysfs_emit(buf, "%s\n", + iscsi_session_target_state_name[session->target_state]); +} + +static ISCSI_CLASS_ATTR(priv_sess, target_state, S_IRUGO, + show_priv_session_target_state, NULL); + static ssize_t show_priv_session_state(struct device *dev, struct device_attribute *attr, char *buf) @@ -4470,6 +4507,7 @@ static struct attribute *iscsi_session_attrs[] = { &dev_attr_sess_boot_target.attr, &dev_attr_priv_sess_recovery_tmo.attr, &dev_attr_priv_sess_state.attr, + &dev_attr_priv_sess_target_state.attr, &dev_attr_priv_sess_creator.attr, &dev_attr_sess_chap_out_idx.attr, &dev_attr_sess_chap_in_idx.attr, @@ -4583,6 +4621,8 @@ static umode_t iscsi_session_attr_is_visible(struct kobject *kobj, return S_IRUGO | S_IWUSR; else if (attr == &dev_attr_priv_sess_state.attr) return S_IRUGO; + else if (attr == &dev_attr_priv_sess_target_state.attr) + return S_IRUGO; else if (attr == &dev_attr_priv_sess_creator.attr) return S_IRUGO; else if (attr == &dev_attr_priv_sess_target_id.attr) diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index cab52b0f11d0c..34c03707fb6ef 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -236,6 +236,14 @@ enum { ISCSI_SESSION_FREE, }; +enum { + ISCSI_SESSION_TARGET_UNBOUND, + ISCSI_SESSION_TARGET_ALLOCATED, + ISCSI_SESSION_TARGET_SCANNED, + ISCSI_SESSION_TARGET_UNBINDING, + ISCSI_SESSION_TARGET_MAX, +}; + #define ISCSI_MAX_TARGET -1 struct iscsi_cls_session { @@ -264,6 +272,7 @@ struct iscsi_cls_session { */ pid_t creator; int state; + int target_state; /* session target bind state */ int sid; /* session id */ void *dd_data; /* LLD private data */ struct device dev; /* sysfs transport/container device */ From 67ff3d0a49f3d445c3922e30a54e03c161da561e Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 4 Dec 2022 11:52:44 -0800 Subject: [PATCH 0007/1593] scsi: storvsc: Fix swiotlb bounce buffer leak in confidential VM storvsc_queuecommand() maps the scatter/gather list using scsi_dma_map(), which in a confidential VM allocates swiotlb bounce buffers. If the I/O submission fails in storvsc_do_io(), the I/O is typically retried by higher level code, but the bounce buffer memory is never freed. The mostly like cause of I/O submission failure is a full VMBus channel ring buffer, which is not uncommon under high I/O loads. Eventually enough bounce buffer memory leaks that the confidential VM can't do any I/O. The same problem can arise in a non-confidential VM with kernel boot parameter swiotlb=force. Fix this by doing scsi_dma_unmap() in the case of an I/O submission error, which frees the bounce buffer memory. Fixes: 743b237c3a7b ("scsi: storvsc: Add Isolation VM support for storvsc driver") Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1670183564-76254-1-git-send-email-mikelley@microsoft.com Tested-by: Dexuan Cui Reviewed-by: Dexuan Cui Reviewed-by: Tianyu Lan Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index a84194d823471..0252ee2dd844b 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1824,6 +1824,9 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) ret = storvsc_do_io(dev, cmd_request, get_cpu()); put_cpu(); + if (ret) + scsi_dma_unmap(scmnd); + if (ret == -EAGAIN) { /* no more space */ ret = SCSI_MLQUEUE_DEVICE_BUSY; From d0b9025540ef57cc4464ab2fc64ed8ddc49b5658 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 6 Dec 2022 14:13:45 +0100 Subject: [PATCH 0008/1593] scsi: core: scsi_error: Do not queue pointless abort workqueue functions If a host template doesn't implement the .eh_abort_handler() there is no point in queueing the abort workqueue function; all it does is invoking SCSI EH anyway. So return 'FAILED' from scsi_abort_command() if the .eh_abort_handler() is not implemented and save us from having to wait for the abort workqueue function to complete. Cc: Niklas Cassel Cc: Damien Le Moal Cc: John Garry Signed-off-by: Hannes Reinecke [niklas: moved the check to the top of scsi_abort_command()] Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20221206131346.2045375-1-niklas.cassel@wdc.com Reviewed-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_error.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 613d5aeb1e3ce..955ee1bfde8f3 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -231,6 +231,11 @@ scsi_abort_command(struct scsi_cmnd *scmd) struct Scsi_Host *shost = sdev->host; unsigned long flags; + if (!shost->hostt->eh_abort_handler) { + /* No abort handler, fail command directly */ + return FAILED; + } + if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) { /* * Retry after abort failed, escalate to next level. From f0a43ba6c66cc0688e2748d986a1459fdd3442ef Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Wed, 7 Dec 2022 11:36:59 +0900 Subject: [PATCH 0009/1593] scsi: mpi3mr: Refer CONFIG_SCSI_MPI3MR in Makefile When Kconfig item CONFIG_SCSI_MPI3MR was introduced for mpi3mr driver, the Makefile of the driver was not modified to refer the Kconfig item. As a result, mpi3mr.ko is built regardless of the Kconfig item value y or m. Also, if 'make localmodconfig' can not find the Kconfig item in the Makefile, then it does not generate CONFIG_SCSI_MPI3MR=m even when mpi3mr.ko is loaded on the system. Refer to the Kconfig item to avoid the issues. Fixes: c4f7ac64616e ("scsi: mpi3mr: Add mpi30 Rev-R headers and Kconfig") Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20221207023659.2411785-1-shinichiro.kawasaki@wdc.com Reviewed-by: Damien Le Moal Acked-by: Sathya Prakash Veerichetty Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpi3mr/Makefile b/drivers/scsi/mpi3mr/Makefile index ef86ca46646b8..3bf8cf34e1c3f 100644 --- a/drivers/scsi/mpi3mr/Makefile +++ b/drivers/scsi/mpi3mr/Makefile @@ -1,5 +1,5 @@ # mpi3mr makefile -obj-m += mpi3mr.o +obj-$(CONFIG_SCSI_MPI3MR) += mpi3mr.o mpi3mr-y += mpi3mr_os.o \ mpi3mr_fw.o \ mpi3mr_app.o \ From c411a42fb91f452509c312e4dda713699a22a995 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 13 Dec 2022 14:21:22 +0000 Subject: [PATCH 0010/1593] scsi: scsi_debug: Delete unreachable code in inquiry_vpd_b0() The 2nd return statement in inquiry_vpd_b0() is unreachable, so delete it. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20221213142122.1011886-1-john.g.garry@oracle.com Reviewed-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index de3d3b1edaf56..6fa08857d6b99 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -1511,8 +1511,6 @@ static int inquiry_vpd_b0(unsigned char *arr) put_unaligned_be64(sdebug_write_same_length, &arr[32]); return 0x3c; /* Mandatory page length for Logical Block Provisioning */ - - return sizeof(vpdb0_data); } /* Block device characteristics VPD page (SBC-3) */ From 1a5665fc8d7a000671ebd3fe69c6f9acf1e0dcd9 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Thu, 8 Dec 2022 15:25:20 +0800 Subject: [PATCH 0011/1593] scsi: ufs: core: WLUN suspend SSU/enter hibern8 fail recovery When SSU/enter hibern8 fail in WLUN suspend flow, trigger the error handler and return busy to break the suspend. Otherwise the consumer will get stuck in runtime suspend status. Fixes: b294ff3e3449 ("scsi: ufs: core: Enable power management for wlun") Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20221208072520.26210-1-peter.wang@mediatek.com Reviewed-by: Stanley Chu Reviewed-by: Bart Van Assche Reviewed-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 2dbe24977822f..c63cd8bbe5c6d 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6056,6 +6056,14 @@ void ufshcd_schedule_eh_work(struct ufs_hba *hba) } } +static void ufshcd_force_error_recovery(struct ufs_hba *hba) +{ + spin_lock_irq(hba->host->host_lock); + hba->force_reset = true; + ufshcd_schedule_eh_work(hba); + spin_unlock_irq(hba->host->host_lock); +} + static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow) { down_write(&hba->clk_scaling_lock); @@ -9083,6 +9091,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) if (!hba->dev_info.b_rpm_dev_flush_capable) { ret = ufshcd_set_dev_pwr_mode(hba, req_dev_pwr_mode); + if (ret && pm_op != UFS_SHUTDOWN_PM) { + /* + * If return err in suspend flow, IO will hang. + * Trigger error handler and break suspend for + * error recovery. + */ + ufshcd_force_error_recovery(hba); + ret = -EBUSY; + } if (ret) goto enable_scaling; } @@ -9094,6 +9111,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) */ check_for_bkops = !ufshcd_is_ufs_dev_deepsleep(hba); ret = ufshcd_link_state_transition(hba, req_link_state, check_for_bkops); + if (ret && pm_op != UFS_SHUTDOWN_PM) { + /* + * If return err in suspend flow, IO will hang. + * Trigger error handler and break suspend for + * error recovery. + */ + ufshcd_force_error_recovery(hba); + ret = -EBUSY; + } if (ret) goto set_dev_active; From 01258b62c62710297dab4e2b72f46e01be392cc6 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 9 Dec 2022 10:59:37 +0100 Subject: [PATCH 0012/1593] wifi: ti: remove obsolete lines in the Makefile Commit 06463f6e98df ("wifi: wl1251: drop support for platform data") removes TI WiLink platform data, but leaves some dead lines in the Makefile. Remove these obsolete lines in the Makefile. Signed-off-by: Lukas Bulwahn Reviewed-by: Dmitry Torokhov Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221209095937.17773-1-lukas.bulwahn@gmail.com --- drivers/net/wireless/ti/Makefile | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/ti/Makefile b/drivers/net/wireless/ti/Makefile index 0530dd744275c..05ee016594f8c 100644 --- a/drivers/net/wireless/ti/Makefile +++ b/drivers/net/wireless/ti/Makefile @@ -3,6 +3,3 @@ obj-$(CONFIG_WLCORE) += wlcore/ obj-$(CONFIG_WL12XX) += wl12xx/ obj-$(CONFIG_WL1251) += wl1251/ obj-$(CONFIG_WL18XX) += wl18xx/ - -# small builtin driver bit -obj-$(CONFIG_WILINK_PLATFORM_DATA) += wilink_platform_data.o From 7cffcade57a429667447c4f41d8414bbcf1b3aaa Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Tue, 13 Dec 2022 23:46:52 +0800 Subject: [PATCH 0013/1593] xen: make remove callback of xen driver void returned Since commit fc7a6209d571 ("bus: Make remove callback return void") forces bus_type::remove be void-returned, it doesn't make much sense for any bus based driver implementing remove callbalk to return non-void to its caller. This change is for xen bus based drivers. Acked-by: Juergen Gross Signed-off-by: Dawei Li Link: https://lore.kernel.org/r/TYCP286MB23238119AB4DF190997075C9CAE39@TYCP286MB2323.JPNP286.PROD.OUTLOOK.COM Signed-off-by: Juergen Gross --- drivers/block/xen-blkback/xenbus.c | 4 +--- drivers/block/xen-blkfront.c | 3 +-- drivers/char/tpm/xen-tpmfront.c | 3 +-- drivers/gpu/drm/xen/xen_drm_front.c | 3 +-- drivers/input/misc/xen-kbdfront.c | 5 ++--- drivers/net/xen-netback/xenbus.c | 3 +-- drivers/net/xen-netfront.c | 4 +--- drivers/pci/xen-pcifront.c | 4 +--- drivers/scsi/xen-scsifront.c | 4 +--- drivers/tty/hvc/hvc_xen.c | 4 ++-- drivers/usb/host/xen-hcd.c | 4 +--- drivers/video/fbdev/xen-fbfront.c | 6 ++---- drivers/xen/pvcalls-back.c | 3 +-- drivers/xen/pvcalls-front.c | 3 +-- drivers/xen/xen-pciback/xenbus.c | 4 +--- drivers/xen/xen-scsiback.c | 4 +--- include/xen/xenbus.h | 2 +- net/9p/trans_xen.c | 3 +-- sound/xen/xen_snd_front.c | 3 +-- 19 files changed, 22 insertions(+), 47 deletions(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index c0227dfa46887..4807af1d58059 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -524,7 +524,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, return 0; } -static int xen_blkbk_remove(struct xenbus_device *dev) +static void xen_blkbk_remove(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(&dev->dev); @@ -547,8 +547,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev) /* Put the reference we set in xen_blkif_alloc(). */ xen_blkif_put(be->blkif); } - - return 0; } int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 35b9bcad9db90..e68576ded7cb8 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2468,7 +2468,7 @@ static void blkback_changed(struct xenbus_device *dev, } } -static int blkfront_remove(struct xenbus_device *xbdev) +static void blkfront_remove(struct xenbus_device *xbdev) { struct blkfront_info *info = dev_get_drvdata(&xbdev->dev); @@ -2489,7 +2489,6 @@ static int blkfront_remove(struct xenbus_device *xbdev) } kfree(info); - return 0; } static int blkfront_is_ready(struct xenbus_device *dev) diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index 3792918262617..80cca3b83b226 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -360,14 +360,13 @@ static int tpmfront_probe(struct xenbus_device *dev, return tpm_chip_register(priv->chip); } -static int tpmfront_remove(struct xenbus_device *dev) +static void tpmfront_remove(struct xenbus_device *dev) { struct tpm_chip *chip = dev_get_drvdata(&dev->dev); struct tpm_private *priv = dev_get_drvdata(&chip->dev); tpm_chip_unregister(chip); ring_free(priv); dev_set_drvdata(&chip->dev, NULL); - return 0; } static int tpmfront_resume(struct xenbus_device *dev) diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c index 0d8e6bd1ccbf2..90996c108146d 100644 --- a/drivers/gpu/drm/xen/xen_drm_front.c +++ b/drivers/gpu/drm/xen/xen_drm_front.c @@ -717,7 +717,7 @@ static int xen_drv_probe(struct xenbus_device *xb_dev, return xenbus_switch_state(xb_dev, XenbusStateInitialising); } -static int xen_drv_remove(struct xenbus_device *dev) +static void xen_drv_remove(struct xenbus_device *dev) { struct xen_drm_front_info *front_info = dev_get_drvdata(&dev->dev); int to = 100; @@ -751,7 +751,6 @@ static int xen_drv_remove(struct xenbus_device *dev) xen_drm_drv_fini(front_info); xenbus_frontend_closed(dev); - return 0; } static const struct xenbus_device_id xen_driver_ids[] = { diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c index 8d8ebdc2039b8..67f1c7364c95d 100644 --- a/drivers/input/misc/xen-kbdfront.c +++ b/drivers/input/misc/xen-kbdfront.c @@ -51,7 +51,7 @@ module_param_array(ptr_size, int, NULL, 0444); MODULE_PARM_DESC(ptr_size, "Pointing device width, height in pixels (default 800,600)"); -static int xenkbd_remove(struct xenbus_device *); +static void xenkbd_remove(struct xenbus_device *); static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info *); static void xenkbd_disconnect_backend(struct xenkbd_info *); @@ -404,7 +404,7 @@ static int xenkbd_resume(struct xenbus_device *dev) return xenkbd_connect_backend(dev, info); } -static int xenkbd_remove(struct xenbus_device *dev) +static void xenkbd_remove(struct xenbus_device *dev) { struct xenkbd_info *info = dev_get_drvdata(&dev->dev); @@ -417,7 +417,6 @@ static int xenkbd_remove(struct xenbus_device *dev) input_unregister_device(info->mtouch); free_page((unsigned long)info->page); kfree(info); - return 0; } static int xenkbd_connect_backend(struct xenbus_device *dev, diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index c1ba4294f3647..001636901ddae 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -977,7 +977,7 @@ static int read_xenbus_vif_flags(struct backend_info *be) return 0; } -static int netback_remove(struct xenbus_device *dev) +static void netback_remove(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(&dev->dev); @@ -992,7 +992,6 @@ static int netback_remove(struct xenbus_device *dev) kfree(be->hotplug_script); kfree(be); dev_set_drvdata(&dev->dev, NULL); - return 0; } /* diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 9af2b027c19c6..bc17f5391b1a0 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -2640,7 +2640,7 @@ static void xennet_bus_close(struct xenbus_device *dev) } while (!ret); } -static int xennet_remove(struct xenbus_device *dev) +static void xennet_remove(struct xenbus_device *dev) { struct netfront_info *info = dev_get_drvdata(&dev->dev); @@ -2656,8 +2656,6 @@ static int xennet_remove(struct xenbus_device *dev) rtnl_unlock(); } xennet_free_netdev(info->netdev); - - return 0; } static const struct xenbus_device_id netfront_ids[] = { diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 7378e2f3e525f..fcd029ca2eb18 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -1055,14 +1055,12 @@ static int pcifront_xenbus_probe(struct xenbus_device *xdev, return err; } -static int pcifront_xenbus_remove(struct xenbus_device *xdev) +static void pcifront_xenbus_remove(struct xenbus_device *xdev) { struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); if (pdev) free_pdev(pdev); - - return 0; } static const struct xenbus_device_id xenpci_ids[] = { diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 66b316d173b0b..71a3bb83984c0 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -995,7 +995,7 @@ static int scsifront_suspend(struct xenbus_device *dev) return err; } -static int scsifront_remove(struct xenbus_device *dev) +static void scsifront_remove(struct xenbus_device *dev) { struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev); @@ -1011,8 +1011,6 @@ static int scsifront_remove(struct xenbus_device *dev) scsifront_free_ring(info); scsi_host_put(info->host); - - return 0; } static void scsifront_disconnect(struct vscsifrnt_info *info) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 7c23112dc923f..c879f922c716b 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -394,9 +394,9 @@ static int xen_console_remove(struct xencons_info *info) return 0; } -static int xencons_remove(struct xenbus_device *dev) +static void xencons_remove(struct xenbus_device *dev) { - return xen_console_remove(dev_get_drvdata(&dev->dev)); + xen_console_remove(dev_get_drvdata(&dev->dev)); } static int xencons_connect_backend(struct xenbus_device *dev, diff --git a/drivers/usb/host/xen-hcd.c b/drivers/usb/host/xen-hcd.c index de1b091583183..46fdab940092e 100644 --- a/drivers/usb/host/xen-hcd.c +++ b/drivers/usb/host/xen-hcd.c @@ -1530,15 +1530,13 @@ static void xenhcd_backend_changed(struct xenbus_device *dev, } } -static int xenhcd_remove(struct xenbus_device *dev) +static void xenhcd_remove(struct xenbus_device *dev) { struct xenhcd_info *info = dev_get_drvdata(&dev->dev); struct usb_hcd *hcd = xenhcd_info_to_hcd(info); xenhcd_destroy_rings(info); usb_put_hcd(hcd); - - return 0; } static int xenhcd_probe(struct xenbus_device *dev, diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c index 4d2694d904aab..ae8a50ecdbd3a 100644 --- a/drivers/video/fbdev/xen-fbfront.c +++ b/drivers/video/fbdev/xen-fbfront.c @@ -67,7 +67,7 @@ MODULE_PARM_DESC(video, "Video memory size in MB, width, height in pixels (default 2,800,600)"); static void xenfb_make_preferred_console(void); -static int xenfb_remove(struct xenbus_device *); +static void xenfb_remove(struct xenbus_device *); static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *); static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *); static void xenfb_disconnect_backend(struct xenfb_info *); @@ -527,7 +527,7 @@ static int xenfb_resume(struct xenbus_device *dev) return xenfb_connect_backend(dev, info); } -static int xenfb_remove(struct xenbus_device *dev) +static void xenfb_remove(struct xenbus_device *dev) { struct xenfb_info *info = dev_get_drvdata(&dev->dev); @@ -542,8 +542,6 @@ static int xenfb_remove(struct xenbus_device *dev) vfree(info->gfns); vfree(info->fb); kfree(info); - - return 0; } static unsigned long vmalloc_to_gfn(void *address) diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index d6f945fd41474..ea52a2092bb82 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -1181,9 +1181,8 @@ static void pvcalls_back_changed(struct xenbus_device *dev, } } -static int pvcalls_back_remove(struct xenbus_device *dev) +static void pvcalls_back_remove(struct xenbus_device *dev) { - return 0; } static int pvcalls_back_uevent(struct xenbus_device *xdev, diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index 1826e8e671251..5328f4d35f25e 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -1085,7 +1085,7 @@ static const struct xenbus_device_id pvcalls_front_ids[] = { { "" } }; -static int pvcalls_front_remove(struct xenbus_device *dev) +static void pvcalls_front_remove(struct xenbus_device *dev) { struct pvcalls_bedata *bedata; struct sock_mapping *map = NULL, *n; @@ -1121,7 +1121,6 @@ static int pvcalls_front_remove(struct xenbus_device *dev) kfree(bedata->ring.sring); kfree(bedata); xenbus_switch_state(dev, XenbusStateClosed); - return 0; } static int pvcalls_front_probe(struct xenbus_device *dev, diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index d171091eec123..b11e401f1b1ee 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -716,14 +716,12 @@ static int xen_pcibk_xenbus_probe(struct xenbus_device *dev, return err; } -static int xen_pcibk_xenbus_remove(struct xenbus_device *dev) +static void xen_pcibk_xenbus_remove(struct xenbus_device *dev) { struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev); if (pdev != NULL) free_pdev(pdev); - - return 0; } static const struct xenbus_device_id xen_pcibk_ids[] = { diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 6106ed93817d6..954188b0b858a 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1249,7 +1249,7 @@ static void scsiback_release_translation_entry(struct vscsibk_info *info) spin_unlock_irqrestore(&info->v2p_lock, flags); } -static int scsiback_remove(struct xenbus_device *dev) +static void scsiback_remove(struct xenbus_device *dev) { struct vscsibk_info *info = dev_get_drvdata(&dev->dev); @@ -1261,8 +1261,6 @@ static int scsiback_remove(struct xenbus_device *dev) gnttab_page_cache_shrink(&info->free_pages, 0); dev_set_drvdata(&dev->dev, NULL); - - return 0; } static int scsiback_probe(struct xenbus_device *dev, diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index eaa932b99d8ac..ad4fb4eab753d 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -117,7 +117,7 @@ struct xenbus_driver { const struct xenbus_device_id *id); void (*otherend_changed)(struct xenbus_device *dev, enum xenbus_state backend_state); - int (*remove)(struct xenbus_device *dev); + void (*remove)(struct xenbus_device *dev); int (*suspend)(struct xenbus_device *dev); int (*resume)(struct xenbus_device *dev); int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *); diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index aaa5fd364691b..9950e1a5acb74 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -303,13 +303,12 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) kfree(priv); } -static int xen_9pfs_front_remove(struct xenbus_device *dev) +static void xen_9pfs_front_remove(struct xenbus_device *dev) { struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev); dev_set_drvdata(&dev->dev, NULL); xen_9pfs_front_free(priv); - return 0; } static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, diff --git a/sound/xen/xen_snd_front.c b/sound/xen/xen_snd_front.c index 4041748c12e51..b66e037710d0d 100644 --- a/sound/xen/xen_snd_front.c +++ b/sound/xen/xen_snd_front.c @@ -311,7 +311,7 @@ static int xen_drv_probe(struct xenbus_device *xb_dev, return xenbus_switch_state(xb_dev, XenbusStateInitialising); } -static int xen_drv_remove(struct xenbus_device *dev) +static void xen_drv_remove(struct xenbus_device *dev) { struct xen_snd_front_info *front_info = dev_get_drvdata(&dev->dev); int to = 100; @@ -345,7 +345,6 @@ static int xen_drv_remove(struct xenbus_device *dev) xen_snd_drv_fini(front_info); xenbus_frontend_closed(dev); - return 0; } static const struct xenbus_device_id xen_drv_ids[] = { From abe3bf7425fb695a9b37394af18b9ea58a800802 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 12 Dec 2022 21:14:17 +0100 Subject: [PATCH 0014/1593] btrfs: fix an error handling path in btrfs_rename() If new_whiteout_inode() fails, some resources need to be freed. Add the missing goto to the error handling path. Fixes: ab3c5c18e8fa ("btrfs: setup qstr from dentrys using fscrypt helper") Reviewed-by: Sweet Tea Dorminy Signed-off-by: Christophe JAILLET Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 905ea19df125b..bfcbe64eb8b3a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9377,8 +9377,10 @@ static int btrfs_rename(struct user_namespace *mnt_userns, if (flags & RENAME_WHITEOUT) { whiteout_args.inode = new_whiteout_inode(mnt_userns, old_dir); - if (!whiteout_args.inode) - return -ENOMEM; + if (!whiteout_args.inode) { + ret = -ENOMEM; + goto out_fscrypt_names; + } ret = btrfs_new_inode_prepare(&whiteout_args, &trans_num_items); if (ret) goto out_whiteout_inode; From db0a4a7b8e95f9312a59a67cbd5bc589f090e13d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 12 Dec 2022 21:01:43 +0100 Subject: [PATCH 0015/1593] btrfs: fix an error handling path in btrfs_defrag_leaves() All error handling paths end to 'out', except this memory allocation failure. This is spurious. So branch to the error handling path also in this case. It will add a call to: memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); Fixes: 6702ed490ca0 ("Btrfs: Add run time btree defrag, and an ioctl to force btree defrag") Signed-off-by: Christophe JAILLET Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/defrag.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c index 0a3c261b69c9f..d81b764a76446 100644 --- a/fs/btrfs/defrag.c +++ b/fs/btrfs/defrag.c @@ -358,8 +358,10 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, goto out; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + ret = -ENOMEM; + goto out; + } level = btrfs_header_level(root->node); From c68f72900a12a56c5e9890e6f2ca5119234c9a75 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 13 Dec 2022 10:42:26 +0000 Subject: [PATCH 0016/1593] btrfs: fix leak of fs devices after removing btrfs module When removing the btrfs module we are not calling btrfs_cleanup_fs_uuids() which results in leaking btrfs_fs_devices structures and other resources. This is a regression recently introduced by a refactoring of the module initialization and exit sequence, which simply removed the call to btrfs_cleanup_fs_uuids() in the exit path, resulting in the leaks. So fix this by calling btrfs_cleanup_fs_uuids() at exit_btrfs_fs(). Fixes: 5565b8e0adcd ("btrfs: make module init/exit match their sequence") Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 93f52ee85f6fe..d5de18d6517e7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2514,6 +2514,7 @@ static __always_inline void btrfs_exit_btrfs_fs(void) static void __exit exit_btrfs_fs(void) { btrfs_exit_btrfs_fs(); + btrfs_cleanup_fs_uuids(); } static int __init init_btrfs_fs(void) From f1f0460c0ca97a4a6570f211c81579294a6cc7be Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 13 Dec 2022 16:57:44 -0500 Subject: [PATCH 0017/1593] btrfs: restore BTRFS_SEQ_LAST when looking up qgroup backref lookup In the patch a2c8d27e5ee8 ("btrfs: use a structure to pass arguments to backref walking functions") Filipe converted everybody to using a new context struct to use for backref lookups, but accidentally dropped the BTRFS_SEQ_LAST usage that exists for qgroups. Add this back so we have the previous behavior. Fixes: a2c8d27e5ee8 ("btrfs: use a structure to pass arguments to backref walking functions") Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 5c636e00d77da..d275bf24b250b 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2787,6 +2787,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) * current root. It's safe inside commit_transaction(). */ ctx.trans = trans; + ctx.time_seq = BTRFS_SEQ_LAST; ret = btrfs_find_all_roots(&ctx, false); if (ret < 0) goto cleanup; From 0a3212de8ab3e2ce5808c6265855e528d4a6767b Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 14 Dec 2022 11:06:07 +0900 Subject: [PATCH 0018/1593] btrfs: fix trace event name typo for FLUSH_DELAYED_REFS Fix a typo of printing FLUSH_DELAYED_REFS event in flush_space() as FLUSH_ELAYED_REFS. Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 0bce0b4ff2faf..6548b5b5aa608 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -98,7 +98,7 @@ struct raid56_bio_trace_info; EM( FLUSH_DELALLOC_WAIT, "FLUSH_DELALLOC_WAIT") \ EM( FLUSH_DELALLOC_FULL, "FLUSH_DELALLOC_FULL") \ EM( FLUSH_DELAYED_REFS_NR, "FLUSH_DELAYED_REFS_NR") \ - EM( FLUSH_DELAYED_REFS, "FLUSH_ELAYED_REFS") \ + EM( FLUSH_DELAYED_REFS, "FLUSH_DELAYED_REFS") \ EM( ALLOC_CHUNK, "ALLOC_CHUNK") \ EM( ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE") \ EM( RUN_DELAYED_IPUTS, "RUN_DELAYED_IPUTS") \ From b18cba09e374637a0a3759d856a6bca94c133952 Mon Sep 17 00:00:00 2001 From: minoura makoto Date: Tue, 13 Dec 2022 13:14:31 +0900 Subject: [PATCH 0019/1593] SUNRPC: ensure the matching upcall is in-flight upon downcall Commit 9130b8dbc6ac ("SUNRPC: allow for upcalls for the same uid but different gss service") introduced `auth` argument to __gss_find_upcall(), but in gss_pipe_downcall() it was left as NULL since it (and auth->service) was not (yet) determined. When multiple upcalls with the same uid and different service are ongoing, it could happen that __gss_find_upcall(), which returns the first match found in the pipe->in_downcall list, could not find the correct gss_msg corresponding to the downcall we are looking for. Moreover, it might return a msg which is not sent to rpc.gssd yet. We could see mount.nfs process hung in D state with multiple mount.nfs are executed in parallel. The call trace below is of CentOS 7.9 kernel-3.10.0-1160.24.1.el7.x86_64 but we observed the same hang w/ elrepo kernel-ml-6.0.7-1.el7. PID: 71258 TASK: ffff91ebd4be0000 CPU: 36 COMMAND: "mount.nfs" #0 [ffff9203ca3234f8] __schedule at ffffffffa3b8899f #1 [ffff9203ca323580] schedule at ffffffffa3b88eb9 #2 [ffff9203ca323590] gss_cred_init at ffffffffc0355818 [auth_rpcgss] #3 [ffff9203ca323658] rpcauth_lookup_credcache at ffffffffc0421ebc [sunrpc] #4 [ffff9203ca3236d8] gss_lookup_cred at ffffffffc0353633 [auth_rpcgss] #5 [ffff9203ca3236e8] rpcauth_lookupcred at ffffffffc0421581 [sunrpc] #6 [ffff9203ca323740] rpcauth_refreshcred at ffffffffc04223d3 [sunrpc] #7 [ffff9203ca3237a0] call_refresh at ffffffffc04103dc [sunrpc] #8 [ffff9203ca3237b8] __rpc_execute at ffffffffc041e1c9 [sunrpc] #9 [ffff9203ca323820] rpc_execute at ffffffffc0420a48 [sunrpc] The scenario is like this. Let's say there are two upcalls for services A and B, A -> B in pipe->in_downcall, B -> A in pipe->pipe. When rpc.gssd reads pipe to get the upcall msg corresponding to service B from pipe->pipe and then writes the response, in gss_pipe_downcall the msg corresponding to service A will be picked because only uid is used to find the msg and it is before the one for B in pipe->in_downcall. And the process waiting for the msg corresponding to service A will be woken up. Actual scheduing of that process might be after rpc.gssd processes the next msg. In rpc_pipe_generic_upcall it clears msg->errno (for A). The process is scheduled to see gss_msg->ctx == NULL and gss_msg->msg.errno == 0, therefore it cannot break the loop in gss_create_upcall and is never woken up after that. This patch adds a simple check to ensure that a msg which is not sent to rpc.gssd yet is not chosen as the matching upcall upon receiving a downcall. Signed-off-by: minoura makoto Signed-off-by: Hiroshi Shimamoto Tested-by: Hiroshi Shimamoto Cc: Trond Myklebust Fixes: 9130b8dbc6ac ("SUNRPC: allow for upcalls for same uid but different gss service") Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 5 +++++ net/sunrpc/auth_gss/auth_gss.c | 19 +++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index cd188a527d169..3b35b6f6533aa 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -92,6 +92,11 @@ extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *, char __user *, size_t); extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *); +/* returns true if the msg is in-flight, i.e., already eaten by the peer */ +static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) { + return (msg->copied != 0 && list_empty(&msg->list)); +} + struct rpc_clnt; extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); extern int rpc_remove_client_dir(struct rpc_clnt *); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 7bb247c51e2f6..2d7b1e03110ae 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -302,7 +302,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth list_for_each_entry(pos, &pipe->in_downcall, list) { if (!uid_eq(pos->uid, uid)) continue; - if (auth && pos->auth->service != auth->service) + if (pos->auth->service != auth->service) continue; refcount_inc(&pos->count); return pos; @@ -686,6 +686,21 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) return err; } +static struct gss_upcall_msg * +gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid) +{ + struct gss_upcall_msg *pos; + list_for_each_entry(pos, &pipe->in_downcall, list) { + if (!uid_eq(pos->uid, uid)) + continue; + if (!rpc_msg_is_inflight(&pos->msg)) + continue; + refcount_inc(&pos->count); + return pos; + } + return NULL; +} + #define MSG_BUF_MAXSIZE 1024 static ssize_t @@ -732,7 +747,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = -ENOENT; /* Find a matching upcall */ spin_lock(&pipe->lock); - gss_msg = __gss_find_upcall(pipe, uid, NULL); + gss_msg = gss_find_downcall(pipe, uid); if (gss_msg == NULL) { spin_unlock(&pipe->lock); goto err_put_ctx; From 4e699e34f923188175986ad8a74ab99f7034075e Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Fri, 16 Dec 2022 11:05:26 +0800 Subject: [PATCH 0020/1593] drm/plane-helper: Add the missing declaration of drm_atomic_state Add the missing declaration of struct drm_atomic_state to fix the compile error below: error: 'struct drm_atomic_state' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] Signed-off-by: Ma Jun Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Fixes: 8401bd361f59 ("drm/plane-helper: Add a drm_plane_helper_atomic_check() helper") Cc: Javier Martinez Canillas Cc: Thomas Zimmermann Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: # v6.1+ Link: https://patchwork.freedesktop.org/patch/msgid/20221216030526.1335609-1-majun@amd.com --- include/drm/drm_plane_helper.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/drm/drm_plane_helper.h b/include/drm/drm_plane_helper.h index ff83d26216876..3a574e8cd22f4 100644 --- a/include/drm/drm_plane_helper.h +++ b/include/drm/drm_plane_helper.h @@ -26,6 +26,7 @@ #include +struct drm_atomic_state; struct drm_crtc; struct drm_framebuffer; struct drm_modeset_acquire_ctx; From 3c44e2b6cde674797b76e76d3a903a63ce8a18bb Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 16 Dec 2022 13:15:34 -0800 Subject: [PATCH 0021/1593] Revert "Input: synaptics - switch touchpad on HP Laptop 15-da3001TU to RMI mode" This reverts commit ac5408991ea6b06e29129b4d4861097c4c3e0d59 because it causes loss of keyboard on HP 15-da1xxx. Fixes: ac5408991ea6 ("Input: synaptics - switch touchpad on HP Laptop 15-da3001TU to RMI mode") Reported-by: Jiri Slaby Link: https://lore.kernel.org/r/824effa5-8b9a-c28a-82bb-9b0ab24623e1@kernel.org Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1206358 Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index b0f776448a1cd..fa021af8506e4 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -192,7 +192,6 @@ static const char * const smbus_pnp_ids[] = { "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ "SYN3257", /* HP Envy 13-ad105ng */ - "SYN3286", /* HP Laptop 15-da3001TU */ NULL }; From 71a06f1034b91e15d3ba6b5539c7d3a2d7f13030 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Sat, 17 Dec 2022 00:57:42 +0100 Subject: [PATCH 0022/1593] mac802154: Fix possible double free upon parsing error Commit 4d1c7d87030b ("mac802154: Move an skb free within the rx path") tried to simplify error handling within the receive path by moving the kfree_skb() call at the very end of the top-level function but missed one kfree_skb() called upon frame parsing error. Prevent this possible double free from happening. Fixes: 4d1c7d87030b ("mac802154: Move an skb free within the rx path") Reported-by: Dan Carpenter Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/r/20221216235742.646134-1-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- net/mac802154/rx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index c2aae2a6d6a6b..97bb4401dd3ee 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -213,7 +213,6 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local, ret = ieee802154_parse_frame_start(skb, &hdr); if (ret) { pr_debug("got invalid frame\n"); - kfree_skb(skb); return; } From cc074822465d18a2d39e0b3e2b48b6766a568db2 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Sat, 17 Dec 2022 14:21:44 +0800 Subject: [PATCH 0023/1593] bpf: Define sock security related BTF IDs under CONFIG_SECURITY_NETWORK There are warnings reported from resolve_btfids when building vmlinux with CONFIG_SECURITY_NETWORK disabled: WARN: resolve_btfids: unresolved symbol bpf_lsm_sk_free_security WARN: resolve_btfids: unresolved symbol bpf_lsm_sk_alloc_security So only define BTF IDs for these LSM hooks when CONFIG_SECURITY_NETWORK is enabled. Fixes: c0c852dd1876 ("bpf: Do not mark certain LSM hook arguments as trusted") Signed-off-by: Hou Tao Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221217062144.2507222-1-houtao@huaweicloud.com --- kernel/bpf/bpf_lsm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 9ea42a45da470..a4a41ee3e80b5 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -351,8 +351,10 @@ BTF_ID(func, bpf_lsm_bpf_prog_alloc_security) BTF_ID(func, bpf_lsm_bpf_prog_free_security) BTF_ID(func, bpf_lsm_file_alloc_security) BTF_ID(func, bpf_lsm_file_free_security) +#ifdef CONFIG_SECURITY_NETWORK BTF_ID(func, bpf_lsm_sk_alloc_security) BTF_ID(func, bpf_lsm_sk_free_security) +#endif /* CONFIG_SECURITY_NETWORK */ BTF_ID(func, bpf_lsm_task_free) BTF_SET_END(untrusted_lsm_hooks) From 1c4c0b28b517d778d37900deedfe91088839f07a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Dec 2022 23:15:04 +0200 Subject: [PATCH 0024/1593] wifi: iwlwifi: fw: skip PPAG for JF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For JF RFs we don't support PPAG, but many firmware images lie about it. Always skip support for JF to avoid firmware errors when sending the command. Reported-and-tested-by: Íñigo Huguet Link: https://lore.kernel.org/linux-wireless/CACT4oufQsqHGp6bah2c4+jPn2wG1oZqY=UKa_TmPx=F6Lxng8Q@mail.gmail.com Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221213225723.2a43415d8990.I9ac210740a45b41f1b2e15274e1daf4284f2808a@changeid --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index e6d64152c81a7..a02e5a67b7066 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -1106,6 +1106,11 @@ int iwl_read_ppag_table(struct iwl_fw_runtime *fwrt, union iwl_ppag_table_cmd *c int i, j, num_sub_bands; s8 *gain; + /* many firmware images for JF lie about this */ + if (CSR_HW_RFID_TYPE(fwrt->trans->hw_rf_id) == + CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_JF)) + return -EOPNOTSUPP; + if (!fw_has_capa(&fwrt->fw->ucode_capa, IWL_UCODE_TLV_CAPA_SET_PPAG)) { IWL_DEBUG_RADIO(fwrt, "PPAG capability not supported by FW, command not sent.\n"); From 37fc9ad1617a303bbfd28870eb25aaa4766e79ab Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Dec 2022 17:31:10 +0100 Subject: [PATCH 0025/1593] wifi: mt76: mt7996: select CONFIG_RELAY Without CONFIG_RELAY, the driver fails to link: ERROR: modpost: "relay_flush" [drivers/net/wireless/mediatek/mt76/mt7996/mt7996e.ko] undefined! ERROR: modpost: "relay_switch_subbuf" [drivers/net/wireless/mediatek/mt76/mt7996/mt7996e.ko] undefined! ERROR: modpost: "relay_open" [drivers/net/wireless/mediatek/mt76/mt7996/mt7996e.ko] undefined! ERROR: modpost: "relay_reset" [drivers/net/wireless/mediatek/mt76/mt7996/mt7996e.ko] undefined! ERROR: modpost: "relay_file_operations" [drivers/net/wireless/mediatek/mt76/mt7996/mt7996e.ko] undefined! The same change was done in mt7915 for the corresponding copy of the code. Fixes: 98686cd21624 ("wifi: mt76: mt7996: add driver for MediaTek Wi-Fi 7 (802.11be) devices") See-also: 988845c9361a ("mt76: mt7915: add support for passing chip/firmware debug data to user space") Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221215163133.4152299-1-arnd@kernel.org --- drivers/net/wireless/mediatek/mt76/mt7996/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/Kconfig b/drivers/net/wireless/mediatek/mt76/mt7996/Kconfig index 5c5fc569e6d59..79fb47a73c91d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/Kconfig +++ b/drivers/net/wireless/mediatek/mt76/mt7996/Kconfig @@ -2,6 +2,7 @@ config MT7996E tristate "MediaTek MT7996 (PCIe) support" select MT76_CONNAC_LIB + select RELAY depends on MAC80211 depends on PCI help From b7dc753fe33a707379e2254317794a4dad6c0fe2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Dec 2022 17:55:42 +0100 Subject: [PATCH 0026/1593] wifi: ath9k: use proper statements in conditionals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A previous cleanup patch accidentally broke some conditional expressions by replacing the safe "do {} while (0)" constructs with empty macros. gcc points this out when extra warnings are enabled: drivers/net/wireless/ath/ath9k/hif_usb.c: In function 'ath9k_skb_queue_complete': drivers/net/wireless/ath/ath9k/hif_usb.c:251:57: error: suggest braces around empty body in an 'else' statement [-Werror=empty-body] 251 | TX_STAT_INC(hif_dev, skb_failed); Make both sets of macros proper expressions again. Fixes: d7fc76039b74 ("ath9k: htc: clean up statistics macros") Signed-off-by: Arnd Bergmann Acked-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221215165553.1950307-1-arnd@kernel.org --- drivers/net/wireless/ath/ath9k/htc.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h index 30f0765fb9fd8..237f4ec2cffd7 100644 --- a/drivers/net/wireless/ath/ath9k/htc.h +++ b/drivers/net/wireless/ath/ath9k/htc.h @@ -327,9 +327,9 @@ static inline struct ath9k_htc_tx_ctl *HTC_SKB_CB(struct sk_buff *skb) } #ifdef CONFIG_ATH9K_HTC_DEBUGFS -#define __STAT_SAFE(hif_dev, expr) ((hif_dev)->htc_handle->drv_priv ? (expr) : 0) -#define CAB_STAT_INC(priv) ((priv)->debug.tx_stats.cab_queued++) -#define TX_QSTAT_INC(priv, q) ((priv)->debug.tx_stats.queue_stats[q]++) +#define __STAT_SAFE(hif_dev, expr) do { ((hif_dev)->htc_handle->drv_priv ? (expr) : 0); } while (0) +#define CAB_STAT_INC(priv) do { ((priv)->debug.tx_stats.cab_queued++); } while (0) +#define TX_QSTAT_INC(priv, q) do { ((priv)->debug.tx_stats.queue_stats[q]++); } while (0) #define TX_STAT_INC(hif_dev, c) \ __STAT_SAFE((hif_dev), (hif_dev)->htc_handle->drv_priv->debug.tx_stats.c++) @@ -378,10 +378,10 @@ void ath9k_htc_get_et_stats(struct ieee80211_hw *hw, struct ethtool_stats *stats, u64 *data); #else -#define TX_STAT_INC(hif_dev, c) -#define TX_STAT_ADD(hif_dev, c, a) -#define RX_STAT_INC(hif_dev, c) -#define RX_STAT_ADD(hif_dev, c, a) +#define TX_STAT_INC(hif_dev, c) do { } while (0) +#define TX_STAT_ADD(hif_dev, c, a) do { } while (0) +#define RX_STAT_INC(hif_dev, c) do { } while (0) +#define RX_STAT_ADD(hif_dev, c, a) do { } while (0) #define CAB_STAT_INC(priv) #define TX_QSTAT_INC(priv, c) From a6b9d2fa0024e7e399c26facd0fb466b7396e2b9 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 20 Dec 2022 12:31:29 -0500 Subject: [PATCH 0027/1593] pNFS/filelayout: Fix coalescing test for single DS When there is a single DS no striping constraints need to be placed on the IO. When such constraint is applied then buffered reads don't coalesce to the DS's rsize. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index ad34a33b0737c..4974cd18ca468 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -783,6 +783,12 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, return &fl->generic_hdr; } +static bool +filelayout_lseg_is_striped(const struct nfs4_filelayout_segment *flseg) +{ + return flseg->num_fh > 1; +} + /* * filelayout_pg_test(). Called by nfs_can_coalesce_requests() * @@ -803,6 +809,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, size = pnfs_generic_pg_test(pgio, prev, req); if (!size) return 0; + else if (!filelayout_lseg_is_striped(FILELAYOUT_LSEG(pgio->pg_lseg))) + return size; /* see if req and prev are in the same stripe */ if (prev) { From 560840afc3e63bbe5d9c5ef6b2ecf8f3589adff6 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 14 Dec 2022 15:05:08 -0800 Subject: [PATCH 0028/1593] btrfs: fix resolving backrefs for inline extent followed by prealloc If a file consists of an inline extent followed by a regular or prealloc extent, then a legitimate attempt to resolve a logical address in the non-inline region will result in add_all_parents reading the invalid offset field of the inline extent. If the inline extent item is placed in the leaf eb s.t. it is the first item, attempting to access the offset field will not only be meaningless, it will go past the end of the eb and cause this panic: [17.626048] BTRFS warning (device dm-2): bad eb member end: ptr 0x3fd4 start 30834688 member offset 16377 size 8 [17.631693] general protection fault, probably for non-canonical address 0x5088000000000: 0000 [#1] SMP PTI [17.635041] CPU: 2 PID: 1267 Comm: btrfs Not tainted 5.12.0-07246-g75175d5adc74-dirty #199 [17.637969] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [17.641995] RIP: 0010:btrfs_get_64+0xe7/0x110 [17.649890] RSP: 0018:ffffc90001f73a08 EFLAGS: 00010202 [17.651652] RAX: 0000000000000001 RBX: ffff88810c42d000 RCX: 0000000000000000 [17.653921] RDX: 0005088000000000 RSI: ffffc90001f73a0f RDI: 0000000000000001 [17.656174] RBP: 0000000000000ff9 R08: 0000000000000007 R09: c0000000fffeffff [17.658441] R10: ffffc90001f73790 R11: ffffc90001f73788 R12: ffff888106afe918 [17.661070] R13: 0000000000003fd4 R14: 0000000000003f6f R15: cdcdcdcdcdcdcdcd [17.663617] FS: 00007f64e7627d80(0000) GS:ffff888237c80000(0000) knlGS:0000000000000000 [17.666525] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [17.668664] CR2: 000055d4a39152e8 CR3: 000000010c596002 CR4: 0000000000770ee0 [17.671253] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [17.673634] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [17.676034] PKRU: 55555554 [17.677004] Call Trace: [17.677877] add_all_parents+0x276/0x480 [17.679325] find_parent_nodes+0xfae/0x1590 [17.680771] btrfs_find_all_leafs+0x5e/0xa0 [17.682217] iterate_extent_inodes+0xce/0x260 [17.683809] ? btrfs_inode_flags_to_xflags+0x50/0x50 [17.685597] ? iterate_inodes_from_logical+0xa1/0xd0 [17.687404] iterate_inodes_from_logical+0xa1/0xd0 [17.689121] ? btrfs_inode_flags_to_xflags+0x50/0x50 [17.691010] btrfs_ioctl_logical_to_ino+0x131/0x190 [17.692946] btrfs_ioctl+0x104a/0x2f60 [17.694384] ? selinux_file_ioctl+0x182/0x220 [17.695995] ? __x64_sys_ioctl+0x84/0xc0 [17.697394] __x64_sys_ioctl+0x84/0xc0 [17.698697] do_syscall_64+0x33/0x40 [17.700017] entry_SYSCALL_64_after_hwframe+0x44/0xae [17.701753] RIP: 0033:0x7f64e72761b7 [17.709355] RSP: 002b:00007ffefb067f58 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [17.712088] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f64e72761b7 [17.714667] RDX: 00007ffefb067fb0 RSI: 00000000c0389424 RDI: 0000000000000003 [17.717386] RBP: 00007ffefb06d188 R08: 000055d4a390d2b0 R09: 00007f64e7340a60 [17.719938] R10: 0000000000000231 R11: 0000000000000246 R12: 0000000000000001 [17.722383] R13: 0000000000000000 R14: 00000000c0389424 R15: 000055d4a38fd2a0 [17.724839] Modules linked in: Fix the bug by detecting the inline extent item in add_all_parents and skipping to the next extent item. CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/backref.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 21c92c74bf71a..46851511b661b 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -484,6 +484,7 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx, u64 wanted_disk_byte = ref->wanted_disk_byte; u64 count = 0; u64 data_offset; + u8 type; if (level != 0) { eb = path->nodes[level]; @@ -538,6 +539,9 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx, continue; } fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + type = btrfs_file_extent_type(eb, fi); + if (type == BTRFS_FILE_EXTENT_INLINE) + goto next; disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); data_offset = btrfs_file_extent_offset(eb, fi); From e7fc357ec03ee109da503af0dd31bbf68514e481 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 16 Dec 2022 11:48:00 -0500 Subject: [PATCH 0029/1593] btrfs: scrub: fix uninitialized return value in recover_scrub_rbio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 75b470332965 ("btrfs: raid56: migrate recovery and scrub recovery path to use error_bitmap") introduced an uninitialized return variable. This can be caught by gcc 12.1 by -Wmaybe-uninitialized: CC [M] fs/btrfs/raid56.o fs/btrfs/raid56.c: In function ‘scrub_rbio’: fs/btrfs/raid56.c:2801:15: warning: ‘ret’ may be used uninitialized [-Wmaybe-uninitialized] 2801 | ret = recover_scrub_rbio(rbio); | ^~~~~~~~~~~~~~~~~~~~~~~~ fs/btrfs/raid56.c:2649:13: note: ‘ret’ was declared here 2649 | int ret; The warning is disabled by default so we haven't caught that. Due to the bug the raid56 scrub fstests have been failing since the patch was merged, so initialize that. Fixes: 75b470332965 ("btrfs: raid56: migrate recovery and scrub recovery path to use error_bitmap") Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 2d90a6b5eb00e..6a2cf754912df 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -2646,7 +2646,7 @@ static int recover_scrub_rbio(struct btrfs_raid_bio *rbio) void **pointers = NULL; void **unmap_array = NULL; int sector_nr; - int ret; + int ret = 0; /* * @pointers array stores the pointer for each sector. From fee4c19937439693f2420a916169d08e88576e8e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 20 Dec 2022 11:13:33 +0000 Subject: [PATCH 0030/1593] btrfs: fix fscrypt name leak after failure to join log transaction When logging a new name, we don't expect to fail joining a log transaction since we know at least one of the inodes was logged before in the current transaction. However if we fail for some unexpected reason, we end up not freeing the fscrypt name we previously allocated. So fix that by freeing the name in case we failed to join a log transaction. Fixes: ab3c5c18e8fa ("btrfs: setup qstr from dentrys using fscrypt helper") Reviewed-by: Sweet Tea Dorminy Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a3c43f0b1c95c..fb52aa0600930 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -7459,8 +7459,11 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, * not fail, but if it does, it's not serious, just bail out and * mark the log for a full commit. */ - if (WARN_ON_ONCE(ret < 0)) + if (WARN_ON_ONCE(ret < 0)) { + fscrypt_free_filename(&fname); goto out; + } + log_pinned = true; path = btrfs_alloc_path(); From 54c3f1a81421f85e60ae2eaae7be3727a09916ee Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 19 Dec 2022 16:47:00 -0800 Subject: [PATCH 0031/1593] bpf: pull before calling skb_postpull_rcsum() Anand hit a BUG() when pulling off headers on egress to a SW tunnel. We get to skb_checksum_help() with an invalid checksum offset (commit d7ea0d9df2a6 ("net: remove two BUG() from skb_checksum_help()") converted those BUGs to WARN_ONs()). He points out oddness in how skb_postpull_rcsum() gets used. Indeed looks like we should pull before "postpull", otherwise the CHECKSUM_PARTIAL fixup from skb_postpull_rcsum() will not be able to do its job: if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_start_offset(skb) < 0) skb->ip_summed = CHECKSUM_NONE; Reported-by: Anand Parthasarathy Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper") Signed-off-by: Jakub Kicinski Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20221220004701.402165-1-kuba@kernel.org Signed-off-by: Martin KaFai Lau --- net/core/filter.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 929358677183d..43cc1fe58a2c6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3180,15 +3180,18 @@ static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len) { + void *old_data; + /* skb_ensure_writable() is not needed here, as we're * already working on an uncloned skb. */ if (unlikely(!pskb_may_pull(skb, off + len))) return -ENOMEM; - skb_postpull_rcsum(skb, skb->data + off, len); - memmove(skb->data + len, skb->data, off); + old_data = skb->data; __skb_pull(skb, len); + skb_postpull_rcsum(skb, old_data + off, len); + memmove(skb->data, old_data, off); return 0; } From 4217c6ac817451d5116687f3cc6286220dc43d49 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 19 Dec 2022 14:01:30 +0000 Subject: [PATCH 0032/1593] drm/panfrost: Fix GEM handle creation ref-counting panfrost_gem_create_with_handle() previously returned a BO but with the only reference being from the handle, which user space could in theory guess and release, causing a use-after-free. Additionally if the call to panfrost_gem_mapping_get() in panfrost_ioctl_create_bo() failed then a(nother) reference on the BO was dropped. The _create_with_handle() is a problematic pattern, so ditch it and instead create the handle in panfrost_ioctl_create_bo(). If the call to panfrost_gem_mapping_get() fails then this means that user space has indeed gone behind our back and freed the handle. In which case just return an error code. Reported-by: Rob Clark Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver") Signed-off-by: Steven Price Reviewed-by: Rob Clark Link: https://patchwork.freedesktop.org/patch/msgid/20221219140130.410578-1-steven.price@arm.com --- drivers/gpu/drm/panfrost/panfrost_drv.c | 27 ++++++++++++++++--------- drivers/gpu/drm/panfrost/panfrost_gem.c | 16 +-------------- drivers/gpu/drm/panfrost/panfrost_gem.h | 5 +---- 3 files changed, 20 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 2fa5afe212889..919e6cc049828 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -82,6 +82,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, struct panfrost_gem_object *bo; struct drm_panfrost_create_bo *args = data; struct panfrost_gem_mapping *mapping; + int ret; if (!args->size || args->pad || (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP))) @@ -92,21 +93,29 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, !(args->flags & PANFROST_BO_NOEXEC)) return -EINVAL; - bo = panfrost_gem_create_with_handle(file, dev, args->size, args->flags, - &args->handle); + bo = panfrost_gem_create(dev, args->size, args->flags); if (IS_ERR(bo)) return PTR_ERR(bo); + ret = drm_gem_handle_create(file, &bo->base.base, &args->handle); + if (ret) + goto out; + mapping = panfrost_gem_mapping_get(bo, priv); - if (!mapping) { - drm_gem_object_put(&bo->base.base); - return -EINVAL; + if (mapping) { + args->offset = mapping->mmnode.start << PAGE_SHIFT; + panfrost_gem_mapping_put(mapping); + } else { + /* This can only happen if the handle from + * drm_gem_handle_create() has already been guessed and freed + * by user space + */ + ret = -EINVAL; } - args->offset = mapping->mmnode.start << PAGE_SHIFT; - panfrost_gem_mapping_put(mapping); - - return 0; +out: + drm_gem_object_put(&bo->base.base); + return ret; } /** diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 293e799e2fe81..3c812fbd126fd 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -235,12 +235,8 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t } struct panfrost_gem_object * -panfrost_gem_create_with_handle(struct drm_file *file_priv, - struct drm_device *dev, size_t size, - u32 flags, - uint32_t *handle) +panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) { - int ret; struct drm_gem_shmem_object *shmem; struct panfrost_gem_object *bo; @@ -256,16 +252,6 @@ panfrost_gem_create_with_handle(struct drm_file *file_priv, bo->noexec = !!(flags & PANFROST_BO_NOEXEC); bo->is_heap = !!(flags & PANFROST_BO_HEAP); - /* - * Allocate an id of idr table where the obj is registered - * and handle has the id what user can see. - */ - ret = drm_gem_handle_create(file_priv, &shmem->base, handle); - /* drop reference from allocate - handle holds it now. */ - drm_gem_object_put(&shmem->base); - if (ret) - return ERR_PTR(ret); - return bo; } diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index 8088d5fd8480e..ad2877eeeccdf 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -69,10 +69,7 @@ panfrost_gem_prime_import_sg_table(struct drm_device *dev, struct sg_table *sgt); struct panfrost_gem_object * -panfrost_gem_create_with_handle(struct drm_file *file_priv, - struct drm_device *dev, size_t size, - u32 flags, - uint32_t *handle); +panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags); int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv); void panfrost_gem_close(struct drm_gem_object *obj, From 5eb119da94ac5d67a31eaa869621dc6e25eb125e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 15 Dec 2022 15:16:33 +0100 Subject: [PATCH 0033/1593] netfilter: conntrack: fix ipv6 exthdr error check smatch warnings: net/netfilter/nf_conntrack_proto.c:167 nf_confirm() warn: unsigned 'protoff' is never less than zero. We need to check if ipv6_skip_exthdr() returned an error, but protoff is unsigned. Use a signed integer for this. Fixes: a70e483460d5 ("netfilter: conntrack: merge ipv4+ipv6 confirm functions") Reported-by: kernel test robot Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 99323fb12d0f5..ccef340be575e 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -141,6 +141,7 @@ unsigned int nf_confirm(void *priv, struct nf_conn *ct; bool seqadj_needed; __be16 frag_off; + int start; u8 pnum; ct = nf_ct_get(skb, &ctinfo); @@ -163,9 +164,11 @@ unsigned int nf_confirm(void *priv, break; case NFPROTO_IPV6: pnum = ipv6_hdr(skb)->nexthdr; - protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off); - if (protoff < 0 || (frag_off & htons(~0x7)) != 0) + start = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off); + if (start < 0 || (frag_off & htons(~0x7)) != 0) return nf_conntrack_confirm(skb); + + protoff = start; break; default: return nf_conntrack_confirm(skb); From bed4a63ea4ae77cfe5aae004ef87379f0655260a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 19 Dec 2022 20:07:52 +0100 Subject: [PATCH 0034/1593] netfilter: nf_tables: consolidate set description Add the following fields to the set description: - key type - data type - object type - policy - gc_int: garbage collection interval) - timeout: element timeout This prepares for stricter set type checks on updates in a follow up patch. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 12 +++++++ net/netfilter/nf_tables_api.c | 58 +++++++++++++++---------------- 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e69ce23566eab..4957b4775757b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -312,17 +312,29 @@ struct nft_set_iter { /** * struct nft_set_desc - description of set elements * + * @ktype: key type * @klen: key length + * @dtype: data type * @dlen: data length + * @objtype: object type + * @flags: flags * @size: number of set elements + * @policy: set policy + * @gc_int: garbage collector interval * @field_len: length of each field in concatenation, bytes * @field_count: number of concatenated fields in element * @expr: set must support for expressions */ struct nft_set_desc { + u32 ktype; unsigned int klen; + u32 dtype; unsigned int dlen; + u32 objtype; unsigned int size; + u32 policy; + u32 gc_int; + u64 timeout; u8 field_len[NFT_REG32_COUNT]; u8 field_count; bool expr; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 832b881f7c174..1deecc1a6c003 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3780,8 +3780,7 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags) static const struct nft_set_ops * nft_select_set_ops(const struct nft_ctx *ctx, const struct nlattr * const nla[], - const struct nft_set_desc *desc, - enum nft_set_policies policy) + const struct nft_set_desc *desc) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); const struct nft_set_ops *ops, *bops; @@ -3810,7 +3809,7 @@ nft_select_set_ops(const struct nft_ctx *ctx, if (!ops->estimate(desc, flags, &est)) continue; - switch (policy) { + switch (desc->policy) { case NFT_SET_POL_PERFORMANCE: if (est.lookup < best.lookup) break; @@ -4392,7 +4391,6 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc, static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - u32 ktype, dtype, flags, policy, gc_int, objtype; struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; @@ -4405,10 +4403,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, struct nft_set *set; struct nft_ctx ctx; size_t alloc_size; - u64 timeout; char *name; int err, i; u16 udlen; + u32 flags; u64 size; if (nla[NFTA_SET_TABLE] == NULL || @@ -4419,10 +4417,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, memset(&desc, 0, sizeof(desc)); - ktype = NFT_DATA_VALUE; + desc.ktype = NFT_DATA_VALUE; if (nla[NFTA_SET_KEY_TYPE] != NULL) { - ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); - if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK) + desc.ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); + if ((desc.ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK) return -EINVAL; } @@ -4447,17 +4445,17 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, return -EOPNOTSUPP; } - dtype = 0; + desc.dtype = 0; if (nla[NFTA_SET_DATA_TYPE] != NULL) { if (!(flags & NFT_SET_MAP)) return -EINVAL; - dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE])); - if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK && - dtype != NFT_DATA_VERDICT) + desc.dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE])); + if ((desc.dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK && + desc.dtype != NFT_DATA_VERDICT) return -EINVAL; - if (dtype != NFT_DATA_VERDICT) { + if (desc.dtype != NFT_DATA_VERDICT) { if (nla[NFTA_SET_DATA_LEN] == NULL) return -EINVAL; desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); @@ -4472,34 +4470,34 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(flags & NFT_SET_OBJECT)) return -EINVAL; - objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); - if (objtype == NFT_OBJECT_UNSPEC || - objtype > NFT_OBJECT_MAX) + desc.objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); + if (desc.objtype == NFT_OBJECT_UNSPEC || + desc.objtype > NFT_OBJECT_MAX) return -EOPNOTSUPP; } else if (flags & NFT_SET_OBJECT) return -EINVAL; else - objtype = NFT_OBJECT_UNSPEC; + desc.objtype = NFT_OBJECT_UNSPEC; - timeout = 0; + desc.timeout = 0; if (nla[NFTA_SET_TIMEOUT] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; - err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout); + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); if (err) return err; } - gc_int = 0; + desc.gc_int = 0; if (nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; - gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); + desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } - policy = NFT_SET_POL_PERFORMANCE; + desc.policy = NFT_SET_POL_PERFORMANCE; if (nla[NFTA_SET_POLICY] != NULL) - policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); + desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); if (nla[NFTA_SET_DESC] != NULL) { err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]); @@ -4544,7 +4542,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) return -ENOENT; - ops = nft_select_set_ops(&ctx, nla, &desc, policy); + ops = nft_select_set_ops(&ctx, nla, &desc); if (IS_ERR(ops)) return PTR_ERR(ops); @@ -4584,18 +4582,18 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, set->table = table; write_pnet(&set->net, net); set->ops = ops; - set->ktype = ktype; + set->ktype = desc.ktype; set->klen = desc.klen; - set->dtype = dtype; - set->objtype = objtype; + set->dtype = desc.dtype; + set->objtype = desc.objtype; set->dlen = desc.dlen; set->flags = flags; set->size = desc.size; - set->policy = policy; + set->policy = desc.policy; set->udlen = udlen; set->udata = udata; - set->timeout = timeout; - set->gc_int = gc_int; + set->timeout = desc.timeout; + set->gc_int = desc.gc_int; set->field_count = desc.field_count; for (i = 0; i < desc.field_count; i++) From a8fe4154fa5a1bae590b243ed60f871e5a5e1378 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 19 Dec 2022 18:00:10 +0100 Subject: [PATCH 0035/1593] netfilter: nf_tables: add function to create set stateful expressions Add a helper function to allocate and initialize the stateful expressions that are defined in a set. This patch allows to reuse this code from the set update path, to check that type of the update matches the existing set in the kernel. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 106 ++++++++++++++++++++++------------ 1 file changed, 68 insertions(+), 38 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1deecc1a6c003..b9b0ae29f5f60 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4388,6 +4388,59 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc, return err; } +static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set, + const struct nlattr * const *nla, + struct nft_expr **exprs, int *num_exprs, + u32 flags) +{ + struct nft_expr *expr; + int err, i; + + if (nla[NFTA_SET_EXPR]) { + expr = nft_set_elem_expr_alloc(ctx, set, nla[NFTA_SET_EXPR]); + if (IS_ERR(expr)) { + err = PTR_ERR(expr); + goto err_set_expr_alloc; + } + exprs[0] = expr; + (*num_exprs)++; + } else if (nla[NFTA_SET_EXPRESSIONS]) { + struct nlattr *tmp; + int left; + + if (!(flags & NFT_SET_EXPR)) { + err = -EINVAL; + goto err_set_expr_alloc; + } + i = 0; + nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { + if (i == NFT_SET_EXPR_MAX) { + err = -E2BIG; + goto err_set_expr_alloc; + } + if (nla_type(tmp) != NFTA_LIST_ELEM) { + err = -EINVAL; + goto err_set_expr_alloc; + } + expr = nft_set_elem_expr_alloc(ctx, set, tmp); + if (IS_ERR(expr)) { + err = PTR_ERR(expr); + goto err_set_expr_alloc; + } + exprs[i++] = expr; + (*num_exprs)++; + } + } + + return 0; + +err_set_expr_alloc: + for (i = 0; i < *num_exprs; i++) + nft_expr_destroy(ctx, exprs[i]); + + return err; +} + static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { @@ -4395,7 +4448,6 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; const struct nft_set_ops *ops; - struct nft_expr *expr = NULL; struct net *net = info->net; struct nft_set_desc desc; struct nft_table *table; @@ -4403,6 +4455,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, struct nft_set *set; struct nft_ctx ctx; size_t alloc_size; + int num_exprs = 0; char *name; int err, i; u16 udlen; @@ -4529,6 +4582,8 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, return PTR_ERR(set); } } else { + struct nft_expr *exprs[NFT_SET_EXPR_MAX] = {}; + if (info->nlh->nlmsg_flags & NLM_F_EXCL) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); return -EEXIST; @@ -4536,6 +4591,13 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags); + if (err < 0) + return err; + + for (i = 0; i < num_exprs; i++) + nft_expr_destroy(&ctx, exprs[i]); + return 0; } @@ -4603,43 +4665,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (err < 0) goto err_set_init; - if (nla[NFTA_SET_EXPR]) { - expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]); - if (IS_ERR(expr)) { - err = PTR_ERR(expr); - goto err_set_expr_alloc; - } - set->exprs[0] = expr; - set->num_exprs++; - } else if (nla[NFTA_SET_EXPRESSIONS]) { - struct nft_expr *expr; - struct nlattr *tmp; - int left; - - if (!(flags & NFT_SET_EXPR)) { - err = -EINVAL; - goto err_set_expr_alloc; - } - i = 0; - nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { - if (i == NFT_SET_EXPR_MAX) { - err = -E2BIG; - goto err_set_expr_alloc; - } - if (nla_type(tmp) != NFTA_LIST_ELEM) { - err = -EINVAL; - goto err_set_expr_alloc; - } - expr = nft_set_elem_expr_alloc(&ctx, set, tmp); - if (IS_ERR(expr)) { - err = PTR_ERR(expr); - goto err_set_expr_alloc; - } - set->exprs[i++] = expr; - set->num_exprs++; - } - } + err = nft_set_expr_alloc(&ctx, set, nla, set->exprs, &num_exprs, flags); + if (err < 0) + goto err_set_destroy; + set->num_exprs = num_exprs; set->handle = nf_tables_alloc_handle(table); err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); @@ -4653,7 +4683,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, err_set_expr_alloc: for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(&ctx, set->exprs[i]); - +err_set_destroy: ops->destroy(set); err_set_init: kfree(set->name); From f6594c372afd5cec8b1e9ee9ea8f8819d59c6fb1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 19 Dec 2022 20:09:00 +0100 Subject: [PATCH 0036/1593] netfilter: nf_tables: perform type checking for existing sets If a ruleset declares a set name that matches an existing set in the kernel, then validate that this declaration really refers to the same set, otherwise bail out with EEXIST. Currently, the kernel reports success when adding a set that already exists in the kernel. This usually results in EINVAL errors at a later stage, when the user adds elements to the set, if the set declaration mismatches the existing set representation in the kernel. Add a new function to check that the set declaration really refers to the same existing set in the kernel. Fixes: 96518518cc41 ("netfilter: add nftables") Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 36 ++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b9b0ae29f5f60..319887f4d3ef7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4441,6 +4441,34 @@ static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set, return err; } +static bool nft_set_is_same(const struct nft_set *set, + const struct nft_set_desc *desc, + struct nft_expr *exprs[], u32 num_exprs, u32 flags) +{ + int i; + + if (set->ktype != desc->ktype || + set->dtype != desc->dtype || + set->flags != flags || + set->klen != desc->klen || + set->dlen != desc->dlen || + set->field_count != desc->field_count || + set->num_exprs != num_exprs) + return false; + + for (i = 0; i < desc->field_count; i++) { + if (set->field_len[i] != desc->field_len[i]) + return false; + } + + for (i = 0; i < num_exprs; i++) { + if (set->exprs[i]->ops != exprs[i]->ops) + return false; + } + + return true; +} + static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { @@ -4595,10 +4623,16 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (err < 0) return err; + err = 0; + if (!nft_set_is_same(set, &desc, exprs, num_exprs, flags)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); + err = -EEXIST; + } + for (i = 0; i < num_exprs; i++) nft_expr_destroy(&ctx, exprs[i]); - return 0; + return err; } if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) From 70a00e2f1dbae11dc3444444c6bd7555763d8421 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 21 Dec 2022 10:56:53 -0800 Subject: [PATCH 0037/1593] selftests/bpf: Test bpf_skb_adjust_room on CHECKSUM_PARTIAL When the bpf_skb_adjust_room() shrinks the skb such that its csum_start is invalid, the skb->ip_summed should be reset from CHECKSUM_PARTIAL to CHECKSUM_NONE. The commit 54c3f1a81421 ("bpf: pull before calling skb_postpull_rcsum()") fixed it. This patch adds a test to ensure the skb->ip_summed changed from CHECKSUM_PARTIAL to CHECKSUM_NONE after bpf_skb_adjust_room(). Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20221221185653.1589961-1-martin.lau@linux.dev --- tools/testing/selftests/bpf/DENYLIST.s390x | 1 + .../selftests/bpf/prog_tests/decap_sanity.c | 85 +++++++++++++++++++ .../selftests/bpf/progs/bpf_tracing_net.h | 6 ++ .../selftests/bpf/progs/decap_sanity.c | 68 +++++++++++++++ 4 files changed, 160 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/decap_sanity.c create mode 100644 tools/testing/selftests/bpf/progs/decap_sanity.c diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 585fcf73c7314..3fc3e54b19aad 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -14,6 +14,7 @@ cgrp_kfunc # JIT does not support calling kernel f cgrp_local_storage # prog_attach unexpected error: -524 (trampoline) core_read_macros # unknown func bpf_probe_read#4 (overlapping) d_path # failed to auto-attach program 'prog_stat': -524 (trampoline) +decap_sanity # JIT does not support calling kernel function (kfunc) deny_namespace # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) dummy_st_ops # test_run unexpected error: -524 (errno 524) (trampoline) fentry_fexit # fentry attach failed: -524 (trampoline) diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c new file mode 100644 index 0000000000000..0b2f73b88c53d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include + +#include "test_progs.h" +#include "network_helpers.h" +#include "decap_sanity.skel.h" + +#define SYS(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + goto fail; \ + }) + +#define NS_TEST "decap_sanity_ns" +#define IPV6_IFACE_ADDR "face::1" +#define UDP_TEST_PORT 7777 + +void test_decap_sanity(void) +{ + LIBBPF_OPTS(bpf_tc_hook, qdisc_hook, .attach_point = BPF_TC_EGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_attach); + struct nstoken *nstoken = NULL; + struct decap_sanity *skel; + struct sockaddr_in6 addr; + socklen_t addrlen; + char buf[128] = {}; + int sockfd, err; + + skel = decap_sanity__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open_and_load")) + return; + + SYS("ip netns add %s", NS_TEST); + SYS("ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR); + SYS("ip -net %s link set dev lo up", NS_TEST); + + nstoken = open_netns(NS_TEST); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto fail; + + qdisc_hook.ifindex = if_nametoindex("lo"); + if (!ASSERT_GT(qdisc_hook.ifindex, 0, "if_nametoindex lo")) + goto fail; + + err = bpf_tc_hook_create(&qdisc_hook); + if (!ASSERT_OK(err, "create qdisc hook")) + goto fail; + + tc_attach.prog_fd = bpf_program__fd(skel->progs.decap_sanity); + err = bpf_tc_attach(&qdisc_hook, &tc_attach); + if (!ASSERT_OK(err, "attach filter")) + goto fail; + + addrlen = sizeof(addr); + err = make_sockaddr(AF_INET6, IPV6_IFACE_ADDR, UDP_TEST_PORT, + (void *)&addr, &addrlen); + if (!ASSERT_OK(err, "make_sockaddr")) + goto fail; + sockfd = socket(AF_INET6, SOCK_DGRAM, 0); + if (!ASSERT_NEQ(sockfd, -1, "socket")) + goto fail; + err = sendto(sockfd, buf, sizeof(buf), 0, (void *)&addr, addrlen); + close(sockfd); + if (!ASSERT_EQ(err, sizeof(buf), "send")) + goto fail; + + ASSERT_TRUE(skel->bss->init_csum_partial, "init_csum_partial"); + ASSERT_TRUE(skel->bss->final_csum_none, "final_csum_none"); + ASSERT_FALSE(skel->bss->broken_csum_start, "broken_csum_start"); + +fail: + if (nstoken) { + bpf_tc_hook_destroy(&qdisc_hook); + close_netns(nstoken); + } + system("ip netns del " NS_TEST " >& /dev/null"); + decap_sanity__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index b394817126cf9..cfed4df490f35 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -50,6 +50,12 @@ #define ICSK_TIME_LOSS_PROBE 5 #define ICSK_TIME_REO_TIMEOUT 6 +#define ETH_HLEN 14 +#define ETH_P_IPV6 0x86DD + +#define CHECKSUM_NONE 0 +#define CHECKSUM_PARTIAL 3 + #define IFNAMSIZ 16 #define RTF_GATEWAY 0x0002 diff --git a/tools/testing/selftests/bpf/progs/decap_sanity.c b/tools/testing/selftests/bpf/progs/decap_sanity.c new file mode 100644 index 0000000000000..bd3c657c58a79 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/decap_sanity.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include "bpf_tracing_net.h" +#include +#include + +#define UDP_TEST_PORT 7777 + +void *bpf_cast_to_kern_ctx(void *) __ksym; +bool init_csum_partial = false; +bool final_csum_none = false; +bool broken_csum_start = false; + +static unsigned int skb_headlen(const struct sk_buff *skb) +{ + return skb->len - skb->data_len; +} + +static unsigned int skb_headroom(const struct sk_buff *skb) +{ + return skb->data - skb->head; +} + +static int skb_checksum_start_offset(const struct sk_buff *skb) +{ + return skb->csum_start - skb_headroom(skb); +} + +SEC("tc") +int decap_sanity(struct __sk_buff *skb) +{ + struct sk_buff *kskb; + struct ipv6hdr ip6h; + struct udphdr udph; + int err; + + if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6)) + return TC_ACT_SHOT; + + if (bpf_skb_load_bytes(skb, ETH_HLEN, &ip6h, sizeof(ip6h))) + return TC_ACT_SHOT; + + if (ip6h.nexthdr != IPPROTO_UDP) + return TC_ACT_SHOT; + + if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(ip6h), &udph, sizeof(udph))) + return TC_ACT_SHOT; + + if (udph.dest != __bpf_constant_htons(UDP_TEST_PORT)) + return TC_ACT_SHOT; + + kskb = bpf_cast_to_kern_ctx(skb); + init_csum_partial = (kskb->ip_summed == CHECKSUM_PARTIAL); + err = bpf_skb_adjust_room(skb, -(s32)(ETH_HLEN + sizeof(ip6h) + sizeof(udph)), + 1, BPF_F_ADJ_ROOM_FIXED_GSO); + if (err) + return TC_ACT_SHOT; + final_csum_none = (kskb->ip_summed == CHECKSUM_NONE); + if (kskb->ip_summed == CHECKSUM_PARTIAL && + (unsigned int)skb_checksum_start_offset(kskb) >= skb_headlen(kskb)) + broken_csum_start = true; + + return TC_ACT_SHOT; +} + +char __license[] SEC("license") = "GPL"; From 53fc61be273a1e76dd5e356f91805dce00ff2d2c Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Dec 2022 09:54:48 -0800 Subject: [PATCH 0038/1593] ice: xsk: do not use xdp_return_frame() on tx_buf->raw_buf Previously ice XDP xmit routine was changed in a way that it avoids xdp_buff->xdp_frame conversion as it is simply not needed for handling XDP_TX action and what is more it saves us CPU cycles. This routine is re-used on ZC driver to handle XDP_TX action. Although for XDP_TX on Rx ZC xdp_buff that comes from xsk_buff_pool is converted to xdp_frame, xdp_frame itself is not stored inside ice_tx_buf, we only store raw data pointer. Casting this pointer to xdp_frame and calling against it xdp_return_frame in ice_clean_xdp_tx_buf() results in undefined behavior. To fix this, simply call page_frag_free() on tx_buf->raw_buf. Later intention is to remove the buff->frame conversion in order to simplify the codebase and improve XDP_TX performance on ZC. Fixes: 126cdfe1007a ("ice: xsk: Improve AF_XDP ZC Tx and use batching API") Reported-and-tested-by: Robin Cowley Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen Reviewed-by: Piotr Raczynski Link: https://lore.kernel.org/r/20221220175448.693999-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 907055b77af0e..7105de6fb3444 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -783,7 +783,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) static void ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) { - xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf); + page_frag_free(tx_buf->raw_buf); xdp_ring->xdp_tx_active--; dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); From f2575c8f404911da83f25b688e12afcf4273e640 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 20 Dec 2022 18:18:25 +0100 Subject: [PATCH 0039/1593] net: vrf: determine the dst using the original ifindex for multicast Multicast packets received on an interface bound to a VRF are marked as belonging to the VRF and the skb device is updated to point to the VRF device itself. This was fine even when a route was associated to a device as when performing a fib table lookup 'oif' in fib6_table_lookup (coming from 'skb->dev->ifindex' in ip6_route_input) was set to 0 when FLOWI_FLAG_SKIP_NH_OIF was set. With commit 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices") this is not longer true and multicast traffic is not received on the original interface. Instead of adding back a similar check in fib6_table_lookup determine the dst using the original ifindex for multicast VRF traffic. To make things consistent across the function do the above for all strict packets, which was the logic before commit 6f12fa775530 ("vrf: mark skb for multicast or link-local as enslaved to VRF"). Note that reverting to this behavior should be fine as the change was about marking packets belonging to the VRF, not about their dst. Fixes: 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices") Reported-by: Jianlin Shi Signed-off-by: Antoine Tenart Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20221220171825.1172237-1-atenart@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 6b5a4d036d153..bdb3a76a352e4 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1385,8 +1385,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, /* loopback, multicast & non-ND link-local traffic; do not push through * packet taps again. Reset pkt_type for upper layers to process skb. - * For strict packets with a source LLA, determine the dst using the - * original ifindex. + * For non-loopback strict packets, determine the dst using the original + * ifindex. */ if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) { skb->dev = vrf_dev; @@ -1395,7 +1395,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, if (skb->pkt_type == PACKET_LOOPBACK) skb->pkt_type = PACKET_HOST; - else if (ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) + else vrf_ip6_input_dst(skb, vrf_dev, orig_iif); goto out; From 95637d91fefdb94d6e7389222ba9ddab0e9f5abe Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Tue, 20 Dec 2022 16:27:17 -0500 Subject: [PATCH 0040/1593] net: openvswitch: release vport resources on failure A recent commit introducing upcall packet accounting failed to properly release the vport object when the per-cpu stats struct couldn't be allocated. This can cause dangling pointers to dp objects long after they've been released. Cc: wangchuanlei Fixes: 1933ea365aa7 ("net: openvswitch: Add support to count upcall packets") Reported-by: syzbot+8f4e2dcfcb3209ac35f9@syzkaller.appspotmail.com Signed-off-by: Aaron Conole Acked-by: Eelco Chaudron Reviewed-by: Michal Swiatkowski Link: https://lore.kernel.org/r/20221220212717.526780-1-aconole@redhat.com Signed-off-by: Jakub Kicinski --- net/openvswitch/datapath.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 9ca721c9fa718..a71795355aecf 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1861,7 +1861,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu); if (!vport->upcall_stats) { err = -ENOMEM; - goto err_destroy_portids; + goto err_destroy_vport; } err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, @@ -1876,6 +1876,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; +err_destroy_vport: + ovs_dp_detach_port(vport); err_destroy_portids: kfree(rcu_dereference_raw(dp->upcall_portids)); err_unlock_and_destroy_meters: @@ -2323,7 +2325,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu); if (!vport->upcall_stats) { err = -ENOMEM; - goto exit_unlock_free; + goto exit_unlock_free_vport; } err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), @@ -2343,6 +2345,8 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_vport_genl_family, reply, info); return 0; +exit_unlock_free_vport: + ovs_dp_detach_port(vport); exit_unlock_free: ovs_unlock(); kfree_skb(reply); From 3d8f2c4269d08f8793e946279dbdf5e972cc4911 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Tue, 20 Dec 2022 12:25:55 -0800 Subject: [PATCH 0041/1593] vmxnet3: correctly report csum_level for encapsulated packet Commit dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") added support for encapsulation offload. However, the pathc did not report correctly the csum_level for encapsulated packet. This patch fixes this issue by reporting correct csum level for the encapsulated packet. Fixes: dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") Signed-off-by: Ronak Doshi Acked-by: Peng Li Link: https://lore.kernel.org/r/20221220202556.24421-1-doshir@vmware.com Signed-off-by: Jakub Kicinski --- drivers/net/vmxnet3/vmxnet3_drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 6f1e560fb15c4..56267c327f0b7 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1288,6 +1288,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, (le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) == VMXNET3_RCD_CSUM_OK) { skb->ip_summed = CHECKSUM_UNNECESSARY; + if ((le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) { + skb->csum_level = 1; + } WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) && !(le32_to_cpu(gdesc->dword[0]) & (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); @@ -1297,6 +1301,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, } else if (gdesc->rcd.v6 && (le32_to_cpu(gdesc->dword[3]) & (1 << VMXNET3_RCD_TUC_SHIFT))) { skb->ip_summed = CHECKSUM_UNNECESSARY; + if ((le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) { + skb->csum_level = 1; + } WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) && !(le32_to_cpu(gdesc->dword[0]) & (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); From e20aa071cd955aabc15be0ec1e914283592ddef4 Mon Sep 17 00:00:00 2001 From: Yinjun Zhang Date: Tue, 20 Dec 2022 16:21:00 +0100 Subject: [PATCH 0042/1593] nfp: fix schedule in atomic context when sync mc address The callback `.ndo_set_rx_mode` is called in atomic context, sleep is not allowed in the implementation. Now use workqueue mechanism to avoid this issue. Fixes: de6248644966 ("nfp: add support for multicast filter") Signed-off-by: Yinjun Zhang Reviewed-by: Louis Peens Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20221220152100.1042774-1-simon.horman@corigine.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_net.h | 7 +++ .../ethernet/netronome/nfp/nfp_net_common.c | 61 +++++++++++++++++-- 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index da33f09facb91..432d79d691c29 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -617,6 +617,9 @@ struct nfp_net_dp { * @vnic_no_name: For non-port PF vNIC make ndo_get_phys_port_name return * -EOPNOTSUPP to keep backwards compatibility (set by app) * @port: Pointer to nfp_port structure if vNIC is a port + * @mc_lock: Protect mc_addrs list + * @mc_addrs: List of mc addrs to add/del to HW + * @mc_work: Work to update mc addrs * @app_priv: APP private data for this vNIC */ struct nfp_net { @@ -718,6 +721,10 @@ struct nfp_net { struct nfp_port *port; + spinlock_t mc_lock; + struct list_head mc_addrs; + struct work_struct mc_work; + void *app_priv; }; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 09053373288fe..18fc9971f1c8f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1334,9 +1334,14 @@ int nfp_ctrl_open(struct nfp_net *nn) return err; } -static int nfp_net_mc_cfg(struct net_device *netdev, const unsigned char *addr, const u32 cmd) +struct nfp_mc_addr_entry { + u8 addr[ETH_ALEN]; + u32 cmd; + struct list_head list; +}; + +static int nfp_net_mc_cfg(struct nfp_net *nn, const unsigned char *addr, const u32 cmd) { - struct nfp_net *nn = netdev_priv(netdev); int ret; ret = nfp_net_mbox_lock(nn, NFP_NET_CFG_MULTICAST_SZ); @@ -1351,6 +1356,25 @@ static int nfp_net_mc_cfg(struct net_device *netdev, const unsigned char *addr, return nfp_net_mbox_reconfig_and_unlock(nn, cmd); } +static int nfp_net_mc_prep(struct nfp_net *nn, const unsigned char *addr, const u32 cmd) +{ + struct nfp_mc_addr_entry *entry; + + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + return -ENOMEM; + + ether_addr_copy(entry->addr, addr); + entry->cmd = cmd; + spin_lock_bh(&nn->mc_lock); + list_add_tail(&entry->list, &nn->mc_addrs); + spin_unlock_bh(&nn->mc_lock); + + schedule_work(&nn->mc_work); + + return 0; +} + static int nfp_net_mc_sync(struct net_device *netdev, const unsigned char *addr) { struct nfp_net *nn = netdev_priv(netdev); @@ -1361,12 +1385,35 @@ static int nfp_net_mc_sync(struct net_device *netdev, const unsigned char *addr) return -EINVAL; } - return nfp_net_mc_cfg(netdev, addr, NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD); + return nfp_net_mc_prep(nn, addr, NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD); } static int nfp_net_mc_unsync(struct net_device *netdev, const unsigned char *addr) { - return nfp_net_mc_cfg(netdev, addr, NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL); + struct nfp_net *nn = netdev_priv(netdev); + + return nfp_net_mc_prep(nn, addr, NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL); +} + +static void nfp_net_mc_addr_config(struct work_struct *work) +{ + struct nfp_net *nn = container_of(work, struct nfp_net, mc_work); + struct nfp_mc_addr_entry *entry, *tmp; + struct list_head tmp_list; + + INIT_LIST_HEAD(&tmp_list); + + spin_lock_bh(&nn->mc_lock); + list_splice_init(&nn->mc_addrs, &tmp_list); + spin_unlock_bh(&nn->mc_lock); + + list_for_each_entry_safe(entry, tmp, &tmp_list, list) { + if (nfp_net_mc_cfg(nn, entry->addr, entry->cmd)) + nn_err(nn, "Config mc address to HW failed.\n"); + + list_del(&entry->list); + kfree(entry); + } } static void nfp_net_set_rx_mode(struct net_device *netdev) @@ -2633,6 +2680,11 @@ int nfp_net_init(struct nfp_net *nn) if (!nn->dp.netdev) return 0; + + spin_lock_init(&nn->mc_lock); + INIT_LIST_HEAD(&nn->mc_addrs); + INIT_WORK(&nn->mc_work, nfp_net_mc_addr_config); + return register_netdev(nn->dp.netdev); err_clean_mbox: @@ -2652,5 +2704,6 @@ void nfp_net_clean(struct nfp_net *nn) unregister_netdev(nn->dp.netdev); nfp_net_ipsec_clean(nn); nfp_ccm_mbox_clean(nn); + flush_work(&nn->mc_work); nfp_net_reconfig_wait_posted(nn); } From 7d803344fdc3e38079fabcf38b1e4cb6f8faa655 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 20 Dec 2022 11:52:14 -0800 Subject: [PATCH 0043/1593] mptcp: fix deadlock in fastopen error path MatM reported a deadlock at fastopening time: INFO: task syz-executor.0:11454 blocked for more than 143 seconds. Tainted: G S 6.1.0-rc5-03226-gdb0157db5153 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:syz-executor.0 state:D stack:25104 pid:11454 ppid:424 flags:0x00004006 Call Trace: context_switch kernel/sched/core.c:5191 [inline] __schedule+0x5c2/0x1550 kernel/sched/core.c:6503 schedule+0xe8/0x1c0 kernel/sched/core.c:6579 __lock_sock+0x142/0x260 net/core/sock.c:2896 lock_sock_nested+0xdb/0x100 net/core/sock.c:3466 __mptcp_close_ssk+0x1a3/0x790 net/mptcp/protocol.c:2328 mptcp_destroy_common+0x16a/0x650 net/mptcp/protocol.c:3171 mptcp_disconnect+0xb8/0x450 net/mptcp/protocol.c:3019 __inet_stream_connect+0x897/0xa40 net/ipv4/af_inet.c:720 tcp_sendmsg_fastopen+0x3dd/0x740 net/ipv4/tcp.c:1200 mptcp_sendmsg_fastopen net/mptcp/protocol.c:1682 [inline] mptcp_sendmsg+0x128a/0x1a50 net/mptcp/protocol.c:1721 inet6_sendmsg+0x11f/0x150 net/ipv6/af_inet6.c:663 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xf7/0x190 net/socket.c:734 ____sys_sendmsg+0x336/0x970 net/socket.c:2476 ___sys_sendmsg+0x122/0x1c0 net/socket.c:2530 __sys_sendmmsg+0x18d/0x460 net/socket.c:2616 __do_sys_sendmmsg net/socket.c:2645 [inline] __se_sys_sendmmsg net/socket.c:2642 [inline] __x64_sys_sendmmsg+0x9d/0x110 net/socket.c:2642 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f5920a75e7d RSP: 002b:00007f59201e8028 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00007f5920bb4f80 RCX: 00007f5920a75e7d RDX: 0000000000000001 RSI: 0000000020002940 RDI: 0000000000000005 RBP: 00007f5920ae7593 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000020004050 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007f5920bb4f80 R15: 00007f59201c8000 In the error path, tcp_sendmsg_fastopen() ends-up calling mptcp_disconnect(), and the latter tries to close each subflow, acquiring the socket lock on each of them. At fastopen time, we have a single subflow, and such subflow socket lock is already held by the called, causing the deadlock. We already track the 'fastopen in progress' status inside the msk socket. Use it to address the issue, making mptcp_disconnect() a no op when invoked from the fastopen (error) path and doing the relevant cleanup after releasing the subflow socket lock. While at the above, rename the fastopen status bit to something more meaningful. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/321 Fixes: fa9e57468aa1 ("mptcp: fix abba deadlock on fastopen") Reported-by: Mat Martineau Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 18 +++++++++++++++--- net/mptcp/protocol.h | 2 +- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f6f93957275b8..907b435e29842 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1662,6 +1662,8 @@ static void mptcp_set_nospace(struct sock *sk) set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags); } +static int mptcp_disconnect(struct sock *sk, int flags); + static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg, size_t len, int *copied_syn) { @@ -1672,9 +1674,9 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh lock_sock(ssk); msg->msg_flags |= MSG_DONTWAIT; msk->connect_flags = O_NONBLOCK; - msk->is_sendmsg = 1; + msk->fastopening = 1; ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL); - msk->is_sendmsg = 0; + msk->fastopening = 0; msg->msg_flags = saved_flags; release_sock(ssk); @@ -1688,6 +1690,8 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh */ if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR) *copied_syn = 0; + } else if (ret && ret != -EINPROGRESS) { + mptcp_disconnect(sk, 0); } return ret; @@ -2989,6 +2993,14 @@ static int mptcp_disconnect(struct sock *sk, int flags) { struct mptcp_sock *msk = mptcp_sk(sk); + /* We are on the fastopen error path. We can't call straight into the + * subflows cleanup code due to lock nesting (we are already under + * msk->firstsocket lock). Do nothing and leave the cleanup to the + * caller. + */ + if (msk->fastopening) + return 0; + inet_sk_state_store(sk, TCP_CLOSE); mptcp_stop_timer(sk); @@ -3532,7 +3544,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* if reaching here via the fastopen/sendmsg path, the caller already * acquired the subflow socket lock, too. */ - if (msk->is_sendmsg) + if (msk->fastopening) err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1); else err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 955fb3d88eb3a..f47d3e4018b51 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -295,7 +295,7 @@ struct mptcp_sock { u8 recvmsg_inq:1, cork:1, nodelay:1, - is_sendmsg:1; + fastopening:1; int connect_flags; struct work_struct work; struct sk_buff *ooo_last_skb; From fec3adfd754ccc99a7230e8ab9f105b65fb07bcc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 20 Dec 2022 11:52:15 -0800 Subject: [PATCH 0044/1593] mptcp: fix lockdep false positive MattB reported a lockdep splat in the mptcp listener code cleanup: WARNING: possible circular locking dependency detected packetdrill/14278 is trying to acquire lock: ffff888017d868f0 ((work_completion)(&msk->work)){+.+.}-{0:0}, at: __flush_work (kernel/workqueue.c:3069) but task is already holding lock: ffff888017d84130 (sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_close (net/mptcp/protocol.c:2973) which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (sk_lock-AF_INET){+.+.}-{0:0}: __lock_acquire (kernel/locking/lockdep.c:5055) lock_acquire (kernel/locking/lockdep.c:466) lock_sock_nested (net/core/sock.c:3463) mptcp_worker (net/mptcp/protocol.c:2614) process_one_work (kernel/workqueue.c:2294) worker_thread (include/linux/list.h:292) kthread (kernel/kthread.c:376) ret_from_fork (arch/x86/entry/entry_64.S:312) -> #0 ((work_completion)(&msk->work)){+.+.}-{0:0}: check_prev_add (kernel/locking/lockdep.c:3098) validate_chain (kernel/locking/lockdep.c:3217) __lock_acquire (kernel/locking/lockdep.c:5055) lock_acquire (kernel/locking/lockdep.c:466) __flush_work (kernel/workqueue.c:3070) __cancel_work_timer (kernel/workqueue.c:3160) mptcp_cancel_work (net/mptcp/protocol.c:2758) mptcp_subflow_queue_clean (net/mptcp/subflow.c:1817) __mptcp_close_ssk (net/mptcp/protocol.c:2363) mptcp_destroy_common (net/mptcp/protocol.c:3170) mptcp_destroy (include/net/sock.h:1495) __mptcp_destroy_sock (net/mptcp/protocol.c:2886) __mptcp_close (net/mptcp/protocol.c:2959) mptcp_close (net/mptcp/protocol.c:2974) inet_release (net/ipv4/af_inet.c:432) __sock_release (net/socket.c:651) sock_close (net/socket.c:1367) __fput (fs/file_table.c:320) task_work_run (kernel/task_work.c:181 (discriminator 1)) exit_to_user_mode_prepare (include/linux/resume_user_mode.h:49) syscall_exit_to_user_mode (kernel/entry/common.c:130) do_syscall_64 (arch/x86/entry/common.c:87) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(sk_lock-AF_INET); lock((work_completion)(&msk->work)); lock(sk_lock-AF_INET); lock((work_completion)(&msk->work)); *** DEADLOCK *** The report is actually a false positive, since the only existing lock nesting is the msk socket lock acquired by the mptcp work. cancel_work_sync() is invoked without the relevant socket lock being held, but under a different (the msk listener) socket lock. We could silence the splat adding a per workqueue dynamic lockdep key, but that looks overkill. Instead just tell lockdep the msk socket lock is not held around cancel_work_sync(). Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/322 Fixes: 30e51b923e43 ("mptcp: fix unreleased socket in accept queue") Reported-by: Matthieu Baerts Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 2 +- net/mptcp/subflow.c | 19 +++++++++++++++++-- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 907b435e29842..b7ad030dfe891 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2357,7 +2357,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, /* otherwise tcp will dispose of the ssk and subflow ctx */ if (ssk->sk_state == TCP_LISTEN) { tcp_set_state(ssk, TCP_CLOSE); - mptcp_subflow_queue_clean(ssk); + mptcp_subflow_queue_clean(sk, ssk); inet_csk_listen_stop(ssk); mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f47d3e4018b51..a0d1658ce59ee 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -628,7 +628,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void __mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); -void mptcp_subflow_queue_clean(struct sock *ssk); +void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index d1d32a66ae3f7..bd387d4b5a38f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1791,7 +1791,7 @@ static void subflow_state_change(struct sock *sk) } } -void mptcp_subflow_queue_clean(struct sock *listener_ssk) +void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) { struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; struct mptcp_sock *msk, *next, *head = NULL; @@ -1840,8 +1840,23 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk) do_cancel_work = __mptcp_close(sk, 0); release_sock(sk); - if (do_cancel_work) + if (do_cancel_work) { + /* lockdep will report a false positive ABBA deadlock + * between cancel_work_sync and the listener socket. + * The involved locks belong to different sockets WRT + * the existing AB chain. + * Using a per socket key is problematic as key + * deregistration requires process context and must be + * performed at socket disposal time, in atomic + * context. + * Just tell lockdep to consider the listener socket + * released here. + */ + mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_); mptcp_cancel_work(sk); + mutex_acquire(&listener_sk->sk_lock.dep_map, + SINGLE_DEPTH_NESTING, 0, _RET_IP_); + } sock_put(sk); } From 123b99619cca94bdca0bf7bde9abe28f0a0dfe06 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 19 Dec 2022 20:10:12 +0100 Subject: [PATCH 0045/1593] netfilter: nf_tables: honor set timeout and garbage collection updates Set timeout and garbage collection interval updates are ignored on updates. Add transaction to update global set element timeout and garbage collection interval. Fixes: 96518518cc41 ("netfilter: add nftables") Suggested-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 13 ++++++- net/netfilter/nf_tables_api.c | 63 ++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4957b4775757b..9430128aae991 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -597,7 +597,9 @@ void *nft_set_catchall_gc(const struct nft_set *set); static inline unsigned long nft_set_gc_interval(const struct nft_set *set) { - return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ; + u32 gc_int = READ_ONCE(set->gc_int); + + return gc_int ? msecs_to_jiffies(gc_int) : HZ; } /** @@ -1570,6 +1572,9 @@ struct nft_trans_rule { struct nft_trans_set { struct nft_set *set; u32 set_id; + u32 gc_int; + u64 timeout; + bool update; bool bound; }; @@ -1579,6 +1584,12 @@ struct nft_trans_set { (((struct nft_trans_set *)trans->data)->set_id) #define nft_trans_set_bound(trans) \ (((struct nft_trans_set *)trans->data)->bound) +#define nft_trans_set_update(trans) \ + (((struct nft_trans_set *)trans->data)->update) +#define nft_trans_set_timeout(trans) \ + (((struct nft_trans_set *)trans->data)->timeout) +#define nft_trans_set_gc_int(trans) \ + (((struct nft_trans_set *)trans->data)->gc_int) struct nft_trans_chain { bool update; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 319887f4d3ef7..8c09e4d12ac1e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -465,8 +465,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx) return 0; } -static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, - struct nft_set *set) +static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, + struct nft_set *set, + const struct nft_set_desc *desc) { struct nft_trans *trans; @@ -474,17 +475,28 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, if (trans == NULL) return -ENOMEM; - if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) { + if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) { nft_trans_set_id(trans) = ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); nft_activate_next(ctx->net, set); } nft_trans_set(trans) = set; + if (desc) { + nft_trans_set_update(trans) = true; + nft_trans_set_gc_int(trans) = desc->gc_int; + nft_trans_set_timeout(trans) = desc->timeout; + } nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } +static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, + struct nft_set *set) +{ + return __nft_trans_set_add(ctx, msg_type, set, NULL); +} + static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) { int err; @@ -4044,8 +4056,10 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb, static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, const struct nft_set *set, u16 event, u16 flags) { - struct nlmsghdr *nlh; + u64 timeout = READ_ONCE(set->timeout); + u32 gc_int = READ_ONCE(set->gc_int); u32 portid = ctx->portid; + struct nlmsghdr *nlh; struct nlattr *nest; u32 seq = ctx->seq; int i; @@ -4081,13 +4095,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype))) goto nla_put_failure; - if (set->timeout && + if (timeout && nla_put_be64(skb, NFTA_SET_TIMEOUT, - nf_jiffies64_to_msecs(set->timeout), + nf_jiffies64_to_msecs(timeout), NFTA_SET_PAD)) goto nla_put_failure; - if (set->gc_int && - nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int))) + if (gc_int && + nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(gc_int))) goto nla_put_failure; if (set->policy != NFT_SET_POL_PERFORMANCE) { @@ -4632,7 +4646,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, for (i = 0; i < num_exprs; i++) nft_expr_destroy(&ctx, exprs[i]); - return err; + if (err < 0) + return err; + + return __nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set, &desc); } if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) @@ -6070,7 +6087,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return err; } else if (set->flags & NFT_SET_TIMEOUT && !(flags & NFT_SET_ELEM_INTERVAL_END)) { - timeout = set->timeout; + timeout = READ_ONCE(set->timeout); } expiration = 0; @@ -6171,7 +6188,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_parse_key_end; - if (timeout != set->timeout) { + if (timeout != READ_ONCE(set->timeout)) { err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); if (err < 0) goto err_parse_key_end; @@ -9093,14 +9110,20 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_NEWSET: - nft_clear(net, nft_trans_set(trans)); - /* This avoids hitting -EBUSY when deleting the table - * from the transaction. - */ - if (nft_set_is_anonymous(nft_trans_set(trans)) && - !list_empty(&nft_trans_set(trans)->bindings)) - trans->ctx.table->use--; + if (nft_trans_set_update(trans)) { + struct nft_set *set = nft_trans_set(trans); + WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans)); + WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans)); + } else { + nft_clear(net, nft_trans_set(trans)); + /* This avoids hitting -EBUSY when deleting the table + * from the transaction. + */ + if (nft_set_is_anonymous(nft_trans_set(trans)) && + !list_empty(&nft_trans_set(trans)->bindings)) + trans->ctx.table->use--; + } nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_NEWSET, GFP_KERNEL); nft_trans_destroy(trans); @@ -9322,6 +9345,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: + if (nft_trans_set_update(trans)) { + nft_trans_destroy(trans); + break; + } trans->ctx.table->use--; if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); From 42c7ded0eeacd2ba5db599205c71c279dc715de7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Dec 2022 13:08:31 +0000 Subject: [PATCH 0046/1593] bonding: fix lockdep splat in bond_miimon_commit() bond_miimon_commit() is run while RTNL is held, not RCU. WARNING: suspicious RCU usage 6.1.0-syzkaller-09671-g89529367293c #0 Not tainted ----------------------------- drivers/net/bonding/bond_main.c:2704 suspicious rcu_dereference_check() usage! Fixes: e95cc44763a4 ("bonding: do failover when high prio link up") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Hangbin Liu Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Link: https://lore.kernel.org/r/20221220130831.1480888-1-edumazet@google.com Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b4c65783960a5..0363ce5976614 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2654,10 +2654,12 @@ static void bond_miimon_link_change(struct bonding *bond, static void bond_miimon_commit(struct bonding *bond) { - struct slave *slave, *primary; + struct slave *slave, *primary, *active; bool do_failover = false; struct list_head *iter; + ASSERT_RTNL(); + bond_for_each_slave(bond, slave, iter) { switch (slave->link_new_state) { case BOND_LINK_NOCHANGE: @@ -2700,8 +2702,8 @@ static void bond_miimon_commit(struct bonding *bond) bond_miimon_link_change(bond, slave, BOND_LINK_UP); - if (!rcu_access_pointer(bond->curr_active_slave) || slave == primary || - slave->prio > rcu_dereference(bond->curr_active_slave)->prio) + active = rtnl_dereference(bond->curr_active_slave); + if (!active || slave == primary || slave->prio > active->prio) do_failover = true; continue; From d717f9474e3fb7e6bd3e43ca16e131f04320ed6f Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Wed, 21 Dec 2022 10:33:15 +0100 Subject: [PATCH 0047/1593] net: lan966x: Fix configuration of the PCS When the PCS was taken out of reset, we were changing by mistake also the speed to 100 Mbit. But in case the link was going down, the link up routine was setting correctly the link speed. If the link was not getting down then the speed was forced to run at 100 even if the speed was something else. On lan966x, to set the speed link to 1G or 2.5G a value of 1 needs to be written in DEV_CLOCK_CFG_LINK_SPEED. This is similar to the procedure in lan966x_port_init. The issue was reproduced using 1000base-x sfp module using the commands: ip link set dev eth2 up ip link addr add 10.97.10.2/24 dev eth2 ethtool -s eth2 speed 1000 autoneg off Fixes: d28d6d2e37d1 ("net: lan966x: add port module support") Signed-off-by: Horatiu Vultur Reviewed-by: Piotr Raczynski Link: https://lore.kernel.org/r/20221221093315.939133-1-horatiu.vultur@microchip.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microchip/lan966x/lan966x_port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c index 1a61c6cdb0779..0050fcb988b75 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c @@ -381,7 +381,7 @@ int lan966x_port_pcs_set(struct lan966x_port *port, } /* Take PCS out of reset */ - lan_rmw(DEV_CLOCK_CFG_LINK_SPEED_SET(2) | + lan_rmw(DEV_CLOCK_CFG_LINK_SPEED_SET(LAN966X_SPEED_1000) | DEV_CLOCK_CFG_PCS_RX_RST_SET(0) | DEV_CLOCK_CFG_PCS_TX_RST_SET(0), DEV_CLOCK_CFG_LINK_SPEED | From fa349e396e4886d742fd6501c599ec627ef1353b Mon Sep 17 00:00:00 2001 From: Shawn Bohrer Date: Tue, 20 Dec 2022 12:59:03 -0600 Subject: [PATCH 0048/1593] veth: Fix race with AF_XDP exposing old or uninitialized descriptors When AF_XDP is used on on a veth interface the RX ring is updated in two steps. veth_xdp_rcv() removes packet descriptors from the FILL ring fills them and places them in the RX ring updating the cached_prod pointer. Later xdp_do_flush() syncs the RX ring prod pointer with the cached_prod pointer allowing user-space to see the recently filled in descriptors. The rings are intended to be SPSC, however the existing order in veth_poll allows the xdp_do_flush() to run concurrently with another CPU creating a race condition that allows user-space to see old or uninitialized descriptors in the RX ring. This bug has been observed in production systems. To summarize, we are expecting this ordering: CPU 0 __xsk_rcv_zc() CPU 0 __xsk_map_flush() CPU 2 __xsk_rcv_zc() CPU 2 __xsk_map_flush() But we are seeing this order: CPU 0 __xsk_rcv_zc() CPU 2 __xsk_rcv_zc() CPU 0 __xsk_map_flush() CPU 2 __xsk_map_flush() This occurs because we rely on NAPI to ensure that only one napi_poll handler is running at a time for the given veth receive queue. napi_schedule_prep() will prevent multiple instances from getting scheduled. However calling napi_complete_done() signals that this napi_poll is complete and allows subsequent calls to napi_schedule_prep() and __napi_schedule() to succeed in scheduling a concurrent napi_poll before the xdp_do_flush() has been called. For the veth driver a concurrent call to napi_schedule_prep() and __napi_schedule() can occur on a different CPU because the veth xmit path can additionally schedule a napi_poll creating the race. The fix as suggested by Magnus Karlsson, is to simply move the xdp_do_flush() call before napi_complete_done(). This syncs the producer ring pointers before another instance of napi_poll can be scheduled on another CPU. It will also slightly improve performance by moving the flush closer to when the descriptors were placed in the RX ring. Fixes: d1396004dd86 ("veth: Add XDP TX and REDIRECT") Suggested-by: Magnus Karlsson Signed-off-by: Shawn Bohrer Link: https://lore.kernel.org/r/20221220185903.1105011-1-sbohrer@cloudflare.com Signed-off-by: Paolo Abeni --- drivers/net/veth.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index ac7c0653695f0..dfc7d87fad59f 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -974,6 +974,9 @@ static int veth_poll(struct napi_struct *napi, int budget) xdp_set_return_frame_no_direct(); done = veth_xdp_rcv(rq, budget, &bq, &stats); + if (stats.xdp_redirect > 0) + xdp_do_flush(); + if (done < budget && napi_complete_done(napi, done)) { /* Write rx_notify_masked before reading ptr_ring */ smp_store_mb(rq->rx_notify_masked, false); @@ -987,8 +990,6 @@ static int veth_poll(struct napi_struct *napi, int budget) if (stats.xdp_tx > 0) veth_xdp_flush(rq, &bq); - if (stats.xdp_redirect > 0) - xdp_do_flush(); xdp_clear_return_frame_no_direct(); return done; From 789e1e10f214c00ca18fc6610824c5b9876ba5f2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 22 Dec 2022 09:51:30 -0500 Subject: [PATCH 0049/1593] nfsd: shut down the NFSv4 state objects before the filecache Currently, we shut down the filecache before trying to clean up the stateids that depend on it. This leads to the kernel trying to free an nfsd_file twice, and a refcount overput on the nf_mark. Change the shutdown procedure to tear down all of the stateids prior to shutting down the filecache. Reported-and-tested-by: Wang Yugui Signed-off-by: Jeff Layton Fixes: 5e113224c17e ("nfsd: nfsd_file cache entries should be per net namespace") Signed-off-by: Chuck Lever --- fs/nfsd/nfssvc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 56fba1cba3af7..325d3d3f12110 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -453,8 +453,8 @@ static void nfsd_shutdown_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - nfsd_file_cache_shutdown_net(net); nfs4_state_shutdown_net(net); + nfsd_file_cache_shutdown_net(net); if (nn->lockd_up) { lockd_down(net); nn->lockd_up = false; From 55171f2930be98c8a49991435cdf3a8b574353b6 Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Thu, 22 Dec 2022 10:26:27 +0000 Subject: [PATCH 0050/1593] bpftool: Fix linkage with statically built libllvm Since the commit eb9d1acf634b ("bpftool: Add LLVM as default library for disassembling JIT-ed programs") we might link the bpftool program with the libllvm library. This works fine when a shared libllvm library is available, but fails if we want to link bpftool with a statically built LLVM: [...] /usr/bin/ld: /usr/local/lib/libLLVMSupport.a(CrashRecoveryContext.cpp.o): in function `llvm::CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup()': CrashRecoveryContext.cpp:(.text._ZN4llvm27CrashRecoveryContextCleanupD0Ev+0x17): undefined reference to `operator delete(void*, unsigned long)' /usr/bin/ld: /usr/local/lib/libLLVMSupport.a(CrashRecoveryContext.cpp.o): in function `llvm::CrashRecoveryContext::~CrashRecoveryContext()': CrashRecoveryContext.cpp:(.text._ZN4llvm20CrashRecoveryContextD2Ev+0xc8): undefined reference to `operator delete(void*, unsigned long)' [...] So in the case of static libllvm we need to explicitly link bpftool with required libraries, namely, libstdc++ and those provided by the `llvm-config --system-libs` command. We can distinguish between the shared and static cases by using the `llvm-config --shared-mode` command. Fixes: eb9d1acf634b ("bpftool: Add LLVM as default library for disassembling JIT-ed programs") Signed-off-by: Anton Protopopov Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20221222102627.1643709-1-aspsk@isovalent.com --- tools/bpf/bpftool/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 787b857d3fb5f..f610e184ce02a 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -145,6 +145,10 @@ ifeq ($(feature-llvm),1) LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets CFLAGS += $(shell $(LLVM_CONFIG) --cflags --libs $(LLVM_CONFIG_LIB_COMPONENTS)) LIBS += $(shell $(LLVM_CONFIG) --libs $(LLVM_CONFIG_LIB_COMPONENTS)) + ifeq ($(shell $(LLVM_CONFIG) --shared-mode),static) + LIBS += $(shell $(LLVM_CONFIG) --system-libs $(LLVM_CONFIG_LIB_COMPONENTS)) + LIBS += -lstdc++ + endif LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags) else # Fall back on libbfd From 8374bfd5a3c90a5b250f7c087c4d2b8ac467b12e Mon Sep 17 00:00:00 2001 From: Hao Sun Date: Thu, 22 Dec 2022 10:44:13 +0800 Subject: [PATCH 0051/1593] bpf: fix nullness propagation for reg to reg comparisons After befae75856ab, the verifier would propagate null information after JEQ/JNE, e.g., if two pointers, one is maybe_null and the other is not, the former would be marked as non-null in eq path. However, as comment "PTR_TO_BTF_ID points to a kernel struct that does not need to be null checked by the BPF program ... The verifier must keep this in mind and can make no assumptions about null or non-null when doing branch ...". If one pointer is maybe_null and the other is PTR_TO_BTF, the former is incorrectly marked non-null. The following BPF prog can trigger a null-ptr-deref, also see this report for more details[1]: 0: (18) r1 = map_fd ; R1_w=map_ptr(ks=4, vs=4) 2: (79) r6 = *(u64 *)(r1 +8) ; R6_w=bpf_map->inner_map_data ; R6 is PTR_TO_BTF_ID ; equals to null at runtime 3: (bf) r2 = r10 4: (07) r2 += -4 5: (62) *(u32 *)(r2 +0) = 0 6: (85) call bpf_map_lookup_elem#1 ; R0_w=map_value_or_null 7: (1d) if r6 == r0 goto pc+1 8: (95) exit ; from 7 to 9: R0=map_value R6=ptr_bpf_map 9: (61) r0 = *(u32 *)(r0 +0) ; null-ptr-deref 10: (95) exit So, make the verifier propagate nullness information for reg to reg comparisons only if neither reg is PTR_TO_BTF_ID. [1] https://lore.kernel.org/bpf/CACkBjsaFJwjC5oiw-1KXvcazywodwXo4zGYsRHwbr2gSG9WcSw@mail.gmail.com/T/#u Fixes: befae75856ab ("bpf: propagate nullness information for reg to reg comparisons") Signed-off-by: Hao Sun Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221222024414.29539-1-sunhao.th@gmail.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/verifier.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a5255a0dcbb68..243d06ce68426 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11822,10 +11822,17 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, * register B - not null * for JNE A, B, ... - A is not null in the false branch; * for JEQ A, B, ... - A is not null in the true branch. + * + * Since PTR_TO_BTF_ID points to a kernel struct that does + * not need to be null checked by the BPF program, i.e., + * could be null even without PTR_MAYBE_NULL marking, so + * only propagate nullness when neither reg is that type. */ if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X && __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) && - type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type)) { + type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) && + base_type(src_reg->type) != PTR_TO_BTF_ID && + base_type(dst_reg->type) != PTR_TO_BTF_ID) { eq_branch_regs = NULL; switch (opcode) { case BPF_JEQ: From cedebd74cf3883f0384af9ec26b4e6f8f1964dd4 Mon Sep 17 00:00:00 2001 From: Hao Sun Date: Thu, 22 Dec 2022 10:44:14 +0800 Subject: [PATCH 0052/1593] selftests/bpf: check null propagation only neither reg is PTR_TO_BTF_ID Verify that nullness information is not porpagated in the branches of register to register JEQ and JNE operations if one of them is PTR_TO_BTF_ID. Implement this in C level so we can use CO-RE. Signed-off-by: Hao Sun Suggested-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20221222024414.29539-2-sunhao.th@gmail.com Signed-off-by: Martin KaFai Lau --- .../bpf/prog_tests/jeq_infer_not_null.c | 9 ++++ .../bpf/progs/jeq_infer_not_null_fail.c | 42 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c create mode 100644 tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c diff --git a/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c b/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c new file mode 100644 index 0000000000000..3add34df57678 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "jeq_infer_not_null_fail.skel.h" + +void test_jeq_infer_not_null(void) +{ + RUN_TESTS(jeq_infer_not_null_fail); +} diff --git a/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c new file mode 100644 index 0000000000000..f46965053acb2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, u64); + __type(value, u64); +} m_hash SEC(".maps"); + +SEC("?raw_tp") +__failure __msg("R8 invalid mem access 'map_value_or_null") +int jeq_infer_not_null_ptr_to_btfid(void *ctx) +{ + struct bpf_map *map = (struct bpf_map *)&m_hash; + struct bpf_map *inner_map = map->inner_map_meta; + u64 key = 0, ret = 0, *val; + + val = bpf_map_lookup_elem(map, &key); + /* Do not mark ptr as non-null if one of them is + * PTR_TO_BTF_ID (R9), reject because of invalid + * access to map value (R8). + * + * Here, we need to inline those insns to access + * R8 directly, since compiler may use other reg + * once it figures out val==inner_map. + */ + asm volatile("r8 = %[val];\n" + "r9 = %[inner_map];\n" + "if r8 != r9 goto +1;\n" + "%[ret] = *(u64 *)(r8 +0);\n" + : [ret] "+r"(ret) + : [inner_map] "r"(inner_map), [val] "r"(val) + : "r8", "r9"); + + return ret; +} From 8d8bee13ae9e316443c6666286360126a19c8d94 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 16 Dec 2022 12:29:37 -0500 Subject: [PATCH 0053/1593] powerpc: dts: t208x: Disable 10G on MAC1 and MAC2 There aren't enough resources to run these ports at 10G speeds. Disable 10G for these ports, reverting to the previous speed. Fixes: 36926a7d70c2 ("powerpc: dts: t208x: Mark MAC1 and MAC2 as 10G") Reported-by: Camelia Alexandra Groza Signed-off-by: Sean Anderson Reviewed-by: Camelia Groza Tested-by: Camelia Groza Link: https://lore.kernel.org/r/20221216172937.2960054-1-sean.anderson@seco.com Signed-off-by: Jakub Kicinski --- arch/powerpc/boot/dts/fsl/t2081si-post.dtsi | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi index 74e17e134387d..27714dc2f04a5 100644 --- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi @@ -659,3 +659,19 @@ interrupts = <16 2 1 9>; }; }; + +&fman0_rx_0x08 { + /delete-property/ fsl,fman-10g-port; +}; + +&fman0_tx_0x28 { + /delete-property/ fsl,fman-10g-port; +}; + +&fman0_rx_0x09 { + /delete-property/ fsl,fman-10g-port; +}; + +&fman0_tx_0x29 { + /delete-property/ fsl,fman-10g-port; +}; From 7fac54b93ad13e5e7ac237af33eb2a0940eaeea0 Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Wed, 21 Dec 2022 20:36:27 +0800 Subject: [PATCH 0054/1593] atm: uapi: fix spelling typos in comments Fix the typo of 'Unsuported' in atmbr2684.h Signed-off-by: Rong Tao Link: https://lore.kernel.org/r/tencent_F1354BEC925C65EA357E741E91DF2044E805@qq.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/atmbr2684.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/atmbr2684.h b/include/uapi/linux/atmbr2684.h index a9e2250cd7205..d47c47d06f110 100644 --- a/include/uapi/linux/atmbr2684.h +++ b/include/uapi/linux/atmbr2684.h @@ -38,7 +38,7 @@ */ #define BR2684_ENCAPS_VC (0) /* VC-mux */ #define BR2684_ENCAPS_LLC (1) -#define BR2684_ENCAPS_AUTODETECT (2) /* Unsuported */ +#define BR2684_ENCAPS_AUTODETECT (2) /* Unsupported */ /* * Is this VC bridged or routed? From 09e6b30eeb254f1818a008cace3547159e908dfd Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Thu, 22 Dec 2022 14:43:41 +0800 Subject: [PATCH 0055/1593] net: hns3: add interrupts re-initialization while doing VF FLR Currently keep alive message between PF and VF may be lost and the VF is unalive in PF. So the VF will not do reset during PF FLR reset process. This would make the allocated interrupt resources of VF invalid and VF would't receive or respond to PF any more. So this patch adds VF interrupts re-initialization during VF FLR for VF recovery in above cases. Fixes: 862d969a3a4d ("net: hns3: do VF's pci re-initialization while PF doing FLR") Signed-off-by: Jie Wang Signed-off-by: Hao Lan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index db6f7cdba9587..081bd2c3f2891 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2767,7 +2767,8 @@ static int hclgevf_pci_reset(struct hclgevf_dev *hdev) struct pci_dev *pdev = hdev->pdev; int ret = 0; - if (hdev->reset_type == HNAE3_VF_FULL_RESET && + if ((hdev->reset_type == HNAE3_VF_FULL_RESET || + hdev->reset_type == HNAE3_FLR_RESET) && test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) { hclgevf_misc_irq_uninit(hdev); hclgevf_uninit_msi(hdev); From 7d89b53cea1a702f97117fb4361523519bb1e52c Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Thu, 22 Dec 2022 14:43:42 +0800 Subject: [PATCH 0056/1593] net: hns3: fix miss L3E checking for rx packet For device supports RXD advanced layout, the driver will return directly if the hardware finish the checksum calculate. It cause missing L3E checking for ip packets. Fixes it. Fixes: 1ddc028ac849 ("net: hns3: refactor out RX completion checksum") Signed-off-by: Jian Shen Signed-off-by: Hao Lan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 0ec5730b17886..b4c4fb873568c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3855,18 +3855,16 @@ static int hns3_gro_complete(struct sk_buff *skb, u32 l234info) return 0; } -static bool hns3_checksum_complete(struct hns3_enet_ring *ring, +static void hns3_checksum_complete(struct hns3_enet_ring *ring, struct sk_buff *skb, u32 ptype, u16 csum) { if (ptype == HNS3_INVALID_PTYPE || hns3_rx_ptype_tbl[ptype].ip_summed != CHECKSUM_COMPLETE) - return false; + return; hns3_ring_stats_update(ring, csum_complete); skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)csum); - - return true; } static void hns3_rx_handle_csum(struct sk_buff *skb, u32 l234info, @@ -3926,8 +3924,7 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, ptype = hnae3_get_field(ol_info, HNS3_RXD_PTYPE_M, HNS3_RXD_PTYPE_S); - if (hns3_checksum_complete(ring, skb, ptype, csum)) - return; + hns3_checksum_complete(ring, skb, ptype, csum); /* check if hardware has done checksum */ if (!(bd_base_info & BIT(HNS3_RXD_L3L4P_B))) @@ -3936,6 +3933,7 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, if (unlikely(l234info & (BIT(HNS3_RXD_L3E_B) | BIT(HNS3_RXD_L4E_B) | BIT(HNS3_RXD_OL3E_B) | BIT(HNS3_RXD_OL4E_B)))) { + skb->ip_summed = CHECKSUM_NONE; hns3_ring_stats_update(ring, l3l4_csum_err); return; From 8ee57c7b8406c7aa8ca31e014440c87c6383f429 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Thu, 22 Dec 2022 14:43:43 +0800 Subject: [PATCH 0057/1593] net: hns3: fix VF promisc mode not update when mac table full Currently, it missed set HCLGE_VPORT_STATE_PROMISC_CHANGE flag for VF when vport->overflow_promisc_flags changed. So the VF won't check whether to update promisc mode in this case. So add it. Fixes: 1e6e76101fd9 ("net: hns3: configure promisc mode for VF asynchronously") Signed-off-by: Jian Shen Signed-off-by: Hao Lan Signed-off-by: Jakub Kicinski --- .../hisilicon/hns3/hns3pf/hclge_main.c | 75 +++++++++++-------- 1 file changed, 43 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 4e54f91f7a6c1..6c2742f59c777 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -12754,60 +12754,71 @@ static int hclge_gro_en(struct hnae3_handle *handle, bool enable) return ret; } -static void hclge_sync_promisc_mode(struct hclge_dev *hdev) +static int hclge_sync_vport_promisc_mode(struct hclge_vport *vport) { - struct hclge_vport *vport = &hdev->vport[0]; struct hnae3_handle *handle = &vport->nic; + struct hclge_dev *hdev = vport->back; + bool uc_en = false; + bool mc_en = false; u8 tmp_flags; + bool bc_en; int ret; - u16 i; if (vport->last_promisc_flags != vport->overflow_promisc_flags) { set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state); vport->last_promisc_flags = vport->overflow_promisc_flags; } - if (test_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state)) { + if (!test_and_clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, + &vport->state)) + return 0; + + /* for PF */ + if (!vport->vport_id) { tmp_flags = handle->netdev_flags | vport->last_promisc_flags; ret = hclge_set_promisc_mode(handle, tmp_flags & HNAE3_UPE, tmp_flags & HNAE3_MPE); - if (!ret) { - clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, - &vport->state); + if (!ret) set_bit(HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE, &vport->state); - } + else + set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, + &vport->state); + return ret; } - for (i = 1; i < hdev->num_alloc_vport; i++) { - bool uc_en = false; - bool mc_en = false; - bool bc_en; + /* for VF */ + if (vport->vf_info.trusted) { + uc_en = vport->vf_info.request_uc_en > 0 || + vport->overflow_promisc_flags & HNAE3_OVERFLOW_UPE; + mc_en = vport->vf_info.request_mc_en > 0 || + vport->overflow_promisc_flags & HNAE3_OVERFLOW_MPE; + } + bc_en = vport->vf_info.request_bc_en > 0; - vport = &hdev->vport[i]; + ret = hclge_cmd_set_promisc_mode(hdev, vport->vport_id, uc_en, + mc_en, bc_en); + if (ret) { + set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state); + return ret; + } + hclge_set_vport_vlan_fltr_change(vport); - if (!test_and_clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, - &vport->state)) - continue; + return 0; +} - if (vport->vf_info.trusted) { - uc_en = vport->vf_info.request_uc_en > 0 || - vport->overflow_promisc_flags & - HNAE3_OVERFLOW_UPE; - mc_en = vport->vf_info.request_mc_en > 0 || - vport->overflow_promisc_flags & - HNAE3_OVERFLOW_MPE; - } - bc_en = vport->vf_info.request_bc_en > 0; +static void hclge_sync_promisc_mode(struct hclge_dev *hdev) +{ + struct hclge_vport *vport; + int ret; + u16 i; - ret = hclge_cmd_set_promisc_mode(hdev, vport->vport_id, uc_en, - mc_en, bc_en); - if (ret) { - set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, - &vport->state); + for (i = 0; i < hdev->num_alloc_vport; i++) { + vport = &hdev->vport[i]; + + ret = hclge_sync_vport_promisc_mode(vport); + if (ret) return; - } - hclge_set_vport_vlan_fltr_change(vport); } } From fcbb408a1aaf426f88d8fb3b4c14e3625745b02f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 22 Dec 2022 13:39:58 -0800 Subject: [PATCH 0058/1593] selftests/bpf: Add host-tools to gitignore Shows up when cross-compiling: HOST_SCRATCH_DIR := $(OUTPUT)/host-tools vs SCRATCH_DIR := $(OUTPUT)/tools HOST_SCRATCH_DIR := $(SCRATCH_DIR) Reported-by: John Sperbeck Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20221222213958.2302320-1-sdf@google.com --- tools/testing/selftests/bpf/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 07d2d0a8c5cb4..401a75844cc0e 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -36,6 +36,7 @@ test_cpp *.lskel.h /no_alu32 /bpf_gcc +/host-tools /tools /runqslower /bench From 523dfa96add75e60cfe6bf5a1c8f713635cd6b73 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Dec 2022 17:34:49 +0100 Subject: [PATCH 0059/1593] drm/tests: reduce drm_mm_test stack usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The check_reserve_boundaries function uses a lot of kernel stack, and it gets inlined by clang, which makes __drm_test_mm_reserve use even more of it, to the point of hitting the warning limit: drivers/gpu/drm/tests/drm_mm_test.c:344:12: error: stack frame size (1048) exceeds limit (1024) in '__drm_test_mm_reserve' [-Werror,-Wframe-larger-than] When building with gcc, this does not happen, but the structleak plugin can similarly increase the stack usage and needs to be disabled, as we do for all other kunit users. Signed-off-by: Arnd Bergmann Reviewed-by: Maíra Canal Reviewed-by: Nathan Chancellor Signed-off-by: Maíra Canal Link: https://patchwork.freedesktop.org/patch/msgid/20221215163511.266214-1-arnd@kernel.org --- drivers/gpu/drm/tests/Makefile | 2 ++ drivers/gpu/drm/tests/drm_mm_test.c | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile index b29ef1085cad9..f896ef85c2f2b 100644 --- a/drivers/gpu/drm/tests/Makefile +++ b/drivers/gpu/drm/tests/Makefile @@ -12,3 +12,5 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \ drm_mm_test.o \ drm_plane_helper_test.o \ drm_rect_test.o + +CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN) diff --git a/drivers/gpu/drm/tests/drm_mm_test.c b/drivers/gpu/drm/tests/drm_mm_test.c index 89f12d3b4a219..186b28dc70380 100644 --- a/drivers/gpu/drm/tests/drm_mm_test.c +++ b/drivers/gpu/drm/tests/drm_mm_test.c @@ -298,9 +298,9 @@ static bool expect_reserve_fail(struct kunit *test, struct drm_mm *mm, struct dr return false; } -static bool check_reserve_boundaries(struct kunit *test, struct drm_mm *mm, - unsigned int count, - u64 size) +static bool noinline_for_stack check_reserve_boundaries(struct kunit *test, struct drm_mm *mm, + unsigned int count, + u64 size) { const struct boundary { u64 start, size; From 399ab7fe0fa0d846881685fd4e57e9a8ef7559f7 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Thu, 22 Dec 2022 11:51:19 +0800 Subject: [PATCH 0060/1593] net: sched: fix memory leak in tcindex_set_parms Syzkaller reports a memory leak as follows: ==================================== BUG: memory leak unreferenced object 0xffff88810c287f00 (size 256): comm "syz-executor105", pid 3600, jiffies 4294943292 (age 12.990s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmalloc_trace+0x20/0x90 mm/slab_common.c:1046 [] kmalloc include/linux/slab.h:576 [inline] [] kmalloc_array include/linux/slab.h:627 [inline] [] kcalloc include/linux/slab.h:659 [inline] [] tcf_exts_init include/net/pkt_cls.h:250 [inline] [] tcindex_set_parms+0xa7/0xbe0 net/sched/cls_tcindex.c:342 [] tcindex_change+0xdf/0x120 net/sched/cls_tcindex.c:553 [] tc_new_tfilter+0x4f2/0x1100 net/sched/cls_api.c:2147 [] rtnetlink_rcv_msg+0x4dc/0x5d0 net/core/rtnetlink.c:6082 [] netlink_rcv_skb+0x87/0x1d0 net/netlink/af_netlink.c:2540 [] netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] [] netlink_unicast+0x397/0x4c0 net/netlink/af_netlink.c:1345 [] netlink_sendmsg+0x396/0x710 net/netlink/af_netlink.c:1921 [] sock_sendmsg_nosec net/socket.c:714 [inline] [] sock_sendmsg+0x56/0x80 net/socket.c:734 [] ____sys_sendmsg+0x178/0x410 net/socket.c:2482 [] ___sys_sendmsg+0xa8/0x110 net/socket.c:2536 [] __sys_sendmmsg+0x105/0x330 net/socket.c:2622 [] __do_sys_sendmmsg net/socket.c:2651 [inline] [] __se_sys_sendmmsg net/socket.c:2648 [inline] [] __x64_sys_sendmmsg+0x24/0x30 net/socket.c:2648 [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd ==================================== Kernel uses tcindex_change() to change an existing filter properties. Yet the problem is that, during the process of changing, if `old_r` is retrieved from `p->perfect`, then kernel uses tcindex_alloc_perfect_hash() to newly allocate filter results, uses tcindex_filter_result_init() to clear the old filter result, without destroying its tcf_exts structure, which triggers the above memory leak. To be more specific, there are only two source for the `old_r`, according to the tcindex_lookup(). `old_r` is retrieved from `p->perfect`, or `old_r` is retrieved from `p->h`. * If `old_r` is retrieved from `p->perfect`, kernel uses tcindex_alloc_perfect_hash() to newly allocate the filter results. Then `r` is assigned with `cp->perfect + handle`, which is newly allocated. So condition `old_r && old_r != r` is true in this situation, and kernel uses tcindex_filter_result_init() to clear the old filter result, without destroying its tcf_exts structure * If `old_r` is retrieved from `p->h`, then `p->perfect` is NULL according to the tcindex_lookup(). Considering that `cp->h` is directly copied from `p->h` and `p->perfect` is NULL, `r` is assigned with `tcindex_lookup(cp, handle)`, whose value should be the same as `old_r`, so condition `old_r && old_r != r` is false in this situation, kernel ignores using tcindex_filter_result_init() to clear the old filter result. So only when `old_r` is retrieved from `p->perfect` does kernel use tcindex_filter_result_init() to clear the old filter result, which triggers the above memory leak. Considering that there already exists a tc_filter_wq workqueue to destroy the old tcindex_data by tcindex_partial_destroy_work() at the end of tcindex_set_parms(), this patch solves this memory leak bug by removing this old filter result clearing part and delegating it to the tc_filter_wq workqueue. Note that this patch doesn't introduce any other issues. If `old_r` is retrieved from `p->perfect`, this patch just delegates old filter result clearing part to the tc_filter_wq workqueue; If `old_r` is retrieved from `p->h`, kernel doesn't reach the old filter result clearing part, so removing this part has no effect. [Thanks to the suggestion from Jakub Kicinski, Cong Wang, Paolo Abeni and Dmitry Vyukov] Fixes: b9a24bb76bf6 ("net_sched: properly handle failure case of tcf_exts_init()") Link: https://lore.kernel.org/all/0000000000001de5c505ebc9ec59@google.com/ Reported-by: syzbot+232ebdbd36706c965ebf@syzkaller.appspotmail.com Tested-by: syzbot+232ebdbd36706c965ebf@syzkaller.appspotmail.com Cc: Cong Wang Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Dmitry Vyukov Acked-by: Paolo Abeni Signed-off-by: Hawkins Jiawei Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index eb0e9458e722e..ee2a050c887bf 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -333,7 +333,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcindex_filter_result *r, struct nlattr **tb, struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { - struct tcindex_filter_result new_filter_result, *old_r = r; + struct tcindex_filter_result new_filter_result; struct tcindex_data *cp = NULL, *oldp; struct tcindex_filter *f = NULL; /* make gcc behave */ struct tcf_result cr = {}; @@ -402,7 +402,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = tcindex_filter_result_init(&new_filter_result, cp, net); if (err < 0) goto errout_alloc; - if (old_r) + if (r) cr = r->res; err = -EBUSY; @@ -479,14 +479,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, tcf_bind_filter(tp, &cr, base); } - if (old_r && old_r != r) { - err = tcindex_filter_result_init(old_r, cp, net); - if (err < 0) { - kfree(f); - goto errout_alloc; - } - } - oldp = p; r->res = cr; tcf_exts_change(&r->exts, &e); From 13a7c8964afcd8ca43c0b6001ebb0127baa95362 Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Thu, 22 Dec 2022 14:52:28 +0300 Subject: [PATCH 0061/1593] qlcnic: prevent ->dcb use-after-free on qlcnic_dcb_enable() failure adapter->dcb would get silently freed inside qlcnic_dcb_enable() in case qlcnic_dcb_attach() would return an error, which always happens under OOM conditions. This would lead to use-after-free because both of the existing callers invoke qlcnic_dcb_get_info() on the obtained pointer, which is potentially freed at that point. Propagate errors from qlcnic_dcb_enable(), and instead free the dcb pointer at callsite using qlcnic_dcb_free(). This also removes the now unused qlcnic_clear_dcb_ops() helper, which was a simple wrapper around kfree() also causing memory leaks for partially initialized dcb. Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Fixes: 3c44bba1d270 ("qlcnic: Disable DCB operations from SR-IOV VFs") Reviewed-by: Michal Swiatkowski Signed-off-by: Daniil Tatianin Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c | 8 +++++++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h | 10 ++-------- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 8 +++++++- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index dbb800769cb63..c95d56e56c59a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -2505,7 +2505,13 @@ int qlcnic_83xx_init(struct qlcnic_adapter *adapter) goto disable_mbx_intr; qlcnic_83xx_clear_function_resources(adapter); - qlcnic_dcb_enable(adapter->dcb); + + err = qlcnic_dcb_enable(adapter->dcb); + if (err) { + qlcnic_dcb_free(adapter->dcb); + goto disable_mbx_intr; + } + qlcnic_83xx_initialize_nic(adapter, 1); qlcnic_dcb_get_info(adapter->dcb); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h index 7519773eaca6e..22afa2be85fdb 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h @@ -41,11 +41,6 @@ struct qlcnic_dcb { unsigned long state; }; -static inline void qlcnic_clear_dcb_ops(struct qlcnic_dcb *dcb) -{ - kfree(dcb); -} - static inline int qlcnic_dcb_get_hw_capability(struct qlcnic_dcb *dcb) { if (dcb && dcb->ops->get_hw_capability) @@ -112,9 +107,8 @@ static inline void qlcnic_dcb_init_dcbnl_ops(struct qlcnic_dcb *dcb) dcb->ops->init_dcbnl_ops(dcb); } -static inline void qlcnic_dcb_enable(struct qlcnic_dcb *dcb) +static inline int qlcnic_dcb_enable(struct qlcnic_dcb *dcb) { - if (dcb && qlcnic_dcb_attach(dcb)) - qlcnic_clear_dcb_ops(dcb); + return dcb ? qlcnic_dcb_attach(dcb) : 0; } #endif diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 28476b982bab6..44dac3c0908eb 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2599,7 +2599,13 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) "Device does not support MSI interrupts\n"); if (qlcnic_82xx_check(adapter)) { - qlcnic_dcb_enable(adapter->dcb); + err = qlcnic_dcb_enable(adapter->dcb); + if (err) { + qlcnic_dcb_free(adapter->dcb); + dev_err(&pdev->dev, "Failed to enable DCB\n"); + goto err_out_free_hw; + } + qlcnic_dcb_get_info(adapter->dcb); err = qlcnic_setup_intr(adapter); From 30e725537546248bddc12eaac2fe0a258917f190 Mon Sep 17 00:00:00 2001 From: "Johnny S. Lee" Date: Thu, 22 Dec 2022 22:34:05 +0800 Subject: [PATCH 0062/1593] net: dsa: mv88e6xxx: depend on PTP conditionally PTP hardware timestamping related objects are not linked when PTP support for MV88E6xxx (NET_DSA_MV88E6XXX_PTP) is disabled, therefore NET_DSA_MV88E6XXX should not depend on PTP_1588_CLOCK_OPTIONAL regardless of NET_DSA_MV88E6XXX_PTP. Instead, condition more strictly on how NET_DSA_MV88E6XXX_PTP's dependencies are met, making sure that it cannot be enabled when NET_DSA_MV88E6XXX=y and PTP_1588_CLOCK=m. In other words, this commit allows NET_DSA_MV88E6XXX to be built-in while PTP_1588_CLOCK is a module, as long as NET_DSA_MV88E6XXX_PTP is prevented from being enabled. Fixes: e5f31552674e ("ethernet: fix PTP_1588_CLOCK dependencies") Signed-off-by: Johnny S. Lee Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig index 7a2445a34eb77..e3181d5471dfe 100644 --- a/drivers/net/dsa/mv88e6xxx/Kconfig +++ b/drivers/net/dsa/mv88e6xxx/Kconfig @@ -2,7 +2,6 @@ config NET_DSA_MV88E6XXX tristate "Marvell 88E6xxx Ethernet switch fabric support" depends on NET_DSA - depends on PTP_1588_CLOCK_OPTIONAL select IRQ_DOMAIN select NET_DSA_TAG_EDSA select NET_DSA_TAG_DSA @@ -13,7 +12,8 @@ config NET_DSA_MV88E6XXX config NET_DSA_MV88E6XXX_PTP bool "PTP support for Marvell 88E6xxx" default n - depends on NET_DSA_MV88E6XXX && PTP_1588_CLOCK + depends on (NET_DSA_MV88E6XXX = y && PTP_1588_CLOCK = y) || \ + (NET_DSA_MV88E6XXX = m && PTP_1588_CLOCK) help Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch chips that support it. From df49908f3c52d211aea5e2a14a93bbe67a2cb3af Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 23 Dec 2022 11:37:18 +0400 Subject: [PATCH 0063/1593] nfc: Fix potential resource leaks nfc_get_device() take reference for the device, add missing nfc_put_device() to release it when not need anymore. Also fix the style warnning by use error EOPNOTSUPP instead of ENOTSUPP. Fixes: 5ce3f32b5264 ("NFC: netlink: SE API implementation") Fixes: 29e76924cf08 ("nfc: netlink: Add capability to reply to vendor_cmd with data") Signed-off-by: Miaoqian Lin Signed-off-by: David S. Miller --- net/nfc/netlink.c | 52 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 9d91087b93992..1fc339084d897 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1497,6 +1497,7 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) u32 dev_idx, se_idx; u8 *apdu; size_t apdu_len; + int rc; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_SE_INDEX] || @@ -1510,25 +1511,37 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) if (!dev) return -ENODEV; - if (!dev->ops || !dev->ops->se_io) - return -ENOTSUPP; + if (!dev->ops || !dev->ops->se_io) { + rc = -EOPNOTSUPP; + goto put_dev; + } apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]); - if (apdu_len == 0) - return -EINVAL; + if (apdu_len == 0) { + rc = -EINVAL; + goto put_dev; + } apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]); - if (!apdu) - return -EINVAL; + if (!apdu) { + rc = -EINVAL; + goto put_dev; + } ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; + if (!ctx) { + rc = -ENOMEM; + goto put_dev; + } ctx->dev_idx = dev_idx; ctx->se_idx = se_idx; - return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); + rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); + +put_dev: + nfc_put_device(dev); + return rc; } static int nfc_genl_vendor_cmd(struct sk_buff *skb, @@ -1551,14 +1564,21 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]); dev = nfc_get_device(dev_idx); - if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds) + if (!dev) return -ENODEV; + if (!dev->vendor_cmds || !dev->n_vendor_cmds) { + err = -ENODEV; + goto put_dev; + } + if (info->attrs[NFC_ATTR_VENDOR_DATA]) { data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]); data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]); - if (data_len == 0) - return -EINVAL; + if (data_len == 0) { + err = -EINVAL; + goto put_dev; + } } else { data = NULL; data_len = 0; @@ -1573,10 +1593,14 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, dev->cur_cmd_info = info; err = cmd->doit(dev, data, data_len); dev->cur_cmd_info = NULL; - return err; + goto put_dev; } - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + +put_dev: + nfc_put_device(dev); + return err; } /* message building helper */ From d3805695fe1e7383517903715cefc9bbdcffdc90 Mon Sep 17 00:00:00 2001 From: Anuradha Weeraman Date: Sun, 25 Dec 2022 23:12:22 +0530 Subject: [PATCH 0064/1593] net: ethernet: marvell: octeontx2: Fix uninitialized variable warning Fix for uninitialized variable warning. Addresses-Coverity: ("Uninitialized scalar variable") Signed-off-by: Anuradha Weeraman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c index fa8029a940689..eb25e458266ca 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c @@ -589,7 +589,7 @@ int rvu_mbox_handler_mcs_free_resources(struct rvu *rvu, u16 pcifunc = req->hdr.pcifunc; struct mcs_rsrc_map *map; struct mcs *mcs; - int rc; + int rc = 0; if (req->mcs_id >= rvu->mcs_blk_cnt) return MCS_AF_ERR_INVALID_MCSID; From 26870c3f5b15187268bf183055c7b9f29fe66079 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 26 Dec 2022 10:11:17 -0800 Subject: [PATCH 0065/1593] xfs: don't assert if cmap covers imap after cycling lock In xfs_reflink_fill_cow_hole, there's a debugging assertion that trips if (after cycling the ILOCK to get a transaction) the requeried cow mapping overlaps the start of the area being written. IOWs, it trips if the hole in the cow fork that it's supposed to fill has been filled. This is trivially possible since we cycled ILOCK_EXCL. If we trip the assertion, then we know that cmap is a delalloc extent because @found is false. Fortunately, the bmapi_write call below will convert the delalloc extent to a real unwritten cow fork extent, so all we need to do here is remove the assertion. It turns out that generic/095 trips this pretty regularly with alwayscow mode enabled. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index fe46bce8cae63..5535778a98f92 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -416,8 +416,6 @@ xfs_reflink_fill_cow_hole( goto convert; } - ASSERT(cmap->br_startoff > imap->br_startoff); - /* Allocate the entire reservation as unwritten blocks. */ nimaps = 1; error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, From d4542f314507015ac0e25c6e8102b88293826a51 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 26 Dec 2022 10:11:18 -0800 Subject: [PATCH 0066/1593] xfs: make xfs_iomap_page_ops static Shut up the sparse warnings about this variable that isn't referenced anywhere else. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_iomap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 669c1bc5c3a77..fc1946f80a4aa 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -83,7 +83,7 @@ xfs_iomap_valid( return true; } -const struct iomap_page_ops xfs_iomap_page_ops = { +static const struct iomap_page_ops xfs_iomap_page_ops = { .iomap_valid = xfs_iomap_valid, }; From 8c3313e8556b8a817e7b4bc9c249b58855a7e49c Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 12 Dec 2022 12:44:23 +0000 Subject: [PATCH 0067/1593] arm64: dts: rockchip: use correct reset names for rk3399 crypto nodes The reset names does not follow the binding, use the correct ones. Fixes: 8c701fa6e38c ("arm64: dts: rockchip: rk3399: add crypto node") Signed-off-by: Corentin Labbe Link: https://lore.kernel.org/r/20221212124423.1239748-1-clabbe@baylibre.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 4391aea25984b..834d16acb546f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -589,7 +589,7 @@ clocks = <&cru HCLK_M_CRYPTO0>, <&cru HCLK_S_CRYPTO0>, <&cru SCLK_CRYPTO0>; clock-names = "hclk_master", "hclk_slave", "sclk"; resets = <&cru SRST_CRYPTO0>, <&cru SRST_CRYPTO0_S>, <&cru SRST_CRYPTO0_M>; - reset-names = "master", "lave", "crypto"; + reset-names = "master", "slave", "crypto-rst"; }; crypto1: crypto@ff8b8000 { @@ -599,7 +599,7 @@ clocks = <&cru HCLK_M_CRYPTO1>, <&cru HCLK_S_CRYPTO1>, <&cru SCLK_CRYPTO1>; clock-names = "hclk_master", "hclk_slave", "sclk"; resets = <&cru SRST_CRYPTO1>, <&cru SRST_CRYPTO1_S>, <&cru SRST_CRYPTO1_M>; - reset-names = "master", "slave", "crypto"; + reset-names = "master", "slave", "crypto-rst"; }; i2c1: i2c@ff110000 { From b24cded8c065d7cef8690b2c7b82b828cce57708 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Sun, 25 Dec 2022 09:37:12 +0100 Subject: [PATCH 0068/1593] spi: mediatek: Enable irq before the spi registration If the irq is enabled after the spi si registered, there can be a race with the initialization of the devices on the spi bus. Eg: mtk-spi 1100a000.spi: spi-mem transfer timeout spi-nor: probe of spi0.0 failed with error -110 Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 ... Call trace: mtk_spi_can_dma+0x0/0x2c Fixes: c6f7874687f7 ("spi: mediatek: Enable irq when pdata is ready") Reported-by: Daniel Golle Signed-off-by: Ricardo Ribalda Tested-by: Daniel Golle Link: https://lore.kernel.org/r/20221225-mtk-spi-fixes-v1-0-bb6c14c232f8@chromium.org Signed-off-by: Mark Brown --- drivers/spi/spi-mt65xx.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 6de8360e5c2a9..9eab6c20dbc56 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -1253,6 +1253,11 @@ static int mtk_spi_probe(struct platform_device *pdev) dev_notice(dev, "SPI dma_set_mask(%d) failed, ret:%d\n", addr_bits, ret); + ret = devm_request_irq(dev, irq, mtk_spi_interrupt, + IRQF_TRIGGER_NONE, dev_name(dev), master); + if (ret) + return dev_err_probe(dev, ret, "failed to register irq\n"); + pm_runtime_enable(dev); ret = devm_spi_register_master(dev, master); @@ -1261,13 +1266,6 @@ static int mtk_spi_probe(struct platform_device *pdev) return dev_err_probe(dev, ret, "failed to register master\n"); } - ret = devm_request_irq(dev, irq, mtk_spi_interrupt, - IRQF_TRIGGER_NONE, dev_name(dev), master); - if (ret) { - pm_runtime_disable(dev); - return dev_err_probe(dev, ret, "failed to register irq\n"); - } - return 0; } From e8bb8f19e73a1e855e54788f8673b9b49e46b5cd Mon Sep 17 00:00:00 2001 From: Witold Sadowski Date: Mon, 19 Dec 2022 06:42:48 -0800 Subject: [PATCH 0069/1593] spi: cadence: Fix busy cycles calculation If xSPI is in x2/x4/x8 mode to calculate busy cycles, busy bits count must be divided by the number of lanes. If opcommand is using 8 busy bits, but SPI is in x4 mode, there will be only 2 busy cycles. Signed-off-by: Witold Sadowski Reviewed-by: Chandrakala Chavva Reviewed-by: Sunil Kovvuri Goutham Link: https://lore.kernel.org/r/20221219144254.20883-2-wsadowski@marvell.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-xspi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-cadence-xspi.c b/drivers/spi/spi-cadence-xspi.c index 520b4cc69cdc9..91db3c973167b 100644 --- a/drivers/spi/spi-cadence-xspi.c +++ b/drivers/spi/spi-cadence-xspi.c @@ -177,7 +177,10 @@ #define CDNS_XSPI_CMD_FLD_DSEQ_CMD_3(op) ( \ FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R3_DCNT_H, \ ((op)->data.nbytes >> 16) & 0xffff) | \ - FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R3_NUM_OF_DUMMY, (op)->dummy.nbytes * 8)) + FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R3_NUM_OF_DUMMY, \ + (op)->dummy.buswidth != 0 ? \ + (((op)->dummy.nbytes * 8) / (op)->dummy.buswidth) : \ + 0)) #define CDNS_XSPI_CMD_FLD_DSEQ_CMD_4(op, chipsel) ( \ FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R4_BANK, chipsel) | \ From 83749a2ee548f568ce2037749e19602fdc9f3ee3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Dec 2022 17:55:15 +0100 Subject: [PATCH 0070/1593] cpufreq: apple: remove duplicate intializer When -Woverride-init is enabled, gcc notices that the .attr field is initialized twice: drivers/cpufreq/apple-soc-cpufreq.c:331:27: error: initialized field overwritten [-Werror=override-init] 331 | .attr = apple_soc_cpufreq_hw_attr, | ^~~~~~~~~~~~~~~~~~~~~~~~~ Remove the first one, since this is not actually used. Signed-off-by: Arnd Bergmann Reviewed-by: Eric Curtin Signed-off-by: Viresh Kumar --- drivers/cpufreq/apple-soc-cpufreq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c index d1801281cdd99..6f26395184c41 100644 --- a/drivers/cpufreq/apple-soc-cpufreq.c +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -321,7 +321,6 @@ static struct cpufreq_driver apple_soc_cpufreq_driver = { .flags = CPUFREQ_HAVE_GOVERNOR_PER_POLICY | CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_IS_COOLING_DEV, .verify = cpufreq_generic_frequency_table_verify, - .attr = cpufreq_generic_attr, .get = apple_soc_cpufreq_get_rate, .init = apple_soc_cpufreq_init, .exit = apple_soc_cpufreq_exit, From f5f94b9c8b805d87ff185caf9779c3a4d07819e3 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Tue, 20 Dec 2022 11:12:25 +0100 Subject: [PATCH 0071/1593] cpufreq: CPPC: Add u64 casts to avoid overflowing The fields of the _CPC object are unsigned 32-bits values. To avoid overflows while using _CPC's values, add 'u64' casts. Signed-off-by: Pierre Gondois Signed-off-by: Viresh Kumar --- drivers/cpufreq/cppc_cpufreq.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 432dfb4e8027e..022e3555407c8 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -487,7 +487,8 @@ static unsigned int get_perf_level_count(struct cpufreq_policy *policy) cpu_data = policy->driver_data; perf_caps = &cpu_data->perf_caps; max_cap = arch_scale_cpu_capacity(cpu); - min_cap = div_u64(max_cap * perf_caps->lowest_perf, perf_caps->highest_perf); + min_cap = div_u64((u64)max_cap * perf_caps->lowest_perf, + perf_caps->highest_perf); if ((min_cap == 0) || (max_cap < min_cap)) return 0; return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP; @@ -519,10 +520,10 @@ static int cppc_get_cpu_power(struct device *cpu_dev, cpu_data = policy->driver_data; perf_caps = &cpu_data->perf_caps; max_cap = arch_scale_cpu_capacity(cpu_dev->id); - min_cap = div_u64(max_cap * perf_caps->lowest_perf, - perf_caps->highest_perf); - - perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap; + min_cap = div_u64((u64)max_cap * perf_caps->lowest_perf, + perf_caps->highest_perf); + perf_step = div_u64((u64)CPPC_EM_CAP_STEP * perf_caps->highest_perf, + max_cap); min_step = min_cap / CPPC_EM_CAP_STEP; max_step = max_cap / CPPC_EM_CAP_STEP; From 1a6a8b0080b09bf14a7d45abfe8020bd63306c17 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 20 Dec 2022 14:39:10 +0100 Subject: [PATCH 0072/1593] cpufreq: qcom-hw: Fix reading "reg" with address/size-cells != 2 Commit 054a3ef683a1 ("cpufreq: qcom-hw: Allocate qcom_cpufreq_data during probe") assumed that every reg variable is 4*u32 wide (as most new qcom SoCs set #address- and #size-cells to <2>. That is not the case for all of them though. Check the cells values dynamically to ensure the proper region of the DTB is being read. Fixes: 054a3ef683a1 ("cpufreq: qcom-hw: Allocate qcom_cpufreq_data during probe") Signed-off-by: Konrad Dybcio Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 340fed35e45dd..9505a812d6a1d 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -649,9 +649,10 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) { struct clk_hw_onecell_data *clk_data; struct device *dev = &pdev->dev; + struct device_node *soc_node; struct device *cpu_dev; struct clk *clk; - int ret, i, num_domains; + int ret, i, num_domains, reg_sz; clk = clk_get(dev, "xo"); if (IS_ERR(clk)) @@ -679,7 +680,21 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) return ret; /* Allocate qcom_cpufreq_data based on the available frequency domains in DT */ - num_domains = of_property_count_elems_of_size(dev->of_node, "reg", sizeof(u32) * 4); + soc_node = of_get_parent(dev->of_node); + if (!soc_node) + return -EINVAL; + + ret = of_property_read_u32(soc_node, "#address-cells", ®_sz); + if (ret) + goto of_exit; + + ret = of_property_read_u32(soc_node, "#size-cells", &i); + if (ret) + goto of_exit; + + reg_sz += i; + + num_domains = of_property_count_elems_of_size(dev->of_node, "reg", sizeof(u32) * reg_sz); if (num_domains <= 0) return num_domains; @@ -743,6 +758,9 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) else dev_dbg(dev, "QCOM CPUFreq HW driver initialized\n"); +of_exit: + of_node_put(soc_node); + return ret; } From 01c5bb0cc2a39fbc56ff9a5ef28b79447f0c2351 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Tue, 20 Dec 2022 21:32:37 +0530 Subject: [PATCH 0073/1593] cpufreq: Add Tegra234 to cpufreq-dt-platdev blocklist Tegra234 platform uses the tegra194-cpufreq driver, so add it to the blocklist in cpufreq-dt-platdev driver to avoid the cpufreq driver registration from there. Signed-off-by: Sumit Gupta Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 8ab6728830436..e329d29d1f9d1 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -137,6 +137,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "nvidia,tegra30", }, { .compatible = "nvidia,tegra124", }, { .compatible = "nvidia,tegra210", }, + { .compatible = "nvidia,tegra234", }, { .compatible = "qcom,apq8096", }, { .compatible = "qcom,msm8996", }, From c28f3d80383571d3630df1a0e89500d23e855924 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Thu, 22 Dec 2022 20:22:46 -0800 Subject: [PATCH 0074/1593] thunderbolt: Do not report errors if on-board retimers are found Currently we return an error even if on-board retimers are found and that's not expected. Fix this to return an error only if there was one and 0 otherwise. Fixes: 1e56c88adecc ("thunderbolt: Runtime resume USB4 port when retimers are scanned") Cc: stable@vger.kernel.org Signed-off-by: Utkarsh Patel Signed-off-by: Mika Westerberg --- drivers/thunderbolt/retimer.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/thunderbolt/retimer.c b/drivers/thunderbolt/retimer.c index 81252e31014a1..6ebe7a2886ec7 100644 --- a/drivers/thunderbolt/retimer.c +++ b/drivers/thunderbolt/retimer.c @@ -471,10 +471,9 @@ int tb_retimer_scan(struct tb_port *port, bool add) break; } - if (!last_idx) { - ret = 0; + ret = 0; + if (!last_idx) goto out; - } /* Add on-board retimers if they do not exist already */ for (i = 1; i <= last_idx; i++) { From 827e0920dbfb0d92416e56081d70745597403446 Mon Sep 17 00:00:00 2001 From: Benedikt Niedermayr Date: Fri, 9 Dec 2022 12:28:28 +0100 Subject: [PATCH 0075/1593] memory: omap-gpmc: fix wait pin validation This bug has been introduced after switching from -1 to UINT_MAX for GPMC_WAITPIN_INVALID. The bug leads to an error when the optional gpmc,wait-pin dt-property is not used: ... gpmc_cs_program_settings: invalid wait-pin (-1) ... Signed-off-by: Benedikt Niedermayr Fixes: 8dd7e4af5853 ("memory: omap-gpmc: fix coverity issue "Control flow issues"") Acked-by: Roger Quadros Tested-by: Tony Lindgren Link: https://lore.kernel.org/r/20221209112828.581491-1-benedikt.niedermayr@siemens.com Signed-off-by: Krzysztof Kozlowski --- drivers/memory/omap-gpmc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index 57d9f91fe89bf..d78f73db37c88 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -1918,7 +1918,8 @@ int gpmc_cs_program_settings(int cs, struct gpmc_settings *p) } } - if (p->wait_pin > gpmc_nr_waitpins) { + if (p->wait_pin != GPMC_WAITPIN_INVALID && + p->wait_pin > gpmc_nr_waitpins) { pr_err("%s: invalid wait-pin (%d)\n", __func__, p->wait_pin); return -EINVAL; } From ef86b2c2807f41c045e5534d8513a8b83f63bc39 Mon Sep 17 00:00:00 2001 From: Ashish Mhetre Date: Fri, 25 Nov 2022 09:37:52 +0530 Subject: [PATCH 0076/1593] memory: tegra: Remove clients SID override programming On newer Tegra releases, early boot SID override programming and SID override programming during resume is handled by bootloader. In the function tegra186_mc_program_sid() which is getting removed, SID override register of all clients is written without checking if secure firmware has allowed write on it or not. If write is disabled by secure firmware then it can lead to errors coming from secure firmware and hang in kernel boot. Also, SID override is programmed on-demand during probe_finalize() call of IOMMU which is done in tegra186_mc_client_sid_override() in this same file. This function does it correctly by checking if write is permitted on SID override register. It also checks if SID override register is already written with correct value and skips re-writing it in that case. Fixes: 393d66fd2cac ("memory: tegra: Implement SID override programming") Signed-off-by: Ashish Mhetre Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20221125040752.12627-1-amhetre@nvidia.com Signed-off-by: Krzysztof Kozlowski --- drivers/memory/tegra/tegra186.c | 36 --------------------------------- 1 file changed, 36 deletions(-) diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c index 62477e592bf5f..7bb73f06fad3e 100644 --- a/drivers/memory/tegra/tegra186.c +++ b/drivers/memory/tegra/tegra186.c @@ -22,32 +22,6 @@ #define MC_SID_STREAMID_SECURITY_WRITE_ACCESS_DISABLED BIT(16) #define MC_SID_STREAMID_SECURITY_OVERRIDE BIT(8) -static void tegra186_mc_program_sid(struct tegra_mc *mc) -{ - unsigned int i; - - for (i = 0; i < mc->soc->num_clients; i++) { - const struct tegra_mc_client *client = &mc->soc->clients[i]; - u32 override, security; - - override = readl(mc->regs + client->regs.sid.override); - security = readl(mc->regs + client->regs.sid.security); - - dev_dbg(mc->dev, "client %s: override: %x security: %x\n", - client->name, override, security); - - dev_dbg(mc->dev, "setting SID %u for %s\n", client->sid, - client->name); - writel(client->sid, mc->regs + client->regs.sid.override); - - override = readl(mc->regs + client->regs.sid.override); - security = readl(mc->regs + client->regs.sid.security); - - dev_dbg(mc->dev, "client %s: override: %x security: %x\n", - client->name, override, security); - } -} - static int tegra186_mc_probe(struct tegra_mc *mc) { struct platform_device *pdev = to_platform_device(mc->dev); @@ -85,8 +59,6 @@ static int tegra186_mc_probe(struct tegra_mc *mc) if (err < 0) return err; - tegra186_mc_program_sid(mc); - return 0; } @@ -95,13 +67,6 @@ static void tegra186_mc_remove(struct tegra_mc *mc) of_platform_depopulate(mc->dev); } -static int tegra186_mc_resume(struct tegra_mc *mc) -{ - tegra186_mc_program_sid(mc); - - return 0; -} - #if IS_ENABLED(CONFIG_IOMMU_API) static void tegra186_mc_client_sid_override(struct tegra_mc *mc, const struct tegra_mc_client *client, @@ -173,7 +138,6 @@ static int tegra186_mc_probe_device(struct tegra_mc *mc, struct device *dev) const struct tegra_mc_ops tegra186_mc_ops = { .probe = tegra186_mc_probe, .remove = tegra186_mc_remove, - .resume = tegra186_mc_resume, .probe_device = tegra186_mc_probe_device, .handle_irq = tegra30_mc_handle_irq, }; From 340cb392a038cf70540a4cdf2e98a247c66b6df4 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Fri, 25 Nov 2022 15:37:57 +0800 Subject: [PATCH 0077/1593] memory: atmel-sdramc: Fix missing clk_disable_unprepare in atmel_ramc_probe() The clk_disable_unprepare() should be called in the error handling of caps->has_mpddr_clk, fix it by replacing devm_clk_get and clk_prepare_enable by devm_clk_get_enabled. Fixes: e81b6abebc87 ("memory: add a driver for atmel ram controllers") Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20221125073757.3535219-1-cuigaosheng1@huawei.com Signed-off-by: Krzysztof Kozlowski --- drivers/memory/atmel-sdramc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/memory/atmel-sdramc.c b/drivers/memory/atmel-sdramc.c index 9c49d00c2a966..ea6e9e1eaf046 100644 --- a/drivers/memory/atmel-sdramc.c +++ b/drivers/memory/atmel-sdramc.c @@ -47,19 +47,17 @@ static int atmel_ramc_probe(struct platform_device *pdev) caps = of_device_get_match_data(&pdev->dev); if (caps->has_ddrck) { - clk = devm_clk_get(&pdev->dev, "ddrck"); + clk = devm_clk_get_enabled(&pdev->dev, "ddrck"); if (IS_ERR(clk)) return PTR_ERR(clk); - clk_prepare_enable(clk); } if (caps->has_mpddr_clk) { - clk = devm_clk_get(&pdev->dev, "mpddr"); + clk = devm_clk_get_enabled(&pdev->dev, "mpddr"); if (IS_ERR(clk)) { pr_err("AT91 RAMC: couldn't get mpddr clock\n"); return PTR_ERR(clk); } - clk_prepare_enable(clk); } return 0; From cb8fd6f75775165390ededea8799b60d93d9fe3e Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sat, 26 Nov 2022 12:49:11 +0800 Subject: [PATCH 0078/1593] memory: mvebu-devbus: Fix missing clk_disable_unprepare in mvebu_devbus_probe() The clk_disable_unprepare() should be called in the error handling of devbus_get_timing_params() and of_platform_populate(), fix it by replacing devm_clk_get and clk_prepare_enable by devm_clk_get_enabled. Fixes: e81b6abebc87 ("memory: add a driver for atmel ram controllers") Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20221126044911.7226-1-cuigaosheng1@huawei.com Signed-off-by: Krzysztof Kozlowski --- drivers/memory/mvebu-devbus.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/memory/mvebu-devbus.c b/drivers/memory/mvebu-devbus.c index 8450638e86700..efc6c08db2b70 100644 --- a/drivers/memory/mvebu-devbus.c +++ b/drivers/memory/mvebu-devbus.c @@ -280,10 +280,9 @@ static int mvebu_devbus_probe(struct platform_device *pdev) if (IS_ERR(devbus->base)) return PTR_ERR(devbus->base); - clk = devm_clk_get(&pdev->dev, NULL); + clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(clk)) return PTR_ERR(clk); - clk_prepare_enable(clk); /* * Obtain clock period in picoseconds, From ad75bd85b1db69c97eefea07b375567821f6ef58 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 13 Dec 2022 13:29:43 +0100 Subject: [PATCH 0079/1593] platform/x86: sony-laptop: Don't turn off 0x153 keyboard backlight during probe The 0x153 version of the kbd backlight control SNC handle has no separate address to probe if the backlight is there. This turns the probe call into a set keyboard backlight call with a value of 0 turning off the keyboard backlight. Skip probing when there is no separate probe address to avoid this. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1583752 Fixes: 800f20170dcf ("Keyboard backlight control for some Vaio Fit models") Signed-off-by: Hans de Goede Reviewed-by: Mattia Dongili Link: https://lore.kernel.org/r/20221213122943.11123-1-hdegoede@redhat.com --- drivers/platform/x86/sony-laptop.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 7156ae2ad1962..537d6a2d0781b 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -1887,14 +1887,21 @@ static int sony_nc_kbd_backlight_setup(struct platform_device *pd, break; } - ret = sony_call_snc_handle(handle, probe_base, &result); - if (ret) - return ret; + /* + * Only probe if there is a separate probe_base, otherwise the probe call + * is equivalent to __sony_nc_kbd_backlight_mode_set(0), resulting in + * the keyboard backlight being turned off. + */ + if (probe_base) { + ret = sony_call_snc_handle(handle, probe_base, &result); + if (ret) + return ret; - if ((handle == 0x0137 && !(result & 0x02)) || - !(result & 0x01)) { - dprintk("no backlight keyboard found\n"); - return 0; + if ((handle == 0x0137 && !(result & 0x02)) || + !(result & 0x01)) { + dprintk("no backlight keyboard found\n"); + return 0; + } } kbdbl_ctl = kzalloc(sizeof(*kbdbl_ctl), GFP_KERNEL); From f4b7f8febd4d9b615fbec2a06bf352b9c3729b11 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 15 Dec 2022 16:43:57 +0100 Subject: [PATCH 0080/1593] platform/x86: ideapad-laptop: Add Legion 5 15ARH05 DMI id to set_fn_lock_led_list[] The Lenovo Legion 5 15ARH05 needs ideapad-laptop to call SALS_FNLOCK_ON / SALS_FNLOCK_OFF on Fn-lock state change to get the LED in the Fn key to correctly reflect the Fn-lock state. Add a DMI match for the Legion 5 15ARH05 to the set_fn_lock_led_list[] table for this. Fixes: 81a5603a0f50 ("platform/x86: ideapad-laptop: Fix interrupt storm on fn-lock toggle on some Yoga laptops") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221215154357.123876-1-hdegoede@redhat.com --- drivers/platform/x86/ideapad-laptop.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 435d2d3d903b0..0eb5bfdd823a1 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1621,6 +1621,12 @@ static const struct dmi_system_id set_fn_lock_led_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion R7000P2020H"), } }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion 5 15ARH05"), + } + }, {} }; From ee4e530bdde29a69c58656a919545251a782674e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:57 +0100 Subject: [PATCH 0081/1593] arm64: dts: qcom: sc8280xp: fix primary USB-DP PHY reset The vendor kernel is using the GCC_USB4_DP_PHY_PRIM_BCR and GCC_USB4_1_DP_PHY_PRIM_BCR resets for the USB4-USB3-DP QMP PHYs. Update the primary USB-DP PHY node to match. Fixes: 152d1faf1e2f ("arm64: dts: qcom: add SC8280XP platform") Signed-off-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221121085058.31213-15-johan+linaro@kernel.org --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 109c9d2b684d1..ad71e68384da5 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -1689,7 +1689,7 @@ clock-names = "aux", "ref_clk_src", "ref", "com_aux"; resets = <&gcc GCC_USB3_PHY_PRIM_BCR>, - <&gcc GCC_USB3_DP_PHY_PRIM_BCR>; + <&gcc GCC_USB4_DP_PHY_PRIM_BCR>; reset-names = "phy", "common"; power-domains = <&gcc USB30_PRIM_GDSC>; From 721c0d68c0f882b6358102b52961ff6eb601839c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:58 +0100 Subject: [PATCH 0082/1593] arm64: dts: qcom: sc8280xp: fix USB-DP PHY nodes Update the USB4-USB3-DP QMP PHY nodes to match the new binding which specifically includes the missing register regions (e.g. DP_PHY) and allows for supporting DisplayPort Alternate Mode. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221121085058.31213-16-johan+linaro@kernel.org --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 77 ++++++++------------------ 1 file changed, 23 insertions(+), 54 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index ad71e68384da5..71cf81a8eb4da 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -762,7 +763,7 @@ <0>, <0>, <0>, - <&usb_0_ssphy>, + <&usb_0_qmpphy QMP_USB43DP_USB3_PIPE_CLK>, <0>, <0>, <0>, @@ -770,7 +771,7 @@ <0>, <0>, <0>, - <&usb_1_ssphy>, + <&usb_1_qmpphy QMP_USB43DP_USB3_PIPE_CLK>, <0>, <0>, <0>, @@ -1673,42 +1674,26 @@ }; }; - usb_0_qmpphy: phy-wrapper@88ec000 { + usb_0_qmpphy: phy@88eb000 { compatible = "qcom,sc8280xp-qmp-usb43dp-phy"; - reg = <0 0x088ec000 0 0x1e4>, - <0 0x088eb000 0 0x40>, - <0 0x088ed000 0 0x1c8>; - #address-cells = <2>; - #size-cells = <2>; - ranges; + reg = <0 0x088eb000 0 0x4000>; clocks = <&gcc GCC_USB3_PRIM_PHY_AUX_CLK>, - <&rpmhcc RPMH_CXO_CLK>, <&gcc GCC_USB4_EUD_CLKREF_CLK>, - <&gcc GCC_USB3_PRIM_PHY_COM_AUX_CLK>; - clock-names = "aux", "ref_clk_src", "ref", "com_aux"; + <&gcc GCC_USB3_PRIM_PHY_COM_AUX_CLK>, + <&gcc GCC_USB3_PRIM_PHY_PIPE_CLK>; + clock-names = "aux", "ref", "com_aux", "usb3_pipe"; + + power-domains = <&gcc USB30_PRIM_GDSC>; resets = <&gcc GCC_USB3_PHY_PRIM_BCR>, <&gcc GCC_USB4_DP_PHY_PRIM_BCR>; reset-names = "phy", "common"; - power-domains = <&gcc USB30_PRIM_GDSC>; + #clock-cells = <1>; + #phy-cells = <1>; status = "disabled"; - - usb_0_ssphy: usb3-phy@88eb400 { - reg = <0 0x088eb400 0 0x100>, - <0 0x088eb600 0 0x3ec>, - <0 0x088ec400 0 0x364>, - <0 0x088eba00 0 0x100>, - <0 0x088ebc00 0 0x3ec>, - <0 0x088ec200 0 0x18>; - #phy-cells = <0>; - #clock-cells = <0>; - clocks = <&gcc GCC_USB3_PRIM_PHY_PIPE_CLK>; - clock-names = "pipe0"; - clock-output-names = "usb0_phy_pipe_clk_src"; - }; }; usb_1_hsphy: phy@8902000 { @@ -1725,42 +1710,26 @@ status = "disabled"; }; - usb_1_qmpphy: phy-wrapper@8904000 { + usb_1_qmpphy: phy@8903000 { compatible = "qcom,sc8280xp-qmp-usb43dp-phy"; - reg = <0 0x08904000 0 0x1e4>, - <0 0x08903000 0 0x40>, - <0 0x08905000 0 0x1c8>; - #address-cells = <2>; - #size-cells = <2>; - ranges; + reg = <0 0x08903000 0 0x4000>; clocks = <&gcc GCC_USB3_SEC_PHY_AUX_CLK>, - <&rpmhcc RPMH_CXO_CLK>, <&gcc GCC_USB4_CLKREF_CLK>, - <&gcc GCC_USB3_SEC_PHY_COM_AUX_CLK>; - clock-names = "aux", "ref_clk_src", "ref", "com_aux"; + <&gcc GCC_USB3_SEC_PHY_COM_AUX_CLK>, + <&gcc GCC_USB3_SEC_PHY_PIPE_CLK>; + clock-names = "aux", "ref", "com_aux", "usb3_pipe"; + + power-domains = <&gcc USB30_SEC_GDSC>; resets = <&gcc GCC_USB3_PHY_SEC_BCR>, <&gcc GCC_USB4_1_DP_PHY_PRIM_BCR>; reset-names = "phy", "common"; - power-domains = <&gcc USB30_SEC_GDSC>; + #clock-cells = <1>; + #phy-cells = <1>; status = "disabled"; - - usb_1_ssphy: usb3-phy@8903400 { - reg = <0 0x08903400 0 0x100>, - <0 0x08903600 0 0x3ec>, - <0 0x08904400 0 0x364>, - <0 0x08903a00 0 0x100>, - <0 0x08903c00 0 0x3ec>, - <0 0x08904200 0 0x18>; - #phy-cells = <0>; - #clock-cells = <0>; - clocks = <&gcc GCC_USB3_SEC_PHY_PIPE_CLK>; - clock-names = "pipe0"; - clock-output-names = "usb1_phy_pipe_clk_src"; - }; }; pmu@9091000 { @@ -1910,7 +1879,7 @@ reg = <0 0x0a600000 0 0xcd00>; interrupts = ; iommus = <&apps_smmu 0x820 0x0>; - phys = <&usb_0_hsphy>, <&usb_0_ssphy>; + phys = <&usb_0_hsphy>, <&usb_0_qmpphy QMP_USB43DP_USB3_PHY>; phy-names = "usb2-phy", "usb3-phy"; }; }; @@ -1964,7 +1933,7 @@ reg = <0 0x0a800000 0 0xcd00>; interrupts = ; iommus = <&apps_smmu 0x860 0x0>; - phys = <&usb_1_hsphy>, <&usb_1_ssphy>; + phys = <&usb_1_hsphy>, <&usb_1_qmpphy QMP_USB43DP_USB3_PHY>; phy-names = "usb2-phy", "usb3-phy"; }; }; From 9d8b5376cc2848ca22314fdec9a7a45b1bf69189 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 25 Nov 2022 16:04:01 -0800 Subject: [PATCH 0083/1593] fbdev: make offb driver tristate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the offb (Open Firmware frame buffer) driver tristate, i.e., so that it can be built as a loadable module. However, it still depends on the setting of DRM_OFDRM so that both of these drivers cannot be builtin at the same time nor can one be builtin and the other one a loadable module. Build-tested successfully with all combination of DRM_OFDRM and FB_OF. This fixes a build issue that Michal reported when FB_OF=y and DRM_OFDRM=m: powerpc64-linux-ld: drivers/video/fbdev/offb.o:(.data.rel.ro+0x58): undefined reference to `cfb_fillrect' powerpc64-linux-ld: drivers/video/fbdev/offb.o:(.data.rel.ro+0x60): undefined reference to `cfb_copyarea' powerpc64-linux-ld: drivers/video/fbdev/offb.o:(.data.rel.ro+0x68): undefined reference to `cfb_imageblit' Signed-off-by: Randy Dunlap Suggested-by: Arnd Bergmann Cc: Masahiro Yamada Cc: Thomas Zimmermann Cc: Michal Suchánek Cc: linuxppc-dev@lists.ozlabs.org Cc: Daniel Vetter Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Acked-by: Thomas Zimmermann Signed-off-by: Helge Deller --- drivers/video/fbdev/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index df6e09f7d2422..b2bed599e6c6e 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -456,8 +456,8 @@ config FB_ATARI chipset found in Ataris. config FB_OF - bool "Open Firmware frame buffer device support" - depends on (FB = y) && PPC && (!PPC_PSERIES || PCI) + tristate "Open Firmware frame buffer device support" + depends on FB && PPC && (!PPC_PSERIES || PCI) depends on !DRM_OFDRM select APERTURE_HELPERS select FB_CFB_FILLRECT From 1bdeb321d1f856346fe0078af09c9e7ffbd2ca7a Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 15 Nov 2022 09:49:02 +0800 Subject: [PATCH 0084/1593] drm/msm/dpu: Fix some kernel-doc comments Make the description of @init to @p in dpu_encoder_phys_wb_init() and remove @wb_roi in dpu_encoder_phys_wb_setup_fb() to clear the below warnings: drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c:139: warning: Excess function parameter 'wb_roi' description in 'dpu_encoder_phys_wb_setup_fb' drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c:699: warning: Function parameter or member 'p' not described in 'dpu_encoder_phys_wb_init' drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c:699: warning: Excess function parameter 'init' description in 'dpu_encoder_phys_wb_init' Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3067 Reported-by: Abaci Robot Signed-off-by: Yang Li Fixes: d7d0e73f7de3 ("drm/msm/dpu: introduce the dpu_encoder_phys_* for writeback") Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/511605/ Link: https://lore.kernel.org/r/20221115014902.45240-1-yang.lee@linux.alibaba.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index 7cbcef6efe171..62f6ff6abf410 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -132,7 +132,6 @@ static void dpu_encoder_phys_wb_set_qos(struct dpu_encoder_phys *phys_enc) * dpu_encoder_phys_wb_setup_fb - setup output framebuffer * @phys_enc: Pointer to physical encoder * @fb: Pointer to output framebuffer - * @wb_roi: Pointer to output region of interest */ static void dpu_encoder_phys_wb_setup_fb(struct dpu_encoder_phys *phys_enc, struct drm_framebuffer *fb) @@ -692,7 +691,7 @@ static void dpu_encoder_phys_wb_init_ops(struct dpu_encoder_phys_ops *ops) /** * dpu_encoder_phys_wb_init - initialize writeback encoder - * @init: Pointer to init info structure with initialization params + * @p: Pointer to init info structure with initialization params */ struct dpu_encoder_phys *dpu_encoder_phys_wb_init( struct dpu_enc_phys_init_params *p) From a2117773c839a8439a3771e0c040b5c505b083a7 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 30 Nov 2022 14:58:07 +0100 Subject: [PATCH 0085/1593] dt-bindings: msm/dsi: Don't require vcca-supply on 14nm PHY On some SoCs (hello SM6115) vcca-supply is not wired to any smd-rpm or rpmh regulator, but instead powered by the VDD_MX line, which is voted for in the DSI ctrl node. Signed-off-by: Konrad Dybcio Acked-by: Krzysztof Kozlowski Reviewed-by: Dmitry Baryshkov Fixes: 8fc939e72ff8 ("dt-bindings: msm: dsi: add yaml schemas for DSI PHY bindings") Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/513555/ Link: https://lore.kernel.org/r/20221130135807.45028-1-konrad.dybcio@linaro.org Signed-off-by: Abhinav Kumar --- Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml index 819de5ce0bc91..a43e11d3b00d2 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml @@ -39,7 +39,6 @@ required: - compatible - reg - reg-names - - vcca-supply unevaluatedProperties: false From ef11cb7a29c0e13031c968190ea8f86104e7fb6a Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 16 Nov 2022 17:32:18 +0100 Subject: [PATCH 0086/1593] dt-bindings: msm/dsi: Don't require vdds-supply on 10nm PHY On some SoCs (hello SM6350) vdds-supply is not wired to any smd-rpm or rpmh regulator, but instead powered by the VDD_MX/mx.lvl line, which is voted for in the DSI ctrl node. Signed-off-by: Konrad Dybcio Acked-by: Rob Herring Fixes: 8fc939e72ff8 ("dt-bindings: msm: dsi: add yaml schemas for DSI PHY bindings") Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/511889/ Link: https://lore.kernel.org/r/20221116163218.42449-1-konrad.dybcio@linaro.org Signed-off-by: Abhinav Kumar --- Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml index d9ad8b659f58e..3ec466c3ab38b 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml @@ -69,7 +69,6 @@ required: - compatible - reg - reg-names - - vdds-supply unevaluatedProperties: false From e5266ca38294c6eba48f5c9cd3d0402d619d7c05 Mon Sep 17 00:00:00 2001 From: Adam Skladowski Date: Wed, 30 Nov 2022 21:09:39 +0100 Subject: [PATCH 0087/1593] dt-bindings: display: msm: Rename mdss node name in example Follow other YAMLs and replace mdss name into display-subystem. Signed-off-by: Adam Skladowski Acked-by: Krzysztof Kozlowski Reviewed-by: Dmitry Baryshkov Fixes: b93bdff44a85 ("dt-bindings: display/msm: add support for SM6115") Fixes: 06097b13ef97 ("dt-bindings: display/msm: split dpu-qcm2290 into DPU and MDSS parts") Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/513585/ Link: https://lore.kernel.org/r/20221130200950.144618-2-a39.skl@gmail.com Signed-off-by: Abhinav Kumar --- .../devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml | 2 +- .../devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml index d6f043a4b08d2..4795e13c7b597 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml @@ -72,7 +72,7 @@ examples: #include #include - mdss@5e00000 { + display-subsystem@5e00000 { #address-cells = <1>; #size-cells = <1>; compatible = "qcom,qcm2290-mdss"; diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml index a86d7f53fa84d..886858ef67000 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml @@ -62,7 +62,7 @@ examples: #include #include - mdss@5e00000 { + display-subsystem@5e00000 { #address-cells = <1>; #size-cells = <1>; compatible = "qcom,sm6115-mdss"; From 45dac1352b55b1d8cb17f218936b2bc2bc1fb4ee Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 7 Dec 2022 10:59:22 +0400 Subject: [PATCH 0088/1593] drm/msm/dpu: Fix memory leak in msm_mdss_parse_data_bus_icc_path of_icc_get() alloc resources for path1, we should release it when not need anymore. Early return when IS_ERR_OR_NULL(path0) may leak path1. Defer getting path1 to fix this. Fixes: b9364eed9232 ("drm/msm/dpu: Move min BW request and full BW disable back to mdss") Signed-off-by: Miaoqian Lin Reviewed-by: Douglas Anderson Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/514264/ Link: https://lore.kernel.org/r/20221207065922.2086368-1-linmq006@gmail.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/msm_mdss.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 86b28add1ffff..2527afef9c199 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -47,15 +47,17 @@ struct msm_mdss { static int msm_mdss_parse_data_bus_icc_path(struct device *dev, struct msm_mdss *msm_mdss) { - struct icc_path *path0 = of_icc_get(dev, "mdp0-mem"); - struct icc_path *path1 = of_icc_get(dev, "mdp1-mem"); + struct icc_path *path0; + struct icc_path *path1; + path0 = of_icc_get(dev, "mdp0-mem"); if (IS_ERR_OR_NULL(path0)) return PTR_ERR_OR_ZERO(path0); msm_mdss->path[0] = path0; msm_mdss->num_paths = 1; + path1 = of_icc_get(dev, "mdp1-mem"); if (!IS_ERR_OR_NULL(path1)) { msm_mdss->path[1] = path1; msm_mdss->num_paths++; From 4164843261d28f86910a5c2727faddce066334c5 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 13 Dec 2022 07:15:33 +0100 Subject: [PATCH 0089/1593] drm/msm/hdmi: Fix the error handling path of msm_hdmi_dev_probe() If an error occurs after a successful msm_hdmi_get_phy() call, it must be undone by a corresponding msm_hdmi_put_phy(), as already done in the remove function. Fixes: 437365464043 ("drm/msm/hdmi: move msm_hdmi_get_phy() to msm_hdmi_dev_probe()") Signed-off-by: Christophe JAILLET Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/514847/ Link: https://lore.kernel.org/r/b3f9da097851e2e42a40dc61458aa98c41c88d0d.1670741386.git.christophe.jaillet@wanadoo.fr Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/hdmi/hdmi.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 4d3fdc806befd..97372bb241d89 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -532,11 +532,19 @@ static int msm_hdmi_dev_probe(struct platform_device *pdev) ret = devm_pm_runtime_enable(&pdev->dev); if (ret) - return ret; + goto err_put_phy; platform_set_drvdata(pdev, hdmi); - return component_add(&pdev->dev, &msm_hdmi_ops); + ret = component_add(&pdev->dev, &msm_hdmi_ops); + if (ret) + goto err_put_phy; + + return 0; + +err_put_phy: + msm_hdmi_put_phy(hdmi); + return ret; } static int msm_hdmi_dev_remove(struct platform_device *pdev) From 22c7e1a0fa45cd7d028d6b4117161fd0e3427fe0 Mon Sep 17 00:00:00 2001 From: Dominik Kobinski Date: Sun, 11 Dec 2022 11:05:01 +0100 Subject: [PATCH 0090/1593] arm64: dts: msm8992-bullhead: add memory hole region Add region for memory hole present on bullhead in order to fix a reboot issue on recent kernels Reported-by: Petr Vorel Signed-off-by: Dominik Kobinski Reviewed-by: Konrad Dybcio Tested-by: Petr Vorel Reviewed-by: Petr Vorel Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221211100501.82323-1-dominikkobinski314@gmail.com --- arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi b/arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi index 87c90e93667f7..79de9cc395c4c 100644 --- a/arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi @@ -3,6 +3,7 @@ * Copyright (c) 2015, LGE Inc. All rights reserved. * Copyright (c) 2016, The Linux Foundation. All rights reserved. * Copyright (c) 2021, Petr Vorel + * Copyright (c) 2022, Dominik Kobinski */ /dts-v1/; @@ -51,6 +52,11 @@ reg = <0 0x03400000 0 0x1200000>; no-map; }; + + removed_region: reserved@5000000 { + reg = <0 0x05000000 0 0x2200000>; + no-map; + }; }; }; From 25e8ac233d24051e2c4ff64c34f60609b0988568 Mon Sep 17 00:00:00 2001 From: Vijaya Krishna Nivarthi Date: Fri, 16 Dec 2022 23:08:13 +0530 Subject: [PATCH 0091/1593] dmaengine: qcom: gpi: Set link_rx bit on GO TRE for rx operation Rx operation on SPI GSI DMA is currently not working. As per GSI spec, link_rx bit is to be set on GO TRE on tx channel whenever there is going to be a DMA TRE on rx channel. This is currently set for duplex operation only. Set the bit for rx operation as well. This is part of changes required to bring up Rx. Fixes: 94b8f0e58fa1 ("dmaengine: qcom: gpi: set chain and link flag for duplex") Signed-off-by: Vijaya Krishna Nivarthi Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/1671212293-14767-1-git-send-email-quic_vnivarth@quicinc.com Signed-off-by: Vinod Koul --- drivers/dma/qcom/gpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index 061add8322951..59a36cbf9b5f7 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -1756,6 +1756,7 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, tre->dword[3] = u32_encode_bits(TRE_TYPE_GO, TRE_FLAGS_TYPE); if (spi->cmd == SPI_RX) { tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOB); + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_LINK); } else if (spi->cmd == SPI_TX) { tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_CHAIN); } else { /* SPI_DUPLEX */ From 8d8cf163c8d8c93bccf0c70a133309693af9bf61 Mon Sep 17 00:00:00 2001 From: Xu Panda Date: Wed, 28 Dec 2022 09:40:01 +0800 Subject: [PATCH 0092/1593] fbdev: omapfb: use strscpy() to instead of strncpy() The implementation of strscpy() is more robust and safer. That's now the recommended way to copy NUL-terminated strings. Signed-off-by: Xu Panda Signed-off-by: Yang Yang Signed-off-by: Helge Deller --- drivers/video/fbdev/omap/omapfb_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index 17cda57656838..1f3df2055ff0d 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -1447,7 +1447,7 @@ static int fbinfo_init(struct omapfb_device *fbdev, struct fb_info *info) info->fbops = &omapfb_ops; info->flags = FBINFO_FLAG_DEFAULT; - strncpy(fix->id, MODULE_NAME, sizeof(fix->id)); + strscpy(fix->id, MODULE_NAME, sizeof(fix->id)); info->pseudo_palette = fbdev->pseudo_palette; @@ -1573,8 +1573,7 @@ static int omapfb_find_ctrl(struct omapfb_device *fbdev) fbdev->ctrl = NULL; - strncpy(name, conf->lcd.ctrl_name, sizeof(name) - 1); - name[sizeof(name) - 1] = '\0'; + strscpy(name, conf->lcd.ctrl_name, sizeof(name)); if (strcmp(name, "internal") == 0) { fbdev->ctrl = fbdev->int_ctrl; From 6b90032c73405cd4da29ab914df11fd1be960b99 Mon Sep 17 00:00:00 2001 From: Xu Panda Date: Wed, 28 Dec 2022 09:44:11 +0800 Subject: [PATCH 0093/1593] fbdev: atyfb: use strscpy() to instead of strncpy() The implementation of strscpy() is more robust and safer. That's now the recommended way to copy NUL-terminated strings. Signed-off-by: Xu Panda Signed-off-by: Yang Yang Signed-off-by: Helge Deller --- drivers/video/fbdev/aty/atyfb_base.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index 0ccf5d401ecbc..d59215a4992e0 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -3192,8 +3192,7 @@ static void aty_init_lcd(struct atyfb_par *par, u32 bios_base) * which we print to the screen. */ id = *(u8 *)par->lcd_table; - strncpy(model, (char *)par->lcd_table+1, 24); - model[23] = 0; + strscpy(model, (char *)par->lcd_table+1, sizeof(model)); width = par->lcd_width = *(u16 *)(par->lcd_table+25); height = par->lcd_height = *(u16 *)(par->lcd_table+27); From 0e50d999903c009b6a9cd2277c82d6798d982e31 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Dec 2022 14:49:00 +0000 Subject: [PATCH 0094/1593] rxrpc: Fix a couple of potential use-after-frees At the end of rxrpc_recvmsg(), if a call is found, the call is put and then a trace line is emitted referencing that call in a couple of places - but the call may have been deallocated by the time those traces happen. Fix this by stashing the call debug_id in a variable and passing that to the tracepoint rather than the call pointer. Fixes: 849979051cbc ("rxrpc: Add a tracepoint to follow what recvmsg does") Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: David S. Miller --- include/trace/events/rxrpc.h | 6 +++--- net/rxrpc/recvmsg.c | 14 ++++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index c6cfed00d0c6c..5f9dd73895364 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1062,10 +1062,10 @@ TRACE_EVENT(rxrpc_receive, ); TRACE_EVENT(rxrpc_recvmsg, - TP_PROTO(struct rxrpc_call *call, enum rxrpc_recvmsg_trace why, + TP_PROTO(unsigned int call_debug_id, enum rxrpc_recvmsg_trace why, int ret), - TP_ARGS(call, why, ret), + TP_ARGS(call_debug_id, why, ret), TP_STRUCT__entry( __field(unsigned int, call ) @@ -1074,7 +1074,7 @@ TRACE_EVENT(rxrpc_recvmsg, ), TP_fast_assign( - __entry->call = call ? call->debug_id : 0; + __entry->call = call_debug_id; __entry->why = why; __entry->ret = ret; ), diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 36b25d003cf00..6ebd6440a2b7c 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -388,13 +388,14 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, struct rxrpc_call *call; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); struct list_head *l; + unsigned int call_debug_id = 0; size_t copied = 0; long timeo; int ret; DEFINE_WAIT(wait); - trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0); + trace_rxrpc_recvmsg(0, rxrpc_recvmsg_enter, 0); if (flags & (MSG_OOB | MSG_TRUNC)) return -EOPNOTSUPP; @@ -431,7 +432,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (list_empty(&rx->recvmsg_q)) { if (signal_pending(current)) goto wait_interrupted; - trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait, 0); + trace_rxrpc_recvmsg(0, rxrpc_recvmsg_wait, 0); timeo = schedule_timeout(timeo); } finish_wait(sk_sleep(&rx->sk), &wait); @@ -450,7 +451,8 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, rxrpc_get_call(call, rxrpc_call_get_recvmsg); write_unlock(&rx->recvmsg_lock); - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0); + call_debug_id = call->debug_id; + trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_dequeue, 0); /* We're going to drop the socket lock, so we need to lock the call * against interference by sendmsg. @@ -531,7 +533,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, error_unlock_call: mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put_recvmsg); - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret); + trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_return, ret); return ret; error_requeue_call: @@ -539,14 +541,14 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, write_lock(&rx->recvmsg_lock); list_add(&call->recvmsg_link, &rx->recvmsg_q); write_unlock(&rx->recvmsg_lock); - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0); + trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0); } else { rxrpc_put_call(call, rxrpc_call_put_recvmsg); } error_no_call: release_sock(&rx->sk); error_trace: - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret); + trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_return, ret); return ret; wait_interrupted: From f4ef681115f822daf7f36f8b1892d9f1e1a26fbf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 22 Dec 2022 11:22:47 -0800 Subject: [PATCH 0095/1593] docs: netdev: reshuffle sections in prep for de-FAQization Subsequent changes will reformat the doc away from FAQ. To make that more readable perform the pure section moves now. Reviewed-by: Randy Dunlap Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/process/maintainer-netdev.rst | 186 ++++++++++---------- 1 file changed, 93 insertions(+), 93 deletions(-) diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 1fa5ab8754d35..8f22f8a3dcd10 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -44,17 +44,6 @@ for the future release. You can find the trees here: - https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git - https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git -How do I indicate which tree (net vs. net-next) my patch should be in? ----------------------------------------------------------------------- -To help maintainers and CI bots you should explicitly mark which tree -your patch is targeting. Assuming that you use git, use the prefix -flag:: - - git format-patch --subject-prefix='PATCH net-next' start..finish - -Use ``net`` instead of ``net-next`` (always lower case) in the above for -bug-fix ``net`` content. - How often do changes from these trees make it to the mainline Linus tree? ------------------------------------------------------------------------- To understand this, you need to know a bit of background information on @@ -127,15 +116,6 @@ patch. Patches are indexed by the ``Message-ID`` header of the emails which carried them so if you have trouble finding your patch append the value of ``Message-ID`` to the URL above. -How long before my patch is accepted? -------------------------------------- -Generally speaking, the patches get triaged quickly (in less than -48h). But be patient, if your patch is active in patchwork (i.e. it's -listed on the project's patch list) the chances it was missed are close to zero. -Asking the maintainer for status updates on your -patch is a good way to ensure your patch is ignored or pushed to the -bottom of the priority list. - Should I directly update patchwork state of my own patches? ----------------------------------------------------------- It may be tempting to help the maintainers and update the state of your @@ -145,19 +125,14 @@ it to the maintainer to figure out what is the most recent and current version that should be applied. If there is any doubt, the maintainer will reply and ask what should be done. -How do I divide my work into patches? +How long before my patch is accepted? ------------------------------------- - -Put yourself in the shoes of the reviewer. Each patch is read separately -and therefore should constitute a comprehensible step towards your stated -goal. - -Avoid sending series longer than 15 patches. Larger series takes longer -to review as reviewers will defer looking at it until they find a large -chunk of time. A small series can be reviewed in a short time, so Maintainers -just do it. As a result, a sequence of smaller series gets merged quicker and -with better review coverage. Re-posting large series also increases the mailing -list traffic. +Generally speaking, the patches get triaged quickly (in less than +48h). But be patient, if your patch is active in patchwork (i.e. it's +listed on the project's patch list) the chances it was missed are close to zero. +Asking the maintainer for status updates on your +patch is a good way to ensure your patch is ignored or pushed to the +bottom of the priority list. I made changes to only a few patches in a patch series should I resend only those changed? ------------------------------------------------------------------------------------------ @@ -165,17 +140,6 @@ No, please resend the entire patch series and make sure you do number your patches such that it is clear this is the latest and greatest set of patches that can be applied. -I have received review feedback, when should I post a revised version of the patches? -------------------------------------------------------------------------------------- -Allow at least 24 hours to pass between postings. This will ensure reviewers -from all geographical locations have a chance to chime in. Do not wait -too long (weeks) between postings either as it will make it harder for reviewers -to recall all the context. - -Make sure you address all the feedback in your new posting. Do not post a new -version of the code if the discussion about the previous version is still -ongoing, unless directly instructed by a reviewer. - I submitted multiple versions of a patch series and it looks like a version other than the last one has been accepted, what should I do? ---------------------------------------------------------------------------------------------------------------------------------------- There is no revert possible, once it is pushed out, it stays like that. @@ -191,6 +155,82 @@ the case today. Please follow the standard stable rules in :ref:`Documentation/process/stable-kernel-rules.rst `, and make sure you include appropriate Fixes tags! +I found a bug that might have possible security implications or similar. Should I mail the main netdev maintainer off-list? +--------------------------------------------------------------------------------------------------------------------------- +No. The current netdev maintainer has consistently requested that +people use the mailing lists and not reach out directly. If you aren't +OK with that, then perhaps consider mailing security@kernel.org or +reading about http://oss-security.openwall.org/wiki/mailing-lists/distros +as possible alternative mechanisms. + +How do I post corresponding changes to user space components? +------------------------------------------------------------- +User space code exercising kernel features should be posted +alongside kernel patches. This gives reviewers a chance to see +how any new interface is used and how well it works. + +When user space tools reside in the kernel repo itself all changes +should generally come as one series. If series becomes too large +or the user space project is not reviewed on netdev include a link +to a public repo where user space patches can be seen. + +In case user space tooling lives in a separate repository but is +reviewed on netdev (e.g. patches to ``iproute2`` tools) kernel and +user space patches should form separate series (threads) when posted +to the mailing list, e.g.:: + + [PATCH net-next 0/3] net: some feature cover letter + └─ [PATCH net-next 1/3] net: some feature prep + └─ [PATCH net-next 2/3] net: some feature do it + └─ [PATCH net-next 3/3] selftest: net: some feature + + [PATCH iproute2-next] ip: add support for some feature + +Posting as one thread is discouraged because it confuses patchwork +(as of patchwork 2.2.2). + +Any other tips to help ensure my net/net-next patch gets OK'd? +-------------------------------------------------------------- +Attention to detail. Re-read your own work as if you were the +reviewer. You can start with using ``checkpatch.pl``, perhaps even with +the ``--strict`` flag. But do not be mindlessly robotic in doing so. +If your change is a bug fix, make sure your commit log indicates the +end-user visible symptom, the underlying reason as to why it happens, +and then if necessary, explain why the fix proposed is the best way to +get things done. Don't mangle whitespace, and as is common, don't +mis-indent function arguments that span multiple lines. If it is your +first patch, mail it to yourself so you can test apply it to an +unpatched tree to confirm infrastructure didn't mangle it. + +Finally, go back and read +:ref:`Documentation/process/submitting-patches.rst ` +to be sure you are not repeating some common mistake documented there. + +How do I indicate which tree (net vs. net-next) my patch should be in? +---------------------------------------------------------------------- +To help maintainers and CI bots you should explicitly mark which tree +your patch is targeting. Assuming that you use git, use the prefix +flag:: + + git format-patch --subject-prefix='PATCH net-next' start..finish + +Use ``net`` instead of ``net-next`` (always lower case) in the above for +bug-fix ``net`` content. + +How do I divide my work into patches? +------------------------------------- + +Put yourself in the shoes of the reviewer. Each patch is read separately +and therefore should constitute a comprehensible step towards your stated +goal. + +Avoid sending series longer than 15 patches. Larger series takes longer +to review as reviewers will defer looking at it until they find a large +chunk of time. A small series can be reviewed in a short time, so Maintainers +just do it. As a result, a sequence of smaller series gets merged quicker and +with better review coverage. Re-posting large series also increases the mailing +list traffic. + Is the comment style convention different for the networking content? --------------------------------------------------------------------- Yes, in a largely trivial way. Instead of this:: @@ -224,13 +264,16 @@ I am working in existing code which uses non-standard formatting. Which formatti Make your code follow the most recent guidelines, so that eventually all code in the domain of netdev is in the preferred format. -I found a bug that might have possible security implications or similar. Should I mail the main netdev maintainer off-list? ---------------------------------------------------------------------------------------------------------------------------- -No. The current netdev maintainer has consistently requested that -people use the mailing lists and not reach out directly. If you aren't -OK with that, then perhaps consider mailing security@kernel.org or -reading about http://oss-security.openwall.org/wiki/mailing-lists/distros -as possible alternative mechanisms. +I have received review feedback, when should I post a revised version of the patches? +------------------------------------------------------------------------------------- +Allow at least 24 hours to pass between postings. This will ensure reviewers +from all geographical locations have a chance to chime in. Do not wait +too long (weeks) between postings either as it will make it harder for reviewers +to recall all the context. + +Make sure you address all the feedback in your new posting. Do not post a new +version of the code if the discussion about the previous version is still +ongoing, unless directly instructed by a reviewer. What level of testing is expected before I submit my change? ------------------------------------------------------------ @@ -244,32 +287,6 @@ and the patch series contains a set of kernel selftest for You are expected to test your changes on top of the relevant networking tree (``net`` or ``net-next``) and not e.g. a stable tree or ``linux-next``. -How do I post corresponding changes to user space components? -------------------------------------------------------------- -User space code exercising kernel features should be posted -alongside kernel patches. This gives reviewers a chance to see -how any new interface is used and how well it works. - -When user space tools reside in the kernel repo itself all changes -should generally come as one series. If series becomes too large -or the user space project is not reviewed on netdev include a link -to a public repo where user space patches can be seen. - -In case user space tooling lives in a separate repository but is -reviewed on netdev (e.g. patches to ``iproute2`` tools) kernel and -user space patches should form separate series (threads) when posted -to the mailing list, e.g.:: - - [PATCH net-next 0/3] net: some feature cover letter - └─ [PATCH net-next 1/3] net: some feature prep - └─ [PATCH net-next 2/3] net: some feature do it - └─ [PATCH net-next 3/3] selftest: net: some feature - - [PATCH iproute2-next] ip: add support for some feature - -Posting as one thread is discouraged because it confuses patchwork -(as of patchwork 2.2.2). - Can I reproduce the checks from patchwork on my local machine? -------------------------------------------------------------- @@ -303,23 +320,6 @@ it has a real, in-tree user. Mock-ups and tests based on ``netdevsim`` are strongly encouraged when adding new APIs, but ``netdevsim`` in itself is **not** considered a use case/user. -Any other tips to help ensure my net/net-next patch gets OK'd? --------------------------------------------------------------- -Attention to detail. Re-read your own work as if you were the -reviewer. You can start with using ``checkpatch.pl``, perhaps even with -the ``--strict`` flag. But do not be mindlessly robotic in doing so. -If your change is a bug fix, make sure your commit log indicates the -end-user visible symptom, the underlying reason as to why it happens, -and then if necessary, explain why the fix proposed is the best way to -get things done. Don't mangle whitespace, and as is common, don't -mis-indent function arguments that span multiple lines. If it is your -first patch, mail it to yourself so you can test apply it to an -unpatched tree to confirm infrastructure didn't mangle it. - -Finally, go back and read -:ref:`Documentation/process/submitting-patches.rst ` -to be sure you are not repeating some common mistake documented there. - My company uses peer feedback in employee performance reviews. Can I ask netdev maintainers for feedback? --------------------------------------------------------------------------------------------------------- From ff249be5cca9f982e58936847ba6c30104abbcad Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 22 Dec 2022 11:22:48 -0800 Subject: [PATCH 0096/1593] docs: netdev: convert to a non-FAQ document The netdev-FAQ document has grown over the years to the point where finding information in it is somewhat challenging. The length of the questions prevents readers from locating content that's relevant at a glance. Convert to a more standard documentation format with sections and sub-sections rather than questions and answers. The content edits are limited to what's necessary to change the format, and very minor clarifications. Reviewed-by: Randy Dunlap Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/process/maintainer-netdev.rst | 221 +++++++++++--------- 1 file changed, 125 insertions(+), 96 deletions(-) diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 8f22f8a3dcd10..4a75686d35ab4 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -2,9 +2,9 @@ .. _netdev-FAQ: -========== -netdev FAQ -========== +============================= +Networking subsystem (netdev) +============================= tl;dr ----- @@ -15,14 +15,15 @@ tl;dr - don't repost your patches within one 24h period - reverse xmas tree -What is netdev? ---------------- -It is a mailing list for all network-related Linux stuff. This +netdev +------ + +netdev is a mailing list for all network-related Linux stuff. This includes anything found under net/ (i.e. core code like IPv6) and drivers/net (i.e. hardware specific drivers) in the Linux source tree. Note that some subsystems (e.g. wireless drivers) which have a high -volume of traffic have their own specific mailing lists. +volume of traffic have their own specific mailing lists and trees. The netdev list is managed (like many other Linux mailing lists) through VGER (http://vger.kernel.org/) with archives available at @@ -32,21 +33,10 @@ Aside from subsystems like those mentioned above, all network-related Linux development (i.e. RFC, review, comments, etc.) takes place on netdev. -How do the changes posted to netdev make their way into Linux? --------------------------------------------------------------- -There are always two trees (git repositories) in play. Both are -driven by David Miller, the main network maintainer. There is the -``net`` tree, and the ``net-next`` tree. As you can probably guess from -the names, the ``net`` tree is for fixes to existing code already in the -mainline tree from Linus, and ``net-next`` is where the new code goes -for the future release. You can find the trees here: - -- https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git -- https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git +Development cycle +----------------- -How often do changes from these trees make it to the mainline Linus tree? -------------------------------------------------------------------------- -To understand this, you need to know a bit of background information on +Here is a bit of background information on the cadence of Linux development. Each new release starts off with a two week "merge window" where the main maintainers feed their new stuff to Linus for merging into the mainline tree. After the two weeks, the @@ -58,9 +48,33 @@ rc2 is released. This repeats on a roughly weekly basis until rc7 state of churn), and a week after the last vX.Y-rcN was done, the official vX.Y is released. -Relating that to netdev: At the beginning of the 2-week merge window, -the ``net-next`` tree will be closed - no new changes/features. The -accumulated new content of the past ~10 weeks will be passed onto +To find out where we are now in the cycle - load the mainline (Linus) +page here: + + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git + +and note the top of the "tags" section. If it is rc1, it is early in +the dev cycle. If it was tagged rc7 a week ago, then a release is +probably imminent. If the most recent tag is a final release tag +(without an ``-rcN`` suffix) - we are most likely in a merge window +and ``net-next`` is closed. + +git trees and patch flow +------------------------ + +There are two networking trees (git repositories) in play. Both are +driven by David Miller, the main network maintainer. There is the +``net`` tree, and the ``net-next`` tree. As you can probably guess from +the names, the ``net`` tree is for fixes to existing code already in the +mainline tree from Linus, and ``net-next`` is where the new code goes +for the future release. You can find the trees here: + +- https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git +- https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git + +Relating that to kernel development: At the beginning of the 2-week +merge window, the ``net-next`` tree will be closed - no new changes/features. +The accumulated new content of the past ~10 weeks will be passed onto mainline/Linus via a pull request for vX.Y -- at the same time, the ``net`` tree will start accumulating fixes for this pulled content relating to vX.Y @@ -92,22 +106,14 @@ focus for ``net`` is on stabilization and bug fixes. Finally, the vX.Y gets released, and the whole cycle starts over. -So where are we now in this cycle? ----------------------------------- +netdev patch review +------------------- -Load the mainline (Linus) page here: +Patch status +~~~~~~~~~~~~ - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git - -and note the top of the "tags" section. If it is rc1, it is early in -the dev cycle. If it was tagged rc7 a week ago, then a release is -probably imminent. If the most recent tag is a final release tag -(without an ``-rcN`` suffix) - we are most likely in a merge window -and ``net-next`` is closed. - -How can I tell the status of a patch I've sent? ------------------------------------------------ -Start by looking at the main patchworks queue for netdev: +Status of a patch can be checked by looking at the main patchwork +queue for netdev: https://patchwork.kernel.org/project/netdevbpf/list/ @@ -116,17 +122,20 @@ patch. Patches are indexed by the ``Message-ID`` header of the emails which carried them so if you have trouble finding your patch append the value of ``Message-ID`` to the URL above. -Should I directly update patchwork state of my own patches? ------------------------------------------------------------ +Updating patch status +~~~~~~~~~~~~~~~~~~~~~ + It may be tempting to help the maintainers and update the state of your -own patches when you post a new version or spot a bug. Please do not do that. +own patches when you post a new version or spot a bug. Please **do not** +do that. Interfering with the patch status on patchwork will only cause confusion. Leave it to the maintainer to figure out what is the most recent and current version that should be applied. If there is any doubt, the maintainer will reply and ask what should be done. -How long before my patch is accepted? -------------------------------------- +Review timelines +~~~~~~~~~~~~~~~~ + Generally speaking, the patches get triaged quickly (in less than 48h). But be patient, if your patch is active in patchwork (i.e. it's listed on the project's patch list) the chances it was missed are close to zero. @@ -134,37 +143,47 @@ Asking the maintainer for status updates on your patch is a good way to ensure your patch is ignored or pushed to the bottom of the priority list. -I made changes to only a few patches in a patch series should I resend only those changed? ------------------------------------------------------------------------------------------- -No, please resend the entire patch series and make sure you do number your +Partial resends +~~~~~~~~~~~~~~~ + +Please always resend the entire patch series and make sure you do number your patches such that it is clear this is the latest and greatest set of patches -that can be applied. +that can be applied. Do not try to resend just the patches which changed. + +Handling misapplied patches +~~~~~~~~~~~~~~~~~~~~~~~~~~~ -I submitted multiple versions of a patch series and it looks like a version other than the last one has been accepted, what should I do? ----------------------------------------------------------------------------------------------------------------------------------------- +Occasionally a patch series gets applied before receiving critical feedback, +or the wrong version of a series gets applied. There is no revert possible, once it is pushed out, it stays like that. Please send incremental versions on top of what has been merged in order to fix the patches the way they would look like if your latest patch series was to be merged. -Are there special rules regarding stable submissions on netdev? ---------------------------------------------------------------- +Stable tree +~~~~~~~~~~~ + While it used to be the case that netdev submissions were not supposed to carry explicit ``CC: stable@vger.kernel.org`` tags that is no longer the case today. Please follow the standard stable rules in :ref:`Documentation/process/stable-kernel-rules.rst `, and make sure you include appropriate Fixes tags! -I found a bug that might have possible security implications or similar. Should I mail the main netdev maintainer off-list? ---------------------------------------------------------------------------------------------------------------------------- -No. The current netdev maintainer has consistently requested that +Security fixes +~~~~~~~~~~~~~~ + +Do not email netdev maintainers directly if you think you discovered +a bug that might have possible security implications. +The current netdev maintainer has consistently requested that people use the mailing lists and not reach out directly. If you aren't OK with that, then perhaps consider mailing security@kernel.org or reading about http://oss-security.openwall.org/wiki/mailing-lists/distros as possible alternative mechanisms. -How do I post corresponding changes to user space components? -------------------------------------------------------------- + +Co-posting changes to user space components +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + User space code exercising kernel features should be posted alongside kernel patches. This gives reviewers a chance to see how any new interface is used and how well it works. @@ -189,9 +208,10 @@ to the mailing list, e.g.:: Posting as one thread is discouraged because it confuses patchwork (as of patchwork 2.2.2). -Any other tips to help ensure my net/net-next patch gets OK'd? --------------------------------------------------------------- -Attention to detail. Re-read your own work as if you were the +Preparing changes +----------------- + +Attention to detail is important. Re-read your own work as if you were the reviewer. You can start with using ``checkpatch.pl``, perhaps even with the ``--strict`` flag. But do not be mindlessly robotic in doing so. If your change is a bug fix, make sure your commit log indicates the @@ -206,8 +226,9 @@ Finally, go back and read :ref:`Documentation/process/submitting-patches.rst ` to be sure you are not repeating some common mistake documented there. -How do I indicate which tree (net vs. net-next) my patch should be in? ----------------------------------------------------------------------- +Indicating target tree +~~~~~~~~~~~~~~~~~~~~~~ + To help maintainers and CI bots you should explicitly mark which tree your patch is targeting. Assuming that you use git, use the prefix flag:: @@ -217,8 +238,8 @@ flag:: Use ``net`` instead of ``net-next`` (always lower case) in the above for bug-fix ``net`` content. -How do I divide my work into patches? -------------------------------------- +Dividing work into patches +~~~~~~~~~~~~~~~~~~~~~~~~~~ Put yourself in the shoes of the reviewer. Each patch is read separately and therefore should constitute a comprehensible step towards your stated @@ -231,9 +252,11 @@ just do it. As a result, a sequence of smaller series gets merged quicker and with better review coverage. Re-posting large series also increases the mailing list traffic. -Is the comment style convention different for the networking content? ---------------------------------------------------------------------- -Yes, in a largely trivial way. Instead of this:: +Multi-line comments +~~~~~~~~~~~~~~~~~~~ + +Comment style convention is slightly different for networking and most of +the tree. Instead of this:: /* * foobar blah blah blah @@ -246,8 +269,8 @@ it is requested that you make it look like this:: * another line of text */ -What is "reverse xmas tree"? ----------------------------- +Local variable ordering ("reverse xmas tree", "RCS") +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Netdev has a convention for ordering local variables in functions. Order the variable declaration lines longest to shortest, e.g.:: @@ -259,13 +282,16 @@ Order the variable declaration lines longest to shortest, e.g.:: If there are dependencies between the variables preventing the ordering move the initialization out of line. -I am working in existing code which uses non-standard formatting. Which formatting should I use? ------------------------------------------------------------------------------------------------- -Make your code follow the most recent guidelines, so that eventually all code +Format precedence +~~~~~~~~~~~~~~~~~ + +When working in existing code which uses nonstandard formatting make +your code follow the most recent guidelines, so that eventually all code in the domain of netdev is in the preferred format. -I have received review feedback, when should I post a revised version of the patches? -------------------------------------------------------------------------------------- +Resending after review +~~~~~~~~~~~~~~~~~~~~~~ + Allow at least 24 hours to pass between postings. This will ensure reviewers from all geographical locations have a chance to chime in. Do not wait too long (weeks) between postings either as it will make it harder for reviewers @@ -275,8 +301,12 @@ Make sure you address all the feedback in your new posting. Do not post a new version of the code if the discussion about the previous version is still ongoing, unless directly instructed by a reviewer. -What level of testing is expected before I submit my change? ------------------------------------------------------------- +Testing +------- + +Expected level of testing +~~~~~~~~~~~~~~~~~~~~~~~~~ + At the very minimum your changes must survive an ``allyesconfig`` and an ``allmodconfig`` build with ``W=1`` set without new warnings or failures. @@ -287,43 +317,42 @@ and the patch series contains a set of kernel selftest for You are expected to test your changes on top of the relevant networking tree (``net`` or ``net-next``) and not e.g. a stable tree or ``linux-next``. -Can I reproduce the checks from patchwork on my local machine? --------------------------------------------------------------- +patchwork checks +~~~~~~~~~~~~~~~~ Checks in patchwork are mostly simple wrappers around existing kernel scripts, the sources are available at: https://github.com/kuba-moo/nipa/tree/master/tests -Running all the builds and checks locally is a pain, can I post my patches and have the patchwork bot validate them? --------------------------------------------------------------------------------------------------------------------- - -No, you must ensure that your patches are ready by testing them locally +**Do not** post your patches just to run them through the checks. +You must ensure that your patches are ready by testing them locally before posting to the mailing list. The patchwork build bot instance gets overloaded very easily and netdev@vger really doesn't need more traffic if we can help it. -netdevsim is great, can I extend it for my out-of-tree tests? -------------------------------------------------------------- +netdevsim +~~~~~~~~~ -No, ``netdevsim`` is a test vehicle solely for upstream tests. -(Please add your tests under ``tools/testing/selftests/``.) +``netdevsim`` is a test driver which can be used to exercise driver +configuration APIs without requiring capable hardware. +Mock-ups and tests based on ``netdevsim`` are strongly encouraged when +adding new APIs, but ``netdevsim`` in itself is **not** considered +a use case/user. You must also implement the new APIs in a real driver. -We also give no guarantees that ``netdevsim`` won't change in the future +We give no guarantees that ``netdevsim`` won't change in the future in a way which would break what would normally be considered uAPI. -Is netdevsim considered a "user" of an API? -------------------------------------------- - -Linux kernel has a long standing rule that no API should be added unless -it has a real, in-tree user. Mock-ups and tests based on ``netdevsim`` are -strongly encouraged when adding new APIs, but ``netdevsim`` in itself -is **not** considered a use case/user. +``netdevsim`` is reserved for use by upstream tests only, so any +new ``netdevsim`` features must be accompanied by selftests under +``tools/testing/selftests/``. -My company uses peer feedback in employee performance reviews. Can I ask netdev maintainers for feedback? ---------------------------------------------------------------------------------------------------------- +Testimonials / feedback +----------------------- -Yes, especially if you spend significant amount of time reviewing code +Some companies use peer feedback in employee performance reviews. +Please feel free to request feedback from netdev maintainers, +especially if you spend significant amount of time reviewing code and go out of your way to improve shared infrastructure. The feedback must be requested by you, the contributor, and will always From b9e05399d9273c8c066e73db1e6e85364003030c Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Mon, 10 Oct 2022 10:27:03 -0700 Subject: [PATCH 0097/1593] vdpa: merge functionally duplicated dev_features attributes We can merge VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES with VDPA_ATTR_DEV_FEATURES which is functionally equivalent. While at it, tweak the comment in header file to make user provioned device features distinguished from those supported by the parent mgmtdev device: the former of which can be inherited as a whole from the latter, or can be a subset of the latter if explicitly specified. Signed-off-by: Si-Wei Liu Message-Id: <1665422823-18364-1-git-send-email-si-wei.liu@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/vdpa.c | 2 +- include/uapi/linux/vdpa.h | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index febdc99b51a7b..41ed56362992b 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -855,7 +855,7 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms features_device = vdev->config->get_device_features(vdev); - if (nla_put_u64_64bit(msg, VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES, features_device, + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_FEATURES, features_device, VDPA_ATTR_PAD)) return -EMSGSIZE; diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 9bd79235c875f..54b649ab0f22b 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -53,11 +53,9 @@ enum vdpa_attr { VDPA_ATTR_DEV_VENDOR_ATTR_NAME, /* string */ VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, /* u64 */ + /* virtio features that are provisioned to the vDPA device */ VDPA_ATTR_DEV_FEATURES, /* u64 */ - /* virtio features that are supported by the vDPA device */ - VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES, /* u64 */ - /* new attributes must be added above here */ VDPA_ATTR_MAX, }; From c262f75cb6bb5a63828e72ce3b8fe808e5029479 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Wed, 12 Oct 2022 08:29:49 +0200 Subject: [PATCH 0098/1593] tools/virtio: initialize spinlocks in vring_test.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The virtio_device vqs_list spinlocks must be initialized before use to prevent functions that manipulate the device virtualqueues, such as vring_new_virtqueue(), from blocking indefinitely. Signed-off-by: Ricardo Cañuelo Message-Id: <20221012062949.1526176-1-ricardo.canuelo@collabora.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo --- tools/virtio/vringh_test.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c index fa87b58bd5fa5..98ff808d6f0c2 100644 --- a/tools/virtio/vringh_test.c +++ b/tools/virtio/vringh_test.c @@ -308,6 +308,7 @@ static int parallel_test(u64 features, gvdev.vdev.features = features; INIT_LIST_HEAD(&gvdev.vdev.vqs); + spin_lock_init(&gvdev.vdev.vqs_list_lock); gvdev.to_host_fd = to_host[1]; gvdev.notifies = 0; @@ -455,6 +456,7 @@ int main(int argc, char *argv[]) getrange = getrange_iov; vdev.features = 0; INIT_LIST_HEAD(&vdev.vqs); + spin_lock_init(&vdev.vqs_list_lock); while (argv[1]) { if (strcmp(argv[1], "--indirect") == 0) From 258896fcc786b4e7db238eba26f6dd080e0ff41e Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Sat, 15 Oct 2022 23:41:26 -0400 Subject: [PATCH 0099/1593] virtio-blk: use a helper to handle request queuing errors Define a new helper function, virtblk_fail_to_queue(), to clean up the error handling code in virtio_queue_rq(). Signed-off-by: Dmitry Fomichev Message-Id: <20221016034127.330942-2-dmitry.fomichev@wdc.com> Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 68bd2f7961b3f..271a9878fa8b3 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -315,6 +315,19 @@ static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx) virtqueue_notify(vq->vq); } +static blk_status_t virtblk_fail_to_queue(struct request *req, int rc) +{ + virtblk_cleanup_cmd(req); + switch (rc) { + case -ENOSPC: + return BLK_STS_DEV_RESOURCE; + case -ENOMEM: + return BLK_STS_RESOURCE; + default: + return BLK_STS_IOERR; + } +} + static blk_status_t virtblk_prep_rq(struct blk_mq_hw_ctx *hctx, struct virtio_blk *vblk, struct request *req, @@ -327,10 +340,8 @@ static blk_status_t virtblk_prep_rq(struct blk_mq_hw_ctx *hctx, return status; vbr->sg_table.nents = virtblk_map_data(hctx, req, vbr); - if (unlikely(vbr->sg_table.nents < 0)) { - virtblk_cleanup_cmd(req); - return BLK_STS_RESOURCE; - } + if (unlikely(vbr->sg_table.nents < 0)) + return virtblk_fail_to_queue(req, -ENOMEM); blk_mq_start_request(req); @@ -364,15 +375,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_stop_hw_queue(hctx); spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); virtblk_unmap_data(req, vbr); - virtblk_cleanup_cmd(req); - switch (err) { - case -ENOSPC: - return BLK_STS_DEV_RESOURCE; - case -ENOMEM: - return BLK_STS_RESOURCE; - default: - return BLK_STS_IOERR; - } + return virtblk_fail_to_queue(req, err); } if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) From 8e6a8d7a3dd93e93645be061692cb4ee6702dff0 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 26 Dec 2022 16:13:27 +0900 Subject: [PATCH 0100/1593] net: ethernet: renesas: rswitch: Fix error path in renesas_eth_sw_probe() If rswitch_init() returns non-zero and this driver is re-probed, the following error happens: renesas_eth_sw e6880000.ethernet: Unbalanced pm_runtime_enable! So, fix error path in renesas_eth_sw_probe(). Fixes: 3590918b5d07 ("net: ethernet: renesas: Add support for "Ethernet Switch"") Signed-off-by: Yoshihiro Shimoda Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/rswitch.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index e42ceaa0099fd..473d86bdf97d6 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1786,6 +1786,11 @@ static int renesas_eth_sw_probe(struct platform_device *pdev) pm_runtime_get_sync(&pdev->dev); ret = rswitch_init(priv); + if (ret < 0) { + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return ret; + } device_set_wakeup_capable(&pdev->dev, 1); From bd2adfe3b3b863c883309bcc915f13c831ca88da Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 26 Dec 2022 16:13:28 +0900 Subject: [PATCH 0101/1593] net: ethernet: renesas: rswitch: Fix getting mac address from device tree To get mac address from device tree which is from each ethernet-port, fix the first argument of of_get_ethdev_address(). Fixes: 3590918b5d07 ("net: ethernet: renesas: Add support for "Ethernet Switch"") Signed-off-by: Yoshihiro Shimoda Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/rswitch.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 473d86bdf97d6..6441892636dba 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1578,6 +1578,7 @@ static int rswitch_device_alloc(struct rswitch_private *priv, int index) { struct platform_device *pdev = priv->pdev; struct rswitch_device *rdev; + struct device_node *port; struct net_device *ndev; int err; @@ -1606,7 +1607,9 @@ static int rswitch_device_alloc(struct rswitch_private *priv, int index) netif_napi_add(ndev, &rdev->napi, rswitch_poll); - err = of_get_ethdev_address(pdev->dev.of_node, ndev); + port = rswitch_get_port_node(rdev); + err = of_get_ethdev_address(port, ndev); + of_node_put(port); if (err) { if (is_valid_ether_addr(rdev->etha->mac_addr)) eth_hw_addr_set(ndev, rdev->etha->mac_addr); From 0020ae2a4aa81becd182231bf48acd66c86c86dd Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Mon, 26 Dec 2022 22:19:36 -0500 Subject: [PATCH 0102/1593] bnxt_en: fix devlink port registration to netdev We don't register a devlink port in case of a VF so avoid setting the devlink pointer to netdev. Also, SET_NETDEV_DEVLINK_PORT has to be moved so that we determine whether the device is PF/VF first. This fixes the NULL pointer dereference of devlink_port->devlink when creating VFs: BUG: kernel NULL pointer dereference, address: 0000000000000160 PGD 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 14 PID: 388 Comm: kworker/14:1 Kdump: loaded Not tainted 6.1.0-rc8 #5 Hardware name: Dell Inc. PowerEdge R750/06V45N, BIOS 1.3.8 08/31/2021 Workqueue: events work_for_cpu_fn RIP: 0010:devlink_nl_port_handle_size+0xb/0x50 Code: 83 c4 10 5b 5d c3 cc cc cc cc b8 a6 ff ff ff eb de e8 c9 59 21 00 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 53 48 8b 47 20 <48> 8b a8 60 01 00 00 48 8b 45 60 48 8b 38 e8 92 90 1a 00 48 8b 7d RSP: 0018:ff4fe5394846fcd8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000794 RCX: 0000000000000000 RDX: ff1f129683a30a40 RSI: 0000000000000008 RDI: ff1f1296bb496188 RBP: 0000000000000334 R08: 0000000000000cc0 R09: 0000000000000000 R10: ff1f1296bb494298 R11: ffffffffffffffc0 R12: 0000000000000000 R13: 0000000000000000 R14: ff1f1296bb494000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ff1f129e5fa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000160 CR3: 000000131f610006 CR4: 0000000000771ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: if_nlmsg_size+0x14a/0x220 rtmsg_ifinfo_build_skb+0x3c/0x100 rtmsg_ifinfo+0x9c/0xc0 register_netdevice+0x59d/0x670 register_netdev+0x1c/0x40 bnxt_init_one+0x674/0xa60 [bnxt_en] local_pci_probe+0x42/0x80 work_for_cpu_fn+0x13/0x20 process_one_work+0x1e2/0x3b0 ? rescuer_thread+0x390/0x390 worker_thread+0x1c4/0x3a0 ? rescuer_thread+0x390/0x390 kthread+0xd6/0x100 ? kthread_complete_and_exit+0x20/0x20 Fixes: ac73d4bf2cda ("net: make drivers to use SET_NETDEV_DEVLINK_PORT to set devlink_port") Cc: Jiri Pirko Signed-off-by: Vikas Gupta Reviewed-by: Andy Gospodarek Reviewed-by: Kalesh Anakkur Purayil Reviewed-by: Damodharam Ammepalli Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4c7d07c684c49..93d32b333007f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13591,7 +13591,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENOMEM; bp = netdev_priv(dev); - SET_NETDEV_DEVLINK_PORT(dev, &bp->dl_port); bp->board_idx = ent->driver_data; bp->msg_enable = BNXT_DEF_MSG_ENABLE; bnxt_set_max_func_irqs(bp, max_irqs); @@ -13599,6 +13598,10 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (bnxt_vf_pciid(bp->board_idx)) bp->flags |= BNXT_FLAG_VF; + /* No devlink port registration in case of a VF */ + if (BNXT_PF(bp)) + SET_NETDEV_DEVLINK_PORT(dev, &bp->dl_port); + if (pdev->msix_cap) bp->flags |= BNXT_FLAG_MSIX_CAP; From bbfc17e50ba2ed18dfef46b1c433d50a58566bf1 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 26 Dec 2022 22:19:37 -0500 Subject: [PATCH 0103/1593] bnxt_en: Simplify bnxt_xdp_buff_init() bnxt_xdp_buff_init() does not modify the data_ptr or the len parameters, so no need to pass in the addresses of these parameters. Fixes: b231c3f3414c ("bnxt: refactor bnxt_rx_xdp to separate xdp_init_buff/xdp_prepare_buff") Reviewed-by: Andy Gospodarek Reviewed-by: Somnath Kotur Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 6 +++--- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 93d32b333007f..b8639b7e6b2b3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1925,7 +1925,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, dma_addr = rx_buf->mapping; if (bnxt_xdp_attached(bp, rxr)) { - bnxt_xdp_buff_init(bp, rxr, cons, &data_ptr, &len, &xdp); + bnxt_xdp_buff_init(bp, rxr, cons, data_ptr, len, &xdp); if (agg_bufs) { u32 frag_len = bnxt_rx_agg_pages_xdp(bp, cpr, &xdp, cp_cons, agg_bufs, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index c3065ec0a4798..1847f191577d1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -177,7 +177,7 @@ bool bnxt_xdp_attached(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) } void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, - u16 cons, u8 **data_ptr, unsigned int *len, + u16 cons, u8 *data_ptr, unsigned int len, struct xdp_buff *xdp) { struct bnxt_sw_rx_bd *rx_buf; @@ -191,13 +191,13 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, offset = bp->rx_offset; mapping = rx_buf->mapping - bp->rx_dma_offset; - dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir); + dma_sync_single_for_cpu(&pdev->dev, mapping + offset, len, bp->rx_dir); if (bp->xdp_has_frags) buflen = BNXT_PAGE_MODE_BUF_SIZE + offset; xdp_init_buff(xdp, buflen, &rxr->xdp_rxq); - xdp_prepare_buff(xdp, *data_ptr - offset, offset, *len, false); + xdp_prepare_buff(xdp, data_ptr - offset, offset, len, false); } void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 505911ae095d3..2bbdb8e7c506b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -27,7 +27,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, bool bnxt_xdp_attached(struct bnxt *bp, struct bnxt_rx_ring_info *rxr); void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, - u16 cons, u8 **data_ptr, unsigned int *len, + u16 cons, u8 *data_ptr, unsigned int len, struct xdp_buff *xdp); void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, struct xdp_buff *xdp); From 9b3e607871ea5ee90f10f5be3965fc07f2aa3ef7 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 26 Dec 2022 22:19:38 -0500 Subject: [PATCH 0104/1593] bnxt_en: Fix XDP RX path The XDP program can change the starting address of the RX data buffer and this information needs to be passed back from bnxt_rx_xdp() to bnxt_rx_pkt() for the XDP_PASS case so that the SKB can point correctly to the modified buffer address. Add back the data_ptr parameter to bnxt_rx_xdp() to make this work. Fixes: b231c3f3414c ("bnxt: refactor bnxt_rx_xdp to separate xdp_init_buff/xdp_prepare_buff") Reviewed-by: Andy Gospodarek Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 7 +++++-- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b8639b7e6b2b3..1acabfe26db18 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1940,7 +1940,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } if (xdp_active) { - if (bnxt_rx_xdp(bp, rxr, cons, xdp, data, &len, event)) { + if (bnxt_rx_xdp(bp, rxr, cons, xdp, data, &data_ptr, &len, event)) { rc = 1; goto next_rx; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 1847f191577d1..2ceeaa818c1c6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -222,7 +222,8 @@ void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, * false - packet should be passed to the stack. */ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, - struct xdp_buff xdp, struct page *page, unsigned int *len, u8 *event) + struct xdp_buff xdp, struct page *page, u8 **data_ptr, + unsigned int *len, u8 *event) { struct bpf_prog *xdp_prog = READ_ONCE(rxr->xdp_prog); struct bnxt_tx_ring_info *txr; @@ -255,8 +256,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, *event &= ~BNXT_RX_EVENT; *len = xdp.data_end - xdp.data; - if (orig_data != xdp.data) + if (orig_data != xdp.data) { offset = xdp.data - xdp.data_hard_start; + *data_ptr = xdp.data_hard_start + offset; + } switch (act) { case XDP_PASS: diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 2bbdb8e7c506b..ea430d6961df3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -18,8 +18,8 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, struct xdp_buff *xdp); void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts); bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, - struct xdp_buff xdp, struct page *page, unsigned int *len, - u8 *event); + struct xdp_buff xdp, struct page *page, u8 **data_ptr, + unsigned int *len, u8 *event); int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp); int bnxt_xdp_xmit(struct net_device *dev, int num_frames, struct xdp_frame **frames, u32 flags); From 1abeacc1979fa4a756695f5030791d8f0fa934b9 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 26 Dec 2022 22:19:39 -0500 Subject: [PATCH 0105/1593] bnxt_en: Fix first buffer size calculations for XDP multi-buffer The size of the first buffer is always page size, and the useable space is the page size minus the offset and the skb_shared_info size. Make sure SKB and XDP buf sizes match so that the skb_shared_info is at the same offset seen from the SKB and XDP_BUF. build_skb() should be passed PAGE_SIZE. xdp_init_buff() should be passed PAGE_SIZE as well. xdp_get_shared_info_from_buff() will automatically deduct the skb_shared_info size if the XDP buffer has frags. There is no need to keep bp->xdp_has_frags. Change BNXT_PAGE_MODE_BUF_SIZE to BNXT_MAX_PAGE_MODE_MTU_SBUF since this constant is really the MTU with ethernet header size subtracted. Also fix the BNXT_MAX_PAGE_MODE_MTU macro with proper parentheses. Fixes: 32861236190b ("bnxt: change receive ring space parameters") Reviewed-by: Somnath Kotur Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 +++++---- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 15 +++++++++++---- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 7 +------ 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1acabfe26db18..a21c6829e301e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -991,8 +991,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, dma_addr -= bp->rx_dma_offset; dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, DMA_ATTR_WEAK_ORDERING); - skb = build_skb(page_address(page), BNXT_PAGE_MODE_BUF_SIZE + - bp->rx_dma_offset); + skb = build_skb(page_address(page), PAGE_SIZE); if (!skb) { __free_page(page); return NULL; @@ -3969,8 +3968,10 @@ void bnxt_set_ring_params(struct bnxt *bp) bp->rx_agg_ring_mask = (bp->rx_agg_nr_pages * RX_DESC_CNT) - 1; if (BNXT_RX_PAGE_MODE(bp)) { - rx_space = BNXT_PAGE_MODE_BUF_SIZE; - rx_size = BNXT_MAX_PAGE_MODE_MTU; + rx_space = PAGE_SIZE; + rx_size = PAGE_SIZE - + ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8) - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } else { rx_size = SKB_DATA_ALIGN(BNXT_RX_COPY_THRESH + NET_IP_ALIGN); rx_space = rx_size + NET_SKB_PAD + diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 41c6dd0ae447e..5163ef4a49ea3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -591,12 +591,20 @@ struct nqe_cn { #define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT) #define BNXT_MAX_MTU 9500 -#define BNXT_PAGE_MODE_BUF_SIZE \ + +/* First RX buffer page in XDP multi-buf mode + * + * +-------------------------------------------------------------------------+ + * | XDP_PACKET_HEADROOM | bp->rx_buf_use_size | skb_shared_info| + * | (bp->rx_dma_offset) | | | + * +-------------------------------------------------------------------------+ + */ +#define BNXT_MAX_PAGE_MODE_MTU_SBUF \ ((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN - \ XDP_PACKET_HEADROOM) #define BNXT_MAX_PAGE_MODE_MTU \ - BNXT_PAGE_MODE_BUF_SIZE - \ - SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info)) + (BNXT_MAX_PAGE_MODE_MTU_SBUF - \ + SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info))) #define BNXT_MIN_PKT_SIZE 52 @@ -2134,7 +2142,6 @@ struct bnxt { #define BNXT_DUMP_CRASH 1 struct bpf_prog *xdp_prog; - u8 xdp_has_frags; struct bnxt_ptp_cfg *ptp_cfg; u8 ptp_all_rx_tstamp; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 2ceeaa818c1c6..36d5202c0aeec 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -193,9 +193,6 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, mapping = rx_buf->mapping - bp->rx_dma_offset; dma_sync_single_for_cpu(&pdev->dev, mapping + offset, len, bp->rx_dir); - if (bp->xdp_has_frags) - buflen = BNXT_PAGE_MODE_BUF_SIZE + offset; - xdp_init_buff(xdp, buflen, &rxr->xdp_rxq); xdp_prepare_buff(xdp, data_ptr - offset, offset, len, false); } @@ -404,10 +401,8 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n"); return -EOPNOTSUPP; } - if (prog) { + if (prog) tx_xdp = bp->rx_nr_rings; - bp->xdp_has_frags = prog->aux->xdp_has_frags; - } tc = netdev_get_num_tc(dev); if (!tc) From a056ebcc30e2f78451d66f615d2f6bdada3e6438 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 26 Dec 2022 22:19:40 -0500 Subject: [PATCH 0106/1593] bnxt_en: Fix HDS and jumbo thresholds for RX packets The recent XDP multi-buffer feature has introduced regressions in the setting of HDS and jumbo thresholds. HDS was accidentally disabled in the nornmal mode without XDP. This patch restores jumbo HDS placement when not in XDP mode. In XDP multi-buffer mode, HDS should be disabled and the jumbo threshold should be set to the usable page size in the first page buffer. Fixes: 32861236190b ("bnxt: change receive ring space parameters") Reviewed-by: Mohammad Shuab Siddique Reviewed-by: Ajit Khaparde Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a21c6829e301e..16ce7a90610c5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5399,15 +5399,16 @@ static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, u16 vnic_id) req->flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT); req->enables = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_JUMBO_THRESH_VALID); - if (BNXT_RX_PAGE_MODE(bp) && !BNXT_RX_JUMBO_MODE(bp)) { + if (BNXT_RX_PAGE_MODE(bp)) { + req->jumbo_thresh = cpu_to_le16(bp->rx_buf_use_size); + } else { req->flags |= cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 | VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6); req->enables |= cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID); + req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh); + req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh); } - /* thresholds not implemented in firmware yet */ - req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh); - req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh); req->vnic_id = cpu_to_le32(vnic->fw_vnic_id); return hwrm_req_send(bp, req); } From b659b613cea2ae39746ca8bd2b69d1985dd9d770 Mon Sep 17 00:00:00 2001 From: Ferry Toth Date: Thu, 22 Dec 2022 21:53:02 +0100 Subject: [PATCH 0107/1593] Revert "usb: ulpi: defer ulpi_register on ulpi_read_id timeout" This reverts commit 8a7b31d545d3a15f0e6f5984ae16f0ca4fd76aac. This patch results in some qemu test failures, specifically xilinx-zynq-a9 machine and zynq-zc702 as well as zynq-zed devicetree files, when trying to boot from USB drive. Link: https://lore.kernel.org/lkml/20221220194334.GA942039@roeck-us.net/ Fixes: 8a7b31d545d3 ("usb: ulpi: defer ulpi_register on ulpi_read_id timeout") Cc: stable@vger.kernel.org Reported-by: Guenter Roeck Signed-off-by: Ferry Toth Link: https://lore.kernel.org/r/20221222205302.45761-1-ftoth@exalondelft.nl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/ulpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 60e8174686a17..d7c8461976ce0 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -207,7 +207,7 @@ static int ulpi_read_id(struct ulpi *ulpi) /* Test the interface */ ret = ulpi_write(ulpi, ULPI_SCRATCH, 0xaa); if (ret < 0) - return ret; + goto err; ret = ulpi_read(ulpi, ULPI_SCRATCH); if (ret < 0) From 2de5bba5890f6604a997c75e754df8082386c9f7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Dec 2022 17:57:20 +0100 Subject: [PATCH 0108/1593] usb: fotg210: fix OTG-only build The fotg210 module combines the HCD and OTG drivers, which then fails to build when only the USB gadget support is enabled in the kernel but host support is not: aarch64-linux-ld: drivers/usb/fotg210/fotg210-core.o: in function `fotg210_init': fotg210-core.c:(.init.text+0xc): undefined reference to `usb_disabled' Move the check for usb_disabled() after the check for the HCD module, and let the OTG driver still be probed in this configuration. A nicer approach might be to have the common portion built as a library module, with the two platform other files registering their own platform_driver instances separately. Fixes: ddacd6ef44ca ("usb: fotg210: Fix Kconfig for USB host modules") Reviewed-by: Linus Walleij Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20221215165728.2062984-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/fotg210/fotg210-core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/usb/fotg210/fotg210-core.c b/drivers/usb/fotg210/fotg210-core.c index 8a54edf921ac2..ee740a6da463f 100644 --- a/drivers/usb/fotg210/fotg210-core.c +++ b/drivers/usb/fotg210/fotg210-core.c @@ -144,10 +144,7 @@ static struct platform_driver fotg210_driver = { static int __init fotg210_init(void) { - if (usb_disabled()) - return -ENODEV; - - if (IS_ENABLED(CONFIG_USB_FOTG210_HCD)) + if (IS_ENABLED(CONFIG_USB_FOTG210_HCD) && !usb_disabled()) fotg210_hcd_init(); return platform_driver_register(&fotg210_driver); } From a6ce72c0fb6041f9871f880b2d02b294f7f49cb4 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Nov 2022 15:17:52 +0200 Subject: [PATCH 0109/1593] vdpa/mlx5: Fix rule forwarding VLAN to TIR Set the VLAN id to the header values field instead of overwriting the headers criteria field. Before this fix, VLAN filtering would not really work and tagged packets would be forwarded unfiltered to the TIR. Fixes: baf2ad3f6a98 ("vdpa/mlx5: Add RX MAC VLAN filter support") Acked-by: Jason Wang Signed-off-by: Eli Cohen Message-Id: <20221114131759.57883-2-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 90913365def43..3fb06dcee9436 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1468,11 +1468,13 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); eth_broadcast_addr(dmac_c); ether_addr_copy(dmac_v, mac); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); + if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); + } if (tagged) { MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, vid); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid); } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; From 5aec804936bbff182081f1cdc271fcb76af1a4ff Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Nov 2022 15:17:53 +0200 Subject: [PATCH 0110/1593] vdpa/mlx5: Return error on vlan ctrl commands if not supported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check if VIRTIO_NET_F_CTRL_VLAN is negotiated and return error if control VQ command is received. Signed-off-by: Eli Cohen Message-Id: <20221114131759.57883-3-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Acked-by: Eugenio Pérez --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 3fb06dcee9436..01da229d22da3 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1823,6 +1823,9 @@ static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd) size_t read; u16 id; + if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN))) + return status; + switch (cmd) { case VIRTIO_NET_CTRL_VLAN_ADD: read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); From 1ab53760d322c82fb4cb5e81b5817065801e3ec4 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Nov 2022 15:17:54 +0200 Subject: [PATCH 0111/1593] vdpa/mlx5: Fix wrong mac address deletion Delete the old MAC from the table and not the new one which is not there yet. Fixes: baf2ad3f6a98 ("vdpa/mlx5: Add RX MAC VLAN filter support") Acked-by: Jason Wang Signed-off-by: Eli Cohen Message-Id: <20221114131759.57883-4-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 01da229d22da3..b06260a376800 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1686,7 +1686,7 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) /* Need recreate the flow table entry, so that the packet could forward back */ - mac_vlan_del(ndev, ndev->config.mac, 0, false); + mac_vlan_del(ndev, mac_back, 0, false); if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) { mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n"); From 0dbc1b4ae07d003b2e88ba9d4142846320f8e349 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Nov 2022 15:17:55 +0200 Subject: [PATCH 0112/1593] vdpa/mlx5: Avoid using reslock in event_handler event_handler runs under atomic context and may not acquire reslock. We can still guarantee that the handler won't be called after suspend by clearing nb_registered, unregistering the handler and flushing the workqueue. Signed-off-by: Eli Cohen Message-Id: <20221114131759.57883-5-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index b06260a376800..98dd8ce8af265 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2845,8 +2845,8 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev) int i; down_write(&ndev->reslock); - mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); ndev->nb_registered = false; + mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); flush_workqueue(ndev->mvdev.wq); for (i = 0; i < ndev->cur_num_vqs; i++) { mvq = &ndev->vqs[i]; @@ -3024,7 +3024,7 @@ static void update_carrier(struct work_struct *work) else ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); - if (ndev->config_cb.callback) + if (ndev->nb_registered && ndev->config_cb.callback) ndev->config_cb.callback(ndev->config_cb.private); kfree(wqent); @@ -3041,21 +3041,13 @@ static int event_handler(struct notifier_block *nb, unsigned long event, void *p switch (eqe->sub_type) { case MLX5_PORT_CHANGE_SUBTYPE_DOWN: case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - down_read(&ndev->reslock); - if (!ndev->nb_registered) { - up_read(&ndev->reslock); - return NOTIFY_DONE; - } wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); - if (!wqent) { - up_read(&ndev->reslock); + if (!wqent) return NOTIFY_DONE; - } wqent->mvdev = &ndev->mvdev; INIT_WORK(&wqent->work, update_carrier); queue_work(ndev->mvdev.wq, &wqent->work); - up_read(&ndev->reslock); ret = NOTIFY_OK; break; default: @@ -3242,8 +3234,8 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * struct workqueue_struct *wq; if (ndev->nb_registered) { - mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); ndev->nb_registered = false; + mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); } wq = mvdev->wq; mvdev->wq = NULL; From 38fc462f57ef4e5dc722bab6824854b105de8aa2 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Nov 2022 15:17:56 +0200 Subject: [PATCH 0113/1593] vdpa/mlx5: Avoid overwriting CVQ iotlb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When qemu uses different address spaces for data and control virtqueues, the current code would overwrite the control virtqueue iotlb through the dup_iotlb call. Fix this by referring to the address space identifier and the group to asid mapping to determine which mapping needs to be updated. We also move the address space logic from mlx5 net to core directory. Reported-by: Eugenio Pérez Signed-off-by: Eli Cohen Message-Id: <20221114131759.57883-6-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Acked-by: Eugenio Pérez --- drivers/vdpa/mlx5/core/mlx5_vdpa.h | 5 +-- drivers/vdpa/mlx5/core/mr.c | 44 ++++++++++++++++----------- drivers/vdpa/mlx5/net/mlx5_vnet.c | 49 ++++++------------------------ 3 files changed, 39 insertions(+), 59 deletions(-) diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index 6af9fdbb86b7a..058fbe28107e9 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -116,8 +116,9 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in, int inlen); int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey); int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, - bool *change_map); -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb); + bool *change_map, unsigned int asid); +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, + unsigned int asid); void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev); #define mlx5_vdpa_warn(__dev, format, ...) \ diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index a639b9208d414..a4d7ee2339fa5 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -511,7 +511,8 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) mutex_unlock(&mr->mkey_mtx); } -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb, unsigned int asid) { struct mlx5_vdpa_mr *mr = &mvdev->mr; int err; @@ -519,42 +520,49 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb if (mr->initialized) return 0; - if (iotlb) - err = create_user_mr(mvdev, iotlb); - else - err = create_dma_mr(mvdev, mr); + if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) { + if (iotlb) + err = create_user_mr(mvdev, iotlb); + else + err = create_dma_mr(mvdev, mr); - if (err) - return err; + if (err) + return err; + } - err = dup_iotlb(mvdev, iotlb); - if (err) - goto out_err; + if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) { + err = dup_iotlb(mvdev, iotlb); + if (err) + goto out_err; + } mr->initialized = true; return 0; out_err: - if (iotlb) - destroy_user_mr(mvdev, mr); - else - destroy_dma_mr(mvdev, mr); + if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) { + if (iotlb) + destroy_user_mr(mvdev, mr); + else + destroy_dma_mr(mvdev, mr); + } return err; } -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, + unsigned int asid) { int err; mutex_lock(&mvdev->mr.mkey_mtx); - err = _mlx5_vdpa_create_mr(mvdev, iotlb); + err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid); mutex_unlock(&mvdev->mr.mkey_mtx); return err; } int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, - bool *change_map) + bool *change_map, unsigned int asid) { struct mlx5_vdpa_mr *mr = &mvdev->mr; int err = 0; @@ -566,7 +574,7 @@ int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *io *change_map = true; } if (!*change_map) - err = _mlx5_vdpa_create_mr(mvdev, iotlb); + err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid); mutex_unlock(&mr->mkey_mtx); return err; diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 98dd8ce8af265..3a6dbbc6440d4 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2394,7 +2394,8 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev) } } -static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb, unsigned int asid) { struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); int err; @@ -2406,7 +2407,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb teardown_driver(ndev); mlx5_vdpa_destroy_mr(mvdev); - err = mlx5_vdpa_create_mr(mvdev, iotlb); + err = mlx5_vdpa_create_mr(mvdev, iotlb, asid); if (err) goto err_mr; @@ -2587,7 +2588,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) ++mvdev->generation; if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { - if (mlx5_vdpa_create_mr(mvdev, NULL)) + if (mlx5_vdpa_create_mr(mvdev, NULL, 0)) mlx5_vdpa_warn(mvdev, "create MR failed\n"); } up_write(&ndev->reslock); @@ -2623,41 +2624,20 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) return mvdev->generation; } -static int set_map_control(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) -{ - u64 start = 0ULL, last = 0ULL - 1; - struct vhost_iotlb_map *map; - int err = 0; - - spin_lock(&mvdev->cvq.iommu_lock); - vhost_iotlb_reset(mvdev->cvq.iotlb); - - for (map = vhost_iotlb_itree_first(iotlb, start, last); map; - map = vhost_iotlb_itree_next(map, start, last)) { - err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, - map->last, map->addr, map->perm); - if (err) - goto out; - } - -out: - spin_unlock(&mvdev->cvq.iommu_lock); - return err; -} - -static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, + unsigned int asid) { bool change_map; int err; - err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map); + err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid); if (err) { mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err); return err; } if (change_map) - err = mlx5_vdpa_change_map(mvdev, iotlb); + err = mlx5_vdpa_change_map(mvdev, iotlb, asid); return err; } @@ -2670,16 +2650,7 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, int err = -EINVAL; down_write(&ndev->reslock); - if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) { - err = set_map_data(mvdev, iotlb); - if (err) - goto out; - } - - if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) - err = set_map_control(mvdev, iotlb); - -out: + err = set_map_data(mvdev, iotlb, asid); up_write(&ndev->reslock); return err; } @@ -3182,7 +3153,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, goto err_mpfs; if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { - err = mlx5_vdpa_create_mr(mvdev, NULL); + err = mlx5_vdpa_create_mr(mvdev, NULL, 0); if (err) goto err_res; } From 344686136d73501a18a9621de690ff7824a3d129 Mon Sep 17 00:00:00 2001 From: Shaoqin Huang Date: Thu, 20 Oct 2022 23:27:33 -0700 Subject: [PATCH 0114/1593] virtio_pci: use helper function is_power_of_2() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use helper function is_power_of_2() to check if num is power of two. Minor readability improvement. Signed-off-by: Shaoqin Huang Message-Id: <20221021062734.228881-2-shaoqin.huang@intel.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Xuan Zhuo --- drivers/virtio/virtio_pci_modern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index c3b9f27618497..207294bd7b9d6 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -310,7 +310,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (!num || vp_modern_get_queue_enable(mdev, index)) return ERR_PTR(-ENOENT); - if (num & (num - 1)) { + if (!is_power_of_2(num)) { dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num); return ERR_PTR(-EINVAL); } From b9d978a89296c57fbbbd8ea647c303ce4d37028f Mon Sep 17 00:00:00 2001 From: Shaoqin Huang Date: Thu, 20 Oct 2022 23:27:34 -0700 Subject: [PATCH 0115/1593] virtio_ring: use helper function is_power_of_2() Use helper function is_power_of_2() to check if num is power of two. Minor readability improvement. Signed-off-by: Shaoqin Huang Message-Id: <20221021062734.228881-3-shaoqin.huang@intel.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 2e7689bb933b8..723c4e29e1d3b 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1052,7 +1052,7 @@ static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, dma_addr_t dma_addr; /* We assume num is a power of 2. */ - if (num & (num - 1)) { + if (!is_power_of_2(num)) { dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); return -EINVAL; } From a9f0a19ff7700cc8a30db2496f40d18490dcb9df Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Oct 2022 14:37:56 +0100 Subject: [PATCH 0116/1593] RDMA/mlx5: remove variable i Variable i is just being incremented and it's never used anywhere else. The variable and the increment are redundant so remove it. Signed-off-by: Colin Ian King Message-Id: <20221024133756.2158497-1-colin.i.king@gmail.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/core/mr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index a4d7ee2339fa5..0a1e0b0dc37e3 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -311,7 +311,6 @@ static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 u64 st; u64 sz; int err; - int i = 0; st = start; while (size) { @@ -336,7 +335,6 @@ static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 mr->num_directs++; mr->num_klms++; st += sz; - i++; } list_splice_tail(&tmp, &mr->head); return 0; From b66ead2d0ecac00c3a06a6218af5411cb5fcb5d5 Mon Sep 17 00:00:00 2001 From: Angus Chen Date: Tue, 1 Nov 2022 19:16:54 +0800 Subject: [PATCH 0117/1593] virtio_pci: modify ENOENT to EINVAL Virtio_crypto use max_data_queues+1 to setup vqs, we use vp_modern_get_num_queues to protect the vq range in setup_vq. We could enter index >= vp_modern_get_num_queues(mdev) in setup_vq if common->num_queues is not set well,and it return -ENOENT. It is better to use -EINVAL instead. Signed-off-by: Angus Chen Message-Id: <20221101111655.1947-1-angus.chen@jaguarmicro.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_modern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 207294bd7b9d6..9e496e288cfad 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -303,7 +303,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, int err; if (index >= vp_modern_get_num_queues(mdev)) - return ERR_PTR(-ENOENT); + return ERR_PTR(-EINVAL); /* Check if queue is either not available or already active. */ num = vp_modern_get_queue_size(mdev, index); From 75e4ab9735a5a70612dd06461ca372b897bf371c Mon Sep 17 00:00:00 2001 From: Shaomin Deng Date: Sat, 5 Nov 2022 11:51:51 -0400 Subject: [PATCH 0118/1593] tools: Delete the unneeded semicolon after curly braces Unneeded semicolon after curly braces, so delete it. Signed-off-by: Shaomin Deng Message-Id: <20221105155151.12155-1-dengshaomin@cdjrlc.com> Signed-off-by: Michael S. Tsirkin --- tools/virtio/virtio-trace/trace-agent-ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/virtio/virtio-trace/trace-agent-ctl.c b/tools/virtio/virtio-trace/trace-agent-ctl.c index 73d253d4b559a..39860be6e2d86 100644 --- a/tools/virtio/virtio-trace/trace-agent-ctl.c +++ b/tools/virtio/virtio-trace/trace-agent-ctl.c @@ -75,7 +75,7 @@ static int wait_order(int ctl_fd) if (ret) break; - }; + } return ret; From aeca7ff254843d49a8739f07f7dab1341450111d Mon Sep 17 00:00:00 2001 From: ruanjinjie Date: Thu, 10 Nov 2022 16:23:48 +0800 Subject: [PATCH 0119/1593] vdpa_sim: fix possible memory leak in vdpasim_net_init() and vdpasim_blk_init() Inject fault while probing module, if device_register() fails in vdpasim_net_init() or vdpasim_blk_init(), but the refcount of kobject is not decreased to 0, the name allocated in dev_set_name() is leaked. Fix this by calling put_device(), so that name can be freed in callback function kobject_cleanup(). (vdpa_sim_net) unreferenced object 0xffff88807eebc370 (size 16): comm "modprobe", pid 3848, jiffies 4362982860 (age 18.153s) hex dump (first 16 bytes): 76 64 70 61 73 69 6d 5f 6e 65 74 00 6b 6b 6b a5 vdpasim_net.kkk. backtrace: [] __kmalloc_node_track_caller+0x4e/0x150 [] kstrdup+0x33/0x60 [] kobject_set_name_vargs+0x41/0x110 [] dev_set_name+0xab/0xe0 [] device_add+0xe3/0x1a80 [] 0xffffffffa0270013 [] do_one_initcall+0x87/0x2e0 [] do_init_module+0x1ab/0x640 [] load_module+0x5d00/0x77f0 [] __do_sys_finit_module+0x110/0x1b0 [] do_syscall_64+0x35/0x80 [] entry_SYSCALL_64_after_hwframe+0x46/0xb0 (vdpa_sim_blk) unreferenced object 0xffff8881070c1250 (size 16): comm "modprobe", pid 6844, jiffies 4364069319 (age 17.572s) hex dump (first 16 bytes): 76 64 70 61 73 69 6d 5f 62 6c 6b 00 6b 6b 6b a5 vdpasim_blk.kkk. backtrace: [] __kmalloc_node_track_caller+0x4e/0x150 [] kstrdup+0x33/0x60 [] kobject_set_name_vargs+0x41/0x110 [] dev_set_name+0xab/0xe0 [] device_add+0xe3/0x1a80 [] 0xffffffffa0220013 [] do_one_initcall+0x87/0x2e0 [] do_init_module+0x1ab/0x640 [] load_module+0x5d00/0x77f0 [] __do_sys_finit_module+0x110/0x1b0 [] do_syscall_64+0x35/0x80 [] entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: 899c4d187f6a ("vdpa_sim_blk: add support for vdpa management tool") Fixes: a3c06ae158dd ("vdpa_sim_net: Add support for user supported devices") Signed-off-by: ruanjinjie Reviewed-by: Stefano Garzarella Message-Id: <20221110082348.4105476-1-ruanjinjie@huawei.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 4 +++- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c index c6db1a1baf768..f745926237a88 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c @@ -427,8 +427,10 @@ static int __init vdpasim_blk_init(void) int ret; ret = device_register(&vdpasim_blk_mgmtdev); - if (ret) + if (ret) { + put_device(&vdpasim_blk_mgmtdev); return ret; + } ret = vdpa_mgmtdev_register(&mgmt_dev); if (ret) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index c3cb225ea4693..11f5a121df243 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -305,8 +305,10 @@ static int __init vdpasim_net_init(void) int ret; ret = device_register(&vdpasim_net_mgmtdev); - if (ret) + if (ret) { + put_device(&vdpasim_net_mgmtdev); return ret; + } ret = vdpa_mgmtdev_register(&mgmt_dev); if (ret) From 7a4efe182ca61fb3e5307e69b261c57cbf434cd4 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Tue, 8 Nov 2022 10:17:05 +0000 Subject: [PATCH 0120/1593] vhost/vsock: Fix error handling in vhost_vsock_init() A problem about modprobe vhost_vsock failed is triggered with the following log given: modprobe: ERROR: could not insert 'vhost_vsock': Device or resource busy The reason is that vhost_vsock_init() returns misc_register() directly without checking its return value, if misc_register() failed, it returns without calling vsock_core_unregister() on vhost_transport, resulting the vhost_vsock can never be installed later. A simple call graph is shown as below: vhost_vsock_init() vsock_core_register() # register vhost_transport misc_register() device_create_with_groups() device_create_groups_vargs() dev = kzalloc(...) # OOM happened # return without unregister vhost_transport Fix by calling vsock_core_unregister() when misc_register() returns error. Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko") Signed-off-by: Yuan Can Message-Id: <20221108101705.45981-1-yuancan@huawei.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella Acked-by: Jason Wang --- drivers/vhost/vsock.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index cd6f7776013ac..a2b3743723639 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -959,7 +959,14 @@ static int __init vhost_vsock_init(void) VSOCK_TRANSPORT_F_H2G); if (ret < 0) return ret; - return misc_register(&vhost_vsock_misc); + + ret = misc_register(&vhost_vsock_misc); + if (ret) { + vsock_core_unregister(&vhost_transport.transport); + return ret; + } + + return 0; }; static void __exit vhost_vsock_exit(void) From f85efa9b0f5381874f727bd98f56787840313f0b Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 9 Nov 2022 11:25:02 +0100 Subject: [PATCH 0121/1593] vringh: fix range used in iotlb_translate() vhost_iotlb_itree_first() requires `start` and `last` parameters to search for a mapping that overlaps the range. In iotlb_translate() we cyclically call vhost_iotlb_itree_first(), incrementing `addr` by the amount already translated, so rightly we move the `start` parameter passed to vhost_iotlb_itree_first(), but we should hold the `last` parameter constant. Let's fix it by saving the `last` parameter value before incrementing `addr` in the loop. Fixes: 9ad9c49cfe97 ("vringh: IOTLB support") Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Message-Id: <20221109102503.18816-2-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vringh.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index c9f5c8ea3afbd..33eb941fcf154 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -1102,7 +1102,7 @@ static int iotlb_translate(const struct vringh *vrh, struct vhost_iotlb_map *map; struct vhost_iotlb *iotlb = vrh->iotlb; int ret = 0; - u64 s = 0; + u64 s = 0, last = addr + len - 1; spin_lock(vrh->iotlb_lock); @@ -1114,8 +1114,7 @@ static int iotlb_translate(const struct vringh *vrh, break; } - map = vhost_iotlb_itree_first(iotlb, addr, - addr + len - 1); + map = vhost_iotlb_itree_first(iotlb, addr, last); if (!map || map->start > addr) { ret = -EINVAL; break; From 98047313cdb46828093894d0ac8b1183b8b317f9 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 9 Nov 2022 11:25:03 +0100 Subject: [PATCH 0122/1593] vhost: fix range used in translate_desc() vhost_iotlb_itree_first() requires `start` and `last` parameters to search for a mapping that overlaps the range. In translate_desc() we cyclically call vhost_iotlb_itree_first(), incrementing `addr` by the amount already translated, so rightly we move the `start` parameter passed to vhost_iotlb_itree_first(), but we should hold the `last` parameter constant. Let's fix it by saving the `last` parameter value before incrementing `addr` in the loop. Fixes: a9709d6874d5 ("vhost: convert pre sorted vhost memory array to interval tree") Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Message-Id: <20221109102503.18816-3-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 5c9fe3c9c3646..cbe72bfd2f1fa 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2053,7 +2053,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, struct vhost_dev *dev = vq->dev; struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem; struct iovec *_iov; - u64 s = 0; + u64 s = 0, last = addr + len - 1; int ret = 0; while ((u64)len > s) { @@ -2063,7 +2063,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, break; } - map = vhost_iotlb_itree_first(umem, addr, addr + len - 1); + map = vhost_iotlb_itree_first(umem, addr, last); if (map == NULL || map->start > addr) { if (umem != dev->iotlb) { ret = -EFAULT; From c070c1912a83432530cbb4271d5b9b11fa36b67a Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 9 Nov 2022 16:42:13 +0100 Subject: [PATCH 0123/1593] vhost-vdpa: fix an iotlb memory leak Before commit 3d5698793897 ("vhost-vdpa: introduce asid based IOTLB") we called vhost_vdpa_iotlb_unmap(v, iotlb, 0ULL, 0ULL - 1) during release to free all the resources allocated when processing user IOTLB messages through vhost_vdpa_process_iotlb_update(). That commit changed the handling of IOTLB a bit, and we accidentally removed some code called during the release. We partially fixed this with commit 037d4305569a ("vhost-vdpa: call vhost_vdpa_cleanup during the release") but a potential memory leak is still there as showed by kmemleak if the application does not send VHOST_IOTLB_INVALIDATE or crashes: unreferenced object 0xffff888007fbaa30 (size 16): comm "blkio-bench", pid 914, jiffies 4294993521 (age 885.500s) hex dump (first 16 bytes): 40 73 41 07 80 88 ff ff 00 00 00 00 00 00 00 00 @sA............. backtrace: [<0000000087736d2a>] kmem_cache_alloc_trace+0x142/0x1c0 [<0000000060740f50>] vhost_vdpa_process_iotlb_msg+0x68c/0x901 [vhost_vdpa] [<0000000083e8e205>] vhost_chr_write_iter+0xc0/0x4a0 [vhost] [<000000008f2f414a>] vhost_vdpa_chr_write_iter+0x18/0x20 [vhost_vdpa] [<00000000de1cd4a0>] vfs_write+0x216/0x4b0 [<00000000a2850200>] ksys_write+0x71/0xf0 [<00000000de8e720b>] __x64_sys_write+0x19/0x20 [<0000000018b12cbb>] do_syscall_64+0x3f/0x90 [<00000000986ec465>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Let's fix this calling vhost_vdpa_iotlb_unmap() on the whole range in vhost_vdpa_remove_as(). We move that call before vhost_dev_cleanup() since we need a valid v->vdev.mm in vhost_vdpa_pa_unmap(). vhost_iotlb_reset() call can be removed, since vhost_vdpa_iotlb_unmap() on the whole range removes all the entries. The kmemleak log reported was observed with a vDPA device that has `use_va` set to true (e.g. VDUSE). This patch has been tested with both types of devices. Fixes: 037d4305569a ("vhost-vdpa: call vhost_vdpa_cleanup during the release") Fixes: 3d5698793897 ("vhost-vdpa: introduce asid based IOTLB") Signed-off-by: Stefano Garzarella Message-Id: <20221109154213.146789-1-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/vdpa.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 166044642fd5c..b08e07fc7d1ff 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -65,6 +65,10 @@ static DEFINE_IDA(vhost_vdpa_ida); static dev_t vhost_vdpa_major; +static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, + u64 start, u64 last); + static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb) { struct vhost_vdpa_as *as = container_of(iotlb, struct @@ -135,7 +139,7 @@ static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) return -EINVAL; hlist_del(&as->hash_link); - vhost_iotlb_reset(&as->iotlb); + vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1); kfree(as); return 0; @@ -1162,14 +1166,14 @@ static void vhost_vdpa_cleanup(struct vhost_vdpa *v) struct vhost_vdpa_as *as; u32 asid; - vhost_dev_cleanup(&v->vdev); - kfree(v->vdev.vqs); - for (asid = 0; asid < v->vdpa->nas; asid++) { as = asid_to_as(v, asid); if (as) vhost_vdpa_remove_as(v, asid); } + + vhost_dev_cleanup(&v->vdev); + kfree(v->vdev.vqs); } static int vhost_vdpa_open(struct inode *inode, struct file *filep) From f4e468f708386ce5fa6878a7ef43a9818ceeaecf Mon Sep 17 00:00:00 2001 From: Angus Chen Date: Thu, 10 Nov 2022 11:01:23 +0800 Subject: [PATCH 0124/1593] virtio_blk: use UINT_MAX instead of -1U We use UINT_MAX to limit max_discard_sectors in virtblk_probe, we can use UINT_MAX to limit max_hw_sectors for consistencies. No functional change intended. Signed-off-by: Angus Chen Message-Id: <20221110030124.1986-1-angus.chen@jaguarmicro.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- drivers/block/virtio_blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 271a9878fa8b3..dcbf86cd2155e 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -994,7 +994,7 @@ static int virtblk_probe(struct virtio_device *vdev) blk_queue_max_segments(q, sg_elems); /* No real sector limit. */ - blk_queue_max_hw_sectors(q, -1U); + blk_queue_max_hw_sectors(q, UINT_MAX); max_size = virtio_max_dma_size(vdev); From 794ec498c9fa79e6bfd71b931410d5897a9c00d4 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 10 Nov 2022 15:13:35 +0100 Subject: [PATCH 0125/1593] vdpa_sim: fix vringh initialization in vdpasim_queue_ready() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we initialize vringh, we should pass the features and the number of elements in the virtqueue negotiated with the driver, otherwise operations with vringh may fail. This was discovered in a case where the driver sets a number of elements in the virtqueue different from the value returned by .get_vq_num_max(). In vdpasim_vq_reset() is safe to initialize the vringh with default values, since the virtqueue will not be used until vdpasim_queue_ready() is called again. Fixes: 2c53d0f64c06 ("vdpasim: vDPA device simulator") Signed-off-by: Stefano Garzarella Message-Id: <20221110141335.62171-1-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Acked-by: Eugenio Pérez --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index b071f0d842fba..b20689f8fe89c 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -67,8 +67,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) { struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; - vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features, - VDPASIM_QUEUE_MAX, false, + vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, (struct vring_desc *)(uintptr_t)vq->desc_addr, (struct vring_avail *) (uintptr_t)vq->driver_addr, From a4722f64f924a9992efc08d141c21b2da02b70f3 Mon Sep 17 00:00:00 2001 From: wangjianli Date: Sun, 13 Nov 2022 15:07:42 +0800 Subject: [PATCH 0126/1593] tools/virtio: Variable type completion Replace "unsigned" with "unsigned int" Signed-off-by: wangjianli Message-Id: <20221113070742.48271-1-wangjianli@cdjrlc.com> Signed-off-by: Michael S. Tsirkin --- tools/virtio/virtio_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index 86a410ddcedde..120062f94590c 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -173,7 +173,7 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, long started = 0, completed = 0, next_reset = reset_n; long completed_before, started_before; int r, test = 1; - unsigned len; + unsigned int len; long long spurious = 0; const bool random_batch = batch == RANDOM_BATCH; From b1d65f717cd6305a396a8738e022c6f7c65cfbe8 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 14 Nov 2022 11:07:40 +0000 Subject: [PATCH 0127/1593] virtio-crypto: fix memory leak in virtio_crypto_alg_skcipher_close_session() 'vc_ctrl_req' is alloced in virtio_crypto_alg_skcipher_close_session(), and should be freed in the invalid ctrl_status->status error handling case. Otherwise there is a memory leak. Fixes: 0756ad15b1fe ("virtio-crypto: use private buffer for control request") Signed-off-by: Wei Yongjun Message-Id: <20221114110740.537276-1-weiyongjun@huaweicloud.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Gonglei Acked-by: zhenwei pi Acked-by: Jason Wang --- drivers/crypto/virtio/virtio_crypto_skcipher_algs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c index e553ccadbcbc8..e5876286828b8 100644 --- a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c @@ -239,7 +239,8 @@ static int virtio_crypto_alg_skcipher_close_session( pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", ctrl_status->status, destroy_session->session_id); - return -EINVAL; + err = -EINVAL; + goto out; } err = 0; From c8e82e3877028381969779a86972d9a4f57a9ea0 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Fri, 25 Nov 2022 00:12:14 +0800 Subject: [PATCH 0128/1593] virtio: Implementing attribute show with sysfs_emit Replace sprintf with sysfs_emit or its variants for their built-in PAGE_SIZE awareness. Signed-off-by: Dawei Li Message-Id: Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 828ced0607423..b9a80aedee1b7 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -15,7 +15,7 @@ static ssize_t device_show(struct device *_d, struct device_attribute *attr, char *buf) { struct virtio_device *dev = dev_to_virtio(_d); - return sprintf(buf, "0x%04x\n", dev->id.device); + return sysfs_emit(buf, "0x%04x\n", dev->id.device); } static DEVICE_ATTR_RO(device); @@ -23,7 +23,7 @@ static ssize_t vendor_show(struct device *_d, struct device_attribute *attr, char *buf) { struct virtio_device *dev = dev_to_virtio(_d); - return sprintf(buf, "0x%04x\n", dev->id.vendor); + return sysfs_emit(buf, "0x%04x\n", dev->id.vendor); } static DEVICE_ATTR_RO(vendor); @@ -31,7 +31,7 @@ static ssize_t status_show(struct device *_d, struct device_attribute *attr, char *buf) { struct virtio_device *dev = dev_to_virtio(_d); - return sprintf(buf, "0x%08x\n", dev->config->get_status(dev)); + return sysfs_emit(buf, "0x%08x\n", dev->config->get_status(dev)); } static DEVICE_ATTR_RO(status); @@ -39,7 +39,7 @@ static ssize_t modalias_show(struct device *_d, struct device_attribute *attr, char *buf) { struct virtio_device *dev = dev_to_virtio(_d); - return sprintf(buf, "virtio:d%08Xv%08X\n", + return sysfs_emit(buf, "virtio:d%08Xv%08X\n", dev->id.device, dev->id.vendor); } static DEVICE_ATTR_RO(modalias); @@ -54,9 +54,9 @@ static ssize_t features_show(struct device *_d, /* We actually represent this as a bitstring, as it could be * arbitrary length in future. */ for (i = 0; i < sizeof(dev->features)*8; i++) - len += sprintf(buf+len, "%c", + len += sysfs_emit_at(buf, len, "%c", __virtio_test_bit(dev, i) ? '1' : '0'); - len += sprintf(buf+len, "\n"); + len += sysfs_emit_at(buf, len, "\n"); return len; } static DEVICE_ATTR_RO(features); From e794070af224ade46db368271896b2685ff4f96b Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Mon, 19 Dec 2022 15:33:31 +0800 Subject: [PATCH 0129/1593] vhost_vdpa: fix the crash in unmap a large memory While testing in vIOMMU, sometimes Guest will unmap very large memory, which will cause the crash. To fix this, add a new function vhost_vdpa_general_unmap(). This function will only unmap the memory that saved in iotlb. Call Trace: [ 647.820144] ------------[ cut here ]------------ [ 647.820848] kernel BUG at drivers/iommu/intel/iommu.c:1174! [ 647.821486] invalid opcode: 0000 [#1] PREEMPT SMP PTI [ 647.822082] CPU: 10 PID: 1181 Comm: qemu-system-x86 Not tainted 6.0.0-rc1home_lulu_2452_lulu7_vhost+ #62 [ 647.823139] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.15.0-29-g6a62e0cb0dfe-prebuilt.qem4 [ 647.824365] RIP: 0010:domain_unmap+0x48/0x110 [ 647.825424] Code: 48 89 fb 8d 4c f6 1e 39 c1 0f 4f c8 83 e9 0c 83 f9 3f 7f 18 48 89 e8 48 d3 e8 48 85 c0 75 59 [ 647.828064] RSP: 0018:ffffae5340c0bbf0 EFLAGS: 00010202 [ 647.828973] RAX: 0000000000000001 RBX: ffff921793d10540 RCX: 000000000000001b [ 647.830083] RDX: 00000000080000ff RSI: 0000000000000001 RDI: ffff921793d10540 [ 647.831214] RBP: 0000000007fc0100 R08: ffffae5340c0bcd0 R09: 0000000000000003 [ 647.832388] R10: 0000007fc0100000 R11: 0000000000100000 R12: 00000000080000ff [ 647.833668] R13: ffffae5340c0bcd0 R14: ffff921793d10590 R15: 0000008000100000 [ 647.834782] FS: 00007f772ec90640(0000) GS:ffff921ce7a80000(0000) knlGS:0000000000000000 [ 647.836004] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 647.836990] CR2: 00007f02c27a3a20 CR3: 0000000101b0c006 CR4: 0000000000372ee0 [ 647.838107] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 647.839283] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 647.840666] Call Trace: [ 647.841437] [ 647.842107] intel_iommu_unmap_pages+0x93/0x140 [ 647.843112] __iommu_unmap+0x91/0x1b0 [ 647.844003] iommu_unmap+0x6a/0x95 [ 647.844885] vhost_vdpa_unmap+0x1de/0x1f0 [vhost_vdpa] [ 647.845985] vhost_vdpa_process_iotlb_msg+0xf0/0x90b [vhost_vdpa] [ 647.847235] ? _raw_spin_unlock+0x15/0x30 [ 647.848181] ? _copy_from_iter+0x8c/0x580 [ 647.849137] vhost_chr_write_iter+0xb3/0x430 [vhost] [ 647.850126] vfs_write+0x1e4/0x3a0 [ 647.850897] ksys_write+0x53/0xd0 [ 647.851688] do_syscall_64+0x3a/0x90 [ 647.852508] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 647.853457] RIP: 0033:0x7f7734ef9f4f [ 647.854408] Code: 89 54 24 18 48 89 74 24 10 89 7c 24 08 e8 29 76 f8 ff 48 8b 54 24 18 48 8b 74 24 10 41 89 c8 [ 647.857217] RSP: 002b:00007f772ec8f040 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 [ 647.858486] RAX: ffffffffffffffda RBX: 00000000fef00000 RCX: 00007f7734ef9f4f [ 647.859713] RDX: 0000000000000048 RSI: 00007f772ec8f090 RDI: 0000000000000010 [ 647.860942] RBP: 00007f772ec8f1a0 R08: 0000000000000000 R09: 0000000000000000 [ 647.862206] R10: 0000000000000001 R11: 0000000000000293 R12: 0000000000000010 [ 647.863446] R13: 0000000000000002 R14: 0000000000000000 R15: ffffffff01100000 [ 647.864692] [ 647.865458] Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs v] [ 647.874688] ---[ end trace 0000000000000000 ]--- Cc: stable@vger.kernel.org Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") Signed-off-by: Cindy Lu Message-Id: <20221219073331.556140-1-lulu@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 46 +++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index b08e07fc7d1ff..ec32f785dfdec 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -66,8 +66,8 @@ static DEFINE_IDA(vhost_vdpa_ida); static dev_t vhost_vdpa_major; static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, - struct vhost_iotlb *iotlb, - u64 start, u64 last); + struct vhost_iotlb *iotlb, u64 start, + u64 last, u32 asid); static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb) { @@ -139,7 +139,7 @@ static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) return -EINVAL; hlist_del(&as->hash_link); - vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1); + vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid); kfree(as); return 0; @@ -687,10 +687,20 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, mutex_unlock(&d->mutex); return r; } +static void vhost_vdpa_general_unmap(struct vhost_vdpa *v, + struct vhost_iotlb_map *map, u32 asid) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + if (ops->dma_map) { + ops->dma_unmap(vdpa, asid, map->start, map->size); + } else if (ops->set_map == NULL) { + iommu_unmap(v->domain, map->start, map->size); + } +} -static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, - struct vhost_iotlb *iotlb, - u64 start, u64 last) +static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, + u64 start, u64 last, u32 asid) { struct vhost_dev *dev = &v->vdev; struct vhost_iotlb_map *map; @@ -707,13 +717,13 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, unpin_user_page(page); } atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm); + vhost_vdpa_general_unmap(v, map, asid); vhost_iotlb_map_free(iotlb, map); } } -static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, - struct vhost_iotlb *iotlb, - u64 start, u64 last) +static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, + u64 start, u64 last, u32 asid) { struct vhost_iotlb_map *map; struct vdpa_map_file *map_file; @@ -722,20 +732,21 @@ static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, map_file = (struct vdpa_map_file *)map->opaque; fput(map_file->file); kfree(map_file); + vhost_vdpa_general_unmap(v, map, asid); vhost_iotlb_map_free(iotlb, map); } } static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, - struct vhost_iotlb *iotlb, - u64 start, u64 last) + struct vhost_iotlb *iotlb, u64 start, + u64 last, u32 asid) { struct vdpa_device *vdpa = v->vdpa; if (vdpa->use_va) - return vhost_vdpa_va_unmap(v, iotlb, start, last); + return vhost_vdpa_va_unmap(v, iotlb, start, last, asid); - return vhost_vdpa_pa_unmap(v, iotlb, start, last); + return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid); } static int perm_to_iommu_flags(u32 perm) @@ -802,17 +813,12 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, const struct vdpa_config_ops *ops = vdpa->config; u32 asid = iotlb_to_asid(iotlb); - vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1); + vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid); - if (ops->dma_map) { - ops->dma_unmap(vdpa, asid, iova, size); - } else if (ops->set_map) { + if (ops->set_map) { if (!v->in_batch) ops->set_map(vdpa, asid, iotlb); - } else { - iommu_unmap(v->domain, iova, size); } - /* If we are in the middle of batch processing, delay the free * of AS until BATCH_END. */ From 8aeac42d60936046a00e67cdf7d27b061df2962f Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 27 Nov 2022 19:43:46 -0800 Subject: [PATCH 0130/1593] tools/virtio: remove stray characters __read_once_size() is not a macro, remove those '/'s. Signed-off-by: Davidlohr Bueso Message-Id: <20221128034347.990-2-dave@stgolabs.net> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo --- tools/virtio/ringtest/main.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h index 6d1fccd3d86ce..9ed09caa659e7 100644 --- a/tools/virtio/ringtest/main.h +++ b/tools/virtio/ringtest/main.h @@ -149,16 +149,16 @@ static inline void busy_wait(void) static __always_inline void __read_once_size(const volatile void *p, void *res, int size) { - switch (size) { \ - case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break; \ - case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break; \ - case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break; \ - case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break; \ - default: \ - barrier(); \ - __builtin_memcpy((void *)res, (const void *)p, size); \ - barrier(); \ - } \ + switch (size) { + case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break; + case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break; + case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break; + case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break; + default: + barrier(); + __builtin_memcpy((void *)res, (const void *)p, size); + barrier(); + } } static __always_inline void __write_once_size(volatile void *p, void *res, int size) From 81931012bd7dc52fadf2b720605fce8a7148d4a7 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 27 Nov 2022 19:43:47 -0800 Subject: [PATCH 0131/1593] tools/virtio: remove smp_read_barrier_depends() This gets rid of the last references to smp_read_barrier_depends() which for the kernel side was removed in v5.9. The serialization required for Alpha is done inside READ_ONCE() instead of having users deal with it. Simply use a full barrier, the architecture does not have rmb in the first place. Signed-off-by: Davidlohr Bueso Message-Id: <20221128034347.990-3-dave@stgolabs.net> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo --- tools/virtio/ringtest/main.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h index 9ed09caa659e7..b68920d527503 100644 --- a/tools/virtio/ringtest/main.h +++ b/tools/virtio/ringtest/main.h @@ -140,12 +140,6 @@ static inline void busy_wait(void) #define smp_wmb() smp_release() #endif -#ifdef __alpha__ -#define smp_read_barrier_depends() smp_acquire() -#else -#define smp_read_barrier_depends() do {} while(0) -#endif - static __always_inline void __read_once_size(const volatile void *p, void *res, int size) { @@ -175,13 +169,22 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s } } +#ifdef __alpha__ #define READ_ONCE(x) \ ({ \ union { typeof(x) __val; char __c[1]; } __u; \ __read_once_size(&(x), __u.__c, sizeof(x)); \ - smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \ + smp_mb(); /* Enforce dependency ordering from x */ \ __u.__val; \ }) +#else +#define READ_ONCE(x) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u; \ + __read_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) +#endif #define WRITE_ONCE(x, val) \ ({ \ From 937c783aa3d8d77963ec91918d3298edb45b9161 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 28 Nov 2022 07:57:15 -0800 Subject: [PATCH 0132/1593] vduse: Validate vq_num in vduse_validate_config() Add a limit to 'config->vq_num' which is user controlled data which comes from an vduse_ioctl to prevent large memory allocations. Micheal says - This limit is somewhat arbitrary. However, currently virtio pci and ccw are limited to a 16 bit vq number. While MMIO isn't it is also isn't used with lots of VQs due to current lack of support for per-vq interrupts. Thus, the 0xffff limit on number of VQs corresponding to a 16-bit VQ number seems sufficient for now. This is found using static analysis with smatch. Suggested-by: Michael S. Tsirkin Signed-off-by: Harshit Mogalapalli Message-Id: <20221128155717.2579992-1-harshit.m.mogalapalli@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/vdpa_user/vduse_dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 0dd3c1f291da3..0c3b48616a9f3 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1440,6 +1440,9 @@ static bool vduse_validate_config(struct vduse_dev_config *config) if (config->config_size > PAGE_SIZE) return false; + if (config->vq_num > 0xffff) + return false; + if (!device_is_allowed(config->device_id)) return false; From ed843d6ed7310a27cf7c8ee0a82a482eed0cb4a6 Mon Sep 17 00:00:00 2001 From: Rong Wang Date: Wed, 7 Dec 2022 20:08:13 +0800 Subject: [PATCH 0133/1593] vdpa/vp_vdpa: fix kfree a wrong pointer in vp_vdpa_remove In vp_vdpa_remove(), the code kfree(&vp_vdpa_mgtdev->mgtdev.id_table) uses a reference of pointer as the argument of kfree, which is the wrong pointer and then may hit crash like this: Unable to handle kernel paging request at virtual address 00ffff003363e30c Internal error: Oops: 96000004 [#1] SMP Call trace: rb_next+0x20/0x5c ext4_readdir+0x494/0x5c4 [ext4] iterate_dir+0x168/0x1b4 __se_sys_getdents64+0x68/0x170 __arm64_sys_getdents64+0x24/0x30 el0_svc_common.constprop.0+0x7c/0x1bc do_el0_svc+0x2c/0x94 el0_svc+0x20/0x30 el0_sync_handler+0xb0/0xb4 el0_sync+0x160/0x180 Code: 54000220 f9400441 b4000161 aa0103e0 (f9400821) SMP: stopping secondary CPUs Starting crashdump kernel... Fixes: ffbda8e9df10 ("vdpa/vp_vdpa : add vdpa tool support in vp_vdpa") Signed-off-by: Rong Wang Signed-off-by: Nanyong Sun Message-Id: <20221207120813.2837529-1-sunnanyong@huawei.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Cindy Lu Acked-by: Jason Wang --- drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index d448db0c4de3f..8fe267ca3e76f 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -647,7 +647,7 @@ static void vp_vdpa_remove(struct pci_dev *pdev) mdev = vp_vdpa_mgtdev->mdev; vp_modern_remove(mdev); vdpa_mgmtdev_unregister(&vp_vdpa_mgtdev->mgtdev); - kfree(&vp_vdpa_mgtdev->mgtdev.id_table); + kfree(vp_vdpa_mgtdev->mgtdev.id_table); kfree(mdev); kfree(vp_vdpa_mgtdev); } From 1c96d5457f7251d1c62aacc04921557d56fc049a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 7 Sep 2022 14:01:10 +0800 Subject: [PATCH 0134/1593] vdpa: conditionally fill max max queue pair for stats For the device without multiqueue feature, we will read 0 as max_virtqueue_pairs from the config. So if we fill VDPA_ATTR_DEV_NET_CFG_MAX_VQP with the value we read from the config we will confuse the user. Fixing this by only filling the value when multiqueue is offered by the device so userspace can assume 1 when the attr is not provided. Fixes: 13b00b135665c("vdpa: Add support for querying vendor statistics") Cc: Eli Cohen Signed-off-by: Jason Wang Message-Id: <20220907060110.4511-1-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Eli Cohen --- drivers/vdpa/vdpa.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 41ed56362992b..8ef7aa1365cc5 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -935,7 +935,6 @@ static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg, { struct virtio_net_config config = {}; u64 features; - u16 max_vqp; u8 status; int err; @@ -946,15 +945,15 @@ static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg, } vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config)); - max_vqp = __virtio16_to_cpu(true, config.max_virtqueue_pairs); - if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, max_vqp)) - return -EMSGSIZE; - features = vdev->config->get_driver_features(vdev); if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features, VDPA_ATTR_PAD)) return -EMSGSIZE; + err = vdpa_dev_net_mq_config_fill(msg, features, &config); + if (err) + return err; + if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index)) return -EMSGSIZE; From 0b7a04a30eef20e6b24926a45c0ce7906ae85bd6 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 13 Dec 2022 17:07:17 +0800 Subject: [PATCH 0135/1593] vdpasim: fix memory leak when freeing IOTLBs After commit bda324fd037a ("vdpasim: control virtqueue support"), vdpasim->iommu became an array of IOTLB, so we should clean the mappings of each free one by one instead of just deleting the ranges in the first IOTLB which may leak maps. Fixes: bda324fd037a ("vdpasim: control virtqueue support") Cc: Gautam Dawar Signed-off-by: Jason Wang Message-Id: <20221213090717.61529-1-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Gautam Dawar --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index b20689f8fe89c..cb88891b44a8c 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -689,7 +689,9 @@ static void vdpasim_free(struct vdpa_device *vdpa) } kvfree(vdpasim->buffer); - vhost_iotlb_free(vdpasim->iommu); + for (i = 0; i < vdpasim->dev_attr.nas; i++) + vhost_iotlb_reset(&vdpasim->iommu[i]); + kfree(vdpasim->iommu); kfree(vdpasim->vqs); kfree(vdpasim->config); } From 72455a1142527e607e1d69439f3ffa2ef6d09e26 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Wed, 14 Dec 2022 13:43:06 +0800 Subject: [PATCH 0136/1593] vdpa_sim_net: should not drop the multicast/broadcast packet In the receive_filter(), should not drop the packet with the broadcast/multicast address. Add the check for this Signed-off-by: Cindy Lu Message-Id: <20221214054306.24145-1-lulu@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index 11f5a121df243..584b975a98a7e 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -62,6 +62,9 @@ static bool receive_filter(struct vdpasim *vdpasim, size_t len) if (len < ETH_ALEN + hdr_len) return false; + if (is_broadcast_ether_addr(vdpasim->buffer + hdr_len) || + is_multicast_ether_addr(vdpasim->buffer + hdr_len)) + return true; if (!strncmp(vdpasim->buffer + hdr_len, vio_config->mac, ETH_ALEN)) return true; From a26116c1e74028914f281851488546c91cbae57d Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Fri, 21 Oct 2022 17:41:26 -0300 Subject: [PATCH 0137/1593] virtio_blk: Fix signedness bug in virtblk_prep_rq() The virtblk_map_data() function returns negative error codes, however, the 'nents' field of vbr->sg_table is an unsigned int, which causes the error handling not to work correctly. Cc: stable@vger.kernel.org Fixes: 0e9911fa768f ("virtio-blk: support mq_ops->queue_rqs()") Signed-off-by: Rafael Mendonca Message-Id: <20221021204126.927603-1-rafaelmendsr@gmail.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella Reviewed-by: Suwan Kim Reviewed-by: Stefan Hajnoczi Acked-by: Jason Wang --- drivers/block/virtio_blk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index dcbf86cd2155e..6a77fa9174288 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -334,14 +334,16 @@ static blk_status_t virtblk_prep_rq(struct blk_mq_hw_ctx *hctx, struct virtblk_req *vbr) { blk_status_t status; + int num; status = virtblk_setup_cmd(vblk->vdev, req, vbr); if (unlikely(status)) return status; - vbr->sg_table.nents = virtblk_map_data(hctx, req, vbr); - if (unlikely(vbr->sg_table.nents < 0)) + num = virtblk_map_data(hctx, req, vbr); + if (unlikely(num < 0)) return virtblk_fail_to_queue(req, -ENOMEM); + vbr->sg_table.nents = num; blk_mq_start_request(req); From b51b75f0604f17c0f6f3b6f68f1a521a5cc6b04f Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Wed, 7 Dec 2022 14:52:20 -0800 Subject: [PATCH 0138/1593] dmaengine: idxd: Let probe fail when workqueue cannot be enabled The workqueue is enabled when the appropriate driver is loaded and disabled when the driver is removed. When the driver is removed it assumes that the workqueue was enabled successfully and proceeds to free allocations made during workqueue enabling. Failure during workqueue enabling does not prevent the driver from being loaded. This is because the error path within drv_enable_wq() returns success unless a second failure is encountered during the error path. By returning success it is possible to load the driver even if the workqueue cannot be enabled and allocations that do not exist are attempted to be freed during driver remove. Some examples of problematic flows: (a) idxd_dmaengine_drv_probe() -> drv_enable_wq() -> idxd_wq_request_irq(): In above flow, if idxd_wq_request_irq() fails then idxd_wq_unmap_portal() is called on error exit path, but drv_enable_wq() returns 0 because idxd_wq_disable() succeeds. The driver is thus loaded successfully. idxd_dmaengine_drv_remove()->drv_disable_wq()->idxd_wq_unmap_portal() Above flow on driver unload triggers the WARN in devm_iounmap() because the device resource has already been removed during error path of drv_enable_wq(). (b) idxd_dmaengine_drv_probe() -> drv_enable_wq() -> idxd_wq_request_irq(): In above flow, if idxd_wq_request_irq() fails then idxd_wq_init_percpu_ref() is never called to initialize the percpu counter, yet the driver loads successfully because drv_enable_wq() returns 0. idxd_dmaengine_drv_remove()->__idxd_wq_quiesce()->percpu_ref_kill(): Above flow on driver unload triggers a BUG when attempting to drop the initial ref of the uninitialized percpu ref: BUG: kernel NULL pointer dereference, address: 0000000000000010 Fix the drv_enable_wq() error path by returning the original error that indicates failure of workqueue enabling. This ensures that the probe fails when an error is encountered and the driver remove paths are only attempted when the workqueue was enabled successfully. Fixes: 1f2bb40337f0 ("dmaengine: idxd: move wq_enable() to device.c") Signed-off-by: Reinette Chatre Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/e8d8116e5efa0fd14fadc5adae6ffd319f0e5ff1.1670452419.git.reinette.chatre@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 06f5d3783d771..c5523ced2e3a4 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1390,8 +1390,7 @@ int drv_enable_wq(struct idxd_wq *wq) err_irq: idxd_wq_unmap_portal(wq); err_map_portal: - rc = idxd_wq_disable(wq, false); - if (rc < 0) + if (idxd_wq_disable(wq, false)) dev_dbg(dev, "wq %s disable failed\n", dev_name(wq_confdev(wq))); err: return rc; From 1beeec45f9ac31eba52478379f70a5fa9c2ad005 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Wed, 7 Dec 2022 14:52:21 -0800 Subject: [PATCH 0139/1593] dmaengine: idxd: Prevent use after free on completion memory On driver unload any pending descriptors are flushed at the time the interrupt is freed: idxd_dmaengine_drv_remove() -> drv_disable_wq() -> idxd_wq_free_irq() -> idxd_flush_pending_descs(). If there are any descriptors present that need to be flushed this flow triggers a "not present" page fault as below: BUG: unable to handle page fault for address: ff391c97c70c9040 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page The address that triggers the fault is the address of the descriptor that was freed moments earlier via: drv_disable_wq()->idxd_wq_free_resources() Fix the use after free by freeing the descriptors after any possible usage. This is done after idxd_wq_reset() to ensure that the memory remains accessible during possible completion writes by the device. Fixes: 63c14ae6c161 ("dmaengine: idxd: refactor wq driver enable/disable operations") Suggested-by: Dave Jiang Signed-off-by: Reinette Chatre Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/6c4657d9cff0a0a00501a7b928297ac966e9ec9d.1670452419.git.reinette.chatre@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index c5523ced2e3a4..cd792f3f9873f 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1407,11 +1407,11 @@ void drv_disable_wq(struct idxd_wq *wq) dev_warn(dev, "Clients has claim on wq %d: %d\n", wq->id, idxd_wq_refcount(wq)); - idxd_wq_free_resources(wq); idxd_wq_unmap_portal(wq); idxd_wq_drain(wq); idxd_wq_free_irq(wq); idxd_wq_reset(wq); + idxd_wq_free_resources(wq); percpu_ref_exit(&wq->wq_active); wq->type = IDXD_WQT_NONE; wq->client_count = 0; From 6744a030d81e456883bfbb627ac1f30465c1a989 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Wed, 7 Dec 2022 14:52:22 -0800 Subject: [PATCH 0140/1593] dmaengine: idxd: Do not call DMX TX callbacks during workqueue disable On driver unload any pending descriptors are flushed and pending DMA descriptors are explicitly completed: idxd_dmaengine_drv_remove() -> drv_disable_wq() -> idxd_wq_free_irq() -> idxd_flush_pending_descs() -> idxd_dma_complete_txd() With this done during driver unload any remaining descriptor is likely stuck and can be dropped. Even so, the descriptor may still have a callback set that could no longer be accessible. An example of such a problem is when the dmatest fails and the dmatest module is unloaded. The failure of dmatest leaves descriptors with dma_async_tx_descriptor::callback pointing to code that no longer exist. This causes a page fault as below at the time the IDXD driver is unloaded when it attempts to run the callback: BUG: unable to handle page fault for address: ffffffffc0665190 #PF: supervisor instruction fetch in kernel mode #PF: error_code(0x0010) - not-present page Fix this by clearing the callback pointers on the transmit descriptors only when workqueue is disabled. Fixes: 403a2e236538 ("dmaengine: idxd: change MSIX allocation based on per wq activation") Signed-off-by: Reinette Chatre Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/37d06b772aa7f8863ca50f90930ea2fd80b38fc3.1670452419.git.reinette.chatre@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index cd792f3f9873f..29dbb0f52e186 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1172,8 +1172,19 @@ static void idxd_flush_pending_descs(struct idxd_irq_entry *ie) spin_unlock(&ie->list_lock); list_for_each_entry_safe(desc, itr, &flist, list) { + struct dma_async_tx_descriptor *tx; + list_del(&desc->list); ctype = desc->completion->status ? IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_ABORT; + /* + * wq is being disabled. Any remaining descriptors are + * likely to be stuck and can be dropped. callback could + * point to code that is no longer accessible, for example + * if dmatest module has been unloaded. + */ + tx = &desc->txd; + tx->callback = NULL; + tx->callback_result = NULL; idxd_dma_complete_txd(desc, ctype, true); } } From efab25894a41a920d9581183741e7fadba00719c Mon Sep 17 00:00:00 2001 From: Jayesh Choudhary Date: Mon, 28 Nov 2022 14:20:05 +0530 Subject: [PATCH 0141/1593] dmaengine: ti: k3-udma: Do conditional decrement of UDMA_CHAN_RT_PEER_BCNT_REG PSIL_EP_NATIVE endpoints may not have PEER registers for BCNT and thus udma_decrement_byte_counters() should not try to decrement these counters. This fixes the issue of crypto IPERF testing where the client side (EVM) hangs without transfer of packets to the server side, seen since this function was added. Fixes: 7c94dcfa8fcf ("dmaengine: ti: k3-udma: Reset UDMA_CHAN_RT byte counters to prevent overflow") Signed-off-by: Jayesh Choudhary Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20221128085005.489964-1-j-choudhary@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index ce8b80bb34d7d..4c62274e0b332 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -762,11 +762,12 @@ static void udma_decrement_byte_counters(struct udma_chan *uc, u32 val) if (uc->desc->dir == DMA_DEV_TO_MEM) { udma_rchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val); udma_rchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val); - udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val); + if (uc->config.ep_type != PSIL_EP_NATIVE) + udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val); } else { udma_tchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val); udma_tchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val); - if (!uc->bchan) + if (!uc->bchan && uc->config.ep_type != PSIL_EP_NATIVE) udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val); } } From f96a483f8e131de50ebf30589ffba1a4f6bd12f7 Mon Sep 17 00:00:00 2001 From: Jie Hai Date: Sat, 26 Nov 2022 14:49:38 +0800 Subject: [PATCH 0142/1593] MAINTAINERS: update Jie Hai's email address Signed-off-by: Jie Hai Link: https://lore.kernel.org/r/20221126064938.50685-1-haijie1@huawei.com Signed-off-by: Vinod Koul --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f61eb221415bd..ef4d96db17c16 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9299,7 +9299,7 @@ F: net/dsa/tag_hellcreek.c HISILICON DMA DRIVER M: Zhou Wang -M: Jie Hai +M: Jie Hai L: dmaengine@vger.kernel.org S: Maintained F: drivers/dma/hisi_dma.c From 9deb1e9fb88b1120a908676fa33bdf9e2eeaefce Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Mon, 26 Dec 2022 14:48:23 +0300 Subject: [PATCH 0143/1593] net/ethtool/ioctl: return -EOPNOTSUPP if we have no phy stats It's not very useful to copy back an empty ethtool_stats struct and return 0 if we didn't actually have any stats. This also allows for further simplification of this function in the future commits. Signed-off-by: Daniil Tatianin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/ethtool/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index c2f1a542e6fa9..932fa8225b2f3 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2099,7 +2099,8 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) return n_stats; if (n_stats > S32_MAX / sizeof(u64)) return -ENOMEM; - WARN_ON_ONCE(!n_stats); + if (WARN_ON_ONCE(!n_stats)) + return -EOPNOTSUPP; if (copy_from_user(&stats, useraddr, sizeof(stats))) return -EFAULT; From fd4778581d61d8848b532f8cdc9b325138748437 Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Mon, 26 Dec 2022 14:48:24 +0300 Subject: [PATCH 0144/1593] net/ethtool/ioctl: remove if n_stats checks from ethtool_get_phy_stats Now that we always early return if we don't have any stats we can remove these checks as they're no longer necessary. Signed-off-by: Daniil Tatianin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/ethtool/ioctl.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 932fa8225b2f3..85f0cffdcec8a 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2107,28 +2107,24 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) stats.n_stats = n_stats; - if (n_stats) { - data = vzalloc(array_size(n_stats, sizeof(u64))); - if (!data) - return -ENOMEM; + data = vzalloc(array_size(n_stats, sizeof(u64))); + if (!data) + return -ENOMEM; - if (phydev && !ops->get_ethtool_phy_stats && - phy_ops && phy_ops->get_stats) { - ret = phy_ops->get_stats(phydev, &stats, data); - if (ret < 0) - goto out; - } else { - ops->get_ethtool_phy_stats(dev, &stats, data); - } + if (phydev && !ops->get_ethtool_phy_stats && + phy_ops && phy_ops->get_stats) { + ret = phy_ops->get_stats(phydev, &stats, data); + if (ret < 0) + goto out; } else { - data = NULL; + ops->get_ethtool_phy_stats(dev, &stats, data); } ret = -EFAULT; if (copy_to_user(useraddr, &stats, sizeof(stats))) goto out; useraddr += sizeof(stats); - if (n_stats && copy_to_user(useraddr, data, array_size(n_stats, sizeof(u64)))) + if (copy_to_user(useraddr, data, array_size(n_stats, sizeof(u64)))) goto out; ret = 0; From 201ed315f9676809cd5b20a39206e964106d4f27 Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Mon, 26 Dec 2022 14:48:25 +0300 Subject: [PATCH 0145/1593] net/ethtool/ioctl: split ethtool_get_phy_stats into multiple helpers So that it's easier to follow and make sense of the branching and various conditions. Stats retrieval has been split into two separate functions ethtool_get_phy_stats_phydev & ethtool_get_phy_stats_ethtool. The former attempts to retrieve the stats using phydev & phy_ops, while the latter uses ethtool_ops. Actual n_stats validation & array allocation has been moved into a new ethtool_vzalloc_stats_array helper. This also fixes a potential NULL dereference of ops->get_ethtool_phy_stats where it was getting called in an else branch unconditionally without making sure it was actually present. Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Signed-off-by: Daniil Tatianin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/ethtool/ioctl.c | 102 ++++++++++++++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 33 deletions(-) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 85f0cffdcec8a..646b3e490c71a 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2078,23 +2078,8 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) return ret; } -static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) +static int ethtool_vzalloc_stats_array(int n_stats, u64 **data) { - const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops; - const struct ethtool_ops *ops = dev->ethtool_ops; - struct phy_device *phydev = dev->phydev; - struct ethtool_stats stats; - u64 *data; - int ret, n_stats; - - if (!phydev && (!ops->get_ethtool_phy_stats || !ops->get_sset_count)) - return -EOPNOTSUPP; - - if (phydev && !ops->get_ethtool_phy_stats && - phy_ops && phy_ops->get_sset_count) - n_stats = phy_ops->get_sset_count(phydev); - else - n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS); if (n_stats < 0) return n_stats; if (n_stats > S32_MAX / sizeof(u64)) @@ -2102,31 +2087,82 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) if (WARN_ON_ONCE(!n_stats)) return -EOPNOTSUPP; + *data = vzalloc(array_size(n_stats, sizeof(u64))); + if (!*data) + return -ENOMEM; + + return 0; +} + +static int ethtool_get_phy_stats_phydev(struct phy_device *phydev, + struct ethtool_stats *stats, + u64 **data) + { + const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops; + int n_stats, ret; + + if (!phy_ops || !phy_ops->get_sset_count || !phy_ops->get_stats) + return -EOPNOTSUPP; + + n_stats = phy_ops->get_sset_count(phydev); + + ret = ethtool_vzalloc_stats_array(n_stats, data); + if (ret) + return ret; + + stats->n_stats = n_stats; + return phy_ops->get_stats(phydev, stats, *data); +} + +static int ethtool_get_phy_stats_ethtool(struct net_device *dev, + struct ethtool_stats *stats, + u64 **data) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + int n_stats, ret; + + if (!ops || !ops->get_sset_count || ops->get_ethtool_phy_stats) + return -EOPNOTSUPP; + + n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS); + + ret = ethtool_vzalloc_stats_array(n_stats, data); + if (ret) + return ret; + + stats->n_stats = n_stats; + ops->get_ethtool_phy_stats(dev, stats, *data); + + return 0; +} + +static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) +{ + struct phy_device *phydev = dev->phydev; + struct ethtool_stats stats; + u64 *data = NULL; + int ret = -EOPNOTSUPP; + if (copy_from_user(&stats, useraddr, sizeof(stats))) return -EFAULT; - stats.n_stats = n_stats; + if (phydev) + ret = ethtool_get_phy_stats_phydev(phydev, &stats, &data); - data = vzalloc(array_size(n_stats, sizeof(u64))); - if (!data) - return -ENOMEM; + if (ret == -EOPNOTSUPP) + ret = ethtool_get_phy_stats_ethtool(dev, &stats, &data); - if (phydev && !ops->get_ethtool_phy_stats && - phy_ops && phy_ops->get_stats) { - ret = phy_ops->get_stats(phydev, &stats, data); - if (ret < 0) - goto out; - } else { - ops->get_ethtool_phy_stats(dev, &stats, data); - } + if (ret) + goto out; - ret = -EFAULT; - if (copy_to_user(useraddr, &stats, sizeof(stats))) + if (copy_to_user(useraddr, &stats, sizeof(stats))) { + ret = -EFAULT; goto out; + } + useraddr += sizeof(stats); - if (copy_to_user(useraddr, data, array_size(n_stats, sizeof(u64)))) - goto out; - ret = 0; + if (copy_to_user(useraddr, data, array_size(stats.n_stats, sizeof(u64)))) + ret = -EFAULT; out: vfree(data); From ad425666a1f05d9b215a84cf010c3789b2ea8206 Mon Sep 17 00:00:00 2001 From: Chunhao Lin Date: Mon, 26 Dec 2022 20:31:52 +0800 Subject: [PATCH 0146/1593] r8169: move rtl_wol_enable_rx() and rtl_prepare_power_down() There is no functional change. Moving these two functions for following patch "r8169: fix dmar pte write access is not set error". Signed-off-by: Chunhao Lin Reviewed-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 44 +++++++++++------------ 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index a9dcc98b6af18..acc2500342cab 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2210,28 +2210,6 @@ static int rtl_set_mac_address(struct net_device *dev, void *p) return 0; } -static void rtl_wol_enable_rx(struct rtl8169_private *tp) -{ - if (tp->mac_version >= RTL_GIGA_MAC_VER_25) - RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) | - AcceptBroadcast | AcceptMulticast | AcceptMyPhys); -} - -static void rtl_prepare_power_down(struct rtl8169_private *tp) -{ - if (tp->dash_type != RTL_DASH_NONE) - return; - - if (tp->mac_version == RTL_GIGA_MAC_VER_32 || - tp->mac_version == RTL_GIGA_MAC_VER_33) - rtl_ephy_write(tp, 0x19, 0xff64); - - if (device_may_wakeup(tp_to_dev(tp))) { - phy_speed_down(tp->phydev, false); - rtl_wol_enable_rx(tp); - } -} - static void rtl_init_rxcfg(struct rtl8169_private *tp) { switch (tp->mac_version) { @@ -2455,6 +2433,28 @@ static void rtl_enable_rxdvgate(struct rtl8169_private *tp) rtl_wait_txrx_fifo_empty(tp); } +static void rtl_wol_enable_rx(struct rtl8169_private *tp) +{ + if (tp->mac_version >= RTL_GIGA_MAC_VER_25) + RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) | + AcceptBroadcast | AcceptMulticast | AcceptMyPhys); +} + +static void rtl_prepare_power_down(struct rtl8169_private *tp) +{ + if (tp->dash_type != RTL_DASH_NONE) + return; + + if (tp->mac_version == RTL_GIGA_MAC_VER_32 || + tp->mac_version == RTL_GIGA_MAC_VER_33) + rtl_ephy_write(tp, 0x19, 0xff64); + + if (device_may_wakeup(tp_to_dev(tp))) { + phy_speed_down(tp->phydev, false); + rtl_wol_enable_rx(tp); + } +} + static void rtl_set_tx_config_registers(struct rtl8169_private *tp) { u32 val = TX_DMA_BURST << TxDMAShift | From bb41c13c05c23d9bc46b4e37d8914078c6a40e3a Mon Sep 17 00:00:00 2001 From: Chunhao Lin Date: Mon, 26 Dec 2022 20:31:53 +0800 Subject: [PATCH 0147/1593] r8169: fix dmar pte write access is not set error When close device, if wol is enabled, rx will be enabled. When open device it will cause rx packet to be dma to the wrong memory address after pci_set_master() and system log will show blow messages. DMAR: DRHD: handling fault status reg 3 DMAR: [DMA Write] Request device [02:00.0] PASID ffffffff fault addr ffdd4000 [fault reason 05] PTE Write access is not set In this patch, driver disable tx/rx when close device. If wol is enabled, only enable rx filter and disable rxdv_gate(if support) to let hardware only receive packet to fifo but not to dma it. Signed-off-by: Chunhao Lin Reviewed-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index acc2500342cab..24592d9725230 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2438,6 +2438,9 @@ static void rtl_wol_enable_rx(struct rtl8169_private *tp) if (tp->mac_version >= RTL_GIGA_MAC_VER_25) RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) | AcceptBroadcast | AcceptMulticast | AcceptMyPhys); + + if (tp->mac_version >= RTL_GIGA_MAC_VER_40) + rtl_disable_rxdvgate(tp); } static void rtl_prepare_power_down(struct rtl8169_private *tp) @@ -3872,7 +3875,7 @@ static void rtl8169_tx_clear(struct rtl8169_private *tp) netdev_reset_queue(tp->dev); } -static void rtl8169_cleanup(struct rtl8169_private *tp, bool going_down) +static void rtl8169_cleanup(struct rtl8169_private *tp) { napi_disable(&tp->napi); @@ -3884,9 +3887,6 @@ static void rtl8169_cleanup(struct rtl8169_private *tp, bool going_down) rtl_rx_close(tp); - if (going_down && tp->dev->wol_enabled) - goto no_reset; - switch (tp->mac_version) { case RTL_GIGA_MAC_VER_28: case RTL_GIGA_MAC_VER_31: @@ -3907,7 +3907,7 @@ static void rtl8169_cleanup(struct rtl8169_private *tp, bool going_down) } rtl_hw_reset(tp); -no_reset: + rtl8169_tx_clear(tp); rtl8169_init_ring_indexes(tp); } @@ -3918,7 +3918,7 @@ static void rtl_reset_work(struct rtl8169_private *tp) netif_stop_queue(tp->dev); - rtl8169_cleanup(tp, false); + rtl8169_cleanup(tp); for (i = 0; i < NUM_RX_DESC; i++) rtl8169_mark_to_asic(tp->RxDescArray + i); @@ -4605,7 +4605,7 @@ static void rtl8169_down(struct rtl8169_private *tp) pci_clear_master(tp->pci_dev); rtl_pci_commit(tp); - rtl8169_cleanup(tp, true); + rtl8169_cleanup(tp); rtl_disable_exit_l1(tp); rtl_prepare_power_down(tp); } From c2052189f19bd98c80b5d46dc6e42330d2b3b35d Mon Sep 17 00:00:00 2001 From: Xuezhi Zhang Date: Tue, 27 Dec 2022 19:03:52 +0800 Subject: [PATCH 0148/1593] s390/qeth: convert sysfs snprintf to sysfs_emit Follow the advice of the Documentation/filesystems/sysfs.rst and show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. Signed-off-by: Xuezhi Zhang Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_sys.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index 406be169173ce..d1adc4b831936 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -410,13 +410,13 @@ static ssize_t qeth_dev_isolation_show(struct device *dev, switch (card->options.isolation) { case ISOLATION_MODE_NONE: - return snprintf(buf, 6, "%s\n", ATTR_QETH_ISOLATION_NONE); + return sysfs_emit(buf, "%s\n", ATTR_QETH_ISOLATION_NONE); case ISOLATION_MODE_FWD: - return snprintf(buf, 9, "%s\n", ATTR_QETH_ISOLATION_FWD); + return sysfs_emit(buf, "%s\n", ATTR_QETH_ISOLATION_FWD); case ISOLATION_MODE_DROP: - return snprintf(buf, 6, "%s\n", ATTR_QETH_ISOLATION_DROP); + return sysfs_emit(buf, "%s\n", ATTR_QETH_ISOLATION_DROP); default: - return snprintf(buf, 5, "%s\n", "N/A"); + return sysfs_emit(buf, "%s\n", "N/A"); } } @@ -500,9 +500,9 @@ static ssize_t qeth_hw_trap_show(struct device *dev, struct qeth_card *card = dev_get_drvdata(dev); if (card->info.hwtrap) - return snprintf(buf, 5, "arm\n"); + return sysfs_emit(buf, "arm\n"); else - return snprintf(buf, 8, "disarm\n"); + return sysfs_emit(buf, "disarm\n"); } static ssize_t qeth_hw_trap_store(struct device *dev, From 40cab44b9089a41f71bbd0eff753eb91d5dafd68 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 27 Dec 2022 11:04:59 -0300 Subject: [PATCH 0149/1593] net/sched: fix retpoline wrapper compilation on configs without tc filters Rudi reports a compilation failure on x86_64 when CONFIG_NET_CLS or CONFIG_NET_CLS_ACT is not set but CONFIG_RETPOLINE is set. A misplaced '#endif' was causing the issue. Fixes: 7f0e810220e2 ("net/sched: add retpoline wrapper for tc") Tested-by: Rudi Heitbaum Signed-off-by: Pedro Tammela Signed-off-by: David S. Miller --- include/net/tc_wrapper.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h index ceed2fc089ffa..d323fffb839aa 100644 --- a/include/net/tc_wrapper.h +++ b/include/net/tc_wrapper.h @@ -216,6 +216,8 @@ static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, return tp->classify(skb, tp, res); } +#endif /* CONFIG_NET_CLS */ + static inline void tc_wrapper_init(void) { #ifdef CONFIG_X86 @@ -224,8 +226,6 @@ static inline void tc_wrapper_init(void) #endif } -#endif /* CONFIG_NET_CLS */ - #else #define TC_INDIRECT_SCOPE static From 0154252a3b87f77db1e44516d1ed2e82e2d29c30 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 4 Dec 2022 09:46:14 +0100 Subject: [PATCH 0150/1593] ARM: dts: qcom: apq8084-ifc6540: fix overriding SDHCI While changing node names of APQ8084 SDHCI, the ones in IFC6540 board were not updated leading to disabled and misconfigured SDHCI. Cc: Fixes: 2477d81901a2 ("ARM: dts: qcom: Fix sdhci node names - use 'mmc@'") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Dmitry Baryshkov Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221204084614.12193-1-krzysztof.kozlowski@linaro.org --- arch/arm/boot/dts/qcom-apq8084-ifc6540.dts | 20 ++++++++++---------- arch/arm/boot/dts/qcom-apq8084.dtsi | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm/boot/dts/qcom-apq8084-ifc6540.dts b/arch/arm/boot/dts/qcom-apq8084-ifc6540.dts index 44cd72f1b1be4..116e59a3b76d0 100644 --- a/arch/arm/boot/dts/qcom-apq8084-ifc6540.dts +++ b/arch/arm/boot/dts/qcom-apq8084-ifc6540.dts @@ -19,16 +19,16 @@ serial@f995e000 { status = "okay"; }; + }; +}; - sdhci@f9824900 { - bus-width = <8>; - non-removable; - status = "okay"; - }; +&sdhc_1 { + bus-width = <8>; + non-removable; + status = "okay"; +}; - sdhci@f98a4900 { - cd-gpios = <&tlmm 122 GPIO_ACTIVE_LOW>; - bus-width = <4>; - }; - }; +&sdhc_2 { + cd-gpios = <&tlmm 122 GPIO_ACTIVE_LOW>; + bus-width = <4>; }; diff --git a/arch/arm/boot/dts/qcom-apq8084.dtsi b/arch/arm/boot/dts/qcom-apq8084.dtsi index fe30abfff90a7..4b0d2b4f4b6a9 100644 --- a/arch/arm/boot/dts/qcom-apq8084.dtsi +++ b/arch/arm/boot/dts/qcom-apq8084.dtsi @@ -421,7 +421,7 @@ status = "disabled"; }; - mmc@f9824900 { + sdhc_1: mmc@f9824900 { compatible = "qcom,apq8084-sdhci", "qcom,sdhci-msm-v4"; reg = <0xf9824900 0x11c>, <0xf9824000 0x800>; reg-names = "hc", "core"; @@ -434,7 +434,7 @@ status = "disabled"; }; - mmc@f98a4900 { + sdhc_2: mmc@f98a4900 { compatible = "qcom,apq8084-sdhci", "qcom,sdhci-msm-v4"; reg = <0xf98a4900 0x11c>, <0xf98a4000 0x800>; reg-names = "hc", "core"; From cc3304052a89ab6ac887ed9224420a27e3d354e1 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Fri, 2 Dec 2022 16:28:48 +0100 Subject: [PATCH 0151/1593] iio: adc: stm32-dfsdm: fill module aliases When STM32 DFSDM driver is built as module, no modalias information is available. This prevents module to be loaded by udev. Add MODULE_DEVICE_TABLE() to fill module aliases. Fixes: e2e6771c6462 ("IIO: ADC: add STM32 DFSDM sigma delta ADC support") Signed-off-by: Olivier Moysan Link: https://lore.kernel.org/r/20221202152848.45585-1-olivier.moysan@foss.st.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-dfsdm-adc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c index 6d21ea84fa82d..a428bdb567d52 100644 --- a/drivers/iio/adc/stm32-dfsdm-adc.c +++ b/drivers/iio/adc/stm32-dfsdm-adc.c @@ -1520,6 +1520,7 @@ static const struct of_device_id stm32_dfsdm_adc_match[] = { }, {} }; +MODULE_DEVICE_TABLE(of, stm32_dfsdm_adc_match); static int stm32_dfsdm_adc_probe(struct platform_device *pdev) { From cbd3a0153cd18a2cbef6bf3cf31bb406c3fc9f55 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Tue, 29 Nov 2022 10:03:16 +0800 Subject: [PATCH 0152/1593] iio: adc: berlin2-adc: Add missing of_node_put() in error path of_get_parent() will return a device_node pointer with refcount incremented. We need to use of_node_put() on it when done. Add the missing of_node_put() in the error path of berlin2_adc_probe(); Fixes: 70f1937911ca ("iio: adc: add support for Berlin") Signed-off-by: Xiongfeng Wang Link: https://lore.kernel.org/r/20221129020316.191731-1-wangxiongfeng2@huawei.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/berlin2-adc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/berlin2-adc.c b/drivers/iio/adc/berlin2-adc.c index 3d2e8b4db61ae..a4e7c7eff5acf 100644 --- a/drivers/iio/adc/berlin2-adc.c +++ b/drivers/iio/adc/berlin2-adc.c @@ -298,8 +298,10 @@ static int berlin2_adc_probe(struct platform_device *pdev) int ret; indio_dev = devm_iio_device_alloc(&pdev->dev, sizeof(*priv)); - if (!indio_dev) + if (!indio_dev) { + of_node_put(parent_np); return -ENOMEM; + } priv = iio_priv(indio_dev); From 6794ed0cfcc6ce737240eccc48b3e8190df36703 Mon Sep 17 00:00:00 2001 From: Marco Pagani Date: Fri, 25 Nov 2022 12:31:12 +0100 Subject: [PATCH 0153/1593] iio: adc: xilinx-ams: fix devm_krealloc() return value check The clang-analyzer reported a warning: "Value stored to 'ret' is never read". Fix the return value check if devm_krealloc() fails to resize ams_channels. Fixes: d5c70627a794 ("iio: adc: Add Xilinx AMS driver") Signed-off-by: Marco Pagani Acked-by: Michal Simek Link: https://lore.kernel.org/r/20221125113112.219290-1-marpagan@redhat.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/xilinx-ams.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/xilinx-ams.c b/drivers/iio/adc/xilinx-ams.c index 5b4bdf3a26bb8..a507d2e170792 100644 --- a/drivers/iio/adc/xilinx-ams.c +++ b/drivers/iio/adc/xilinx-ams.c @@ -1329,7 +1329,7 @@ static int ams_parse_firmware(struct iio_dev *indio_dev) dev_channels = devm_krealloc(dev, ams_channels, dev_size, GFP_KERNEL); if (!dev_channels) - ret = -ENOMEM; + return -ENOMEM; indio_dev->channels = dev_channels; indio_dev->num_channels = num_channels; From 0fc3562a993c3dc41d1177b3983d9300d0db1d4d Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 1 Dec 2022 09:01:10 -0500 Subject: [PATCH 0154/1593] iio: imx8qxp-adc: fix irq flood when call imx8qxp_adc_read_raw() irq flood happen when run cat /sys/bus/iio/devices/iio:device0/in_voltage1_raw imx8qxp_adc_read_raw() { ... enable irq /* adc start */ writel(1, adc->regs + IMX8QXP_ADR_ADC_SWTRIG); ^^^^ trigger irq flood. wait_for_completion_interruptible_timeout(); readl(adc->regs + IMX8QXP_ADR_ADC_RESFIFO); ^^^^ clear irq here. ... } There is only FIFO watermark interrupt at this ADC controller. IRQ line will be assert until software read data from FIFO. So IRQ flood happen during wait_for_completion_interruptible_timeout(). Move FIFO read into irq handle to avoid irq flood. Fixes: 1e23dcaa1a9f ("iio: imx8qxp-adc: Add driver support for NXP IMX8QXP ADC") Cc: stable@vger.kernel.org Signed-off-by: Frank Li Reviewed-by: Cai Huoqing Reviewed-by: Haibo Chen Link: https://lore.kernel.org/r/20221201140110.2653501-1-Frank.Li@nxp.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/imx8qxp-adc.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/imx8qxp-adc.c b/drivers/iio/adc/imx8qxp-adc.c index 36777b827165a..f5a0fc9e64c54 100644 --- a/drivers/iio/adc/imx8qxp-adc.c +++ b/drivers/iio/adc/imx8qxp-adc.c @@ -86,6 +86,8 @@ #define IMX8QXP_ADC_TIMEOUT msecs_to_jiffies(100) +#define IMX8QXP_ADC_MAX_FIFO_SIZE 16 + struct imx8qxp_adc { struct device *dev; void __iomem *regs; @@ -95,6 +97,7 @@ struct imx8qxp_adc { /* Serialise ADC channel reads */ struct mutex lock; struct completion completion; + u32 fifo[IMX8QXP_ADC_MAX_FIFO_SIZE]; }; #define IMX8QXP_ADC_CHAN(_idx) { \ @@ -238,8 +241,7 @@ static int imx8qxp_adc_read_raw(struct iio_dev *indio_dev, return ret; } - *val = FIELD_GET(IMX8QXP_ADC_RESFIFO_VAL_MASK, - readl(adc->regs + IMX8QXP_ADR_ADC_RESFIFO)); + *val = adc->fifo[0]; mutex_unlock(&adc->lock); return IIO_VAL_INT; @@ -265,10 +267,15 @@ static irqreturn_t imx8qxp_adc_isr(int irq, void *dev_id) { struct imx8qxp_adc *adc = dev_id; u32 fifo_count; + int i; fifo_count = FIELD_GET(IMX8QXP_ADC_FCTRL_FCOUNT_MASK, readl(adc->regs + IMX8QXP_ADR_ADC_FCTRL)); + for (i = 0; i < fifo_count; i++) + adc->fifo[i] = FIELD_GET(IMX8QXP_ADC_RESFIFO_VAL_MASK, + readl_relaxed(adc->regs + IMX8QXP_ADR_ADC_RESFIFO)); + if (fifo_count) complete(&adc->completion); From f804bd0dc28683a93a60f271aaefb2fc5b0853dd Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Thu, 1 Dec 2022 19:16:35 +0100 Subject: [PATCH 0155/1593] iio:adc:twl6030: Enable measurements of VUSB, VBAT and others Some inputs need to be wired up to produce proper measurements, without this change only near zero values are reported. Signed-off-by: Andreas Kemnade Fixes: 1696f36482e70 ("iio: twl6030-gpadc: TWL6030, TWL6032 GPADC driver") Link: https://lore.kernel.org/r/20221201181635.3522962-1-andreas@kemnade.info Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/twl6030-gpadc.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/iio/adc/twl6030-gpadc.c b/drivers/iio/adc/twl6030-gpadc.c index f53e8558b560c..40438e5b49702 100644 --- a/drivers/iio/adc/twl6030-gpadc.c +++ b/drivers/iio/adc/twl6030-gpadc.c @@ -57,6 +57,18 @@ #define TWL6030_GPADCS BIT(1) #define TWL6030_GPADCR BIT(0) +#define USB_VBUS_CTRL_SET 0x04 +#define USB_ID_CTRL_SET 0x06 + +#define TWL6030_MISC1 0xE4 +#define VBUS_MEAS 0x01 +#define ID_MEAS 0x01 + +#define VAC_MEAS 0x04 +#define VBAT_MEAS 0x02 +#define BB_MEAS 0x01 + + /** * struct twl6030_chnl_calib - channel calibration * @gain: slope coefficient for ideal curve @@ -927,6 +939,26 @@ static int twl6030_gpadc_probe(struct platform_device *pdev) return ret; } + ret = twl_i2c_write_u8(TWL_MODULE_USB, VBUS_MEAS, USB_VBUS_CTRL_SET); + if (ret < 0) { + dev_err(dev, "failed to wire up inputs\n"); + return ret; + } + + ret = twl_i2c_write_u8(TWL_MODULE_USB, ID_MEAS, USB_ID_CTRL_SET); + if (ret < 0) { + dev_err(dev, "failed to wire up inputs\n"); + return ret; + } + + ret = twl_i2c_write_u8(TWL6030_MODULE_ID0, + VBAT_MEAS | BB_MEAS | BB_MEAS, + TWL6030_MISC1); + if (ret < 0) { + dev_err(dev, "failed to wire up inputs\n"); + return ret; + } + indio_dev->name = DRIVER_NAME; indio_dev->info = &twl6030_gpadc_iio_info; indio_dev->modes = INDIO_DIRECT_MODE; From 429e1e8ec696e0e7a0742904e3dc2f83b7b23dfb Mon Sep 17 00:00:00 2001 From: Carlos Song Date: Thu, 8 Dec 2022 15:19:05 +0800 Subject: [PATCH 0156/1593] iio: imu: fxos8700: fix map label of channel type to MAGN sensor FXOS8700 is an IMU sensor with ACCEL sensor and MAGN sensor. Sensor type is indexed by corresponding channel type in a switch. IIO_ANGL_VEL channel type mapped to MAGN sensor has caused confusion. Fix the mapping label of "IIO_MAGN" channel type instead of "IIO_ANGL_VEL" channel type to MAGN sensor. Fixes: 84e5ddd5c46e ("iio: imu: Add support for the FXOS8700 IMU") Signed-off-by: Carlos Song Link: https://lore.kernel.org/r/20221208071911.2405922-2-carlos.song@nxp.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/fxos8700_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/fxos8700_core.c b/drivers/iio/imu/fxos8700_core.c index 423cfe526f2a1..235b02b2f4e59 100644 --- a/drivers/iio/imu/fxos8700_core.c +++ b/drivers/iio/imu/fxos8700_core.c @@ -320,7 +320,7 @@ static enum fxos8700_sensor fxos8700_to_sensor(enum iio_chan_type iio_type) switch (iio_type) { case IIO_ACCEL: return FXOS8700_ACCEL; - case IIO_ANGL_VEL: + case IIO_MAGN: return FXOS8700_MAGN; default: return -EINVAL; From c68b44bc7d9b1469774a1c985ee71d2cbc5ebef5 Mon Sep 17 00:00:00 2001 From: Carlos Song Date: Thu, 8 Dec 2022 15:19:06 +0800 Subject: [PATCH 0157/1593] iio: imu: fxos8700: fix swapped ACCEL and MAGN channels readback Because ACCEL and MAGN channels data register base address is swapped the accelerometer and magnetometer channels readback is swapped. Fixes: 84e5ddd5c46e ("iio: imu: Add support for the FXOS8700 IMU") Signed-off-by: Carlos Song Link: https://lore.kernel.org/r/20221208071911.2405922-3-carlos.song@nxp.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/fxos8700_core.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/iio/imu/fxos8700_core.c b/drivers/iio/imu/fxos8700_core.c index 235b02b2f4e59..977eb7dc7dbd8 100644 --- a/drivers/iio/imu/fxos8700_core.c +++ b/drivers/iio/imu/fxos8700_core.c @@ -395,9 +395,22 @@ static int fxos8700_get_data(struct fxos8700_data *data, int chan_type, { u8 base, reg; int ret; - enum fxos8700_sensor type = fxos8700_to_sensor(chan_type); - base = type ? FXOS8700_OUT_X_MSB : FXOS8700_M_OUT_X_MSB; + /* + * Different register base addresses varies with channel types. + * This bug hasn't been noticed before because using an enum is + * really hard to read. Use an a switch statement to take over that. + */ + switch (chan_type) { + case IIO_ACCEL: + base = FXOS8700_OUT_X_MSB; + break; + case IIO_MAGN: + base = FXOS8700_M_OUT_X_MSB; + break; + default: + return -EINVAL; + } /* Block read 6 bytes of device output registers to avoid data loss */ ret = regmap_bulk_read(data->regmap, base, data->buf, From 37a94d86d7050665d6d01378b2c916c28e454f10 Mon Sep 17 00:00:00 2001 From: Carlos Song Date: Thu, 8 Dec 2022 15:19:07 +0800 Subject: [PATCH 0158/1593] iio: imu: fxos8700: fix incomplete ACCEL and MAGN channels readback The length of ACCEL and MAGN 3-axis channels output data is 6 byte individually. However block only read 3 bytes data into buffer from ACCEL or MAGN output data registers every time. It causes an incomplete ACCEL and MAGN channels readback. Set correct value count for regmap_bulk_read to get 6 bytes ACCEL and MAGN channels readback. Fixes: 84e5ddd5c46e ("iio: imu: Add support for the FXOS8700 IMU") Signed-off-by: Carlos Song Link: https://lore.kernel.org/r/20221208071911.2405922-4-carlos.song@nxp.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/fxos8700_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/fxos8700_core.c b/drivers/iio/imu/fxos8700_core.c index 977eb7dc7dbd8..b62bc92bbaccc 100644 --- a/drivers/iio/imu/fxos8700_core.c +++ b/drivers/iio/imu/fxos8700_core.c @@ -414,7 +414,7 @@ static int fxos8700_get_data(struct fxos8700_data *data, int chan_type, /* Block read 6 bytes of device output registers to avoid data loss */ ret = regmap_bulk_read(data->regmap, base, data->buf, - FXOS8700_DATA_BUF_SIZE); + sizeof(data->buf)); if (ret) return ret; From a53f945879c0cb9de3a4c05a665f5157884b5208 Mon Sep 17 00:00:00 2001 From: Carlos Song Date: Thu, 8 Dec 2022 15:19:08 +0800 Subject: [PATCH 0159/1593] iio: imu: fxos8700: fix IMU data bits returned to user space ACCEL output data registers contain the X-axis, Y-axis, and Z-axis 14-bit left-justified sample data and MAGN output data registers contain the X-axis, Y-axis, and Z-axis 16-bit sample data. The ACCEL raw register output data should be divided by 4 before sent to userspace. Apply a 2 bits signed right shift to the raw data from ACCEL output data register but keep that from MAGN sensor as the origin. Fixes: 84e5ddd5c46e ("iio: imu: Add support for the FXOS8700 IMU") Signed-off-by: Carlos Song Link: https://lore.kernel.org/r/20221208071911.2405922-5-carlos.song@nxp.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/fxos8700_core.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/iio/imu/fxos8700_core.c b/drivers/iio/imu/fxos8700_core.c index b62bc92bbaccc..06948a8cc315c 100644 --- a/drivers/iio/imu/fxos8700_core.c +++ b/drivers/iio/imu/fxos8700_core.c @@ -394,6 +394,7 @@ static int fxos8700_get_data(struct fxos8700_data *data, int chan_type, int axis, int *val) { u8 base, reg; + s16 tmp; int ret; /* @@ -421,8 +422,33 @@ static int fxos8700_get_data(struct fxos8700_data *data, int chan_type, /* Convert axis to buffer index */ reg = axis - IIO_MOD_X; + /* + * Convert to native endianness. The accel data and magn data + * are signed, so a forced type conversion is needed. + */ + tmp = be16_to_cpu(data->buf[reg]); + + /* + * ACCEL output data registers contain the X-axis, Y-axis, and Z-axis + * 14-bit left-justified sample data and MAGN output data registers + * contain the X-axis, Y-axis, and Z-axis 16-bit sample data. Apply + * a signed 2 bits right shift to the readback raw data from ACCEL + * output data register and keep that from MAGN sensor as the origin. + * Value should be extended to 32 bit. + */ + switch (chan_type) { + case IIO_ACCEL: + tmp = tmp >> 2; + break; + case IIO_MAGN: + /* Nothing to do */ + break; + default: + return -EINVAL; + } + /* Convert to native endianness */ - *val = sign_extend32(be16_to_cpu(data->buf[reg]), 15); + *val = sign_extend32(tmp, 15); return 0; } From 9d61c1820598a5ea474576ed55318a6dadee37ed Mon Sep 17 00:00:00 2001 From: Carlos Song Date: Thu, 8 Dec 2022 15:19:09 +0800 Subject: [PATCH 0160/1593] iio: imu: fxos8700: fix ACCEL measurement range selection When device is in active mode, it fails to set an ACCEL full-scale range(2g/4g/8g) in FXOS8700_XYZ_DATA_CFG. This is not align with the datasheet, but it is a fxos8700 chip behavior. Keep the device in standby mode before setting ACCEL full-scale range into FXOS8700_XYZ_DATA_CFG in chip initialization phase and setting scale phase. Fixes: 84e5ddd5c46e ("iio: imu: Add support for the FXOS8700 IMU") Signed-off-by: Carlos Song Link: https://lore.kernel.org/r/20221208071911.2405922-6-carlos.song@nxp.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/fxos8700_core.c | 41 +++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/drivers/iio/imu/fxos8700_core.c b/drivers/iio/imu/fxos8700_core.c index 06948a8cc315c..ec622123ccac1 100644 --- a/drivers/iio/imu/fxos8700_core.c +++ b/drivers/iio/imu/fxos8700_core.c @@ -345,7 +345,8 @@ static int fxos8700_set_active_mode(struct fxos8700_data *data, static int fxos8700_set_scale(struct fxos8700_data *data, enum fxos8700_sensor t, int uscale) { - int i; + int i, ret, val; + bool active_mode; static const int scale_num = ARRAY_SIZE(fxos8700_accel_scale); struct device *dev = regmap_get_device(data->regmap); @@ -354,6 +355,25 @@ static int fxos8700_set_scale(struct fxos8700_data *data, return -EINVAL; } + /* + * When device is in active mode, it failed to set an ACCEL + * full-scale range(2g/4g/8g) in FXOS8700_XYZ_DATA_CFG. + * This is not align with the datasheet, but it is a fxos8700 + * chip behavier. Set the device in standby mode before setting + * an ACCEL full-scale range. + */ + ret = regmap_read(data->regmap, FXOS8700_CTRL_REG1, &val); + if (ret) + return ret; + + active_mode = val & FXOS8700_ACTIVE; + if (active_mode) { + ret = regmap_write(data->regmap, FXOS8700_CTRL_REG1, + val & ~FXOS8700_ACTIVE); + if (ret) + return ret; + } + for (i = 0; i < scale_num; i++) if (fxos8700_accel_scale[i].uscale == uscale) break; @@ -361,8 +381,12 @@ static int fxos8700_set_scale(struct fxos8700_data *data, if (i == scale_num) return -EINVAL; - return regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG, + ret = regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG, fxos8700_accel_scale[i].bits); + if (ret) + return ret; + return regmap_write(data->regmap, FXOS8700_CTRL_REG1, + active_mode); } static int fxos8700_get_scale(struct fxos8700_data *data, @@ -631,14 +655,17 @@ static int fxos8700_chip_init(struct fxos8700_data *data, bool use_spi) if (ret) return ret; - /* Max ODR (800Hz individual or 400Hz hybrid), active mode */ - ret = regmap_write(data->regmap, FXOS8700_CTRL_REG1, - FXOS8700_CTRL_ODR_MAX | FXOS8700_ACTIVE); + /* + * Set max full-scale range (+/-8G) for ACCEL sensor in chip + * initialization then activate the device. + */ + ret = regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG, MODE_8G); if (ret) return ret; - /* Set for max full-scale range (+/-8G) */ - return regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG, MODE_8G); + /* Max ODR (800Hz individual or 400Hz hybrid), active mode */ + return regmap_write(data->regmap, FXOS8700_CTRL_REG1, + FXOS8700_CTRL_ODR_MAX | FXOS8700_ACTIVE); } static void fxos8700_chip_uninit(void *data) From bffb7d9d1a3dbd09e083b88aefd093b3b10abbfb Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Sat, 17 Dec 2022 23:13:05 +0100 Subject: [PATCH 0161/1593] iio:adc:twl6030: Enable measurement of VAC VAC needs to be wired up to produce proper measurements, without this change only near zero values are reported. Reported-by: kernel test robot Reported-by: Julia Lawall Fixes: 1696f36482e7 ("iio: twl6030-gpadc: TWL6030, TWL6032 GPADC driver") Signed-off-by: Andreas Kemnade Link: https://lore.kernel.org/r/20221217221305.671117-1-andreas@kemnade.info Signed-off-by: Jonathan Cameron --- drivers/iio/adc/twl6030-gpadc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/twl6030-gpadc.c b/drivers/iio/adc/twl6030-gpadc.c index 40438e5b49702..32873fb5f3676 100644 --- a/drivers/iio/adc/twl6030-gpadc.c +++ b/drivers/iio/adc/twl6030-gpadc.c @@ -952,7 +952,7 @@ static int twl6030_gpadc_probe(struct platform_device *pdev) } ret = twl_i2c_write_u8(TWL6030_MODULE_ID0, - VBAT_MEAS | BB_MEAS | BB_MEAS, + VBAT_MEAS | BB_MEAS | VAC_MEAS, TWL6030_MISC1); if (ret < 0) { dev_err(dev, "failed to wire up inputs\n"); From 6c82b94d583a116faf99858379ee34844df963a1 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 22 Nov 2022 13:37:13 +0100 Subject: [PATCH 0162/1593] Revert "arm64: dts: qcom: sm8250: Disable the not yet supported cluster idle state" Due to recent improvements of the cluster idle state support for Qcom based platforms, we are now able to support the deepest cluster idle state. Let's therefore revert the earlier workaround. This reverts commit cadaa773bcf1 ("arm64: dts: qcom: sm8250: Disable the not yet supported cluster idle state"), which is available from v6.1-rc6. Signed-off-by: Ulf Hansson Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221122123713.65631-1-ulf.hansson@linaro.org --- arch/arm64/boot/dts/qcom/sm8250.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index dab5579946f35..927032863e2f1 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -334,7 +334,6 @@ exit-latency-us = <6562>; min-residency-us = <9987>; local-timer-stop; - status = "disabled"; }; }; }; From 1f0ae22ab470946143485a02cc1cd7e05c0f9120 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 12 Dec 2022 10:42:15 +0200 Subject: [PATCH 0163/1593] net/mlx5: E-Switch, properly handle ingress tagged packets on VST Fix SRIOV VST mode behavior to insert cvlan when a guest tag is already present in the frame. Previous VST mode behavior was to drop packets or override existing tag, depending on the device version. In this patch we fix this behavior by correctly building the HW steering rule with a push vlan action, or for older devices we ask the FW to stack the vlan when a vlan is already present. Fixes: 07bab9502641 ("net/mlx5: E-Switch, Refactor eswitch ingress acl codes") Fixes: dfcb1ed3c331 ("net/mlx5: E-Switch, Vport ingress/egress ACLs rules for VST mode") Signed-off-by: Moshe Shemesh Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/esw/acl/egress_lgcy.c | 7 +++- .../mellanox/mlx5/core/esw/acl/ingress_lgcy.c | 33 ++++++++++++++++--- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 30 ++++++++++++----- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 6 ++++ include/linux/mlx5/device.h | 5 +++ include/linux/mlx5/mlx5_ifc.h | 3 +- 6 files changed, 68 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c index 60a73990017c2..6b4c9ffad95b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c @@ -67,6 +67,7 @@ static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport) int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + bool vst_mode_steering = esw_vst_mode_is_steering(esw); struct mlx5_flow_destination drop_ctr_dst = {}; struct mlx5_flow_destination *dst = NULL; struct mlx5_fc *drop_counter = NULL; @@ -77,6 +78,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, */ int table_size = 2; int dest_num = 0; + int actions_flag; int err = 0; if (vport->egress.legacy.drop_counter) { @@ -119,8 +121,11 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, vport->vport, vport->info.vlan, vport->info.qos); /* Allowed vlan rule */ + actions_flag = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + if (vst_mode_steering) + actions_flag |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan, - MLX5_FLOW_CONTEXT_ACTION_ALLOW); + actions_flag); if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c index b1a5199260f69..093ed86a0acd8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c @@ -139,11 +139,14 @@ static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport) int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + bool vst_mode_steering = esw_vst_mode_is_steering(esw); struct mlx5_flow_destination drop_ctr_dst = {}; struct mlx5_flow_destination *dst = NULL; struct mlx5_flow_act flow_act = {}; struct mlx5_flow_spec *spec = NULL; struct mlx5_fc *counter = NULL; + bool vst_check_cvlan = false; + bool vst_push_cvlan = false; /* The ingress acl table contains 4 groups * (2 active rules at the same time - * 1 allow rule from one of the first 3 groups. @@ -203,7 +206,26 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, goto out; } - if (vport->info.vlan || vport->info.qos) + if ((vport->info.vlan || vport->info.qos)) { + if (vst_mode_steering) + vst_push_cvlan = true; + else if (!MLX5_CAP_ESW(esw->dev, vport_cvlan_insert_always)) + vst_check_cvlan = true; + } + + if (vst_check_cvlan || vport->info.spoofchk) + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + /* Create ingress allow rule */ + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + if (vst_push_cvlan) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + flow_act.vlan[0].prio = vport->info.qos; + flow_act.vlan[0].vid = vport->info.vlan; + flow_act.vlan[0].ethtype = ETH_P_8021Q; + } + + if (vst_check_cvlan) MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); @@ -218,9 +240,6 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, ether_addr_copy(smac_v, vport->info.mac); } - /* Create ingress allow rule */ - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, &flow_act, NULL, 0); if (IS_ERR(vport->ingress.allow_rule)) { @@ -232,6 +251,9 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, goto out; } + if (!vst_check_cvlan && !vport->info.spoofchk) + goto out; + memset(&flow_act, 0, sizeof(flow_act)); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; /* Attach drop flow counter */ @@ -257,7 +279,8 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, return 0; out: - esw_acl_ingress_lgcy_cleanup(esw, vport); + if (err) + esw_acl_ingress_lgcy_cleanup(esw, vport); kvfree(spec); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 527e4bffda8d4..0dfd5742c6fe9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -161,10 +161,17 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, esw_vport_context.vport_cvlan_strip, 1); if (set_flags & SET_VLAN_INSERT) { - /* insert only if no vlan in packet */ - MLX5_SET(modify_esw_vport_context_in, in, - esw_vport_context.vport_cvlan_insert, 1); - + if (MLX5_CAP_ESW(dev, vport_cvlan_insert_always)) { + /* insert either if vlan exist in packet or not */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, + MLX5_VPORT_CVLAN_INSERT_ALWAYS); + } else { + /* insert only if no vlan in packet */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, + MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN); + } MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.cvlan_pcp, qos); MLX5_SET(modify_esw_vport_context_in, in, @@ -809,6 +816,7 @@ static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport * static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + bool vst_mode_steering = esw_vst_mode_is_steering(esw); u16 vport_num = vport->vport; int flags; int err; @@ -839,8 +847,9 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) flags = (vport->info.vlan || vport->info.qos) ? SET_VLAN_STRIP | SET_VLAN_INSERT : 0; - modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, - vport->info.qos, flags); + if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, + vport->info.qos, flags); return 0; @@ -1848,6 +1857,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u16 vport, u16 vlan, u8 qos, u8 set_flags) { struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + bool vst_mode_steering = esw_vst_mode_is_steering(esw); int err = 0; if (IS_ERR(evport)) @@ -1855,9 +1865,11 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, if (vlan > 4095 || qos > 7) return -EINVAL; - err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); - if (err) - return err; + if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) { + err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); + if (err) + return err; + } evport->info.vlan = vlan; evport->info.qos = qos; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 5a85a5d32be7e..92644fbb50816 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -527,6 +527,12 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u16 vport, u16 vlan, u8 qos, u8 set_flags); +static inline bool esw_vst_mode_is_steering(struct mlx5_eswitch *esw) +{ + return (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, pop_vlan) && + MLX5_CAP_ESW_INGRESS_ACL(esw->dev, push_vlan)); +} + static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, u8 vlan_depth) { diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 5fe5d198b57ad..29d4b201c7b26 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1090,6 +1090,11 @@ enum { MLX5_VPORT_ADMIN_STATE_AUTO = 0x2, }; +enum { + MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN = 0x1, + MLX5_VPORT_CVLAN_INSERT_ALWAYS = 0x3, +}; + enum { MLX5_L3_PROT_TYPE_IPV4 = 0, MLX5_L3_PROT_TYPE_IPV6 = 1, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f3d1c62c98ddc..a9ee7bc59c901 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -913,7 +913,8 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x2]; + u8 reserved_at_5[0x1]; + u8 vport_cvlan_insert_always[0x1]; u8 esw_shared_ingress_acl[0x1]; u8 esw_uplink_ingress_acl[0x1]; u8 root_ft_on_other_esw[0x1]; From 2a35b2c2e6a252eda2134aae6a756861d9299531 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 18 Oct 2022 12:51:52 +0200 Subject: [PATCH 0164/1593] net/mlx5: Add forgotten cleanup calls into mlx5_init_once() error path There are two cleanup calls missing in mlx5_init_once() error path. Add them making the error path flow to be the same as mlx5_cleanup_once(). Fixes: 52ec462eca9b ("net/mlx5: Add reserved-gids support") Fixes: 7c39afb394c7 ("net/mlx5: PTP code migration to driver core section") Signed-off-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 7f5db13e3550c..ec5652f31dda1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1050,6 +1050,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) err_tables_cleanup: mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); + mlx5_cleanup_clock(dev); + mlx5_cleanup_reserved_gids(dev); mlx5_cq_debugfs_cleanup(dev); mlx5_fw_reset_cleanup(dev); err_events_cleanup: From 44aee8ea15ac205490a41b00cbafcccbf9f7f82b Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 18 Dec 2022 12:42:14 +0200 Subject: [PATCH 0165/1593] net/mlx5: Fix io_eq_size and event_eq_size params validation io_eq_size and event_eq_size params are of param type DEVLINK_PARAM_TYPE_U32. But, the validation callback is addressing them as DEVLINK_PARAM_TYPE_U16. This cause mismatch in validation in big-endian systems, in which values in range were rejected while 268500991 was accepted. Fix it by checking the U32 value in the validation callback. Fixes: 0844fa5f7b89 ("net/mlx5: Let user configure io_eq_size param") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index ddb197970c22c..be59bb35d795c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -563,7 +563,7 @@ static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, union devlink_param_value val, struct netlink_ext_ack *extack) { - return (val.vu16 >= 64 && val.vu16 <= 4096) ? 0 : -EINVAL; + return (val.vu32 >= 64 && val.vu32 <= 4096) ? 0 : -EINVAL; } static const struct devlink_param mlx5_devlink_params[] = { From 9078e843efec530f279a155f262793c58b0746bd Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 24 Nov 2022 13:34:12 +0200 Subject: [PATCH 0166/1593] net/mlx5: Avoid recovery in probe flows Currently, recovery is done without considering whether the device is still in probe flow. This may lead to recovery before device have finished probed successfully. e.g.: while mlx5_init_one() is running. Recovery flow is using functionality that is loaded only by mlx5_init_one(), and there is no point in running recovery without mlx5_init_one() finished successfully. Fix it by waiting for probe flow to finish and checking whether the device is probed before trying to perform recovery. Fixes: 51d138c2610a ("net/mlx5: Fix health error state handling") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 86ed87d704f7d..96417c5feed76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -674,6 +674,12 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) dev = container_of(priv, struct mlx5_core_dev, priv); devlink = priv_to_devlink(dev); + mutex_lock(&dev->intf_state_mutex); + if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) { + mlx5_core_err(dev, "health works are not permitted at this stage\n"); + return; + } + mutex_unlock(&dev->intf_state_mutex); enter_error_state(dev, false); if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { devl_lock(devlink); From c4ad5f2bdad56265b23d3635494ecdb205431807 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 9 Nov 2022 14:42:59 +0200 Subject: [PATCH 0167/1593] net/mlx5: Fix RoCE setting at HCA level mlx5 PF can disable RoCE for its VFs and SFs. In such case RoCE is marked as unsupported on those VFs/SFs. The cited patch added an option for disable (and enable) RoCE at HCA level. However, that commit didn't check whether RoCE is supported on the HCA and enabled user to try and set RoCE to on. Fix it by checking whether the HCA supports RoCE. Fixes: fbfa97b4d79f ("net/mlx5: Disable roce at HCA level") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index be59bb35d795c..5bd83c0275f82 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -468,7 +468,7 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, bool new_state = val.vbool; if (new_state && !MLX5_CAP_GEN(dev, roce) && - !MLX5_CAP_GEN(dev, roce_rw_supported)) { + !(MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce))) { NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index ec5652f31dda1..df134f6d32dc5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -613,7 +613,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix, MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); - if (MLX5_CAP_GEN(dev, roce_rw_supported)) + if (MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce)) MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_on(dev)); From b12d581e83e3ae1080c32ab83f123005bd89a840 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 28 Nov 2022 15:24:21 +0200 Subject: [PATCH 0168/1593] net/mlx5e: IPoIB, Don't allow CQE compression to be turned on by default mlx5e_build_nic_params will turn CQE compression on if the hardware capability is enabled and the slow_pci_heuristic condition is detected. As IPoIB doesn't support CQE compression, make sure to disable the feature in the IPoIB profile init. Please note that the feature is not exposed to the user for IPoIB interfaces, so it can't be subsequently turned on. Fixes: b797a684b0dd ("net/mlx5e: Enable CQE compression when PCI is slower than link") Signed-off-by: Dragos Tatulea Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 7c5c500fd215e..2c73c8445e630 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -71,6 +71,10 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, params->packet_merge.type = MLX5E_PACKET_MERGE_NONE; params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; params->tunneled_offload_en = false; + + /* CQE compression is not supported for IPoIB */ + params->rx_cqe_compress_def = false; + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); } /* Called directly after IPoIB netdevice was created to initialize SW structs */ From f8c18a5749cf917096f75dd59885b7a0fe9298ba Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 27 Nov 2022 09:21:28 +0200 Subject: [PATCH 0169/1593] net/mlx5e: Fix RX reporter for XSK RQs RX reporter mistakenly reads from the regular (inactive) RQ when XSK RQ is active. Fix it here. Fixes: 3db4c85cde7a ("net/mlx5e: xsk: Use queue indices starting from 0 for XSK queues") Signed-off-by: Tariq Toukan Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 5f6f95ad6888c..1ae15b8536a85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -459,7 +459,11 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, goto unlock; for (i = 0; i < priv->channels.num; i++) { - struct mlx5e_rq *rq = &priv->channels.c[i]->rq; + struct mlx5e_channel *c = priv->channels.c[i]; + struct mlx5e_rq *rq; + + rq = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state) ? + &c->xskrq : &c->rq; err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg); if (err) From 849190e3e4ccf452fbe2240eace30a9ca83fb8d2 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 28 Nov 2022 13:54:29 +0800 Subject: [PATCH 0170/1593] net/mlx5e: CT: Fix ct debugfs folder name Need to use sprintf to build a string instead of sscanf. Otherwise dirname is null and both "ct_nic" and "ct_fdb" won't be created. But its redundant anyway as driver could be in switchdev mode but still add nic rules. So use "ct" as folder name. Fixes: 77422a8f6f61 ("net/mlx5e: CT: Add ct driver counters") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index a69849e0deed7..313df8232db70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -2103,14 +2103,9 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, static void mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv) { - bool is_fdb = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB; struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs; - char dirname[16] = {}; - if (sscanf(dirname, "ct_%s", is_fdb ? "fdb" : "nic") < 0) - return; - - ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev)); + ct_dbgfs->root = debugfs_create_dir("ct", mlx5_debugfs_get_dev_root(ct_priv->dev)); debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root, &ct_dbgfs->stats.offloaded); debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root, From 2951b2e142ecf6e0115df785ba91e91b6da74602 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 5 Dec 2022 09:22:50 +0800 Subject: [PATCH 0171/1593] net/mlx5e: Always clear dest encap in neigh-update-del The cited commit introduced a bug for multiple encapsulations flow. If one dest encap becomes invalid, the flow is set slow path flag. But when other dests encap become invalid, they are not cleared due to slow path flag of the flow. When neigh-update-add is running, it will use invalid encap. Fix it by checking slow path flag after clearing dest encap. Fixes: 9a5f9cc794e1 ("net/mlx5e: Fix possible use-after-free deleting fdb rule") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index ff73d25bc6eb8..2aaf8ab857b8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -222,7 +222,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, int err; list_for_each_entry(flow, flow_list, tmp_list) { - if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW)) + if (!mlx5e_is_offloaded_flow(flow)) continue; attr = mlx5e_tc_get_encap_attr(flow); @@ -231,6 +231,13 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL; + /* Clear pkt_reformat before checking slow path flag. Because + * in next iteration, the same flow is already set slow path + * flag, but still need to clear the pkt_reformat. + */ + if (flow_flag_test(flow, SLOW)) + continue; + /* update from encap rule to slow path rule */ spec = &flow->attr->parse_attr->spec; rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); From 1e267ab88dc44c48f556218f7b7f14c76f7aa066 Mon Sep 17 00:00:00 2001 From: Adham Faris Date: Wed, 14 Dec 2022 16:02:57 +0200 Subject: [PATCH 0172/1593] net/mlx5e: Fix hw mtu initializing at XDP SQ allocation Current xdp xmit functions logic (mlx5e_xmit_xdp_frame_mpwqe or mlx5e_xmit_xdp_frame), validates xdp packet length by comparing it to hw mtu (configured at xdp sq allocation) before xmiting it. This check does not account for ethernet fcs length (calculated and filled by the nic). Hence, when we try sending packets with length > (hw-mtu - ethernet-fcs-size), the device port drops it and tx_errors_phy is incremented. Desired behavior is to catch these packets and drop them by the driver. Fix this behavior in XDP SQ allocation function (mlx5e_alloc_xdpsq) by subtracting ethernet FCS header size (4 Bytes) from current hw mtu value, since ethernet FCS is calculated and written to ethernet frames by the nic. Fixes: d8bec2b29a82 ("net/mlx5e: Support bpf_xdp_adjust_head()") Signed-off-by: Adham Faris Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8d36e2de53a99..cff5f2e29e1e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1305,7 +1305,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->channel = c; sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; - sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN; sq->xsk_pool = xsk_pool; sq->stats = sq->xsk_pool ? From e54638a8380bd9c146a883035fffd0a821813682 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Sun, 1 Aug 2021 14:45:17 +0300 Subject: [PATCH 0173/1593] net/mlx5e: Set geneve_tlv_option_0_exist when matching on geneve option The cited patch added support of matching on geneve option by setting geneve_tlv_option_0_data mask and key but didn't set geneve_tlv_option_0_exist bit which is required on some HWs when matching geneve_tlv_option_0_data parameter, this may cause in some cases for packets to wrongly match on rules with different geneve option. Example of such case is packet with geneve_tlv_object class=789 and data=456 will wrongly match on rule with match geneve_tlv_object class=123 and data=456. Fix it by setting geneve_tlv_option_0_exist bit when supported by the HW when matching on geneve_tlv_option_0_data parameter. Fixes: 9272e3df3023 ("net/mlx5e: Geneve, Add support for encap/decap flows offload") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c index f5b26f5a7de46..054d80c4e65cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -273,6 +273,11 @@ static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv, geneve_tlv_option_0_data, be32_to_cpu(opt_data_key)); MLX5_SET(fte_match_set_misc3, misc_3_c, geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask)); + if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.geneve_tlv_option_0_exist)) { + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_tlv_option_0_exist); + MLX5_SET_TO_ONES(fte_match_set_misc, misc_v, geneve_tlv_option_0_exist); + } spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; From 4d1c1379d71777ddeda3e54f8fc26e9ecbfd1009 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 15 Dec 2022 14:28:34 +0200 Subject: [PATCH 0174/1593] net/mlx5: Lag, fix failure to cancel delayed bond work Commit 0d4e8ed139d8 ("net/mlx5: Lag, avoid lockdep warnings") accidentally removed a call to cancel delayed bond work thus it may cause queued delay to expire and fall on an already destroyed work queue. Fix by restoring the call cancel_delayed_work_sync() before destroying the workqueue. This prevents call trace such as this: [ 329.230417] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 329.231444] #PF: supervisor write access in kernel mode [ 329.232233] #PF: error_code(0x0002) - not-present page [ 329.233007] PGD 0 P4D 0 [ 329.233476] Oops: 0002 [#1] SMP [ 329.234012] CPU: 5 PID: 145 Comm: kworker/u20:4 Tainted: G OE 6.0.0-rc5_mlnx #1 [ 329.235282] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 329.236868] Workqueue: mlx5_cmd_0000:08:00.1 cmd_work_handler [mlx5_core] [ 329.237886] RIP: 0010:_raw_spin_lock+0xc/0x20 [ 329.238585] Code: f0 0f b1 17 75 02 f3 c3 89 c6 e9 6f 3c 5f ff 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00 31 c0 ba 01 00 00 00 0f b1 17 75 02 f3 c3 89 c6 e9 45 3c 5f ff 0f 1f 44 00 00 0f 1f [ 329.241156] RSP: 0018:ffffc900001b0e98 EFLAGS: 00010046 [ 329.241940] RAX: 0000000000000000 RBX: ffffffff82374ae0 RCX: 0000000000000000 [ 329.242954] RDX: 0000000000000001 RSI: 0000000000000014 RDI: 0000000000000000 [ 329.243974] RBP: ffff888106ccf000 R08: ffff8881004000c8 R09: ffff888100400000 [ 329.244990] R10: 0000000000000000 R11: ffffffff826669f8 R12: 0000000000002000 [ 329.246009] R13: 0000000000000005 R14: ffff888100aa7ce0 R15: ffff88852ca80000 [ 329.247030] FS: 0000000000000000(0000) GS:ffff88852ca80000(0000) knlGS:0000000000000000 [ 329.248260] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 329.249111] CR2: 0000000000000000 CR3: 000000016d675001 CR4: 0000000000770ee0 [ 329.250133] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 329.251152] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 329.252176] PKRU: 55555554 Fixes: 0d4e8ed139d8 ("net/mlx5: Lag, avoid lockdep warnings") Signed-off-by: Eli Cohen Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 32c3e0a649a75..ad32b80e85018 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -228,6 +228,7 @@ static void mlx5_ldev_free(struct kref *ref) if (ldev->nb.notifier_call) unregister_netdevice_notifier_net(&init_net, &ldev->nb); mlx5_lag_mp_cleanup(ldev); + cancel_delayed_work_sync(&ldev->bond_work); destroy_workqueue(ldev->wq); mlx5_lag_mpesw_cleanup(ldev); mutex_destroy(&ldev->lock); From 9ed1d9aeef5842ecacb660fce933613b58af1e00 Mon Sep 17 00:00:00 2001 From: Chuang Wang Date: Sat, 24 Dec 2022 21:31:46 +0800 Subject: [PATCH 0175/1593] bpf: Fix panic due to wrong pageattr of im->image In the scenario where livepatch and kretfunc coexist, the pageattr of im->image is rox after arch_prepare_bpf_trampoline in bpf_trampoline_update, and then modify_fentry or register_fentry returns -EAGAIN from bpf_tramp_ftrace_ops_func, the BPF_TRAMP_F_ORIG_STACK flag will be configured, and arch_prepare_bpf_trampoline will be re-executed. At this time, because the pageattr of im->image is rox, arch_prepare_bpf_trampoline will read and write im->image, which causes a fault. as follows: insmod livepatch-sample.ko # samples/livepatch/livepatch-sample.c bpftrace -e 'kretfunc:cmdline_proc_show {}' BUG: unable to handle page fault for address: ffffffffa0206000 PGD 322d067 P4D 322d067 PUD 322e063 PMD 1297e067 PTE d428061 Oops: 0003 [#1] PREEMPT SMP PTI CPU: 2 PID: 270 Comm: bpftrace Tainted: G E K 6.1.0 #5 RIP: 0010:arch_prepare_bpf_trampoline+0xed/0x8c0 RSP: 0018:ffffc90001083ad8 EFLAGS: 00010202 RAX: ffffffffa0206000 RBX: 0000000000000020 RCX: 0000000000000000 RDX: ffffffffa0206001 RSI: ffffffffa0206000 RDI: 0000000000000030 RBP: ffffc90001083b70 R08: 0000000000000066 R09: ffff88800f51b400 R10: 000000002e72c6e5 R11: 00000000d0a15080 R12: ffff8880110a68c8 R13: 0000000000000000 R14: ffff88800f51b400 R15: ffffffff814fec10 FS: 00007f87bc0dc780(0000) GS:ffff88803e600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa0206000 CR3: 0000000010b70000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: bpf_trampoline_update+0x25a/0x6b0 __bpf_trampoline_link_prog+0x101/0x240 bpf_trampoline_link_prog+0x2d/0x50 bpf_tracing_prog_attach+0x24c/0x530 bpf_raw_tp_link_attach+0x73/0x1d0 __sys_bpf+0x100e/0x2570 __x64_sys_bpf+0x1c/0x30 do_syscall_64+0x5b/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd With this patch, when modify_fentry or register_fentry returns -EAGAIN from bpf_tramp_ftrace_ops_func, the pageattr of im->image will be reset to nx+rw. Cc: stable@vger.kernel.org Fixes: 00963a2e75a8 ("bpf: Support bpf_trampoline on functions with IPMODIFY (e.g. livepatch)") Signed-off-by: Chuang Wang Acked-by: Jiri Olsa Acked-by: Song Liu Link: https://lore.kernel.org/r/20221224133146.780578-1-nashuiliang@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/trampoline.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 11f5ec0b80165..d0ed7d6f5eec5 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -488,6 +488,10 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut /* reset fops->func and fops->trampoline for re-register */ tr->fops->func = NULL; tr->fops->trampoline = 0; + + /* reset im->image memory attr for arch_prepare_bpf_trampoline */ + set_memory_nx((long)im->image, 1); + set_memory_rw((long)im->image, 1); goto again; } #endif From 8f161ca1105a6af6614333f13aa7be4aab8b633a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 28 Dec 2022 13:55:57 -0800 Subject: [PATCH 0176/1593] selftests/bpf: Temporarily disable part of btf_dump:var_data test. Commit 7443b296e699 ("x86/percpu: Move cpu_number next to current_task") moved global per_cpu variable 'cpu_number' into pcpu_hot structure. Therefore this part of var_data test is no longer valid. Disable it until better solution is found. Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/btf_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 0ba2e8b9c6ace..e9ea38aa8248b 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -801,7 +801,7 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, char *str) { -#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) +#if 0 TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT, "int cpu_number = (int)100", 100); #endif From 7ff94f276f8ea05df82eb115225e9b26f47a3347 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Fri, 16 Dec 2022 14:18:54 -0800 Subject: [PATCH 0177/1593] bpf: keep a reference to the mm, in case the task is dead. Fix the system crash that happens when a task iterator travel through vma of tasks. In task iterators, we used to access mm by following the pointer on the task_struct; however, the death of a task will clear the pointer, even though we still hold the task_struct. That can cause an unexpected crash for a null pointer when an iterator is visiting a task that dies during the visit. Keeping a reference of mm on the iterator ensures we always have a valid pointer to mm. Co-developed-by: Song Liu Signed-off-by: Song Liu Signed-off-by: Kui-Feng Lee Reported-by: Nathan Slingerland Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221216221855.4122288-2-kuifeng@meta.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/task_iter.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index c2a2182ce5702..c4ab9d6cdbe9c 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -438,6 +438,7 @@ struct bpf_iter_seq_task_vma_info { */ struct bpf_iter_seq_task_common common; struct task_struct *task; + struct mm_struct *mm; struct vm_area_struct *vma; u32 tid; unsigned long prev_vm_start; @@ -456,16 +457,19 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) enum bpf_task_vma_iter_find_op op; struct vm_area_struct *curr_vma; struct task_struct *curr_task; + struct mm_struct *curr_mm; u32 saved_tid = info->tid; /* If this function returns a non-NULL vma, it holds a reference to - * the task_struct, and holds read lock on vma->mm->mmap_lock. + * the task_struct, holds a refcount on mm->mm_users, and holds + * read lock on vma->mm->mmap_lock. * If this function returns NULL, it does not hold any reference or * lock. */ if (info->task) { curr_task = info->task; curr_vma = info->vma; + curr_mm = info->mm; /* In case of lock contention, drop mmap_lock to unblock * the writer. * @@ -504,13 +508,15 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) * 4.2) VMA2 and VMA2' covers different ranges, process * VMA2'. */ - if (mmap_lock_is_contended(curr_task->mm)) { + if (mmap_lock_is_contended(curr_mm)) { info->prev_vm_start = curr_vma->vm_start; info->prev_vm_end = curr_vma->vm_end; op = task_vma_iter_find_vma; - mmap_read_unlock(curr_task->mm); - if (mmap_read_lock_killable(curr_task->mm)) + mmap_read_unlock(curr_mm); + if (mmap_read_lock_killable(curr_mm)) { + mmput(curr_mm); goto finish; + } } else { op = task_vma_iter_next_vma; } @@ -535,42 +541,47 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) op = task_vma_iter_find_vma; } - if (!curr_task->mm) + curr_mm = get_task_mm(curr_task); + if (!curr_mm) goto next_task; - if (mmap_read_lock_killable(curr_task->mm)) + if (mmap_read_lock_killable(curr_mm)) { + mmput(curr_mm); goto finish; + } } switch (op) { case task_vma_iter_first_vma: - curr_vma = find_vma(curr_task->mm, 0); + curr_vma = find_vma(curr_mm, 0); break; case task_vma_iter_next_vma: - curr_vma = find_vma(curr_task->mm, curr_vma->vm_end); + curr_vma = find_vma(curr_mm, curr_vma->vm_end); break; case task_vma_iter_find_vma: /* We dropped mmap_lock so it is necessary to use find_vma * to find the next vma. This is similar to the mechanism * in show_smaps_rollup(). */ - curr_vma = find_vma(curr_task->mm, info->prev_vm_end - 1); + curr_vma = find_vma(curr_mm, info->prev_vm_end - 1); /* case 1) and 4.2) above just use curr_vma */ /* check for case 2) or case 4.1) above */ if (curr_vma && curr_vma->vm_start == info->prev_vm_start && curr_vma->vm_end == info->prev_vm_end) - curr_vma = find_vma(curr_task->mm, curr_vma->vm_end); + curr_vma = find_vma(curr_mm, curr_vma->vm_end); break; } if (!curr_vma) { /* case 3) above, or case 2) 4.1) with vma->next == NULL */ - mmap_read_unlock(curr_task->mm); + mmap_read_unlock(curr_mm); + mmput(curr_mm); goto next_task; } info->task = curr_task; info->vma = curr_vma; + info->mm = curr_mm; return curr_vma; next_task: @@ -579,6 +590,7 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) put_task_struct(curr_task); info->task = NULL; + info->mm = NULL; info->tid++; goto again; @@ -587,6 +599,7 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) put_task_struct(curr_task); info->task = NULL; info->vma = NULL; + info->mm = NULL; return NULL; } @@ -658,7 +671,9 @@ static void task_vma_seq_stop(struct seq_file *seq, void *v) */ info->prev_vm_start = ~0UL; info->prev_vm_end = info->vma->vm_end; - mmap_read_unlock(info->task->mm); + mmap_read_unlock(info->mm); + mmput(info->mm); + info->mm = NULL; put_task_struct(info->task); info->task = NULL; } From b7793c8db7d9beb903bb42f52872b5b46abdcb88 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Fri, 16 Dec 2022 14:18:55 -0800 Subject: [PATCH 0178/1593] selftests/bpf: add a test for iter/task_vma for short-lived processes When a task iterator traverses vma(s), it is possible task->mm might become invalid in the middle of traversal and this may cause kernel misbehave (e.g., crash) This test case creates iterators repeatedly and forks short-lived processes in the background to detect this bug. The test will last for 3 seconds to get the chance to trigger the issue. Signed-off-by: Kui-Feng Lee Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221216221855.4122288-3-kuifeng@meta.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/bpf_iter.c | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 6f8ed61fc4b4a..3af6450763e9a 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -1465,6 +1465,77 @@ static void test_task_vma_common(struct bpf_iter_attach_opts *opts) bpf_iter_task_vma__destroy(skel); } +static void test_task_vma_dead_task(void) +{ + struct bpf_iter_task_vma *skel; + int wstatus, child_pid = -1; + time_t start_tm, cur_tm; + int err, iter_fd = -1; + int wait_sec = 3; + + skel = bpf_iter_task_vma__open(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open")) + return; + + skel->bss->pid = getpid(); + + err = bpf_iter_task_vma__load(skel); + if (!ASSERT_OK(err, "bpf_iter_task_vma__load")) + goto out; + + skel->links.proc_maps = bpf_program__attach_iter( + skel->progs.proc_maps, NULL); + + if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) { + skel->links.proc_maps = NULL; + goto out; + } + + start_tm = time(NULL); + cur_tm = start_tm; + + child_pid = fork(); + if (child_pid == 0) { + /* Fork short-lived processes in the background. */ + while (cur_tm < start_tm + wait_sec) { + system("echo > /dev/null"); + cur_tm = time(NULL); + } + exit(0); + } + + if (!ASSERT_GE(child_pid, 0, "fork_child")) + goto out; + + while (cur_tm < start_tm + wait_sec) { + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps)); + if (!ASSERT_GE(iter_fd, 0, "create_iter")) + goto out; + + /* Drain all data from iter_fd. */ + while (cur_tm < start_tm + wait_sec) { + err = read_fd_into_buffer(iter_fd, task_vma_output, CMP_BUFFER_SIZE); + if (!ASSERT_GE(err, 0, "read_iter_fd")) + goto out; + + cur_tm = time(NULL); + + if (err == 0) + break; + } + + close(iter_fd); + iter_fd = -1; + } + + check_bpf_link_info(skel->progs.proc_maps); + +out: + waitpid(child_pid, &wstatus, 0); + close(iter_fd); + bpf_iter_task_vma__destroy(skel); +} + void test_bpf_sockmap_map_iter_fd(void) { struct bpf_iter_sockmap *skel; @@ -1586,6 +1657,8 @@ void test_bpf_iter(void) test_task_file(); if (test__start_subtest("task_vma")) test_task_vma(); + if (test__start_subtest("task_vma_dead_task")) + test_task_vma_dead_task(); if (test__start_subtest("task_btf")) test_task_btf(); if (test__start_subtest("tcp4")) From 45435d8da71f9f3e6860e6e6ea9667b6ec17ec64 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 23 Dec 2022 10:28:44 -0800 Subject: [PATCH 0179/1593] bpf: Always use maximal size for copy_array() Instead of counting on prior allocations to have sized allocations to the next kmalloc bucket size, always perform a krealloc that is at least ksize(dst) in size (which is a no-op), so the size can be correctly tracked by all the various allocation size trackers (KASAN, __alloc_size, etc). Reported-by: Hyunwoo Kim Link: https://lore.kernel.org/bpf/20221223094551.GA1439509@ubuntu Fixes: ceb35b666d42 ("bpf/verifier: Use kmalloc_size_roundup() to match ksize() usage") Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: John Fastabend Cc: Andrii Nakryiko Cc: Martin KaFai Lau Cc: Song Liu Cc: Yonghong Song Cc: KP Singh Cc: Stanislav Fomichev Cc: Hao Luo Cc: Jiri Olsa Cc: bpf@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221223182836.never.866-kees@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 243d06ce68426..85f96c1e9f62f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1054,6 +1054,8 @@ static void print_insn_state(struct bpf_verifier_env *env, */ static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags) { + size_t alloc_bytes; + void *orig = dst; size_t bytes; if (ZERO_OR_NULL_PTR(src)) @@ -1062,11 +1064,11 @@ static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; - if (ksize(dst) < ksize(src)) { - kfree(dst); - dst = kmalloc_track_caller(kmalloc_size_roundup(bytes), flags); - if (!dst) - return NULL; + alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes)); + dst = krealloc(orig, alloc_bytes, flags); + if (!dst) { + kfree(orig); + return NULL; } memcpy(dst, src, bytes); From 5626af8fc4322a972b59c7b1a6760767f7b58d4f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Dec 2022 17:38:10 +0100 Subject: [PATCH 0180/1593] pinctrl: sp7021: fix unused function warning sppctl_gpio_inv_get is only used from the debugfs code inside of an #ifdef, so we get a warning without that: drivers/pinctrl/sunplus/sppctl.c:393:12: error: 'sppctl_gpio_inv_get' defined but not used [-Werror=unused-function] 393 | static int sppctl_gpio_inv_get(struct gpio_chip *chip, unsigned int offset) | ^~~~~~~~~~~~~~~~~~~ Replace the #ifdef with an IS_ENABLED() check that avoids the warning. Fixes: aa74c44be19c ("pinctrl: Add driver for Sunplus SP7021") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20221215163822.542622-1-arnd@kernel.org Signed-off-by: Linus Walleij --- drivers/pinctrl/sunplus/sppctl.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/sunplus/sppctl.c b/drivers/pinctrl/sunplus/sppctl.c index 2b3335ab56c66..25101293268f6 100644 --- a/drivers/pinctrl/sunplus/sppctl.c +++ b/drivers/pinctrl/sunplus/sppctl.c @@ -499,7 +499,6 @@ static int sppctl_gpio_set_config(struct gpio_chip *chip, unsigned int offset, return 0; } -#ifdef CONFIG_DEBUG_FS static void sppctl_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) { const char *label; @@ -521,7 +520,6 @@ static void sppctl_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) seq_puts(s, "\n"); } } -#endif static int sppctl_gpio_new(struct platform_device *pdev, struct sppctl_pdata *pctl) { @@ -550,9 +548,8 @@ static int sppctl_gpio_new(struct platform_device *pdev, struct sppctl_pdata *pc gchip->get = sppctl_gpio_get; gchip->set = sppctl_gpio_set; gchip->set_config = sppctl_gpio_set_config; -#ifdef CONFIG_DEBUG_FS - gchip->dbg_show = sppctl_gpio_dbg_show; -#endif + gchip->dbg_show = IS_ENABLED(CONFIG_DEBUG_FS) ? + sppctl_gpio_dbg_show : NULL; gchip->base = -1; gchip->ngpio = sppctl_gpio_list_sz; gchip->names = sppctl_gpio_list_s; From 56c5dab20a6391604df9521f812c01d1e3fe1bd0 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 12 Dec 2022 13:04:11 +0100 Subject: [PATCH 0181/1593] RDMA/srp: Move large values to a new enum for gcc13 Since gcc13, each member of an enum has the same type as the enum [1]. And that is inherited from its members. Provided these two: SRP_TAG_NO_REQ = ~0U, SRP_TAG_TSK_MGMT = 1U << 31 all other members are unsigned ints. Esp. with SRP_MAX_SGE and SRP_TSK_MGMT_SQ_SIZE and their use in min(), this results in the following warnings: include/linux/minmax.h:20:35: error: comparison of distinct pointer types lacks a cast drivers/infiniband/ulp/srp/ib_srp.c:563:42: note: in expansion of macro 'min' include/linux/minmax.h:20:35: error: comparison of distinct pointer types lacks a cast drivers/infiniband/ulp/srp/ib_srp.c:2369:27: note: in expansion of macro 'min' So move the large values away to a separate enum, so that they don't affect other members. [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=36113 Link: https://lore.kernel.org/r/20221212120411.13750-1-jirislaby@kernel.org Signed-off-by: Jiri Slaby (SUSE) Reviewed-by: Bart Van Assche Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/srp/ib_srp.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 00b0068fda208..5d94db453df32 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -62,9 +62,6 @@ enum { SRP_DEFAULT_CMD_SQ_SIZE = SRP_DEFAULT_QUEUE_SIZE - SRP_RSP_SQ_SIZE - SRP_TSK_MGMT_SQ_SIZE, - SRP_TAG_NO_REQ = ~0U, - SRP_TAG_TSK_MGMT = 1U << 31, - SRP_MAX_PAGES_PER_MR = 512, SRP_MAX_ADD_CDB_LEN = 16, @@ -79,6 +76,11 @@ enum { sizeof(struct srp_imm_buf), }; +enum { + SRP_TAG_NO_REQ = ~0U, + SRP_TAG_TSK_MGMT = BIT(31), +}; + enum srp_target_state { SRP_TARGET_SCANNING, SRP_TARGET_LIVE, From 74b91a1bdb994dfaed0074154ca7d493aeb735a6 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 24 Dec 2022 22:43:51 +0100 Subject: [PATCH 0182/1593] arm64: dts: qcom: sm8350: correct SDHCI interconnect arguments The interconnect providers accept only one argument (cells == 1), so fix a copy&paste from SM8450: sm8350-hdk.dtb: mmc@8804000: interconnects: [[74, 9, 0], [75, 1, 0], [76, 2, 0], [77, 36, 0]] is too long Fixes: 60477435e4de ("arm64: dts: qcom: sm8350: Add SDHCI2") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221224214351.18215-1-krzysztof.kozlowski@linaro.org --- arch/arm64/boot/dts/qcom/sm8350.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index 245dce24ec599..fb3cd20a82b5e 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -2382,8 +2382,8 @@ <&rpmhcc RPMH_CXO_CLK>; clock-names = "iface", "core", "xo"; resets = <&gcc GCC_SDCC2_BCR>; - interconnects = <&aggre2_noc MASTER_SDCC_2 0 &mc_virt SLAVE_EBI1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_SDCC_2 0>; + interconnects = <&aggre2_noc MASTER_SDCC_2 &mc_virt SLAVE_EBI1>, + <&gem_noc MASTER_APPSS_PROC &config_noc SLAVE_SDCC_2>; interconnect-names = "sdhc-ddr","cpu-sdhc"; iommus = <&apps_smmu 0x4a0 0x0>; power-domains = <&rpmhpd SM8350_CX>; From 65ea840afd508194b0ee903256162aa87e46ec30 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Sat, 26 Nov 2022 07:14:30 +0000 Subject: [PATCH 0183/1593] fpga: stratix10-soc: Fix return value check in s10_ops_write_init() In case of error, the function stratix10_svc_allocate_memory() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: e7eef1d7633a ("fpga: add intel stratix10 soc fpga manager driver") Signed-off-by: Zheng Yongjun Reviewed-by: Russ Weight Cc: stable@vger.kernel.org Acked-by: Xu Yilun Link: https://lore.kernel.org/r/20221126071430.19540-1-zhengyongjun3@huawei.com Signed-off-by: Xu Yilun --- drivers/fpga/stratix10-soc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/fpga/stratix10-soc.c b/drivers/fpga/stratix10-soc.c index 357cea58ec98e..f7f01982a5126 100644 --- a/drivers/fpga/stratix10-soc.c +++ b/drivers/fpga/stratix10-soc.c @@ -213,9 +213,9 @@ static int s10_ops_write_init(struct fpga_manager *mgr, /* Allocate buffers from the service layer's pool. */ for (i = 0; i < NUM_SVC_BUFS; i++) { kbuf = stratix10_svc_allocate_memory(priv->chan, SVC_BUF_SIZE); - if (!kbuf) { + if (IS_ERR(kbuf)) { s10_free_buffers(mgr); - ret = -ENOMEM; + ret = PTR_ERR(kbuf); goto init_done; } From 60ce26d10e5850f33cc76fce52f5377045e75a15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 14 Dec 2022 16:49:52 +0200 Subject: [PATCH 0184/1593] fpga: m10bmc-sec: Fix probe rollback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handle probe error rollbacks properly to avoid leaks. Fixes: 5cd339b370e2 ("fpga: m10bmc-sec: add max10 secure update functions") Reviewed-by: Matthew Gerlach Reviewed-by: Russ Weight Reviewed-by: Marco Pagani Signed-off-by: Ilpo Järvinen Cc: stable@vger.kernel.org Acked-by: Xu Yilun Link: https://lore.kernel.org/r/20221214144952.8392-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Xu Yilun --- drivers/fpga/intel-m10-bmc-sec-update.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/fpga/intel-m10-bmc-sec-update.c b/drivers/fpga/intel-m10-bmc-sec-update.c index 79d48852825ef..03f1bd81c434d 100644 --- a/drivers/fpga/intel-m10-bmc-sec-update.c +++ b/drivers/fpga/intel-m10-bmc-sec-update.c @@ -574,20 +574,27 @@ static int m10bmc_sec_probe(struct platform_device *pdev) len = scnprintf(buf, SEC_UPDATE_LEN_MAX, "secure-update%d", sec->fw_name_id); sec->fw_name = kmemdup_nul(buf, len, GFP_KERNEL); - if (!sec->fw_name) - return -ENOMEM; + if (!sec->fw_name) { + ret = -ENOMEM; + goto fw_name_fail; + } fwl = firmware_upload_register(THIS_MODULE, sec->dev, sec->fw_name, &m10bmc_ops, sec); if (IS_ERR(fwl)) { dev_err(sec->dev, "Firmware Upload driver failed to start\n"); - kfree(sec->fw_name); - xa_erase(&fw_upload_xa, sec->fw_name_id); - return PTR_ERR(fwl); + ret = PTR_ERR(fwl); + goto fw_uploader_fail; } sec->fwl = fwl; return 0; + +fw_uploader_fail: + kfree(sec->fw_name); +fw_name_fail: + xa_erase(&fw_upload_xa, sec->fw_name_id); + return ret; } static int m10bmc_sec_remove(struct platform_device *pdev) From 936a192f974018b4f6040f6f77b1cc1e75bd8666 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 26 Dec 2022 22:27:52 +0900 Subject: [PATCH 0185/1593] tcp: Add TIME_WAIT sockets in bhash2. Jiri Slaby reported regression of bind() with a simple repro. [0] The repro creates a TIME_WAIT socket and tries to bind() a new socket with the same local address and port. Before commit 28044fc1d495 ("net: Add a bhash2 table hashed by port and address"), the bind() failed with -EADDRINUSE, but now it succeeds. The cited commit should have put TIME_WAIT sockets into bhash2; otherwise, inet_bhash2_conflict() misses TIME_WAIT sockets when validating bind() requests if the address is not a wildcard one. The straight option is to move sk_bind2_node from struct sock to struct sock_common to add twsk to bhash2 as implemented as RFC. [1] However, the binary layout change in the struct sock could affect performances moving hot fields on different cachelines. To avoid that, we add another TIME_WAIT list in inet_bind2_bucket and check it while validating bind(). [0]: https://lore.kernel.org/netdev/6b971a4e-c7d8-411e-1f92-fda29b5b2fb9@kernel.org/ [1]: https://lore.kernel.org/netdev/20221221151258.25748-2-kuniyu@amazon.com/ Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address") Reported-by: Jiri Slaby Suggested-by: Paolo Abeni Signed-off-by: Kuniyuki Iwashima Acked-by: Joanne Koong Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 4 ++++ include/net/inet_timewait_sock.h | 5 +++++ net/ipv4/inet_connection_sock.c | 26 ++++++++++++++++++++++---- net/ipv4/inet_hashtables.c | 8 +++++--- net/ipv4/inet_timewait_sock.c | 31 +++++++++++++++++++++++++++++-- 5 files changed, 65 insertions(+), 9 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 69174093078f0..99bd823e97f62 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -108,6 +108,10 @@ struct inet_bind2_bucket { struct hlist_node node; /* List of sockets hashed to this bucket */ struct hlist_head owners; + /* bhash has twsk in owners, but bhash2 has twsk in + * deathrow not to add a member in struct sock_common. + */ + struct hlist_head deathrow; }; static inline struct net *ib_net(const struct inet_bind_bucket *ib) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 5b47545f22d39..4a8e578405cb3 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -73,9 +73,14 @@ struct inet_timewait_sock { u32 tw_priority; struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; + struct inet_bind2_bucket *tw_tb2; + struct hlist_node tw_bind2_node; }; #define tw_tclass tw_tos +#define twsk_for_each_bound_bhash2(__tw, list) \ + hlist_for_each_entry(__tw, list, tw_bind2_node) + static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) { return (struct inet_timewait_sock *)sk; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b366ab9148f2d..848ffc3e0239c 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -173,22 +173,40 @@ static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2, return false; } +static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2, + kuid_t sk_uid, bool relax, + bool reuseport_cb_ok, bool reuseport_ok) +{ + if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) + return false; + + return inet_bind_conflict(sk, sk2, sk_uid, relax, + reuseport_cb_ok, reuseport_ok); +} + static bool inet_bhash2_conflict(const struct sock *sk, const struct inet_bind2_bucket *tb2, kuid_t sk_uid, bool relax, bool reuseport_cb_ok, bool reuseport_ok) { + struct inet_timewait_sock *tw2; struct sock *sk2; sk_for_each_bound_bhash2(sk2, &tb2->owners) { - if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) - continue; + if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, + reuseport_cb_ok, reuseport_ok)) + return true; + } - if (inet_bind_conflict(sk, sk2, sk_uid, relax, - reuseport_cb_ok, reuseport_ok)) + twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) { + sk2 = (struct sock *)tw2; + + if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, + reuseport_cb_ok, reuseport_ok)) return true; } + return false; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index d039b4e732a31..24a38b56fab9e 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -116,6 +116,7 @@ static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb, #endif tb->rcv_saddr = sk->sk_rcv_saddr; INIT_HLIST_HEAD(&tb->owners); + INIT_HLIST_HEAD(&tb->deathrow); hlist_add_head(&tb->node, &head->chain); } @@ -137,7 +138,7 @@ struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep, /* Caller must hold hashbucket lock for this tb with local BH disabled */ void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb) { - if (hlist_empty(&tb->owners)) { + if (hlist_empty(&tb->owners) && hlist_empty(&tb->deathrow)) { __hlist_del(&tb->node); kmem_cache_free(cachep, tb); } @@ -1103,15 +1104,16 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, /* Head lock still held and bh's disabled */ inet_bind_hash(sk, tb, tb2, port); - spin_unlock(&head2->lock); - if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); inet_ehash_nolisten(sk, (struct sock *)tw, NULL); } if (tw) inet_twsk_bind_unhash(tw, hinfo); + + spin_unlock(&head2->lock); spin_unlock(&head->lock); + if (tw) inet_twsk_deschedule_put(tw); local_bh_enable(); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 66fc940f9521a..1d77d992e6e77 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -29,6 +29,7 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) { + struct inet_bind2_bucket *tb2 = tw->tw_tb2; struct inet_bind_bucket *tb = tw->tw_tb; if (!tb) @@ -37,6 +38,11 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, __hlist_del(&tw->tw_bind_node); tw->tw_tb = NULL; inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + + __hlist_del(&tw->tw_bind2_node); + tw->tw_tb2 = NULL; + inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); + __sock_put((struct sock *)tw); } @@ -45,7 +51,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); - struct inet_bind_hashbucket *bhead; + struct inet_bind_hashbucket *bhead, *bhead2; spin_lock(lock); sk_nulls_del_node_init_rcu((struct sock *)tw); @@ -54,9 +60,13 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) /* Disassociate with bind bucket. */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, hashinfo->bhash_size)]; + bhead2 = inet_bhashfn_portaddr(hashinfo, (struct sock *)tw, + twsk_net(tw), tw->tw_num); spin_lock(&bhead->lock); + spin_lock(&bhead2->lock); inet_twsk_bind_unhash(tw, hashinfo); + spin_unlock(&bhead2->lock); spin_unlock(&bhead->lock); refcount_dec(&tw->tw_dr->tw_refcount); @@ -93,6 +103,12 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, hlist_add_head(&tw->tw_bind_node, list); } +static void inet_twsk_add_bind2_node(struct inet_timewait_sock *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_bind2_node, list); +} + /* * Enter the time wait state. This is called with locally disabled BH. * Essentially we whip up a timewait bucket, copy the relevant info into it @@ -105,17 +121,28 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - struct inet_bind_hashbucket *bhead; + struct inet_bind_hashbucket *bhead, *bhead2; + /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, hashinfo->bhash_size)]; + bhead2 = inet_bhashfn_portaddr(hashinfo, sk, twsk_net(tw), inet->inet_num); + spin_lock(&bhead->lock); + spin_lock(&bhead2->lock); + tw->tw_tb = icsk->icsk_bind_hash; WARN_ON(!icsk->icsk_bind_hash); inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); + + tw->tw_tb2 = icsk->icsk_bind2_hash; + WARN_ON(!icsk->icsk_bind2_hash); + inet_twsk_add_bind2_node(tw, &tw->tw_tb2->deathrow); + + spin_unlock(&bhead2->lock); spin_unlock(&bhead->lock); spin_lock(lock); From 2c042e8e54efb2b8e25ed0cb28224e79948dc8ce Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 26 Dec 2022 22:27:53 +0900 Subject: [PATCH 0186/1593] tcp: Add selftest for bind() and TIME_WAIT. bhash2 split the bind() validation logic into wildcard and non-wildcard cases. Let's add a test to catch future regression. Before the previous patch: # ./bind_timewait TAP version 13 1..2 # Starting 2 tests from 3 test cases. # RUN bind_timewait.localhost.1 ... # bind_timewait.c:87:1:Expected ret (0) == -1 (-1) # 1: Test terminated by assertion # FAIL bind_timewait.localhost.1 not ok 1 bind_timewait.localhost.1 # RUN bind_timewait.addrany.1 ... # OK bind_timewait.addrany.1 ok 2 bind_timewait.addrany.1 # FAILED: 1 / 2 tests passed. # Totals: pass:1 fail:1 xfail:0 xpass:0 skip:0 error:0 After: # ./bind_timewait TAP version 13 1..2 # Starting 2 tests from 3 test cases. # RUN bind_timewait.localhost.1 ... # OK bind_timewait.localhost.1 ok 1 bind_timewait.localhost.1 # RUN bind_timewait.addrany.1 ... # OK bind_timewait.addrany.1 ok 2 bind_timewait.addrany.1 # PASSED: 2 / 2 tests passed. # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Kuniyuki Iwashima Acked-by: Joanne Koong Signed-off-by: David S. Miller --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/bind_timewait.c | 92 +++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 tools/testing/selftests/net/bind_timewait.c diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 9cc84114741d3..a6911cae368c7 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only bind_bhash +bind_timewait csum cmsg_sender diag_uid diff --git a/tools/testing/selftests/net/bind_timewait.c b/tools/testing/selftests/net/bind_timewait.c new file mode 100644 index 0000000000000..cb9fdf51ea59f --- /dev/null +++ b/tools/testing/selftests/net/bind_timewait.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#include +#include + +#include "../kselftest_harness.h" + +FIXTURE(bind_timewait) +{ + struct sockaddr_in addr; + socklen_t addrlen; +}; + +FIXTURE_VARIANT(bind_timewait) +{ + __u32 addr_const; +}; + +FIXTURE_VARIANT_ADD(bind_timewait, localhost) +{ + .addr_const = INADDR_LOOPBACK +}; + +FIXTURE_VARIANT_ADD(bind_timewait, addrany) +{ + .addr_const = INADDR_ANY +}; + +FIXTURE_SETUP(bind_timewait) +{ + self->addr.sin_family = AF_INET; + self->addr.sin_port = 0; + self->addr.sin_addr.s_addr = htonl(variant->addr_const); + self->addrlen = sizeof(self->addr); +} + +FIXTURE_TEARDOWN(bind_timewait) +{ +} + +void create_timewait_socket(struct __test_metadata *_metadata, + FIXTURE_DATA(bind_timewait) *self) +{ + int server_fd, client_fd, child_fd, ret; + struct sockaddr_in addr; + socklen_t addrlen; + + server_fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GT(server_fd, 0); + + ret = bind(server_fd, (struct sockaddr *)&self->addr, self->addrlen); + ASSERT_EQ(ret, 0); + + ret = listen(server_fd, 1); + ASSERT_EQ(ret, 0); + + ret = getsockname(server_fd, (struct sockaddr *)&self->addr, &self->addrlen); + ASSERT_EQ(ret, 0); + + client_fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GT(client_fd, 0); + + ret = connect(client_fd, (struct sockaddr *)&self->addr, self->addrlen); + ASSERT_EQ(ret, 0); + + addrlen = sizeof(addr); + child_fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen); + ASSERT_GT(child_fd, 0); + + close(child_fd); + close(client_fd); + close(server_fd); +} + +TEST_F(bind_timewait, 1) +{ + int fd, ret; + + create_timewait_socket(_metadata, self); + + fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GT(fd, 0); + + ret = bind(fd, (struct sockaddr *)&self->addr, self->addrlen); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EADDRINUSE); + + close(fd); +} + +TEST_HARNESS_MAIN From 6b57bffa5f675a01c7981ed271e8521e87441abd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 27 Dec 2022 22:45:07 +0100 Subject: [PATCH 0187/1593] net: ethernet: broadcom: bcm63xx_enet: Drop empty platform remove function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A remove callback just returning 0 is equivalent to no remove callback at all. So drop the useless function. Signed-off-by: Uwe Kleine-König Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index d91fdb0c2649d..2cf96892e5650 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -2784,17 +2784,11 @@ static int bcm_enet_shared_probe(struct platform_device *pdev) return 0; } -static int bcm_enet_shared_remove(struct platform_device *pdev) -{ - return 0; -} - /* this "shared" driver is needed because both macs share a single * address space */ struct platform_driver bcm63xx_enet_shared_driver = { .probe = bcm_enet_shared_probe, - .remove = bcm_enet_shared_remove, .driver = { .name = "bcm63xx_enet_shared", .owner = THIS_MODULE, From af691c94d022440476b76560d310d6fea790cc60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 27 Dec 2022 22:45:08 +0100 Subject: [PATCH 0188/1593] net: ethernet: freescale: enetc: Drop empty platform remove function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A remove callback just returning 0 is equivalent to no remove callback at all. So drop the useless function. Signed-off-by: Uwe Kleine-König Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc_ierb.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c index 91f02c5050285..b307bef4dc298 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c @@ -127,11 +127,6 @@ static int enetc_ierb_probe(struct platform_device *pdev) return 0; } -static int enetc_ierb_remove(struct platform_device *pdev) -{ - return 0; -} - static const struct of_device_id enetc_ierb_match[] = { { .compatible = "fsl,ls1028a-enetc-ierb", }, {}, @@ -144,7 +139,6 @@ static struct platform_driver enetc_ierb_driver = { .of_match_table = enetc_ierb_match, }, .probe = enetc_ierb_probe, - .remove = enetc_ierb_remove, }; module_platform_driver(enetc_ierb_driver); From fec7352117fa301bfbc31bacc14bb9a579376b36 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 28 Dec 2022 14:27:49 +0800 Subject: [PATCH 0189/1593] net: hns3: refine the handling for VF heartbeat Currently, the PF check the VF alive by the KEEP_ALVE mailbox from VF. VF keep sending the mailbox per 2 seconds. Once PF lost the mailbox for more than 8 seconds, it will regards the VF is abnormal, and stop notifying the state change to VF, include link state, vf mac, reset, even though it receives the KEEP_ALIVE mailbox again. It's inreasonable. This patch fixes it. PF will record the state change which need to notify VF when lost the VF's KEEP_ALIVE mailbox. And notify VF when receive the mailbox again. Introduce a new flag HCLGE_VPORT_STATE_INITED, used to distinguish the case whether VF driver loaded or not. For VF will query these states when initializing, so it's unnecessary to notify it in this case. Fixes: aa5c4f175be6 ("net: hns3: add reset handling for VF when doing PF reset") Signed-off-by: Jian Shen Signed-off-by: Hao Lan Reported-by: kernel test robot Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_main.c | 57 +++++++++++---- .../hisilicon/hns3/hns3pf/hclge_main.h | 7 ++ .../hisilicon/hns3/hns3pf/hclge_mbx.c | 71 ++++++++++++++++--- 3 files changed, 112 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 6c2742f59c777..07ad5f35219e2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3910,9 +3910,17 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset) return ret; } - if (!reset || !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) + if (!reset || + !test_bit(HCLGE_VPORT_STATE_INITED, &vport->state)) continue; + if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state) && + hdev->reset_type == HNAE3_FUNC_RESET) { + set_bit(HCLGE_VPORT_NEED_NOTIFY_RESET, + &vport->need_notify); + continue; + } + /* Inform VF to process the reset. * hclge_inform_reset_assert_to_vf may fail if VF * driver is not loaded. @@ -4609,18 +4617,25 @@ static void hclge_reset_service_task(struct hclge_dev *hdev) static void hclge_update_vport_alive(struct hclge_dev *hdev) { +#define HCLGE_ALIVE_SECONDS_NORMAL 8 + + unsigned long alive_time = HCLGE_ALIVE_SECONDS_NORMAL * HZ; int i; /* start from vport 1 for PF is always alive */ for (i = 1; i < hdev->num_alloc_vport; i++) { struct hclge_vport *vport = &hdev->vport[i]; - if (time_after(jiffies, vport->last_active_jiffies + 8 * HZ)) + if (!test_bit(HCLGE_VPORT_STATE_INITED, &vport->state) || + !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) + continue; + if (time_after(jiffies, vport->last_active_jiffies + + alive_time)) { clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); - - /* If vf is not alive, set to default value */ - if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) - vport->mps = HCLGE_MAC_DEFAULT_FRAME; + dev_warn(&hdev->pdev->dev, + "VF %u heartbeat timeout\n", + i - HCLGE_VF_VPORT_START_NUM); + } } } @@ -8064,9 +8079,11 @@ int hclge_vport_start(struct hclge_vport *vport) { struct hclge_dev *hdev = vport->back; + set_bit(HCLGE_VPORT_STATE_INITED, &vport->state); set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state); vport->last_active_jiffies = jiffies; + vport->need_notify = 0; if (test_bit(vport->vport_id, hdev->vport_config_block)) { if (vport->vport_id) { @@ -8084,7 +8101,9 @@ int hclge_vport_start(struct hclge_vport *vport) void hclge_vport_stop(struct hclge_vport *vport) { + clear_bit(HCLGE_VPORT_STATE_INITED, &vport->state); clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); + vport->need_notify = 0; } static int hclge_client_start(struct hnae3_handle *handle) @@ -9208,7 +9227,8 @@ static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf, return 0; } - dev_info(&hdev->pdev->dev, "MAC of VF %d has been set to %s\n", + dev_info(&hdev->pdev->dev, + "MAC of VF %d has been set to %s, will be active after VF reset\n", vf, format_mac_addr); return 0; } @@ -10465,12 +10485,16 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid, * for DEVICE_VERSION_V3, vf doesn't need to know about the port based * VLAN state. */ - if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3 && - test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) - (void)hclge_push_vf_port_base_vlan_info(&hdev->vport[0], - vport->vport_id, - state, &vlan_info); - + if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3) { + if (test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) + (void)hclge_push_vf_port_base_vlan_info(&hdev->vport[0], + vport->vport_id, + state, + &vlan_info); + else + set_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, + &vport->need_notify); + } return 0; } @@ -11941,7 +11965,7 @@ static void hclge_reset_vport_state(struct hclge_dev *hdev) int i; for (i = 0; i < hdev->num_alloc_vport; i++) { - hclge_vport_stop(vport); + clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); vport++; } } @@ -12955,6 +12979,11 @@ static void hclge_clear_vport_vf_info(struct hclge_vport *vport, int vfid) struct hclge_vlan_info vlan_info; int ret; + clear_bit(HCLGE_VPORT_STATE_INITED, &vport->state); + clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); + vport->need_notify = 0; + vport->mps = 0; + /* after disable sriov, clean VF rate configured by PF */ ret = hclge_tm_qs_shaper_cfg(vport, 0); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 495b639b0dc24..13f23d606e77b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -995,9 +995,15 @@ enum HCLGE_VPORT_STATE { HCLGE_VPORT_STATE_MAC_TBL_CHANGE, HCLGE_VPORT_STATE_PROMISC_CHANGE, HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE, + HCLGE_VPORT_STATE_INITED, HCLGE_VPORT_STATE_MAX }; +enum HCLGE_VPORT_NEED_NOTIFY { + HCLGE_VPORT_NEED_NOTIFY_RESET, + HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, +}; + struct hclge_vlan_info { u16 vlan_proto; /* so far support 802.1Q only */ u16 qos; @@ -1044,6 +1050,7 @@ struct hclge_vport { struct hnae3_handle roce; unsigned long state; + unsigned long need_notify; unsigned long last_active_jiffies; u32 mps; /* Max packet size */ struct hclge_vf_info vf_info; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index a7b06c63143cc..04ff9bf121853 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -124,17 +124,26 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len, return status; } +static int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type) +{ + __le16 msg_data; + u8 dest_vfid; + + dest_vfid = (u8)vport->vport_id; + msg_data = cpu_to_le16(reset_type); + + /* send this requested info to VF */ + return hclge_send_mbx_msg(vport, (u8 *)&msg_data, sizeof(msg_data), + HCLGE_MBX_ASSERTING_RESET, dest_vfid); +} + int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport) { struct hclge_dev *hdev = vport->back; - __le16 msg_data; u16 reset_type; - u8 dest_vfid; BUILD_BUG_ON(HNAE3_MAX_RESET > U16_MAX); - dest_vfid = (u8)vport->vport_id; - if (hdev->reset_type == HNAE3_FUNC_RESET) reset_type = HNAE3_VF_PF_FUNC_RESET; else if (hdev->reset_type == HNAE3_FLR_RESET) @@ -142,11 +151,7 @@ int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport) else reset_type = HNAE3_VF_FUNC_RESET; - msg_data = cpu_to_le16(reset_type); - - /* send this requested info to VF */ - return hclge_send_mbx_msg(vport, (u8 *)&msg_data, sizeof(msg_data), - HCLGE_MBX_ASSERTING_RESET, dest_vfid); + return hclge_inform_vf_reset(vport, reset_type); } static void hclge_free_vector_ring_chain(struct hnae3_ring_chain_node *head) @@ -652,9 +657,56 @@ static int hclge_reset_vf(struct hclge_vport *vport) return hclge_func_reset_cmd(hdev, vport->vport_id); } +static void hclge_notify_vf_config(struct hclge_vport *vport) +{ + struct hclge_dev *hdev = vport->back; + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + struct hclge_port_base_vlan_config *vlan_cfg; + int ret; + + hclge_push_vf_link_status(vport); + if (test_bit(HCLGE_VPORT_NEED_NOTIFY_RESET, &vport->need_notify)) { + ret = hclge_inform_vf_reset(vport, HNAE3_VF_PF_FUNC_RESET); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to inform VF %u reset!", + vport->vport_id - HCLGE_VF_VPORT_START_NUM); + return; + } + vport->need_notify = 0; + return; + } + + if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3 && + test_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, &vport->need_notify)) { + vlan_cfg = &vport->port_base_vlan_cfg; + ret = hclge_push_vf_port_base_vlan_info(&hdev->vport[0], + vport->vport_id, + vlan_cfg->state, + &vlan_cfg->vlan_info); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to inform VF %u port base vlan!", + vport->vport_id - HCLGE_VF_VPORT_START_NUM); + return; + } + clear_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, &vport->need_notify); + } +} + static void hclge_vf_keep_alive(struct hclge_vport *vport) { + struct hclge_dev *hdev = vport->back; + vport->last_active_jiffies = jiffies; + + if (test_bit(HCLGE_VPORT_STATE_INITED, &vport->state) && + !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) { + set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); + dev_info(&hdev->pdev->dev, "VF %u is alive!", + vport->vport_id - HCLGE_VF_VPORT_START_NUM); + hclge_notify_vf_config(vport); + } } static int hclge_set_vf_mtu(struct hclge_vport *vport, @@ -954,6 +1006,7 @@ static int hclge_mbx_vf_uninit_handler(struct hclge_mbx_ops_param *param) hclge_rm_vport_all_mac_table(param->vport, true, HCLGE_MAC_ADDR_MC); hclge_rm_vport_all_vlan_table(param->vport, true); + param->vport->mps = 0; return 0; } From d530ece70f16f912e1d1bfeea694246ab78b0a4b Mon Sep 17 00:00:00 2001 From: Jiguang Xiao Date: Wed, 28 Dec 2022 16:14:47 +0800 Subject: [PATCH 0190/1593] net: amd-xgbe: add missed tasklet_kill The driver does not call tasklet_kill in several places. Add the calls to fix it. Fixes: 85b85c853401 ("amd-xgbe: Re-issue interrupt if interrupt status not cleared") Signed-off-by: Jiguang Xiao Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 3 +++ drivers/net/ethernet/amd/xgbe/xgbe-i2c.c | 4 +++- drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 4 +++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 7b666106feee9..614c0278419bc 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1064,6 +1064,9 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata) devm_free_irq(pdata->dev, pdata->dev_irq, pdata); + tasklet_kill(&pdata->tasklet_dev); + tasklet_kill(&pdata->tasklet_ecc); + if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) devm_free_irq(pdata->dev, pdata->ecc_irq, pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c index 22d4fc547a0a3..a9ccc4258ee50 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c @@ -447,8 +447,10 @@ static void xgbe_i2c_stop(struct xgbe_prv_data *pdata) xgbe_i2c_disable(pdata); xgbe_i2c_clear_all_interrupts(pdata); - if (pdata->dev_irq != pdata->i2c_irq) + if (pdata->dev_irq != pdata->i2c_irq) { devm_free_irq(pdata->dev, pdata->i2c_irq, pdata); + tasklet_kill(&pdata->tasklet_i2c); + } } static int xgbe_i2c_start(struct xgbe_prv_data *pdata) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 4e97b48695220..0c5c1b1556830 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -1390,8 +1390,10 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata) /* Disable auto-negotiation */ xgbe_an_disable_all(pdata); - if (pdata->dev_irq != pdata->an_irq) + if (pdata->dev_irq != pdata->an_irq) { devm_free_irq(pdata->dev, pdata->an_irq, pdata); + tasklet_kill(&pdata->tasklet_an); + } pdata->phy_if.phy_impl.stop(pdata); From 1573c6882018f69991aead951d09423ce978adac Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Thu, 29 Dec 2022 13:41:06 +0800 Subject: [PATCH 0191/1593] selftests: net: fix cmsg_so_mark.sh test hang This cmsg_so_mark.sh test will hang on non-amd64 systems because of the infinity loop for argument parsing in cmsg_sender. Variable "o" in cs_parse_args() for taking getopt() should be an int, otherwise it will be 255 when getopt() returns -1 on non-amd64 system and thus causing infinity loop. Link: https://lore.kernel.org/lkml/CA+G9fYsM2k7mrF7W4V_TrZ-qDauWM394=8yEJ=-t1oUg8_40YA@mail.gmail.com/t/ Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller --- tools/testing/selftests/net/cmsg_sender.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 75dd83e39207b..24b21b15ed3fb 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -110,7 +110,7 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) static void cs_parse_args(int argc, char *argv[]) { - char o; + int o; while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:H:")) != -1) { switch (o) { From 332b49ff637d6c1a75b971022a8b992cf3c57db1 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Thu, 29 Dec 2022 07:30:05 +0000 Subject: [PATCH 0192/1593] net: ena: Fix toeplitz initial hash value On driver initialization, RSS hash initial value is set to zero, instead of the default value. This happens because we pass NULL as the RSS key parameter, which caused us to never initialize the RSS hash value. This patch fixes it by making sure the initial value is set, no matter what the value of the RSS key is. Fixes: 91a65b7d3ed8 ("net: ena: fix potential crash when rxfh key is NULL") Signed-off-by: Nati Koler Signed-off-by: David Arinzon Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_com.c | 29 +++++++---------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 8c8b4c88c7dea..451c3a1b62553 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -2400,29 +2400,18 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, return -EOPNOTSUPP; } - switch (func) { - case ENA_ADMIN_TOEPLITZ: - if (key) { - if (key_len != sizeof(hash_key->key)) { - netdev_err(ena_dev->net_device, - "key len (%u) doesn't equal the supported size (%zu)\n", - key_len, sizeof(hash_key->key)); - return -EINVAL; - } - memcpy(hash_key->key, key, key_len); - rss->hash_init_val = init_val; - hash_key->key_parts = key_len / sizeof(hash_key->key[0]); + if ((func == ENA_ADMIN_TOEPLITZ) && key) { + if (key_len != sizeof(hash_key->key)) { + netdev_err(ena_dev->net_device, + "key len (%u) doesn't equal the supported size (%zu)\n", + key_len, sizeof(hash_key->key)); + return -EINVAL; } - break; - case ENA_ADMIN_CRC32: - rss->hash_init_val = init_val; - break; - default: - netdev_err(ena_dev->net_device, "Invalid hash function (%d)\n", - func); - return -EINVAL; + memcpy(hash_key->key, key, key_len); + hash_key->key_parts = key_len / sizeof(hash_key->key[0]); } + rss->hash_init_val = init_val; old_func = rss->hash_func; rss->hash_func = func; rc = ena_com_set_hash_function(ena_dev); From 9c9e539956fa67efb8a65e32b72a853740b33445 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Thu, 29 Dec 2022 07:30:06 +0000 Subject: [PATCH 0193/1593] net: ena: Don't register memory info on XDP exchange Since the queues aren't destroyed when we only exchange XDP programs, there's no need to re-register them again. Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action") Signed-off-by: Shay Agroskin Signed-off-by: David Arinzon Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index a95529a69cbb6..6ba9b06719a08 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -512,16 +512,18 @@ static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, struct bpf_prog *prog, int first, int count) { + struct bpf_prog *old_bpf_prog; struct ena_ring *rx_ring; int i = 0; for (i = first; i < count; i++) { rx_ring = &adapter->rx_ring[i]; - xchg(&rx_ring->xdp_bpf_prog, prog); - if (prog) { + old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog); + + if (!old_bpf_prog && prog) { ena_xdp_register_rxq_info(rx_ring); rx_ring->rx_headroom = XDP_PACKET_HEADROOM; - } else { + } else if (old_bpf_prog && !prog) { ena_xdp_unregister_rxq_info(rx_ring); rx_ring->rx_headroom = NET_SKB_PAD; } From c7f5e34d906320fdc996afa616676161c029cc02 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Thu, 29 Dec 2022 07:30:07 +0000 Subject: [PATCH 0194/1593] net: ena: Account for the number of processed bytes in XDP The size of packets that were forwarded or dropped by XDP wasn't added to the total processed bytes statistic. Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action") Signed-off-by: Shay Agroskin Signed-off-by: David Arinzon Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 6ba9b06719a08..9ae86bd3d457b 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1719,6 +1719,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, } if (xdp_verdict != XDP_PASS) { xdp_flags |= xdp_verdict; + total_len += ena_rx_ctx.ena_bufs[0].len; res_budget--; continue; } From 59811faa2c54dbcf44d575b5a8f6e7077da88dc2 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Thu, 29 Dec 2022 07:30:08 +0000 Subject: [PATCH 0195/1593] net: ena: Use bitmask to indicate packet redirection Redirecting packets with XDP Redirect is done in two phases: 1. A packet is passed by the driver to the kernel using xdp_do_redirect(). 2. After finishing polling for new packets the driver lets the kernel know that it can now process the redirected packet using xdp_do_flush_map(). The packets' redirection is handled in the napi context of the queue that called xdp_do_redirect() To avoid calling xdp_do_flush_map() each time the driver first checks whether any packets were redirected, using xdp_flags |= xdp_verdict; and if (xdp_flags & XDP_REDIRECT) xdp_do_flush_map() essentially treating XDP instructions as a bitmask, which isn't the case: enum xdp_action { XDP_ABORTED = 0, XDP_DROP, XDP_PASS, XDP_TX, XDP_REDIRECT, }; Given the current possible values of xdp_action, the current design doesn't have a bug (since XDP_REDIRECT = 100b), but it is still flawed. This patch makes the driver use a bitmask instead, to avoid future issues. Fixes: a318c70ad152 ("net: ena: introduce XDP redirect implementation") Signed-off-by: Shay Agroskin Signed-off-by: David Arinzon Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 26 ++++++++++++-------- drivers/net/ethernet/amazon/ena/ena_netdev.h | 9 +++++++ 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 9ae86bd3d457b..a67f55e5f7554 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -374,9 +374,9 @@ static int ena_xdp_xmit(struct net_device *dev, int n, static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) { + u32 verdict = ENA_XDP_PASS; struct bpf_prog *xdp_prog; struct ena_ring *xdp_ring; - u32 verdict = XDP_PASS; struct xdp_frame *xdpf; u64 *xdp_stat; @@ -393,7 +393,7 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) if (unlikely(!xdpf)) { trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); xdp_stat = &rx_ring->rx_stats.xdp_aborted; - verdict = XDP_ABORTED; + verdict = ENA_XDP_DROP; break; } @@ -409,29 +409,35 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) spin_unlock(&xdp_ring->xdp_tx_lock); xdp_stat = &rx_ring->rx_stats.xdp_tx; + verdict = ENA_XDP_TX; break; case XDP_REDIRECT: if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) { xdp_stat = &rx_ring->rx_stats.xdp_redirect; + verdict = ENA_XDP_REDIRECT; break; } trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); xdp_stat = &rx_ring->rx_stats.xdp_aborted; - verdict = XDP_ABORTED; + verdict = ENA_XDP_DROP; break; case XDP_ABORTED: trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); xdp_stat = &rx_ring->rx_stats.xdp_aborted; + verdict = ENA_XDP_DROP; break; case XDP_DROP: xdp_stat = &rx_ring->rx_stats.xdp_drop; + verdict = ENA_XDP_DROP; break; case XDP_PASS: xdp_stat = &rx_ring->rx_stats.xdp_pass; + verdict = ENA_XDP_PASS; break; default: bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict); xdp_stat = &rx_ring->rx_stats.xdp_invalid; + verdict = ENA_XDP_DROP; } ena_increase_stat(xdp_stat, 1, &rx_ring->syncp); @@ -1621,12 +1627,12 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) * we expect, then we simply drop it */ if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU)) - return XDP_DROP; + return ENA_XDP_DROP; ret = ena_xdp_execute(rx_ring, xdp); /* The xdp program might expand the headers */ - if (ret == XDP_PASS) { + if (ret == ENA_XDP_PASS) { rx_info->page_offset = xdp->data - xdp->data_hard_start; rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data; } @@ -1665,7 +1671,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq); do { - xdp_verdict = XDP_PASS; + xdp_verdict = ENA_XDP_PASS; skb = NULL; ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; ena_rx_ctx.max_bufs = rx_ring->sgl_size; @@ -1693,7 +1699,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp); /* allocate skb and fill it */ - if (xdp_verdict == XDP_PASS) + if (xdp_verdict == ENA_XDP_PASS) skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs, @@ -1711,13 +1717,13 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, /* Packets was passed for transmission, unmap it * from RX side. */ - if (xdp_verdict == XDP_TX || xdp_verdict == XDP_REDIRECT) { + if (xdp_verdict & ENA_XDP_FORWARDED) { ena_unmap_rx_buff(rx_ring, &rx_ring->rx_buffer_info[req_id]); rx_ring->rx_buffer_info[req_id].page = NULL; } } - if (xdp_verdict != XDP_PASS) { + if (xdp_verdict != ENA_XDP_PASS) { xdp_flags |= xdp_verdict; total_len += ena_rx_ctx.ena_bufs[0].len; res_budget--; @@ -1763,7 +1769,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, ena_refill_rx_bufs(rx_ring, refill_required); } - if (xdp_flags & XDP_REDIRECT) + if (xdp_flags & ENA_XDP_REDIRECT) xdp_do_flush_map(); return work_done; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 1bdce99bf6888..290ae9bf47ee4 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -409,6 +409,15 @@ enum ena_xdp_errors_t { ENA_XDP_NO_ENOUGH_QUEUES, }; +enum ENA_XDP_ACTIONS { + ENA_XDP_PASS = 0, + ENA_XDP_TX = BIT(0), + ENA_XDP_REDIRECT = BIT(1), + ENA_XDP_DROP = BIT(2) +}; + +#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT) + static inline bool ena_xdp_present(struct ena_adapter *adapter) { return !!adapter->xdp_bpf_prog; From c7062aaee099f2f43d6f07a71744b44b94b94b34 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Thu, 29 Dec 2022 07:30:09 +0000 Subject: [PATCH 0196/1593] net: ena: Fix rx_copybreak value update Make the upper bound on rx_copybreak tighter, by making sure it is smaller than the minimum of mtu and ENA_PAGE_SIZE. With the current upper bound of mtu, rx_copybreak can be larger than a page. Such large rx_copybreak will not bring any performance benefit to the user and therefore makes no sense. In addition, the value update was only reflected in the adapter structure, but not applied for each ring, causing it to not take effect. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Osama Abboud Signed-off-by: Arthur Kiyanovski Signed-off-by: David Arinzon Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 6 +----- drivers/net/ethernet/amazon/ena/ena_netdev.c | 18 ++++++++++++++++++ drivers/net/ethernet/amazon/ena/ena_netdev.h | 2 ++ 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 48ae6d810f8f9..8da79eedc057c 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -887,11 +887,7 @@ static int ena_set_tunable(struct net_device *netdev, switch (tuna->id) { case ETHTOOL_RX_COPYBREAK: len = *(u32 *)data; - if (len > adapter->netdev->mtu) { - ret = -EINVAL; - break; - } - adapter->rx_copybreak = len; + ret = ena_set_rx_copybreak(adapter, len); break; default: ret = -EINVAL; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index a67f55e5f7554..80a726932e810 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2814,6 +2814,24 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, return dev_was_up ? ena_up(adapter) : 0; } +int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak) +{ + struct ena_ring *rx_ring; + int i; + + if (rx_copybreak > min_t(u16, adapter->netdev->mtu, ENA_PAGE_SIZE)) + return -EINVAL; + + adapter->rx_copybreak = rx_copybreak; + + for (i = 0; i < adapter->num_io_queues; i++) { + rx_ring = &adapter->rx_ring[i]; + rx_ring->rx_copybreak = rx_copybreak; + } + + return 0; +} + int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count) { struct ena_com_dev *ena_dev = adapter->ena_dev; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 290ae9bf47ee4..f9d862b630fad 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -392,6 +392,8 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count); +int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak); + int ena_get_sset_count(struct net_device *netdev, int sset); static inline void ena_reset_device(struct ena_adapter *adapter, From e712f3e4920b3a1a5e6b536827d118e14862896c Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Thu, 29 Dec 2022 07:30:10 +0000 Subject: [PATCH 0197/1593] net: ena: Set default value for RX interrupt moderation RX ring can be NULL in XDP use cases where only TX queues are configured. In this scenario, the RX interrupt moderation value sent to the device remains in its default value of 0. In this change, setting the default value of the RX interrupt moderation to be the same as of the TX. Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action") Signed-off-by: David Arinzon Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 80a726932e810..99f80c2d560a5 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1823,8 +1823,9 @@ static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi) static void ena_unmask_interrupt(struct ena_ring *tx_ring, struct ena_ring *rx_ring) { + u32 rx_interval = tx_ring->smoothed_interval; struct ena_eth_io_intr_reg intr_reg; - u32 rx_interval = 0; + /* Rx ring can be NULL when for XDP tx queues which don't have an * accompanying rx_ring pair. */ From a8ee104f986e720cea52133885cc822d459398c7 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Thu, 29 Dec 2022 07:30:11 +0000 Subject: [PATCH 0198/1593] net: ena: Update NUMA TPH hint register upon NUMA node update The device supports a PCIe optimization hint, which indicates on which NUMA the queue is currently processed. This hint is utilized by PCIe in order to reduce its access time by accessing the correct NUMA resources and maintaining cache coherence. The driver calls the register update for the hint (called TPH - TLP Processing Hint) during the NAPI loop. Though the update is expected upon a NUMA change (when a queue is moved from one NUMA to the other), the current logic performs a register update when the queue is moved to a different CPU, but the CPU is not necessarily in a different NUMA. The changes include: 1. Performing the TPH update only when the queue has switched a NUMA node. 2. Moving the TPH update call to be triggered only when NAPI was scheduled from interrupt context, as opposed to a busy-polling loop. This is due to the fact that during busy-polling, the frequency of CPU switches for a particular queue is significantly higher, thus, the likelihood to switch NUMA is much higher. Therefore, providing the frequent updates to the device upon a NUMA update are unlikely to be beneficial. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: David Arinzon Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 27 +++++++++++++------- drivers/net/ethernet/amazon/ena/ena_netdev.h | 6 +++-- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 99f80c2d560a5..e8ad5ea31affe 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -680,6 +680,7 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter, ring->ena_dev = adapter->ena_dev; ring->per_napi_packets = 0; ring->cpu = 0; + ring->numa_node = 0; ring->no_interrupt_event_cnt = 0; u64_stats_init(&ring->syncp); } @@ -783,6 +784,7 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; tx_ring->cpu = ena_irq->cpu; + tx_ring->numa_node = node; return 0; err_push_buf_intermediate_buf: @@ -915,6 +917,7 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter, rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; rx_ring->cpu = ena_irq->cpu; + rx_ring->numa_node = node; return 0; } @@ -1863,20 +1866,27 @@ static void ena_update_ring_numa_node(struct ena_ring *tx_ring, if (likely(tx_ring->cpu == cpu)) goto out; + tx_ring->cpu = cpu; + if (rx_ring) + rx_ring->cpu = cpu; + numa_node = cpu_to_node(cpu); + + if (likely(tx_ring->numa_node == numa_node)) + goto out; + put_cpu(); if (numa_node != NUMA_NO_NODE) { ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node); - if (rx_ring) + tx_ring->numa_node = numa_node; + if (rx_ring) { + rx_ring->numa_node = numa_node; ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node); + } } - tx_ring->cpu = cpu; - if (rx_ring) - rx_ring->cpu = cpu; - return; out: put_cpu(); @@ -1997,11 +2007,10 @@ static int ena_io_poll(struct napi_struct *napi, int budget) if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev)) ena_adjust_adaptive_rx_intr_moderation(ena_napi); + ena_update_ring_numa_node(tx_ring, rx_ring); ena_unmask_interrupt(tx_ring, rx_ring); } - ena_update_ring_numa_node(tx_ring, rx_ring); - ret = rx_work_done; } else { ret = budget; @@ -2386,7 +2395,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) ctx.mem_queue_type = ena_dev->tx_mem_queue_type; ctx.msix_vector = msix_vector; ctx.queue_size = tx_ring->ring_size; - ctx.numa_node = cpu_to_node(tx_ring->cpu); + ctx.numa_node = tx_ring->numa_node; rc = ena_com_create_io_queue(ena_dev, &ctx); if (rc) { @@ -2454,7 +2463,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; ctx.msix_vector = msix_vector; ctx.queue_size = rx_ring->ring_size; - ctx.numa_node = cpu_to_node(rx_ring->cpu); + ctx.numa_node = rx_ring->numa_node; rc = ena_com_create_io_queue(ena_dev, &ctx); if (rc) { diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index f9d862b630fad..2cb141079474c 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -262,9 +262,11 @@ struct ena_ring { bool disable_meta_caching; u16 no_interrupt_event_cnt; - /* cpu for TPH */ + /* cpu and NUMA for TPH */ int cpu; - /* number of tx/rx_buffer_info's entries */ + int numa_node; + + /* number of tx/rx_buffer_info's entries */ int ring_size; enum ena_admin_placement_policy_type tx_mem_queue_type; From d039535850ee47079d59527e96be18d8e0daa84b Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 29 Dec 2022 10:29:25 +0400 Subject: [PATCH 0199/1593] net: phy: xgmiitorgmii: Fix refcount leak in xgmiitorgmii_probe of_phy_find_device() return device node with refcount incremented. Call put_device() to relese it when not needed anymore. Fixes: ab4e6ee578e8 ("net: phy: xgmiitorgmii: Check phy_driver ready before accessing") Signed-off-by: Miaoqian Lin Signed-off-by: David S. Miller --- drivers/net/phy/xilinx_gmii2rgmii.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c index 8dcb49ed1f3d9..7fd9fe6a602bc 100644 --- a/drivers/net/phy/xilinx_gmii2rgmii.c +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -105,6 +105,7 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev) if (!priv->phy_dev->drv) { dev_info(dev, "Attached phy not ready\n"); + put_device(&priv->phy_dev->mdio.dev); return -EPROBE_DEFER; } From a23529989a8f56d23680c4f2d14011bc9c9457c9 Mon Sep 17 00:00:00 2001 From: Nikolaus Voss Date: Tue, 20 Dec 2022 09:17:50 +0100 Subject: [PATCH 0200/1593] crypto: caam - fix CAAM io mem access in blob_gen IO memory access has to be done with accessors defined in caam/regs.h as there are little-endian architectures with a big-endian CAAM unit. Fixes: 6a83830f649a ("crypto: caam - warn if blob_gen key is insecure") Signed-off-by: Nikolaus Voss Reviewed-by: Ahmad Fatoum Signed-off-by: Herbert Xu --- drivers/crypto/caam/blob_gen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/caam/blob_gen.c b/drivers/crypto/caam/blob_gen.c index 1f65df4898478..f46b161d2cda6 100644 --- a/drivers/crypto/caam/blob_gen.c +++ b/drivers/crypto/caam/blob_gen.c @@ -104,7 +104,7 @@ int caam_process_blob(struct caam_blob_priv *priv, } ctrlpriv = dev_get_drvdata(jrdev->parent); - moo = FIELD_GET(CSTA_MOO, ioread32(&ctrlpriv->ctrl->perfmon.status)); + moo = FIELD_GET(CSTA_MOO, rd_reg32(&ctrlpriv->ctrl->perfmon.status)); if (moo != CSTA_MOO_SECURE && moo != CSTA_MOO_TRUSTED) dev_warn(jrdev, "using insecure test key, enable HAB to use unique device key!\n"); From 736f88689c6912f05d0116917910603a7ba97de7 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Wed, 21 Dec 2022 15:32:32 +0800 Subject: [PATCH 0201/1593] crypto: arm64/sm4 - fix possible crash with CFI enabled The SM4 CCM/GCM assembly functions for encryption and decryption is called via indirect function calls. Therefore they need to use SYM_TYPED_FUNC_START instead of SYM_FUNC_START to cause its type hash to be emitted when the kernel is built with CONFIG_CFI_CLANG=y. Otherwise, the code crashes with a CFI failure (if the compiler didn't happen to optimize out the indirect call). Fixes: 67fa3a7fdf80 ("crypto: arm64/sm4 - add CE implementation for CCM mode") Fixes: ae1b83c7d572 ("crypto: arm64/sm4 - add CE implementation for GCM mode") Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/sm4-ce-ccm-core.S | 5 +++-- arch/arm64/crypto/sm4-ce-gcm-core.S | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm64/crypto/sm4-ce-ccm-core.S b/arch/arm64/crypto/sm4-ce-ccm-core.S index 028207c4afd0f..fa85856f33cee 100644 --- a/arch/arm64/crypto/sm4-ce-ccm-core.S +++ b/arch/arm64/crypto/sm4-ce-ccm-core.S @@ -8,6 +8,7 @@ */ #include +#include #include #include "sm4-ce-asm.h" @@ -104,7 +105,7 @@ SYM_FUNC_START(sm4_ce_ccm_final) SYM_FUNC_END(sm4_ce_ccm_final) .align 3 -SYM_FUNC_START(sm4_ce_ccm_enc) +SYM_TYPED_FUNC_START(sm4_ce_ccm_enc) /* input: * x0: round key array, CTX * x1: dst @@ -216,7 +217,7 @@ SYM_FUNC_START(sm4_ce_ccm_enc) SYM_FUNC_END(sm4_ce_ccm_enc) .align 3 -SYM_FUNC_START(sm4_ce_ccm_dec) +SYM_TYPED_FUNC_START(sm4_ce_ccm_dec) /* input: * x0: round key array, CTX * x1: dst diff --git a/arch/arm64/crypto/sm4-ce-gcm-core.S b/arch/arm64/crypto/sm4-ce-gcm-core.S index 7aa3ec18a2891..347f25d757279 100644 --- a/arch/arm64/crypto/sm4-ce-gcm-core.S +++ b/arch/arm64/crypto/sm4-ce-gcm-core.S @@ -9,6 +9,7 @@ */ #include +#include #include #include "sm4-ce-asm.h" @@ -370,7 +371,7 @@ SYM_FUNC_START(pmull_ghash_update) SYM_FUNC_END(pmull_ghash_update) .align 3 -SYM_FUNC_START(sm4_ce_pmull_gcm_enc) +SYM_TYPED_FUNC_START(sm4_ce_pmull_gcm_enc) /* input: * x0: round key array, CTX * x1: dst @@ -581,7 +582,7 @@ SYM_FUNC_END(sm4_ce_pmull_gcm_enc) #define RH3 v20 .align 3 -SYM_FUNC_START(sm4_ce_pmull_gcm_dec) +SYM_TYPED_FUNC_START(sm4_ce_pmull_gcm_dec) /* input: * x0: round key array, CTX * x1: dst From ba2dc1cb5491712a6946d0595cf11ba463f50e64 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 29 Dec 2022 17:45:01 +0100 Subject: [PATCH 0202/1593] gpiolib: Fix using uninitialized lookup-flags on ACPI platforms Commit 8eb1f71e7acc ("gpiolib: consolidate GPIO lookups") refactors fwnode_get_named_gpiod() and gpiod_get_index() into a unified gpiod_find_and_request() helper. The old functions both initialized their local lookupflags variable to GPIO_LOOKUP_FLAGS_DEFAULT, but the new code leaves it uninitialized. This is a problem for at least ACPI platforms, where acpi_find_gpio() only does a bunch of *lookupflags |= GPIO_* statements and thus relies on the variable being initialized. The variable not being initialized leads to: 1. Potentially the wrong flags getting used 2. The check for conflicting lookup flags in gpiod_configure_flags(): "multiple pull-up, pull-down or pull-disable enabled, invalid config" sometimes triggering, making the GPIO unavailable Restore the initialization of lookupflags to GPIO_LOOKUP_FLAGS_DEFAULT to fix this. Fixes: 8eb1f71e7acc ("gpiolib: consolidate GPIO lookups") Signed-off-by: Hans de Goede Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 5a66d9616d7cc..939c776b94881 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -3905,8 +3905,8 @@ static struct gpio_desc *gpiod_find_and_request(struct device *consumer, const char *label, bool platform_lookup_allowed) { + unsigned long lookupflags = GPIO_LOOKUP_FLAGS_DEFAULT; struct gpio_desc *desc = ERR_PTR(-ENOENT); - unsigned long lookupflags; int ret; if (!IS_ERR_OR_NULL(fwnode)) From 90fee3dd5bfc1b9f4c8c0ba6cd2a35c9d79ca4de Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Sun, 11 Dec 2022 00:05:58 +0200 Subject: [PATCH 0203/1593] gpio: pca953x: avoid to use uninitialized value pinctrl There is a variable pinctrl declared without initializer. And then has the case (switch operation chose the default case) to directly use this uninitialized value, this is not a safe behavior. So here initialize the pinctrl as 0 to avoid this issue. This is reported by Coverity. Fixes: 13c5d4ce8060 ("gpio: pca953x: Add support for PCAL6534") Signed-off-by: Haibo Chen Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index a59d61cd44b2e..5299e5bb76d6e 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -474,6 +474,9 @@ static u8 pcal6534_recalc_addr(struct pca953x_chip *chip, int reg, int off) case PCAL6524_DEBOUNCE: pinctrl = ((reg & PCAL_PINCTRL_MASK) >> 1) + 0x1c; break; + default: + pinctrl = 0; + break; } return pinctrl + addr + (off / BANK_SZ); From e84077437902ec99eba0a6b516df772653f142c7 Mon Sep 17 00:00:00 2001 From: Eliav Farber Date: Thu, 20 Oct 2022 12:44:58 +0000 Subject: [PATCH 0204/1593] EDAC/device: Fix period calculation in edac_device_reset_delay_period() Fix period calculation in case user sets a value of 1000. The input of round_jiffies_relative() should be in jiffies and not in milli-seconds. [ bp: Use the same code pattern as in edac_device_workq_setup() for clarity. ] Fixes: c4cf3b454eca ("EDAC: Rework workqueue handling") Signed-off-by: Eliav Farber Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/20221020124458.22153-1-farbere@amazon.com --- drivers/edac/edac_device.c | 17 ++++++++--------- drivers/edac/edac_module.h | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 19522c568aa5d..878deb4880cdb 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -394,17 +394,16 @@ static void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) * Then restart the workq on the new delay */ void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, - unsigned long value) + unsigned long msec) { - unsigned long jiffs = msecs_to_jiffies(value); - - if (value == 1000) - jiffs = round_jiffies_relative(value); - - edac_dev->poll_msec = value; - edac_dev->delay = jiffs; + edac_dev->poll_msec = msec; + edac_dev->delay = msecs_to_jiffies(msec); - edac_mod_work(&edac_dev->work, jiffs); + /* See comment in edac_device_workq_setup() above */ + if (edac_dev->poll_msec == 1000) + edac_mod_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); + else + edac_mod_work(&edac_dev->work, edac_dev->delay); } int edac_device_alloc_index(void) diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h index 763c076d96f21..47593afdc2348 100644 --- a/drivers/edac/edac_module.h +++ b/drivers/edac/edac_module.h @@ -53,7 +53,7 @@ bool edac_stop_work(struct delayed_work *work); bool edac_mod_work(struct delayed_work *work, unsigned long delay); extern void edac_device_reset_delay_period(struct edac_device_ctl_info - *edac_dev, unsigned long value); + *edac_dev, unsigned long msec); extern void edac_mc_reset_delay_period(unsigned long value); /* From 2788938b794633fc1865c805764bed196e01f97e Mon Sep 17 00:00:00 2001 From: Cixi Geng Date: Thu, 29 Dec 2022 22:55:43 +0800 Subject: [PATCH 0205/1593] gpio: eic-sprd: Make the irqchip immutable Remove the irq_chip from pmic_eic structure, use the various calls by defining the statically irq_chip structure. Signed-off-by: Cixi Geng Reviewed-by: Baolin Wang Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-eic-sprd.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index 8d722e026e9c9..84352a6f4973b 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -91,7 +91,6 @@ enum sprd_eic_type { struct sprd_eic { struct gpio_chip chip; - struct irq_chip intc; void __iomem *base[SPRD_EIC_MAX_BANK]; enum sprd_eic_type type; spinlock_t lock; @@ -255,6 +254,8 @@ static void sprd_eic_irq_mask(struct irq_data *data) default: dev_err(chip->parent, "Unsupported EIC type.\n"); } + + gpiochip_disable_irq(chip, offset); } static void sprd_eic_irq_unmask(struct irq_data *data) @@ -263,6 +264,8 @@ static void sprd_eic_irq_unmask(struct irq_data *data) struct sprd_eic *sprd_eic = gpiochip_get_data(chip); u32 offset = irqd_to_hwirq(data); + gpiochip_enable_irq(chip, offset); + switch (sprd_eic->type) { case SPRD_EIC_DEBOUNCE: sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IE, 1); @@ -564,6 +567,15 @@ static void sprd_eic_irq_handler(struct irq_desc *desc) chained_irq_exit(ic, desc); } +static const struct irq_chip sprd_eic_irq = { + .name = "sprd-eic", + .irq_ack = sprd_eic_irq_ack, + .irq_mask = sprd_eic_irq_mask, + .irq_unmask = sprd_eic_irq_unmask, + .irq_set_type = sprd_eic_irq_set_type, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_IMMUTABLE, + GPIOCHIP_IRQ_RESOURCE_HELPERS, +}; static int sprd_eic_probe(struct platform_device *pdev) { const struct sprd_eic_variant_data *pdata; @@ -626,15 +638,8 @@ static int sprd_eic_probe(struct platform_device *pdev) break; } - sprd_eic->intc.name = dev_name(&pdev->dev); - sprd_eic->intc.irq_ack = sprd_eic_irq_ack; - sprd_eic->intc.irq_mask = sprd_eic_irq_mask; - sprd_eic->intc.irq_unmask = sprd_eic_irq_unmask; - sprd_eic->intc.irq_set_type = sprd_eic_irq_set_type; - sprd_eic->intc.flags = IRQCHIP_SKIP_SET_WAKE; - irq = &sprd_eic->chip.irq; - irq->chip = &sprd_eic->intc; + gpio_irq_chip_set_chip(irq, &sprd_eic_irq); irq->handler = handle_bad_irq; irq->default_type = IRQ_TYPE_NONE; irq->parent_handler = sprd_eic_irq_handler; From be43eea7de5a3977ac3d13fbfb9e505fab475e97 Mon Sep 17 00:00:00 2001 From: Cixi Geng Date: Thu, 29 Dec 2022 22:55:44 +0800 Subject: [PATCH 0206/1593] gpio: pmic-eic-sprd: Make the irqchip immutable Remove the irq_chip from pmic_eic structure, use the various calls by defining the statically irq_chip structure. Signed-off-by: Cixi Geng Reviewed-by: Baolin Wang Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pmic-eic-sprd.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/gpio/gpio-pmic-eic-sprd.c b/drivers/gpio/gpio-pmic-eic-sprd.c index e518490c4b681..c3e4d90f6b183 100644 --- a/drivers/gpio/gpio-pmic-eic-sprd.c +++ b/drivers/gpio/gpio-pmic-eic-sprd.c @@ -47,7 +47,6 @@ enum { /** * struct sprd_pmic_eic - PMIC EIC controller * @chip: the gpio_chip structure. - * @intc: the irq_chip structure. * @map: the regmap from the parent device. * @offset: the EIC controller's offset address of the PMIC. * @reg: the array to cache the EIC registers. @@ -56,7 +55,6 @@ enum { */ struct sprd_pmic_eic { struct gpio_chip chip; - struct irq_chip intc; struct regmap *map; u32 offset; u8 reg[CACHE_NR_REGS]; @@ -151,15 +149,21 @@ static void sprd_pmic_eic_irq_mask(struct irq_data *data) { struct gpio_chip *chip = irq_data_get_irq_chip_data(data); struct sprd_pmic_eic *pmic_eic = gpiochip_get_data(chip); + u32 offset = irqd_to_hwirq(data); pmic_eic->reg[REG_IE] = 0; pmic_eic->reg[REG_TRIG] = 0; + + gpiochip_disable_irq(chip, offset); } static void sprd_pmic_eic_irq_unmask(struct irq_data *data) { struct gpio_chip *chip = irq_data_get_irq_chip_data(data); struct sprd_pmic_eic *pmic_eic = gpiochip_get_data(chip); + u32 offset = irqd_to_hwirq(data); + + gpiochip_enable_irq(chip, offset); pmic_eic->reg[REG_IE] = 1; pmic_eic->reg[REG_TRIG] = 1; @@ -292,6 +296,17 @@ static irqreturn_t sprd_pmic_eic_irq_handler(int irq, void *data) return IRQ_HANDLED; } +static const struct irq_chip pmic_eic_irq_chip = { + .name = "sprd-pmic-eic", + .irq_mask = sprd_pmic_eic_irq_mask, + .irq_unmask = sprd_pmic_eic_irq_unmask, + .irq_set_type = sprd_pmic_eic_irq_set_type, + .irq_bus_lock = sprd_pmic_eic_bus_lock, + .irq_bus_sync_unlock = sprd_pmic_eic_bus_sync_unlock, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_IMMUTABLE, + GPIOCHIP_IRQ_RESOURCE_HELPERS, +}; + static int sprd_pmic_eic_probe(struct platform_device *pdev) { struct gpio_irq_chip *irq; @@ -338,16 +353,8 @@ static int sprd_pmic_eic_probe(struct platform_device *pdev) pmic_eic->chip.set = sprd_pmic_eic_set; pmic_eic->chip.get = sprd_pmic_eic_get; - pmic_eic->intc.name = dev_name(&pdev->dev); - pmic_eic->intc.irq_mask = sprd_pmic_eic_irq_mask; - pmic_eic->intc.irq_unmask = sprd_pmic_eic_irq_unmask; - pmic_eic->intc.irq_set_type = sprd_pmic_eic_irq_set_type; - pmic_eic->intc.irq_bus_lock = sprd_pmic_eic_bus_lock; - pmic_eic->intc.irq_bus_sync_unlock = sprd_pmic_eic_bus_sync_unlock; - pmic_eic->intc.flags = IRQCHIP_SKIP_SET_WAKE; - irq = &pmic_eic->chip.irq; - irq->chip = &pmic_eic->intc; + gpio_irq_chip_set_chip(irq, &pmic_eic_irq_chip); irq->threaded = true; ret = devm_gpiochip_add_data(&pdev->dev, &pmic_eic->chip, pmic_eic); From 9883ddf9d68db5332f08dfc7283db69f69f8d6d2 Mon Sep 17 00:00:00 2001 From: Cixi Geng Date: Thu, 29 Dec 2022 22:55:45 +0800 Subject: [PATCH 0207/1593] gpio: sprd: Make the irqchip immutable Make the struct irq_chip const, flag it as IRQCHIP_IMMUTABLE, add the new helper functions, and call the appropriate gpiolib functions. Signed-off-by: Cixi Geng Reported-by: kernel test robot Reported-by: Julia Lawall Reviewed-by: Baolin Wang Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-sprd.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-sprd.c b/drivers/gpio/gpio-sprd.c index 9bff63990eee4..072b4e6532164 100644 --- a/drivers/gpio/gpio-sprd.c +++ b/drivers/gpio/gpio-sprd.c @@ -120,6 +120,7 @@ static void sprd_gpio_irq_mask(struct irq_data *data) u32 offset = irqd_to_hwirq(data); sprd_gpio_update(chip, offset, SPRD_GPIO_IE, 0); + gpiochip_disable_irq(chip, offset); } static void sprd_gpio_irq_ack(struct irq_data *data) @@ -136,6 +137,7 @@ static void sprd_gpio_irq_unmask(struct irq_data *data) u32 offset = irqd_to_hwirq(data); sprd_gpio_update(chip, offset, SPRD_GPIO_IE, 1); + gpiochip_enable_irq(chip, offset); } static int sprd_gpio_irq_set_type(struct irq_data *data, @@ -205,13 +207,14 @@ static void sprd_gpio_irq_handler(struct irq_desc *desc) chained_irq_exit(ic, desc); } -static struct irq_chip sprd_gpio_irqchip = { +static const struct irq_chip sprd_gpio_irqchip = { .name = "sprd-gpio", .irq_ack = sprd_gpio_irq_ack, .irq_mask = sprd_gpio_irq_mask, .irq_unmask = sprd_gpio_irq_unmask, .irq_set_type = sprd_gpio_irq_set_type, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_IMMUTABLE, + GPIOCHIP_IRQ_RESOURCE_HELPERS, }; static int sprd_gpio_probe(struct platform_device *pdev) @@ -245,7 +248,7 @@ static int sprd_gpio_probe(struct platform_device *pdev) sprd_gpio->chip.direction_output = sprd_gpio_direction_output; irq = &sprd_gpio->chip.irq; - irq->chip = &sprd_gpio_irqchip; + gpio_irq_chip_set_chip(irq, &sprd_gpio_irqchip); irq->handler = handle_bad_irq; irq->default_type = IRQ_TYPE_NONE; irq->parent_handler = sprd_gpio_irq_handler; From b878d3ba9bb41cddb73ba4b56e5552f0a638daca Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 27 Dec 2022 16:10:05 -0800 Subject: [PATCH 0208/1593] thermal: int340x: Add missing attribute for data rate base Commit 473be51142ad ("thermal: int340x: processor_thermal: Add RFIM driver")' added rfi_restriction_data_rate_base string, mmio details and documentation, but missed adding attribute to sysfs. Add missing sysfs attribute. Fixes: 473be51142ad ("thermal: int340x: processor_thermal: Add RFIM driver") Cc: 5.11+ # v5.11+ Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- .../thermal/intel/int340x_thermal/processor_thermal_rfim.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c index 8c42e76620333..92ed1213fe379 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c @@ -172,6 +172,7 @@ static const struct attribute_group fivr_attribute_group = { RFIM_SHOW(rfi_restriction_run_busy, 1) RFIM_SHOW(rfi_restriction_err_code, 1) RFIM_SHOW(rfi_restriction_data_rate, 1) +RFIM_SHOW(rfi_restriction_data_rate_base, 1) RFIM_SHOW(ddr_data_rate_point_0, 1) RFIM_SHOW(ddr_data_rate_point_1, 1) RFIM_SHOW(ddr_data_rate_point_2, 1) @@ -181,11 +182,13 @@ RFIM_SHOW(rfi_disable, 1) RFIM_STORE(rfi_restriction_run_busy, 1) RFIM_STORE(rfi_restriction_err_code, 1) RFIM_STORE(rfi_restriction_data_rate, 1) +RFIM_STORE(rfi_restriction_data_rate_base, 1) RFIM_STORE(rfi_disable, 1) static DEVICE_ATTR_RW(rfi_restriction_run_busy); static DEVICE_ATTR_RW(rfi_restriction_err_code); static DEVICE_ATTR_RW(rfi_restriction_data_rate); +static DEVICE_ATTR_RW(rfi_restriction_data_rate_base); static DEVICE_ATTR_RO(ddr_data_rate_point_0); static DEVICE_ATTR_RO(ddr_data_rate_point_1); static DEVICE_ATTR_RO(ddr_data_rate_point_2); @@ -248,6 +251,7 @@ static struct attribute *dvfs_attrs[] = { &dev_attr_rfi_restriction_run_busy.attr, &dev_attr_rfi_restriction_err_code.attr, &dev_attr_rfi_restriction_data_rate.attr, + &dev_attr_rfi_restriction_data_rate_base.attr, &dev_attr_ddr_data_rate_point_0.attr, &dev_attr_ddr_data_rate_point_1.attr, &dev_attr_ddr_data_rate_point_2.attr, From b814eda949c324791580003303aa608761cfde3f Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Sat, 31 Dec 2022 13:40:25 +0800 Subject: [PATCH 0209/1593] soc: imx: imx8mp-blk-ctrl: enable global pixclk with HDMI_TX_PHY PD NXP internal information shows that the PHY refclk is gated by the GLOBAL_TX_PIX_CLK_EN bit, so to allow the PHY PLL to lock without the LCDIF being already active, tie this bit to the HDMI_TX_PHY power domain. Fixes: e3442022f543 ("soc: imx: add i.MX8MP HDMI blk-ctrl") Signed-off-by: Lucas Stach Signed-off-by: Shawn Guo --- drivers/soc/imx/imx8mp-blk-ctrl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/soc/imx/imx8mp-blk-ctrl.c b/drivers/soc/imx/imx8mp-blk-ctrl.c index 0e3b6ba22f943..9852714eb2a41 100644 --- a/drivers/soc/imx/imx8mp-blk-ctrl.c +++ b/drivers/soc/imx/imx8mp-blk-ctrl.c @@ -212,7 +212,7 @@ static void imx8mp_hdmi_blk_ctrl_power_on(struct imx8mp_blk_ctrl *bc, break; case IMX8MP_HDMIBLK_PD_LCDIF: regmap_set_bits(bc->regmap, HDMI_RTX_CLK_CTL0, - BIT(7) | BIT(16) | BIT(17) | BIT(18) | + BIT(16) | BIT(17) | BIT(18) | BIT(19) | BIT(20)); regmap_set_bits(bc->regmap, HDMI_RTX_CLK_CTL1, BIT(11)); regmap_set_bits(bc->regmap, HDMI_RTX_RESET_CTL0, @@ -241,6 +241,7 @@ static void imx8mp_hdmi_blk_ctrl_power_on(struct imx8mp_blk_ctrl *bc, regmap_set_bits(bc->regmap, HDMI_TX_CONTROL0, BIT(1)); break; case IMX8MP_HDMIBLK_PD_HDMI_TX_PHY: + regmap_set_bits(bc->regmap, HDMI_RTX_CLK_CTL0, BIT(7)); regmap_set_bits(bc->regmap, HDMI_RTX_CLK_CTL1, BIT(22) | BIT(24)); regmap_set_bits(bc->regmap, HDMI_RTX_RESET_CTL0, BIT(12)); regmap_clear_bits(bc->regmap, HDMI_TX_CONTROL0, BIT(3)); @@ -270,7 +271,7 @@ static void imx8mp_hdmi_blk_ctrl_power_off(struct imx8mp_blk_ctrl *bc, BIT(4) | BIT(5) | BIT(6)); regmap_clear_bits(bc->regmap, HDMI_RTX_CLK_CTL1, BIT(11)); regmap_clear_bits(bc->regmap, HDMI_RTX_CLK_CTL0, - BIT(7) | BIT(16) | BIT(17) | BIT(18) | + BIT(16) | BIT(17) | BIT(18) | BIT(19) | BIT(20)); break; case IMX8MP_HDMIBLK_PD_PAI: @@ -298,6 +299,7 @@ static void imx8mp_hdmi_blk_ctrl_power_off(struct imx8mp_blk_ctrl *bc, case IMX8MP_HDMIBLK_PD_HDMI_TX_PHY: regmap_set_bits(bc->regmap, HDMI_TX_CONTROL0, BIT(3)); regmap_clear_bits(bc->regmap, HDMI_RTX_RESET_CTL0, BIT(12)); + regmap_clear_bits(bc->regmap, HDMI_RTX_CLK_CTL0, BIT(7)); regmap_clear_bits(bc->regmap, HDMI_RTX_CLK_CTL1, BIT(22) | BIT(24)); break; case IMX8MP_HDMIBLK_PD_HDCP: From 8a1ed98fe0f2e7669f0409de0f46f317b275f8be Mon Sep 17 00:00:00 2001 From: Li Jun Date: Fri, 30 Sep 2022 22:54:23 +0800 Subject: [PATCH 0210/1593] arm64: dts: imx8mp: correct usb clocks After commit cf7f3f4fa9e5 ("clk: imx8mp: fix usb_root_clk parent"), usb_root_clk is no longer for suspend clock so update dts accordingly to use right bus clock and suspend clock. Fixes: fb8587a2c165 ("arm64: dtsi: imx8mp: add usb nodes") Cc: stable@vger.kernel.org # ed1f4ccfe947: clk: imx: imx8mp: add shared clk gate for usb suspend clk Cc: stable@vger.kernel.org # v5.19+ Reviewed-by: Peng Fan Tested-by: Alexander Stein Signed-off-by: Li Jun Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index 7a6e6221f4219..6a04d81c69e65 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -1297,7 +1297,7 @@ reg = <0x32f10100 0x8>, <0x381f0000 0x20>; clocks = <&clk IMX8MP_CLK_HSIO_ROOT>, - <&clk IMX8MP_CLK_USB_ROOT>; + <&clk IMX8MP_CLK_USB_SUSP>; clock-names = "hsio", "suspend"; interrupts = ; power-domains = <&hsio_blk_ctrl IMX8MP_HSIOBLK_PD_USB>; @@ -1310,9 +1310,9 @@ usb_dwc3_0: usb@38100000 { compatible = "snps,dwc3"; reg = <0x38100000 0x10000>; - clocks = <&clk IMX8MP_CLK_HSIO_AXI>, + clocks = <&clk IMX8MP_CLK_USB_ROOT>, <&clk IMX8MP_CLK_USB_CORE_REF>, - <&clk IMX8MP_CLK_USB_ROOT>; + <&clk IMX8MP_CLK_USB_SUSP>; clock-names = "bus_early", "ref", "suspend"; interrupts = ; phys = <&usb3_phy0>, <&usb3_phy0>; @@ -1339,7 +1339,7 @@ reg = <0x32f10108 0x8>, <0x382f0000 0x20>; clocks = <&clk IMX8MP_CLK_HSIO_ROOT>, - <&clk IMX8MP_CLK_USB_ROOT>; + <&clk IMX8MP_CLK_USB_SUSP>; clock-names = "hsio", "suspend"; interrupts = ; power-domains = <&hsio_blk_ctrl IMX8MP_HSIOBLK_PD_USB>; @@ -1352,9 +1352,9 @@ usb_dwc3_1: usb@38200000 { compatible = "snps,dwc3"; reg = <0x38200000 0x10000>; - clocks = <&clk IMX8MP_CLK_HSIO_AXI>, + clocks = <&clk IMX8MP_CLK_USB_ROOT>, <&clk IMX8MP_CLK_USB_CORE_REF>, - <&clk IMX8MP_CLK_USB_ROOT>; + <&clk IMX8MP_CLK_USB_SUSP>; clock-names = "bus_early", "ref", "suspend"; interrupts = ; phys = <&usb3_phy1>, <&usb3_phy1>; From cfd04dd1c4b6c33afc2a934b957d71cf8ddd1539 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 21 Nov 2022 13:29:11 -0300 Subject: [PATCH 0211/1593] arm64: dts: imx8mp-phycore-som: Remove invalid PMIC property 'regulator-compatible' is not a valid property according to nxp,pca9450-regulator.yaml and causes the following warning: DTC_CHK arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk2.dtb ... pmic@25: regulators:LDO1: Unevaluated properties are not allowed ('regulator-compatible' was unexpected) Remove the invalid 'regulator-compatible' property. Cc: Teresa Remmet Fixes: 88f7f6bcca37 ("arm64: dts: freescale: Add support for phyBOARD-Pollux-i.MX8MP") Signed-off-by: Fabio Estevam Reviewed-by: Teresa Remmet Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi index 79b290a002c19..ecc4bce6db97c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi @@ -99,7 +99,6 @@ regulators { buck1: BUCK1 { - regulator-compatible = "BUCK1"; regulator-min-microvolt = <600000>; regulator-max-microvolt = <2187500>; regulator-boot-on; @@ -108,7 +107,6 @@ }; buck2: BUCK2 { - regulator-compatible = "BUCK2"; regulator-min-microvolt = <600000>; regulator-max-microvolt = <2187500>; regulator-boot-on; @@ -119,7 +117,6 @@ }; buck4: BUCK4 { - regulator-compatible = "BUCK4"; regulator-min-microvolt = <600000>; regulator-max-microvolt = <3400000>; regulator-boot-on; @@ -127,7 +124,6 @@ }; buck5: BUCK5 { - regulator-compatible = "BUCK5"; regulator-min-microvolt = <600000>; regulator-max-microvolt = <3400000>; regulator-boot-on; @@ -135,7 +131,6 @@ }; buck6: BUCK6 { - regulator-compatible = "BUCK6"; regulator-min-microvolt = <600000>; regulator-max-microvolt = <3400000>; regulator-boot-on; @@ -143,7 +138,6 @@ }; ldo1: LDO1 { - regulator-compatible = "LDO1"; regulator-min-microvolt = <1600000>; regulator-max-microvolt = <3300000>; regulator-boot-on; @@ -151,7 +145,6 @@ }; ldo2: LDO2 { - regulator-compatible = "LDO2"; regulator-min-microvolt = <800000>; regulator-max-microvolt = <1150000>; regulator-boot-on; @@ -159,7 +152,6 @@ }; ldo3: LDO3 { - regulator-compatible = "LDO3"; regulator-min-microvolt = <800000>; regulator-max-microvolt = <3300000>; regulator-boot-on; @@ -167,13 +159,11 @@ }; ldo4: LDO4 { - regulator-compatible = "LDO4"; regulator-min-microvolt = <800000>; regulator-max-microvolt = <3300000>; }; ldo5: LDO5 { - regulator-compatible = "LDO5"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-boot-on; From 94e2cf1e0db5b06c7a6ae0878c5cbec925819a8a Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 21 Nov 2022 13:31:23 -0300 Subject: [PATCH 0212/1593] ARM: dts: imx6ul-pico-dwarf: Use 'clock-frequency' 'clock_frequency' is not a valid property. Use the correct 'clock-frequency' instead. Fixes: 47246fafef84 ("ARM: dts: imx6ul-pico: Add support for the dwarf baseboard") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6ul-pico-dwarf.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6ul-pico-dwarf.dts b/arch/arm/boot/dts/imx6ul-pico-dwarf.dts index 162dc259edc8c..5a74c7f68eb62 100644 --- a/arch/arm/boot/dts/imx6ul-pico-dwarf.dts +++ b/arch/arm/boot/dts/imx6ul-pico-dwarf.dts @@ -32,7 +32,7 @@ }; &i2c2 { - clock_frequency = <100000>; + clock-frequency = <100000>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c2>; status = "okay"; From f4dd0845c4f1f5371f1e06fef0e4a1734a2db964 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 21 Nov 2022 13:31:24 -0300 Subject: [PATCH 0213/1593] ARM: dts: imx7d-pico: Use 'clock-frequency' 'clock_frequency' is not a valid property. Use the correct 'clock-frequency' instead. Fixes: 8b646cfb84c3 ("ARM: dts: imx7d-pico: Add support for the dwarf baseboard") Fixes: 6418fd92417f ("ARM: dts: imx7d-pico: Add support for the nymph baseboard") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7d-pico-dwarf.dts | 4 ++-- arch/arm/boot/dts/imx7d-pico-nymph.dts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx7d-pico-dwarf.dts b/arch/arm/boot/dts/imx7d-pico-dwarf.dts index 5162fe227d1ea..fdc10563f1473 100644 --- a/arch/arm/boot/dts/imx7d-pico-dwarf.dts +++ b/arch/arm/boot/dts/imx7d-pico-dwarf.dts @@ -32,7 +32,7 @@ }; &i2c1 { - clock_frequency = <100000>; + clock-frequency = <100000>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c1>; status = "okay"; @@ -52,7 +52,7 @@ }; &i2c4 { - clock_frequency = <100000>; + clock-frequency = <100000>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c1>; status = "okay"; diff --git a/arch/arm/boot/dts/imx7d-pico-nymph.dts b/arch/arm/boot/dts/imx7d-pico-nymph.dts index 104a85254adbb..5afb1674e0125 100644 --- a/arch/arm/boot/dts/imx7d-pico-nymph.dts +++ b/arch/arm/boot/dts/imx7d-pico-nymph.dts @@ -43,7 +43,7 @@ }; &i2c1 { - clock_frequency = <100000>; + clock-frequency = <100000>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c1>; status = "okay"; @@ -64,7 +64,7 @@ }; &i2c2 { - clock_frequency = <100000>; + clock-frequency = <100000>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c2>; status = "okay"; From 9dfbc72256b5de608ad10989bcbafdbbd1ac8d4e Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 21 Nov 2022 17:22:59 -0300 Subject: [PATCH 0214/1593] ARM: dts: imx6qdl-gw560x: Remove incorrect 'uart-has-rtscts' The following build warning is seen when running: make dtbs_check DT_SCHEMA_FILES=fsl-imx-uart.yaml arch/arm/boot/dts/imx6dl-gw560x.dtb: serial@2020000: rts-gpios: False schema does not allow [[20, 1, 0]] From schema: Documentation/devicetree/bindings/serial/fsl-imx-uart.yaml The imx6qdl-gw560x board does not expose the UART RTS and CTS as native UART pins, so 'uart-has-rtscts' should not be used. Using 'uart-has-rtscts' with 'rts-gpios' is an invalid combination detected by serial.yaml. Fix the problem by removing the incorrect 'uart-has-rtscts' property. Fixes: b8a559feffb2 ("ARM: dts: imx: add Gateworks Ventana GW5600 support") Signed-off-by: Fabio Estevam Acked-by: Tim Harvey Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-gw560x.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl-gw560x.dtsi b/arch/arm/boot/dts/imx6qdl-gw560x.dtsi index 4bc4371e6bae5..4b81a975c979d 100644 --- a/arch/arm/boot/dts/imx6qdl-gw560x.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw560x.dtsi @@ -632,7 +632,6 @@ &uart1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_uart1>; - uart-has-rtscts; rts-gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>; status = "okay"; }; From 0d1d030f00f3f3eea04017cbd50ffe44a2842ebc Mon Sep 17 00:00:00 2001 From: Emanuele Ghidoli Date: Thu, 1 Dec 2022 13:55:48 +0100 Subject: [PATCH 0215/1593] arm64: dts: verdin-imx8mm: fix dahlia audio playback Set optional `simple-audio-card,mclk-fs` parameter to ensure a proper clock to the wm8904 audio codec. Without this change with an audio stream rate of 44.1 kHz the playback is completely distorted. Fixes: 6a57f224f734 ("arm64: dts: freescale: add initial support for verdin imx8m mini") Signed-off-by: Emanuele Ghidoli Signed-off-by: Francesco Dolcini Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-verdin-dahlia.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin-dahlia.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin-dahlia.dtsi index c2a5c2f7b204b..7c3f5c54f0400 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-verdin-dahlia.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin-dahlia.dtsi @@ -9,6 +9,7 @@ simple-audio-card,bitclock-master = <&dailink_master>; simple-audio-card,format = "i2s"; simple-audio-card,frame-master = <&dailink_master>; + simple-audio-card,mclk-fs = <256>; simple-audio-card,name = "imx8mm-wm8904"; simple-audio-card,routing = "Headphone Jack", "HPOUTL", From f78985f9f58380eec37f82c8a2c765aa7670fc29 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 2 Dec 2022 17:49:18 +0100 Subject: [PATCH 0216/1593] ARM: dts: imx: Fix pca9547 i2c-mux node name "make dtbs_check": arch/arm/boot/dts/imx53-ppd.dtb: i2c-switch@70: $nodename:0: 'i2c-switch@70' does not match '^(i2c-?)?mux' From schema: Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml arch/arm/boot/dts/imx53-ppd.dtb: i2c-switch@70: Unevaluated properties are not allowed ('#address-cells', '#size-cells', 'i2c@0', 'i2c@1', 'i2c@2', 'i2c@3', 'i2c@4', 'i2c@5', 'i2c@6', 'i2c@7' were unexpected) From schema: Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml Fix this by renaming the PCA9547 node to "i2c-mux", to match the I2C bus multiplexer/switch DT bindings and the Generic Names Recommendation in the Devicetree Specification. Signed-off-by: Geert Uytterhoeven Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx53-ppd.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx53-ppd.dts b/arch/arm/boot/dts/imx53-ppd.dts index 37d0cffea99c5..70c4a4852256c 100644 --- a/arch/arm/boot/dts/imx53-ppd.dts +++ b/arch/arm/boot/dts/imx53-ppd.dts @@ -488,7 +488,7 @@ scl-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; status = "okay"; - i2c-switch@70 { + i2c-mux@70 { compatible = "nxp,pca9547"; #address-cells = <1>; #size-cells = <0>; From 42825d1f269355d63554ab3c3762611e4d8053e9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 2 Dec 2022 17:49:21 +0100 Subject: [PATCH 0217/1593] ARM: dts: vf610: Fix pca9548 i2c-mux node names "make dtbs_check": arch/arm/boot/dts/vf610-zii-dev-rev-b.dtb: tca9548@70: $nodename:0: 'tca9548@70' does not match '^(i2c-?)?mux' From schema: Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml arch/arm/boot/dts/vf610-zii-dev-rev-b.dtb: tca9548@70: Unevaluated properties are not allowed ('#address-cells', '#size-cells', 'i2c@0', 'i2c@1', 'i2c@2', 'i2c@3', 'i2c@4' were unexpected) From schema: /scratch/geert/linux/linux-renesas/Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml ... Fix this by renaming PCA9548 nodes to "i2c-mux", to match the I2C bus multiplexer/switch DT bindings and the Generic Names Recommendation in the Devicetree Specification. Signed-off-by: Geert Uytterhoeven Signed-off-by: Shawn Guo --- arch/arm/boot/dts/vf610-zii-dev-rev-b.dts | 2 +- arch/arm/boot/dts/vf610-zii-dev-rev-c.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts index 42ed4a04a12e2..6280c5e86a124 100644 --- a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts +++ b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts @@ -345,7 +345,7 @@ }; &i2c2 { - tca9548@70 { + i2c-mux@70 { compatible = "nxp,pca9548"; pinctrl-0 = <&pinctrl_i2c_mux_reset>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts index f892977da9e4e..c00d39562a10b 100644 --- a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts +++ b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts @@ -340,7 +340,7 @@ }; &i2c2 { - tca9548@70 { + i2c-mux@70 { compatible = "nxp,pca9548"; pinctrl-0 = <&pinctrl_i2c_mux_reset>; pinctrl-names = "default"; From b025b4f5c288e29bbea421613a5b4eacf9261fbb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 2 Dec 2022 17:49:22 +0100 Subject: [PATCH 0218/1593] arm64: dts: freescale: Fix pca954x i2c-mux node names "make dtbs_check": arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dtb: pca9547@77: $nodename:0: 'pca9547@77' does not match '^(i2c-?)?mux' From schema: Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dtb: pca9547@77: Unevaluated properties are not allowed ('#address-cells', '#size-cells', 'i2c@4' were unexpected) From schema: Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml ... Fix this by renaming PCA954x nodes to "i2c-mux", to match the I2C bus multiplexer/switch DT bindings and the Generic Names Recommendation in the Devicetree Specification. Signed-off-by: Geert Uytterhoeven Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts | 2 +- arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts | 2 +- arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts | 2 +- arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts | 2 +- arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts | 2 +- arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts | 2 +- arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi | 2 +- arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi | 2 +- arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi | 2 +- arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts | 2 +- arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts | 4 ++-- arch/arm64/boot/dts/freescale/imx8qxp-mek.dts | 2 +- 12 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts index 5a8d85a7d1612..bbdf989058ff7 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts @@ -110,7 +110,7 @@ &i2c0 { status = "okay"; - pca9547@77 { + i2c-mux@77 { compatible = "nxp,pca9547"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts index 9b726c2a48426..dda27ed7aaf2b 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts @@ -89,7 +89,7 @@ &i2c0 { status = "okay"; - pca9547@77 { + i2c-mux@77 { compatible = "nxp,pca9547"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts index b2fcbba60d3ac..3b0ed9305f2bd 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts @@ -88,7 +88,7 @@ &i2c0 { status = "okay"; - pca9547@77 { + i2c-mux@77 { compatible = "nxp,pca9547"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts index 41d8b15f25a54..aa52ff73ff9e0 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts @@ -53,7 +53,7 @@ &i2c0 { status = "okay"; - i2c-switch@77 { + i2c-mux@77 { compatible = "nxp,pca9547"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts index 1bfbce69cc8b7..ee8e932628d17 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts @@ -136,7 +136,7 @@ &i2c0 { status = "okay"; - i2c-switch@77 { + i2c-mux@77 { compatible = "nxp,pca9547"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts b/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts index ef6c8967533ef..d4867d6cf47cd 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts @@ -245,7 +245,7 @@ &i2c3 { status = "okay"; - i2c-switch@70 { + i2c-mux@70 { compatible = "nxp,pca9540"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi index f598669e742fc..52c5a43b30a0f 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi @@ -103,7 +103,7 @@ &i2c0 { status = "okay"; - pca9547@77 { + i2c-mux@77 { compatible = "nxp,pca9547"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi index 3d9647b3da147..537cecb13dd08 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi @@ -44,7 +44,7 @@ &i2c0 { status = "okay"; - pca9547@75 { + i2c-mux@75 { compatible = "nxp,pca9547"; reg = <0x75>; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi index afb455210bd07..d32a52ab00a42 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi @@ -54,7 +54,7 @@ &i2c0 { status = "okay"; - i2c-switch@77 { + i2c-mux@77 { compatible = "nxp,pca9547"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts b/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts index 74c09891600f2..6357078185edd 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts @@ -214,7 +214,7 @@ pinctrl-0 = <&pinctrl_i2c3>; status = "okay"; - i2cmux@70 { + i2c-mux@70 { compatible = "nxp,pca9540"; reg = <0x70>; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts b/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts index 9dda2a1554c32..8614c18b5998c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts +++ b/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts @@ -133,7 +133,7 @@ pinctrl-0 = <&pinctrl_i2c1>; status = "okay"; - i2cmux@70 { + i2c-mux@70 { compatible = "nxp,pca9546"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c1_pca9546>; @@ -216,7 +216,7 @@ pinctrl-0 = <&pinctrl_i2c4>; status = "okay"; - pca9546: i2cmux@70 { + pca9546: i2c-mux@70 { compatible = "nxp,pca9546"; reg = <0x70>; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts index 07d8dd8160f68..afa883389456c 100644 --- a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts +++ b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts @@ -61,7 +61,7 @@ pinctrl-0 = <&pinctrl_lpi2c1 &pinctrl_ioexp_rst>; status = "okay"; - i2c-switch@71 { + i2c-mux@71 { compatible = "nxp,pca9646", "nxp,pca9546"; #address-cells = <1>; #size-cells = <0>; From 5225ba9db112ec4ed67da5e4d8b72e618573955e Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Fri, 2 Dec 2022 13:10:37 -0600 Subject: [PATCH 0219/1593] arm64: dts: imx8mm-beacon: Fix ecspi2 pinmux Early hardware did not support hardware handshaking on the UART, but final production hardware did. When the hardware was updated the chip select was changed to facilitate hardware handshaking on UART3. Fix the ecspi2 pin mux to eliminate a pin conflict with UART3 and allow the EEPROM to operate again. Fixes: 4ce01ce36d77 ("arm64: dts: imx8mm-beacon: Enable RTS-CTS on UART3") Signed-off-by: Adam Ford Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi index 03266bd90a06b..169f047fbca50 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi @@ -120,7 +120,7 @@ &ecspi2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_espi2>; - cs-gpios = <&gpio5 9 GPIO_ACTIVE_LOW>; + cs-gpios = <&gpio5 13 GPIO_ACTIVE_LOW>; status = "okay"; eeprom@0 { @@ -316,7 +316,7 @@ MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0x82 MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0x82 MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0x82 - MX8MM_IOMUXC_ECSPI1_SS0_GPIO5_IO9 0x41 + MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0x41 >; }; From ef10d57936ead5e817ef7cea6a87531085e77773 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 4 Dec 2022 10:44:17 +0100 Subject: [PATCH 0220/1593] arm64: dts: imx8mq-thor96: fix no-mmc property for SDHCI There is no "no-emmc" property, so intention for SD/SDIO only nodes was to use "no-mmc". Signed-off-by: Krzysztof Kozlowski Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mq-thor96.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mq-thor96.dts b/arch/arm64/boot/dts/freescale/imx8mq-thor96.dts index 5d5aa6537225f..6e6182709d220 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-thor96.dts +++ b/arch/arm64/boot/dts/freescale/imx8mq-thor96.dts @@ -339,7 +339,7 @@ bus-width = <4>; non-removable; no-sd; - no-emmc; + no-mmc; status = "okay"; brcmf: wifi@1 { @@ -359,7 +359,7 @@ cd-gpios = <&gpio2 12 GPIO_ACTIVE_LOW>; bus-width = <4>; no-sdio; - no-emmc; + no-mmc; disable-wp; status = "okay"; }; From 6c620a30515c494b5eeb3dc0e40d3220ea04c53b Mon Sep 17 00:00:00 2001 From: Emanuele Ghidoli Date: Wed, 7 Dec 2022 11:27:49 +0100 Subject: [PATCH 0221/1593] arm64: dts: verdin-imx8mm: fix dev board audio playback Set optional `simple-audio-card,mclk-fs` parameter to ensure a proper clock to the nau8822 audio codec. Without this change with an audio stream rate of 44.1 kHz the playback is faster. Set the MCLK at the right frequency, codec can properly use it to generate 44.1 kHz I2S-FS. Fixes: 6a57f224f734 ("arm64: dts: freescale: add initial support for verdin imx8m mini") Signed-off-by: Emanuele Ghidoli Signed-off-by: Francesco Dolcini Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-verdin-dev.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin-dev.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin-dev.dtsi index 73cc3fafa0180..b2bcd22821702 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-verdin-dev.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin-dev.dtsi @@ -11,6 +11,7 @@ simple-audio-card,bitclock-master = <&dailink_master>; simple-audio-card,format = "i2s"; simple-audio-card,frame-master = <&dailink_master>; + simple-audio-card,mclk-fs = <256>; simple-audio-card,name = "imx8mm-nau8822"; simple-audio-card,routing = "Headphones", "LHP", From 62f0147fd4d86620853bee027800f988d3013656 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Thu, 8 Dec 2022 17:06:58 +0800 Subject: [PATCH 0222/1593] arm64: dts: imx93-11x11-evk: correct clock and strobe pad setting For clock and strobe pad of usdhc, need to config as pull down. Current pad config set these pad as both pull up and pull down, this is wrong, so fix it here. Find this issue when enable HS400ES mode on one Micron eMMC chip, CMD8 always meet CRC error in HS400ES/HS400 mode. Fixes: e37907bd8294 ("arm64: dts: freescale: add i.MX93 11x11 EVK basic support") Signed-off-by: Haibo Chen Reviewed-by: Peng Fan Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts b/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts index 69786c326db00..27f9a9f331346 100644 --- a/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts @@ -74,7 +74,7 @@ pinctrl_usdhc1: usdhc1grp { fsl,pins = < - MX93_PAD_SD1_CLK__USDHC1_CLK 0x17fe + MX93_PAD_SD1_CLK__USDHC1_CLK 0x15fe MX93_PAD_SD1_CMD__USDHC1_CMD 0x13fe MX93_PAD_SD1_DATA0__USDHC1_DATA0 0x13fe MX93_PAD_SD1_DATA1__USDHC1_DATA1 0x13fe @@ -84,7 +84,7 @@ MX93_PAD_SD1_DATA5__USDHC1_DATA5 0x13fe MX93_PAD_SD1_DATA6__USDHC1_DATA6 0x13fe MX93_PAD_SD1_DATA7__USDHC1_DATA7 0x13fe - MX93_PAD_SD1_STROBE__USDHC1_STROBE 0x17fe + MX93_PAD_SD1_STROBE__USDHC1_STROBE 0x15fe >; }; @@ -102,7 +102,7 @@ pinctrl_usdhc2: usdhc2grp { fsl,pins = < - MX93_PAD_SD2_CLK__USDHC2_CLK 0x17fe + MX93_PAD_SD2_CLK__USDHC2_CLK 0x15fe MX93_PAD_SD2_CMD__USDHC2_CMD 0x13fe MX93_PAD_SD2_DATA0__USDHC2_DATA0 0x13fe MX93_PAD_SD2_DATA1__USDHC2_DATA1 0x13fe From 87b30c4b0efb6a194a7b8eac2568a3da520d905f Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Thu, 8 Dec 2022 17:54:03 +0100 Subject: [PATCH 0223/1593] ARM: imx: add missing of_node_put() Calling of_find_compatible_node() returns a node pointer with refcount incremented. Use of_node_put() on it when done. The patch fixes the same problem on different i.MX platforms. Fixes: 8b88f7ef31dde ("ARM: mx25: Retrieve IIM base from dt") Fixes: 94b2bec1b0e05 ("ARM: imx27: Retrieve the SYSCTRL base address from devicetree") Fixes: 3172225d45bd9 ("ARM: imx31: Retrieve the IIM base address from devicetree") Fixes: f68ea682d1da7 ("ARM: imx35: Retrieve the IIM base address from devicetree") Fixes: ee18a7154ee08 ("ARM: imx5: retrieve iim base from device tree") Signed-off-by: Dario Binacchi Reviewed-by: Fabio Estevam Reviewed-by: Martin Kaiser Signed-off-by: Shawn Guo --- arch/arm/mach-imx/cpu-imx25.c | 1 + arch/arm/mach-imx/cpu-imx27.c | 1 + arch/arm/mach-imx/cpu-imx31.c | 1 + arch/arm/mach-imx/cpu-imx35.c | 1 + arch/arm/mach-imx/cpu-imx5.c | 1 + 5 files changed, 5 insertions(+) diff --git a/arch/arm/mach-imx/cpu-imx25.c b/arch/arm/mach-imx/cpu-imx25.c index 3e63445cde062..cc86977d0a340 100644 --- a/arch/arm/mach-imx/cpu-imx25.c +++ b/arch/arm/mach-imx/cpu-imx25.c @@ -23,6 +23,7 @@ static int mx25_read_cpu_rev(void) np = of_find_compatible_node(NULL, NULL, "fsl,imx25-iim"); iim_base = of_iomap(np, 0); + of_node_put(np); BUG_ON(!iim_base); rev = readl(iim_base + MXC_IIMSREV); iounmap(iim_base); diff --git a/arch/arm/mach-imx/cpu-imx27.c b/arch/arm/mach-imx/cpu-imx27.c index bf70e13bbe9ee..1d28939083683 100644 --- a/arch/arm/mach-imx/cpu-imx27.c +++ b/arch/arm/mach-imx/cpu-imx27.c @@ -28,6 +28,7 @@ static int mx27_read_cpu_rev(void) np = of_find_compatible_node(NULL, NULL, "fsl,imx27-ccm"); ccm_base = of_iomap(np, 0); + of_node_put(np); BUG_ON(!ccm_base); /* * now we have access to the IO registers. As we need diff --git a/arch/arm/mach-imx/cpu-imx31.c b/arch/arm/mach-imx/cpu-imx31.c index b9c24b851d1ab..35c544924e509 100644 --- a/arch/arm/mach-imx/cpu-imx31.c +++ b/arch/arm/mach-imx/cpu-imx31.c @@ -39,6 +39,7 @@ static int mx31_read_cpu_rev(void) np = of_find_compatible_node(NULL, NULL, "fsl,imx31-iim"); iim_base = of_iomap(np, 0); + of_node_put(np); BUG_ON(!iim_base); /* read SREV register from IIM module */ diff --git a/arch/arm/mach-imx/cpu-imx35.c b/arch/arm/mach-imx/cpu-imx35.c index 80e7d8ab9f1bb..1fe75b39c2d99 100644 --- a/arch/arm/mach-imx/cpu-imx35.c +++ b/arch/arm/mach-imx/cpu-imx35.c @@ -21,6 +21,7 @@ static int mx35_read_cpu_rev(void) np = of_find_compatible_node(NULL, NULL, "fsl,imx35-iim"); iim_base = of_iomap(np, 0); + of_node_put(np); BUG_ON(!iim_base); rev = imx_readl(iim_base + MXC_IIMSREV); diff --git a/arch/arm/mach-imx/cpu-imx5.c b/arch/arm/mach-imx/cpu-imx5.c index ad56263778f93..a67c89bf155dd 100644 --- a/arch/arm/mach-imx/cpu-imx5.c +++ b/arch/arm/mach-imx/cpu-imx5.c @@ -28,6 +28,7 @@ static u32 imx5_read_srev_reg(const char *compat) np = of_find_compatible_node(NULL, NULL, compat); iim_base = of_iomap(np, 0); + of_node_put(np); WARN_ON(!iim_base); srev = readl(iim_base + IIM_SREV) & 0xff; From c10a5855488bbd9912ddae3ad2d1f2e4b1a3c275 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 16 Dec 2022 01:05:26 +0100 Subject: [PATCH 0224/1593] arm64: dts: imx8mm: Drop xtal clock specifier from eDM SBC The clk_xtal32k have clock-cells = <0>, drop the bogus specifier. Fixes: 9509593f327a ("arm64: dts: imx8mm: Model PMIC to SNVS RTC clock path on Data Modul i.MX8M Mini eDM SBC") Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-data-modul-edm-sbc.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-data-modul-edm-sbc.dts b/arch/arm64/boot/dts/freescale/imx8mm-data-modul-edm-sbc.dts index 24f61db33eba2..752f409a30b16 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-data-modul-edm-sbc.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-data-modul-edm-sbc.dts @@ -275,7 +275,7 @@ compatible = "rohm,bd71847"; reg = <0x4b>; #clock-cells = <0>; - clocks = <&clk_xtal32k 0>; + clocks = <&clk_xtal32k>; clock-output-names = "clk-32k-out"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; From 21b84ebeee79d91e405f87f051e9489ef30ecad6 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 26 Aug 2022 21:13:03 +0200 Subject: [PATCH 0225/1593] soc: imx: imx8mp-blk-ctrl: don't set power device name Setting the device name after it has been registered confuses the sysfs cleanup paths. This has already been fixed for the imx8m-blk-ctrl driver in b64b46fbaa1d ("Revert "soc: imx: imx8m-blk-ctrl: set power device name""), but the same problem exists in imx8mp-blk-ctrl. Fixes: 556f5cf9568a ("soc: imx: add i.MX8MP HSIO blk-ctrl") Signed-off-by: Lucas Stach Reviewed-by: Peng Fan Signed-off-by: Shawn Guo --- drivers/soc/imx/imx8mp-blk-ctrl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/soc/imx/imx8mp-blk-ctrl.c b/drivers/soc/imx/imx8mp-blk-ctrl.c index 9852714eb2a41..0f13853901dfe 100644 --- a/drivers/soc/imx/imx8mp-blk-ctrl.c +++ b/drivers/soc/imx/imx8mp-blk-ctrl.c @@ -592,7 +592,6 @@ static int imx8mp_blk_ctrl_probe(struct platform_device *pdev) ret = PTR_ERR(domain->power_dev); goto cleanup_pds; } - dev_set_name(domain->power_dev, "%s", data->name); domain->genpd.name = data->name; domain->genpd.power_on = imx8mp_blk_ctrl_power_on; From b3b75ace2085aca623c57e04ea7218ae690090fb Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sat, 17 Dec 2022 12:08:48 -0600 Subject: [PATCH 0226/1593] arm64: dts: imx8mp: Fix missing GPC Interrupt The GPC node references an interrupt parent, but it doesn't state the interrupt itself. According to the TRM, this IRQ is 87. This also eliminate an error detected from dt_binding_check Fixes: fc0f05124621 ("arm64: dts: imx8mp: add GPC node with GPU power domains") Signed-off-by: Adam Ford Reviewed-by: Laurent Pinchart Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index 6a04d81c69e65..f26b1b26f4a33 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -524,6 +524,7 @@ compatible = "fsl,imx8mp-gpc"; reg = <0x303a0000 0x1000>; interrupt-parent = <&gic>; + interrupts = ; interrupt-controller; #interrupt-cells = <3>; From 10e2f328bd900787fd2db24e474f87e1d525ccc4 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sat, 17 Dec 2022 12:08:49 -0600 Subject: [PATCH 0227/1593] arm64: dts: imx8mp: Fix power-domain typo dt_binding_check detects an issue with the pgc_hsiomix power domain: pgc: 'power-domains@17' does not match any of the regexes This is because 'power-domains' should be 'power-domain' Fixes: 2ae42e0c0b67 ("arm64: dts: imx8mp: add HSIO power-domains") Signed-off-by: Adam Ford Reviewed-by: Laurent Pinchart Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index f26b1b26f4a33..03034b439c1f7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -591,7 +591,7 @@ reg = ; }; - pgc_hsiomix: power-domains@17 { + pgc_hsiomix: power-domain@17 { #power-domain-cells = <0>; reg = ; clocks = <&clk IMX8MP_CLK_HSIO_AXI>, From 9a65c759e4666476a6642ad87b4db8a67cb957d0 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Sun, 1 Jan 2023 12:50:42 +0800 Subject: [PATCH 0228/1593] arm64: dts: imx8mp-evk: pcie0-refclk cosmetic cleanup Use the correct indention. Fixes: d50650500064 ("arm64: dts: imx8mp-evk: Add PCIe support") Signed-off-by: Marco Felsch Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp-evk.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index d4c7ca16abd05..f2d93437084be 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -36,8 +36,8 @@ pcie0_refclk: pcie0-refclk { compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <100000000>; + #clock-cells = <0>; + clock-frequency = <100000000>; }; reg_can1_stby: regulator-can1-stby { From 38b50aa44495d5eb4218f0b82fc2da76505cec53 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 28 Dec 2022 14:56:09 +0200 Subject: [PATCH 0229/1593] RDMA/mlx5: Fix mlx5_ib_get_hw_stats when used for device Currently, when mlx5_ib_get_hw_stats() is used for device (port_num = 0), there is a special handling in order to use the correct counters, but, port_num is being passed down the stack without any change. Also, some functions assume that port_num >=1. As a result, the following oops can occur. BUG: unable to handle page fault for address: ffff89510294f1a8 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] SMP CPU: 8 PID: 1382 Comm: devlink Tainted: G W 6.1.0-rc4_for_upstream_base_2022_11_10_16_12 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:_raw_spin_lock+0xc/0x20 Call Trace: mlx5_ib_get_native_port_mdev+0x73/0xe0 [mlx5_ib] do_get_hw_stats.constprop.0+0x109/0x160 [mlx5_ib] mlx5_ib_get_hw_stats+0xad/0x180 [mlx5_ib] ib_setup_device_attrs+0xf0/0x290 [ib_core] ib_register_device+0x3bb/0x510 [ib_core] ? atomic_notifier_chain_register+0x67/0x80 __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5r_probe+0xb8/0x150 [mlx5_ib] ? auxiliary_match_id+0x6a/0x90 auxiliary_bus_probe+0x3c/0x70 ? driver_sysfs_add+0x6b/0x90 really_probe+0xcd/0x380 __driver_probe_device+0x80/0x170 driver_probe_device+0x1e/0x90 __device_attach_driver+0x7d/0x100 ? driver_allows_async_probing+0x60/0x60 ? driver_allows_async_probing+0x60/0x60 bus_for_each_drv+0x7b/0xc0 __device_attach+0xbc/0x200 bus_probe_device+0x87/0xa0 device_add+0x404/0x940 ? dev_set_name+0x53/0x70 __auxiliary_device_add+0x43/0x60 add_adev+0x99/0xe0 [mlx5_core] mlx5_attach_device+0xc8/0x120 [mlx5_core] mlx5_load_one_devl_locked+0xb2/0xe0 [mlx5_core] devlink_reload+0x133/0x250 devlink_nl_cmd_reload+0x480/0x570 ? devlink_nl_pre_doit+0x44/0x2b0 genl_family_rcv_msg_doit.isra.0+0xc2/0x110 genl_rcv_msg+0x180/0x2b0 ? devlink_nl_cmd_region_read_dumpit+0x540/0x540 ? devlink_reload+0x250/0x250 ? devlink_put+0x50/0x50 ? genl_family_rcv_msg_doit.isra.0+0x110/0x110 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1f6/0x2c0 netlink_sendmsg+0x237/0x490 sock_sendmsg+0x33/0x40 __sys_sendto+0x103/0x160 ? handle_mm_fault+0x10e/0x290 ? do_user_addr_fault+0x1c0/0x5f0 __x64_sys_sendto+0x25/0x30 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fix it by setting port_num to 1 in order to get device status and remove unused variable. Fixes: aac4492ef23a ("IB/mlx5: Update counter implementation for dual port RoCE") Link: https://lore.kernel.org/r/98b82994c3cd3fa593b8a75ed3f3901e208beb0f.1672231736.git.leonro@nvidia.com Signed-off-by: Shay Drory Reviewed-by: Patrisious Haddad Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/counters.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 945758f395236..3e1272695d993 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -278,7 +278,6 @@ static int do_get_hw_stats(struct ib_device *ibdev, const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); struct mlx5_core_dev *mdev; int ret, num_counters; - u32 mdev_port_num; if (!stats) return -EINVAL; @@ -299,8 +298,9 @@ static int do_get_hw_stats(struct ib_device *ibdev, } if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - mdev = mlx5_ib_get_native_port_mdev(dev, port_num, - &mdev_port_num); + if (!port_num) + port_num = 1; + mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL); if (!mdev) { /* If port is not affiliated yet, its in down state * which doesn't have any counters yet, so it would be From 8de8482fe5732fbef4f5af82bc0c0362c804cd1f Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 28 Dec 2022 14:56:10 +0200 Subject: [PATCH 0230/1593] RDMA/mlx5: Fix validation of max_rd_atomic caps for DC Currently, when modifying DC, we validate max_rd_atomic user attribute against the RC cap, validate against DC. RC and DC QP types have different device limitations. This can cause userspace created DC QPs to malfunction. Fixes: c32a4f296e1d ("IB/mlx5: Add support for DC Initiator QP") Link: https://lore.kernel.org/r/0c5aee72cea188c3bb770f4207cce7abc9b6fc74.1672231736.git.leonro@nvidia.com Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/qp.c | 49 +++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 40d9410ec3033..cf953d23d18da 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4502,6 +4502,40 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev, return false; } +static int validate_rd_atomic(struct mlx5_ib_dev *dev, struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_type qp_type) +{ + int log_max_ra_res; + int log_max_ra_req; + + if (qp_type == MLX5_IB_QPT_DCI) { + log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev, + log_max_ra_res_dc); + log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev, + log_max_ra_req_dc); + } else { + log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev, + log_max_ra_res_qp); + log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev, + log_max_ra_req_qp); + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > log_max_ra_res) { + mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", + attr->max_rd_atomic); + return false; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > log_max_ra_req) { + mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", + attr->max_dest_rd_atomic); + return false; + } + return true; +} + int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -4589,21 +4623,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && - attr->max_rd_atomic > - (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) { - mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", - attr->max_rd_atomic); - goto out; - } - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && - attr->max_dest_rd_atomic > - (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) { - mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", - attr->max_dest_rd_atomic); + if (!validate_rd_atomic(dev, attr, attr_mask, qp_type)) goto out; - } if (cur_state == new_state && cur_state == IB_QPS_RESET) { err = 0; From 9807ae69746196ee4bbffe7d22d22ab2b61c6ed0 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 29 Dec 2022 17:33:32 +0100 Subject: [PATCH 0231/1593] net: dsa: qca8k: fix wrong length value for mgmt eth packet The assumption that Documentation was right about how this value work was wrong. It was discovered that the length value of the mgmt header is in step of word size. As an example to process 4 byte of data the correct length to set is 2. To process 8 byte 4, 12 byte 6, 16 byte 8... Odd values will always return the next size on the ack packet. (length of 3 (6 byte) will always return 8 bytes of data) This means that a value of 15 (0xf) actually means reading/writing 32 bytes of data instead of 16 bytes. This behaviour is totally absent and not documented in the switch Documentation. In fact from Documentation the max value that mgmt eth can process is 16 byte of data while in reality it can process 32 bytes at once. To handle this we always round up the length after deviding it for word size. We check if the result is odd and we round another time to align to what the switch will provide in the ack packet. The workaround for the length limit of 15 is still needed as the length reg max value is 0xf(15) Reported-by: Ronald Wahl Tested-by: Ronald Wahl Fixes: 90386223f44e ("net: dsa: qca8k: add support for larger read/write size with mgmt Ethernet") Signed-off-by: Christian Marangi Cc: stable@vger.kernel.org # v5.18+ Signed-off-by: David S. Miller --- drivers/net/dsa/qca/qca8k-8xxx.c | 45 +++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index c5c3b4e92f28b..46151320b2a8f 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -146,7 +146,16 @@ static void qca8k_rw_reg_ack_handler(struct dsa_switch *ds, struct sk_buff *skb) command = get_unaligned_le32(&mgmt_ethhdr->command); cmd = FIELD_GET(QCA_HDR_MGMT_CMD, command); + len = FIELD_GET(QCA_HDR_MGMT_LENGTH, command); + /* Special case for len of 15 as this is the max value for len and needs to + * be increased before converting it from word to dword. + */ + if (len == 15) + len++; + + /* We can ignore odd value, we always round up them in the alloc function. */ + len *= sizeof(u16); /* Make sure the seq match the requested packet */ if (get_unaligned_le32(&mgmt_ethhdr->seq) == mgmt_eth_data->seq) @@ -193,17 +202,33 @@ static struct sk_buff *qca8k_alloc_mdio_header(enum mdio_cmd cmd, u32 reg, u32 * if (!skb) return NULL; - /* Max value for len reg is 15 (0xf) but the switch actually return 16 byte - * Actually for some reason the steps are: - * 0: nothing - * 1-4: first 4 byte - * 5-6: first 12 byte - * 7-15: all 16 byte + /* Hdr mgmt length value is in step of word size. + * As an example to process 4 byte of data the correct length to set is 2. + * To process 8 byte 4, 12 byte 6, 16 byte 8... + * + * Odd values will always return the next size on the ack packet. + * (length of 3 (6 byte) will always return 8 bytes of data) + * + * This means that a value of 15 (0xf) actually means reading/writing 32 bytes + * of data. + * + * To correctly calculate the length we devide the requested len by word and + * round up. + * On the ack function we can skip the odd check as we already handle the + * case here. + */ + real_len = DIV_ROUND_UP(len, sizeof(u16)); + + /* We check if the result len is odd and we round up another time to + * the next size. (length of 3 will be increased to 4 as switch will always + * return 8 bytes) */ - if (len == 16) - real_len = 15; - else - real_len = len; + if (real_len % sizeof(u16) != 0) + real_len++; + + /* Max reg value is 0xf(15) but switch will always return the next size (32 byte) */ + if (real_len == 16) + real_len--; skb_reset_mac_header(skb); skb_set_network_header(skb, skb->len); From d9dba91be71f03cc75bcf39fc0d5d99ff33f1ae0 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 29 Dec 2022 17:33:33 +0100 Subject: [PATCH 0232/1593] net: dsa: tag_qca: fix wrong MGMT_DATA2 size It was discovered that MGMT_DATA2 can contain up to 28 bytes of data instead of the 12 bytes written in the Documentation by accounting the limit of 16 bytes declared in Documentation subtracting the first 4 byte in the packet header. Update the define with the real world value. Tested-by: Ronald Wahl Fixes: c2ee8181fddb ("net: dsa: tag_qca: add define for handling mgmt Ethernet packet") Signed-off-by: Christian Marangi Cc: stable@vger.kernel.org # v5.18+ Signed-off-by: David S. Miller --- include/linux/dsa/tag_qca.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h index b1b5720d89a59..ee657452f122a 100644 --- a/include/linux/dsa/tag_qca.h +++ b/include/linux/dsa/tag_qca.h @@ -45,8 +45,8 @@ struct sk_buff; QCA_HDR_MGMT_COMMAND_LEN + \ QCA_HDR_MGMT_DATA1_LEN) -#define QCA_HDR_MGMT_DATA2_LEN 12 /* Other 12 byte for the mdio data */ -#define QCA_HDR_MGMT_PADDING_LEN 34 /* Padding to reach the min Ethernet packet */ +#define QCA_HDR_MGMT_DATA2_LEN 28 /* Other 28 byte for the mdio data */ +#define QCA_HDR_MGMT_PADDING_LEN 18 /* Padding to reach the min Ethernet packet */ #define QCA_HDR_MGMT_PKT_LEN (QCA_HDR_MGMT_HEADER_LEN + \ QCA_HDR_LEN + \ From 03cb9e6d0b32b768e3d9d473c5c4ca1100877664 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 29 Dec 2022 17:33:34 +0100 Subject: [PATCH 0233/1593] Revert "net: dsa: qca8k: cache lo and hi for mdio write" This reverts commit 2481d206fae7884cd07014fd1318e63af35e99eb. The Documentation is very confusing about the topic. The cache logic for hi and lo is wrong and actually miss some regs to be actually written. What the Documentation actually intended was that it's possible to skip writing hi OR lo if half of the reg is not needed to be written or read. Revert the change in favor of a better and correct implementation. Reported-by: Ronald Wahl Signed-off-by: Christian Marangi Cc: stable@vger.kernel.org # v5.18+ Signed-off-by: David S. Miller --- drivers/net/dsa/qca/qca8k-8xxx.c | 61 +++++++------------------------- drivers/net/dsa/qca/qca8k.h | 5 --- 2 files changed, 12 insertions(+), 54 deletions(-) diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index 46151320b2a8f..fbcd5c2b13aeb 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -36,44 +36,6 @@ qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page) *page = regaddr & 0x3ff; } -static int -qca8k_set_lo(struct qca8k_priv *priv, int phy_id, u32 regnum, u16 lo) -{ - u16 *cached_lo = &priv->mdio_cache.lo; - struct mii_bus *bus = priv->bus; - int ret; - - if (lo == *cached_lo) - return 0; - - ret = bus->write(bus, phy_id, regnum, lo); - if (ret < 0) - dev_err_ratelimited(&bus->dev, - "failed to write qca8k 32bit lo register\n"); - - *cached_lo = lo; - return 0; -} - -static int -qca8k_set_hi(struct qca8k_priv *priv, int phy_id, u32 regnum, u16 hi) -{ - u16 *cached_hi = &priv->mdio_cache.hi; - struct mii_bus *bus = priv->bus; - int ret; - - if (hi == *cached_hi) - return 0; - - ret = bus->write(bus, phy_id, regnum, hi); - if (ret < 0) - dev_err_ratelimited(&bus->dev, - "failed to write qca8k 32bit hi register\n"); - - *cached_hi = hi; - return 0; -} - static int qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val) { @@ -97,7 +59,7 @@ qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val) } static void -qca8k_mii_write32(struct qca8k_priv *priv, int phy_id, u32 regnum, u32 val) +qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val) { u16 lo, hi; int ret; @@ -105,9 +67,12 @@ qca8k_mii_write32(struct qca8k_priv *priv, int phy_id, u32 regnum, u32 val) lo = val & 0xffff; hi = (u16)(val >> 16); - ret = qca8k_set_lo(priv, phy_id, regnum, lo); + ret = bus->write(bus, phy_id, regnum, lo); if (ret >= 0) - ret = qca8k_set_hi(priv, phy_id, regnum + 1, hi); + ret = bus->write(bus, phy_id, regnum + 1, hi); + if (ret < 0) + dev_err_ratelimited(&bus->dev, + "failed to write qca8k 32bit register\n"); } static int @@ -442,7 +407,7 @@ qca8k_regmap_write(void *ctx, uint32_t reg, uint32_t val) if (ret < 0) goto exit; - qca8k_mii_write32(priv, 0x10 | r2, r1, val); + qca8k_mii_write32(bus, 0x10 | r2, r1, val); exit: mutex_unlock(&bus->mdio_lock); @@ -475,7 +440,7 @@ qca8k_regmap_update_bits(void *ctx, uint32_t reg, uint32_t mask, uint32_t write_ val &= ~mask; val |= write_val; - qca8k_mii_write32(priv, 0x10 | r2, r1, val); + qca8k_mii_write32(bus, 0x10 | r2, r1, val); exit: mutex_unlock(&bus->mdio_lock); @@ -750,14 +715,14 @@ qca8k_mdio_write(struct qca8k_priv *priv, int phy, int regnum, u16 data) if (ret) goto exit; - qca8k_mii_write32(priv, 0x10 | r2, r1, val); + qca8k_mii_write32(bus, 0x10 | r2, r1, val); ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL, QCA8K_MDIO_MASTER_BUSY); exit: /* even if the busy_wait timeouts try to clear the MASTER_EN */ - qca8k_mii_write32(priv, 0x10 | r2, r1, 0); + qca8k_mii_write32(bus, 0x10 | r2, r1, 0); mutex_unlock(&bus->mdio_lock); @@ -787,7 +752,7 @@ qca8k_mdio_read(struct qca8k_priv *priv, int phy, int regnum) if (ret) goto exit; - qca8k_mii_write32(priv, 0x10 | r2, r1, val); + qca8k_mii_write32(bus, 0x10 | r2, r1, val); ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL, QCA8K_MDIO_MASTER_BUSY); @@ -798,7 +763,7 @@ qca8k_mdio_read(struct qca8k_priv *priv, int phy, int regnum) exit: /* even if the busy_wait timeouts try to clear the MASTER_EN */ - qca8k_mii_write32(priv, 0x10 | r2, r1, 0); + qca8k_mii_write32(bus, 0x10 | r2, r1, 0); mutex_unlock(&bus->mdio_lock); @@ -1968,8 +1933,6 @@ qca8k_sw_probe(struct mdio_device *mdiodev) } priv->mdio_cache.page = 0xffff; - priv->mdio_cache.lo = 0xffff; - priv->mdio_cache.hi = 0xffff; /* Check the detected switch id */ ret = qca8k_read_switch_id(priv); diff --git a/drivers/net/dsa/qca/qca8k.h b/drivers/net/dsa/qca/qca8k.h index 0b7a5cb123216..03514f7a20bec 100644 --- a/drivers/net/dsa/qca/qca8k.h +++ b/drivers/net/dsa/qca/qca8k.h @@ -375,11 +375,6 @@ struct qca8k_mdio_cache { * mdio writes */ u16 page; -/* lo and hi can also be cached and from Documentation we can skip one - * extra mdio write if lo or hi is didn't change. - */ - u16 lo; - u16 hi; }; struct qca8k_pcs { From cfbd6de588ef659c198083205dc954a6d3ed2aec Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 29 Dec 2022 17:33:35 +0100 Subject: [PATCH 0234/1593] net: dsa: qca8k: introduce single mii read/write lo/hi It may be useful to read/write just the lo or hi half of a reg. This is especially useful for phy poll with the use of mdio master. The mdio master reg is composed by the first 16 bit related to setup and the other half with the returned data or data to write. Refactor the mii function to permit single mii read/write of lo or hi half of the reg. Tested-by: Ronald Wahl Signed-off-by: Christian Marangi Signed-off-by: David S. Miller --- drivers/net/dsa/qca/qca8k-8xxx.c | 106 ++++++++++++++++++++++++------- 1 file changed, 84 insertions(+), 22 deletions(-) diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index fbcd5c2b13aeb..92c4bfef7c974 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -37,42 +37,104 @@ qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page) } static int -qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val) +qca8k_mii_write_lo(struct mii_bus *bus, int phy_id, u32 regnum, u32 val) { int ret; + u16 lo; - ret = bus->read(bus, phy_id, regnum); - if (ret >= 0) { - *val = ret; - ret = bus->read(bus, phy_id, regnum + 1); - *val |= ret << 16; - } + lo = val & 0xffff; + ret = bus->write(bus, phy_id, regnum, lo); + if (ret < 0) + dev_err_ratelimited(&bus->dev, + "failed to write qca8k 32bit lo register\n"); - if (ret < 0) { + return ret; +} + +static int +qca8k_mii_write_hi(struct mii_bus *bus, int phy_id, u32 regnum, u32 val) +{ + int ret; + u16 hi; + + hi = (u16)(val >> 16); + ret = bus->write(bus, phy_id, regnum, hi); + if (ret < 0) dev_err_ratelimited(&bus->dev, - "failed to read qca8k 32bit register\n"); - *val = 0; - return ret; - } + "failed to write qca8k 32bit hi register\n"); + + return ret; +} + +static int +qca8k_mii_read_lo(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val) +{ + int ret; + + ret = bus->read(bus, phy_id, regnum); + if (ret < 0) + goto err; + *val = ret & 0xffff; return 0; + +err: + dev_err_ratelimited(&bus->dev, + "failed to read qca8k 32bit lo register\n"); + *val = 0; + + return ret; } -static void -qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val) +static int +qca8k_mii_read_hi(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val) { - u16 lo, hi; int ret; - lo = val & 0xffff; - hi = (u16)(val >> 16); + ret = bus->read(bus, phy_id, regnum); + if (ret < 0) + goto err; - ret = bus->write(bus, phy_id, regnum, lo); - if (ret >= 0) - ret = bus->write(bus, phy_id, regnum + 1, hi); + *val = ret << 16; + return 0; + +err: + dev_err_ratelimited(&bus->dev, + "failed to read qca8k 32bit hi register\n"); + *val = 0; + + return ret; +} + +static int +qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val) +{ + u32 hi, lo; + int ret; + + *val = 0; + + ret = qca8k_mii_read_lo(bus, phy_id, regnum, &lo); if (ret < 0) - dev_err_ratelimited(&bus->dev, - "failed to write qca8k 32bit register\n"); + goto err; + + ret = qca8k_mii_read_hi(bus, phy_id, regnum + 1, &hi); + if (ret < 0) + goto err; + + *val = lo | hi; + +err: + return ret; +} + +static void +qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val) +{ + if (qca8k_mii_write_lo(bus, phy_id, regnum, val) < 0) + return; + + qca8k_mii_write_hi(bus, phy_id, regnum + 1, val); } static int From a4165830ca237f2b3318faf62562bce8ce12a389 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 29 Dec 2022 17:33:36 +0100 Subject: [PATCH 0235/1593] net: dsa: qca8k: improve mdio master read/write by using single lo/hi Improve mdio master read/write by using singe mii read/write lo/hi. In a read and write we need to poll the mdio master regs in a busy loop to check for a specific bit present in the upper half of the reg. We can ignore the other half since it won't contain useful data. This will save an additional useless read for each read and write operation. In a read operation the returned data is present in the mdio master reg lower half. We can ignore the other half since it won't contain useful data. This will save an additional useless read for each read operation. In a read operation it's needed to just set the hi half of the mdio master reg as the lo half will be replaced by the result. This will save an additional useless write for each read operation. Tested-by: Ronald Wahl Signed-off-by: Christian Marangi Signed-off-by: David S. Miller --- drivers/net/dsa/qca/qca8k-8xxx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index 92c4bfef7c974..2f224b166bbb3 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -740,9 +740,9 @@ qca8k_mdio_busy_wait(struct mii_bus *bus, u32 reg, u32 mask) qca8k_split_addr(reg, &r1, &r2, &page); - ret = read_poll_timeout(qca8k_mii_read32, ret1, !(val & mask), 0, + ret = read_poll_timeout(qca8k_mii_read_hi, ret1, !(val & mask), 0, QCA8K_BUSY_WAIT_TIMEOUT * USEC_PER_MSEC, false, - bus, 0x10 | r2, r1, &val); + bus, 0x10 | r2, r1 + 1, &val); /* Check if qca8k_read has failed for a different reason * before returnting -ETIMEDOUT @@ -784,7 +784,7 @@ qca8k_mdio_write(struct qca8k_priv *priv, int phy, int regnum, u16 data) exit: /* even if the busy_wait timeouts try to clear the MASTER_EN */ - qca8k_mii_write32(bus, 0x10 | r2, r1, 0); + qca8k_mii_write_hi(bus, 0x10 | r2, r1 + 1, 0); mutex_unlock(&bus->mdio_lock); @@ -814,18 +814,18 @@ qca8k_mdio_read(struct qca8k_priv *priv, int phy, int regnum) if (ret) goto exit; - qca8k_mii_write32(bus, 0x10 | r2, r1, val); + qca8k_mii_write_hi(bus, 0x10 | r2, r1 + 1, val); ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL, QCA8K_MDIO_MASTER_BUSY); if (ret) goto exit; - ret = qca8k_mii_read32(bus, 0x10 | r2, r1, &val); + ret = qca8k_mii_read_lo(bus, 0x10 | r2, r1, &val); exit: /* even if the busy_wait timeouts try to clear the MASTER_EN */ - qca8k_mii_write32(bus, 0x10 | r2, r1, 0); + qca8k_mii_write_hi(bus, 0x10 | r2, r1 + 1, 0); mutex_unlock(&bus->mdio_lock); From 6d4cfcf97986cc67635630a2bc1f8d5c92ecdbba Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 29 Dec 2022 15:21:20 -0500 Subject: [PATCH 0236/1593] net: phy: Update documentation for get_rate_matching Now that phylink no longer calls phy_get_rate_matching with PHY_INTERFACE_MODE_NA, phys no longer need to support it. Remove the documentation mandating support. Fixes: 7642cc28fd37 ("net: phylink: fix PHY validation with rate adaption") Signed-off-by: Sean Anderson Signed-off-by: David S. Miller --- include/linux/phy.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/linux/phy.h b/include/linux/phy.h index 71eeb4e3b1fde..6378c997ded56 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -826,10 +826,7 @@ struct phy_driver { * whether to advertise lower-speed modes for that interface. It is * assumed that if a rate matching mode is supported on an interface, * then that interface's rate can be adapted to all slower link speeds - * supported by the phy. If iface is %PHY_INTERFACE_MODE_NA, and the phy - * supports any kind of rate matching for any interface, then it must - * return that rate matching mode (preferring %RATE_MATCH_PAUSE to - * %RATE_MATCH_CRS). If the interface is not supported, this should + * supported by the phy. If the interface is not supported, this should * return %RATE_MATCH_NONE. */ int (*get_rate_matching)(struct phy_device *phydev, From 9c4d7f45d60745a1cea0e841fa5e3444c398d2f1 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Fri, 30 Dec 2022 17:18:28 +0800 Subject: [PATCH 0237/1593] selftests: net: fix cleanup_v6() for arp_ndisc_evict_nocarrier The cleanup_v6() will cause the arp_ndisc_evict_nocarrier script exit with 255 (No such file or directory), even the tests are good: # selftests: net: arp_ndisc_evict_nocarrier.sh # run arp_evict_nocarrier=1 test # RTNETLINK answers: File exists # ok # run arp_evict_nocarrier=0 test # RTNETLINK answers: File exists # ok # run all.arp_evict_nocarrier=0 test # RTNETLINK answers: File exists # ok # run ndisc_evict_nocarrier=1 test # ok # run ndisc_evict_nocarrier=0 test # ok # run all.ndisc_evict_nocarrier=0 test # ok not ok 1 selftests: net: arp_ndisc_evict_nocarrier.sh # exit=255 This is because it's trying to modify the parameter for ipv4 instead. Also, tests for ipv6 (run_ndisc_evict_nocarrier_enabled() and run_ndisc_evict_nocarrier_disabled() are working on veth1, reflect this fact in cleanup_v6(). Fixes: f86ca07eb531 ("selftests: net: add arp_ndisc_evict_nocarrier") Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller --- tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh index b5af08af85595..b4ec1eeee6c90 100755 --- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh +++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh @@ -24,8 +24,8 @@ cleanup_v6() ip netns del me ip netns del peer - sysctl -w net.ipv4.conf.veth0.ndisc_evict_nocarrier=1 >/dev/null 2>&1 - sysctl -w net.ipv4.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1 + sysctl -w net.ipv6.conf.veth1.ndisc_evict_nocarrier=1 >/dev/null 2>&1 + sysctl -w net.ipv6.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1 } create_ns() From 1856628baa17032531916984808d1bdfd62700d4 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Fri, 30 Dec 2022 17:18:29 +0800 Subject: [PATCH 0238/1593] selftests: net: return non-zero for failures reported in arp_ndisc_evict_nocarrier Return non-zero return value if there is any failure reported in this script during the test. Otherwise it can only reflect the status of the last command. Fixes: f86ca07eb531 ("selftests: net: add arp_ndisc_evict_nocarrier") Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller --- .../selftests/net/arp_ndisc_evict_nocarrier.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh index b4ec1eeee6c90..4a110bb01e53e 100755 --- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh +++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh @@ -18,6 +18,7 @@ readonly V4_ADDR1=10.0.10.2 readonly V6_ADDR0=2001:db8:91::1 readonly V6_ADDR1=2001:db8:91::2 nsid=100 +ret=0 cleanup_v6() { @@ -61,7 +62,7 @@ setup_v6() { if [ $? -ne 0 ]; then cleanup_v6 echo "failed" - exit + exit 1 fi # Set veth2 down, which will put veth1 in NOCARRIER state @@ -88,7 +89,7 @@ setup_v4() { if [ $? -ne 0 ]; then cleanup_v4 echo "failed" - exit + exit 1 fi # Set veth1 down, which will put veth0 in NOCARRIER state @@ -115,6 +116,7 @@ run_arp_evict_nocarrier_enabled() { if [ $? -eq 0 ];then echo "failed" + ret=1 else echo "ok" fi @@ -134,6 +136,7 @@ run_arp_evict_nocarrier_disabled() { echo "ok" else echo "failed" + ret=1 fi cleanup_v4 @@ -164,6 +167,7 @@ run_ndisc_evict_nocarrier_enabled() { if [ $? -eq 0 ];then echo "failed" + ret=1 else echo "ok" fi @@ -182,6 +186,7 @@ run_ndisc_evict_nocarrier_disabled() { echo "ok" else echo "failed" + ret=1 fi cleanup_v6 @@ -198,6 +203,7 @@ run_ndisc_evict_nocarrier_disabled_all() { echo "ok" else echo "failed" + ret=1 fi cleanup_v6 @@ -218,3 +224,4 @@ if [ "$(id -u)" -ne 0 ];then fi run_all_tests +exit $ret From d9d71a89f28d27ac772c429b37d52668d011df7a Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 30 Dec 2022 16:33:04 -0600 Subject: [PATCH 0239/1593] net: ipa: use proper endpoint mask for suspend It is now possible for a system to have more than 32 endpoints. As a result, registers related to endpoint suspend are parameterized, with 32 endpoints represented in one more registers. In ipa_interrupt_suspend_control(), the IPA_SUSPEND_EN register offset is determined properly, but the bit mask used still assumes the number of enpoints won't exceed 32. This is a bug. Fix it. Fixes: f298ba785e2d ("net: ipa: add a parameter to suspend registers") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_interrupt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c index a49f66efacb87..d458a35839cce 100644 --- a/drivers/net/ipa/ipa_interrupt.c +++ b/drivers/net/ipa/ipa_interrupt.c @@ -132,10 +132,10 @@ static void ipa_interrupt_suspend_control(struct ipa_interrupt *interrupt, u32 endpoint_id, bool enable) { struct ipa *ipa = interrupt->ipa; + u32 mask = BIT(endpoint_id % 32); u32 unit = endpoint_id / 32; const struct ipa_reg *reg; u32 offset; - u32 mask; u32 val; WARN_ON(!test_bit(endpoint_id, ipa->available)); @@ -148,7 +148,6 @@ static void ipa_interrupt_suspend_control(struct ipa_interrupt *interrupt, offset = ipa_reg_n_offset(reg, unit); val = ioread32(ipa->reg_virt + offset); - mask = BIT(endpoint_id); if (enable) val |= mask; else From a3542b0ccd58f9fd42f34afa9daea435279a7c1c Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sat, 31 Dec 2022 16:05:46 -0600 Subject: [PATCH 0240/1593] dt-bindings: net: sun8i-emac: Add phy-supply property This property has always been supported by the Linux driver; see commit 9f93ac8d4085 ("net-next: stmmac: Add dwmac-sun8i"). In fact, the original driver submission includes the phy-supply code but no mention of it in the binding, so the omission appears to be accidental. In addition, the property is documented in the binding for the previous hardware generation, allwinner,sun7i-a20-gmac. Document phy-supply in the binding to fix devicetree validation for the 25+ boards that already use this property. Fixes: 0441bde003be ("dt-bindings: net-next: Add DT bindings documentation for Allwinner dwmac-sun8i") Acked-by: Rob Herring Reviewed-by: Andre Przywara Signed-off-by: Samuel Holland Signed-off-by: David S. Miller --- .../devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml index 1432fda3b603f..47bc2057e6292 100644 --- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml +++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml @@ -40,6 +40,9 @@ properties: clock-names: const: stmmaceth + phy-supply: + description: PHY regulator + syscon: $ref: /schemas/types.yaml#/definitions/phandle description: From 91e2286160edd29d3fea8efff2dcda7df321878d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Grzelak?= Date: Thu, 29 Dec 2022 15:22:19 +0100 Subject: [PATCH 0241/1593] dt-bindings: net: marvell,orion-mdio: Fix examples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As stated in marvell-orion-mdio.txt deleted in commit 0781434af811f ("dt-bindings: net: orion-mdio: Convert to JSON schema") if 'interrupts' property is present, width of 'reg' should be 0x84. Otherwise, width of 'reg' should be 0x4. Fix 'examples:' and add constraints checking whether 'interrupts' property is present and validate it against fixed values in reg. Signed-off-by: Michał Grzelak Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- .../bindings/net/marvell,orion-mdio.yaml | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml b/Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml index d2906b4a0f595..e35da8b01dc25 100644 --- a/Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml +++ b/Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml @@ -16,9 +16,6 @@ description: | 8k has a second unit which provides an interface with the xMDIO bus. This driver handles these interfaces. -allOf: - - $ref: "mdio.yaml#" - properties: compatible: enum: @@ -39,13 +36,38 @@ required: - compatible - reg +allOf: + - $ref: mdio.yaml# + + - if: + required: + - interrupts + + then: + properties: + reg: + items: + - items: + - $ref: /schemas/types.yaml#/definitions/cell + - const: 0x84 + + else: + properties: + reg: + items: + - items: + - $ref: /schemas/types.yaml#/definitions/cell + - enum: + - 0x4 + - 0x10 + unevaluatedProperties: false examples: - | mdio@d0072004 { compatible = "marvell,orion-mdio"; - reg = <0xd0072004 0x4>; + reg = <0xd0072004 0x84>; #address-cells = <1>; #size-cells = <0>; interrupts = <30>; From 029085b8949f5d269ae2bbd14915407dd0c7f902 Mon Sep 17 00:00:00 2001 From: Chengen Du Date: Fri, 30 Dec 2022 11:04:32 +0800 Subject: [PATCH 0242/1593] NFS: Judge the file access cache's timestamp in rcu path If the user's login time is newer than the cache's timestamp, we expect the cache may be stale and need to clear. The stale cache will remain in the list's tail if no other users operate on that inode. Once the user accesses the inode, the stale cache will be returned in rcu path. Signed-off-by: Chengen Du Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ea1ceffa1d3aa..d490d64a9ebd0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -3023,6 +3023,7 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre * but do it without locking. */ struct nfs_inode *nfsi = NFS_I(inode); + u64 login_time = nfs_access_login_time(current, cred); struct nfs_access_entry *cache; int err = -ECHILD; struct list_head *lh; @@ -3037,6 +3038,8 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre cache = NULL; if (cache == NULL) goto out; + if ((s64)(login_time - cache->timestamp) > 0) + goto out; if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) goto out; *mask = cache->mask; From 5e9a7b9c2ea18551759833146a181b14835bfe39 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Jan 2023 20:17:23 -0500 Subject: [PATCH 0243/1593] NFS: Fix up a sparse warning sparse is warning about an incorrect RCU dereference. fs/nfs/dir.c:2965:56: warning: incorrect type in argument 1 (different address spaces) fs/nfs/dir.c:2965:56: expected struct cred const * fs/nfs/dir.c:2965:56: got struct cred const [noderef] __rcu *const cred Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d490d64a9ebd0..f7e4a88d5d929 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2957,12 +2957,14 @@ static u64 nfs_access_login_time(const struct task_struct *task, const struct cred *cred) { const struct task_struct *parent; + const struct cred *pcred; u64 ret; rcu_read_lock(); for (;;) { parent = rcu_dereference(task->real_parent); - if (parent == task || cred_fscmp(parent->cred, cred) != 0) + pcred = rcu_dereference(parent->cred); + if (parent == task || cred_fscmp(pcred, cred) != 0) break; task = parent; } From cdfb2fef522d0c3f9cf293db51de88e9b3d46846 Mon Sep 17 00:00:00 2001 From: Marios Makassikis Date: Fri, 23 Dec 2022 11:59:31 +0100 Subject: [PATCH 0244/1593] ksmbd: send proper error response in smb2_tree_connect() Currently, smb2_tree_connect doesn't send an error response packet on error. This causes libsmb2 to skip the specific error code and fail with the following: smb2_service failed with : Failed to parse fixed part of command payload. Unexpected size of Error reply. Expected 9, got 8 Signed-off-by: Marios Makassikis Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 14d7f3599c639..38fbda52e06fe 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -1928,13 +1928,13 @@ int smb2_tree_connect(struct ksmbd_work *work) if (conn->posix_ext_supported) status.tree_conn->posix_extensions = true; -out_err1: rsp->StructureSize = cpu_to_le16(16); + inc_rfc1001_len(work->response_buf, 16); +out_err1: rsp->Capabilities = 0; rsp->Reserved = 0; /* default manual caching */ rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING; - inc_rfc1001_len(work->response_buf, 16); if (!IS_ERR(treename)) kfree(treename); @@ -1967,6 +1967,9 @@ int smb2_tree_connect(struct ksmbd_work *work) rsp->hdr.Status = STATUS_ACCESS_DENIED; } + if (status.ret != KSMBD_TREE_CONN_STATUS_OK) + smb2_set_err_rsp(work); + return rc; } From 797805d81baa814f76cf7bdab35f86408a79d707 Mon Sep 17 00:00:00 2001 From: William Liu Date: Fri, 30 Dec 2022 13:03:15 +0900 Subject: [PATCH 0245/1593] ksmbd: check nt_len to be at least CIFS_ENCPWD_SIZE in ksmbd_decode_ntlmssp_auth_blob MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "nt_len - CIFS_ENCPWD_SIZE" is passed directly from ksmbd_decode_ntlmssp_auth_blob to ksmbd_auth_ntlmv2. Malicious requests can set nt_len to less than CIFS_ENCPWD_SIZE, which results in a negative number (or large unsigned value) used for a subsequent memcpy in ksmbd_auth_ntlvm2 and can cause a panic. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Cc: stable@vger.kernel.org Signed-off-by: William Liu Signed-off-by: Hrvoje Mišetić Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/auth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c index 2a39ffb8423b7..6e61b5bc7d86e 100644 --- a/fs/ksmbd/auth.c +++ b/fs/ksmbd/auth.c @@ -322,7 +322,8 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, dn_off = le32_to_cpu(authblob->DomainName.BufferOffset); dn_len = le16_to_cpu(authblob->DomainName.Length); - if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len) + if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len || + nt_len < CIFS_ENCPWD_SIZE) return -EINVAL; /* TODO : use domain name that imported from configuration file */ From 83dcedd5540d4ac61376ddff5362f7d9f866a6ec Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 31 Dec 2022 17:32:31 +0900 Subject: [PATCH 0246/1593] ksmbd: fix infinite loop in ksmbd_conn_handler_loop() If kernel_recvmsg() return -EAGAIN in ksmbd_tcp_readv() and go round again, It will cause infinite loop issue. And all threads from next connections would be doing that. This patch add max retry count(2) to avoid it. kernel_recvmsg() will wait during 7sec timeout and try to retry two time if -EAGAIN is returned. And add flags of kvmalloc to __GFP_NOWARN and __GFP_NORETRY to disconnect immediately without retrying on memory alloation failure. Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Cc: stable@vger.kernel.org Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-18259 Reviewed-by: Sergey Senozhatsky Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/connection.c | 7 +++++-- fs/ksmbd/transport_tcp.c | 5 ++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index 12be8386446a3..fd0a288af299e 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -316,9 +316,12 @@ int ksmbd_conn_handler_loop(void *p) /* 4 for rfc1002 length field */ size = pdu_size + 4; - conn->request_buf = kvmalloc(size, GFP_KERNEL); + conn->request_buf = kvmalloc(size, + GFP_KERNEL | + __GFP_NOWARN | + __GFP_NORETRY); if (!conn->request_buf) - continue; + break; memcpy(conn->request_buf, hdr_buf, sizeof(hdr_buf)); if (!ksmbd_smb_request(conn)) diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index 63d55f543bd2e..4c6bd0b699791 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -295,6 +295,7 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, struct msghdr ksmbd_msg; struct kvec *iov; struct ksmbd_conn *conn = KSMBD_TRANS(t)->conn; + int max_retry = 2; iov = get_conn_iovec(t, nr_segs); if (!iov) @@ -321,9 +322,11 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, } else if (conn->status == KSMBD_SESS_NEED_RECONNECT) { total_read = -EAGAIN; break; - } else if (length == -ERESTARTSYS || length == -EAGAIN) { + } else if ((length == -ERESTARTSYS || length == -EAGAIN) && + max_retry) { usleep_range(1000, 2000); length = 0; + max_retry--; continue; } else if (length <= 0) { total_read = -EAGAIN; From cc2e9d2b26c86c1dd8687f6916e5f621bcacd6f7 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 27 Dec 2022 22:05:48 -0800 Subject: [PATCH 0247/1593] mm, slab: periodically resched in drain_freelist() drain_freelist() can be called with a very large number of slabs to free, such as for kmem_cache_shrink(), or depending on various settings of the slab cache when doing periodic reaping. If there is a potentially long list of slabs to drain, periodically schedule to ensure we aren't saturating the cpu for too long. Signed-off-by: David Rientjes Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Vlastimil Babka --- mm/slab.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/slab.c b/mm/slab.c index 7a269db050eed..29300fc1289a8 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2211,6 +2211,8 @@ static int drain_freelist(struct kmem_cache *cache, raw_spin_unlock_irq(&n->list_lock); slab_destroy(cache, slab); nr_freed++; + + cond_resched(); } out: return nr_freed; From 3b754ed6d1cd90017e66e5cc16f3923e4a952ffc Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Mon, 19 Dec 2022 09:43:05 +0100 Subject: [PATCH 0248/1593] drm/meson: Reduce the FIFO lines held when AFBC is not used Having a bigger number of FIFO lines held after vsync is only useful to SoCs using AFBC to give time to the AFBC decoder to be reset, configured and enabled again. For SoCs not using AFBC this, on the contrary, is causing on some displays issues and a few pixels vertical offset in the displayed image. Conditionally increase the number of lines held after vsync only for SoCs using AFBC, leaving the default value for all the others. Fixes: 24e0d4058eff ("drm/meson: hold 32 lines after vsync to give time for AFBC start") Signed-off-by: Carlo Caione Acked-by: Martin Blumenstingl Acked-by: Neil Armstrong [narmstrong: added fixes tag] Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20221216-afbc_s905x-v1-0-033bebf780d9@baylibre.com --- drivers/gpu/drm/meson/meson_viu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c index d4b907889a21d..cd399b0b71814 100644 --- a/drivers/gpu/drm/meson/meson_viu.c +++ b/drivers/gpu/drm/meson/meson_viu.c @@ -436,15 +436,14 @@ void meson_viu_init(struct meson_drm *priv) /* Initialize OSD1 fifo control register */ reg = VIU_OSD_DDR_PRIORITY_URGENT | - VIU_OSD_HOLD_FIFO_LINES(31) | VIU_OSD_FIFO_DEPTH_VAL(32) | /* fifo_depth_val: 32*8=256 */ VIU_OSD_WORDS_PER_BURST(4) | /* 4 words in 1 burst */ VIU_OSD_FIFO_LIMITS(2); /* fifo_lim: 2*16=32 */ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) - reg |= VIU_OSD_BURST_LENGTH_32; + reg |= (VIU_OSD_BURST_LENGTH_32 | VIU_OSD_HOLD_FIFO_LINES(31)); else - reg |= VIU_OSD_BURST_LENGTH_64; + reg |= (VIU_OSD_BURST_LENGTH_64 | VIU_OSD_HOLD_FIFO_LINES(4)); writel_relaxed(reg, priv->io_base + _REG(VIU_OSD1_FIFO_CTRL_STAT)); writel_relaxed(reg, priv->io_base + _REG(VIU_OSD2_FIFO_CTRL_STAT)); From 7adde5ac25fa50dbb1fb237042316685cafe976c Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Thu, 8 Dec 2022 11:36:20 +0000 Subject: [PATCH 0249/1593] mtd: parsers: Fix potential memory leak in mtd_parser_tplink_safeloader_parse() The parts needs to be freed with all its elements, otherwise it will be leaked. Fixes: 00a3588084be ("mtd: parsers: add TP-Link SafeLoader partitions table parser") Signed-off-by: Yuan Can Reviewed-by: Dan Carpenter Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20221208113620.78855-1-yuancan@huawei.com --- drivers/mtd/parsers/tplink_safeloader.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/parsers/tplink_safeloader.c b/drivers/mtd/parsers/tplink_safeloader.c index f601e7bd86279..1c689dafca2ae 100644 --- a/drivers/mtd/parsers/tplink_safeloader.c +++ b/drivers/mtd/parsers/tplink_safeloader.c @@ -91,7 +91,7 @@ static int mtd_parser_tplink_safeloader_parse(struct mtd_info *mtd, buf = mtd_parser_tplink_safeloader_read_table(mtd); if (!buf) { err = -ENOENT; - goto err_out; + goto err_free_parts; } for (idx = 0, offset = TPLINK_SAFELOADER_DATA_OFFSET; @@ -118,6 +118,8 @@ static int mtd_parser_tplink_safeloader_parse(struct mtd_info *mtd, err_free: for (idx -= 1; idx >= 0; idx--) kfree(parts[idx].name); +err_free_parts: + kfree(parts); err_out: return err; }; From 105c14b84d93168431abba5d55e6c26fa4b65abb Mon Sep 17 00:00:00 2001 From: Mikhail Zhilkin Date: Thu, 8 Dec 2022 23:28:29 +0300 Subject: [PATCH 0250/1593] mtd: parsers: scpart: fix __udivdi3 undefined on mips This fixes the following compile error on mips architecture with clang version 16.0.0 reported by the 0-DAY CI Kernel Test Service: ld.lld: error: undefined symbol: __udivdi3 referenced by scpart.c mtd/parsers/scpart.o:(scpart_parse) in archive drivers/built-in.a As a workaround this makes 'offs' a 32-bit type. This is enough, because the mtd containing partition table practically does not exceed 1 MB. We can revert this when the [Link] has been resolved. Link: https://github.com/ClangBuiltLinux/linux/issues/1635 Fixes: 9b78ef0c7997 ("mtd: parsers: add support for Sercomm partitions") Reported-by: kernel test robot Suggested-by: Arnd Bergmann Signed-off-by: Mikhail Zhilkin Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/805fe58e-690f-6a3f-5ebf-2f6f6e6e4599@gmail.com --- drivers/mtd/parsers/scpart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/parsers/scpart.c b/drivers/mtd/parsers/scpart.c index 02601bb33de4e..6e5e11c37078f 100644 --- a/drivers/mtd/parsers/scpart.c +++ b/drivers/mtd/parsers/scpart.c @@ -50,7 +50,7 @@ static int scpart_scan_partmap(struct mtd_info *master, loff_t partmap_offs, int cnt = 0; int res = 0; int res2; - loff_t offs; + uint32_t offs; size_t retlen; struct sc_part_desc *pdesc = NULL; struct sc_part_desc *tmpdesc; From d19ab1f785d0b6b9f709799f0938658903821ba1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Dec 2022 15:13:34 +0100 Subject: [PATCH 0251/1593] mtd: cfi: allow building spi-intel standalone When MTD or MTD_CFI_GEOMETRY is disabled, the spi-intel driver fails to build, as it includes the shared CFI header: include/linux/mtd/cfi.h:62:2: error: #warning No CONFIG_MTD_CFI_Ix selected. No NOR chip support can work. [-Werror=cpp] 62 | #warning No CONFIG_MTD_CFI_Ix selected. No NOR chip support can work. linux/mtd/spi-nor.h does not actually need to include cfi.h, so remove the inclusion here to fix the warning. This uncovers a missing #include in spi-nor/core.c so add that there to prevent a different build issue. Fixes: e23e5a05d1fd ("mtd: spi-nor: intel-spi: Convert to SPI MEM") Signed-off-by: Arnd Bergmann Reviewed-by: Mika Westerberg Reviewed-by: Tokunori Ikegami Acked-by: Pratyush Yadav Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20221220141352.1486360-1-arnd@kernel.org --- drivers/mtd/spi-nor/core.c | 1 + include/linux/mtd/spi-nor.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index d8703d7dfd0af..d67c926bca8ba 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 25765556223a1..a3f8cdca90c83 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -7,7 +7,6 @@ #define __LINUX_MTD_SPI_NOR_H #include -#include #include #include From 461ab10ef7e6ea9b41a0571a7fc6a72af9549a3c Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 17 Nov 2022 10:43:21 +0800 Subject: [PATCH 0252/1593] ceph: switch to vfs_inode_has_locks() to fix file lock bug For the POSIX locks they are using the same owner, which is the thread id. And multiple POSIX locks could be merged into single one, so when checking whether the 'file' has locks may fail. For a file where some openers use locking and others don't is a really odd usage pattern though. Locks are like stoplights -- they only work if everyone pays attention to them. Just switch ceph_get_caps() to check whether any locks are set on the inode. If there are POSIX/OFD/FLOCK locks on the file at the time, we should set CHECK_FILELOCK, regardless of what fd was used to set the lock. Fixes: ff5d913dfc71 ("ceph: return -EIO if read/write against filp that lost file locks") Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 2 +- fs/ceph/locks.c | 4 ---- fs/ceph/super.h | 1 - 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 4b159f97fe7b5..f75ad432f375f 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2913,7 +2913,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got while (true) { flags &= CEPH_FILE_MODE_MASK; - if (atomic_read(&fi->num_locks)) + if (vfs_inode_has_locks(inode)) flags |= CHECK_FILELOCK; _got = 0; ret = try_get_cap_refs(inode, need, want, endoff, diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index f3b461c708a8b..6b3b8c299c17a 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -32,18 +32,14 @@ void __init ceph_flock_init(void) static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) { - struct ceph_file_info *fi = dst->fl_file->private_data; struct inode *inode = file_inode(dst->fl_file); atomic_inc(&ceph_inode(inode)->i_filelock_ref); - atomic_inc(&fi->num_locks); } static void ceph_fl_release_lock(struct file_lock *fl) { - struct ceph_file_info *fi = fl->fl_file->private_data; struct inode *inode = file_inode(fl->fl_file); struct ceph_inode_info *ci = ceph_inode(inode); - atomic_dec(&fi->num_locks); if (atomic_dec_and_test(&ci->i_filelock_ref)) { /* clear error when all locks are released */ spin_lock(&ci->i_ceph_lock); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 30bdb391a0dc0..0ed3be75bb9ae 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -790,7 +790,6 @@ struct ceph_file_info { struct list_head rw_contexts; u32 filp_gen; - atomic_t num_locks; }; struct ceph_dir_file_info { From 8e1858710d9a71d88acd922f2e95d1eddb90eea0 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 17 Nov 2022 10:57:53 +0800 Subject: [PATCH 0253/1593] ceph: avoid use-after-free in ceph_fl_release_lock() When ceph releasing the file_lock it will try to get the inode pointer from the fl->fl_file, which the memory could already be released by another thread in filp_close(). Because in VFS layer the fl->fl_file doesn't increase the file's reference counter. Will switch to use ceph dedicate lock info to track the inode. And in ceph_fl_release_lock() we should skip all the operations if the fl->fl_u.ceph.inode is not set, which should come from the request file_lock. And we will set fl->fl_u.ceph.inode when inserting it to the inode lock list, which is when copying the lock. Link: https://tracker.ceph.com/issues/57986 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/locks.c | 20 ++++++++++++++++++-- include/linux/fs.h | 3 +++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 6b3b8c299c17a..9c8dc8a55e7e1 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -34,18 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) { struct inode *inode = file_inode(dst->fl_file); atomic_inc(&ceph_inode(inode)->i_filelock_ref); + dst->fl_u.ceph.inode = igrab(inode); } +/* + * Do not use the 'fl->fl_file' in release function, which + * is possibly already released by another thread. + */ static void ceph_fl_release_lock(struct file_lock *fl) { - struct inode *inode = file_inode(fl->fl_file); - struct ceph_inode_info *ci = ceph_inode(inode); + struct inode *inode = fl->fl_u.ceph.inode; + struct ceph_inode_info *ci; + + /* + * If inode is NULL it should be a request file_lock, + * nothing we can do. + */ + if (!inode) + return; + + ci = ceph_inode(inode); if (atomic_dec_and_test(&ci->i_filelock_ref)) { /* clear error when all locks are released */ spin_lock(&ci->i_ceph_lock); ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK; spin_unlock(&ci->i_ceph_lock); } + fl->fl_u.ceph.inode = NULL; + iput(inode); } static const struct file_lock_operations ceph_fl_lock_ops = { diff --git a/include/linux/fs.h b/include/linux/fs.h index 066555ad1bf81..c1769a2c5d708 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1119,6 +1119,9 @@ struct file_lock { int state; /* state of grant or error if -ve */ unsigned int debug_id; } afs; + struct { + struct inode *inode; + } ceph; } fl_u; } __randomize_layout; From 694175cd8a1643cde3acb45c9294bca44a8e08e9 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 2 Jan 2023 12:20:39 +0400 Subject: [PATCH 0254/1593] gpio: sifive: Fix refcount leak in sifive_gpio_probe of_irq_find_parent() returns a node pointer with refcount incremented, We should use of_node_put() on it when not needed anymore. Add missing of_node_put() to avoid refcount leak. Fixes: 96868dce644d ("gpio/sifive: Add GPIO driver for SiFive SoCs") Signed-off-by: Miaoqian Lin Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-sifive.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c index 238f3210970cf..bc5660f61c570 100644 --- a/drivers/gpio/gpio-sifive.c +++ b/drivers/gpio/gpio-sifive.c @@ -215,6 +215,7 @@ static int sifive_gpio_probe(struct platform_device *pdev) return -ENODEV; } parent = irq_find_host(irq_parent); + of_node_put(irq_parent); if (!parent) { dev_err(dev, "no IRQ parent domain\n"); return -ENODEV; From 1e336aa0c0250ec84c6f16efac40c9f0138e367d Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 7 Dec 2022 19:23:15 +0800 Subject: [PATCH 0255/1593] mmc: sdhci-esdhc-imx: correct the tuning start tap and step setting Current code logic may be impacted by the setting of ROM/Bootloader, so unmask these bits first, then setting these bits accordingly. Fixes: 2b16cf326b70 ("mmc: sdhci-esdhc-imx: move tuning static configuration into hwinit function") Signed-off-by: Haibo Chen Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221207112315.1812222-1-haibo.chen@nxp.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-esdhc-imx.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 89ef0c80ac371..9e73c34b64017 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -107,6 +107,7 @@ #define ESDHC_TUNING_START_TAP_DEFAULT 0x1 #define ESDHC_TUNING_START_TAP_MASK 0x7f #define ESDHC_TUNING_CMD_CRC_CHECK_DISABLE (1 << 7) +#define ESDHC_TUNING_STEP_DEFAULT 0x1 #define ESDHC_TUNING_STEP_MASK 0x00070000 #define ESDHC_TUNING_STEP_SHIFT 16 @@ -1368,7 +1369,7 @@ static void sdhci_esdhc_imx_hwinit(struct sdhci_host *host) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host); struct cqhci_host *cq_host = host->mmc->cqe_private; - int tmp; + u32 tmp; if (esdhc_is_usdhc(imx_data)) { /* @@ -1423,17 +1424,24 @@ static void sdhci_esdhc_imx_hwinit(struct sdhci_host *host) if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) { tmp = readl(host->ioaddr + ESDHC_TUNING_CTRL); - tmp |= ESDHC_STD_TUNING_EN | - ESDHC_TUNING_START_TAP_DEFAULT; - if (imx_data->boarddata.tuning_start_tap) { - tmp &= ~ESDHC_TUNING_START_TAP_MASK; + tmp |= ESDHC_STD_TUNING_EN; + + /* + * ROM code or bootloader may config the start tap + * and step, unmask them first. + */ + tmp &= ~(ESDHC_TUNING_START_TAP_MASK | ESDHC_TUNING_STEP_MASK); + if (imx_data->boarddata.tuning_start_tap) tmp |= imx_data->boarddata.tuning_start_tap; - } + else + tmp |= ESDHC_TUNING_START_TAP_DEFAULT; if (imx_data->boarddata.tuning_step) { - tmp &= ~ESDHC_TUNING_STEP_MASK; tmp |= imx_data->boarddata.tuning_step << ESDHC_TUNING_STEP_SHIFT; + } else { + tmp |= ESDHC_TUNING_STEP_DEFAULT + << ESDHC_TUNING_STEP_SHIFT; } /* Disable the CMD CRC check for tuning, if not, need to From b3d40c3ec3dc4ad78017de6c3a38979f57aaaab8 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 22 Nov 2022 21:48:23 +0800 Subject: [PATCH 0256/1593] HID: intel_ish-hid: Add check for ishtp_dma_tx_map As the kcalloc may return NULL pointer, it should be better to check the ishtp_dma_tx_map before use in order to avoid NULL pointer dereference. Fixes: 3703f53b99e4 ("HID: intel_ish-hid: ISH Transport layer") Signed-off-by: Jiasheng Jiang Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp/dma-if.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/hid/intel-ish-hid/ishtp/dma-if.c b/drivers/hid/intel-ish-hid/ishtp/dma-if.c index 40554c8daca07..00046cbfd4ed0 100644 --- a/drivers/hid/intel-ish-hid/ishtp/dma-if.c +++ b/drivers/hid/intel-ish-hid/ishtp/dma-if.c @@ -104,6 +104,11 @@ void *ishtp_cl_get_dma_send_buf(struct ishtp_device *dev, int required_slots = (size / DMA_SLOT_SIZE) + 1 * (size % DMA_SLOT_SIZE != 0); + if (!dev->ishtp_dma_tx_map) { + dev_err(dev->devc, "Fail to allocate Tx map\n"); + return NULL; + } + spin_lock_irqsave(&dev->ishtp_dma_tx_lock, flags); for (i = 0; i <= (dev->ishtp_dma_num_slots - required_slots); i++) { free = 1; @@ -150,6 +155,11 @@ void ishtp_cl_release_dma_acked_mem(struct ishtp_device *dev, return; } + if (!dev->ishtp_dma_tx_map) { + dev_err(dev->devc, "Fail to allocate Tx map\n"); + return; + } + i = (msg_addr - dev->ishtp_host_dma_tx_buf) / DMA_SLOT_SIZE; spin_lock_irqsave(&dev->ishtp_dma_tx_lock, flags); for (j = 0; j < acked_slots; j++) { From ae066f374687d7dd06bb8c732f66d6ab3c3fd480 Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Wed, 28 Dec 2022 12:26:06 -0800 Subject: [PATCH 0257/1593] arm64: dts: imx8mm-venice-gw7901: fix USB2 controller OC polarity The GW7901 has USB2 routed to a USB VBUS supply with over-current protection via an active-low pin. Define the OC pin polarity properly. Fixes: 2b1649a83afc ("arm64: dts: imx: Add i.mx8mm Gateworks gw7901 dts support") Signed-off-by: Tim Harvey Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts index 750a1f07ecb7a..6433c205f8dde 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts @@ -771,6 +771,7 @@ &usbotg2 { dr_mode = "host"; vbus-supply = <®_usb2_vbus>; + over-current-active-low; status = "okay"; }; From 490748874ebf1875420fc29b335bba2075dd1b5e Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sat, 31 Dec 2022 13:58:48 +0400 Subject: [PATCH 0258/1593] soc: imx8m: Fix incorrect check for of_clk_get_by_name() of_clk_get_by_name() returns error pointers instead of NULL. Use IS_ERR() checks the return value to catch errors. Fixes: 836fb30949d9 ("soc: imx8m: Enable OCOTP clock before reading the register") Signed-off-by: Miaoqian Lin Signed-off-by: Shawn Guo --- drivers/soc/imx/soc-imx8m.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soc/imx/soc-imx8m.c b/drivers/soc/imx/soc-imx8m.c index 28144c699b0c3..32ed9dc88e455 100644 --- a/drivers/soc/imx/soc-imx8m.c +++ b/drivers/soc/imx/soc-imx8m.c @@ -66,8 +66,8 @@ static u32 __init imx8mq_soc_revision(void) ocotp_base = of_iomap(np, 0); WARN_ON(!ocotp_base); clk = of_clk_get_by_name(np, NULL); - if (!clk) { - WARN_ON(!clk); + if (IS_ERR(clk)) { + WARN_ON(IS_ERR(clk)); return 0; } From a2965c7be0522eaa18808684b7b82b248515511b Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 1 Jan 2023 16:57:43 -0500 Subject: [PATCH 0259/1593] net: sched: atm: dont intepret cls results when asked to drop If asked to drop a packet via TC_ACT_SHOT it is unsafe to assume res.class contains a valid pointer Fixes: b0188d4dbe5f ("[NET_SCHED]: sch_atm: Lindent") Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_atm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index f52255fea652b..4a981ca90b0bf 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -393,10 +393,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, result = tcf_classify(skb, NULL, fl, &res, true); if (result < 0) continue; + if (result == TC_ACT_SHOT) + goto done; + flow = (struct atm_flow_data *)res.class; if (!flow) flow = lookup_flow(sch, res.classid); - goto done; + goto drop; } } flow = NULL; From caa4b35b4317d5147b3ab0fbdc9c075c7d2e9c12 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 1 Jan 2023 16:57:44 -0500 Subject: [PATCH 0260/1593] net: sched: cbq: dont intepret cls results when asked to drop If asked to drop a packet via TC_ACT_SHOT it is unsafe to assume that res.class contains a valid pointer Sample splat reported by Kyle Zeng [ 5.405624] 0: reclassify loop, rule prio 0, protocol 800 [ 5.406326] ================================================================== [ 5.407240] BUG: KASAN: slab-out-of-bounds in cbq_enqueue+0x54b/0xea0 [ 5.407987] Read of size 1 at addr ffff88800e3122aa by task poc/299 [ 5.408731] [ 5.408897] CPU: 0 PID: 299 Comm: poc Not tainted 5.10.155+ #15 [ 5.409516] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 [ 5.410439] Call Trace: [ 5.410764] dump_stack+0x87/0xcd [ 5.411153] print_address_description+0x7a/0x6b0 [ 5.411687] ? vprintk_func+0xb9/0xc0 [ 5.411905] ? printk+0x76/0x96 [ 5.412110] ? cbq_enqueue+0x54b/0xea0 [ 5.412323] kasan_report+0x17d/0x220 [ 5.412591] ? cbq_enqueue+0x54b/0xea0 [ 5.412803] __asan_report_load1_noabort+0x10/0x20 [ 5.413119] cbq_enqueue+0x54b/0xea0 [ 5.413400] ? __kasan_check_write+0x10/0x20 [ 5.413679] __dev_queue_xmit+0x9c0/0x1db0 [ 5.413922] dev_queue_xmit+0xc/0x10 [ 5.414136] ip_finish_output2+0x8bc/0xcd0 [ 5.414436] __ip_finish_output+0x472/0x7a0 [ 5.414692] ip_finish_output+0x5c/0x190 [ 5.414940] ip_output+0x2d8/0x3c0 [ 5.415150] ? ip_mc_finish_output+0x320/0x320 [ 5.415429] __ip_queue_xmit+0x753/0x1760 [ 5.415664] ip_queue_xmit+0x47/0x60 [ 5.415874] __tcp_transmit_skb+0x1ef9/0x34c0 [ 5.416129] tcp_connect+0x1f5e/0x4cb0 [ 5.416347] tcp_v4_connect+0xc8d/0x18c0 [ 5.416577] __inet_stream_connect+0x1ae/0xb40 [ 5.416836] ? local_bh_enable+0x11/0x20 [ 5.417066] ? lock_sock_nested+0x175/0x1d0 [ 5.417309] inet_stream_connect+0x5d/0x90 [ 5.417548] ? __inet_stream_connect+0xb40/0xb40 [ 5.417817] __sys_connect+0x260/0x2b0 [ 5.418037] __x64_sys_connect+0x76/0x80 [ 5.418267] do_syscall_64+0x31/0x50 [ 5.418477] entry_SYSCALL_64_after_hwframe+0x61/0xc6 [ 5.418770] RIP: 0033:0x473bb7 [ 5.418952] Code: 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2a 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 18 89 54 24 0c 48 89 34 24 89 [ 5.420046] RSP: 002b:00007fffd20eb0f8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a [ 5.420472] RAX: ffffffffffffffda RBX: 00007fffd20eb578 RCX: 0000000000473bb7 [ 5.420872] RDX: 0000000000000010 RSI: 00007fffd20eb110 RDI: 0000000000000007 [ 5.421271] RBP: 00007fffd20eb150 R08: 0000000000000001 R09: 0000000000000004 [ 5.421671] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 [ 5.422071] R13: 00007fffd20eb568 R14: 00000000004fc740 R15: 0000000000000002 [ 5.422471] [ 5.422562] Allocated by task 299: [ 5.422782] __kasan_kmalloc+0x12d/0x160 [ 5.423007] kasan_kmalloc+0x5/0x10 [ 5.423208] kmem_cache_alloc_trace+0x201/0x2e0 [ 5.423492] tcf_proto_create+0x65/0x290 [ 5.423721] tc_new_tfilter+0x137e/0x1830 [ 5.423957] rtnetlink_rcv_msg+0x730/0x9f0 [ 5.424197] netlink_rcv_skb+0x166/0x300 [ 5.424428] rtnetlink_rcv+0x11/0x20 [ 5.424639] netlink_unicast+0x673/0x860 [ 5.424870] netlink_sendmsg+0x6af/0x9f0 [ 5.425100] __sys_sendto+0x58d/0x5a0 [ 5.425315] __x64_sys_sendto+0xda/0xf0 [ 5.425539] do_syscall_64+0x31/0x50 [ 5.425764] entry_SYSCALL_64_after_hwframe+0x61/0xc6 [ 5.426065] [ 5.426157] The buggy address belongs to the object at ffff88800e312200 [ 5.426157] which belongs to the cache kmalloc-128 of size 128 [ 5.426955] The buggy address is located 42 bytes to the right of [ 5.426955] 128-byte region [ffff88800e312200, ffff88800e312280) [ 5.427688] The buggy address belongs to the page: [ 5.427992] page:000000009875fabc refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0xe312 [ 5.428562] flags: 0x100000000000200(slab) [ 5.428812] raw: 0100000000000200 dead000000000100 dead000000000122 ffff888007843680 [ 5.429325] raw: 0000000000000000 0000000000100010 00000001ffffffff ffff88800e312401 [ 5.429875] page dumped because: kasan: bad access detected [ 5.430214] page->mem_cgroup:ffff88800e312401 [ 5.430471] [ 5.430564] Memory state around the buggy address: [ 5.430846] ffff88800e312180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 5.431267] ffff88800e312200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fc [ 5.431705] >ffff88800e312280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 5.432123] ^ [ 5.432391] ffff88800e312300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fc [ 5.432810] ffff88800e312380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 5.433229] ================================================================== [ 5.433648] Disabling lock debugging due to kernel taint Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Kyle Zeng Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_cbq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 6568e17c4c634..36db5f6782f2c 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -230,6 +230,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) result = tcf_classify(skb, NULL, fl, &res, true); if (!fl || result < 0) goto fallback; + if (result == TC_ACT_SHOT) + return NULL; cl = (void *)res.class; if (!cl) { @@ -250,8 +252,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; fallthrough; - case TC_ACT_SHOT: - return NULL; case TC_ACT_RECLASSIFY: return cbq_reclassify(skb, cl); } From 43d253781f6321c6a07a5fe4ee72103a679a5f6b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 1 Jan 2023 23:17:37 -0800 Subject: [PATCH 0261/1593] net: sched: htb: fix htb_classify() kernel-doc Fix W=1 kernel-doc warning: net/sched/sch_htb.c:214: warning: expecting prototype for htb_classify(). Prototype was for HTB_DIRECT() instead by moving the HTB_DIRECT() macro above the function. Add kernel-doc notation for function parameters as well. Signed-off-by: Randy Dunlap Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index e5b4bbf3ce3d5..2238edece1a46 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -199,8 +199,14 @@ static unsigned long htb_search(struct Qdisc *sch, u32 handle) { return (unsigned long)htb_find(handle, sch); } + +#define HTB_DIRECT ((struct htb_class *)-1L) + /** * htb_classify - classify a packet into class + * @skb: the socket buffer + * @sch: the active queue discipline + * @qerr: pointer for returned status code * * It returns NULL if the packet should be dropped or -1 if the packet * should be passed directly thru. In all other cases leaf class is returned. @@ -211,8 +217,6 @@ static unsigned long htb_search(struct Qdisc *sch, u32 handle) * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful * then finish and return direct queue. */ -#define HTB_DIRECT ((struct htb_class *)-1L) - static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) { From 06bf62944144a92d83dd14fd1378d2a288259561 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 2 Jan 2023 08:55:56 +0200 Subject: [PATCH 0262/1593] vxlan: Fix memory leaks in error path The memory allocated by vxlan_vnigroup_init() is not freed in the error path, leading to memory leaks [1]. Fix by calling vxlan_vnigroup_uninit() in the error path. The leaks can be reproduced by annotating gro_cells_init() with ALLOW_ERROR_INJECTION() and then running: # echo "100" > /sys/kernel/debug/fail_function/probability # echo "1" > /sys/kernel/debug/fail_function/times # echo "gro_cells_init" > /sys/kernel/debug/fail_function/inject # printf %#x -12 > /sys/kernel/debug/fail_function/gro_cells_init/retval # ip link add name vxlan0 type vxlan dstport 4789 external vnifilter RTNETLINK answers: Cannot allocate memory [1] unreferenced object 0xffff88810db84a00 (size 512): comm "ip", pid 330, jiffies 4295010045 (age 66.016s) hex dump (first 32 bytes): f8 d5 76 0e 81 88 ff ff 01 00 00 00 00 00 00 02 ..v............. 03 00 04 00 48 00 00 00 00 00 00 01 04 00 01 00 ....H........... backtrace: [] kmalloc_trace+0x2a/0x60 [] vxlan_vnigroup_init+0x4c/0x160 [] vxlan_init+0x1ae/0x280 [] register_netdevice+0x57a/0x16d0 [] __vxlan_dev_create+0x7c7/0xa50 [] vxlan_newlink+0xd6/0x130 [] __rtnl_newlink+0x112b/0x18a0 [] rtnl_newlink+0x6c/0xa0 [] rtnetlink_rcv_msg+0x43f/0xd40 [] netlink_rcv_skb+0x170/0x440 [] netlink_unicast+0x53f/0x810 [] netlink_sendmsg+0x958/0xe70 [] ____sys_sendmsg+0x78f/0xa90 [] ___sys_sendmsg+0x13a/0x1e0 [] __sys_sendmsg+0x11c/0x1f0 [] do_syscall_64+0x38/0x80 unreferenced object 0xffff88810e76d5f8 (size 192): comm "ip", pid 330, jiffies 4295010045 (age 66.016s) hex dump (first 32 bytes): 04 00 00 00 00 00 00 00 db e1 4f e7 00 00 00 00 ..........O..... 08 d6 76 0e 81 88 ff ff 08 d6 76 0e 81 88 ff ff ..v.......v..... backtrace: [] __kmalloc_node+0x4e/0x90 [] kvmalloc_node+0xa6/0x1f0 [] bucket_table_alloc.isra.0+0x83/0x460 [] rhashtable_init+0x43b/0x7c0 [] vxlan_vnigroup_init+0x6c/0x160 [] vxlan_init+0x1ae/0x280 [] register_netdevice+0x57a/0x16d0 [] __vxlan_dev_create+0x7c7/0xa50 [] vxlan_newlink+0xd6/0x130 [] __rtnl_newlink+0x112b/0x18a0 [] rtnl_newlink+0x6c/0xa0 [] rtnetlink_rcv_msg+0x43f/0xd40 [] netlink_rcv_skb+0x170/0x440 [] netlink_unicast+0x53f/0x810 [] netlink_sendmsg+0x958/0xe70 [] ____sys_sendmsg+0x78f/0xa90 Fixes: f9c4bb0b245c ("vxlan: vni filtering support on collect metadata device") Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/vxlan/vxlan_core.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 92224b36787a8..b1b179effe2a1 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2917,16 +2917,23 @@ static int vxlan_init(struct net_device *dev) vxlan_vnigroup_init(vxlan); dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; + if (!dev->tstats) { + err = -ENOMEM; + goto err_vnigroup_uninit; + } err = gro_cells_init(&vxlan->gro_cells, dev); - if (err) { - free_percpu(dev->tstats); - return err; - } + if (err) + goto err_free_percpu; return 0; + +err_free_percpu: + free_percpu(dev->tstats); +err_vnigroup_uninit: + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) + vxlan_vnigroup_uninit(vxlan); + return err; } static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni) From 588ab2dc25f60efeb516b4abedb6c551949cc185 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 2 Jan 2023 13:12:15 +0100 Subject: [PATCH 0263/1593] net: sparx5: Fix reading of the MAC address There is an issue with the checking of the return value of 'of_get_mac_address', which returns 0 on success and negative value on failure. The driver interpretated the result the opposite way. Therefore if there was a MAC address defined in the DT, then the driver was generating a random MAC address otherwise it would use address 0. Fix this by checking correctly the return value of 'of_get_mac_address' Fixes: b74ef9f9cb91 ("net: sparx5: Do not use mac_addr uninitialized in mchp_sparx5_probe()") Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index d25f4f09faa06..3c5d4fe993737 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -834,7 +834,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) if (err) goto cleanup_config; - if (!of_get_mac_address(np, sparx5->base_mac)) { + if (of_get_mac_address(np, sparx5->base_mac)) { dev_info(sparx5->dev, "MAC addr was not set, use random MAC\n"); eth_random_addr(sparx5->base_mac); sparx5->base_mac[5] = 0; From a31d47be64b9b74f8cfedffe03e0a8a1f9e51f23 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 30 Dec 2022 13:24:37 +0100 Subject: [PATCH 0264/1593] netfilter: ipset: fix hash:net,port,net hang with /0 subnet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hash:net,port,net set type supports /0 subnets. However, the patch commit 5f7b51bf09baca8e titled "netfilter: ipset: Limit the maximal range of consecutive elements to add/delete" did not take into account it and resulted in an endless loop. The bug is actually older but the patch 5f7b51bf09baca8e brings it out earlier. Handle /0 subnets properly in hash:net,port,net set types. Fixes: 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of consecutive elements to add/delete") Reported-by: Марк Коренберг Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_netportnet.c | 40 ++++++++++---------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 19bcdb3141f6e..005a7ce87217e 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -173,17 +173,26 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } +static u32 +hash_netportnet4_range_to_cidr(u32 from, u32 to, u8 *cidr) +{ + if (from == 0 && to == UINT_MAX) { + *cidr = 0; + return to; + } + return ip_set_range_to_cidr(from, to, cidr); +} + static int hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netportnet4 *h = set->data; + struct hash_netportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to = 0, ip2, ipn; - u64 n = 0, m = 0; + u32 ip2_from = 0, ip2_to = 0, ip2, i = 0; bool with_ports = false; int ret; @@ -285,19 +294,6 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); - n++; - } while (ipn++ < ip_to); - ipn = ip2_from; - do { - ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); - m++; - } while (ipn++ < ip2_to); - - if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; if (retried) { ip = ntohl(h->next.ip[0]); @@ -310,13 +306,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], do { e.ip[0] = htonl(ip); - ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); + ip = hash_netportnet4_range_to_cidr(ip, ip_to, &e.cidr[0]); for (; p <= port_to; p++) { e.port = htons(p); do { + i++; e.ip[1] = htonl(ip2); - ip2 = ip_set_range_to_cidr(ip2, ip2_to, - &e.cidr[1]); + if (i > IPSET_MAX_RANGE) { + hash_netportnet4_data_next(&h->next, + &e); + return -ERANGE; + } + ip2 = hash_netportnet4_range_to_cidr(ip2, + ip2_to, &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; From 5e29dc36bd5e2166b834ceb19990d9e68a734d7d Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 30 Dec 2022 13:24:38 +0100 Subject: [PATCH 0265/1593] netfilter: ipset: Rework long task execution when adding/deleting entries When adding/deleting large number of elements in one step in ipset, it can take a reasonable amount of time and can result in soft lockup errors. The patch 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of consecutive elements to add/delete") tried to fix it by limiting the max elements to process at all. However it was not enough, it is still possible that we get hung tasks. Lowering the limit is not reasonable, so the approach in this patch is as follows: rely on the method used at resizing sets and save the state when we reach a smaller internal batch limit, unlock/lock and proceed from the saved state. Thus we can avoid long continuous tasks and at the same time removed the limit to add/delete large number of elements in one step. The nfnl mutex is held during the whole operation which prevents one to issue other ipset commands in parallel. Fixes: 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of consecutive elements to add/delete") Reported-by: syzbot+9204e7399656300bf271@syzkaller.appspotmail.com Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 2 +- net/netfilter/ipset/ip_set_core.c | 7 ++++--- net/netfilter/ipset/ip_set_hash_ip.c | 14 ++++++------- net/netfilter/ipset/ip_set_hash_ipmark.c | 13 ++++++------ net/netfilter/ipset/ip_set_hash_ipport.c | 13 ++++++------ net/netfilter/ipset/ip_set_hash_ipportip.c | 13 ++++++------ net/netfilter/ipset/ip_set_hash_ipportnet.c | 13 +++++++----- net/netfilter/ipset/ip_set_hash_net.c | 17 +++++++-------- net/netfilter/ipset/ip_set_hash_netiface.c | 15 ++++++-------- net/netfilter/ipset/ip_set_hash_netnet.c | 23 +++++++-------------- net/netfilter/ipset/ip_set_hash_netport.c | 19 +++++++---------- 11 files changed, 68 insertions(+), 81 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index ab934ad951a87..e8c350a3ade15 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -197,7 +197,7 @@ struct ip_set_region { }; /* Max range where every element is added/deleted in one step */ -#define IPSET_MAX_RANGE (1<<20) +#define IPSET_MAX_RANGE (1<<14) /* The max revision number supported by any set type + 1 */ #define IPSET_REVISION_MAX 9 diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index e7ba5b6dd2b7c..46ebee9400dab 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1698,9 +1698,10 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb, ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); ip_set_unlock(set); retried = true; - } while (ret == -EAGAIN && - set->variant->resize && - (ret = set->variant->resize(set, retried)) == 0); + } while (ret == -ERANGE || + (ret == -EAGAIN && + set->variant->resize && + (ret = set->variant->resize(set, retried)) == 0)); if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) return 0; diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index e30513cefd90e..c9f4e38596632 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -100,11 +100,11 @@ static int hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ip4 *h = set->data; + struct hash_ip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = { 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, hosts; + u32 ip = 0, ip_to = 0, hosts, i = 0; int ret = 0; if (tb[IPSET_ATTR_LINENO]) @@ -149,14 +149,14 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); - /* 64bit division is not allowed on 32bit */ - if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE) - return -ERANGE; - if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to;) { + for (; ip <= ip_to; i++) { e.ip = htonl(ip); + if (i > IPSET_MAX_RANGE) { + hash_ip4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index 153de3457423e..a22ec1a6f6ec8 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -97,11 +97,11 @@ static int hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipmark4 *h = set->data; + struct hash_ipmark4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmark4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip, ip_to = 0; + u32 ip, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -148,13 +148,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip, ip_to, cidr); } - if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE) - return -ERANGE; - if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to; ip++) { + for (; ip <= ip_to; ip++, i++) { e.ip = htonl(ip); + if (i > IPSET_MAX_RANGE) { + hash_ipmark4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 2ffbd0b78a8c5..e977b5a9c48dc 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -112,11 +112,11 @@ static int hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipport4 *h = set->data; + struct hash_ipport4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip, ip_to = 0, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; bool with_ports = false; int ret; @@ -184,17 +184,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } - if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; - if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; - for (; p <= port_to; p++) { + for (; p <= port_to; p++, i++) { e.ip = htonl(ip); e.port = htons(p); + if (i > IPSET_MAX_RANGE) { + hash_ipport4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 334fb1ad0e86c..39a01934b1536 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -108,11 +108,11 @@ static int hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipportip4 *h = set->data; + struct hash_ipportip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip, ip_to = 0, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; bool with_ports = false; int ret; @@ -180,17 +180,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } - if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; - if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; - for (; p <= port_to; p++) { + for (; p <= port_to; p++, i++) { e.ip = htonl(ip); e.port = htons(p); + if (i > IPSET_MAX_RANGE) { + hash_ipportip4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 7df94f437f600..5c6de605a9fb7 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -160,12 +160,12 @@ static int hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipportnet4 *h = set->data; + struct hash_ipportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to = 0, ip2; + u32 ip2_from = 0, ip2_to = 0, ip2, i = 0; bool with_ports = false; u8 cidr; int ret; @@ -253,9 +253,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } - if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; - ip2_to = ip2_from; if (tb[IPSET_ATTR_IP2_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); @@ -282,9 +279,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], for (; p <= port_to; p++) { e.port = htons(p); do { + i++; e.ip2 = htonl(ip2); ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr); e.cidr = cidr - 1; + if (i > IPSET_MAX_RANGE) { + hash_ipportnet4_data_next(&h->next, + &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 1422739d9aa25..ce0a9ce5a91f1 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -136,11 +136,11 @@ static int hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_net4 *h = set->data; + struct hash_net4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, ipn, n = 0; + u32 ip = 0, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -188,19 +188,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); - n++; - } while (ipn++ < ip_to); - - if (n > IPSET_MAX_RANGE) - return -ERANGE; if (retried) ip = ntohl(h->next.ip); do { + i++; e.ip = htonl(ip); + if (i > IPSET_MAX_RANGE) { + hash_net4_data_next(&h->next, &e); + return -ERANGE; + } ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 9810f5bf63f5e..0310732862362 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, ipn, n = 0; + u32 ip = 0, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -256,19 +256,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip, ip_to, e.cidr); } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); - n++; - } while (ipn++ < ip_to); - - if (n > IPSET_MAX_RANGE) - return -ERANGE; if (retried) ip = ntohl(h->next.ip); do { + i++; e.ip = htonl(ip); + if (i > IPSET_MAX_RANGE) { + hash_netiface4_data_next(&h->next, &e); + return -ERANGE; + } ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index cdfb78c6e0d3d..8fbe649c9dd3d 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -166,13 +166,12 @@ static int hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netnet4 *h = set->data; + struct hash_netnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0; - u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn; - u64 n = 0, m = 0; + u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -248,19 +247,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); - n++; - } while (ipn++ < ip_to); - ipn = ip2_from; - do { - ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); - m++; - } while (ipn++ < ip2_to); - - if (n*m > IPSET_MAX_RANGE) - return -ERANGE; if (retried) { ip = ntohl(h->next.ip[0]); @@ -273,7 +259,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], e.ip[0] = htonl(ip); ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); do { + i++; e.ip[1] = htonl(ip2); + if (i > IPSET_MAX_RANGE) { + hash_netnet4_data_next(&h->next, &e); + return -ERANGE; + } ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 09cf72eb37f8d..d1a0628df4ef3 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -154,12 +154,11 @@ static int hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netport4 *h = set->data; + struct hash_netport4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn; - u64 n = 0; + u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0; bool with_ports = false; u8 cidr; int ret; @@ -236,14 +235,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip, ip_to, e.cidr + 1); } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr); - n++; - } while (ipn++ < ip_to); - - if (n*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; if (retried) { ip = ntohl(h->next.ip); @@ -255,8 +246,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], e.ip = htonl(ip); ip = ip_set_range_to_cidr(ip, ip_to, &cidr); e.cidr = cidr - 1; - for (; p <= port_to; p++) { + for (; p <= port_to; p++, i++) { e.port = htons(p); + if (i > IPSET_MAX_RANGE) { + hash_netport4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; From a764da46cd15f8b40292d2c0b29c4bf9a3e66c7e Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Wed, 9 Nov 2022 17:19:05 +0800 Subject: [PATCH 0266/1593] drm/virtio: Fix memory leak in virtio_gpu_object_create() The virtio_gpu_object_shmem_init() will alloc memory and save it in @ents, so when virtio_gpu_array_alloc() fails, this memory should be freed, this patch fixes it. Fixes: e7fef0923303 ("drm/virtio: Simplify error handling of virtio_gpu_object_create()") Signed-off-by: Xiu Jianfeng Reviewed-by: Dmitry Osipenko Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20221109091905.55451-1-xiujianfeng@huawei.com --- drivers/gpu/drm/virtio/virtgpu_object.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 8d7728181de01..c7e74cf130221 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -184,7 +184,7 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, struct virtio_gpu_object_array *objs = NULL; struct drm_gem_shmem_object *shmem_obj; struct virtio_gpu_object *bo; - struct virtio_gpu_mem_entry *ents; + struct virtio_gpu_mem_entry *ents = NULL; unsigned int nents; int ret; @@ -210,7 +210,7 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, ret = -ENOMEM; objs = virtio_gpu_array_alloc(1); if (!objs) - goto err_put_id; + goto err_free_entry; virtio_gpu_array_add_obj(objs, &bo->base.base); ret = virtio_gpu_array_lock_resv(objs); @@ -239,6 +239,8 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, err_put_objs: virtio_gpu_array_put_free(objs); +err_free_entry: + kvfree(ents); err_put_id: virtio_gpu_resource_id_put(vgdev, bo->hw_res_handle); err_free_gem: From 03dec92c4f788c54a7c01b40a018f601eb8a6c52 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 23 Nov 2022 03:13:03 +0300 Subject: [PATCH 0267/1593] drm/scheduler: Fix lockup in drm_sched_entity_kill() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The drm_sched_entity_kill() is invoked twice by drm_sched_entity_destroy() while userspace process is exiting or being killed. First time it's invoked when sched entity is flushed and second time when entity is released. This causes a lockup within wait_for_completion(entity_idle) due to how completion API works. Calling wait_for_completion() more times than complete() was invoked is a error condition that causes lockup because completion internally uses counter for complete/wait calls. The complete_all() must be used instead in such cases. This patch fixes lockup of Panfrost driver that is reproducible by killing any application in a middle of 3d drawing operation. Fixes: 2fdb8a8f07c2 ("drm/scheduler: rework entity flush, kill and fini") Signed-off-by: Dmitry Osipenko Reviewed-by: Christian König Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://patchwork.freedesktop.org/patch/msgid/20221123001303.533968-1-dmitry.osipenko@collabora.com --- drivers/gpu/drm/scheduler/sched_entity.c | 2 +- drivers/gpu/drm/scheduler/sched_main.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index fe09e5be79bdd..15d04a0ec6234 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -81,7 +81,7 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, init_completion(&entity->entity_idle); /* We start in an idle state. */ - complete(&entity->entity_idle); + complete_all(&entity->entity_idle); spin_lock_init(&entity->rq_lock); spsc_queue_init(&entity->job_queue); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 31f3a1267be44..fd22d753b4ed0 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -987,7 +987,7 @@ static int drm_sched_main(void *param) sched_job = drm_sched_entity_pop_job(entity); if (!sched_job) { - complete(&entity->entity_idle); + complete_all(&entity->entity_idle); continue; } @@ -998,7 +998,7 @@ static int drm_sched_main(void *param) trace_drm_run_job(sched_job, entity); fence = sched->ops->run_job(sched_job); - complete(&entity->entity_idle); + complete_all(&entity->entity_idle); drm_sched_fence_scheduled(s_fence); if (!IS_ERR_OR_NULL(fence)) { From 0a6564ebd953c4590663c9a3c99a3ea9920ade6f Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 29 Dec 2022 13:09:00 +0400 Subject: [PATCH 0268/1593] perf tools: Fix resources leak in perf_data__open_dir() In perf_data__open_dir(), opendir() opens the directory stream. Add missing closedir() to release it after use. Fixes: eb6176709b235b96 ("perf data: Add perf_data__open_dir_data function") Reviewed-by: Adrian Hunter Signed-off-by: Miaoqian Lin Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221229090903.1402395-1-linmq006@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index a7f68c309545d..fc16299c915f9 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -132,6 +132,7 @@ int perf_data__open_dir(struct perf_data *data) file->size = st.st_size; } + closedir(dir); if (!files) return -EINVAL; @@ -140,6 +141,7 @@ int perf_data__open_dir(struct perf_data *data) return 0; out_err: + closedir(dir); close_dir(files, nr); return ret; } From f685dd7a8025f2554f73748cfdb8143a21fb92c7 Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Mon, 2 Jan 2023 14:57:30 +0100 Subject: [PATCH 0269/1593] fbdev: matroxfb: G200eW: Increase max memory from 1 MB to 16 MB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 62d89a7d49af ("video: fbdev: matroxfb: set maxvram of vbG200eW to the same as vbG200 to avoid black screen") accidently decreases the maximum memory size for the Matrox G200eW (102b:0532) from 8 MB to 1 MB by missing one zero. This caused the driver initialization to fail with the messages below, as the minimum required VRAM size is 2 MB: [ 9.436420] matroxfb: Matrox MGA-G200eW (PCI) detected [ 9.444502] matroxfb: cannot determine memory size [ 9.449316] matroxfb: probe of 0000:0a:03.0 failed with error -1 So, add the missing 0 to make it the intended 16 MB. Successfully tested on the Dell PowerEdge R910/0KYD3D, BIOS 2.10.0 08/29/2013, that the warning is gone. While at it, add a leading 0 to the maxdisplayable entry, so it’s aligned properly. The value could probably also be increased from 8 MB to 16 MB, as the G200 uses the same values, but I have not checked any datasheet. Note, matroxfb is obsolete and superseded by the maintained DRM driver mga200, which is used by default on most systems where both drivers are available. Therefore, on most systems it was only a cosmetic issue. Fixes: 62d89a7d49af ("video: fbdev: matroxfb: set maxvram of vbG200eW to the same as vbG200 to avoid black screen") Link: https://lore.kernel.org/linux-fbdev/972999d3-b75d-5680-fcef-6e6905c52ac5@suse.de/T/#mb6953a9995ebd18acc8552f99d6db39787aec775 Cc: it+linux-fbdev@molgen.mpg.de Cc: Z. Liu Cc: Rich Felker Cc: stable@vger.kernel.org Signed-off-by: Paul Menzel Signed-off-by: Helge Deller --- drivers/video/fbdev/matrox/matroxfb_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/matrox/matroxfb_base.c b/drivers/video/fbdev/matrox/matroxfb_base.c index 0d3cee7ae7268..a043a737ea9f7 100644 --- a/drivers/video/fbdev/matrox/matroxfb_base.c +++ b/drivers/video/fbdev/matrox/matroxfb_base.c @@ -1378,8 +1378,8 @@ static struct video_board vbG200 = { .lowlevel = &matrox_G100 }; static struct video_board vbG200eW = { - .maxvram = 0x100000, - .maxdisplayable = 0x800000, + .maxvram = 0x1000000, + .maxdisplayable = 0x0800000, .accelID = FB_ACCEL_MATROX_MGAG200, .lowlevel = &matrox_G100 }; From a8f54d940196c8bd9aced9c82557fdc63baefb02 Mon Sep 17 00:00:00 2001 From: Eric Lin Date: Sat, 31 Dec 2022 05:27:31 +0000 Subject: [PATCH 0270/1593] perf tools riscv: Fix build error on riscv due to missing header for 'struct perf_sample' Since the definition of 'struct perf_sample' has been moved to sample.h, we need to include this header file to fix the build error as follows: arch/riscv/util/unwind-libdw.c: In function 'libdw__arch_set_initial_registers': arch/riscv/util/unwind-libdw.c:12:50: error: invalid use of undefined type 'struct perf_sample' 12 | struct regs_dump *user_regs = &ui->sample->user_regs; | ^~ Fixes: 9823147da6c893d9 ("perf tools: Move 'struct perf_sample' to a separate header file to disentangle headers") Signed-off-by: Eric Lin Cc: Alexander Shishkin Cc: greentime.hu@sifive.com Cc: Jiri Olsa Cc: linux-riscv@lists.infradead.org Cc: Namhyung Kim Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Vincent Chen Link: https://lore.kernel.org/r/20221231052731.24908-1-eric.lin@sifive.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/riscv/util/unwind-libdw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/riscv/util/unwind-libdw.c b/tools/perf/arch/riscv/util/unwind-libdw.c index 19536e1728507..54a198714eb82 100644 --- a/tools/perf/arch/riscv/util/unwind-libdw.c +++ b/tools/perf/arch/riscv/util/unwind-libdw.c @@ -4,7 +4,7 @@ #include #include "../../util/unwind-libdw.h" #include "../../util/perf_regs.h" -#include "../../util/event.h" +#include "../../util/sample.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { From 55c41f2e4f7e81e48f3ecc9fba1e316e770213f2 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Mon, 26 Dec 2022 08:57:03 +0000 Subject: [PATCH 0271/1593] perf help: Use HAVE_LIBTRACEEVENT to filter out unsupported commands Commands such as kmem, kwork, lock, sched, trace and timechart depend on libtraceevent, these commands need to be isolated using HAVE_LIBTRACEEVENT macro when cmdlist generation. The output of the generate-cmdlist.sh script is as follows: # ./util/generate-cmdlist.sh /* Automatically generated by ./util/generate-cmdlist.sh */ struct cmdname_help { char name[16]; char help[80]; }; static struct cmdname_help common_cmds[] = { {"annotate", "Read perf.data (created by perf record) and display annotated code"}, {"archive", "Create archive with object files with build-ids found in perf.data file"}, {"bench", "General framework for benchmark suites"}, {"buildid-cache", "Manage build-id cache."}, {"buildid-list", "List the buildids in a perf.data file"}, {"c2c", "Shared Data C2C/HITM Analyzer."}, {"config", "Get and set variables in a configuration file."}, {"daemon", "Run record sessions on background"}, {"data", "Data file related processing"}, {"diff", "Read perf.data files and display the differential profile"}, {"evlist", "List the event names in a perf.data file"}, {"ftrace", "simple wrapper for kernel's ftrace functionality"}, {"inject", "Filter to augment the events stream with additional information"}, {"iostat", "Show I/O performance metrics"}, {"kallsyms", "Searches running kernel for symbols"}, {"kvm", "Tool to trace/measure kvm guest os"}, {"list", "List all symbolic event types"}, {"mem", "Profile memory accesses"}, {"record", "Run a command and record its profile into perf.data"}, {"report", "Read perf.data (created by perf record) and display the profile"}, {"script", "Read perf.data (created by perf record) and display trace output"}, {"stat", "Run a command and gather performance counter statistics"}, {"test", "Runs sanity tests."}, {"top", "System profiling tool."}, {"version", "display the version of perf binary"}, #ifdef HAVE_LIBELF_SUPPORT {"probe", "Define new dynamic tracepoints"}, #endif /* HAVE_LIBELF_SUPPORT */ #if defined(HAVE_LIBTRACEEVENT) && (defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)) {"trace", "strace inspired tool"}, #endif /* HAVE_LIBTRACEEVENT && (HAVE_LIBAUDIT_SUPPORT || HAVE_SYSCALL_TABLE_SUPPORT) */ #ifdef HAVE_LIBTRACEEVENT {"kmem", "Tool to trace/measure kernel memory properties"}, {"kwork", "Tool to trace/measure kernel work properties (latencies)"}, {"lock", "Analyze lock events"}, {"sched", "Tool to trace/measure scheduler properties (latencies)"}, {"timechart", "Tool to visualize total system behavior during a workload"}, #endif /* HAVE_LIBTRACEEVENT */ }; Fixes: 378ef0f5d9d7f465 ("perf build: Use libtraceevent from the system") Signed-off-by: Yang Jihong Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221226085703.95081-1-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/command-list.txt | 10 +++++----- tools/perf/util/generate-cmdlist.sh | 19 +++++++++++++++++-- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 8fcab5ad00c5c..e8d2762adade1 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -16,20 +16,20 @@ perf-ftrace mainporcelain common perf-inject mainporcelain common perf-iostat mainporcelain common perf-kallsyms mainporcelain common -perf-kmem mainporcelain common +perf-kmem mainporcelain traceevent perf-kvm mainporcelain common -perf-kwork mainporcelain common +perf-kwork mainporcelain traceevent perf-list mainporcelain common -perf-lock mainporcelain common +perf-lock mainporcelain traceevent perf-mem mainporcelain common perf-probe mainporcelain full perf-record mainporcelain common perf-report mainporcelain common -perf-sched mainporcelain common +perf-sched mainporcelain traceevent perf-script mainporcelain common perf-stat mainporcelain common perf-test mainporcelain common -perf-timechart mainporcelain common +perf-timechart mainporcelain traceevent perf-top mainporcelain common perf-trace mainporcelain audit perf-version mainporcelain common diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index c3cef36d41768..1b5140e5ce997 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -38,7 +38,7 @@ do done echo "#endif /* HAVE_LIBELF_SUPPORT */" -echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)" +echo "#if defined(HAVE_LIBTRACEEVENT) && (defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT))" sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt | sort | while read cmd @@ -51,5 +51,20 @@ do p }' "Documentation/perf-$cmd.txt" done -echo "#endif /* HAVE_LIBELF_SUPPORT */" +echo "#endif /* HAVE_LIBTRACEEVENT && (HAVE_LIBAUDIT_SUPPORT || HAVE_SYSCALL_TABLE_SUPPORT) */" + +echo "#ifdef HAVE_LIBTRACEEVENT" +sed -n -e 's/^perf-\([^ ]*\)[ ].* traceevent.*/\1/p' command-list.txt | +sort | +while read cmd +do + sed -n ' + /^NAME/,/perf-'"$cmd"'/H + ${ + x + s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/ + p + }' "Documentation/perf-$cmd.txt" +done +echo "#endif /* HAVE_LIBTRACEEVENT */" echo "};" From 8509419758f2cc28dd05370385af0d91573b76b4 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 9 Aug 2022 21:25:09 -0500 Subject: [PATCH 0272/1593] mmc: sunxi-mmc: Fix clock refcount imbalance during unbind If the controller is suspended by runtime PM, the clock is already disabled, so do not try to disable it again during removal. Use pm_runtime_disable() to flush any pending runtime PM transitions. Fixes: 9a8e1e8cc2c0 ("mmc: sunxi: Add runtime_pm support") Signed-off-by: Samuel Holland Acked-by: Jernej Skrabec Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220810022509.43743-1-samuel@sholland.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sunxi-mmc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c index b16e12e62e722..3db9f32d6a7b9 100644 --- a/drivers/mmc/host/sunxi-mmc.c +++ b/drivers/mmc/host/sunxi-mmc.c @@ -1492,9 +1492,11 @@ static int sunxi_mmc_remove(struct platform_device *pdev) struct sunxi_mmc_host *host = mmc_priv(mmc); mmc_remove_host(mmc); - pm_runtime_force_suspend(&pdev->dev); - disable_irq(host->irq); - sunxi_mmc_disable(host); + pm_runtime_disable(&pdev->dev); + if (!pm_runtime_status_suspended(&pdev->dev)) { + disable_irq(host->irq); + sunxi_mmc_disable(host); + } dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma); mmc_free_host(mmc); From 92d43bd3bc9728c1fb114d7011d46f5ea9489e28 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 8 Nov 2022 15:14:20 +0100 Subject: [PATCH 0273/1593] drm/imx: ipuv3-plane: Fix overlay plane width ipu_src_rect_width() was introduced to support odd screen resolutions such as 1366x768 by internally rounding up primary plane width to a multiple of 8 and compensating with reduced horizontal blanking. This also caused overlay plane width to be rounded up, which was not intended. Fix overlay plane width by limiting the rounding up to the primary plane. drm_rect_width(&new_state->src) >> 16 is the same value as drm_rect_width(dst) because there is no plane scaling support. Fixes: 94dfec48fca7 ("drm/imx: Add 8 pixel alignment fix") Reviewed-by: Lucas Stach Link: https://lore.kernel.org/r/20221108141420.176696-1-p.zabel@pengutronix.de Signed-off-by: Philipp Zabel Link: https://patchwork.freedesktop.org/patch/msgid/20221108141420.176696-1-p.zabel@pengutronix.de Tested-by: Ian Ray (cherry picked from commit 4333472f8d7befe62359fecb1083cd57a6e07bfc) Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-plane.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index dba4f7d81d693..80142d9a4a552 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -614,6 +614,11 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, break; } + if (ipu_plane->dp_flow == IPU_DP_FLOW_SYNC_BG) + width = ipu_src_rect_width(new_state); + else + width = drm_rect_width(&new_state->src) >> 16; + eba = drm_plane_state_to_eba(new_state, 0); /* @@ -622,8 +627,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, */ if (ipu_state->use_pre) { axi_id = ipu_chan_assign_axi_id(ipu_plane->dma); - ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id, - ipu_src_rect_width(new_state), + ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id, width, drm_rect_height(&new_state->src) >> 16, fb->pitches[0], fb->format->format, fb->modifier, &eba); @@ -678,9 +682,8 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, break; } - ipu_dmfc_config_wait4eot(ipu_plane->dmfc, ALIGN(drm_rect_width(dst), 8)); + ipu_dmfc_config_wait4eot(ipu_plane->dmfc, width); - width = ipu_src_rect_width(new_state); height = drm_rect_height(&new_state->src) >> 16; info = drm_format_info(fb->format->format); ipu_calculate_bursts(width, info->cpp[0], fb->pitches[0], @@ -744,8 +747,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, ipu_cpmem_set_burstsize(ipu_plane->ipu_ch, 16); ipu_cpmem_zero(ipu_plane->alpha_ch); - ipu_cpmem_set_resolution(ipu_plane->alpha_ch, - ipu_src_rect_width(new_state), + ipu_cpmem_set_resolution(ipu_plane->alpha_ch, width, drm_rect_height(&new_state->src) >> 16); ipu_cpmem_set_format_passthrough(ipu_plane->alpha_ch, 8); ipu_cpmem_set_high_priority(ipu_plane->alpha_ch); From f24fb53984cfba42ff72a47466eabfd772da647b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahelenia=20Ziemia=C5=84ska?= Date: Tue, 27 Dec 2022 21:58:00 +0100 Subject: [PATCH 0274/1593] perf tools: Don't include signature in version strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This explodes the build if HEAD is signed, since the generated version is gpg: Signature made Mon 26 Dec 2022 20:34:48 CET, then a few more lines, then the SHA. Signed-off-by: Ahelenia Ziemiańska Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/7c9637711271f50ec2341fb8a7c29585335dab04.1672174189.git.nabijaczleweli@nabijaczleweli.xyz Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/Makefile | 2 +- tools/perf/util/PERF-VERSION-GEN | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 6e7b88917ca0a..ba5d942e4c6aa 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -267,7 +267,7 @@ $(OUTPUT)%.xml : %.txt $(ASCIIDOC) -b docbook -d manpage \ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \ -aperf_date=$(shell git log -1 --pretty="format:%cd" \ - --date=short $<) \ + --date=short --no-show-signature $<) \ -o $@+ $< && \ mv $@+ $@ diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index 3cc42821d9b39..d7dc7c28508c0 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -19,7 +19,7 @@ TAG= if test -d ../../.git -o -f ../../.git then TAG=$(MAKEFLAGS= make -sC ../.. kernelversion) - CID=$(git log -1 --abbrev=12 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID" + CID=$(git log -1 --abbrev=12 --pretty=format:"%h" --no-show-signature 2>/dev/null) && CID="-g$CID" elif test -f ../../PERF-VERSION-FILE then TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g') From cad853374d85fe678d721512cecfabd7636e51f3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 13 Dec 2022 13:08:26 -0500 Subject: [PATCH 0275/1593] nfsd: fix handling of readdir in v4root vs. mount upcall timeout If v4 READDIR operation hits a mountpoint and gets back an error, then it will include that entry in the reply and set RDATTR_ERROR for it to the error. That's fine for "normal" exported filesystems, but on the v4root, we need to be more careful to only expose the existence of dentries that lead to exports. If the mountd upcall times out while checking to see whether a mountpoint on the v4root is exported, then we have no recourse other than to fail the whole operation. Cc: Steve Dickson Link: https://bugzilla.kernel.org/show_bug.cgi?id=216777 Reported-by: JianHong Yin Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Cc: --- fs/nfsd/nfs4xdr.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2b4ae858c89be..ebb4d02a42ce6 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3629,6 +3629,17 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, case nfserr_noent: xdr_truncate_encode(xdr, start_offset); goto skip_entry; + case nfserr_jukebox: + /* + * The pseudoroot should only display dentries that lead to + * exports. If we get EJUKEBOX here, then we can't tell whether + * this entry should be included. Just fail the whole READDIR + * with NFS4ERR_DELAY in that case, and hope that the situation + * will resolve itself by the client's next attempt. + */ + if (cd->rd_fhp->fh_export->ex_flags & NFSEXP_V4ROOT) + goto fail; + fallthrough; default: /* * If the client requested the RDATTR_ERROR attribute, From 77fe30fed1c0cd282eadb0728999ea3d3350e0b0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 2 Jan 2023 13:06:45 -0300 Subject: [PATCH 0276/1593] perf tools: Fix segfault when trying to process tracepoints in perf.data and not linked with libtraceevent When we have a perf.data file with tracepoints, such as: # perf evlist -f probe_perf:lzma_decompress_to_file # Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events # We end up segfaulting when using perf built with NO_LIBTRACEEVENT=1 by trying to find an evsel with a NULL 'event_name' variable: (gdb) run report --stdio -f Starting program: /root/bin/perf report --stdio -f Program received signal SIGSEGV, Segmentation fault. 0x000000000055219d in find_evsel (evlist=0xfda7b0, event_name=0x0) at util/sort.c:2830 warning: Source file is more recent than executable. 2830 if (event_name[0] == '%') { Missing separate debuginfos, use: dnf debuginfo-install bzip2-libs-1.0.8-11.fc36.x86_64 cyrus-sasl-lib-2.1.27-18.fc36.x86_64 elfutils-debuginfod-client-0.188-3.fc36.x86_64 elfutils-libelf-0.188-3.fc36.x86_64 elfutils-libs-0.188-3.fc36.x86_64 glibc-2.35-20.fc36.x86_64 keyutils-libs-1.6.1-4.fc36.x86_64 krb5-libs-1.19.2-12.fc36.x86_64 libbrotli-1.0.9-7.fc36.x86_64 libcap-2.48-4.fc36.x86_64 libcom_err-1.46.5-2.fc36.x86_64 libcurl-7.82.0-12.fc36.x86_64 libevent-2.1.12-6.fc36.x86_64 libgcc-12.2.1-4.fc36.x86_64 libidn2-2.3.4-1.fc36.x86_64 libnghttp2-1.51.0-1.fc36.x86_64 libpsl-0.21.1-5.fc36.x86_64 libselinux-3.3-4.fc36.x86_64 libssh-0.9.6-4.fc36.x86_64 libstdc++-12.2.1-4.fc36.x86_64 libunistring-1.0-1.fc36.x86_64 libunwind-1.6.2-2.fc36.x86_64 libxcrypt-4.4.33-4.fc36.x86_64 libzstd-1.5.2-2.fc36.x86_64 numactl-libs-2.0.14-5.fc36.x86_64 opencsd-1.2.0-1.fc36.x86_64 openldap-2.6.3-1.fc36.x86_64 openssl-libs-3.0.5-2.fc36.x86_64 slang-2.3.2-11.fc36.x86_64 xz-libs-5.2.5-9.fc36.x86_64 zlib-1.2.11-33.fc36.x86_64 (gdb) bt #0 0x000000000055219d in find_evsel (evlist=0xfda7b0, event_name=0x0) at util/sort.c:2830 #1 0x0000000000552416 in add_dynamic_entry (evlist=0xfda7b0, tok=0xffb6eb "trace", level=2) at util/sort.c:2976 #2 0x0000000000552d26 in sort_dimension__add (list=0xf93e00 , tok=0xffb6eb "trace", evlist=0xfda7b0, level=2) at util/sort.c:3193 #3 0x0000000000552e1c in setup_sort_list (list=0xf93e00 , str=0xffb6eb "trace", evlist=0xfda7b0) at util/sort.c:3227 #4 0x00000000005532fa in __setup_sorting (evlist=0xfda7b0) at util/sort.c:3381 #5 0x0000000000553cdc in setup_sorting (evlist=0xfda7b0) at util/sort.c:3608 #6 0x000000000042eb9f in cmd_report (argc=0, argv=0x7fffffffe470) at builtin-report.c:1596 #7 0x00000000004aee7e in run_builtin (p=0xf64ca0 , argc=3, argv=0x7fffffffe470) at perf.c:330 #8 0x00000000004af0f2 in handle_internal_command (argc=3, argv=0x7fffffffe470) at perf.c:384 #9 0x00000000004af241 in run_argv (argcp=0x7fffffffe29c, argv=0x7fffffffe290) at perf.c:428 #10 0x00000000004af5fc in main (argc=3, argv=0x7fffffffe470) at perf.c:562 (gdb) So check if we have tracepoint events in add_dynamic_entry() and bail out instead: # perf report --stdio -f This perf binary isn't linked with libtraceevent, can't process probe_perf:lzma_decompress_to_file Error: Unknown --sort key: `trace' # Fixes: 378ef0f5d9d7f465 ("perf build: Use libtraceevent from the system") Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/Y7MDb7kRaHZB6APC@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index e188f74698dd3..37662cdec5eef 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2971,6 +2971,18 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok, ret = add_all_matching_fields(evlist, field_name, raw_trace, level); goto out; } +#else + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) { + pr_err("%s %s", ret ? "," : "This perf binary isn't linked with libtraceevent, can't process", evsel__name(evsel)); + ret = -ENOTSUP; + } + } + + if (ret) { + pr_err("\n"); + goto out; + } #endif evsel = find_evsel(evlist, event_name); From 5b24ac2dfd3eb3e36f794af3aa7f2828b19035bd Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 22 Dec 2022 23:28:21 -0800 Subject: [PATCH 0277/1593] kcsan: test: don't put the expect array on the stack Size of the 'expect' array in the __report_matches is 1536 bytes, which is exactly the default frame size warning limit of the xtensa architecture. As a result allmodconfig xtensa kernel builds with the gcc that does not support the compiler plugins (which otherwise would push the said warning limit to 2K) fail with the following message: kernel/kcsan/kcsan_test.c:257:1: error: the frame size of 1680 bytes is larger than 1536 bytes Fix it by dynamically allocating the 'expect' array. Signed-off-by: Max Filippov Reviewed-by: Marco Elver Tested-by: Marco Elver --- kernel/kcsan/kcsan_test.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index dcec1b743c694..a60c561724be9 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c @@ -159,7 +159,7 @@ static bool __report_matches(const struct expect_report *r) const bool is_assert = (r->access[0].type | r->access[1].type) & KCSAN_ACCESS_ASSERT; bool ret = false; unsigned long flags; - typeof(observed.lines) expect; + typeof(*observed.lines) *expect; const char *end; char *cur; int i; @@ -168,6 +168,10 @@ static bool __report_matches(const struct expect_report *r) if (!report_available()) return false; + expect = kmalloc(sizeof(observed.lines), GFP_KERNEL); + if (WARN_ON(!expect)) + return false; + /* Generate expected report contents. */ /* Title */ @@ -253,6 +257,7 @@ static bool __report_matches(const struct expect_report *r) strstr(observed.lines[2], expect[1]))); out: spin_unlock_irqrestore(&observed.lock, flags); + kfree(expect); return ret; } From d00dd2f2645dca04cf399d8fc692f3f69b6dd996 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 22 Nov 2022 12:51:22 +0100 Subject: [PATCH 0278/1593] x86/kexec: Fix double-free of elf header buffer After b3e34a47f989 ("x86/kexec: fix memory leak of elf header buffer"), freeing image->elf_headers in the error path of crash_load_segments() is not needed because kimage_file_post_load_cleanup() will take care of that later. And not clearing it could result in a double-free. Drop the superfluous vfree() call at the error path of crash_load_segments(). Fixes: b3e34a47f989 ("x86/kexec: fix memory leak of elf header buffer") Signed-off-by: Takashi Iwai Signed-off-by: Borislav Petkov (AMD) Acked-by: Baoquan He Acked-by: Vlastimil Babka Cc: Link: https://lore.kernel.org/r/20221122115122.13937-1-tiwai@suse.de --- arch/x86/kernel/crash.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 9730c88530fc8..305514431f26e 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -401,10 +401,8 @@ int crash_load_segments(struct kimage *image) kbuf.buf_align = ELF_CORE_HEADER_ALIGN; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); - if (ret) { - vfree((void *)image->elf_headers); + if (ret) return ret; - } image->elf_load_addr = kbuf.mem; pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n", image->elf_load_addr, kbuf.bufsz, kbuf.memsz); From 0226635c304cfd5c9db9b78c259cb713819b057e Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 2 Jan 2023 23:05:33 +0900 Subject: [PATCH 0279/1593] fs/ntfs3: don't hold ni_lock when calling truncate_setsize() syzbot is reporting hung task at do_user_addr_fault() [1], for there is a silent deadlock between PG_locked bit and ni_lock lock. Since filemap_update_page() calls filemap_read_folio() after calling folio_trylock() which will set PG_locked bit, ntfs_truncate() must not call truncate_setsize() which will wait for PG_locked bit to be cleared when holding ni_lock lock. Link: https://lore.kernel.org/all/00000000000060d41f05f139aa44@google.com/ Link: https://syzkaller.appspot.com/bug?extid=bed15dbf10294aa4f2ae [1] Reported-by: syzbot Debugged-by: Linus Torvalds Co-developed-by: Hillf Danton Signed-off-by: Hillf Danton Signed-off-by: Tetsuo Handa Fixes: 4342306f0f0d ("fs/ntfs3: Add file operations and implementation") Signed-off-by: Linus Torvalds --- fs/ntfs3/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index e5399ebc3a2b2..d294cd9756887 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -390,10 +390,10 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) new_valid = ntfs_up_block(sb, min_t(u64, ni->i_valid, new_size)); - ni_lock(ni); - truncate_setsize(inode, new_size); + ni_lock(ni); + down_write(&ni->file.run_lock); err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, &new_valid, ni->mi.sbi->options->prealloc, NULL); From af82425c6a2d2f347c79b63ce74fca6dc6be157f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 2 Jan 2023 16:49:46 -0700 Subject: [PATCH 0280/1593] io_uring/io-wq: free worker if task_work creation is canceled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we cancel the task_work, the worker will never come into existance. As this is the last reference to it, ensure that we get it freed appropriately. Cc: stable@vger.kernel.org Reported-by: 진호 Signed-off-by: Jens Axboe --- io_uring/io-wq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 6f1d0e5df23ad..992dcd9f8c4cf 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1230,6 +1230,7 @@ static void io_wq_cancel_tw_create(struct io_wq *wq) worker = container_of(cb, struct io_worker, create_work); io_worker_cancel_cb(worker); + kfree(worker); } } From 9c807965483f42df1d053b7436eedd6cf28ece6f Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Mon, 2 Jan 2023 12:53:35 +0300 Subject: [PATCH 0281/1593] drivers/net/bonding/bond_3ad: return when there's no aggregator Otherwise we would dereference a NULL aggregator pointer when calling __set_agg_ports_ready on the line below. Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Daniil Tatianin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_3ad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 455b555275f12..c99ffe6c683a3 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1549,6 +1549,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) slave_err(bond->dev, port->slave->dev, "Port %d did not find a suitable aggregator\n", port->actor_port_number); + return; } } /* if all aggregator's ports are READY_N == TRUE, set ready=TRUE From 4af1b64f80fbe1275fb02c5f1c0cef099a4a231f Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Tue, 3 Jan 2023 09:20:12 +0530 Subject: [PATCH 0282/1593] octeontx2-pf: Fix lmtst ID used in aura free Current code uses per_cpu pointer to get the lmtst_id mapped to the core on which aura_free() is executed. Using per_cpu pointer without preemption disable causing mismatch between lmtst_id and core on which pointer gets freed. This patch fixes the issue by disabling preemption around aura_free. Fixes: ef6c8da71eaf ("octeontx2-pf: cn10K: Reserve LMTST lines per core") Signed-off-by: Sunil Goutham Signed-off-by: Geetha sowjanya Signed-off-by: David S. Miller --- .../marvell/octeontx2/nic/otx2_common.c | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 9e10e7471b887..88f8772a61cd5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1012,6 +1012,7 @@ static void otx2_pool_refill_task(struct work_struct *work) rbpool = cq->rbpool; free_ptrs = cq->pool_ptrs; + get_cpu(); while (cq->pool_ptrs) { if (otx2_alloc_rbuf(pfvf, rbpool, &bufptr)) { /* Schedule a WQ if we fails to free atleast half of the @@ -1031,6 +1032,7 @@ static void otx2_pool_refill_task(struct work_struct *work) pfvf->hw_ops->aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM); cq->pool_ptrs--; } + put_cpu(); cq->refill_task_sched = false; } @@ -1368,6 +1370,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) if (err) goto fail; + get_cpu(); /* Allocate pointers and free them to aura/pool */ for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) { pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); @@ -1376,18 +1379,24 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) sq = &qset->sq[qidx]; sq->sqb_count = 0; sq->sqb_ptrs = kcalloc(num_sqbs, sizeof(*sq->sqb_ptrs), GFP_KERNEL); - if (!sq->sqb_ptrs) - return -ENOMEM; + if (!sq->sqb_ptrs) { + err = -ENOMEM; + goto err_mem; + } for (ptr = 0; ptr < num_sqbs; ptr++) { - if (otx2_alloc_rbuf(pfvf, pool, &bufptr)) - return -ENOMEM; + err = otx2_alloc_rbuf(pfvf, pool, &bufptr); + if (err) + goto err_mem; pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr); sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr; } } - return 0; +err_mem: + put_cpu(); + return err ? -ENOMEM : 0; + fail: otx2_mbox_reset(&pfvf->mbox.mbox, 0); otx2_aura_pool_free(pfvf); @@ -1426,18 +1435,21 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf) if (err) goto fail; + get_cpu(); /* Allocate pointers and free them to aura/pool */ for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) { pool = &pfvf->qset.pool[pool_id]; for (ptr = 0; ptr < num_ptrs; ptr++) { - if (otx2_alloc_rbuf(pfvf, pool, &bufptr)) - return -ENOMEM; + err = otx2_alloc_rbuf(pfvf, pool, &bufptr); + if (err) + goto err_mem; pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr + OTX2_HEAD_ROOM); } } - - return 0; +err_mem: + put_cpu(); + return err ? -ENOMEM : 0; fail: otx2_mbox_reset(&pfvf->mbox.mbox, 0); otx2_aura_pool_free(pfvf); From 7dc61838541928895abae6d2355258e02a251bba Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Tue, 3 Jan 2023 01:50:38 -0500 Subject: [PATCH 0283/1593] net: dpaa: Fix dtsec check for PCS availability We want to fail if the PCS is not available, not if it is available. Fix this condition. Fixes: 5d93cfcf7360 ("net: dpaa: Convert to phylink") Reported-by: Christian Zigotzky Signed-off-by: Sean Anderson Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman_dtsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c index d00bae15a9017..d528ca681b6f4 100644 --- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c +++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c @@ -1430,7 +1430,7 @@ int dtsec_initialization(struct mac_device *mac_dev, dtsec->dtsec_drv_param->tx_pad_crc = true; phy_node = of_parse_phandle(mac_node, "tbi-handle", 0); - if (!phy_node || of_device_is_available(phy_node)) { + if (!phy_node || !of_device_is_available(phy_node)) { of_node_put(phy_node); err = -EINVAL; dev_err_probe(mac_dev->dev, err, From c7dd13805f8b8fc1ce3b6d40f6aff47e66b72ad2 Mon Sep 17 00:00:00 2001 From: Szymon Heidrich Date: Tue, 3 Jan 2023 10:17:09 +0100 Subject: [PATCH 0284/1593] usb: rndis_host: Secure rndis_query check against int overflow Variables off and len typed as uint32 in rndis_query function are controlled by incoming RNDIS response message thus their value may be manipulated. Setting off to a unexpectetly large value will cause the sum with len and 8 to overflow and pass the implemented validation step. Consequently the response pointer will be referring to a location past the expected buffer boundaries allowing information leakage e.g. via RNDIS_OID_802_3_PERMANENT_ADDRESS OID. Fixes: ddda08624013 ("USB: rndis_host, various cleanups") Signed-off-by: Szymon Heidrich Signed-off-by: David S. Miller --- drivers/net/usb/rndis_host.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index f79333fe17836..7b3739b29c8f7 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -255,7 +255,8 @@ static int rndis_query(struct usbnet *dev, struct usb_interface *intf, off = le32_to_cpu(u.get_c->offset); len = le32_to_cpu(u.get_c->len); - if (unlikely((8 + off + len) > CONTROL_BUFFER_SIZE)) + if (unlikely((off > CONTROL_BUFFER_SIZE - 8) || + (len > CONTROL_BUFFER_SIZE - 8 - off))) goto response_error; if (*reply_len != -1 && len != *reply_len) From 703c13fe3c9af557d312f5895ed6a5fda2711104 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 19 Dec 2022 10:10:04 +0100 Subject: [PATCH 0285/1593] efi: fix NULL-deref in init error path In cases where runtime services are not supported or have been disabled, the runtime services workqueue will never have been allocated. Do not try to destroy the workqueue unconditionally in the unlikely event that EFI initialisation fails to avoid dereferencing a NULL pointer. Fixes: 98086df8b70c ("efi: add missed destroy_workqueue when efisubsys_init fails") Cc: stable@vger.kernel.org Cc: Li Heng Signed-off-by: Johan Hovold Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 09716eebe8ac3..a2b0cbc8741c2 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -394,8 +394,8 @@ static int __init efisubsys_init(void) efi_kobj = kobject_create_and_add("efi", firmware_kobj); if (!efi_kobj) { pr_err("efi: Firmware registration failed.\n"); - destroy_workqueue(efi_rts_wq); - return -ENOMEM; + error = -ENOMEM; + goto err_destroy_wq; } if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE | @@ -443,7 +443,10 @@ static int __init efisubsys_init(void) err_put: kobject_put(efi_kobj); efi_kobj = NULL; - destroy_workqueue(efi_rts_wq); +err_destroy_wq: + if (efi_rts_wq) + destroy_workqueue(efi_rts_wq); + return error; } From e006ac3003080177cf0b673441a4241f77aaecce Mon Sep 17 00:00:00 2001 From: Ding Hui Date: Tue, 27 Dec 2022 23:09:36 +0800 Subject: [PATCH 0286/1593] efi: fix userspace infinite retry read efivars after EFI runtime services page fault After [1][2], if we catch exceptions due to EFI runtime service, we will clear EFI_RUNTIME_SERVICES bit to disable EFI runtime service, then the subsequent routine which invoke the EFI runtime service should fail. But the userspace cat efivars through /sys/firmware/efi/efivars/ will stuck and infinite loop calling read() due to efivarfs_file_read() return -EINTR. The -EINTR is converted from EFI_ABORTED by efi_status_to_err(), and is an improper return value in this situation, so let virt_efi_xxx() return EFI_DEVICE_ERROR and converted to -EIO to invoker. Cc: Fixes: 3425d934fc03 ("efi/x86: Handle page faults occurring while running EFI runtime services") Fixes: 23715a26c8d8 ("arm64: efi: Recover from synchronous exceptions occurring in firmware") Signed-off-by: Ding Hui Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/runtime-wrappers.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 7feee3d9c2bfb..1fba4e09cdcff 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -62,6 +62,7 @@ struct efi_runtime_work efi_rts_work; \ if (!efi_enabled(EFI_RUNTIME_SERVICES)) { \ pr_warn_once("EFI Runtime Services are disabled!\n"); \ + efi_rts_work.status = EFI_DEVICE_ERROR; \ goto exit; \ } \ \ From 406504c7b0405d74d74c15a667cd4c4620c3e7a9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2022 14:03:52 +0000 Subject: [PATCH 0287/1593] KVM: arm64: Fix S1PTW handling on RO memslots A recent development on the EFI front has resulted in guests having their page tables baked in the firmware binary, and mapped into the IPA space as part of a read-only memslot. Not only is this legitimate, but it also results in added security, so thumbs up. It is possible to take an S1PTW translation fault if the S1 PTs are unmapped at stage-2. However, KVM unconditionally treats S1PTW as a write to correctly handle hardware AF/DB updates to the S1 PTs. Furthermore, KVM injects an exception into the guest for S1PTW writes. In the aforementioned case this results in the guest taking an abort it won't recover from, as the S1 PTs mapping the vectors suffer from the same problem. So clearly our handling is... wrong. Instead, switch to a two-pronged approach: - On S1PTW translation fault, handle the fault as a read - On S1PTW permission fault, handle the fault as a write This is of no consequence to SW that *writes* to its PTs (the write will trigger a non-S1PTW fault), and SW that uses RO PTs will not use HW-assisted AF/DB anyway, as that'd be wrong. Only in the case described in c4ad98e4b72c ("KVM: arm64: Assume write fault on S1PTW permission fault on instruction fetch") do we end-up with two back-to-back faults (page being evicted and faulted back). I don't think this is a case worth optimising for. Fixes: c4ad98e4b72c ("KVM: arm64: Assume write fault on S1PTW permission fault on instruction fetch") Reviewed-by: Oliver Upton Reviewed-by: Ard Biesheuvel Regression-tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/include/asm/kvm_emulate.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 9bdba47f7e149..0d40c48d81329 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -373,8 +373,26 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) { - if (kvm_vcpu_abt_iss1tw(vcpu)) - return true; + if (kvm_vcpu_abt_iss1tw(vcpu)) { + /* + * Only a permission fault on a S1PTW should be + * considered as a write. Otherwise, page tables baked + * in a read-only memslot will result in an exception + * being delivered in the guest. + * + * The drawback is that we end-up faulting twice if the + * guest is using any of HW AF/DB: a translation fault + * to map the page containing the PT (read only at + * first), then a permission fault to allow the flags + * to be set. + */ + switch (kvm_vcpu_trap_get_fault_type(vcpu)) { + case ESR_ELx_FSC_PERM: + return true; + default: + return false; + } + } if (kvm_vcpu_trap_is_iabt(vcpu)) return false; From b8f8d190fa8fa1909dda12d771df67125d6fbf0c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 22 Dec 2022 09:26:31 +0000 Subject: [PATCH 0288/1593] KVM: arm64: Document the behaviour of S1PTW faults on RO memslots Although the KVM API says that a write to a RO memslot must result in a KVM_EXIT_MMIO describing the write, the arm64 architecture doesn't provide the *data* written by a Stage-1 page table walk (we only get the address). Since there isn't much userspace can do with so little information anyway, document the fact that such an access results in a guest exception, not an exit. This is consistent with the guest being terminally broken anyway. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- Documentation/virt/kvm/api.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 0dd5d8733dd58..42db72a0cbe68 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1354,6 +1354,14 @@ the memory region are automatically reflected into the guest. For example, an mmap() that affects the region will be made visible immediately. Another example is madvise(MADV_DROP). +Note: On arm64, a write generated by the page-table walker (to update +the Access and Dirty flags, for example) never results in a +KVM_EXIT_MMIO exit when the slot has the KVM_MEM_READONLY flag. This +is because KVM cannot provide the data that would be written by the +page-table walker, making it impossible to emulate the access. +Instead, an abort (data abort if the cause of the page-table update +was a load or a store, instruction abort if it was an instruction +fetch) is injected in the guest. 4.36 KVM_SET_TSS_ADDR --------------------- From b0803ba72b558957fdcfe845939ee788b7ce5919 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2022 14:49:30 +0000 Subject: [PATCH 0289/1593] KVM: arm64: Convert FSC_* over to ESR_ELx_FSC_* The former is an AArch32 legacy, so let's move over to the verbose (and strictly identical) version. This involves moving some of the #defines that were private to KVM into the more generic esr.h. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/esr.h | 9 +++++++++ arch/arm64/include/asm/kvm_arm.h | 15 --------------- arch/arm64/include/asm/kvm_emulate.h | 20 ++++++++++---------- arch/arm64/kvm/hyp/include/hyp/fault.h | 2 +- arch/arm64/kvm/hyp/include/hyp/switch.h | 2 +- arch/arm64/kvm/mmu.c | 21 ++++++++++++--------- 6 files changed, 33 insertions(+), 36 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 15b34fbfca667..206de10524e33 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -114,6 +114,15 @@ #define ESR_ELx_FSC_ACCESS (0x08) #define ESR_ELx_FSC_FAULT (0x04) #define ESR_ELx_FSC_PERM (0x0C) +#define ESR_ELx_FSC_SEA_TTW0 (0x14) +#define ESR_ELx_FSC_SEA_TTW1 (0x15) +#define ESR_ELx_FSC_SEA_TTW2 (0x16) +#define ESR_ELx_FSC_SEA_TTW3 (0x17) +#define ESR_ELx_FSC_SECC (0x18) +#define ESR_ELx_FSC_SECC_TTW0 (0x1c) +#define ESR_ELx_FSC_SECC_TTW1 (0x1d) +#define ESR_ELx_FSC_SECC_TTW2 (0x1e) +#define ESR_ELx_FSC_SECC_TTW3 (0x1f) /* ISS field definitions for Data Aborts */ #define ESR_ELx_ISV_SHIFT (24) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 0df3fc3a01737..26b0c97df9863 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -319,21 +319,6 @@ BIT(18) | \ GENMASK(16, 15)) -/* For compatibility with fault code shared with 32-bit */ -#define FSC_FAULT ESR_ELx_FSC_FAULT -#define FSC_ACCESS ESR_ELx_FSC_ACCESS -#define FSC_PERM ESR_ELx_FSC_PERM -#define FSC_SEA ESR_ELx_FSC_EXTABT -#define FSC_SEA_TTW0 (0x14) -#define FSC_SEA_TTW1 (0x15) -#define FSC_SEA_TTW2 (0x16) -#define FSC_SEA_TTW3 (0x17) -#define FSC_SECC (0x18) -#define FSC_SECC_TTW0 (0x1c) -#define FSC_SECC_TTW1 (0x1d) -#define FSC_SECC_TTW2 (0x1e) -#define FSC_SECC_TTW3 (0x1f) - /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) /* diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 0d40c48d81329..193583df2d9c4 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -349,16 +349,16 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *v static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) { switch (kvm_vcpu_trap_get_fault(vcpu)) { - case FSC_SEA: - case FSC_SEA_TTW0: - case FSC_SEA_TTW1: - case FSC_SEA_TTW2: - case FSC_SEA_TTW3: - case FSC_SECC: - case FSC_SECC_TTW0: - case FSC_SECC_TTW1: - case FSC_SECC_TTW2: - case FSC_SECC_TTW3: + case ESR_ELx_FSC_EXTABT: + case ESR_ELx_FSC_SEA_TTW0: + case ESR_ELx_FSC_SEA_TTW1: + case ESR_ELx_FSC_SEA_TTW2: + case ESR_ELx_FSC_SEA_TTW3: + case ESR_ELx_FSC_SECC: + case ESR_ELx_FSC_SECC_TTW0: + case ESR_ELx_FSC_SECC_TTW1: + case ESR_ELx_FSC_SECC_TTW2: + case ESR_ELx_FSC_SECC_TTW3: return true; default: return false; diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h index 1b8a2dcd712f3..9ddcfe2c3e574 100644 --- a/arch/arm64/kvm/hyp/include/hyp/fault.h +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h @@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) */ if (!(esr & ESR_ELx_S1PTW) && (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { + (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) { if (!__translate_far_to_hpfar(far, &hpfar)) return false; } else { diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 3330d1b76bdd2..07d37ff88a3f2 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -367,7 +367,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { bool valid; - valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && + valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT && kvm_vcpu_dabt_isvalid(vcpu) && !kvm_vcpu_abt_issea(vcpu) && !kvm_vcpu_abt_iss1tw(vcpu); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 31d7fa4c7c140..a3ee3b605c9b8 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1212,7 +1212,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); VM_BUG_ON(write_fault && exec_fault); - if (fault_status == FSC_PERM && !write_fault && !exec_fault) { + if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) { kvm_err("Unexpected L2 read permission error\n"); return -EFAULT; } @@ -1277,7 +1277,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * only exception to this is when dirty logging is enabled at runtime * and a write fault needs to collapse a block entry into a table. */ - if (fault_status != FSC_PERM || (logging_active && write_fault)) { + if (fault_status != ESR_ELx_FSC_PERM || + (logging_active && write_fault)) { ret = kvm_mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm)); if (ret) @@ -1342,7 +1343,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * backed by a THP and thus use block mapping if possible. */ if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) { - if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE) + if (fault_status == ESR_ELx_FSC_PERM && + fault_granule > PAGE_SIZE) vma_pagesize = fault_granule; else vma_pagesize = transparent_hugepage_adjust(kvm, memslot, @@ -1350,7 +1352,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, &fault_ipa); } - if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) { + if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) { /* Check the VMM hasn't introduced a new disallowed VMA */ if (kvm_vma_mte_allowed(vma)) { sanitise_mte_tags(kvm, pfn, vma_pagesize); @@ -1376,7 +1378,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * permissions only if vma_pagesize equals fault_granule. Otherwise, * kvm_pgtable_stage2_map() should be called to change block size. */ - if (fault_status == FSC_PERM && vma_pagesize == fault_granule) + if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule) ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); else ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, @@ -1441,7 +1443,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); is_iabt = kvm_vcpu_trap_is_iabt(vcpu); - if (fault_status == FSC_FAULT) { + if (fault_status == ESR_ELx_FSC_FAULT) { /* Beyond sanitised PARange (which is the IPA limit) */ if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) { kvm_inject_size_fault(vcpu); @@ -1476,8 +1478,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. fault or write fault */ - if (fault_status != FSC_FAULT && fault_status != FSC_PERM && - fault_status != FSC_ACCESS) { + if (fault_status != ESR_ELx_FSC_FAULT && + fault_status != ESR_ELx_FSC_PERM && + fault_status != ESR_ELx_FSC_ACCESS) { kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", kvm_vcpu_trap_get_class(vcpu), (unsigned long)kvm_vcpu_trap_get_fault(vcpu), @@ -1539,7 +1542,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) /* Userspace should not be able to register out-of-bounds IPAs */ VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm)); - if (fault_status == FSC_ACCESS) { + if (fault_status == ESR_ELx_FSC_ACCESS) { handle_access_fault(vcpu, fault_ipa); ret = 1; goto out_unlock; From f3cb80804b8295323919e031281768ba3bf5f8da Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 2 Jan 2023 19:28:49 -0800 Subject: [PATCH 0290/1593] time: Fix various kernel-doc problems Clean up kernel-doc complaints about function names and non-kernel-doc comments in kernel/time/. Fixes these warnings: kernel/time/time.c:479: warning: expecting prototype for set_normalized_timespec(). Prototype was for set_normalized_timespec64() instead kernel/time/time.c:553: warning: expecting prototype for msecs_to_jiffies(). Prototype was for __msecs_to_jiffies() instead kernel/time/timekeeping.c:1595: warning: contents before sections kernel/time/timekeeping.c:1705: warning: This comment starts with '/**', but isn't a kernel-doc comment. * We have three kinds of time sources to use for sleep time kernel/time/timekeeping.c:1726: warning: This comment starts with '/**', but isn't a kernel-doc comment. * 1) can be determined whether to use or not only when doing kernel/time/tick-oneshot.c:21: warning: missing initial short description on line: * tick_program_event kernel/time/tick-oneshot.c:107: warning: expecting prototype for tick_check_oneshot_mode(). Prototype was for tick_oneshot_mode_active() instead Signed-off-by: Randy Dunlap Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230103032849.12723-1-rdunlap@infradead.org --- kernel/time/tick-oneshot.c | 4 ++-- kernel/time/time.c | 8 ++++---- kernel/time/timekeeping.c | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 475ecceda7688..5e2c2c26b3cc9 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -18,7 +18,7 @@ #include "tick-internal.h" /** - * tick_program_event + * tick_program_event - program the CPU local timer device for the next event */ int tick_program_event(ktime_t expires, int force) { @@ -99,7 +99,7 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) } /** - * tick_check_oneshot_mode - check whether the system is in oneshot mode + * tick_oneshot_mode_active - check whether the system is in oneshot mode * * returns 1 when either nohz or highres are enabled. otherwise 0. */ diff --git a/kernel/time/time.c b/kernel/time/time.c index 526257b3727ca..f4198af60fee5 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -462,7 +462,7 @@ struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec) EXPORT_SYMBOL(ns_to_kernel_old_timeval); /** - * set_normalized_timespec - set timespec sec and nsec parts and normalize + * set_normalized_timespec64 - set timespec sec and nsec parts and normalize * * @ts: pointer to timespec variable to be set * @sec: seconds to set @@ -526,7 +526,7 @@ struct timespec64 ns_to_timespec64(s64 nsec) EXPORT_SYMBOL(ns_to_timespec64); /** - * msecs_to_jiffies: - convert milliseconds to jiffies + * __msecs_to_jiffies: - convert milliseconds to jiffies * @m: time in milliseconds * * conversion is done as follows: @@ -541,12 +541,12 @@ EXPORT_SYMBOL(ns_to_timespec64); * handling any 32-bit overflows. * for the details see __msecs_to_jiffies() * - * msecs_to_jiffies() checks for the passed in value being a constant + * __msecs_to_jiffies() checks for the passed in value being a constant * via __builtin_constant_p() allowing gcc to eliminate most of the * code, __msecs_to_jiffies() is called if the value passed does not * allow constant folding and the actual conversion must be done at * runtime. - * the _msecs_to_jiffies helpers are the HZ dependent conversion + * The _msecs_to_jiffies helpers are the HZ dependent conversion * routines found in include/linux/jiffies.h */ unsigned long __msecs_to_jiffies(const unsigned int m) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f72b9f1de178e..5579ead449f25 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1590,10 +1590,10 @@ void __weak read_persistent_clock64(struct timespec64 *ts) /** * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset * from the boot. + * @wall_time: current time as returned by persistent clock + * @boot_offset: offset that is defined as wall_time - boot_time * * Weak dummy function for arches that do not yet support it. - * @wall_time: - current time as returned by persistent clock - * @boot_offset: - offset that is defined as wall_time - boot_time * * The default function calculates offset based on the current value of * local_clock(). This way architectures that support sched_clock() but don't @@ -1701,7 +1701,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, } #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE) -/** +/* * We have three kinds of time sources to use for sleep time * injection, the preference order is: * 1) non-stop clocksource @@ -1722,7 +1722,7 @@ bool timekeeping_rtc_skipresume(void) return !suspend_timing_needed; } -/** +/* * 1) can be determined whether to use or not only when doing * timekeeping_resume() which is invoked after rtc_suspend(), * so we can't skip rtc_suspend() surely if system has 1). From 13678f3feb3009b23aab424864fd0dac0765c83e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 29 Oct 2022 22:56:36 -0700 Subject: [PATCH 0291/1593] reset: ti-sci: honor TI_SCI_PROTOCOL setting when not COMPILE_TEST There is a build error when COMPILE_TEST=y, TI_SCI_PROTOCOL=m, and RESET_TI_SCI=y: drivers/reset/reset-ti-sci.o: in function `ti_sci_reset_probe': reset-ti-sci.c:(.text+0x22c): undefined reference to `devm_ti_sci_get_handle' Fix this by making RESET_TI_SCI honor the Kconfig setting of TI_SCI_PROTOCOL when COMPILE_TEST is not set. When COMPILE_TEST is set, TI_SCI_PROTOCOL must be disabled (=n). Fixes: a6af504184c9 ("reset: ti-sci: Allow building under COMPILE_TEST") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Nishanth Menon Cc: Tero Kristo Cc: Santosh Shilimkar Cc: linux-arm-kernel@lists.infradead.org Cc: Philipp Zabel Reviewed-by: Nishanth Menon Link: https://lore.kernel.org/r/20221030055636.3139-1-rdunlap@infradead.org Signed-off-by: Philipp Zabel --- drivers/reset/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig index de176c2fbad96..2a52c990d4fec 100644 --- a/drivers/reset/Kconfig +++ b/drivers/reset/Kconfig @@ -257,7 +257,7 @@ config RESET_SUNXI config RESET_TI_SCI tristate "TI System Control Interface (TI-SCI) reset driver" - depends on TI_SCI_PROTOCOL || COMPILE_TEST + depends on TI_SCI_PROTOCOL || (COMPILE_TEST && TI_SCI_PROTOCOL=n) help This enables the reset driver support over TI System Control Interface available on some new TI's SoCs. If you wish to use reset resources From 3a2390c6777e3f6662980c6cfc25cafe9e4fef98 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Mon, 14 Nov 2022 08:49:58 +0800 Subject: [PATCH 0292/1593] reset: uniphier-glue: Fix possible null-ptr-deref It will cause null-ptr-deref when resource_size(res) invoked, if platform_get_resource() returns NULL. Fixes: 499fef09a323 ("reset: uniphier: add USB3 core reset control") Signed-off-by: Hui Tang Reviewed-by: Kunihiko Hayashi Link: https://lore.kernel.org/r/20221114004958.258513-1-tanghui20@huawei.com Signed-off-by: Philipp Zabel --- drivers/reset/reset-uniphier-glue.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/reset/reset-uniphier-glue.c b/drivers/reset/reset-uniphier-glue.c index 146fd5d45e99d..15abac9fc72c0 100644 --- a/drivers/reset/reset-uniphier-glue.c +++ b/drivers/reset/reset-uniphier-glue.c @@ -47,7 +47,6 @@ static int uniphier_glue_reset_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct uniphier_glue_reset_priv *priv; struct resource *res; - resource_size_t size; int i, ret; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -60,7 +59,6 @@ static int uniphier_glue_reset_probe(struct platform_device *pdev) return -EINVAL; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - size = resource_size(res); priv->rdata.membase = devm_ioremap_resource(dev, res); if (IS_ERR(priv->rdata.membase)) return PTR_ERR(priv->rdata.membase); @@ -96,7 +94,7 @@ static int uniphier_glue_reset_probe(struct platform_device *pdev) spin_lock_init(&priv->rdata.lock); priv->rdata.rcdev.owner = THIS_MODULE; - priv->rdata.rcdev.nr_resets = size * BITS_PER_BYTE; + priv->rdata.rcdev.nr_resets = resource_size(res) * BITS_PER_BYTE; priv->rdata.rcdev.ops = &reset_simple_ops; priv->rdata.rcdev.of_node = dev->of_node; priv->rdata.active_low = true; From 69555549cfa42e10f2fdd2699ed4e34d9d4f392b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 23 Nov 2022 03:13:03 +0300 Subject: [PATCH 0293/1593] drm/scheduler: Fix lockup in drm_sched_entity_kill() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The drm_sched_entity_kill() is invoked twice by drm_sched_entity_destroy() while userspace process is exiting or being killed. First time it's invoked when sched entity is flushed and second time when entity is released. This causes a lockup within wait_for_completion(entity_idle) due to how completion API works. Calling wait_for_completion() more times than complete() was invoked is a error condition that causes lockup because completion internally uses counter for complete/wait calls. The complete_all() must be used instead in such cases. This patch fixes lockup of Panfrost driver that is reproducible by killing any application in a middle of 3d drawing operation. Fixes: 2fdb8a8f07c2 ("drm/scheduler: rework entity flush, kill and fini") Signed-off-by: Dmitry Osipenko Reviewed-by: Christian König Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://patchwork.freedesktop.org/patch/msgid/20221123001303.533968-1-dmitry.osipenko@collabora.com --- drivers/gpu/drm/scheduler/sched_entity.c | 2 +- drivers/gpu/drm/scheduler/sched_main.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index fe09e5be79bdd..15d04a0ec6234 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -81,7 +81,7 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, init_completion(&entity->entity_idle); /* We start in an idle state. */ - complete(&entity->entity_idle); + complete_all(&entity->entity_idle); spin_lock_init(&entity->rq_lock); spsc_queue_init(&entity->job_queue); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 31f3a1267be44..fd22d753b4ed0 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -987,7 +987,7 @@ static int drm_sched_main(void *param) sched_job = drm_sched_entity_pop_job(entity); if (!sched_job) { - complete(&entity->entity_idle); + complete_all(&entity->entity_idle); continue; } @@ -998,7 +998,7 @@ static int drm_sched_main(void *param) trace_drm_run_job(sched_job, entity); fence = sched->ops->run_job(sched_job); - complete(&entity->entity_idle); + complete_all(&entity->entity_idle); drm_sched_fence_scheduled(s_fence); if (!IS_ERR_OR_NULL(fence)) { From b963c1d6261eb7fba1ba14074fb447521be84add Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Jan 2023 10:21:01 -0300 Subject: [PATCH 0294/1593] perf test record_probe_libc_inet_pton: Fix failure due to extra inet_pton() backtrace in glibc >= 2.35 Starting with glibc 2.35 there are extra inet_pton() calls when doing a IPv6 ping as in one of the 'perf test' entry, which makes it fail: # perf test inet_pton 89: probe libc's inet_pton & backtrace it with ping : FAILED! # If we look at what this script is expecting (commenting out the removal of the temporary files in it): # cat /tmp/expected.aT6 ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\) .*inet_pton\+0x[[:xdigit:]]+[[:space:]]\(/usr/lib64/libc.so.6|inlined\)$ getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\(/usr/lib64/libc.so.6\)$ .*(\+0x[[:xdigit:]]+|\[unknown\])[[:space:]]\(.*/bin/ping.*\)$ # And looking at what we are getting out of 'perf script', to match with the above: # cat /tmp/perf.script.IUC ping 623883 [006] 265438.471610: probe_libc:inet_pton: (7f32bcf314c0) 1314c0 __GI___inet_pton+0x0 (/usr/lib64/libc.so.6) 29510 __libc_start_call_main+0x80 (/usr/lib64/libc.so.6) ping 623883 [006] 265438.471664: probe_libc:inet_pton: (7f32bcf314c0) 1314c0 __GI___inet_pton+0x0 (/usr/lib64/libc.so.6) fa6c6 getaddrinfo+0x126 (/usr/lib64/libc.so.6) 491e [unknown] (/usr/bin/ping) # We see that its just the first call to inet_pton() that didn't came thru getaddrinfo(), so if we ignore the first the script matches what it expects, testing that using 'perf probe' + 'perf record' + 'perf script' with callchains on userspace targets is producing the expected results. Since we don't have a 'perf script --skip' to help us here, use tac + grep to do that, resulting in a one liner that makes this script work on both older glibc versions as well as with 2.35. With it, on fedora 36, x86, glibc 2.35: # perf test inet_pton 90: probe libc's inet_pton & backtrace it with ping : Ok # perf test -v inet_pton 90: probe libc's inet_pton & backtrace it with ping : --- start --- test child forked, pid 627197 ping 627220 1 267956.962402: probe_libc:inet_pton_1: (7f488bf314c0) 1314c0 __GI___inet_pton+0x0 (/usr/lib64/libc.so.6) fa6c6 getaddrinfo+0x126 (/usr/lib64/libc.so.6) 491e n (/usr/bin/ping) test child finished with 0 ---- end ---- probe libc's inet_pton & backtrace it with ping: Ok # And on Ubuntu 22.04.1 LTS on a Libre Computer ROC-RK3399-PC arm64 system: Before this patch it works (see that the script used has no 'tac' to remove the first event): root@roc-rk3399-pc:~# dpkg -l | grep libc-bin ii libc-bin 2.35-0ubuntu3.1 arm64 GNU C Library: Binaries root@roc-rk3399-pc:~# grep -w tac ~acme/libexec/perf-core/tests/shell/record+probe_libc_inet_pton.sh root@roc-rk3399-pc:~# perf test inet_pton 86: probe libc's inet_pton & backtrace it with ping : Ok root@roc-rk3399-pc:~# perf test -v inet_pton 86: probe libc's inet_pton & backtrace it with ping : --- start --- test child forked, pid 1375 ping 1399 [000] 4114.417450: probe_libc:inet_pton: (ffffb3e26120) 106120 inet_pton+0x0 (/usr/lib/aarch64-linux-gnu/libc.so.6) d18bc getaddrinfo+0xec (/usr/lib/aarch64-linux-gnu/libc.so.6) 2b68 [unknown] (/usr/bin/ping) test child finished with 0 ---- end ---- probe libc's inet_pton & backtrace it with ping: Ok root@roc-rk3399-pc:~# And after it continues to work: root@roc-rk3399-pc:~# grep -w tac ~acme/libexec/perf-core/tests/shell/record+probe_libc_inet_pton.sh perf script -i $perf_data | tac | grep -m1 ^ping -B9 | tac > $perf_script root@roc-rk3399-pc:~# perf test inet_pton 86: probe libc's inet_pton & backtrace it with ping : Ok root@roc-rk3399-pc:~# perf test -v inet_pton 86: probe libc's inet_pton & backtrace it with ping : --- start --- test child forked, pid 6995 ping 7019 [005] 4832.160741: probe_libc:inet_pton: (ffffa62e6120) 106120 inet_pton+0x0 (/usr/lib/aarch64-linux-gnu/libc.so.6) d18bc getaddrinfo+0xec (/usr/lib/aarch64-linux-gnu/libc.so.6) 2b68 [unknown] (/usr/bin/ping) test child finished with 0 ---- end ---- probe libc's inet_pton & backtrace it with ping: Ok root@roc-rk3399-pc:~# Reported-by: Thomas Richter Cc: Adrian Hunter Cc: Heiko Carstens Cc: Ian Rogers Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/Y7QyPkPlDYip3cZH@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record+probe_libc_inet_pton.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index 34c400ccbe046..216b6b64caa30 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -57,7 +57,7 @@ trace_libc_inet_pton_backtrace() { perf_data=`mktemp -u /tmp/perf.data.XXX` perf_script=`mktemp -u /tmp/perf.script.XXX` perf record -e $event_name/$eventattr/ -o $perf_data ping -6 -c 1 ::1 > /dev/null 2>&1 - perf script -i $perf_data > $perf_script + perf script -i $perf_data | tac | grep -m1 ^ping -B9 | tac > $perf_script exec 3<$perf_script exec 4<$expected From 946c2923e76327343e4460e8bb7ec7b4d4589397 Mon Sep 17 00:00:00 2001 From: Tanmay Bhushan <007047221b@gmail.com> Date: Sat, 31 Dec 2022 16:05:01 +0100 Subject: [PATCH 0295/1593] btrfs: fix ASSERT em->len condition in btrfs_get_extent The em->len value is supposed to be verified in the assertion condition though we expect it to be same as the sectorsize. Fixes: a196a8944f77 ("btrfs: do not reset extent map members for inline extents read") Reviewed-by: Anand Jain Signed-off-by: Tanmay Bhushan <007047221b@gmail.com> Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bfcbe64eb8b3a..940b404c8f289 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7092,7 +7092,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, * Other members are not utilized for inline extents. */ ASSERT(em->block_start == EXTENT_MAP_INLINE); - ASSERT(em->len = fs_info->sectorsize); + ASSERT(em->len == fs_info->sectorsize); ret = read_inline_extent(inode, path, page); if (ret < 0) From 77177ed17d24ba060117bdb6efb8a01da7531676 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 29 Dec 2022 07:32:23 +0800 Subject: [PATCH 0296/1593] btrfs: add error message for metadata level mismatch From a recent regression report, we found that after commit 947a629988f1 ("btrfs: move tree block parentness check into validate_extent_buffer()") if we have a level mismatch (false alert though), there is no error message at all. This makes later debugging harder. This patch will add the proper error message for such case. Link: https://lore.kernel.org/linux-btrfs/CABXGCsNzVxo4iq-tJSGm_kO1UggHXgq6CdcHDL=z5FL4njYXSQ@mail.gmail.com/ Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0888d484df80c..9940cc39dbc94 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -530,6 +530,9 @@ static int validate_extent_buffer(struct extent_buffer *eb, } if (found_level != check->level) { + btrfs_err(fs_info, + "level verify failed on logical %llu mirror %u wanted %u found %u", + eb->start, eb->read_mirror, check->level, found_level); ret = -EIO; goto out; } From 1d854e4fbabb0cb12ca4a7fcd784eb67a65de5f8 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 29 Dec 2022 07:32:24 +0800 Subject: [PATCH 0297/1593] btrfs: fix false alert on bad tree level check [BUG] There is a bug report that on a RAID0 NVMe btrfs system, under heavy write load the filesystem can flip RO randomly. With extra debugging, it shows some tree blocks failed to pass their level checks, and if that happens at critical path of a transaction, we abort the transaction: BTRFS error (device nvme0n1p3): level verify failed on logical 5446121209856 mirror 1 wanted 0 found 1 BTRFS error (device nvme0n1p3: state A): Transaction aborted (error -5) BTRFS: error (device nvme0n1p3: state A) in btrfs_finish_ordered_io:3343: errno=-5 IO failure BTRFS info (device nvme0n1p3: state EA): forced readonly [CAUSE] The reporter has already bisected to commit 947a629988f1 ("btrfs: move tree block parentness check into validate_extent_buffer()"). And with extra debugging, it shows we can have btrfs_tree_parent_check filled with all zeros in the following call trace: submit_one_bio+0xd4/0xe0 submit_extent_page+0x142/0x550 read_extent_buffer_pages+0x584/0x9c0 ? __pfx_end_bio_extent_readpage+0x10/0x10 ? folio_unlock+0x1d/0x50 btrfs_read_extent_buffer+0x98/0x150 read_tree_block+0x43/0xa0 read_block_for_search+0x266/0x370 btrfs_search_slot+0x351/0xd30 ? lock_is_held_type+0xe8/0x140 btrfs_lookup_csum+0x63/0x150 btrfs_csum_file_blocks+0x197/0x6c0 ? sched_clock_cpu+0x9f/0xc0 ? lock_release+0x14b/0x440 ? _raw_read_unlock+0x29/0x50 btrfs_finish_ordered_io+0x441/0x860 btrfs_work_helper+0xfe/0x400 ? lock_is_held_type+0xe8/0x140 process_one_work+0x294/0x5b0 worker_thread+0x4f/0x3a0 ? __pfx_worker_thread+0x10/0x10 kthread+0xf5/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2c/0x50 Currently we only copy the btrfs_tree_parent_check structure into bbio at read_extent_buffer_pages() after we have assembled the bbio. But as shown above, submit_extent_page() itself can already submit the bbio, leaving the bbio->parent_check uninitialized, and cause the false alert. [FIX] Instead of copying @check into bbio after bbio is assembled, we pass @check in btrfs_bio_ctrl::parent_check, and copy the content of parent_check in submit_one_bio() for metadata read. By this we should be able to pass the needed info for metadata endio verification, and fix the false alert. Reported-by: Mikhail Gavrilov Link: https://lore.kernel.org/linux-btrfs/CABXGCsNzVxo4iq-tJSGm_kO1UggHXgq6CdcHDL=z5FL4njYXSQ@mail.gmail.com/ Fixes: 947a629988f1 ("btrfs: move tree block parentness check into validate_extent_buffer()") Tested-by: Mikhail Gavrilov Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 83dd3aa596635..9bd32daa9b9a6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -103,6 +103,15 @@ struct btrfs_bio_ctrl { u32 len_to_oe_boundary; btrfs_bio_end_io_t end_io_func; + /* + * This is for metadata read, to provide the extra needed verification + * info. This has to be provided for submit_one_bio(), as + * submit_one_bio() can submit a bio if it ends at stripe boundary. If + * no such parent_check is provided, the metadata can hit false alert at + * endio time. + */ + struct btrfs_tree_parent_check *parent_check; + /* * Tell writepage not to lock the state bits for this range, it still * does the unlocking. @@ -133,13 +142,24 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl) btrfs_bio(bio)->file_offset = page_offset(bv->bv_page) + bv->bv_offset; - if (!is_data_inode(&inode->vfs_inode)) + if (!is_data_inode(&inode->vfs_inode)) { + if (btrfs_op(bio) != BTRFS_MAP_WRITE) { + /* + * For metadata read, we should have the parent_check, + * and copy it to bbio for metadata verification. + */ + ASSERT(bio_ctrl->parent_check); + memcpy(&btrfs_bio(bio)->parent_check, + bio_ctrl->parent_check, + sizeof(struct btrfs_tree_parent_check)); + } btrfs_submit_metadata_bio(inode, bio, mirror_num); - else if (btrfs_op(bio) == BTRFS_MAP_WRITE) + } else if (btrfs_op(bio) == BTRFS_MAP_WRITE) { btrfs_submit_data_write_bio(inode, bio, mirror_num); - else + } else { btrfs_submit_data_read_bio(inode, bio, mirror_num, bio_ctrl->compress_type); + } /* The bio is owned by the end_io handler now */ bio_ctrl->bio = NULL; @@ -4829,6 +4849,7 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait, struct extent_state *cached_state = NULL; struct btrfs_bio_ctrl bio_ctrl = { .mirror_num = mirror_num, + .parent_check = check, }; int ret = 0; @@ -4878,7 +4899,6 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait, */ atomic_dec(&eb->io_pages); } - memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, sizeof(*check)); submit_one_bio(&bio_ctrl); if (ret || wait != WAIT_COMPLETE) { free_extent_state(cached_state); @@ -4905,6 +4925,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num, unsigned long num_reads = 0; struct btrfs_bio_ctrl bio_ctrl = { .mirror_num = mirror_num, + .parent_check = check, }; if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) @@ -4996,7 +5017,6 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num, } } - memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, sizeof(*check)); submit_one_bio(&bio_ctrl); if (ret || wait != WAIT_COMPLETE) From 2f2e84ca60660402bd81d0859703567c59556e6a Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 23 Dec 2022 18:28:53 +0000 Subject: [PATCH 0298/1593] btrfs: fix off-by-one in delalloc search during lseek MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During lseek, when searching for delalloc in a range that represents a hole and that range has a length of 1 byte, we end up not doing the actual delalloc search in the inode's io tree, resulting in not correctly reporting the offset with data or a hole. This actually only happens when the start offset is 0 because with any other start offset we round it down by sector size. Reproducer: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt/sdc $ xfs_io -f -c "pwrite -q 0 1" /mnt/sdc/foo $ xfs_io -c "seek -d 0" /mnt/sdc/foo Whence Result DATA EOF It should have reported an offset of 0 instead of EOF. Fix this by updating btrfs_find_delalloc_in_range() and count_range_bits() to deal with inclusive ranges properly. These functions are already supposed to work with inclusive end offsets, they just got it wrong in a couple places due to off-by-one mistakes. A test case for fstests will be added later. Reported-by: Joan Bruguera Micó Link: https://lore.kernel.org/linux-btrfs/20221223020509.457113-1-joanbrugueram@gmail.com/ Fixes: b6e833567ea1 ("btrfs: make hole and data seeking a lot more efficient") CC: stable@vger.kernel.org # 6.1 Tested-by: Joan Bruguera Micó Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent-io-tree.c | 2 +- fs/btrfs/file.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c index 9ae9cd1e70352..3c7766dfaa694 100644 --- a/fs/btrfs/extent-io-tree.c +++ b/fs/btrfs/extent-io-tree.c @@ -1551,7 +1551,7 @@ u64 count_range_bits(struct extent_io_tree *tree, u64 last = 0; int found = 0; - if (WARN_ON(search_end <= cur_start)) + if (WARN_ON(search_end < cur_start)) return 0; spin_lock(&tree->lock); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 91b00eb2440e7..834bbcb91102f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3354,7 +3354,7 @@ bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end, bool search_io_tree = true; bool ret = false; - while (cur_offset < end) { + while (cur_offset <= end) { u64 delalloc_start; u64 delalloc_end; bool delalloc; From d73a27b86fc722c28a26ec64002e3a7dc86d1c07 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 1 Jan 2023 09:02:21 +0800 Subject: [PATCH 0299/1593] btrfs: handle case when repair happens with dev-replace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [BUG] There is a bug report that a BUG_ON() in btrfs_repair_io_failure() (originally repair_io_failure() in v6.0 kernel) got triggered when replacing a unreliable disk: BTRFS warning (device sda1): csum failed root 257 ino 2397453 off 39624704 csum 0xb0d18c75 expected csum 0x4dae9c5e mirror 3 kernel BUG at fs/btrfs/extent_io.c:2380! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 9 PID: 3614331 Comm: kworker/u257:2 Tainted: G OE 6.0.0-5-amd64 #1 Debian 6.0.10-2 Hardware name: Micro-Star International Co., Ltd. MS-7C60/TRX40 PRO WIFI (MS-7C60), BIOS 2.70 07/01/2021 Workqueue: btrfs-endio btrfs_end_bio_work [btrfs] RIP: 0010:repair_io_failure+0x24a/0x260 [btrfs] Call Trace: clean_io_failure+0x14d/0x180 [btrfs] end_bio_extent_readpage+0x412/0x6e0 [btrfs] ? __switch_to+0x106/0x420 process_one_work+0x1c7/0x380 worker_thread+0x4d/0x380 ? rescuer_thread+0x3a0/0x3a0 kthread+0xe9/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x22/0x30 [CAUSE] Before the BUG_ON(), we got some read errors from the replace target first, note the mirror number (3, which is beyond RAID1 duplication, thus it's read from the replace target device). Then at the BUG_ON() location, we are trying to writeback the repaired sectors back the failed device. The check looks like this: ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, &map_length, &bioc, mirror_num); if (ret) goto out_counter_dec; BUG_ON(mirror_num != bioc->mirror_num); But inside btrfs_map_block(), we can modify bioc->mirror_num especially for dev-replace: if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 && !need_full_stripe(op) && dev_replace->tgtdev != NULL) { ret = get_extra_mirror_from_replace(fs_info, logical, *length, dev_replace->srcdev->devid, &mirror_num, &physical_to_patch_in_first_stripe); patch_the_first_stripe_for_dev_replace = 1; } Thus if we're repairing the replace target device, we're going to trigger that BUG_ON(). But in reality, the read failure from the replace target device may be that, our replace hasn't reached the range we're reading, thus we're reading garbage, but with replace running, the range would be properly filled later. Thus in that case, we don't need to do anything but let the replace routine to handle it. [FIX] Instead of a BUG_ON(), just skip the repair if we're repairing the device replace target device. Reported-by: 小太 Link: https://lore.kernel.org/linux-btrfs/CACsxjPYyJGQZ+yvjzxA1Nn2LuqkYqTCcUH43S=+wXhyf8S00Ag@mail.gmail.com/ CC: stable@vger.kernel.org # 6.0+ Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/bio.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index b8fb7ef6b5206..8affc88b0e0a4 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -329,7 +329,16 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, &map_length, &bioc, mirror_num); if (ret) goto out_counter_dec; - BUG_ON(mirror_num != bioc->mirror_num); + /* + * This happens when dev-replace is also running, and the + * mirror_num indicates the dev-replace target. + * + * In this case, we don't need to do anything, as the read + * error just means the replace progress hasn't reached our + * read range, and later replace routine would handle it well. + */ + if (mirror_num != bioc->mirror_num) + goto out_counter_dec; } sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9; From 39f501d68ec1ed5cd5c66ac6ec2a7131c517bb92 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 26 Dec 2022 09:00:40 +0800 Subject: [PATCH 0300/1593] btrfs: always report error in run_one_delayed_ref() Currently we have a btrfs_debug() for run_one_delayed_ref() failure, but if end users hit such problem, there will be no chance that btrfs_debug() is enabled. This can lead to very little useful info for debugging. This patch will: - Add extra info for error reporting Including: * logical bytenr * num_bytes * type * action * ref_mod - Replace the btrfs_debug() with btrfs_err() - Move the error reporting into run_one_delayed_ref() This is to avoid use-after-free, the @node can be freed in the caller. This error should only be triggered at most once. As if run_one_delayed_ref() failed, we trigger the error message, then causing the call chain to error out: btrfs_run_delayed_refs() `- btrfs_run_delayed_refs() `- btrfs_run_delayed_refs_for_head() `- run_one_delayed_ref() And we will abort the current transaction in btrfs_run_delayed_refs(). If we have to run delayed refs for the abort transaction, run_one_delayed_ref() will just cleanup the refs and do nothing, thus no new error messages would be output. Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 892d78c1853c7..72ba13b027a9e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1713,6 +1713,11 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, BUG(); if (ret && insert_reserved) btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1); + if (ret < 0) + btrfs_err(trans->fs_info, +"failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d", + node->bytenr, node->num_bytes, node->type, + node->action, node->ref_mod, ret); return ret; } @@ -1954,8 +1959,6 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans, if (ret) { unselect_delayed_ref_head(delayed_refs, locked_ref); btrfs_put_delayed_ref(ref); - btrfs_debug(fs_info, "run_one_delayed_ref returned %d", - ret); return ret; } From 2ba48b20049b5a76f34a85f853c9496d1b10533a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 22 Dec 2022 07:59:17 +0800 Subject: [PATCH 0301/1593] btrfs: fix compat_ro checks against remount [BUG] Even with commit 81d5d61454c3 ("btrfs: enhance unsupported compat RO flags handling"), btrfs can still mount a fs with unsupported compat_ro flags read-only, then remount it RW: # btrfs ins dump-super /dev/loop0 | grep compat_ro_flags -A 3 compat_ro_flags 0x403 ( FREE_SPACE_TREE | FREE_SPACE_TREE_VALID | unknown flag: 0x400 ) # mount /dev/loop0 /mnt/btrfs mount: /mnt/btrfs: wrong fs type, bad option, bad superblock on /dev/loop0, missing codepage or helper program, or other error. dmesg(1) may have more information after failed mount system call. ^^^ RW mount failed as expected ^^^ # dmesg -t | tail -n5 loop0: detected capacity change from 0 to 1048576 BTRFS: device fsid cb5b82f5-0fdd-4d81-9b4b-78533c324afa devid 1 transid 7 /dev/loop0 scanned by mount (1146) BTRFS info (device loop0): using crc32c (crc32c-intel) checksum algorithm BTRFS info (device loop0): using free space tree BTRFS error (device loop0): cannot mount read-write because of unknown compat_ro features (0x403) BTRFS error (device loop0): open_ctree failed # mount /dev/loop0 -o ro /mnt/btrfs # mount -o remount,rw /mnt/btrfs ^^^ RW remount succeeded unexpectedly ^^^ [CAUSE] Currently we use btrfs_check_features() to check compat_ro flags against our current mount flags. That function get reused between open_ctree() and btrfs_remount(). But for btrfs_remount(), the super block we passed in still has the old mount flags, thus btrfs_check_features() still believes we're mounting read-only. [FIX] Replace the existing @sb argument with @is_rw_mount. As originally we only use @sb to determine if the mount is RW. Now it's callers' responsibility to determine if the mount is RW, and since there are only two callers, the check is pretty simple: - caller in open_ctree() Just pass !sb_rdonly(). - caller in btrfs_remount() Pass !(*flags & SB_RDONLY), as our check should be against the new flags. Now we can correctly reject the RW remount: # mount /dev/loop0 -o ro /mnt/btrfs # mount -o remount,rw /mnt/btrfs mount: /mnt/btrfs: mount point not mounted or bad option. dmesg(1) may have more information after failed mount system call. # dmesg -t | tail -n 1 BTRFS error (device loop0: state M): cannot mount read-write because of unknown compat_ro features (0x403) Reported-by: Chung-Chiang Cheng Fixes: 81d5d61454c3 ("btrfs: enhance unsupported compat RO flags handling") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 8 +++++--- fs/btrfs/disk-io.h | 2 +- fs/btrfs/super.c | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9940cc39dbc94..8aeaada1fcaec 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3384,6 +3384,8 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) /* * Do various sanity and dependency checks of different features. * + * @is_rw_mount: If the mount is read-write. + * * This is the place for less strict checks (like for subpage or artificial * feature dependencies). * @@ -3394,7 +3396,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) * (space cache related) can modify on-disk format like free space tree and * screw up certain feature dependencies. */ -int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb) +int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount) { struct btrfs_super_block *disk_super = fs_info->super_copy; u64 incompat = btrfs_super_incompat_flags(disk_super); @@ -3433,7 +3435,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb) if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) incompat |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; - if (compat_ro_unsupp && !sb_rdonly(sb)) { + if (compat_ro_unsupp && is_rw_mount) { btrfs_err(fs_info, "cannot mount read-write because of unknown compat_ro features (0x%llx)", compat_ro); @@ -3636,7 +3638,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_alloc; } - ret = btrfs_check_features(fs_info, sb); + ret = btrfs_check_features(fs_info, !sb_rdonly(sb)); if (ret < 0) { err = ret; goto fail_alloc; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 363935cfc0844..f2f295eb6103d 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -50,7 +50,7 @@ int __cold open_ctree(struct super_block *sb, void __cold close_ctree(struct btrfs_fs_info *fs_info); int btrfs_validate_super(struct btrfs_fs_info *fs_info, struct btrfs_super_block *sb, int mirror_num); -int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb); +int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount); int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors); struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev); struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d5de18d6517e7..433ce221dc5c7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1705,7 +1705,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (ret) goto restore; - ret = btrfs_check_features(fs_info, sb); + ret = btrfs_check_features(fs_info, !(*flags & SB_RDONLY)); if (ret < 0) goto restore; From f528fe213a6ad21a6e8644dbd5de10dc264a89fd Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 2 Jan 2023 12:23:20 +0100 Subject: [PATCH 0302/1593] regulator: qcom-rpmh: PM8550 ldo11 regulator is an nldo This fixes the definition of the PM8550 ldo11 regulator matching it's capabilities since this LDO is designed to work between 1,2V and 1,5V. Fixes: e6e3776d682d ("regulator: qcom-rpmh: Add support for PM8550 regulators") Signed-off-by: Neil Armstrong Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20230102-topic-sm8550-upstream-fixes-reg-l11b-nldo-v1-1-d97def246338@linaro.org Signed-off-by: Mark Brown --- drivers/regulator/qcom-rpmh-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c index 43b5b93777149..ae6021390143c 100644 --- a/drivers/regulator/qcom-rpmh-regulator.c +++ b/drivers/regulator/qcom-rpmh-regulator.c @@ -1016,7 +1016,7 @@ static const struct rpmh_vreg_init_data pm8550_vreg_data[] = { RPMH_VREG("ldo8", "ldo%s8", &pmic5_pldo_lv, "vdd-l8-l9"), RPMH_VREG("ldo9", "ldo%s9", &pmic5_pldo, "vdd-l8-l9"), RPMH_VREG("ldo10", "ldo%s10", &pmic5_nldo, "vdd-l1-l4-l10"), - RPMH_VREG("ldo11", "ldo%s11", &pmic5_pldo, "vdd-l11"), + RPMH_VREG("ldo11", "ldo%s11", &pmic5_nldo, "vdd-l11"), RPMH_VREG("ldo12", "ldo%s12", &pmic5_pldo, "vdd-l12"), RPMH_VREG("ldo13", "ldo%s13", &pmic5_pldo, "vdd-l2-l13-l14"), RPMH_VREG("ldo14", "ldo%s14", &pmic5_pldo, "vdd-l2-l13-l14"), From e7a293658c20a7945014570e1921bf7d25d68a36 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 29 Dec 2022 09:48:24 +0400 Subject: [PATCH 0303/1593] EDAC/highbank: Fix memory leak in highbank_mc_probe() When devres_open_group() fails, it returns -ENOMEM without freeing memory allocated by edac_mc_alloc(). Call edac_mc_free() on the error handling path to avoid a memory leak. [ bp: Massage commit message. ] Fixes: a1b01edb2745 ("edac: add support for Calxeda highbank memory controller") Signed-off-by: Miaoqian Lin Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20221229054825.1361993-1-linmq006@gmail.com --- drivers/edac/highbank_mc_edac.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c index 61b76ec226af1..19fba258ae108 100644 --- a/drivers/edac/highbank_mc_edac.c +++ b/drivers/edac/highbank_mc_edac.c @@ -174,8 +174,10 @@ static int highbank_mc_probe(struct platform_device *pdev) drvdata = mci->pvt_info; platform_set_drvdata(pdev, mci); - if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) - return -ENOMEM; + if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { + res = -ENOMEM; + goto free; + } r = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!r) { @@ -243,6 +245,7 @@ static int highbank_mc_probe(struct platform_device *pdev) edac_mc_del_mc(&pdev->dev); err: devres_release_group(&pdev->dev, NULL); +free: edac_mc_free(mci); return res; } From f6ca5059dc0d6608dc46070f48e396d611f240d6 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 22 Dec 2022 18:38:19 +0000 Subject: [PATCH 0304/1593] firmware: arm_scmi: Clear stale xfer->hdr.status Stale error status reported from a previous message transaction must be cleared before starting a new transaction to avoid being confusingly reported in the following SCMI message dump traces. Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221222183823.518856-2-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index f818d00bb2c69..ffdad59ec81fc 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -910,6 +910,8 @@ static int do_xfer(const struct scmi_protocol_handle *ph, xfer->hdr.protocol_id, xfer->hdr.seq, xfer->hdr.poll_completion); + /* Clear any stale status */ + xfer->hdr.status = SCMI_SUCCESS; xfer->state = SCMI_XFER_SENT_OK; /* * Even though spinlocking is not needed here since no race is possible From ad78b81a1077f7d956952cd8bdfe1e61504e3eb8 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 22 Dec 2022 18:38:20 +0000 Subject: [PATCH 0305/1593] firmware: arm_scmi: Harden shared memory access in fetch_response A misbheaving SCMI platform firmware could reply with out-of-spec messages, shorter than the mimimum size comprising a header and a status field. Harden shmem_fetch_response to properly truncate such a bad messages. Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221222183823.518856-3-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/shmem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c index 1dfe534b85184..135f8718000f5 100644 --- a/drivers/firmware/arm_scmi/shmem.c +++ b/drivers/firmware/arm_scmi/shmem.c @@ -81,10 +81,11 @@ u32 shmem_read_header(struct scmi_shared_mem __iomem *shmem) void shmem_fetch_response(struct scmi_shared_mem __iomem *shmem, struct scmi_xfer *xfer) { + size_t len = ioread32(&shmem->length); + xfer->hdr.status = ioread32(shmem->msg_payload); /* Skip the length of header and status in shmem area i.e 8 bytes */ - xfer->rx.len = min_t(size_t, xfer->rx.len, - ioread32(&shmem->length) - 8); + xfer->rx.len = min_t(size_t, xfer->rx.len, len > 8 ? len - 8 : 0); /* Take a copy to the rx buffer.. */ memcpy_fromio(xfer->rx.buf, shmem->msg_payload + 4, xfer->rx.len); From 9bae076cd4e3e3c3dc185cae829d80b2dddec86e Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 22 Dec 2022 18:38:21 +0000 Subject: [PATCH 0306/1593] firmware: arm_scmi: Harden shared memory access in fetch_notification A misbheaving SCMI platform firmware could reply with out-of-spec notifications, shorter than the mimimum size comprising a header. Fixes: d5141f37c42e ("firmware: arm_scmi: Add notifications support in transport layer") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221222183823.518856-4-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/shmem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c index 135f8718000f5..87b4f4d35f062 100644 --- a/drivers/firmware/arm_scmi/shmem.c +++ b/drivers/firmware/arm_scmi/shmem.c @@ -94,8 +94,10 @@ void shmem_fetch_response(struct scmi_shared_mem __iomem *shmem, void shmem_fetch_notification(struct scmi_shared_mem __iomem *shmem, size_t max_len, struct scmi_xfer *xfer) { + size_t len = ioread32(&shmem->length); + /* Skip only the length of header in shmem area i.e 4 bytes */ - xfer->rx.len = min_t(size_t, max_len, ioread32(&shmem->length) - 4); + xfer->rx.len = min_t(size_t, max_len, len > 4 ? len - 4 : 0); /* Take a copy to the rx buffer.. */ memcpy_fromio(xfer->rx.buf, shmem->msg_payload, xfer->rx.len); From e325285de2cd82fbdcc4df8898e4c6a597674816 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 22 Dec 2022 18:38:23 +0000 Subject: [PATCH 0307/1593] firmware: arm_scmi: Fix virtio channels cleanup on shutdown When unloading the SCMI core stack module, configured to use the virtio SCMI transport, LOCKDEP reports the splat down below about unsafe locks dependencies. In order to avoid this possible unsafe locking scenario call upfront virtio_break_device() before getting hold of vioch->lock. ===================================================== WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected 6.1.0-00067-g6b934395ba07-dirty #4 Not tainted ----------------------------------------------------- rmmod/307 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: ffff000080c510e0 (&dev->vqs_list_lock){+.+.}-{3:3}, at: virtio_break_device+0x28/0x68 and this task is already holding: ffff00008288ada0 (&channels[i].lock){-.-.}-{3:3}, at: virtio_chan_free+0x60/0x168 [scmi_module] which would create a new lock dependency: (&channels[i].lock){-.-.}-{3:3} -> (&dev->vqs_list_lock){+.+.}-{3:3} but this new dependency connects a HARDIRQ-irq-safe lock: (&channels[i].lock){-.-.}-{3:3} ... which became HARDIRQ-irq-safe at: lock_acquire+0x128/0x398 _raw_spin_lock_irqsave+0x78/0x140 scmi_vio_complete_cb+0xb4/0x3b8 [scmi_module] vring_interrupt+0x84/0x120 vm_interrupt+0x94/0xe8 __handle_irq_event_percpu+0xb4/0x3d8 handle_irq_event_percpu+0x20/0x68 handle_irq_event+0x50/0xb0 handle_fasteoi_irq+0xac/0x138 generic_handle_domain_irq+0x34/0x50 gic_handle_irq+0xa0/0xd8 call_on_irq_stack+0x2c/0x54 do_interrupt_handler+0x8c/0x90 el1_interrupt+0x40/0x78 el1h_64_irq_handler+0x18/0x28 el1h_64_irq+0x64/0x68 _raw_write_unlock_irq+0x48/0x80 ep_start_scan+0xf0/0x128 do_epoll_wait+0x390/0x858 do_compat_epoll_pwait.part.34+0x1c/0xb8 __arm64_sys_epoll_pwait+0x80/0xd0 invoke_syscall+0x4c/0x110 el0_svc_common.constprop.3+0x98/0x120 do_el0_svc+0x34/0xd0 el0_svc+0x40/0x98 el0t_64_sync_handler+0x98/0xc0 el0t_64_sync+0x170/0x174 to a HARDIRQ-irq-unsafe lock: (&dev->vqs_list_lock){+.+.}-{3:3} ... which became HARDIRQ-irq-unsafe at: ... lock_acquire+0x128/0x398 _raw_spin_lock+0x58/0x70 __vring_new_virtqueue+0x130/0x1c0 vring_create_virtqueue+0xc4/0x2b8 vm_find_vqs+0x20c/0x430 init_vq+0x308/0x390 virtblk_probe+0x114/0x9b0 virtio_dev_probe+0x1a4/0x248 really_probe+0xc8/0x3a8 __driver_probe_device+0x84/0x190 driver_probe_device+0x44/0x110 __driver_attach+0x104/0x1e8 bus_for_each_dev+0x7c/0xd0 driver_attach+0x2c/0x38 bus_add_driver+0x1e4/0x258 driver_register+0x6c/0x128 register_virtio_driver+0x2c/0x48 virtio_blk_init+0x70/0xac do_one_initcall+0x84/0x420 kernel_init_freeable+0x2d0/0x340 kernel_init+0x2c/0x138 ret_from_fork+0x10/0x20 other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&dev->vqs_list_lock); local_irq_disable(); lock(&channels[i].lock); lock(&dev->vqs_list_lock); lock(&channels[i].lock); *** DEADLOCK *** ================ Fixes: 42e90eb53bf3f ("firmware: arm_scmi: Add a virtio channel refcount") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221222183823.518856-6-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/virtio.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/virtio.c b/drivers/firmware/arm_scmi/virtio.c index 33c9b81a55cd1..1db975c088969 100644 --- a/drivers/firmware/arm_scmi/virtio.c +++ b/drivers/firmware/arm_scmi/virtio.c @@ -160,7 +160,6 @@ static void scmi_vio_channel_cleanup_sync(struct scmi_vio_channel *vioch) } vioch->shutdown_done = &vioch_shutdown_done; - virtio_break_device(vioch->vqueue->vdev); if (!vioch->is_rx && vioch->deferred_tx_wq) /* Cannot be kicked anymore after this...*/ vioch->deferred_tx_wq = NULL; @@ -482,6 +481,12 @@ static int virtio_chan_free(int id, void *p, void *data) struct scmi_chan_info *cinfo = p; struct scmi_vio_channel *vioch = cinfo->transport_info; + /* + * Break device to inhibit further traffic flowing while shutting down + * the channels: doing it later holding vioch->lock creates unsafe + * locking dependency chains as reported by LOCKDEP. + */ + virtio_break_device(vioch->vqueue->vdev); scmi_vio_channel_cleanup_sync(vioch); scmi_free_channel(cinfo, data, id); From 55d235361fccef573990dfa5724ab453866e7816 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 3 Jan 2023 10:24:11 -0500 Subject: [PATCH 0308/1593] x86/asm: Fix an assembler warning with current binutils Fix a warning: "found `movsd'; assuming `movsl' was meant" Signed-off-by: Mikulas Patocka Signed-off-by: Ingo Molnar Cc: linux-kernel@vger.kernel.org --- arch/x86/lib/iomap_copy_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S index a1f9416bf67a5..6ff2f56cb0f71 100644 --- a/arch/x86/lib/iomap_copy_64.S +++ b/arch/x86/lib/iomap_copy_64.S @@ -10,6 +10,6 @@ */ SYM_FUNC_START(__iowrite32_copy) movl %edx,%ecx - rep movsd + rep movsl RET SYM_FUNC_END(__iowrite32_copy) From b3d83066cbebc76dbac8a5fca931f64b4c6fff34 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 30 Dec 2022 23:43:32 +0800 Subject: [PATCH 0309/1593] f2fs: fix to avoid NULL pointer dereference in f2fs_issue_flush() With below two cases, it will cause NULL pointer dereference when accessing SM_I(sbi)->fcc_info in f2fs_issue_flush(). a) If kthread_run() fails in f2fs_create_flush_cmd_control(), it will release SM_I(sbi)->fcc_info, - mount -o noflush_merge /dev/vda /mnt/f2fs - mount -o remount,flush_merge /dev/vda /mnt/f2fs -- kthread_run() fails - dd if=/dev/zero of=/mnt/f2fs/file bs=4k count=1 conv=fsync b) we will never allocate memory for SM_I(sbi)->fcc_info w/ below testcase, - mount -o ro /dev/vda /mnt/f2fs - mount -o rw,remount /dev/vda /mnt/f2fs - dd if=/dev/zero of=/mnt/f2fs/file bs=4k count=1 conv=fsync In order to fix this issue, let change as below: - fix error path handling in f2fs_create_flush_cmd_control(). - allocate SM_I(sbi)->fcc_info even if readonly is on. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 25ddea478fc1a..c3f8c8208eeca 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -663,8 +663,7 @@ int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi) if (IS_ERR(fcc->f2fs_issue_flush)) { int err = PTR_ERR(fcc->f2fs_issue_flush); - kfree(fcc); - SM_I(sbi)->fcc_info = NULL; + fcc->f2fs_issue_flush = NULL; return err; } @@ -5138,11 +5137,9 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) init_f2fs_rwsem(&sm_info->curseg_lock); - if (!f2fs_readonly(sbi->sb)) { - err = f2fs_create_flush_cmd_control(sbi); - if (err) - return err; - } + err = f2fs_create_flush_cmd_control(sbi); + if (err) + return err; err = create_discard_cmd_control(sbi); if (err) From fe59109ae5c0b34a8c7c07f693fc501b12b57787 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 16 Dec 2022 14:05:44 -0800 Subject: [PATCH 0310/1593] f2fs: initialize extent_cache parameter This can avoid confusing tracepoint values. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/extent_cache.c | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/segment.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6e43e19c7d1ca..97e816590cd95 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2183,7 +2183,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, sector_t last_block_in_file; const unsigned blocksize = blks_to_bytes(inode, 1); struct decompress_io_ctx *dic = NULL; - struct extent_info ei = {0, }; + struct extent_info ei = {}; bool from_dnode = true; int i; int ret = 0; diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 1bd38a78ebba9..3aa2f82960455 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -938,7 +938,7 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei) static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type) { - struct extent_info ei; + struct extent_info ei = {}; if (!__may_extent_tree(dn->inode, type)) return; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a6c4012798860..ecbc8c135b494 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2559,7 +2559,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, struct f2fs_map_blocks map = { .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, .m_may_create = false }; - struct extent_info ei = {0, }; + struct extent_info ei = {}; pgoff_t pg_start, pg_end, next_pgofs; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c3f8c8208eeca..ae3c4e5474efa 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3160,7 +3160,7 @@ static int __get_segment_type_4(struct f2fs_io_info *fio) static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_info ei; + struct extent_info ei = {}; if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) { if (!ei.age) From ed2724765e58e3310d3de48f4a1761631b3dd640 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 16 Dec 2022 14:41:54 -0800 Subject: [PATCH 0311/1593] f2fs: don't mix to use union values in extent_info Let's explicitly use the defined values in block_age case only. Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 3aa2f82960455..cc3fed04dd6f7 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -881,7 +881,8 @@ static unsigned long long __calculate_block_age(unsigned long long new, } /* This returns a new age and allocated blocks in ei */ -static int __get_new_block_age(struct inode *inode, struct extent_info *ei) +static int __get_new_block_age(struct inode *inode, struct extent_info *ei, + block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t f_size = i_size_read(inode); @@ -894,7 +895,7 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei) * block here. */ if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) && - ei->blk == NEW_ADDR) + blkaddr == NEW_ADDR) return -EINVAL; if (__lookup_extent_tree(inode, ei->fofs, ei, EX_BLOCK_AGE)) { @@ -915,14 +916,14 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei) return 0; } - f2fs_bug_on(sbi, ei->blk == NULL_ADDR); + f2fs_bug_on(sbi, blkaddr == NULL_ADDR); /* the data block was allocated for the first time */ - if (ei->blk == NEW_ADDR) + if (blkaddr == NEW_ADDR) goto out; - if (__is_valid_data_blkaddr(ei->blk) && - !f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE)) { + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { f2fs_bug_on(sbi, 1); return -EINVAL; } @@ -953,8 +954,7 @@ static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type typ else ei.blk = dn->data_blkaddr; } else if (type == EX_BLOCK_AGE) { - ei.blk = dn->data_blkaddr; - if (__get_new_block_age(dn->inode, &ei)) + if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr)) return; } __update_extent_tree_range(dn->inode, &ei, type); From 22a341b43036415718f2d50f5f98b2f891fe17e9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 16 Dec 2022 16:36:36 -0800 Subject: [PATCH 0312/1593] f2fs: should use a temp extent_info for lookup Otherwise, __lookup_extent_tree() will override the given extent_info which will be used by caller. Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index cc3fed04dd6f7..7b191ff656319 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -888,6 +888,7 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei, loff_t f_size = i_size_read(inode); unsigned long long cur_blocks = atomic64_read(&sbi->allocated_data_blocks); + struct extent_info tei = *ei; /* only fofs and len are valid */ /* * When I/O is not aligned to a PAGE_SIZE, update will happen to the last @@ -898,17 +899,17 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei, blkaddr == NEW_ADDR) return -EINVAL; - if (__lookup_extent_tree(inode, ei->fofs, ei, EX_BLOCK_AGE)) { + if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) { unsigned long long cur_age; - if (cur_blocks >= ei->last_blocks) - cur_age = cur_blocks - ei->last_blocks; + if (cur_blocks >= tei.last_blocks) + cur_age = cur_blocks - tei.last_blocks; else /* allocated_data_blocks overflow */ - cur_age = ULLONG_MAX - ei->last_blocks + cur_blocks; + cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks; - if (ei->age) - ei->age = __calculate_block_age(cur_age, ei->age); + if (tei.age) + ei->age = __calculate_block_age(cur_age, tei.age); else ei->age = cur_age; ei->last_blocks = cur_blocks; From df9d44b645b83fffccfb4e28c1f93376585fdec8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 21 Dec 2022 16:14:10 -0800 Subject: [PATCH 0313/1593] f2fs: let's avoid panic if extent_tree is not created This patch avoids the below panic. pc : __lookup_extent_tree+0xd8/0x760 lr : f2fs_do_write_data_page+0x104/0x87c sp : ffffffc010cbb3c0 x29: ffffffc010cbb3e0 x28: 0000000000000000 x27: ffffff8803e7f020 x26: ffffff8803e7ed40 x25: ffffff8803e7f020 x24: ffffffc010cbb460 x23: ffffffc010cbb480 x22: 0000000000000000 x21: 0000000000000000 x20: ffffffff22e90900 x19: 0000000000000000 x18: ffffffc010c5d080 x17: 0000000000000000 x16: 0000000000000020 x15: ffffffdb1acdbb88 x14: ffffff888759e2b0 x13: 0000000000000000 x12: ffffff802da49000 x11: 000000000a001200 x10: ffffff8803e7ed40 x9 : ffffff8023195800 x8 : ffffff802da49078 x7 : 0000000000000001 x6 : 0000000000000000 x5 : 0000000000000006 x4 : ffffffc010cbba28 x3 : 0000000000000000 x2 : ffffffc010cbb480 x1 : 0000000000000000 x0 : ffffff8803e7ed40 Call trace: __lookup_extent_tree+0xd8/0x760 f2fs_do_write_data_page+0x104/0x87c f2fs_write_single_data_page+0x420/0xb60 f2fs_write_cache_pages+0x418/0xb1c __f2fs_write_data_pages+0x428/0x58c f2fs_write_data_pages+0x30/0x40 do_writepages+0x88/0x190 __writeback_single_inode+0x48/0x448 writeback_sb_inodes+0x468/0x9e8 __writeback_inodes_wb+0xb8/0x2a4 wb_writeback+0x33c/0x740 wb_do_writeback+0x2b4/0x400 wb_workfn+0xe4/0x34c process_one_work+0x24c/0x5bc worker_thread+0x3e8/0xa50 kthread+0x150/0x1b4 Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 7b191ff656319..342af24b2f8cf 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -546,7 +546,8 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, struct extent_node *en; bool ret = false; - f2fs_bug_on(sbi, !et); + if (!et) + return false; trace_f2fs_lookup_extent_tree_start(inode, pgofs, type); From 72bb8f8cc088730c4d84117a6906f458c2fc64bb Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 1 Jan 2023 17:29:04 +0100 Subject: [PATCH 0314/1593] x86/insn: Avoid namespace clash by separating instruction decoder MMIO type from MMIO trace type Both and define various MMIO_ enum constants, whose namespace overlaps. Rename the ones to have a INSN_ prefix, so that the headers can be used from the same source file. Signed-off-by: Jason A. Donenfeld Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230101162910.710293-2-Jason@zx2c4.com --- arch/x86/coco/tdx/tdx.c | 26 +++++++++++++------------- arch/x86/include/asm/insn-eval.h | 18 +++++++++--------- arch/x86/kernel/sev.c | 18 +++++++++--------- arch/x86/lib/insn-eval.c | 20 ++++++++++---------- 4 files changed, 41 insertions(+), 41 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index cfd4c95b9f045..669d9e4f29015 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -386,8 +386,8 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) { unsigned long *reg, val, vaddr; char buffer[MAX_INSN_SIZE]; + enum insn_mmio_type mmio; struct insn insn = {}; - enum mmio_type mmio; int size, extend_size; u8 extend_val = 0; @@ -402,10 +402,10 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) return -EINVAL; mmio = insn_decode_mmio(&insn, &size); - if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED)) + if (WARN_ON_ONCE(mmio == INSN_MMIO_DECODE_FAILED)) return -EINVAL; - if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) { + if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) { reg = insn_get_modrm_reg_ptr(&insn, regs); if (!reg) return -EINVAL; @@ -426,23 +426,23 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) /* Handle writes first */ switch (mmio) { - case MMIO_WRITE: + case INSN_MMIO_WRITE: memcpy(&val, reg, size); if (!mmio_write(size, ve->gpa, val)) return -EIO; return insn.length; - case MMIO_WRITE_IMM: + case INSN_MMIO_WRITE_IMM: val = insn.immediate.value; if (!mmio_write(size, ve->gpa, val)) return -EIO; return insn.length; - case MMIO_READ: - case MMIO_READ_ZERO_EXTEND: - case MMIO_READ_SIGN_EXTEND: + case INSN_MMIO_READ: + case INSN_MMIO_READ_ZERO_EXTEND: + case INSN_MMIO_READ_SIGN_EXTEND: /* Reads are handled below */ break; - case MMIO_MOVS: - case MMIO_DECODE_FAILED: + case INSN_MMIO_MOVS: + case INSN_MMIO_DECODE_FAILED: /* * MMIO was accessed with an instruction that could not be * decoded or handled properly. It was likely not using io.h @@ -459,15 +459,15 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) return -EIO; switch (mmio) { - case MMIO_READ: + case INSN_MMIO_READ: /* Zero-extend for 32-bit operation */ extend_size = size == 4 ? sizeof(*reg) : 0; break; - case MMIO_READ_ZERO_EXTEND: + case INSN_MMIO_READ_ZERO_EXTEND: /* Zero extend based on operand size */ extend_size = insn.opnd_bytes; break; - case MMIO_READ_SIGN_EXTEND: + case INSN_MMIO_READ_SIGN_EXTEND: /* Sign extend based on operand size */ extend_size = insn.opnd_bytes; if (size == 1 && val & BIT(7)) diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index f07faa61c7f31..54368a43abf67 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -32,16 +32,16 @@ int insn_fetch_from_user_inatomic(struct pt_regs *regs, bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE], int buf_size); -enum mmio_type { - MMIO_DECODE_FAILED, - MMIO_WRITE, - MMIO_WRITE_IMM, - MMIO_READ, - MMIO_READ_ZERO_EXTEND, - MMIO_READ_SIGN_EXTEND, - MMIO_MOVS, +enum insn_mmio_type { + INSN_MMIO_DECODE_FAILED, + INSN_MMIO_WRITE, + INSN_MMIO_WRITE_IMM, + INSN_MMIO_READ, + INSN_MMIO_READ_ZERO_EXTEND, + INSN_MMIO_READ_SIGN_EXTEND, + INSN_MMIO_MOVS, }; -enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes); +enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes); #endif /* _ASM_X86_INSN_EVAL_H */ diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index a428c62330d37..679026a640efd 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1536,32 +1536,32 @@ static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt, static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) { struct insn *insn = &ctxt->insn; + enum insn_mmio_type mmio; unsigned int bytes = 0; - enum mmio_type mmio; enum es_result ret; u8 sign_byte; long *reg_data; mmio = insn_decode_mmio(insn, &bytes); - if (mmio == MMIO_DECODE_FAILED) + if (mmio == INSN_MMIO_DECODE_FAILED) return ES_DECODE_FAILED; - if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) { + if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) { reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs); if (!reg_data) return ES_DECODE_FAILED; } switch (mmio) { - case MMIO_WRITE: + case INSN_MMIO_WRITE: memcpy(ghcb->shared_buffer, reg_data, bytes); ret = vc_do_mmio(ghcb, ctxt, bytes, false); break; - case MMIO_WRITE_IMM: + case INSN_MMIO_WRITE_IMM: memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes); ret = vc_do_mmio(ghcb, ctxt, bytes, false); break; - case MMIO_READ: + case INSN_MMIO_READ: ret = vc_do_mmio(ghcb, ctxt, bytes, true); if (ret) break; @@ -1572,7 +1572,7 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) memcpy(reg_data, ghcb->shared_buffer, bytes); break; - case MMIO_READ_ZERO_EXTEND: + case INSN_MMIO_READ_ZERO_EXTEND: ret = vc_do_mmio(ghcb, ctxt, bytes, true); if (ret) break; @@ -1581,7 +1581,7 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) memset(reg_data, 0, insn->opnd_bytes); memcpy(reg_data, ghcb->shared_buffer, bytes); break; - case MMIO_READ_SIGN_EXTEND: + case INSN_MMIO_READ_SIGN_EXTEND: ret = vc_do_mmio(ghcb, ctxt, bytes, true); if (ret) break; @@ -1600,7 +1600,7 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) memset(reg_data, sign_byte, insn->opnd_bytes); memcpy(reg_data, ghcb->shared_buffer, bytes); break; - case MMIO_MOVS: + case INSN_MMIO_MOVS: ret = vc_handle_mmio_movs(ctxt, bytes); break; default: diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 21104c41cba04..558a605929db5 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -1595,16 +1595,16 @@ bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs, * Returns: * * Type of the instruction. Size of the memory operand is stored in - * @bytes. If decode failed, MMIO_DECODE_FAILED returned. + * @bytes. If decode failed, INSN_MMIO_DECODE_FAILED returned. */ -enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes) +enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes) { - enum mmio_type type = MMIO_DECODE_FAILED; + enum insn_mmio_type type = INSN_MMIO_DECODE_FAILED; *bytes = 0; if (insn_get_opcode(insn)) - return MMIO_DECODE_FAILED; + return INSN_MMIO_DECODE_FAILED; switch (insn->opcode.bytes[0]) { case 0x88: /* MOV m8,r8 */ @@ -1613,7 +1613,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes) case 0x89: /* MOV m16/m32/m64, r16/m32/m64 */ if (!*bytes) *bytes = insn->opnd_bytes; - type = MMIO_WRITE; + type = INSN_MMIO_WRITE; break; case 0xc6: /* MOV m8, imm8 */ @@ -1622,7 +1622,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes) case 0xc7: /* MOV m16/m32/m64, imm16/imm32/imm64 */ if (!*bytes) *bytes = insn->opnd_bytes; - type = MMIO_WRITE_IMM; + type = INSN_MMIO_WRITE_IMM; break; case 0x8a: /* MOV r8, m8 */ @@ -1631,7 +1631,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes) case 0x8b: /* MOV r16/r32/r64, m16/m32/m64 */ if (!*bytes) *bytes = insn->opnd_bytes; - type = MMIO_READ; + type = INSN_MMIO_READ; break; case 0xa4: /* MOVS m8, m8 */ @@ -1640,7 +1640,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes) case 0xa5: /* MOVS m16/m32/m64, m16/m32/m64 */ if (!*bytes) *bytes = insn->opnd_bytes; - type = MMIO_MOVS; + type = INSN_MMIO_MOVS; break; case 0x0f: /* Two-byte instruction */ @@ -1651,7 +1651,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes) case 0xb7: /* MOVZX r32/r64, m16 */ if (!*bytes) *bytes = 2; - type = MMIO_READ_ZERO_EXTEND; + type = INSN_MMIO_READ_ZERO_EXTEND; break; case 0xbe: /* MOVSX r16/r32/r64, m8 */ @@ -1660,7 +1660,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes) case 0xbf: /* MOVSX r32/r64, m16 */ if (!*bytes) *bytes = 2; - type = MMIO_READ_SIGN_EXTEND; + type = INSN_MMIO_READ_SIGN_EXTEND; break; } break; From c07311b5509f6035f1dd828db3e90ff4859cf3b9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 28 Dec 2022 06:34:54 -0500 Subject: [PATCH 0315/1593] perf/x86/rapl: Treat Tigerlake like Icelake Since Tigerlake seems to have inherited its cstates and other RAPL power caps from Icelake, assume it also follows Icelake for its RAPL events. Signed-off-by: Chris Wilson Signed-off-by: Rodrigo Vivi Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Zhang Rui Link: https://lore.kernel.org/r/20221228113454.1199118-1-rodrigo.vivi@intel.com --- arch/x86/events/rapl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index a829492bca4c1..ae5779ea44174 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -800,6 +800,8 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &model_skl), From c0f399ff51495ac8d30367418f4f6292ecd61fbe Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 26 Dec 2022 10:11:18 -0800 Subject: [PATCH 0316/1593] xfs: fix off-by-one error in xfs_btree_space_to_height Lately I've been stress-testing extreme-sized rmap btrees by using the (new) xfs_db bmap_inflate command to clone bmbt mappings billions of times and then using xfs_repair to build new rmap and refcount btrees. This of course is /much/ faster than actually FICLONEing a file billions of times. Unfortunately, xfs_repair fails in xfs_btree_bload_compute_geometry with EOVERFLOW, which indicates that xfs_mount.m_rmap_maxlevels is not sufficiently large for the test scenario. For a 1TB filesystem (~67 million AG blocks, 4 AGs) the btheight command reports: $ xfs_db -c 'btheight -n 4400801200 -w min rmapbt' /dev/sda rmapbt: worst case per 4096-byte block: 84 records (leaf) / 45 keyptrs (node) level 0: 4400801200 records, 52390491 blocks level 1: 52390491 records, 1164234 blocks level 2: 1164234 records, 25872 blocks level 3: 25872 records, 575 blocks level 4: 575 records, 13 blocks level 5: 13 records, 1 block 6 levels, 53581186 blocks total The AG is sufficiently large to build this rmap btree. Unfortunately, m_rmap_maxlevels is 5. Augmenting the loop in the space->height function to report height, node blocks, and blocks remaining produces this: ht 1 node_blocks 45 blockleft 67108863 ht 2 node_blocks 2025 blockleft 67108818 ht 3 node_blocks 91125 blockleft 67106793 ht 4 node_blocks 4100625 blockleft 67015668 final height: 5 The goal of this function is to compute the maximum height btree that can be stored in the given number of ondisk fsblocks. Starting with the top level of the tree, each iteration through the loop adds the fanout factor of the next level down until we run out of blocks. IOWs, maximum height is achieved by using the smallest fanout factor that can apply to that level. However, the loop setup is not correct. Top level btree blocks are allowed to contain fewer than minrecs items, so the computation is incorrect because the first time through the loop it should be using a fanout factor of 2. With this corrected, the above becomes: ht 1 node_blocks 2 blockleft 67108863 ht 2 node_blocks 90 blockleft 67108861 ht 3 node_blocks 4050 blockleft 67108771 ht 4 node_blocks 182250 blockleft 67104721 ht 5 node_blocks 8201250 blockleft 66922471 final height: 6 Fixes: 9ec691205e7d ("xfs: compute the maximum height of the rmap btree when reflink enabled") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_btree.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 4c16c8c31fcbc..35f574421670d 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4666,7 +4666,12 @@ xfs_btree_space_to_height( const unsigned int *limits, unsigned long long leaf_blocks) { - unsigned long long node_blocks = limits[1]; + /* + * The root btree block can have fewer than minrecs pointers in it + * because the tree might not be big enough to require that amount of + * fanout. Hence it has a minimum size of 2 pointers, not limits[1]. + */ + unsigned long long node_blocks = 2; unsigned long long blocks_left = leaf_blocks - 1; unsigned int height = 1; From 817644fa4525258992f17fecf4f1d6cdd2e1b731 Mon Sep 17 00:00:00 2001 From: Hironori Shiina Date: Mon, 26 Dec 2022 10:11:19 -0800 Subject: [PATCH 0317/1593] xfs: get root inode correctly at bulkstat The root inode number should be set to `breq->startino` for getting stat information of the root when XFS_BULK_IREQ_SPECIAL_ROOT is used. Otherwise, the inode search is started from 1 (XFS_BULK_IREQ_SPECIAL_ROOT) and the inode with the lowest number in a filesystem is returned. Fixes: bf3cb3944792 ("xfs: allow single bulkstat of special inodes") Signed-off-by: Hironori Shiina Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 13f1b2add3904..736510bc241b8 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -754,7 +754,7 @@ xfs_bulkstat_fmt( static int xfs_bulk_ireq_setup( struct xfs_mount *mp, - struct xfs_bulk_ireq *hdr, + const struct xfs_bulk_ireq *hdr, struct xfs_ibulk *breq, void __user *ubuffer) { @@ -780,7 +780,7 @@ xfs_bulk_ireq_setup( switch (hdr->ino) { case XFS_BULK_IREQ_SPECIAL_ROOT: - hdr->ino = mp->m_sb.sb_rootino; + breq->startino = mp->m_sb.sb_rootino; break; default: return -EINVAL; From 4da112513c01d7d0acf1025b8764349d46e177d6 Mon Sep 17 00:00:00 2001 From: Wu Guanghao Date: Tue, 27 Dec 2022 09:41:30 -0800 Subject: [PATCH 0318/1593] xfs: Fix deadlock on xfs_inodegc_worker We are doing a test about deleting a large number of files when memory is low. A deadlock problem was found. [ 1240.279183] -> #1 (fs_reclaim){+.+.}-{0:0}: [ 1240.280450] lock_acquire+0x197/0x460 [ 1240.281548] fs_reclaim_acquire.part.0+0x20/0x30 [ 1240.282625] kmem_cache_alloc+0x2b/0x940 [ 1240.283816] xfs_trans_alloc+0x8a/0x8b0 [ 1240.284757] xfs_inactive_ifree+0xe4/0x4e0 [ 1240.285935] xfs_inactive+0x4e9/0x8a0 [ 1240.286836] xfs_inodegc_worker+0x160/0x5e0 [ 1240.287969] process_one_work+0xa19/0x16b0 [ 1240.289030] worker_thread+0x9e/0x1050 [ 1240.290131] kthread+0x34f/0x460 [ 1240.290999] ret_from_fork+0x22/0x30 [ 1240.291905] [ 1240.291905] -> #0 ((work_completion)(&gc->work)){+.+.}-{0:0}: [ 1240.293569] check_prev_add+0x160/0x2490 [ 1240.294473] __lock_acquire+0x2c4d/0x5160 [ 1240.295544] lock_acquire+0x197/0x460 [ 1240.296403] __flush_work+0x6bc/0xa20 [ 1240.297522] xfs_inode_mark_reclaimable+0x6f0/0xdc0 [ 1240.298649] destroy_inode+0xc6/0x1b0 [ 1240.299677] dispose_list+0xe1/0x1d0 [ 1240.300567] prune_icache_sb+0xec/0x150 [ 1240.301794] super_cache_scan+0x2c9/0x480 [ 1240.302776] do_shrink_slab+0x3f0/0xaa0 [ 1240.303671] shrink_slab+0x170/0x660 [ 1240.304601] shrink_node+0x7f7/0x1df0 [ 1240.305515] balance_pgdat+0x766/0xf50 [ 1240.306657] kswapd+0x5bd/0xd20 [ 1240.307551] kthread+0x34f/0x460 [ 1240.308346] ret_from_fork+0x22/0x30 [ 1240.309247] [ 1240.309247] other info that might help us debug this: [ 1240.309247] [ 1240.310944] Possible unsafe locking scenario: [ 1240.310944] [ 1240.312379] CPU0 CPU1 [ 1240.313363] ---- ---- [ 1240.314433] lock(fs_reclaim); [ 1240.315107] lock((work_completion)(&gc->work)); [ 1240.316828] lock(fs_reclaim); [ 1240.318088] lock((work_completion)(&gc->work)); [ 1240.319203] [ 1240.319203] *** DEADLOCK *** ... [ 2438.431081] Workqueue: xfs-inodegc/sda xfs_inodegc_worker [ 2438.432089] Call Trace: [ 2438.432562] __schedule+0xa94/0x1d20 [ 2438.435787] schedule+0xbf/0x270 [ 2438.436397] schedule_timeout+0x6f8/0x8b0 [ 2438.445126] wait_for_completion+0x163/0x260 [ 2438.448610] __flush_work+0x4c4/0xa40 [ 2438.455011] xfs_inode_mark_reclaimable+0x6ef/0xda0 [ 2438.456695] destroy_inode+0xc6/0x1b0 [ 2438.457375] dispose_list+0xe1/0x1d0 [ 2438.458834] prune_icache_sb+0xe8/0x150 [ 2438.461181] super_cache_scan+0x2b3/0x470 [ 2438.461950] do_shrink_slab+0x3cf/0xa50 [ 2438.462687] shrink_slab+0x17d/0x660 [ 2438.466392] shrink_node+0x87e/0x1d40 [ 2438.467894] do_try_to_free_pages+0x364/0x1300 [ 2438.471188] try_to_free_pages+0x26c/0x5b0 [ 2438.473567] __alloc_pages_slowpath.constprop.136+0x7aa/0x2100 [ 2438.482577] __alloc_pages+0x5db/0x710 [ 2438.485231] alloc_pages+0x100/0x200 [ 2438.485923] allocate_slab+0x2c0/0x380 [ 2438.486623] ___slab_alloc+0x41f/0x690 [ 2438.490254] __slab_alloc+0x54/0x70 [ 2438.491692] kmem_cache_alloc+0x23e/0x270 [ 2438.492437] xfs_trans_alloc+0x88/0x880 [ 2438.493168] xfs_inactive_ifree+0xe2/0x4e0 [ 2438.496419] xfs_inactive+0x4eb/0x8b0 [ 2438.497123] xfs_inodegc_worker+0x16b/0x5e0 [ 2438.497918] process_one_work+0xbf7/0x1a20 [ 2438.500316] worker_thread+0x8c/0x1060 [ 2438.504938] ret_from_fork+0x22/0x30 When the memory is insufficient, xfs_inonodegc_worker will trigger memory reclamation when memory is allocated, then flush_work() may be called to wait for the work to complete. This causes a deadlock. So use memalloc_nofs_save() to avoid triggering memory reclamation in xfs_inodegc_worker. Signed-off-by: Wu Guanghao Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_icache.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index f35e2cee52655..ddeaccc04aec9 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1853,12 +1853,20 @@ xfs_inodegc_worker( struct xfs_inodegc, work); struct llist_node *node = llist_del_all(&gc->list); struct xfs_inode *ip, *n; + unsigned int nofs_flag; WRITE_ONCE(gc->items, 0); if (!node) return; + /* + * We can allocate memory here while doing writeback on behalf of + * memory reclaim. To avoid memory allocation deadlocks set the + * task-wide nofs context for the following operations. + */ + nofs_flag = memalloc_nofs_save(); + ip = llist_entry(node, struct xfs_inode, i_gclist); trace_xfs_inodegc_worker(ip->i_mount, READ_ONCE(gc->shrinker_hits)); @@ -1867,6 +1875,8 @@ xfs_inodegc_worker( xfs_iflags_set(ip, XFS_INACTIVATING); xfs_inodegc_inactivate(ip); } + + memalloc_nofs_restore(nofs_flag); } /* From e195605ed28b5581dc2d509283cfea2e8635a251 Mon Sep 17 00:00:00 2001 From: Li zeming Date: Tue, 3 Jan 2023 09:54:28 -0800 Subject: [PATCH 0319/1593] =?UTF-8?q?xfs:=20xfs=5Fqm:=20remove=20unnecessa?= =?UTF-8?q?ry=20=E2=80=980=E2=80=99=20values=20from=20error?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit error is assigned first, so it does not need to initialize the assignment. Signed-off-by: Li zeming Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_qm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index ff53d40a2dae3..e2c542f6dcd4d 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -68,7 +68,7 @@ xfs_qm_dquot_walk( while (1) { struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH]; - int error = 0; + int error; int i; mutex_lock(&qi->qi_tree_lock); From f89fb55714b620ff1352141a9f9315611f16573e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 2 Jan 2023 23:09:16 -0800 Subject: [PATCH 0320/1593] perf build: Don't propagate subdir to submakes for install_headers subdir is added to the OUTPUT which fails as part of building install_headers when passed from "make -C tools perf_install". Committer testing: The original reporter (see the Link: below) had trouble with this: $ make -C tools perf_install That ended up with errors like this: /var/home/acme/git/perf-urgent/tools/scripts/Makefile.include:17: *** output directory "/var/home/acme/git/perf-urgent/tools/perf/libperf/perf/" does not exist. Stop. With this patch applied we now get it installed at: INSTALL /var/home/acme/git/perf-urgent/tools/perf/libperf/include/perf/bpf_perf.h As expected: $ ls -la /var/home/acme/git/perf-urgent/tools/perf/libperf/include/perf/bpf_perf.h -rw-r--r--. 1 acme acme 1146 Jan 3 15:42 /var/home/acme/git/perf-urgent/tools/perf/libperf/include/perf/bpf_perf.h And if we clean tools with: $ make -C tools clean it gets cleaned up: $ ls -la /var/home/acme/git/perf-urgent/tools/perf/libperf/include/perf/bpf_perf.h ls: cannot access '/var/home/acme/git/perf-urgent/tools/perf/libperf/include/perf/bpf_perf.h': No such file or directory $ Fixes: 746bd29e348f99b4 ("perf build: Use tools/lib headers from install path") Reported-by: Torsten Hilbrich Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/fa4b3115-d555-3d7f-54d1-018002e99350@secunet.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 13e7d26e77f04..1e32c93b80429 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -819,7 +819,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) $(LIBAPI): FORCE | $(LIBAPI_OUTPUT) $(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \ - DESTDIR=$(LIBAPI_DESTDIR) prefix= \ + DESTDIR=$(LIBAPI_DESTDIR) prefix= subdir= \ $@ install_headers $(LIBAPI)-clean: @@ -828,7 +828,7 @@ $(LIBAPI)-clean: $(LIBBPF): FORCE | $(LIBBPF_OUTPUT) $(Q)$(MAKE) -C $(LIBBPF_DIR) FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) \ - O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \ + O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= subdir= \ $@ install_headers $(LIBBPF)-clean: @@ -837,7 +837,7 @@ $(LIBBPF)-clean: $(LIBPERF): FORCE | $(LIBPERF_OUTPUT) $(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(LIBPERF_OUTPUT) \ - DESTDIR=$(LIBPERF_DESTDIR) prefix= \ + DESTDIR=$(LIBPERF_DESTDIR) prefix= subdir= \ $@ install_headers $(LIBPERF)-clean: @@ -846,7 +846,7 @@ $(LIBPERF)-clean: $(LIBSUBCMD): FORCE | $(LIBSUBCMD_OUTPUT) $(Q)$(MAKE) -C $(LIBSUBCMD_DIR) O=$(LIBSUBCMD_OUTPUT) \ - DESTDIR=$(LIBSUBCMD_DESTDIR) prefix= \ + DESTDIR=$(LIBSUBCMD_DESTDIR) prefix= subdir= \ $@ install_headers $(LIBSUBCMD)-clean: @@ -855,7 +855,7 @@ $(LIBSUBCMD)-clean: $(LIBSYMBOL): FORCE | $(LIBSYMBOL_OUTPUT) $(Q)$(MAKE) -C $(LIBSYMBOL_DIR) O=$(LIBSYMBOL_OUTPUT) \ - DESTDIR=$(LIBSYMBOL_DESTDIR) prefix= \ + DESTDIR=$(LIBSYMBOL_DESTDIR) prefix= subdir= \ $@ install_headers $(LIBSYMBOL)-clean: From d8d85ce86dc82de4f88b821a78f533b9d5b22a45 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 30 Dec 2022 11:26:27 +0100 Subject: [PATCH 0321/1593] perf lock contention: Fix core dump related to not finding the "__sched_text_end" symbol on s/390 The test case perf lock contention dumps core on s390. Run the following commands: # ./perf lock record -- ./perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 2.799 [sec] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.073 MB perf.data (100 samples) ] # # ./perf lock contention Segmentation fault (core dumped) # The function call stack is lengthy, here are the top 5 functions: # gdb ./perf core.24048 GNU gdb (GDB) Fedora Linux 12.1-6.fc37 Core was generated by `./perf lock contention'. Program terminated with signal SIGSEGV, Segmentation fault. #0 0x00000000011dd25c in machine__is_lock_function (machine=0x3029e28, addr=1789230) at util/machine.c:3356 3356 machine->sched.text_end = kmap->unmap_ip(kmap, sym->start); (gdb) where #0 0x00000000011dd25c in machine__is_lock_function (machine=0x3029e28, addr=1789230) at util/machine.c:3356 #1 0x000000000109f244 in callchain_id (evsel=0x30313e0, sample=0x3ffea4f77d0) at builtin-lock.c:957 #2 0x000000000109e094 in get_key_by_aggr_mode (key=0x3ffea4f7290, addr=27758136, evsel=0x30313e0, sample=0x3ffea4f77d0) at builtin-lock.c:586 #3 0x000000000109f4d0 in report_lock_contention_begin_event (evsel=0x30313e0, sample=0x3ffea4f77d0) at builtin-lock.c:1004 #4 0x00000000010a00ae in evsel__process_contention_begin (evsel=0x30313e0, sample=0x3ffea4f77d0) at builtin-lock.c:1254 #5 0x00000000010a0e14 in process_sample_event (tool=0x3ffea4f8480, event=0x3ff85601ef8, sample=0x3ffea4f77d0, evsel=0x30313e0, machine=0x3029e28) at builtin-lock.c:1464 ..... The issue is in function machine__is_lock_function() in file ./util/machine.c lines 3355: /* should not fail from here */ sym = machine__find_kernel_symbol_by_name(machine, "__sched_text_end", &kmap); machine->sched.text_end = kmap->unmap_ip(kmap, sym->start) On s390 the symbol __sched_text_end is *NOT* in the symbol list and the resulting pointer sym is set to NULL. The sym->start is then a NULL pointer access and generates the core dump. The reason why __sched_text_end is not in the symbol list on s390 is simple: When the symbol list is created at perf start up with function calls dso__load +--> dso__load_vmlinux_path +--> dso__load_vmlinux +--> dso__load_sym +--> dso__load_sym_internal (reads kernel symbols) +--> symbols__fixup_end +--> symbols__fixup_duplicate The issue is in function symbols__fixup_duplicate(). It deletes all symbols with have the same address. On s390: # nm -g ~/linux/vmlinux| fgrep c68390 0000000000c68390 T __cpuidle_text_start 0000000000c68390 T __sched_text_end # two symbols have identical addresses and __sched_text_end is considered duplicate (in ascending sort order) and removed from the symbol list. Therefore it is missing and an invalid pointer reference occurs. The code checks for symbol __sched_text_start and when it exists assumes symbol __sched_text_end is also in the symbol table. However this is not the case on s390. Same situation exists for symbol __lock_text_start: 0000000000c68770 T __cpuidle_text_end 0000000000c68770 T __lock_text_start This symbol is also removed from the symbol table but used in function machine__is_lock_function(). To fix this and keep duplicate symbols in the symbol table, set symbol_conf.allow_aliases to true. This prevents the removal of duplicate symbols in function symbols__fixup_duplicate(). Output After: # ./perf lock contention contended total wait max wait avg wait type caller 48 124.39 ms 123.99 ms 2.59 ms rwsem:W unlink_anon_vmas+0x24a 47 83.68 ms 83.26 ms 1.78 ms rwsem:W free_pgtables+0x132 5 41.22 us 10.55 us 8.24 us rwsem:W free_pgtables+0x140 4 40.12 us 20.55 us 10.03 us rwsem:W copy_process+0x1ac8 # Fixes: 0d2997f750d1de39 ("perf lock: Look up callchain for the contended locks") Signed-off-by: Thomas Richter Acked-by: Namhyung Kim Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20221230102627.2410847-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 718b82bfcdff9..506c2fe42d523 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1670,6 +1670,7 @@ static int __cmd_report(bool display_info) /* for lock function check */ symbol_conf.sort_by_name = true; + symbol_conf.allow_aliases = true; symbol__init(&session->header.env); if (!data.is_pipe) { @@ -1757,6 +1758,7 @@ static int __cmd_contention(int argc, const char **argv) /* for lock function check */ symbol_conf.sort_by_name = true; + symbol_conf.allow_aliases = true; symbol__init(&session->header.env); if (use_bpf) { From cf97eb7e47d4671084c7e114c5d88a3d0540ecbd Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 20 Dec 2022 17:11:24 -0500 Subject: [PATCH 0322/1593] drm/amdkfd: Fix kernel warning during topology setup This patch fixes the following kernel warning seen during driver load by correctly initializing the p2plink attr before creating the sysfs file: [ +0.002865] ------------[ cut here ]------------ [ +0.002327] kobject: '(null)' (0000000056260cfb): is not initialized, yet kobject_put() is being called. [ +0.004780] WARNING: CPU: 32 PID: 1006 at lib/kobject.c:718 kobject_put+0xaa/0x1c0 [ +0.001361] Call Trace: [ +0.001234] [ +0.001067] kfd_remove_sysfs_node_entry+0x24a/0x2d0 [amdgpu] [ +0.003147] kfd_topology_update_sysfs+0x3d/0x750 [amdgpu] [ +0.002890] kfd_topology_add_device+0xbd7/0xc70 [amdgpu] [ +0.002844] ? lock_release+0x13c/0x2e0 [ +0.001936] ? smu_cmn_send_smc_msg_with_param+0x1e8/0x2d0 [amdgpu] [ +0.003313] ? amdgpu_dpm_get_mclk+0x54/0x60 [amdgpu] [ +0.002703] kgd2kfd_device_init.cold+0x39f/0x4ed [amdgpu] [ +0.002930] amdgpu_amdkfd_device_init+0x13d/0x1f0 [amdgpu] [ +0.002944] amdgpu_device_init.cold+0x1464/0x17b4 [amdgpu] [ +0.002970] ? pci_bus_read_config_word+0x43/0x80 [ +0.002380] amdgpu_driver_load_kms+0x15/0x100 [amdgpu] [ +0.002744] amdgpu_pci_probe+0x147/0x370 [amdgpu] [ +0.002522] local_pci_probe+0x40/0x80 [ +0.001896] work_for_cpu_fn+0x10/0x20 [ +0.001892] process_one_work+0x26e/0x5a0 [ +0.002029] worker_thread+0x1fd/0x3e0 [ +0.001890] ? process_one_work+0x5a0/0x5a0 [ +0.002115] kthread+0xea/0x110 [ +0.001618] ? kthread_complete_and_exit+0x20/0x20 [ +0.002422] ret_from_fork+0x1f/0x30 [ +0.001808] [ +0.001103] irq event stamp: 59837 [ +0.001718] hardirqs last enabled at (59849): [] __up_console_sem+0x52/0x60 [ +0.004414] hardirqs last disabled at (59860): [] __up_console_sem+0x37/0x60 [ +0.004414] softirqs last enabled at (59654): [] irq_exit_rcu+0xd7/0x130 [ +0.004205] softirqs last disabled at (59649): [] irq_exit_rcu+0xd7/0x130 [ +0.004203] ---[ end trace 0000000000000000 ]--- Fixes: 0f28cca87e9a ("drm/amdkfd: Extend KFD device topology to surface peer-to-peer links") Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index bceb1a5b25186..3fdaba56be6fb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -801,7 +801,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, p2plink->attr.name = "properties"; p2plink->attr.mode = KFD_SYSFS_FILE_MODE; - sysfs_attr_init(&iolink->attr); + sysfs_attr_init(&p2plink->attr); ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); if (ret < 0) return ret; From f3c23bea598ab7e8e4b8c5ca66598921310f718e Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Mon, 5 Dec 2022 11:08:40 -0500 Subject: [PATCH 0323/1593] drm/amd/display: Uninitialized variables causing 4k60 UCLK to stay at DPM1 and not DPM0 [Why] SwathSizePerSurfaceY[] and SwathSizePerSurfaceC[] values are uninitialized because we are using += instead of = operator. [How] Assign values in loop with = operator. Acked-by: Aurabindo Pillai Signed-off-by: Samson Tam Reviewed-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x, 6.1.x --- .../drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 5af601cff1a0f..b53feeaf5cf11 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -6257,12 +6257,12 @@ bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurface double SwathSizePerSurfaceC[DC__NUM_DPP__MAX]; bool NotEnoughDETSwathFillLatencyHiding = false; - /* calculate sum of single swath size for all pipes in bytes*/ + /* calculate sum of single swath size for all pipes in bytes */ for (k = 0; k < NumberOfActiveSurfaces; k++) { - SwathSizePerSurfaceY[k] += SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k]; + SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k]; if (SwathHeightC[k] != 0) - SwathSizePerSurfaceC[k] += SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k]; + SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k]; else SwathSizePerSurfaceC[k] = 0; From 2a12187d5853d9fd5102278cecef7dac7c8ce7ea Mon Sep 17 00:00:00 2001 From: Andreas Rammhold Date: Fri, 23 Dec 2022 12:27:47 +0100 Subject: [PATCH 0324/1593] of/fdt: run soc memory setup when early_init_dt_scan_memory fails If memory has been found early_init_dt_scan_memory now returns 1. If it hasn't found any memory it will return 0, allowing other memory setup mechanisms to carry on. Previously early_init_dt_scan_memory always returned 0 without distinguishing between any kind of memory setup being done or not. Any code path after the early_init_dt_scan memory call in the ramips plat_mem_setup code wouldn't be executed anymore. Making early_init_dt_scan_memory the only way to initialize the memory. Some boards, including my mt7621 based Cudy X6 board, depend on memory initialization being done via the soc_info.mem_detect function pointer. Those wouldn't be able to obtain memory and panic the kernel during early bootup with the message "early_init_dt_alloc_memory_arch: Failed to allocate 12416 bytes align=0x40". Fixes: 1f012283e936 ("of/fdt: Rework early_init_dt_scan_memory() to call directly") Cc: stable@vger.kernel.org Signed-off-by: Andreas Rammhold Link: https://lore.kernel.org/r/20221223112748.2935235-1-andreas@rammhold.de Signed-off-by: Rob Herring --- arch/mips/ralink/of.c | 2 +- drivers/of/fdt.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 01c132bc33d54..4d06de77d92a6 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -64,7 +64,7 @@ void __init plat_mem_setup(void) dtb = get_fdt(); __dt_setup_arch(dtb); - if (!early_init_dt_scan_memory()) + if (early_init_dt_scan_memory()) return; if (soc_info.mem_detect) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index b2272bccf85c9..02cc4a285cb98 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1099,7 +1099,7 @@ u64 __init dt_mem_next_cell(int s, const __be32 **cellp) */ int __init early_init_dt_scan_memory(void) { - int node; + int node, found_memory = 0; const void *fdt = initial_boot_params; fdt_for_each_subnode(node, fdt, 0) { @@ -1139,6 +1139,8 @@ int __init early_init_dt_scan_memory(void) early_init_dt_add_memory_arch(base, size); + found_memory = 1; + if (!hotpluggable) continue; @@ -1147,7 +1149,7 @@ int __init early_init_dt_scan_memory(void) base, base + size); } } - return 0; + return found_memory; } int __init early_init_dt_scan_chosen(char *cmdline) From 1d7a4a40bf76e2305c8beed4a019bf58af6121ac Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 23 Dec 2022 14:21:59 +0100 Subject: [PATCH 0325/1593] dt-bindings: soundwire: qcom,soundwire: correct sizes related to number of ports There are several properties depending on number of ports. Some of them had maximum limit of 5 and some of 8. SM8450 AudioReach comes with 8 ports, so fix the limits: sm8450-sony-xperia-nagara-pdx224.dtb: soundwire-controller@3250000: qcom,ports-word-length: 'oneOf' conditional failed, one must be fixed: [[255, 255, 255, 255, 255, 255, 255, 255]] is too short [255, 255, 255, 255, 255, 255, 255, 255] is too long Fixes: febc50b82bc9 ("dt-bindings: soundwire: Convert text bindings to DT Schema") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221223132159.81211-1-krzysztof.kozlowski@linaro.org Signed-off-by: Rob Herring --- .../devicetree/bindings/soundwire/qcom,soundwire.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml b/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml index bcbfa71536cda..3efdc192ab019 100644 --- a/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml +++ b/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml @@ -80,7 +80,7 @@ properties: or applicable for the respective data port. More info in MIPI Alliance SoundWire 1.0 Specifications. minItems: 3 - maxItems: 5 + maxItems: 8 qcom,ports-sinterval-low: $ref: /schemas/types.yaml#/definitions/uint8-array @@ -124,7 +124,7 @@ properties: or applicable for the respective data port. More info in MIPI Alliance SoundWire 1.0 Specifications. minItems: 3 - maxItems: 5 + maxItems: 8 qcom,ports-block-pack-mode: $ref: /schemas/types.yaml#/definitions/uint8-array @@ -154,7 +154,7 @@ properties: or applicable for the respective data port. More info in MIPI Alliance SoundWire 1.0 Specifications. minItems: 3 - maxItems: 5 + maxItems: 8 items: oneOf: - minimum: 0 @@ -171,7 +171,7 @@ properties: or applicable for the respective data port. More info in MIPI Alliance SoundWire 1.0 Specifications. minItems: 3 - maxItems: 5 + maxItems: 8 items: oneOf: - minimum: 0 @@ -187,7 +187,7 @@ properties: or applicable for the respective data port. More info in MIPI Alliance SoundWire 1.0 Specifications. minItems: 3 - maxItems: 5 + maxItems: 8 items: oneOf: - minimum: 0 From 937e7a39068d806c1a951f19e38e1326be20f1b0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 29 Dec 2022 12:07:46 +0200 Subject: [PATCH 0326/1593] pinctrl: nomadik: Add missing header(s) Do not imply that some of the generic headers may be always included. Instead, include explicitly what we are direct user of. While at it, sort headers alphabetically. Fixes: e5530adc17a7 ("pinctrl: Clean up headers") Cc: Randy Dunlap Reported-by: Arnd Bergmann Reported-by: kernel test robot Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221229100746.35047-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/pinctrl/nomadik/pinctrl-ab8500.c | 3 +- drivers/pinctrl/nomadik/pinctrl-ab8505.c | 3 +- drivers/pinctrl/nomadik/pinctrl-abx500.c | 32 +++++++++++-------- drivers/pinctrl/nomadik/pinctrl-abx500.h | 4 +++ .../pinctrl/nomadik/pinctrl-nomadik-db8500.c | 3 ++ .../pinctrl/nomadik/pinctrl-nomadik-stn8815.c | 3 ++ drivers/pinctrl/nomadik/pinctrl-nomadik.c | 32 ++++++++++--------- drivers/pinctrl/nomadik/pinctrl-nomadik.h | 5 +++ 8 files changed, 54 insertions(+), 31 deletions(-) diff --git a/drivers/pinctrl/nomadik/pinctrl-ab8500.c b/drivers/pinctrl/nomadik/pinctrl-ab8500.c index 3106a21cd277f..d7b244df058f2 100644 --- a/drivers/pinctrl/nomadik/pinctrl-ab8500.c +++ b/drivers/pinctrl/nomadik/pinctrl-ab8500.c @@ -6,9 +6,10 @@ */ #include -#include #include + #include + #include "pinctrl-abx500.h" /* All the pins that can be used for GPIO and some other functions */ diff --git a/drivers/pinctrl/nomadik/pinctrl-ab8505.c b/drivers/pinctrl/nomadik/pinctrl-ab8505.c index b93af1fb37f01..45aa958b573e0 100644 --- a/drivers/pinctrl/nomadik/pinctrl-ab8505.c +++ b/drivers/pinctrl/nomadik/pinctrl-ab8505.c @@ -6,9 +6,10 @@ */ #include -#include #include + #include + #include "pinctrl-abx500.h" /* All the pins that can be used for GPIO and some other functions */ diff --git a/drivers/pinctrl/nomadik/pinctrl-abx500.c b/drivers/pinctrl/nomadik/pinctrl-abx500.c index 7aa534576a459..28c3403df1b03 100644 --- a/drivers/pinctrl/nomadik/pinctrl-abx500.c +++ b/drivers/pinctrl/nomadik/pinctrl-abx500.c @@ -6,33 +6,37 @@ * * Driver allows to use AxB5xx unused pins to be used as GPIO */ -#include -#include -#include -#include +#include #include -#include -#include -#include #include +#include +#include #include #include -#include -#include +#include +#include +#include +#include +#include +#include +#include + #include #include -#include + #include -#include -#include -#include #include +#include +#include +#include +#include -#include "pinctrl-abx500.h" #include "../core.h" #include "../pinconf.h" #include "../pinctrl-utils.h" +#include "pinctrl-abx500.h" + /* * GPIO registers offset * Bank: 0x10 diff --git a/drivers/pinctrl/nomadik/pinctrl-abx500.h b/drivers/pinctrl/nomadik/pinctrl-abx500.h index 90bb12fe8073f..d675220846752 100644 --- a/drivers/pinctrl/nomadik/pinctrl-abx500.h +++ b/drivers/pinctrl/nomadik/pinctrl-abx500.h @@ -2,6 +2,10 @@ #ifndef PINCTRL_PINCTRL_ABx500_H #define PINCTRL_PINCTRL_ABx500_H +#include + +struct pinctrl_pin_desc; + /* Package definitions */ #define PINCTRL_AB8500 0 #define PINCTRL_AB8505 1 diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c b/drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c index 758d21f0a8503..490e0959e8be6 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c @@ -1,6 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include + #include + #include "pinctrl-nomadik.h" /* All the pins that can be used for GPIO and some other functions */ diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik-stn8815.c b/drivers/pinctrl/nomadik/pinctrl-nomadik-stn8815.c index c0d7c86d09391..1552222ac68e7 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik-stn8815.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik-stn8815.c @@ -1,6 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include + #include + #include "pinctrl-nomadik.h" /* All the pins that can be used for GPIO and some other functions */ diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index f7d02513d8cc1..86a638077a697 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -7,30 +7,34 @@ * Rewritten based on work by Prafulla WADASKAR * Copyright (C) 2011-2013 Linus Walleij */ -#include -#include -#include -#include -#include +#include #include +#include #include #include -#include +#include #include -#include -#include +#include +#include #include -#include +#include +#include +#include +#include +#include + +/* Since we request GPIOs from ourself */ +#include #include +#include #include #include -#include -/* Since we request GPIOs from ourself */ -#include -#include "pinctrl-nomadik.h" + #include "../core.h" #include "../pinctrl-utils.h" +#include "pinctrl-nomadik.h" + /* * The GPIO module in the Nomadik family of Systems-on-Chip is an * AMBA device, managing 32 pins and alternate functions. The logic block @@ -907,8 +911,6 @@ static int nmk_gpio_get_mode(struct nmk_gpio_chip *nmk_chip, int offset) return (afunc ? NMK_GPIO_ALT_A : 0) | (bfunc ? NMK_GPIO_ALT_B : 0); } -#include - static void nmk_gpio_dbg_show_one(struct seq_file *s, struct pinctrl_dev *pctldev, struct gpio_chip *chip, unsigned offset, unsigned gpio) diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.h b/drivers/pinctrl/nomadik/pinctrl-nomadik.h index 84e2977573357..1ef2559bc5710 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.h +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.h @@ -2,6 +2,11 @@ #ifndef PINCTRL_PINCTRL_NOMADIK_H #define PINCTRL_PINCTRL_NOMADIK_H +#include +#include + +#include + /* Package definitions */ #define PINCTRL_NMK_STN8815 0 #define PINCTRL_NMK_DB8500 1 From 9ffa13ff78a0a55df968a72d6f0ebffccee5c9f4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 4 Jan 2023 01:34:02 +0000 Subject: [PATCH 0327/1593] io_uring: pin context while queueing deferred tw Unlike normal tw, nothing prevents deferred tw to be executed right after an tw item added to ->work_llist in io_req_local_work_add(). For instance, the waiting task may get waken up by CQ posting or a normal tw. Thus we need to pin the ring for the rest of io_req_local_work_add() Cc: stable@vger.kernel.org Fixes: c0e0d6ba25f18 ("io_uring: add IORING_SETUP_DEFER_TASKRUN") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/1a79362b9c10b8523ef70b061d96523650a23344.1672795998.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 58ac13b69dc8d..6bed448556798 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1236,13 +1236,18 @@ static void io_req_local_work_add(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - if (!llist_add(&req->io_task_work.node, &ctx->work_llist)) + percpu_ref_get(&ctx->refs); + + if (!llist_add(&req->io_task_work.node, &ctx->work_llist)) { + percpu_ref_put(&ctx->refs); return; + } /* need it for the following io_cqring_wake() */ smp_mb__after_atomic(); if (unlikely(atomic_read(&req->task->io_uring->in_idle))) { io_move_task_work_from_local(ctx); + percpu_ref_put(&ctx->refs); return; } @@ -1252,6 +1257,7 @@ static void io_req_local_work_add(struct io_kiocb *req) if (ctx->has_evfd) io_eventfd_signal(ctx); __io_cqring_wake(ctx); + percpu_ref_put(&ctx->refs); } void __io_req_task_work_add(struct io_kiocb *req, bool allow_local) From f26cc9593581bd734c846bf827401350b36dc3c9 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 4 Jan 2023 01:34:57 +0000 Subject: [PATCH 0328/1593] io_uring: lockdep annotate CQ locking Locking around CQE posting is complex and depends on options the ring is created with, add more thorough lockdep annotations checking all invariants. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/aa3770b4eacae3915d782cc2ab2f395a99b4b232.1672795976.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 5 ++--- io_uring/io_uring.h | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 6bed448556798..472574192dd63 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -731,6 +731,8 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, size_t ocq_size = sizeof(struct io_overflow_cqe); bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32); + lockdep_assert_held(&ctx->completion_lock); + if (is_cqe32) ocq_size += sizeof(struct io_uring_cqe); @@ -820,9 +822,6 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, { struct io_uring_cqe *cqe; - if (!ctx->task_complete) - lockdep_assert_held(&ctx->completion_lock); - ctx->cq_extra++; /* diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index e9f0d41ebb996..ab4b2a1c3b7e8 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -79,6 +79,19 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx); bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, bool cancel_all); +#define io_lockdep_assert_cq_locked(ctx) \ + do { \ + if (ctx->flags & IORING_SETUP_IOPOLL) { \ + lockdep_assert_held(&ctx->uring_lock); \ + } else if (!ctx->task_complete) { \ + lockdep_assert_held(&ctx->completion_lock); \ + } else if (ctx->submitter_task->flags & PF_EXITING) { \ + lockdep_assert(current_work()); \ + } else { \ + lockdep_assert(current == ctx->submitter_task); \ + } \ + } while (0) + static inline void io_req_task_work_add(struct io_kiocb *req) { __io_req_task_work_add(req, true); @@ -92,6 +105,8 @@ void io_cq_unlock_post(struct io_ring_ctx *ctx); static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx, bool overflow) { + io_lockdep_assert_cq_locked(ctx); + if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) { struct io_uring_cqe *cqe = ctx->cqe_cached; From 1cba0d150fa102439114a91b3e215909efc9f169 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Tue, 27 Dec 2022 18:16:24 -0800 Subject: [PATCH 0329/1593] drm/msm/dp: do not complete dp_aux_cmd_fifo_tx() if irq is not for aux transfer There are 3 possible interrupt sources are handled by DP controller, HPDstatus, Controller state changes and Aux read/write transaction. At every irq, DP controller have to check isr status of every interrupt sources and service the interrupt if its isr status bits shows interrupts are pending. There is potential race condition may happen at current aux isr handler implementation since it is always complete dp_aux_cmd_fifo_tx() even irq is not for aux read or write transaction. This may cause aux read transaction return premature if host aux data read is in the middle of waiting for sink to complete transferring data to host while irq happen. This will cause host's receiving buffer contains unexpected data. This patch fixes this problem by checking aux isr and return immediately at aux isr handler if there are no any isr status bits set. Current there is a bug report regrading eDP edid corruption happen during system booting up. After lengthy debugging to found that VIDEO_READY interrupt was continuously firing during system booting up which cause dp_aux_isr() to complete dp_aux_cmd_fifo_tx() prematurely to retrieve data from aux hardware buffer which is not yet contains complete data transfer from sink. This cause edid corruption. Follows are the signature at kernel logs when problem happen, EDID has corrupt header panel-simple-dp-aux aux-aea0000.edp: Couldn't identify panel via EDID Changes in v2: -- do complete if (ret == IRQ_HANDLED) ay dp-aux_isr() -- add more commit text Changes in v3: -- add Stephen suggested -- dp_aux_isr() return IRQ_XXX back to caller -- dp_ctrl_isr() return IRQ_XXX back to caller Changes in v4: -- split into two patches Changes in v5: -- delete empty line between tags Changes in v6: -- remove extra "that" and fixed line more than 75 char at commit text Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Signed-off-by: Kuogee Hsieh Tested-by: Douglas Anderson Reviewed-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/516121/ Link: https://lore.kernel.org/r/1672193785-11003-2-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dp/dp_aux.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c index d030a93a08c36..cc3efed593aa1 100644 --- a/drivers/gpu/drm/msm/dp/dp_aux.c +++ b/drivers/gpu/drm/msm/dp/dp_aux.c @@ -423,6 +423,10 @@ void dp_aux_isr(struct drm_dp_aux *dp_aux) isr = dp_catalog_aux_get_irq(aux->catalog); + /* no interrupts pending, return immediately */ + if (!isr) + return; + if (!aux->cmd_busy) return; From 67fcb2c598bc7643f694e8194d5c300a52af5aa9 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 29 Dec 2022 14:04:46 -0800 Subject: [PATCH 0330/1593] cifs: Fix kmap_local_page() unmapping kmap_local_page() requires kunmap_local() to unmap the mapping. In addition memcpy_page() is provided to perform this common memcpy pattern. Replace the kmap_local_page() and broken kunmap() with memcpy_page() Fixes: d406d26745ab ("cifs: skip alloc when request has no pages") Reviewed-by: Paulo Alcantara Reviewed-by: "Fabio M. De Francesco" Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Ira Weiny Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index dc160de7a6de4..0d7e9bcd9f345 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -4488,17 +4488,12 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, /* copy pages form the old */ for (j = 0; j < npages; j++) { - char *dst, *src; unsigned int offset, len; rqst_page_get_length(new, j, &len, &offset); - dst = kmap_local_page(new->rq_pages[j]) + offset; - src = kmap_local_page(old->rq_pages[j]) + offset; - - memcpy(dst, src, len); - kunmap(new->rq_pages[j]); - kunmap(old->rq_pages[j]); + memcpy_page(new->rq_pages[j], offset, + old->rq_pages[j], offset, len); } } From 9e6002c8738a9d5675ba706fcdbc0a544f814974 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 29 Dec 2022 12:33:55 -0300 Subject: [PATCH 0331/1593] cifs: ignore ipc reconnect failures during dfs failover If it failed to reconnect ipc used for getting referrals, we can just ignore it as it is not required for reconnecting the share. The worst case would be not being able to detect or chase nested links as long as dfs root server is unreachable. Before patch: $ mount.cifs //root/dfs/link /mnt -o echo_interval=10,... -> target share: /fs0/share disconnect root & fs0 $ ls /mnt ls: cannot access '/mnt': Host is down connect fs0 $ ls /mnt ls: cannot access '/mnt': Resource temporarily unavailable After patch: $ mount.cifs //root/dfs/link /mnt -o echo_interval=10,... -> target share: /fs0/share disconnect root & fs0 $ ls /mnt ls: cannot access '/mnt': Host is down connect fs0 $ ls /mnt bar.rtf dir1 foo Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/cifs/dfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/cifs/dfs.c b/fs/cifs/dfs.c index b541e68378f64..30086f2060a10 100644 --- a/fs/cifs/dfs.c +++ b/fs/cifs/dfs.c @@ -401,8 +401,7 @@ static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *t if (ipc->need_reconnect) { scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); rc = ops->tree_connect(xid, ipc->ses, tree, ipc, cifs_sb->local_nls); - if (rc) - break; + cifs_dbg(FYI, "%s: reconnect ipc: %d\n", __func__, rc); } scnprintf(tree, MAX_TREE_SIZE, "\\%s", share); From 775e44d6d86dca400d614cbda5dab4def4951fe7 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 29 Dec 2022 12:33:56 -0300 Subject: [PATCH 0332/1593] cifs: fix race in assemble_neg_contexts() Serialise access of TCP_Server_Info::hostname in assemble_neg_contexts() by holding the server's mutex otherwise it might end up accessing an already-freed hostname pointer from cifs_reconnect() or cifs_resolve_server(). Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index a5695748a89b1..2c484d47c5922 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -541,9 +541,10 @@ static void assemble_neg_contexts(struct smb2_negotiate_req *req, struct TCP_Server_Info *server, unsigned int *total_len) { - char *pneg_ctxt; - char *hostname = NULL; unsigned int ctxt_len, neg_context_count; + struct TCP_Server_Info *pserver; + char *pneg_ctxt; + char *hostname; if (*total_len > 200) { /* In case length corrupted don't want to overrun smb buffer */ @@ -574,8 +575,9 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, * secondary channels don't have the hostname field populated * use the hostname field in the primary channel instead */ - hostname = CIFS_SERVER_IS_CHAN(server) ? - server->primary_server->hostname : server->hostname; + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + cifs_server_lock(pserver); + hostname = pserver->hostname; if (hostname && (hostname[0] != 0)) { ctxt_len = build_netname_ctxt((struct smb2_netname_neg_context *)pneg_ctxt, hostname); @@ -584,6 +586,7 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, neg_context_count = 3; } else neg_context_count = 2; + cifs_server_unlock(pserver); build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt); *total_len += sizeof(struct smb2_posix_neg_context); From faf28e240dd118d9521c68aeb9388b9b8f02d9d0 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 3 Jan 2023 18:11:29 +0100 Subject: [PATCH 0333/1593] cpufreq: Add SM6375 to cpufreq-dt-platdev blocklist The Qualcomm SM6375 platform uses the qcom-cpufreq-hw driver, so add it to the cpufreq-dt-platdev driver's blocklist. Signed-off-by: Konrad Dybcio Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index e329d29d1f9d1..e857036510986 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -151,6 +151,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,sdm845", }, { .compatible = "qcom,sm6115", }, { .compatible = "qcom,sm6350", }, + { .compatible = "qcom,sm6375", }, { .compatible = "qcom,sm8150", }, { .compatible = "qcom,sm8250", }, { .compatible = "qcom,sm8350", }, From 53cd37c1368d07649421517ed0aab94a49cca003 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 27 Dec 2022 15:42:02 +0100 Subject: [PATCH 0334/1593] dt-bindings: cpufreq: cpufreq-qcom-hw: document interrupts The Qualcomm Soc cpufreq hardware engine has LMh/thermal throttling interrupts (already present in SM8250 and SM8450 DTS) and Linux driver uses them: sm8250-hdk.dtb: cpufreq@18591000: 'interrupt-names', 'interrupts' do not match any of the regexes: 'pinctrl-[0-9]+' sm8450-qrd.dtb: cpufreq@17d91000: 'interrupt-names', 'interrupts' do not match any of the regexes: 'pinctrl-[0-9]+' Signed-off-by: Krzysztof Kozlowski Signed-off-by: Viresh Kumar --- .../devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index 903b31129f012..99e159bc5fb13 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -54,6 +54,17 @@ properties: - const: xo - const: alternate + interrupts: + minItems: 1 + maxItems: 3 + + interrupt-names: + minItems: 1 + items: + - const: dcvsh-irq-0 + - const: dcvsh-irq-1 + - const: dcvsh-irq-2 + '#freq-domain-cells': const: 1 From 558016722e9d5bc0ac79c246ccd14a8a4eb028d4 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat (VMware)" Date: Tue, 3 Jan 2023 14:09:41 -0800 Subject: [PATCH 0335/1593] MAINTAINERS: Update maintainers for ptp_vmw driver Vivek has decided to transfer the maintainership of the VMware virtual PTP clock driver (ptp_vmw) to Srivatsa and Deep. Update the MAINTAINERS file to reflect this change, and also add Alexey as a reviewer for the driver. Signed-off-by: Srivatsa S. Bhat (VMware) Acked-by: Vivek Thampi Acked-by: Deep Shah Acked-by: Alexey Makhalov Signed-off-by: David S. Miller --- MAINTAINERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7f0b7181e60a5..758878c0eddf3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22243,7 +22243,9 @@ F: drivers/scsi/vmw_pvscsi.c F: drivers/scsi/vmw_pvscsi.h VMWARE VIRTUAL PTP CLOCK DRIVER -M: Vivek Thampi +M: Srivatsa S. Bhat (VMware) +M: Deep Shah +R: Alexey Makhalov R: VMware PV-Drivers Reviewers L: netdev@vger.kernel.org S: Supported From a664ec9158eeddd75121d39c9a0758016097fa96 Mon Sep 17 00:00:00 2001 From: Rodrigo Branco Date: Tue, 3 Jan 2023 14:17:51 -0600 Subject: [PATCH 0336/1593] x86/bugs: Flush IBP in ib_prctl_set() We missed the window between the TIF flag update and the next reschedule. Signed-off-by: Rodrigo Branco Reviewed-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Cc: --- arch/x86/kernel/cpu/bugs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d970ddb0cc65b..bca0bd8f48464 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1981,6 +1981,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) if (ctrl == PR_SPEC_FORCE_DISABLE) task_set_spec_ib_force_disable(task); task_update_spec_tif(task); + if (task == current) + indirect_branch_prediction_barrier(); break; default: return -ERANGE; From 340726747336716350eb5a928b860a29db955f05 Mon Sep 17 00:00:00 2001 From: Aaron Thompson Date: Wed, 4 Jan 2023 10:07:37 +0000 Subject: [PATCH 0337/1593] memblock tests: Fix compilation error. Commit cf4694be2b2cf ("tools: Add atomic_test_and_set_bit()") changed tools/arch/x86/include/asm/atomic.h to include , which causes 'make -C tools/testing/memblock' to fail with: In file included from ../../include/asm/atomic.h:6, from ../../include/linux/atomic.h:5, from ./linux/mmzone.h:5, from ../../include/linux/mm.h:5, from ../../include/linux/pfn.h:5, from ./linux/memory_hotplug.h:6, from ./linux/init.h:7, from ./linux/memblock.h:11, from tests/common.h:8, from tests/basic_api.h:5, from main.c:2: ../../include/asm/../../arch/x86/include/asm/atomic.h:11:10: fatal error: asm/asm.h: No such file or directory 11 | #include | ^~~~~~~~~~~ Create a symlink to asm/asm.h in the same manner as the existing one to asm/cmpxchg.h. Signed-off-by: Aaron Thompson Link: https://lore.kernel.org/r/010101857c402765-96e2dbc6-b82b-47e2-a437-4834dbe0b96b-000000@us-west-2.amazonses.com Signed-off-by: Mike Rapoport (IBM) --- tools/testing/memblock/.gitignore | 1 + tools/testing/memblock/Makefile | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/memblock/.gitignore b/tools/testing/memblock/.gitignore index 654338e0be52e..4cc7cd5aac2b1 100644 --- a/tools/testing/memblock/.gitignore +++ b/tools/testing/memblock/.gitignore @@ -1,4 +1,5 @@ main memblock.c linux/memblock.h +asm/asm.h asm/cmpxchg.h diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile index 2310ac4d080ec..7a1ca694a9825 100644 --- a/tools/testing/memblock/Makefile +++ b/tools/testing/memblock/Makefile @@ -29,13 +29,14 @@ include: ../../../include/linux/memblock.h ../../include/linux/*.h \ @mkdir -p linux test -L linux/memblock.h || ln -s ../../../../include/linux/memblock.h linux/memblock.h + test -L asm/asm.h || ln -s ../../../arch/x86/include/asm/asm.h asm/asm.h test -L asm/cmpxchg.h || ln -s ../../../arch/x86/include/asm/cmpxchg.h asm/cmpxchg.h memblock.c: $(EXTR_SRC) test -L memblock.c || ln -s $(EXTR_SRC) memblock.c clean: - $(RM) $(TARGETS) $(OFILES) linux/memblock.h memblock.c asm/cmpxchg.h + $(RM) $(TARGETS) $(OFILES) linux/memblock.h memblock.c asm/asm.h asm/cmpxchg.h help: @echo 'Memblock simulator' From fa81ab49bbe4e1ce756581c970486de0ddb14309 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 16 Dec 2022 14:03:03 +0400 Subject: [PATCH 0338/1593] memblock: Fix doc for memblock_phys_free memblock_phys_free() is the counterpart to memblock_phys_alloc. Change memblock_alloc_xx() with memblock_phys_alloc_xx() to keep consistency. Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20221216100304.688209-1-linmq006@gmail.com Signed-off-by: Mike Rapoport (IBM) --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index 511d4783dcf1d..d036c7861310c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -836,7 +836,7 @@ void __init_memblock memblock_free(void *ptr, size_t size) * @base: phys starting address of the boot memory block * @size: size of the boot memory block in bytes * - * Free boot memory block previously allocated by memblock_alloc_xx() API. + * Free boot memory block previously allocated by memblock_phys_alloc_xx() API. * The freeing memory will not be released to the buddy allocator. */ int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size) From c956541736b94944047ee52ebfc5ee4babcd6ca1 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Wed, 4 Jan 2023 19:21:49 +0900 Subject: [PATCH 0339/1593] cpufreq: apple-soc: Switch to the lowest frequency on suspend Without this, the CPUs are left in a random pstate. Since we don't support deep idle yet (which powers down the CPUs), this results in significantly increased idle power consumption in suspend. Fixes: 6286bbb40576 ("cpufreq: apple-soc: Add new driver to control Apple SoC CPU P-states") Signed-off-by: Hector Martin Signed-off-by: Viresh Kumar --- drivers/cpufreq/apple-soc-cpufreq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c index 6f26395184c41..c11d22fd84c37 100644 --- a/drivers/cpufreq/apple-soc-cpufreq.c +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -280,6 +280,7 @@ static int apple_soc_cpufreq_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = transition_latency; policy->dvfs_possible_from_any_cpu = true; policy->fast_switch_possible = true; + policy->suspend_freq = freq_table[0].frequency; if (policy_has_boost_freq(policy)) { ret = cpufreq_enable_boost_support(); @@ -328,6 +329,7 @@ static struct cpufreq_driver apple_soc_cpufreq_driver = { .fast_switch = apple_soc_cpufreq_fast_switch, .register_em = cpufreq_register_em_with_opp, .attr = apple_soc_cpufreq_hw_attr, + .suspend = cpufreq_generic_suspend, }; static int __init apple_soc_cpufreq_module_init(void) From 760d560f71c828a97c77596af5c3f9978aefd9d1 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 15 Dec 2022 11:34:52 +0100 Subject: [PATCH 0340/1593] PCI: dwc: Adjust to recent removal of PCI_MSI_IRQ_DOMAIN a474d3fbe287 ("PCI/MSI: Get rid of PCI_MSI_IRQ_DOMAIN") removed PCI_MSI_IRQ_DOMAIN and changed all references to refer to PCI_MSI instead. ba6ed462dcf4 ("PCI: dwc: Add Baikal-T1 PCIe controller support") independently added PCIE_BT1, depending on PCI_MSI_IRQ_DOMAIN. Both commits appeared in v6.2-rc1, so the latter missed the conversion from PCI_MSI_IRQ_DOMAIN to PCI_MSI. Update PCIE_BT1 to depend on PCI_MSI instead. [bhelgaas: commit log] Link: https://lore.kernel.org/r/20221215103452.23131-1-lukas.bulwahn@gmail.com Signed-off-by: Lukas Bulwahn Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Serge Semin --- drivers/pci/controller/dwc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig index a0d2713f0e889..99ec91e2a5cfa 100644 --- a/drivers/pci/controller/dwc/Kconfig +++ b/drivers/pci/controller/dwc/Kconfig @@ -225,7 +225,7 @@ config PCIE_ARTPEC6_EP config PCIE_BT1 tristate "Baikal-T1 PCIe controller" depends on MIPS_BAIKAL_T1 || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST help Enables support for the PCIe controller in the Baikal-T1 SoC to work From fb710ddee75fb96f50ee6d004ef777a0cf7ad5a3 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 28 Dec 2022 15:57:03 +0100 Subject: [PATCH 0341/1593] perf test record_probe_libc_inet_pton: Fix test on s/390 where 'text_to_binary_address' now appears on the backtrace perf test '84: probe libc's inet_pton & backtrace it with ping' fails on s390. Debugging revealed a changed stack trace for the ping command using probes: ping 35729 [002] 8006.365063: probe_libc:inet_pton: (3ff9603e7c0) 13e7c0 __GI___inet_pton+0x0 (/usr/lib64/libc.so.6) ---> 104371 text_to_binary_address+0xef1 (inlined) 104371 gaih_inet+0xef1 (inlined) 104371 __GI_getaddrinfo+0xef1 (inlined) 5d4b main+0x139b (/usr/bin/ping) The line "---> text_to_binary_address ..." is new. It was introduced with glibc version 2.36.7.2 released with Fedora 37 for s390. Output before # perf test inet_pton 84: probe libc's inet_pton & backtrace it with ping : FAILED! # Output after: # perf test inet_pton 84: probe libc's inet_pton & backtrace it with ping : Ok # Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20221228145704.2702487-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record+probe_libc_inet_pton.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index 216b6b64caa30..57e7a6a470c93 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -37,6 +37,7 @@ trace_libc_inet_pton_backtrace() { case "$(uname -m)" in s390x) eventattr='call-graph=dwarf,max-stack=4' + echo "text_to_binary_address.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected echo "(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected echo "main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected From 2d656b0f81b22101db0447f890e39fdd736b745e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 3 Jan 2023 22:44:01 -0800 Subject: [PATCH 0342/1593] perf stat: Fix handling of unsupported cgroup events when using BPF counters When --for-each-cgroup option is used, it fails when any of events is not supported and exits immediately. This is not how 'perf stat' handles unsupported events. Let's ignore the failure and proceed with others so that the output is similar to when BPF counters are not used: Before: $ sudo ./perf stat -a --bpf-counters -e L1-icache-loads,L1-dcache-loads --for-each-cgroup system.slice,user.slice sleep 1 Failed to open first cgroup events $ After it shows output similat to when --bpf-counters isn't specified: $ sudo ./perf stat -a --bpf-counters -e L1-icache-loads,L1-dcache-loads --for-each-cgroup system.slice,user.slice sleep 1 Performance counter stats for 'system wide': L1-icache-loads system.slice 29,892,418 L1-dcache-loads system.slice L1-icache-loads user.slice 52,497,220 L1-dcache-loads user.slice $ Fixes: 944138f048f7d759 ("perf stat: Enable BPF counter with --for-each-cgroup") Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Song Liu Link: https://lore.kernel.org/r/20230104064402.1551516-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_counter_cgroup.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c index 3c2df7522f6fc..1c82377ed78b9 100644 --- a/tools/perf/util/bpf_counter_cgroup.c +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -116,27 +116,19 @@ static int bperf_load_program(struct evlist *evlist) /* open single copy of the events w/o cgroup */ err = evsel__open_per_cpu(evsel, evsel->core.cpus, -1); - if (err) { - pr_err("Failed to open first cgroup events\n"); - goto out; - } + if (err == 0) + evsel->supported = true; map_fd = bpf_map__fd(skel->maps.events); perf_cpu_map__for_each_cpu(cpu, j, evsel->core.cpus) { int fd = FD(evsel, j); __u32 idx = evsel->core.idx * total_cpus + cpu.cpu; - err = bpf_map_update_elem(map_fd, &idx, &fd, - BPF_ANY); - if (err < 0) { - pr_err("Failed to update perf_event fd\n"); - goto out; - } + bpf_map_update_elem(map_fd, &idx, &fd, BPF_ANY); } evsel->cgrp = leader_cgrp; } - evsel->supported = true; if (evsel->cgrp == cgrp) continue; From 54b353a20c7e8be98414754f5aff98c8a68fcc1f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 3 Jan 2023 22:44:02 -0800 Subject: [PATCH 0343/1593] perf stat: Fix handling of --for-each-cgroup with --bpf-counters to match non BPF mode The --for-each-cgroup can have the same cgroup multiple times, but this confuses BPF counters (since they have the same cgroup id), making only the last cgroup events to be counted. Let's check the cgroup name before adding a new entry to the cgroups list. Before: $ sudo ./perf stat -a --bpf-counters --for-each-cgroup /,/ sleep 1 Performance counter stats for 'system wide': msec cpu-clock / context-switches / cpu-migrations / page-faults / cycles / instructions / branches / branch-misses / 8,016.04 msec cpu-clock / # 7.998 CPUs utilized 6,152 context-switches / # 767.461 /sec 250 cpu-migrations / # 31.187 /sec 442 page-faults / # 55.139 /sec 613,111,487 cycles / # 0.076 GHz 280,599,604 instructions / # 0.46 insn per cycle 57,692,724 branches / # 7.197 M/sec 3,385,168 branch-misses / # 5.87% of all branches 1.002220125 seconds time elapsed After it becomes similar to the non-BPF mode: $ sudo ./perf stat -a --bpf-counters --for-each-cgroup /,/ sleep 1 Performance counter stats for 'system wide': 8,013.38 msec cpu-clock / # 7.998 CPUs utilized 6,859 context-switches / # 855.944 /sec 334 cpu-migrations / # 41.680 /sec 345 page-faults / # 43.053 /sec 782,326,119 cycles / # 0.098 GHz 471,645,724 instructions / # 0.60 insn per cycle 94,963,430 branches / # 11.851 M/sec 3,685,511 branch-misses / # 3.88% of all branches 1.001864539 seconds time elapsed Committer notes: As a reminder, to test with BPF counters one has to use BUILD_BPF_SKEL=1 in the make command line and have clang/llvm installed when building perf, otherwise the --bpf-counters option will not be available: # perf stat -a --bpf-counters --for-each-cgroup /,/ sleep 1 Error: unknown option `bpf-counters' Usage: perf stat [] [] -a, --all-cpus system-wide collection from all CPUs # Fixes: bb1c15b60b981d10 ("perf stat: Support regex pattern in --for-each-cgroup") Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: bpf@vger.kernel.org Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Link: https://lore.kernel.org/r/20230104064402.1551516-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cgroup.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index e99b41f9be45a..cd978c240e0dd 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -224,6 +224,19 @@ static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unus return 0; } +static int check_and_add_cgroup_name(const char *fpath) +{ + struct cgroup_name *cn; + + list_for_each_entry(cn, &cgroup_list, list) { + if (!strcmp(cn->name, fpath)) + return 0; + } + + /* pretend if it's added by ftw() */ + return add_cgroup_name(fpath, NULL, FTW_D, NULL); +} + static void release_cgroup_list(void) { struct cgroup_name *cn; @@ -242,7 +255,7 @@ static int list_cgroups(const char *str) struct cgroup_name *cn; char *s; - /* use given name as is - for testing purpose */ + /* use given name as is when no regex is given */ for (;;) { p = strchr(str, ','); e = p ? p : eos; @@ -253,13 +266,13 @@ static int list_cgroups(const char *str) s = strndup(str, e - str); if (!s) return -1; - /* pretend if it's added by ftw() */ - ret = add_cgroup_name(s, NULL, FTW_D, NULL); + + ret = check_and_add_cgroup_name(s); free(s); - if (ret) + if (ret < 0) return -1; } else { - if (add_cgroup_name("", NULL, FTW_D, NULL) < 0) + if (check_and_add_cgroup_name("/") < 0) return -1; } From 191f8453fc99a537ea78b727acea739782378b0d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 4 Jan 2023 07:48:37 -0700 Subject: [PATCH 0344/1593] ARM: renumber bits related to _TIF_WORK_MASK We want to ensure that the mask related to calling do_work_pending() is within the first 16 bits. Move bits unrelated to that outside of that range, to avoid spuriously calling do_work_pending() when we don't need to. Cc: stable@vger.kernel.org Fixes: 32d59773da38 ("arm: add support for TIF_NOTIFY_SIGNAL") Reported-and-tested-by: Hui Tang Suggested-by: Russell King (Oracle) Link: https://lore.kernel.org/lkml/7ecb8f3c-2aeb-a905-0d4a-aa768b9649b5@huawei.com/ Signed-off-by: Jens Axboe --- arch/arm/include/asm/thread_info.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index aecc403b28804..7f092cb55a417 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -128,15 +128,16 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, #define TIF_NEED_RESCHED 1 /* rescheduling necessary */ #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_UPROBE 3 /* breakpointed or singlestepping */ -#define TIF_SYSCALL_TRACE 4 /* syscall trace active */ -#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ -#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ -#define TIF_SECCOMP 7 /* seccomp syscall filtering active */ -#define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */ +#define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */ #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ -#define TIF_RESTORE_SIGMASK 20 +#define TIF_RESTORE_SIGMASK 19 +#define TIF_SYSCALL_TRACE 20 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 21 /* syscall auditing active */ +#define TIF_SYSCALL_TRACEPOINT 22 /* syscall tracepoint instrumentation */ +#define TIF_SECCOMP 23 /* seccomp syscall filtering active */ + #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) From 39a154fc2d172a3a5865e5a9fa2a2983eb7a99ac Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 29 Dec 2022 18:43:46 -0300 Subject: [PATCH 0345/1593] cifs: protect access of TCP_Server_Info::{dstaddr,hostname} Use the appropriate locks to protect access of hostname and dstaddr fields in cifs_tree_connect() as they might get changed by other tasks. Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/cifs/dfs.c | 22 +++++++++++----------- fs/cifs/misc.c | 2 ++ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/fs/cifs/dfs.c b/fs/cifs/dfs.c index 30086f2060a10..b64d20374b9c8 100644 --- a/fs/cifs/dfs.c +++ b/fs/cifs/dfs.c @@ -327,8 +327,8 @@ static int update_server_fullpath(struct TCP_Server_Info *server, struct cifs_sb return rc; } -static int target_share_matches_server(struct TCP_Server_Info *server, const char *tcp_host, - size_t tcp_host_len, char *share, bool *target_match) +static int target_share_matches_server(struct TCP_Server_Info *server, char *share, + bool *target_match) { int rc = 0; const char *dfs_host; @@ -338,13 +338,16 @@ static int target_share_matches_server(struct TCP_Server_Info *server, const cha extract_unc_hostname(share, &dfs_host, &dfs_host_len); /* Check if hostnames or addresses match */ - if (dfs_host_len != tcp_host_len || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) { - cifs_dbg(FYI, "%s: %.*s doesn't match %.*s\n", __func__, (int)dfs_host_len, - dfs_host, (int)tcp_host_len, tcp_host); + cifs_server_lock(server); + if (dfs_host_len != strlen(server->hostname) || + strncasecmp(dfs_host, server->hostname, dfs_host_len)) { + cifs_dbg(FYI, "%s: %.*s doesn't match %s\n", __func__, + (int)dfs_host_len, dfs_host, server->hostname); rc = match_target_ip(server, dfs_host, dfs_host_len, target_match); if (rc) cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc); } + cifs_server_unlock(server); return rc; } @@ -358,13 +361,9 @@ static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *t struct cifs_ses *root_ses = CIFS_DFS_ROOT_SES(tcon->ses); struct cifs_tcon *ipc = root_ses->tcon_ipc; char *share = NULL, *prefix = NULL; - const char *tcp_host; - size_t tcp_host_len; struct dfs_cache_tgt_iterator *tit; bool target_match; - extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len); - tit = dfs_cache_get_tgt_iterator(tl); if (!tit) { rc = -ENOENT; @@ -387,8 +386,7 @@ static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *t break; } - rc = target_share_matches_server(server, tcp_host, tcp_host_len, share, - &target_match); + rc = target_share_matches_server(server, share, &target_match); if (rc) break; if (!target_match) { @@ -497,7 +495,9 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru } if (tcon->ipc) { + cifs_server_lock(server); scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); + cifs_server_unlock(server); rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc); goto out; } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 4d3c586785a59..2a19c7987c5bd 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1277,7 +1277,9 @@ int match_target_ip(struct TCP_Server_Info *server, if (rc < 0) return rc; + spin_lock(&server->srv_lock); *result = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, (struct sockaddr *)&ss); + spin_unlock(&server->srv_lock); cifs_dbg(FYI, "%s: ip addresses match: %u\n", __func__, *result); return 0; } From 3792fc508c095abd84b10ceae12bd773e61fdc36 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 15 Nov 2022 16:15:18 +0300 Subject: [PATCH 0346/1593] drm/i915: unpin on error in intel_vgpu_shadow_mm_pin() Call intel_vgpu_unpin_mm() on this error path. Fixes: 418741480809 ("drm/i915/gvt: Adding ppgtt to GVT GEM context after shadow pdps settled.") Signed-off-by: Dan Carpenter Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/Y3OQ5tgZIVxyQ/WV@kili Reviewed-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 9cd8fcbf7cad1..8009239935f7b 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -695,6 +695,7 @@ intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload) if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT || !workload->shadow_mm->ppgtt_mm.shadowed) { + intel_vgpu_unpin_mm(workload->shadow_mm); gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); return -EINVAL; } From c4b850d1f448a901fbf4f7f36dec38c84009b489 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Mon, 19 Dec 2022 22:03:56 +0800 Subject: [PATCH 0347/1593] drm/i915/gvt: fix gvt debugfs destroy When gvt debug fs is destroyed, need to have a sane check if drm minor's debugfs root is still available or not, otherwise in case like device remove through unbinding, drm minor's debugfs directory has already been removed, then intel_gvt_debugfs_clean() would act upon dangling pointer like below oops. i915 0000:00:02.0: Direct firmware load for i915/gvt/vid_0x8086_did_0x1926_rid_0x0a.golden_hw_state failed with error -2 i915 0000:00:02.0: MDEV: Registered Console: switching to colour dummy device 80x25 i915 0000:00:02.0: MDEV: Unregistering BUG: kernel NULL pointer dereference, address: 00000000000000a0 PGD 0 P4D 0 Oops: 0002 [#1] PREEMPT SMP PTI CPU: 2 PID: 2486 Comm: gfx-unbind.sh Tainted: G I 6.1.0-rc8+ #15 Hardware name: Dell Inc. XPS 13 9350/0JXC1H, BIOS 1.13.0 02/10/2020 RIP: 0010:down_write+0x1f/0x90 Code: 1d ff ff 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 53 48 89 fb e8 62 c0 ff ff bf 01 00 00 00 e8 28 5e 31 ff 31 c0 ba 01 00 00 00 48 0f b1 13 75 33 65 48 8b 04 25 c0 bd 01 00 48 89 43 08 bf 01 RSP: 0018:ffff9eb3036ffcc8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00000000000000a0 RCX: ffffff8100000000 RDX: 0000000000000001 RSI: 0000000000000064 RDI: ffffffffa48787a8 RBP: ffff9eb3036ffd30 R08: ffffeb1fc45a0608 R09: ffffeb1fc45a05c0 R10: 0000000000000002 R11: 0000000000000000 R12: 0000000000000000 R13: ffff91acc33fa328 R14: ffff91acc033f080 R15: ffff91acced533e0 FS: 00007f6947bba740(0000) GS:ffff91ae36d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000a0 CR3: 00000001133a2002 CR4: 00000000003706e0 Call Trace: simple_recursive_removal+0x9f/0x2a0 ? start_creating.part.0+0x120/0x120 ? _raw_spin_lock+0x13/0x40 debugfs_remove+0x40/0x60 intel_gvt_debugfs_clean+0x15/0x30 [kvmgt] intel_gvt_clean_device+0x49/0xe0 [kvmgt] intel_gvt_driver_remove+0x2f/0xb0 i915_driver_remove+0xa4/0xf0 i915_pci_remove+0x1a/0x30 pci_device_remove+0x33/0xa0 device_release_driver_internal+0x1b2/0x230 unbind_store+0xe0/0x110 kernfs_fop_write_iter+0x11b/0x1f0 vfs_write+0x203/0x3d0 ksys_write+0x63/0xe0 do_syscall_64+0x37/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f6947cb5190 Code: 40 00 48 8b 15 71 9c 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 80 3d 51 24 0e 00 00 74 17 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83 ec 28 48 89 RSP: 002b:00007ffcbac45a28 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000d RCX: 00007f6947cb5190 RDX: 000000000000000d RSI: 0000555e35c866a0 RDI: 0000000000000001 RBP: 0000555e35c866a0 R08: 0000000000000002 R09: 0000555e358cb97c R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000001 R13: 000000000000000d R14: 0000000000000000 R15: 0000555e358cb8e0 Modules linked in: kvmgt CR2: 00000000000000a0 ---[ end trace 0000000000000000 ]--- Cc: Wang, Zhi Cc: He, Yu Cc: stable@vger.kernel.org Reviewed-by: Zhi Wang Fixes: bc7b0be316ae ("drm/i915/gvt: Add basic debugfs infrastructure") Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20221219140357.769557-1-zhenyuw@linux.intel.com --- drivers/gpu/drm/i915/gvt/debugfs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c index 9f1c209d92511..d7df27feee8c2 100644 --- a/drivers/gpu/drm/i915/gvt/debugfs.c +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -199,6 +199,10 @@ void intel_gvt_debugfs_init(struct intel_gvt *gvt) */ void intel_gvt_debugfs_clean(struct intel_gvt *gvt) { - debugfs_remove_recursive(gvt->debugfs_root); - gvt->debugfs_root = NULL; + struct drm_minor *minor = gvt->gt->i915->drm.primary; + + if (minor->debugfs_root) { + debugfs_remove_recursive(gvt->debugfs_root); + gvt->debugfs_root = NULL; + } } From 704f3384f322b40ba24d958473edfb1c9750c8fd Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Mon, 19 Dec 2022 22:03:57 +0800 Subject: [PATCH 0348/1593] drm/i915/gvt: fix vgpu debugfs clean in remove Check carefully on root debugfs available when destroying vgpu, e.g in remove case drm minor's debugfs root might already be destroyed, which led to kernel oops like below. Console: switching to colour dummy device 80x25 i915 0000:00:02.0: MDEV: Unregistering intel_vgpu_mdev b1338b2d-a709-4c23-b766-cc436c36cdf0: Removing from iommu group 14 BUG: kernel NULL pointer dereference, address: 0000000000000150 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP CPU: 3 PID: 1046 Comm: driverctl Not tainted 6.1.0-rc2+ #6 Hardware name: HP HP ProDesk 600 G3 MT/829D, BIOS P02 Ver. 02.44 09/13/2022 RIP: 0010:__lock_acquire+0x5e2/0x1f90 Code: 87 ad 09 00 00 39 05 e1 1e cc 02 0f 82 f1 09 00 00 ba 01 00 00 00 48 83 c4 48 89 d0 5b 5d 41 5c 41 5d 41 5e 41 5f c3 45 31 ff <48> 81 3f 60 9e c2 b6 45 0f 45 f8 83 fe 01 0f 87 55 fa ff ff 89 f0 RSP: 0018:ffff9f770274f948 EFLAGS: 00010046 RAX: 0000000000000003 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000150 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 R10: ffff8895d1173300 R11: 0000000000000001 R12: 0000000000000000 R13: 0000000000000150 R14: 0000000000000000 R15: 0000000000000000 FS: 00007fc9b2ba0740(0000) GS:ffff889cdfcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000150 CR3: 000000010fd93005 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: lock_acquire+0xbf/0x2b0 ? simple_recursive_removal+0xa5/0x2b0 ? lock_release+0x13d/0x2d0 down_write+0x2a/0xd0 ? simple_recursive_removal+0xa5/0x2b0 simple_recursive_removal+0xa5/0x2b0 ? start_creating.part.0+0x110/0x110 ? _raw_spin_unlock+0x29/0x40 debugfs_remove+0x40/0x60 intel_gvt_debugfs_remove_vgpu+0x15/0x30 [kvmgt] intel_gvt_destroy_vgpu+0x60/0x100 [kvmgt] intel_vgpu_release_dev+0xe/0x20 [kvmgt] device_release+0x30/0x80 kobject_put+0x79/0x1b0 device_release_driver_internal+0x1b8/0x230 bus_remove_device+0xec/0x160 device_del+0x189/0x400 ? up_write+0x9c/0x1b0 ? mdev_device_remove_common+0x60/0x60 [mdev] mdev_device_remove_common+0x22/0x60 [mdev] mdev_device_remove_cb+0x17/0x20 [mdev] device_for_each_child+0x56/0x80 mdev_unregister_parent+0x5a/0x81 [mdev] intel_gvt_clean_device+0x2d/0xe0 [kvmgt] intel_gvt_driver_remove+0x2e/0xb0 [i915] i915_driver_remove+0xac/0x100 [i915] i915_pci_remove+0x1a/0x30 [i915] pci_device_remove+0x31/0xa0 device_release_driver_internal+0x1b8/0x230 unbind_store+0xd8/0x100 kernfs_fop_write_iter+0x156/0x210 vfs_write+0x236/0x4a0 ksys_write+0x61/0xd0 do_syscall_64+0x55/0x80 ? find_held_lock+0x2b/0x80 ? lock_release+0x13d/0x2d0 ? up_read+0x17/0x20 ? lock_is_held_type+0xe3/0x140 ? asm_exc_page_fault+0x22/0x30 ? lockdep_hardirqs_on+0x7d/0x100 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fc9b2c9e0c4 Code: 15 71 7d 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 80 3d 3d 05 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 48 83 ec 28 48 89 54 24 18 48 RSP: 002b:00007ffec29c81c8 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000d RCX: 00007fc9b2c9e0c4 RDX: 000000000000000d RSI: 0000559f8b5f48a0 RDI: 0000000000000001 RBP: 0000559f8b5f48a0 R08: 0000559f8b5f3540 R09: 00007fc9b2d76d30 R10: 0000000000000000 R11: 0000000000000202 R12: 000000000000000d R13: 00007fc9b2d77780 R14: 000000000000000d R15: 00007fc9b2d72a00 Modules linked in: sunrpc intel_rapl_msr intel_rapl_common intel_pmc_core_pltdrv intel_pmc_core intel_tcc_cooling x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel ee1004 igbvf rapl vfat fat intel_cstate intel_uncore pktcdvd i2c_i801 pcspkr wmi_bmof i2c_smbus acpi_pad vfio_pci vfio_pci_core vfio_virqfd zram fuse dm_multipath kvmgt mdev vfio_iommu_type1 vfio kvm irqbypass i915 nvme e1000e igb nvme_core crct10dif_pclmul crc32_pclmul crc32c_intel polyval_clmulni polyval_generic serio_raw ghash_clmulni_intel sha512_ssse3 dca drm_buddy intel_gtt video wmi drm_display_helper ttm CR2: 0000000000000150 ---[ end trace 0000000000000000 ]--- Cc: Wang Zhi Cc: He Yu Cc: Alex Williamson Cc: stable@vger.kernel.org Reviewed-by: Zhi Wang Tested-by: Yu He Fixes: bc7b0be316ae ("drm/i915/gvt: Add basic debugfs infrastructure") Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20221219140357.769557-2-zhenyuw@linux.intel.com --- drivers/gpu/drm/i915/gvt/debugfs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c index d7df27feee8c2..e08ed0e9f1653 100644 --- a/drivers/gpu/drm/i915/gvt/debugfs.c +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -175,8 +175,13 @@ void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) */ void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu) { - debugfs_remove_recursive(vgpu->debugfs); - vgpu->debugfs = NULL; + struct intel_gvt *gvt = vgpu->gvt; + struct drm_minor *minor = gvt->gt->i915->drm.primary; + + if (minor->debugfs_root && gvt->debugfs_root) { + debugfs_remove_recursive(vgpu->debugfs); + vgpu->debugfs = NULL; + } } /** From a06d4b9e15c0ea4e05b200cfb1f1050e785a5e87 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Thu, 10 Nov 2022 12:20:34 +0000 Subject: [PATCH 0349/1593] drm/i915/gvt: use atomic operations to change the vGPU status Several vGPU status are used to decide the availability of GVT-g core logics when creating a vGPU. Use atomic operations on changing the vGPU status to avoid the racing. Cc: Zhenyu Wang Cc: Kevin Tian Cc: Jason Gunthorpe Cc: intel-gvt-dev@lists.freedesktop.org Suggested-by: Alex Williamson Signed-off-by: Zhi Wang Reviewed-by: Zhenyu Wang Reviewed-by: Kevin Tian Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20221110122034.3382-2-zhi.a.wang@intel.com --- drivers/gpu/drm/i915/gvt/debugfs.c | 19 ++++++++++++++- drivers/gpu/drm/i915/gvt/dmabuf.c | 3 ++- drivers/gpu/drm/i915/gvt/gtt.c | 4 ++-- drivers/gpu/drm/i915/gvt/gvt.h | 15 ++++++++---- drivers/gpu/drm/i915/gvt/interrupt.c | 2 +- drivers/gpu/drm/i915/gvt/kvmgt.c | 35 +++++++++++----------------- drivers/gpu/drm/i915/gvt/scheduler.c | 3 ++- drivers/gpu/drm/i915/gvt/vgpu.c | 12 ++++------ 8 files changed, 53 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c index e08ed0e9f1653..0616b73175f3e 100644 --- a/drivers/gpu/drm/i915/gvt/debugfs.c +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -151,6 +151,22 @@ DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops, vgpu_scan_nonprivbb_get, vgpu_scan_nonprivbb_set, "0x%llx\n"); +static int vgpu_status_get(void *data, u64 *val) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)data; + + *val = 0; + + if (test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) + *val |= (1 << INTEL_VGPU_STATUS_ATTACHED); + if (test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status)) + *val |= (1 << INTEL_VGPU_STATUS_ACTIVE); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(vgpu_status_fops, vgpu_status_get, NULL, "0x%llx\n"); + /** * intel_gvt_debugfs_add_vgpu - register debugfs entries for a vGPU * @vgpu: a vGPU @@ -162,11 +178,12 @@ void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) snprintf(name, 16, "vgpu%d", vgpu->id); vgpu->debugfs = debugfs_create_dir(name, vgpu->gvt->debugfs_root); - debugfs_create_bool("active", 0444, vgpu->debugfs, &vgpu->active); debugfs_create_file("mmio_diff", 0444, vgpu->debugfs, vgpu, &vgpu_mmio_diff_fops); debugfs_create_file("scan_nonprivbb", 0644, vgpu->debugfs, vgpu, &vgpu_scan_nonprivbb_fops); + debugfs_create_file("status", 0644, vgpu->debugfs, vgpu, + &vgpu_status_fops); } /** diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 355f1c0e86641..ffe41e9be04fc 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -134,7 +134,8 @@ static void dmabuf_gem_object_free(struct kref *kref) struct list_head *pos; struct intel_vgpu_dmabuf_obj *dmabuf_obj; - if (vgpu && vgpu->active && !list_empty(&vgpu->dmabuf_obj_list_head)) { + if (vgpu && test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status) && + !list_empty(&vgpu->dmabuf_obj_list_head)) { list_for_each(pos, &vgpu->dmabuf_obj_list_head) { dmabuf_obj = list_entry(pos, struct intel_vgpu_dmabuf_obj, list); if (dmabuf_obj == obj) { diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 51e5e8fb505bc..6b4039010caee 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -55,7 +55,7 @@ static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn) int idx; bool ret; - if (!vgpu->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return false; idx = srcu_read_lock(&kvm->srcu); @@ -1178,7 +1178,7 @@ static int is_2MB_gtt_possible(struct intel_vgpu *vgpu, if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M)) return 0; - if (!vgpu->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return -EINVAL; pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry)); if (is_error_noslot_pfn(pfn)) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 62823c0e13ab8..2d65800d8e93b 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -172,13 +172,18 @@ struct intel_vgpu_submission { #define KVMGT_DEBUGFS_FILENAME "kvmgt_nr_cache_entries" +enum { + INTEL_VGPU_STATUS_ATTACHED = 0, + INTEL_VGPU_STATUS_ACTIVE, + INTEL_VGPU_STATUS_NR_BITS, +}; + struct intel_vgpu { struct vfio_device vfio_device; struct intel_gvt *gvt; struct mutex vgpu_lock; int id; - bool active; - bool attached; + DECLARE_BITMAP(status, INTEL_VGPU_STATUS_NR_BITS); bool pv_notified; bool failsafe; unsigned int resetting_eng; @@ -467,7 +472,7 @@ void intel_vgpu_write_fence(struct intel_vgpu *vgpu, #define for_each_active_vgpu(gvt, vgpu, id) \ idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) \ - for_each_if(vgpu->active) + for_each_if(test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status)) static inline void intel_vgpu_write_pci_bar(struct intel_vgpu *vgpu, u32 offset, u32 val, bool low) @@ -725,7 +730,7 @@ static inline bool intel_gvt_mmio_is_cmd_write_patch( static inline int intel_gvt_read_gpa(struct intel_vgpu *vgpu, unsigned long gpa, void *buf, unsigned long len) { - if (!vgpu->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return -ESRCH; return vfio_dma_rw(&vgpu->vfio_device, gpa, buf, len, false); } @@ -743,7 +748,7 @@ static inline int intel_gvt_read_gpa(struct intel_vgpu *vgpu, unsigned long gpa, static inline int intel_gvt_write_gpa(struct intel_vgpu *vgpu, unsigned long gpa, void *buf, unsigned long len) { - if (!vgpu->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return -ESRCH; return vfio_dma_rw(&vgpu->vfio_device, gpa, buf, len, true); } diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c index a6b2021b665ff..68eca023bbc68 100644 --- a/drivers/gpu/drm/i915/gvt/interrupt.c +++ b/drivers/gpu/drm/i915/gvt/interrupt.c @@ -433,7 +433,7 @@ static int inject_virtual_interrupt(struct intel_vgpu *vgpu) * enabled by guest. so if msi_trigger is null, success is still * returned and don't inject interrupt into guest. */ - if (!vgpu->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return -ESRCH; if (vgpu->msi_trigger && eventfd_signal(vgpu->msi_trigger, 1) != 1) return -EFAULT; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index f5451adcd4890..8ae7039b36832 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -638,7 +638,7 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) mutex_lock(&vgpu->gvt->lock); for_each_active_vgpu(vgpu->gvt, itr, id) { - if (!itr->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, itr->status)) continue; if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) { @@ -655,9 +655,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) { struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); - if (vgpu->attached) - return -EEXIST; - if (!vgpu->vfio_device.kvm || vgpu->vfio_device.kvm->mm != current->mm) { gvt_vgpu_err("KVM is required to use Intel vGPU\n"); @@ -667,14 +664,14 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) if (__kvmgt_vgpu_exist(vgpu)) return -EEXIST; - vgpu->attached = true; - vgpu->track_node.track_write = kvmgt_page_track_write; vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot; kvm_get_kvm(vgpu->vfio_device.kvm); kvm_page_track_register_notifier(vgpu->vfio_device.kvm, &vgpu->track_node); + set_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status); + debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs, &vgpu->nr_cache_entries); @@ -698,11 +695,10 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) { struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); - if (!vgpu->attached) - return; - intel_gvt_release_vgpu(vgpu); + clear_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status); + debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs)); kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, @@ -718,8 +714,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) vgpu->dma_addr_cache = RB_ROOT; intel_vgpu_release_msi_eventfd_ctx(vgpu); - - vgpu->attached = false; } static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) @@ -1512,9 +1506,6 @@ static void intel_vgpu_remove(struct mdev_device *mdev) { struct intel_vgpu *vgpu = dev_get_drvdata(&mdev->dev); - if (WARN_ON_ONCE(vgpu->attached)) - return; - vfio_unregister_group_dev(&vgpu->vfio_device); vfio_put_device(&vgpu->vfio_device); } @@ -1559,7 +1550,7 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn) struct kvm_memory_slot *slot; int idx; - if (!info->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status)) return -ESRCH; idx = srcu_read_lock(&kvm->srcu); @@ -1589,8 +1580,8 @@ int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn) struct kvm_memory_slot *slot; int idx; - if (!info->attached) - return 0; + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status)) + return -ESRCH; idx = srcu_read_lock(&kvm->srcu); slot = gfn_to_memslot(kvm, gfn); @@ -1668,7 +1659,7 @@ int intel_gvt_dma_map_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, struct gvt_dma *entry; int ret; - if (!vgpu->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return -EINVAL; mutex_lock(&vgpu->cache_lock); @@ -1714,8 +1705,8 @@ int intel_gvt_dma_pin_guest_page(struct intel_vgpu *vgpu, dma_addr_t dma_addr) struct gvt_dma *entry; int ret = 0; - if (!vgpu->attached) - return -ENODEV; + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) + return -EINVAL; mutex_lock(&vgpu->cache_lock); entry = __gvt_cache_find_dma_addr(vgpu, dma_addr); @@ -1742,7 +1733,7 @@ void intel_gvt_dma_unmap_guest_page(struct intel_vgpu *vgpu, { struct gvt_dma *entry; - if (!vgpu->attached) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return; mutex_lock(&vgpu->cache_lock); @@ -1778,7 +1769,7 @@ static void intel_gvt_test_and_emulate_vblank(struct intel_gvt *gvt) idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) { if (test_and_clear_bit(INTEL_GVT_REQUEST_EMULATE_VBLANK + id, (void *)&gvt->service_request)) { - if (vgpu->active) + if (test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status)) intel_vgpu_emulate_vblank(vgpu); } } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 8009239935f7b..f4055804aad1f 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -866,7 +866,8 @@ pick_next_workload(struct intel_gvt *gvt, struct intel_engine_cs *engine) goto out; } - if (!scheduler->current_vgpu->active || + if (!test_bit(INTEL_VGPU_STATUS_ACTIVE, + scheduler->current_vgpu->status) || list_empty(workload_q_head(scheduler->current_vgpu, engine))) goto out; diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 3c529c2705ddc..a5497440484f1 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -166,9 +166,7 @@ void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt) */ void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu) { - mutex_lock(&vgpu->vgpu_lock); - vgpu->active = true; - mutex_unlock(&vgpu->vgpu_lock); + set_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status); } /** @@ -183,7 +181,7 @@ void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu) { mutex_lock(&vgpu->vgpu_lock); - vgpu->active = false; + clear_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status); if (atomic_read(&vgpu->submission.running_workload_num)) { mutex_unlock(&vgpu->vgpu_lock); @@ -228,7 +226,8 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) struct intel_gvt *gvt = vgpu->gvt; struct drm_i915_private *i915 = gvt->gt->i915; - drm_WARN(&i915->drm, vgpu->active, "vGPU is still active!\n"); + drm_WARN(&i915->drm, test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status), + "vGPU is still active!\n"); /* * remove idr first so later clean can judge if need to stop @@ -285,8 +284,7 @@ struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt) if (ret) goto out_free_vgpu; - vgpu->active = false; - + clear_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status); return vgpu; out_free_vgpu: From 4a61648af68f5ba4884f0e3b494ee1cabc4b6620 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Fri, 30 Dec 2022 00:56:41 +0800 Subject: [PATCH 0350/1593] drm/i915/gvt: fix double free bug in split_2MB_gtt_entry If intel_gvt_dma_map_guest_page failed, it will call ppgtt_invalidate_spt, which will finally free the spt. But the caller function ppgtt_populate_spt_by_guest_entry does not notice that, it will free spt again in its error path. Fix this by canceling the mapping of DMA address and freeing sub_spt. Besides, leave the handle of spt destroy to caller function instead of callee function when error occurs. Fixes: b901b252b6cf ("drm/i915/gvt: Add 2M huge gtt support") Signed-off-by: Zheng Wang Reviewed-by: Zhenyu Wang Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20221229165641.1192455-1-zyytlz.wz@163.com --- drivers/gpu/drm/i915/gvt/gtt.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 6b4039010caee..4ec85308379a4 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1209,10 +1209,8 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, for_each_shadow_entry(sub_spt, &sub_se, sub_index) { ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index, PAGE_SIZE, &dma_addr); - if (ret) { - ppgtt_invalidate_spt(spt); - return ret; - } + if (ret) + goto err; sub_se.val64 = se->val64; /* Copy the PAT field from PDE. */ @@ -1231,6 +1229,17 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, ops->set_pfn(se, sub_spt->shadow_page.mfn); ppgtt_set_shadow_entry(spt, se, index); return 0; +err: + /* Cancel the existing addess mappings of DMA addr. */ + for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) { + gvt_vdbg_mm("invalidate 4K entry\n"); + ppgtt_invalidate_pte(sub_spt, &sub_se); + } + /* Release the new allocated spt. */ + trace_spt_change(sub_spt->vgpu->id, "release", sub_spt, + sub_spt->guest_page.gfn, sub_spt->shadow_page.type); + ppgtt_free_spt(sub_spt); + return ret; } static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, From 613b14884b8595e20b9fac4126bf627313827fbe Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 4 Jan 2023 08:51:19 -0700 Subject: [PATCH 0351/1593] block: handle bio_split_to_limits() NULL return This can't happen right now, but in preparation for allowing bio_split_to_limits() returning NULL if it ended the bio, check for it in all the callers. Signed-off-by: Jens Axboe --- block/blk-merge.c | 4 +++- block/blk-mq.c | 5 ++++- drivers/block/drbd/drbd_req.c | 2 ++ drivers/block/ps3vram.c | 2 ++ drivers/md/dm.c | 2 ++ drivers/md/md.c | 2 ++ drivers/nvme/host/multipath.c | 2 ++ drivers/s390/block/dcssblk.c | 2 ++ 8 files changed, 19 insertions(+), 2 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 35a8f75cc45d1..071c5f8cf0cfe 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -358,11 +358,13 @@ struct bio *__bio_split_to_limits(struct bio *bio, default: split = bio_split_rw(bio, lim, nr_segs, bs, get_max_io_size(bio, lim) << SECTOR_SHIFT); + if (IS_ERR(split)) + return NULL; break; } if (split) { - /* there isn't chance to merge the splitted bio */ + /* there isn't chance to merge the split bio */ split->bi_opf |= REQ_NOMERGE; blkcg_bio_issue_init(split); diff --git a/block/blk-mq.c b/block/blk-mq.c index c5cf0dbca1db8..2c49b4151da15 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2951,8 +2951,11 @@ void blk_mq_submit_bio(struct bio *bio) blk_status_t ret; bio = blk_queue_bounce(bio, q); - if (bio_may_exceed_limits(bio, &q->limits)) + if (bio_may_exceed_limits(bio, &q->limits)) { bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); + if (!bio) + return; + } if (!bio_integrity_prep(bio)) return; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index eb14ec8ec04cf..e36216d50753c 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1607,6 +1607,8 @@ void drbd_submit_bio(struct bio *bio) struct drbd_device *device = bio->bi_bdev->bd_disk->private_data; bio = bio_split_to_limits(bio); + if (!bio) + return; /* * what we "blindly" assume: diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index c76e0148eada3..574e470b220b0 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -587,6 +587,8 @@ static void ps3vram_submit_bio(struct bio *bio) dev_dbg(&dev->core, "%s\n", __func__); bio = bio_split_to_limits(bio); + if (!bio) + return; spin_lock_irq(&priv->lock); busy = !bio_list_empty(&priv->list); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e1ea3a7bd9d9f..b424a6ee27baf 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1742,6 +1742,8 @@ static void dm_split_and_process_bio(struct mapped_device *md, * otherwise associated queue_limits won't be imposed. */ bio = bio_split_to_limits(bio); + if (!bio) + return; } init_clone_info(&ci, md, map, bio, is_abnormal); diff --git a/drivers/md/md.c b/drivers/md/md.c index 775f1dde190a2..8af639296b3c8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -455,6 +455,8 @@ static void md_submit_bio(struct bio *bio) } bio = bio_split_to_limits(bio); + if (!bio) + return; if (mddev->ro == MD_RDONLY && unlikely(rw == WRITE)) { if (bio_sectors(bio) != 0) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index c03093b6813c5..fc39d01e7b63b 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -376,6 +376,8 @@ static void nvme_ns_head_submit_bio(struct bio *bio) * pool from the original queue to allocate the bvecs from. */ bio = bio_split_to_limits(bio); + if (!bio) + return; srcu_idx = srcu_read_lock(&head->srcu); ns = nvme_find_path(head); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index b392b9f5482e0..c0f85ffb2b62d 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -865,6 +865,8 @@ dcssblk_submit_bio(struct bio *bio) unsigned long bytes_done; bio = bio_split_to_limits(bio); + if (!bio) + return; bytes_done = 0; dev_info = bio->bi_bdev->bd_disk->private_data; From be79f805a1e1b95605c825f1c513bdd2c8b167ed Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Thu, 29 Dec 2022 12:44:38 +0000 Subject: [PATCH 0352/1593] dt-bindings: msm: dsi-phy-28nm: Add missing qcom, dsi-phy-regulator-ldo-mode Add in missing qcom,dsi-phy-regulator-ldo-mode to the 28nm DSI PHY. When converting from .txt to .yaml we missed this one. Fixes: 4dbe55c97741 ("dt-bindings: msm: dsi: add yaml schemas for DSI bindings") Reviewed-by: Dmitry Baryshkov Signed-off-by: Bryan O'Donoghue Patchwork: https://patchwork.freedesktop.org/patch/516205/ Link: https://lore.kernel.org/r/20221229124438.504770-2-bryan.odonoghue@linaro.org Signed-off-by: Abhinav Kumar --- .../devicetree/bindings/display/msm/dsi-phy-28nm.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml index 3d8540a06fe22..2f1fd140c87df 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml @@ -34,6 +34,10 @@ properties: vddio-supply: description: Phandle to vdd-io regulator device node. + qcom,dsi-phy-regulator-ldo-mode: + type: boolean + description: Indicates if the LDO mode PHY regulator is wanted. + required: - compatible - reg From 481028dbf1daa2808e1be06f6a865b5fe5939efc Mon Sep 17 00:00:00 2001 From: Jesus Sanchez-Palencia Date: Wed, 4 Jan 2023 11:34:14 -0800 Subject: [PATCH 0353/1593] perf tools: Fix build on uClibc systems by adding missing sys/types.h include Not all libc implementations define ssize_t as part of stdio.h like glibc does since the standard only requires this type to be defined by unistd.h and sys/types.h. For this reason the perf build is currently broken for toolchains based on uClibc, for instance. Include sys/types.h explicitly to fix that. Committer notes: In addition, in the past this worked in uClibc test systems as there was another way to get to sys/types.h that got removed in that cset: tools/perf/util/trace-event.h /usr/include/traceevent/event_parse.h # This got removed from util/trace-event.h in 378ef0f5d9d7f465 /usr/include/regex.h /usr/include/sys/types.h typedef __ssize_t ssize_t; So the size_t that is used in tools/perf/util/trace-event.h was being obtained indirectly, by chance. Fixes: 378ef0f5d9d7f465 ("perf build: Use libtraceevent from the system") Signed-off-by: Jesus Sanchez-Palencia Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20230104193414.606905-1-jesussanp@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/trace-event.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index add6c5d9531cd..9b3cd79cca121 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -4,6 +4,7 @@ #include #include +#include #include struct evlist; From f52853a668bfeddd79f319d536a506f68cc2b478 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 4 Jan 2023 22:58:30 +0800 Subject: [PATCH 0354/1593] perf/x86/rapl: Add support for Intel Meteor Lake Meteor Lake RAPL support is the same as previous Sky Lake. Add Meteor Lake model for RAPL. Signed-off-by: Zhang Rui Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230104145831.25498-1-rui.zhang@intel.com --- arch/x86/events/rapl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index ae5779ea44174..589c6885560d2 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -809,6 +809,8 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &model_skl), {}, }; MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); From 57512b57dcfaf63c52d8ad2fb35321328cde31b0 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 4 Jan 2023 22:58:31 +0800 Subject: [PATCH 0355/1593] perf/x86/rapl: Add support for Intel Emerald Rapids Emerald Rapids RAPL support is the same as previous Sapphire Rapids. Add Emerald Rapids model for RAPL. Signed-off-by: Zhang Rui Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230104145831.25498-2-rui.zhang@intel.com --- arch/x86/events/rapl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 589c6885560d2..52e6e7ed4f78a 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -806,6 +806,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &model_spr), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl), From 9cea62b2cbabff8ed46f2df17778b624ad9dd25a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 4 Jan 2023 08:52:06 -0700 Subject: [PATCH 0356/1593] block: don't allow splitting of a REQ_NOWAIT bio If we split a bio marked with REQ_NOWAIT, then we can trigger spurious EAGAIN if constituent parts of that split bio end up failing request allocations. Parts will complete just fine, but just a single failure in one of the chained bios will yield an EAGAIN final result for the parent bio. Return EAGAIN early if we end up needing to split such a bio, which allows for saner recovery handling. Cc: stable@vger.kernel.org # 5.15+ Link: https://github.com/axboe/liburing/issues/766 Reported-by: Michael Kelley Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-merge.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/block/blk-merge.c b/block/blk-merge.c index 071c5f8cf0cfe..b7c193d67185d 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -309,6 +309,16 @@ static struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim, *segs = nsegs; return NULL; split: + /* + * We can't sanely support splitting for a REQ_NOWAIT bio. End it + * with EAGAIN if splitting is required and return an error pointer. + */ + if (bio->bi_opf & REQ_NOWAIT) { + bio->bi_status = BLK_STS_AGAIN; + bio_endio(bio); + return ERR_PTR(-EAGAIN); + } + *segs = nsegs; /* From fa8e442e832a3647cdd90f3e606c473a51bc1b26 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 4 Jan 2023 21:32:35 +0800 Subject: [PATCH 0357/1593] ublk: honor IO_URING_F_NONBLOCK for handling control command Most of control command handlers may sleep, so return -EAGAIN in case of IO_URING_F_NONBLOCK to defer the handling into io wq context. Fixes: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver") Reported-by: Jens Axboe Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20230104133235.836536-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index e9de9d846b730..17b677b5d3b22 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1992,6 +1992,9 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; int ret = -EINVAL; + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + ublk_ctrl_cmd_dump(cmd); if (!(issue_flags & IO_URING_F_SQE128)) From 59b745bb4e0bd445366c45b8df6b51b69134f4f5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 4 Jan 2023 13:49:54 -0700 Subject: [PATCH 0358/1593] io_uring: move 'poll_multi_queue' bool in io_ring_ctx The cacheline section holding this variable has two gaps, where one is caused by this bool not packing well with structs. This causes it to blow into the next cacheline. Move the variable, shrinking io_ring_ctx by a full cacheline in size. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index dcd8a563ab522..128a67a40065f 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -292,6 +292,8 @@ struct io_ring_ctx { struct { spinlock_t completion_lock; + bool poll_multi_queue; + /* * ->iopoll_list is protected by the ctx->uring_lock for * io_uring instances that don't use IORING_SETUP_SQPOLL. @@ -300,7 +302,6 @@ struct io_ring_ctx { */ struct io_wq_work_list iopoll_list; struct io_hash_table cancel_table; - bool poll_multi_queue; struct llist_head work_llist; From ee4b4e2248565babfba807d82c0f3e00c392a4c0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 4 Jan 2023 14:43:27 -0700 Subject: [PATCH 0359/1593] Revert "block: bio_copy_data_iter" This reverts commit db1c7d77976775483a8ef240b4c705f113e13ea1. We're reinstating the pktcdvd driver, which needs this API. Signed-off-by: Jens Axboe --- block/bio.c | 37 ++++++++++++++++++++++--------------- include/linux/bio.h | 2 ++ 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/block/bio.c b/block/bio.c index 5f96fcae3f754..ab59a491a883e 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1401,6 +1401,27 @@ void __bio_advance(struct bio *bio, unsigned bytes) } EXPORT_SYMBOL(__bio_advance); +void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, + struct bio *src, struct bvec_iter *src_iter) +{ + while (src_iter->bi_size && dst_iter->bi_size) { + struct bio_vec src_bv = bio_iter_iovec(src, *src_iter); + struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter); + unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len); + void *src_buf = bvec_kmap_local(&src_bv); + void *dst_buf = bvec_kmap_local(&dst_bv); + + memcpy(dst_buf, src_buf, bytes); + + kunmap_local(dst_buf); + kunmap_local(src_buf); + + bio_advance_iter_single(src, src_iter, bytes); + bio_advance_iter_single(dst, dst_iter, bytes); + } +} +EXPORT_SYMBOL(bio_copy_data_iter); + /** * bio_copy_data - copy contents of data buffers from one bio to another * @src: source bio @@ -1414,21 +1435,7 @@ void bio_copy_data(struct bio *dst, struct bio *src) struct bvec_iter src_iter = src->bi_iter; struct bvec_iter dst_iter = dst->bi_iter; - while (src_iter.bi_size && dst_iter.bi_size) { - struct bio_vec src_bv = bio_iter_iovec(src, src_iter); - struct bio_vec dst_bv = bio_iter_iovec(dst, dst_iter); - unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len); - void *src_buf = bvec_kmap_local(&src_bv); - void *dst_buf = bvec_kmap_local(&dst_bv); - - memcpy(dst_buf, src_buf, bytes); - - kunmap_local(dst_buf); - kunmap_local(src_buf); - - bio_advance_iter_single(src, &src_iter, bytes); - bio_advance_iter_single(dst, &dst_iter, bytes); - } + bio_copy_data_iter(dst, &dst_iter, src, &src_iter); } EXPORT_SYMBOL(bio_copy_data); diff --git a/include/linux/bio.h b/include/linux/bio.h index 22078a28d7cb1..c1da63f6c8080 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -475,6 +475,8 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); +extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, + struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); void guard_bio_eod(struct bio *bio); From 050a4f341f35bf51db321c7f68700f9e0b1a7552 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 4 Jan 2023 14:44:02 -0700 Subject: [PATCH 0360/1593] Revert "block: remove devnode callback from struct block_device_operations" This reverts commit 85d6ce58e493ac8b7122e2fbe3f41b94d6ebdc11. We're reinstating the pktcdvd driver, which needs this API. Signed-off-by: Jens Axboe --- block/genhd.c | 11 +++++++++++ include/linux/blkdev.h | 1 + 2 files changed, 12 insertions(+) diff --git a/block/genhd.c b/block/genhd.c index 08f76135a6373..14329dc278b29 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1201,10 +1201,21 @@ struct class block_class = { .dev_uevent = block_uevent, }; +static char *block_devnode(struct device *dev, umode_t *mode, + kuid_t *uid, kgid_t *gid) +{ + struct gendisk *disk = dev_to_disk(dev); + + if (disk->fops->devnode) + return disk->fops->devnode(disk, mode); + return NULL; +} + const struct device_type disk_type = { .name = "disk", .groups = disk_attr_groups, .release = disk_release, + .devnode = block_devnode, }; #ifdef CONFIG_PROC_FS diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 301cf1cf4f2fa..43d4e073b1115 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1395,6 +1395,7 @@ struct block_device_operations { void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); + char *(*devnode)(struct gendisk *disk, umode_t *mode); /* returns the length of the identifier or a negative errno: */ int (*get_unique_id)(struct gendisk *disk, u8 id[16], enum blk_unique_id id_type); From 4b83e99ee7092df37a5cf292fde976ebc475ea63 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 4 Jan 2023 14:44:13 -0700 Subject: [PATCH 0361/1593] Revert "pktcdvd: remove driver." MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit f40eb99897af665f11858dd7b56edcb62c3f3c67. There are apparently still users out there of this driver. While we'd love to remove it to ease the maintenance burden, let's reinstate it for now until better (userspace) solutions can be developed. Link: https://lore.kernel.org/lkml/20230104190115.ceglfefco475ev6c@pali/ Reported-by: Pali Rohár Signed-off-by: Jens Axboe --- Documentation/ABI/testing/debugfs-pktcdvd | 18 + Documentation/ABI/testing/sysfs-class-pktcdvd | 97 + MAINTAINERS | 7 + drivers/block/Kconfig | 43 + drivers/block/Makefile | 1 + drivers/block/pktcdvd.c | 2944 +++++++++++++++++ include/linux/pktcdvd.h | 197 ++ include/uapi/linux/pktcdvd.h | 112 + 8 files changed, 3419 insertions(+) create mode 100644 Documentation/ABI/testing/debugfs-pktcdvd create mode 100644 Documentation/ABI/testing/sysfs-class-pktcdvd create mode 100644 drivers/block/pktcdvd.c create mode 100644 include/linux/pktcdvd.h create mode 100644 include/uapi/linux/pktcdvd.h diff --git a/Documentation/ABI/testing/debugfs-pktcdvd b/Documentation/ABI/testing/debugfs-pktcdvd new file mode 100644 index 0000000000000..f6f65a4faea06 --- /dev/null +++ b/Documentation/ABI/testing/debugfs-pktcdvd @@ -0,0 +1,18 @@ +What: /sys/kernel/debug/pktcdvd/pktcdvd[0-7] +Date: Oct. 2006 +KernelVersion: 2.6.20 +Contact: Thomas Maier +Description: + +The pktcdvd module (packet writing driver) creates +these files in debugfs: + +/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/ + + ==== ====== ==================================== + info 0444 Lots of driver statistics and infos. + ==== ====== ==================================== + +Example:: + + cat /sys/kernel/debug/pktcdvd/pktcdvd0/info diff --git a/Documentation/ABI/testing/sysfs-class-pktcdvd b/Documentation/ABI/testing/sysfs-class-pktcdvd new file mode 100644 index 0000000000000..ba1ce626591d9 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-pktcdvd @@ -0,0 +1,97 @@ +sysfs interface +--------------- +The pktcdvd module (packet writing driver) creates the following files in the +sysfs: ( is in the format major:minor) + +What: /sys/class/pktcdvd/add +What: /sys/class/pktcdvd/remove +What: /sys/class/pktcdvd/device_map +Date: Oct. 2006 +KernelVersion: 2.6.20 +Contact: Thomas Maier +Description: + + ========== ============================================== + add (WO) Write a block device id (major:minor) to + create a new pktcdvd device and map it to the + block device. + + remove (WO) Write the pktcdvd device id (major:minor) + to remove the pktcdvd device. + + device_map (RO) Shows the device mapping in format: + pktcdvd[0-7] + ========== ============================================== + + +What: /sys/class/pktcdvd/pktcdvd[0-7]/dev +What: /sys/class/pktcdvd/pktcdvd[0-7]/uevent +Date: Oct. 2006 +KernelVersion: 2.6.20 +Contact: Thomas Maier +Description: + dev: (RO) Device id + + uevent: (WO) To send a uevent + + +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_started +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_finished +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_written +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read_gather +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/reset +Date: Oct. 2006 +KernelVersion: 2.6.20 +Contact: Thomas Maier +Description: + packets_started: (RO) Number of started packets. + + packets_finished: (RO) Number of finished packets. + + kb_written: (RO) kBytes written. + + kb_read: (RO) kBytes read. + + kb_read_gather: (RO) kBytes read to fill write packets. + + reset: (WO) Write any value to it to reset + pktcdvd device statistic values, like + bytes read/written. + + +What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/size +What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_off +What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_on +Date: Oct. 2006 +KernelVersion: 2.6.20 +Contact: Thomas Maier +Description: + ============== ================================================ + size (RO) Contains the size of the bio write queue. + + congestion_off (RW) If bio write queue size is below this mark, + accept new bio requests from the block layer. + + congestion_on (RW) If bio write queue size is higher as this + mark, do no longer accept bio write requests + from the block layer and wait till the pktcdvd + device has processed enough bio's so that bio + write queue size is below congestion off mark. + A value of <= 0 disables congestion control. + ============== ================================================ + + +Example: +-------- +To use the pktcdvd sysfs interface directly, you can do:: + + # create a new pktcdvd device mapped to /dev/hdc + echo "22:0" >/sys/class/pktcdvd/add + cat /sys/class/pktcdvd/device_map + # assuming device pktcdvd0 was created, look at stat's + cat /sys/class/pktcdvd/pktcdvd0/stat/kb_written + # print the device id of the mapped block device + fgrep pktcdvd0 /sys/class/pktcdvd/device_map + # remove device, using pktcdvd0 device id 253:0 + echo "253:0" >/sys/class/pktcdvd/remove diff --git a/MAINTAINERS b/MAINTAINERS index d53b3a6cdc67d..3ef137fea4f6d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16520,6 +16520,13 @@ S: Supported F: Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml F: drivers/input/keyboard/pinephone-keyboard.c +PKTCDVD DRIVER +M: linux-block@vger.kernel.org +S: Orphan +F: drivers/block/pktcdvd.c +F: include/linux/pktcdvd.h +F: include/uapi/linux/pktcdvd.h + PLANTOWER PMS7003 AIR POLLUTION SENSOR DRIVER M: Tomasz Duszynski S: Maintained diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index a2184b4284936..a41145d52de94 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -285,6 +285,49 @@ config BLK_DEV_RAM_SIZE The default value is 4096 kilobytes. Only change this if you know what you are doing. +config CDROM_PKTCDVD + tristate "Packet writing on CD/DVD media (DEPRECATED)" + depends on !UML + depends on SCSI + select CDROM + help + Note: This driver is deprecated and will be removed from the + kernel in the near future! + + If you have a CDROM/DVD drive that supports packet writing, say + Y to include support. It should work with any MMC/Mt Fuji + compliant ATAPI or SCSI drive, which is just about any newer + DVD/CD writer. + + Currently only writing to CD-RW, DVD-RW, DVD+RW and DVDRAM discs + is possible. + DVD-RW disks must be in restricted overwrite mode. + + See the file + for further information on the use of this driver. + + To compile this driver as a module, choose M here: the + module will be called pktcdvd. + +config CDROM_PKTCDVD_BUFFERS + int "Free buffers for data gathering" + depends on CDROM_PKTCDVD + default "8" + help + This controls the maximum number of active concurrent packets. More + concurrent packets can increase write performance, but also require + more memory. Each concurrent packet will require approximately 64Kb + of non-swappable kernel memory, memory which will be allocated when + a disc is opened for writing. + +config CDROM_PKTCDVD_WCACHE + bool "Enable write caching" + depends on CDROM_PKTCDVD + help + If enabled, write caching will be set for the CD-R/W device. For now + this option is dangerous unless the CD-RW media is known good, as we + don't do deferred write error handling yet. + config ATA_OVER_ETH tristate "ATA over Ethernet support" depends on NET diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 962ee65d8ca30..101612cba303a 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o obj-$(CONFIG_N64CART) += n64cart.o obj-$(CONFIG_BLK_DEV_RAM) += brd.o obj-$(CONFIG_BLK_DEV_LOOP) += loop.o +obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o obj-$(CONFIG_SUNVDC) += sunvdc.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c new file mode 100644 index 0000000000000..4cea3b08087ed --- /dev/null +++ b/drivers/block/pktcdvd.c @@ -0,0 +1,2944 @@ +/* + * Copyright (C) 2000 Jens Axboe + * Copyright (C) 2001-2004 Peter Osterlund + * Copyright (C) 2006 Thomas Maier + * + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + * + * Packet writing layer for ATAPI and SCSI CD-RW, DVD+RW, DVD-RW and + * DVD-RAM devices. + * + * Theory of operation: + * + * At the lowest level, there is the standard driver for the CD/DVD device, + * such as drivers/scsi/sr.c. This driver can handle read and write requests, + * but it doesn't know anything about the special restrictions that apply to + * packet writing. One restriction is that write requests must be aligned to + * packet boundaries on the physical media, and the size of a write request + * must be equal to the packet size. Another restriction is that a + * GPCMD_FLUSH_CACHE command has to be issued to the drive before a read + * command, if the previous command was a write. + * + * The purpose of the packet writing driver is to hide these restrictions from + * higher layers, such as file systems, and present a block device that can be + * randomly read and written using 2kB-sized blocks. + * + * The lowest layer in the packet writing driver is the packet I/O scheduler. + * Its data is defined by the struct packet_iosched and includes two bio + * queues with pending read and write requests. These queues are processed + * by the pkt_iosched_process_queue() function. The write requests in this + * queue are already properly aligned and sized. This layer is responsible for + * issuing the flush cache commands and scheduling the I/O in a good order. + * + * The next layer transforms unaligned write requests to aligned writes. This + * transformation requires reading missing pieces of data from the underlying + * block device, assembling the pieces to full packets and queuing them to the + * packet I/O scheduler. + * + * At the top layer there is a custom ->submit_bio function that forwards + * read requests directly to the iosched queue and puts write requests in the + * unaligned write queue. A kernel thread performs the necessary read + * gathering to convert the unaligned writes to aligned writes and then feeds + * them to the packet I/O scheduler. + * + *************************************************************************/ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "pktcdvd" + +#define pkt_err(pd, fmt, ...) \ + pr_err("%s: " fmt, pd->name, ##__VA_ARGS__) +#define pkt_notice(pd, fmt, ...) \ + pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__) +#define pkt_info(pd, fmt, ...) \ + pr_info("%s: " fmt, pd->name, ##__VA_ARGS__) + +#define pkt_dbg(level, pd, fmt, ...) \ +do { \ + if (level == 2 && PACKET_DEBUG >= 2) \ + pr_notice("%s: %s():" fmt, \ + pd->name, __func__, ##__VA_ARGS__); \ + else if (level == 1 && PACKET_DEBUG >= 1) \ + pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__); \ +} while (0) + +#define MAX_SPEED 0xffff + +static DEFINE_MUTEX(pktcdvd_mutex); +static struct pktcdvd_device *pkt_devs[MAX_WRITERS]; +static struct proc_dir_entry *pkt_proc; +static int pktdev_major; +static int write_congestion_on = PKT_WRITE_CONGESTION_ON; +static int write_congestion_off = PKT_WRITE_CONGESTION_OFF; +static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */ +static mempool_t psd_pool; +static struct bio_set pkt_bio_set; + +static struct class *class_pktcdvd = NULL; /* /sys/class/pktcdvd */ +static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */ + +/* forward declaration */ +static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev); +static int pkt_remove_dev(dev_t pkt_dev); +static int pkt_seq_show(struct seq_file *m, void *p); + +static sector_t get_zone(sector_t sector, struct pktcdvd_device *pd) +{ + return (sector + pd->offset) & ~(sector_t)(pd->settings.size - 1); +} + +/********************************************************** + * sysfs interface for pktcdvd + * by (C) 2006 Thomas Maier + + /sys/class/pktcdvd/pktcdvd[0-7]/ + stat/reset + stat/packets_started + stat/packets_finished + stat/kb_written + stat/kb_read + stat/kb_read_gather + write_queue/size + write_queue/congestion_off + write_queue/congestion_on + **********************************************************/ + +static ssize_t packets_started_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", pd->stats.pkt_started); +} +static DEVICE_ATTR_RO(packets_started); + +static ssize_t packets_finished_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", pd->stats.pkt_ended); +} +static DEVICE_ATTR_RO(packets_finished); + +static ssize_t kb_written_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", pd->stats.secs_w >> 1); +} +static DEVICE_ATTR_RO(kb_written); + +static ssize_t kb_read_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", pd->stats.secs_r >> 1); +} +static DEVICE_ATTR_RO(kb_read); + +static ssize_t kb_read_gather_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", pd->stats.secs_rg >> 1); +} +static DEVICE_ATTR_RO(kb_read_gather); + +static ssize_t reset_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + if (len > 0) { + pd->stats.pkt_started = 0; + pd->stats.pkt_ended = 0; + pd->stats.secs_w = 0; + pd->stats.secs_rg = 0; + pd->stats.secs_r = 0; + } + return len; +} +static DEVICE_ATTR_WO(reset); + +static struct attribute *pkt_stat_attrs[] = { + &dev_attr_packets_finished.attr, + &dev_attr_packets_started.attr, + &dev_attr_kb_read.attr, + &dev_attr_kb_written.attr, + &dev_attr_kb_read_gather.attr, + &dev_attr_reset.attr, + NULL, +}; + +static const struct attribute_group pkt_stat_group = { + .name = "stat", + .attrs = pkt_stat_attrs, +}; + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int n; + + spin_lock(&pd->lock); + n = sysfs_emit(buf, "%d\n", pd->bio_queue_size); + spin_unlock(&pd->lock); + return n; +} +static DEVICE_ATTR_RO(size); + +static void init_write_congestion_marks(int* lo, int* hi) +{ + if (*hi > 0) { + *hi = max(*hi, 500); + *hi = min(*hi, 1000000); + if (*lo <= 0) + *lo = *hi - 100; + else { + *lo = min(*lo, *hi - 100); + *lo = max(*lo, 100); + } + } else { + *hi = -1; + *lo = -1; + } +} + +static ssize_t congestion_off_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int n; + + spin_lock(&pd->lock); + n = sysfs_emit(buf, "%d\n", pd->write_congestion_off); + spin_unlock(&pd->lock); + return n; +} + +static ssize_t congestion_off_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int val; + + if (sscanf(buf, "%d", &val) == 1) { + spin_lock(&pd->lock); + pd->write_congestion_off = val; + init_write_congestion_marks(&pd->write_congestion_off, + &pd->write_congestion_on); + spin_unlock(&pd->lock); + } + return len; +} +static DEVICE_ATTR_RW(congestion_off); + +static ssize_t congestion_on_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int n; + + spin_lock(&pd->lock); + n = sysfs_emit(buf, "%d\n", pd->write_congestion_on); + spin_unlock(&pd->lock); + return n; +} + +static ssize_t congestion_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int val; + + if (sscanf(buf, "%d", &val) == 1) { + spin_lock(&pd->lock); + pd->write_congestion_on = val; + init_write_congestion_marks(&pd->write_congestion_off, + &pd->write_congestion_on); + spin_unlock(&pd->lock); + } + return len; +} +static DEVICE_ATTR_RW(congestion_on); + +static struct attribute *pkt_wq_attrs[] = { + &dev_attr_congestion_on.attr, + &dev_attr_congestion_off.attr, + &dev_attr_size.attr, + NULL, +}; + +static const struct attribute_group pkt_wq_group = { + .name = "write_queue", + .attrs = pkt_wq_attrs, +}; + +static const struct attribute_group *pkt_groups[] = { + &pkt_stat_group, + &pkt_wq_group, + NULL, +}; + +static void pkt_sysfs_dev_new(struct pktcdvd_device *pd) +{ + if (class_pktcdvd) { + pd->dev = device_create_with_groups(class_pktcdvd, NULL, + MKDEV(0, 0), pd, pkt_groups, + "%s", pd->name); + if (IS_ERR(pd->dev)) + pd->dev = NULL; + } +} + +static void pkt_sysfs_dev_remove(struct pktcdvd_device *pd) +{ + if (class_pktcdvd) + device_unregister(pd->dev); +} + + +/******************************************************************** + /sys/class/pktcdvd/ + add map block device + remove unmap packet dev + device_map show mappings + *******************************************************************/ + +static void class_pktcdvd_release(struct class *cls) +{ + kfree(cls); +} + +static ssize_t device_map_show(struct class *c, struct class_attribute *attr, + char *data) +{ + int n = 0; + int idx; + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + for (idx = 0; idx < MAX_WRITERS; idx++) { + struct pktcdvd_device *pd = pkt_devs[idx]; + if (!pd) + continue; + n += sprintf(data+n, "%s %u:%u %u:%u\n", + pd->name, + MAJOR(pd->pkt_dev), MINOR(pd->pkt_dev), + MAJOR(pd->bdev->bd_dev), + MINOR(pd->bdev->bd_dev)); + } + mutex_unlock(&ctl_mutex); + return n; +} +static CLASS_ATTR_RO(device_map); + +static ssize_t add_store(struct class *c, struct class_attribute *attr, + const char *buf, size_t count) +{ + unsigned int major, minor; + + if (sscanf(buf, "%u:%u", &major, &minor) == 2) { + /* pkt_setup_dev() expects caller to hold reference to self */ + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + pkt_setup_dev(MKDEV(major, minor), NULL); + + module_put(THIS_MODULE); + + return count; + } + + return -EINVAL; +} +static CLASS_ATTR_WO(add); + +static ssize_t remove_store(struct class *c, struct class_attribute *attr, + const char *buf, size_t count) +{ + unsigned int major, minor; + if (sscanf(buf, "%u:%u", &major, &minor) == 2) { + pkt_remove_dev(MKDEV(major, minor)); + return count; + } + return -EINVAL; +} +static CLASS_ATTR_WO(remove); + +static struct attribute *class_pktcdvd_attrs[] = { + &class_attr_add.attr, + &class_attr_remove.attr, + &class_attr_device_map.attr, + NULL, +}; +ATTRIBUTE_GROUPS(class_pktcdvd); + +static int pkt_sysfs_init(void) +{ + int ret = 0; + + /* + * create control files in sysfs + * /sys/class/pktcdvd/... + */ + class_pktcdvd = kzalloc(sizeof(*class_pktcdvd), GFP_KERNEL); + if (!class_pktcdvd) + return -ENOMEM; + class_pktcdvd->name = DRIVER_NAME; + class_pktcdvd->owner = THIS_MODULE; + class_pktcdvd->class_release = class_pktcdvd_release; + class_pktcdvd->class_groups = class_pktcdvd_groups; + ret = class_register(class_pktcdvd); + if (ret) { + kfree(class_pktcdvd); + class_pktcdvd = NULL; + pr_err("failed to create class pktcdvd\n"); + return ret; + } + return 0; +} + +static void pkt_sysfs_cleanup(void) +{ + if (class_pktcdvd) + class_destroy(class_pktcdvd); + class_pktcdvd = NULL; +} + +/******************************************************************** + entries in debugfs + + /sys/kernel/debug/pktcdvd[0-7]/ + info + + *******************************************************************/ + +static int pkt_debugfs_seq_show(struct seq_file *m, void *p) +{ + return pkt_seq_show(m, p); +} + +static int pkt_debugfs_fops_open(struct inode *inode, struct file *file) +{ + return single_open(file, pkt_debugfs_seq_show, inode->i_private); +} + +static const struct file_operations debug_fops = { + .open = pkt_debugfs_fops_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static void pkt_debugfs_dev_new(struct pktcdvd_device *pd) +{ + if (!pkt_debugfs_root) + return; + pd->dfs_d_root = debugfs_create_dir(pd->name, pkt_debugfs_root); + if (!pd->dfs_d_root) + return; + + pd->dfs_f_info = debugfs_create_file("info", 0444, + pd->dfs_d_root, pd, &debug_fops); +} + +static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd) +{ + if (!pkt_debugfs_root) + return; + debugfs_remove(pd->dfs_f_info); + debugfs_remove(pd->dfs_d_root); + pd->dfs_f_info = NULL; + pd->dfs_d_root = NULL; +} + +static void pkt_debugfs_init(void) +{ + pkt_debugfs_root = debugfs_create_dir(DRIVER_NAME, NULL); +} + +static void pkt_debugfs_cleanup(void) +{ + debugfs_remove(pkt_debugfs_root); + pkt_debugfs_root = NULL; +} + +/* ----------------------------------------------------------*/ + + +static void pkt_bio_finished(struct pktcdvd_device *pd) +{ + BUG_ON(atomic_read(&pd->cdrw.pending_bios) <= 0); + if (atomic_dec_and_test(&pd->cdrw.pending_bios)) { + pkt_dbg(2, pd, "queue empty\n"); + atomic_set(&pd->iosched.attention, 1); + wake_up(&pd->wqueue); + } +} + +/* + * Allocate a packet_data struct + */ +static struct packet_data *pkt_alloc_packet_data(int frames) +{ + int i; + struct packet_data *pkt; + + pkt = kzalloc(sizeof(struct packet_data), GFP_KERNEL); + if (!pkt) + goto no_pkt; + + pkt->frames = frames; + pkt->w_bio = bio_kmalloc(frames, GFP_KERNEL); + if (!pkt->w_bio) + goto no_bio; + + for (i = 0; i < frames / FRAMES_PER_PAGE; i++) { + pkt->pages[i] = alloc_page(GFP_KERNEL|__GFP_ZERO); + if (!pkt->pages[i]) + goto no_page; + } + + spin_lock_init(&pkt->lock); + bio_list_init(&pkt->orig_bios); + + for (i = 0; i < frames; i++) { + pkt->r_bios[i] = bio_kmalloc(1, GFP_KERNEL); + if (!pkt->r_bios[i]) + goto no_rd_bio; + } + + return pkt; + +no_rd_bio: + for (i = 0; i < frames; i++) + kfree(pkt->r_bios[i]); +no_page: + for (i = 0; i < frames / FRAMES_PER_PAGE; i++) + if (pkt->pages[i]) + __free_page(pkt->pages[i]); + kfree(pkt->w_bio); +no_bio: + kfree(pkt); +no_pkt: + return NULL; +} + +/* + * Free a packet_data struct + */ +static void pkt_free_packet_data(struct packet_data *pkt) +{ + int i; + + for (i = 0; i < pkt->frames; i++) + kfree(pkt->r_bios[i]); + for (i = 0; i < pkt->frames / FRAMES_PER_PAGE; i++) + __free_page(pkt->pages[i]); + kfree(pkt->w_bio); + kfree(pkt); +} + +static void pkt_shrink_pktlist(struct pktcdvd_device *pd) +{ + struct packet_data *pkt, *next; + + BUG_ON(!list_empty(&pd->cdrw.pkt_active_list)); + + list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_free_list, list) { + pkt_free_packet_data(pkt); + } + INIT_LIST_HEAD(&pd->cdrw.pkt_free_list); +} + +static int pkt_grow_pktlist(struct pktcdvd_device *pd, int nr_packets) +{ + struct packet_data *pkt; + + BUG_ON(!list_empty(&pd->cdrw.pkt_free_list)); + + while (nr_packets > 0) { + pkt = pkt_alloc_packet_data(pd->settings.size >> 2); + if (!pkt) { + pkt_shrink_pktlist(pd); + return 0; + } + pkt->id = nr_packets; + pkt->pd = pd; + list_add(&pkt->list, &pd->cdrw.pkt_free_list); + nr_packets--; + } + return 1; +} + +static inline struct pkt_rb_node *pkt_rbtree_next(struct pkt_rb_node *node) +{ + struct rb_node *n = rb_next(&node->rb_node); + if (!n) + return NULL; + return rb_entry(n, struct pkt_rb_node, rb_node); +} + +static void pkt_rbtree_erase(struct pktcdvd_device *pd, struct pkt_rb_node *node) +{ + rb_erase(&node->rb_node, &pd->bio_queue); + mempool_free(node, &pd->rb_pool); + pd->bio_queue_size--; + BUG_ON(pd->bio_queue_size < 0); +} + +/* + * Find the first node in the pd->bio_queue rb tree with a starting sector >= s. + */ +static struct pkt_rb_node *pkt_rbtree_find(struct pktcdvd_device *pd, sector_t s) +{ + struct rb_node *n = pd->bio_queue.rb_node; + struct rb_node *next; + struct pkt_rb_node *tmp; + + if (!n) { + BUG_ON(pd->bio_queue_size > 0); + return NULL; + } + + for (;;) { + tmp = rb_entry(n, struct pkt_rb_node, rb_node); + if (s <= tmp->bio->bi_iter.bi_sector) + next = n->rb_left; + else + next = n->rb_right; + if (!next) + break; + n = next; + } + + if (s > tmp->bio->bi_iter.bi_sector) { + tmp = pkt_rbtree_next(tmp); + if (!tmp) + return NULL; + } + BUG_ON(s > tmp->bio->bi_iter.bi_sector); + return tmp; +} + +/* + * Insert a node into the pd->bio_queue rb tree. + */ +static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *node) +{ + struct rb_node **p = &pd->bio_queue.rb_node; + struct rb_node *parent = NULL; + sector_t s = node->bio->bi_iter.bi_sector; + struct pkt_rb_node *tmp; + + while (*p) { + parent = *p; + tmp = rb_entry(parent, struct pkt_rb_node, rb_node); + if (s < tmp->bio->bi_iter.bi_sector) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&node->rb_node, parent, p); + rb_insert_color(&node->rb_node, &pd->bio_queue); + pd->bio_queue_size++; +} + +/* + * Send a packet_command to the underlying block device and + * wait for completion. + */ +static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc) +{ + struct request_queue *q = bdev_get_queue(pd->bdev); + struct scsi_cmnd *scmd; + struct request *rq; + int ret = 0; + + rq = scsi_alloc_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); + if (IS_ERR(rq)) + return PTR_ERR(rq); + scmd = blk_mq_rq_to_pdu(rq); + + if (cgc->buflen) { + ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, + GFP_NOIO); + if (ret) + goto out; + } + + scmd->cmd_len = COMMAND_SIZE(cgc->cmd[0]); + memcpy(scmd->cmnd, cgc->cmd, CDROM_PACKET_SIZE); + + rq->timeout = 60*HZ; + if (cgc->quiet) + rq->rq_flags |= RQF_QUIET; + + blk_execute_rq(rq, false); + if (scmd->result) + ret = -EIO; +out: + blk_mq_free_request(rq); + return ret; +} + +static const char *sense_key_string(__u8 index) +{ + static const char * const info[] = { + "No sense", "Recovered error", "Not ready", + "Medium error", "Hardware error", "Illegal request", + "Unit attention", "Data protect", "Blank check", + }; + + return index < ARRAY_SIZE(info) ? info[index] : "INVALID"; +} + +/* + * A generic sense dump / resolve mechanism should be implemented across + * all ATAPI + SCSI devices. + */ +static void pkt_dump_sense(struct pktcdvd_device *pd, + struct packet_command *cgc) +{ + struct scsi_sense_hdr *sshdr = cgc->sshdr; + + if (sshdr) + pkt_err(pd, "%*ph - sense %02x.%02x.%02x (%s)\n", + CDROM_PACKET_SIZE, cgc->cmd, + sshdr->sense_key, sshdr->asc, sshdr->ascq, + sense_key_string(sshdr->sense_key)); + else + pkt_err(pd, "%*ph - no sense\n", CDROM_PACKET_SIZE, cgc->cmd); +} + +/* + * flush the drive cache to media + */ +static int pkt_flush_cache(struct pktcdvd_device *pd) +{ + struct packet_command cgc; + + init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); + cgc.cmd[0] = GPCMD_FLUSH_CACHE; + cgc.quiet = 1; + + /* + * the IMMED bit -- we default to not setting it, although that + * would allow a much faster close, this is safer + */ +#if 0 + cgc.cmd[1] = 1 << 1; +#endif + return pkt_generic_packet(pd, &cgc); +} + +/* + * speed is given as the normal factor, e.g. 4 for 4x + */ +static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd, + unsigned write_speed, unsigned read_speed) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + int ret; + + init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); + cgc.sshdr = &sshdr; + cgc.cmd[0] = GPCMD_SET_SPEED; + cgc.cmd[2] = (read_speed >> 8) & 0xff; + cgc.cmd[3] = read_speed & 0xff; + cgc.cmd[4] = (write_speed >> 8) & 0xff; + cgc.cmd[5] = write_speed & 0xff; + + ret = pkt_generic_packet(pd, &cgc); + if (ret) + pkt_dump_sense(pd, &cgc); + + return ret; +} + +/* + * Queue a bio for processing by the low-level CD device. Must be called + * from process context. + */ +static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio) +{ + spin_lock(&pd->iosched.lock); + if (bio_data_dir(bio) == READ) + bio_list_add(&pd->iosched.read_queue, bio); + else + bio_list_add(&pd->iosched.write_queue, bio); + spin_unlock(&pd->iosched.lock); + + atomic_set(&pd->iosched.attention, 1); + wake_up(&pd->wqueue); +} + +/* + * Process the queued read/write requests. This function handles special + * requirements for CDRW drives: + * - A cache flush command must be inserted before a read request if the + * previous request was a write. + * - Switching between reading and writing is slow, so don't do it more often + * than necessary. + * - Optimize for throughput at the expense of latency. This means that streaming + * writes will never be interrupted by a read, but if the drive has to seek + * before the next write, switch to reading instead if there are any pending + * read requests. + * - Set the read speed according to current usage pattern. When only reading + * from the device, it's best to use the highest possible read speed, but + * when switching often between reading and writing, it's better to have the + * same read and write speeds. + */ +static void pkt_iosched_process_queue(struct pktcdvd_device *pd) +{ + + if (atomic_read(&pd->iosched.attention) == 0) + return; + atomic_set(&pd->iosched.attention, 0); + + for (;;) { + struct bio *bio; + int reads_queued, writes_queued; + + spin_lock(&pd->iosched.lock); + reads_queued = !bio_list_empty(&pd->iosched.read_queue); + writes_queued = !bio_list_empty(&pd->iosched.write_queue); + spin_unlock(&pd->iosched.lock); + + if (!reads_queued && !writes_queued) + break; + + if (pd->iosched.writing) { + int need_write_seek = 1; + spin_lock(&pd->iosched.lock); + bio = bio_list_peek(&pd->iosched.write_queue); + spin_unlock(&pd->iosched.lock); + if (bio && (bio->bi_iter.bi_sector == + pd->iosched.last_write)) + need_write_seek = 0; + if (need_write_seek && reads_queued) { + if (atomic_read(&pd->cdrw.pending_bios) > 0) { + pkt_dbg(2, pd, "write, waiting\n"); + break; + } + pkt_flush_cache(pd); + pd->iosched.writing = 0; + } + } else { + if (!reads_queued && writes_queued) { + if (atomic_read(&pd->cdrw.pending_bios) > 0) { + pkt_dbg(2, pd, "read, waiting\n"); + break; + } + pd->iosched.writing = 1; + } + } + + spin_lock(&pd->iosched.lock); + if (pd->iosched.writing) + bio = bio_list_pop(&pd->iosched.write_queue); + else + bio = bio_list_pop(&pd->iosched.read_queue); + spin_unlock(&pd->iosched.lock); + + if (!bio) + continue; + + if (bio_data_dir(bio) == READ) + pd->iosched.successive_reads += + bio->bi_iter.bi_size >> 10; + else { + pd->iosched.successive_reads = 0; + pd->iosched.last_write = bio_end_sector(bio); + } + if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) { + if (pd->read_speed == pd->write_speed) { + pd->read_speed = MAX_SPEED; + pkt_set_speed(pd, pd->write_speed, pd->read_speed); + } + } else { + if (pd->read_speed != pd->write_speed) { + pd->read_speed = pd->write_speed; + pkt_set_speed(pd, pd->write_speed, pd->read_speed); + } + } + + atomic_inc(&pd->cdrw.pending_bios); + submit_bio_noacct(bio); + } +} + +/* + * Special care is needed if the underlying block device has a small + * max_phys_segments value. + */ +static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_queue *q) +{ + if ((pd->settings.size << 9) / CD_FRAMESIZE + <= queue_max_segments(q)) { + /* + * The cdrom device can handle one segment/frame + */ + clear_bit(PACKET_MERGE_SEGS, &pd->flags); + return 0; + } else if ((pd->settings.size << 9) / PAGE_SIZE + <= queue_max_segments(q)) { + /* + * We can handle this case at the expense of some extra memory + * copies during write operations + */ + set_bit(PACKET_MERGE_SEGS, &pd->flags); + return 0; + } else { + pkt_err(pd, "cdrom max_phys_segments too small\n"); + return -EIO; + } +} + +static void pkt_end_io_read(struct bio *bio) +{ + struct packet_data *pkt = bio->bi_private; + struct pktcdvd_device *pd = pkt->pd; + BUG_ON(!pd); + + pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n", + bio, (unsigned long long)pkt->sector, + (unsigned long long)bio->bi_iter.bi_sector, bio->bi_status); + + if (bio->bi_status) + atomic_inc(&pkt->io_errors); + bio_uninit(bio); + if (atomic_dec_and_test(&pkt->io_wait)) { + atomic_inc(&pkt->run_sm); + wake_up(&pd->wqueue); + } + pkt_bio_finished(pd); +} + +static void pkt_end_io_packet_write(struct bio *bio) +{ + struct packet_data *pkt = bio->bi_private; + struct pktcdvd_device *pd = pkt->pd; + BUG_ON(!pd); + + pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status); + + pd->stats.pkt_ended++; + + bio_uninit(bio); + pkt_bio_finished(pd); + atomic_dec(&pkt->io_wait); + atomic_inc(&pkt->run_sm); + wake_up(&pd->wqueue); +} + +/* + * Schedule reads for the holes in a packet + */ +static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) +{ + int frames_read = 0; + struct bio *bio; + int f; + char written[PACKET_MAX_SIZE]; + + BUG_ON(bio_list_empty(&pkt->orig_bios)); + + atomic_set(&pkt->io_wait, 0); + atomic_set(&pkt->io_errors, 0); + + /* + * Figure out which frames we need to read before we can write. + */ + memset(written, 0, sizeof(written)); + spin_lock(&pkt->lock); + bio_list_for_each(bio, &pkt->orig_bios) { + int first_frame = (bio->bi_iter.bi_sector - pkt->sector) / + (CD_FRAMESIZE >> 9); + int num_frames = bio->bi_iter.bi_size / CD_FRAMESIZE; + pd->stats.secs_w += num_frames * (CD_FRAMESIZE >> 9); + BUG_ON(first_frame < 0); + BUG_ON(first_frame + num_frames > pkt->frames); + for (f = first_frame; f < first_frame + num_frames; f++) + written[f] = 1; + } + spin_unlock(&pkt->lock); + + if (pkt->cache_valid) { + pkt_dbg(2, pd, "zone %llx cached\n", + (unsigned long long)pkt->sector); + goto out_account; + } + + /* + * Schedule reads for missing parts of the packet. + */ + for (f = 0; f < pkt->frames; f++) { + int p, offset; + + if (written[f]) + continue; + + bio = pkt->r_bios[f]; + bio_init(bio, pd->bdev, bio->bi_inline_vecs, 1, REQ_OP_READ); + bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); + bio->bi_end_io = pkt_end_io_read; + bio->bi_private = pkt; + + p = (f * CD_FRAMESIZE) / PAGE_SIZE; + offset = (f * CD_FRAMESIZE) % PAGE_SIZE; + pkt_dbg(2, pd, "Adding frame %d, page:%p offs:%d\n", + f, pkt->pages[p], offset); + if (!bio_add_page(bio, pkt->pages[p], CD_FRAMESIZE, offset)) + BUG(); + + atomic_inc(&pkt->io_wait); + pkt_queue_bio(pd, bio); + frames_read++; + } + +out_account: + pkt_dbg(2, pd, "need %d frames for zone %llx\n", + frames_read, (unsigned long long)pkt->sector); + pd->stats.pkt_started++; + pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9); +} + +/* + * Find a packet matching zone, or the least recently used packet if + * there is no match. + */ +static struct packet_data *pkt_get_packet_data(struct pktcdvd_device *pd, int zone) +{ + struct packet_data *pkt; + + list_for_each_entry(pkt, &pd->cdrw.pkt_free_list, list) { + if (pkt->sector == zone || pkt->list.next == &pd->cdrw.pkt_free_list) { + list_del_init(&pkt->list); + if (pkt->sector != zone) + pkt->cache_valid = 0; + return pkt; + } + } + BUG(); + return NULL; +} + +static void pkt_put_packet_data(struct pktcdvd_device *pd, struct packet_data *pkt) +{ + if (pkt->cache_valid) { + list_add(&pkt->list, &pd->cdrw.pkt_free_list); + } else { + list_add_tail(&pkt->list, &pd->cdrw.pkt_free_list); + } +} + +static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state state) +{ +#if PACKET_DEBUG > 1 + static const char *state_name[] = { + "IDLE", "WAITING", "READ_WAIT", "WRITE_WAIT", "RECOVERY", "FINISHED" + }; + enum packet_data_state old_state = pkt->state; + pkt_dbg(2, pd, "pkt %2d : s=%6llx %s -> %s\n", + pkt->id, (unsigned long long)pkt->sector, + state_name[old_state], state_name[state]); +#endif + pkt->state = state; +} + +/* + * Scan the work queue to see if we can start a new packet. + * returns non-zero if any work was done. + */ +static int pkt_handle_queue(struct pktcdvd_device *pd) +{ + struct packet_data *pkt, *p; + struct bio *bio = NULL; + sector_t zone = 0; /* Suppress gcc warning */ + struct pkt_rb_node *node, *first_node; + struct rb_node *n; + + atomic_set(&pd->scan_queue, 0); + + if (list_empty(&pd->cdrw.pkt_free_list)) { + pkt_dbg(2, pd, "no pkt\n"); + return 0; + } + + /* + * Try to find a zone we are not already working on. + */ + spin_lock(&pd->lock); + first_node = pkt_rbtree_find(pd, pd->current_sector); + if (!first_node) { + n = rb_first(&pd->bio_queue); + if (n) + first_node = rb_entry(n, struct pkt_rb_node, rb_node); + } + node = first_node; + while (node) { + bio = node->bio; + zone = get_zone(bio->bi_iter.bi_sector, pd); + list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) { + if (p->sector == zone) { + bio = NULL; + goto try_next_bio; + } + } + break; +try_next_bio: + node = pkt_rbtree_next(node); + if (!node) { + n = rb_first(&pd->bio_queue); + if (n) + node = rb_entry(n, struct pkt_rb_node, rb_node); + } + if (node == first_node) + node = NULL; + } + spin_unlock(&pd->lock); + if (!bio) { + pkt_dbg(2, pd, "no bio\n"); + return 0; + } + + pkt = pkt_get_packet_data(pd, zone); + + pd->current_sector = zone + pd->settings.size; + pkt->sector = zone; + BUG_ON(pkt->frames != pd->settings.size >> 2); + pkt->write_size = 0; + + /* + * Scan work queue for bios in the same zone and link them + * to this packet. + */ + spin_lock(&pd->lock); + pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone); + while ((node = pkt_rbtree_find(pd, zone)) != NULL) { + bio = node->bio; + pkt_dbg(2, pd, "found zone=%llx\n", (unsigned long long) + get_zone(bio->bi_iter.bi_sector, pd)); + if (get_zone(bio->bi_iter.bi_sector, pd) != zone) + break; + pkt_rbtree_erase(pd, node); + spin_lock(&pkt->lock); + bio_list_add(&pkt->orig_bios, bio); + pkt->write_size += bio->bi_iter.bi_size / CD_FRAMESIZE; + spin_unlock(&pkt->lock); + } + /* check write congestion marks, and if bio_queue_size is + * below, wake up any waiters + */ + if (pd->congested && + pd->bio_queue_size <= pd->write_congestion_off) { + pd->congested = false; + wake_up_var(&pd->congested); + } + spin_unlock(&pd->lock); + + pkt->sleep_time = max(PACKET_WAIT_TIME, 1); + pkt_set_state(pkt, PACKET_WAITING_STATE); + atomic_set(&pkt->run_sm, 1); + + spin_lock(&pd->cdrw.active_list_lock); + list_add(&pkt->list, &pd->cdrw.pkt_active_list); + spin_unlock(&pd->cdrw.active_list_lock); + + return 1; +} + +/** + * bio_list_copy_data - copy contents of data buffers from one chain of bios to + * another + * @src: source bio list + * @dst: destination bio list + * + * Stops when it reaches the end of either the @src list or @dst list - that is, + * copies min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of + * bios). + */ +static void bio_list_copy_data(struct bio *dst, struct bio *src) +{ + struct bvec_iter src_iter = src->bi_iter; + struct bvec_iter dst_iter = dst->bi_iter; + + while (1) { + if (!src_iter.bi_size) { + src = src->bi_next; + if (!src) + break; + + src_iter = src->bi_iter; + } + + if (!dst_iter.bi_size) { + dst = dst->bi_next; + if (!dst) + break; + + dst_iter = dst->bi_iter; + } + + bio_copy_data_iter(dst, &dst_iter, src, &src_iter); + } +} + +/* + * Assemble a bio to write one packet and queue the bio for processing + * by the underlying block device. + */ +static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) +{ + int f; + + bio_init(pkt->w_bio, pd->bdev, pkt->w_bio->bi_inline_vecs, pkt->frames, + REQ_OP_WRITE); + pkt->w_bio->bi_iter.bi_sector = pkt->sector; + pkt->w_bio->bi_end_io = pkt_end_io_packet_write; + pkt->w_bio->bi_private = pkt; + + /* XXX: locking? */ + for (f = 0; f < pkt->frames; f++) { + struct page *page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE]; + unsigned offset = (f * CD_FRAMESIZE) % PAGE_SIZE; + + if (!bio_add_page(pkt->w_bio, page, CD_FRAMESIZE, offset)) + BUG(); + } + pkt_dbg(2, pd, "vcnt=%d\n", pkt->w_bio->bi_vcnt); + + /* + * Fill-in bvec with data from orig_bios. + */ + spin_lock(&pkt->lock); + bio_list_copy_data(pkt->w_bio, pkt->orig_bios.head); + + pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE); + spin_unlock(&pkt->lock); + + pkt_dbg(2, pd, "Writing %d frames for zone %llx\n", + pkt->write_size, (unsigned long long)pkt->sector); + + if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) + pkt->cache_valid = 1; + else + pkt->cache_valid = 0; + + /* Start the write request */ + atomic_set(&pkt->io_wait, 1); + pkt_queue_bio(pd, pkt->w_bio); +} + +static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status) +{ + struct bio *bio; + + if (status) + pkt->cache_valid = 0; + + /* Finish all bios corresponding to this packet */ + while ((bio = bio_list_pop(&pkt->orig_bios))) { + bio->bi_status = status; + bio_endio(bio); + } +} + +static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data *pkt) +{ + pkt_dbg(2, pd, "pkt %d\n", pkt->id); + + for (;;) { + switch (pkt->state) { + case PACKET_WAITING_STATE: + if ((pkt->write_size < pkt->frames) && (pkt->sleep_time > 0)) + return; + + pkt->sleep_time = 0; + pkt_gather_data(pd, pkt); + pkt_set_state(pkt, PACKET_READ_WAIT_STATE); + break; + + case PACKET_READ_WAIT_STATE: + if (atomic_read(&pkt->io_wait) > 0) + return; + + if (atomic_read(&pkt->io_errors) > 0) { + pkt_set_state(pkt, PACKET_RECOVERY_STATE); + } else { + pkt_start_write(pd, pkt); + } + break; + + case PACKET_WRITE_WAIT_STATE: + if (atomic_read(&pkt->io_wait) > 0) + return; + + if (!pkt->w_bio->bi_status) { + pkt_set_state(pkt, PACKET_FINISHED_STATE); + } else { + pkt_set_state(pkt, PACKET_RECOVERY_STATE); + } + break; + + case PACKET_RECOVERY_STATE: + pkt_dbg(2, pd, "No recovery possible\n"); + pkt_set_state(pkt, PACKET_FINISHED_STATE); + break; + + case PACKET_FINISHED_STATE: + pkt_finish_packet(pkt, pkt->w_bio->bi_status); + return; + + default: + BUG(); + break; + } + } +} + +static void pkt_handle_packets(struct pktcdvd_device *pd) +{ + struct packet_data *pkt, *next; + + /* + * Run state machine for active packets + */ + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (atomic_read(&pkt->run_sm) > 0) { + atomic_set(&pkt->run_sm, 0); + pkt_run_state_machine(pd, pkt); + } + } + + /* + * Move no longer active packets to the free list + */ + spin_lock(&pd->cdrw.active_list_lock); + list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_active_list, list) { + if (pkt->state == PACKET_FINISHED_STATE) { + list_del(&pkt->list); + pkt_put_packet_data(pd, pkt); + pkt_set_state(pkt, PACKET_IDLE_STATE); + atomic_set(&pd->scan_queue, 1); + } + } + spin_unlock(&pd->cdrw.active_list_lock); +} + +static void pkt_count_states(struct pktcdvd_device *pd, int *states) +{ + struct packet_data *pkt; + int i; + + for (i = 0; i < PACKET_NUM_STATES; i++) + states[i] = 0; + + spin_lock(&pd->cdrw.active_list_lock); + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + states[pkt->state]++; + } + spin_unlock(&pd->cdrw.active_list_lock); +} + +/* + * kcdrwd is woken up when writes have been queued for one of our + * registered devices + */ +static int kcdrwd(void *foobar) +{ + struct pktcdvd_device *pd = foobar; + struct packet_data *pkt; + long min_sleep_time, residue; + + set_user_nice(current, MIN_NICE); + set_freezable(); + + for (;;) { + DECLARE_WAITQUEUE(wait, current); + + /* + * Wait until there is something to do + */ + add_wait_queue(&pd->wqueue, &wait); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + + /* Check if we need to run pkt_handle_queue */ + if (atomic_read(&pd->scan_queue) > 0) + goto work_to_do; + + /* Check if we need to run the state machine for some packet */ + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (atomic_read(&pkt->run_sm) > 0) + goto work_to_do; + } + + /* Check if we need to process the iosched queues */ + if (atomic_read(&pd->iosched.attention) != 0) + goto work_to_do; + + /* Otherwise, go to sleep */ + if (PACKET_DEBUG > 1) { + int states[PACKET_NUM_STATES]; + pkt_count_states(pd, states); + pkt_dbg(2, pd, "i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n", + states[0], states[1], states[2], + states[3], states[4], states[5]); + } + + min_sleep_time = MAX_SCHEDULE_TIMEOUT; + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (pkt->sleep_time && pkt->sleep_time < min_sleep_time) + min_sleep_time = pkt->sleep_time; + } + + pkt_dbg(2, pd, "sleeping\n"); + residue = schedule_timeout(min_sleep_time); + pkt_dbg(2, pd, "wake up\n"); + + /* make swsusp happy with our thread */ + try_to_freeze(); + + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (!pkt->sleep_time) + continue; + pkt->sleep_time -= min_sleep_time - residue; + if (pkt->sleep_time <= 0) { + pkt->sleep_time = 0; + atomic_inc(&pkt->run_sm); + } + } + + if (kthread_should_stop()) + break; + } +work_to_do: + set_current_state(TASK_RUNNING); + remove_wait_queue(&pd->wqueue, &wait); + + if (kthread_should_stop()) + break; + + /* + * if pkt_handle_queue returns true, we can queue + * another request. + */ + while (pkt_handle_queue(pd)) + ; + + /* + * Handle packet state machine + */ + pkt_handle_packets(pd); + + /* + * Handle iosched queues + */ + pkt_iosched_process_queue(pd); + } + + return 0; +} + +static void pkt_print_settings(struct pktcdvd_device *pd) +{ + pkt_info(pd, "%s packets, %u blocks, Mode-%c disc\n", + pd->settings.fp ? "Fixed" : "Variable", + pd->settings.size >> 2, + pd->settings.block_mode == 8 ? '1' : '2'); +} + +static int pkt_mode_sense(struct pktcdvd_device *pd, struct packet_command *cgc, int page_code, int page_control) +{ + memset(cgc->cmd, 0, sizeof(cgc->cmd)); + + cgc->cmd[0] = GPCMD_MODE_SENSE_10; + cgc->cmd[2] = page_code | (page_control << 6); + cgc->cmd[7] = cgc->buflen >> 8; + cgc->cmd[8] = cgc->buflen & 0xff; + cgc->data_direction = CGC_DATA_READ; + return pkt_generic_packet(pd, cgc); +} + +static int pkt_mode_select(struct pktcdvd_device *pd, struct packet_command *cgc) +{ + memset(cgc->cmd, 0, sizeof(cgc->cmd)); + memset(cgc->buffer, 0, 2); + cgc->cmd[0] = GPCMD_MODE_SELECT_10; + cgc->cmd[1] = 0x10; /* PF */ + cgc->cmd[7] = cgc->buflen >> 8; + cgc->cmd[8] = cgc->buflen & 0xff; + cgc->data_direction = CGC_DATA_WRITE; + return pkt_generic_packet(pd, cgc); +} + +static int pkt_get_disc_info(struct pktcdvd_device *pd, disc_information *di) +{ + struct packet_command cgc; + int ret; + + /* set up command and get the disc info */ + init_cdrom_command(&cgc, di, sizeof(*di), CGC_DATA_READ); + cgc.cmd[0] = GPCMD_READ_DISC_INFO; + cgc.cmd[8] = cgc.buflen = 2; + cgc.quiet = 1; + + ret = pkt_generic_packet(pd, &cgc); + if (ret) + return ret; + + /* not all drives have the same disc_info length, so requeue + * packet with the length the drive tells us it can supply + */ + cgc.buflen = be16_to_cpu(di->disc_information_length) + + sizeof(di->disc_information_length); + + if (cgc.buflen > sizeof(disc_information)) + cgc.buflen = sizeof(disc_information); + + cgc.cmd[8] = cgc.buflen; + return pkt_generic_packet(pd, &cgc); +} + +static int pkt_get_track_info(struct pktcdvd_device *pd, __u16 track, __u8 type, track_information *ti) +{ + struct packet_command cgc; + int ret; + + init_cdrom_command(&cgc, ti, 8, CGC_DATA_READ); + cgc.cmd[0] = GPCMD_READ_TRACK_RZONE_INFO; + cgc.cmd[1] = type & 3; + cgc.cmd[4] = (track & 0xff00) >> 8; + cgc.cmd[5] = track & 0xff; + cgc.cmd[8] = 8; + cgc.quiet = 1; + + ret = pkt_generic_packet(pd, &cgc); + if (ret) + return ret; + + cgc.buflen = be16_to_cpu(ti->track_information_length) + + sizeof(ti->track_information_length); + + if (cgc.buflen > sizeof(track_information)) + cgc.buflen = sizeof(track_information); + + cgc.cmd[8] = cgc.buflen; + return pkt_generic_packet(pd, &cgc); +} + +static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd, + long *last_written) +{ + disc_information di; + track_information ti; + __u32 last_track; + int ret; + + ret = pkt_get_disc_info(pd, &di); + if (ret) + return ret; + + last_track = (di.last_track_msb << 8) | di.last_track_lsb; + ret = pkt_get_track_info(pd, last_track, 1, &ti); + if (ret) + return ret; + + /* if this track is blank, try the previous. */ + if (ti.blank) { + last_track--; + ret = pkt_get_track_info(pd, last_track, 1, &ti); + if (ret) + return ret; + } + + /* if last recorded field is valid, return it. */ + if (ti.lra_v) { + *last_written = be32_to_cpu(ti.last_rec_address); + } else { + /* make it up instead */ + *last_written = be32_to_cpu(ti.track_start) + + be32_to_cpu(ti.track_size); + if (ti.free_blocks) + *last_written -= (be32_to_cpu(ti.free_blocks) + 7); + } + return 0; +} + +/* + * write mode select package based on pd->settings + */ +static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + write_param_page *wp; + char buffer[128]; + int ret, size; + + /* doesn't apply to DVD+RW or DVD-RAM */ + if ((pd->mmc3_profile == 0x1a) || (pd->mmc3_profile == 0x12)) + return 0; + + memset(buffer, 0, sizeof(buffer)); + init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ); + cgc.sshdr = &sshdr; + ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + + size = 2 + ((buffer[0] << 8) | (buffer[1] & 0xff)); + pd->mode_offset = (buffer[6] << 8) | (buffer[7] & 0xff); + if (size > sizeof(buffer)) + size = sizeof(buffer); + + /* + * now get it all + */ + init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ); + cgc.sshdr = &sshdr; + ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + + /* + * write page is offset header + block descriptor length + */ + wp = (write_param_page *) &buffer[sizeof(struct mode_page_header) + pd->mode_offset]; + + wp->fp = pd->settings.fp; + wp->track_mode = pd->settings.track_mode; + wp->write_type = pd->settings.write_type; + wp->data_block_type = pd->settings.block_mode; + + wp->multi_session = 0; + +#ifdef PACKET_USE_LS + wp->link_size = 7; + wp->ls_v = 1; +#endif + + if (wp->data_block_type == PACKET_BLOCK_MODE1) { + wp->session_format = 0; + wp->subhdr2 = 0x20; + } else if (wp->data_block_type == PACKET_BLOCK_MODE2) { + wp->session_format = 0x20; + wp->subhdr2 = 8; +#if 0 + wp->mcn[0] = 0x80; + memcpy(&wp->mcn[1], PACKET_MCN, sizeof(wp->mcn) - 1); +#endif + } else { + /* + * paranoia + */ + pkt_err(pd, "write mode wrong %d\n", wp->data_block_type); + return 1; + } + wp->packet_size = cpu_to_be32(pd->settings.size >> 2); + + cgc.buflen = cgc.cmd[8] = size; + ret = pkt_mode_select(pd, &cgc); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + + pkt_print_settings(pd); + return 0; +} + +/* + * 1 -- we can write to this track, 0 -- we can't + */ +static int pkt_writable_track(struct pktcdvd_device *pd, track_information *ti) +{ + switch (pd->mmc3_profile) { + case 0x1a: /* DVD+RW */ + case 0x12: /* DVD-RAM */ + /* The track is always writable on DVD+RW/DVD-RAM */ + return 1; + default: + break; + } + + if (!ti->packet || !ti->fp) + return 0; + + /* + * "good" settings as per Mt Fuji. + */ + if (ti->rt == 0 && ti->blank == 0) + return 1; + + if (ti->rt == 0 && ti->blank == 1) + return 1; + + if (ti->rt == 1 && ti->blank == 0) + return 1; + + pkt_err(pd, "bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet); + return 0; +} + +/* + * 1 -- we can write to this disc, 0 -- we can't + */ +static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di) +{ + switch (pd->mmc3_profile) { + case 0x0a: /* CD-RW */ + case 0xffff: /* MMC3 not supported */ + break; + case 0x1a: /* DVD+RW */ + case 0x13: /* DVD-RW */ + case 0x12: /* DVD-RAM */ + return 1; + default: + pkt_dbg(2, pd, "Wrong disc profile (%x)\n", + pd->mmc3_profile); + return 0; + } + + /* + * for disc type 0xff we should probably reserve a new track. + * but i'm not sure, should we leave this to user apps? probably. + */ + if (di->disc_type == 0xff) { + pkt_notice(pd, "unknown disc - no track?\n"); + return 0; + } + + if (di->disc_type != 0x20 && di->disc_type != 0) { + pkt_err(pd, "wrong disc type (%x)\n", di->disc_type); + return 0; + } + + if (di->erasable == 0) { + pkt_notice(pd, "disc not erasable\n"); + return 0; + } + + if (di->border_status == PACKET_SESSION_RESERVED) { + pkt_err(pd, "can't write to last track (reserved)\n"); + return 0; + } + + return 1; +} + +static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd) +{ + struct packet_command cgc; + unsigned char buf[12]; + disc_information di; + track_information ti; + int ret, track; + + init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ); + cgc.cmd[0] = GPCMD_GET_CONFIGURATION; + cgc.cmd[8] = 8; + ret = pkt_generic_packet(pd, &cgc); + pd->mmc3_profile = ret ? 0xffff : buf[6] << 8 | buf[7]; + + memset(&di, 0, sizeof(disc_information)); + memset(&ti, 0, sizeof(track_information)); + + ret = pkt_get_disc_info(pd, &di); + if (ret) { + pkt_err(pd, "failed get_disc\n"); + return ret; + } + + if (!pkt_writable_disc(pd, &di)) + return -EROFS; + + pd->type = di.erasable ? PACKET_CDRW : PACKET_CDR; + + track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */ + ret = pkt_get_track_info(pd, track, 1, &ti); + if (ret) { + pkt_err(pd, "failed get_track\n"); + return ret; + } + + if (!pkt_writable_track(pd, &ti)) { + pkt_err(pd, "can't write to this track\n"); + return -EROFS; + } + + /* + * we keep packet size in 512 byte units, makes it easier to + * deal with request calculations. + */ + pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2; + if (pd->settings.size == 0) { + pkt_notice(pd, "detected zero packet size!\n"); + return -ENXIO; + } + if (pd->settings.size > PACKET_MAX_SECTORS) { + pkt_err(pd, "packet size is too big\n"); + return -EROFS; + } + pd->settings.fp = ti.fp; + pd->offset = (be32_to_cpu(ti.track_start) << 2) & (pd->settings.size - 1); + + if (ti.nwa_v) { + pd->nwa = be32_to_cpu(ti.next_writable); + set_bit(PACKET_NWA_VALID, &pd->flags); + } + + /* + * in theory we could use lra on -RW media as well and just zero + * blocks that haven't been written yet, but in practice that + * is just a no-go. we'll use that for -R, naturally. + */ + if (ti.lra_v) { + pd->lra = be32_to_cpu(ti.last_rec_address); + set_bit(PACKET_LRA_VALID, &pd->flags); + } else { + pd->lra = 0xffffffff; + set_bit(PACKET_LRA_VALID, &pd->flags); + } + + /* + * fine for now + */ + pd->settings.link_loss = 7; + pd->settings.write_type = 0; /* packet */ + pd->settings.track_mode = ti.track_mode; + + /* + * mode1 or mode2 disc + */ + switch (ti.data_mode) { + case PACKET_MODE1: + pd->settings.block_mode = PACKET_BLOCK_MODE1; + break; + case PACKET_MODE2: + pd->settings.block_mode = PACKET_BLOCK_MODE2; + break; + default: + pkt_err(pd, "unknown data mode\n"); + return -EROFS; + } + return 0; +} + +/* + * enable/disable write caching on drive + */ +static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd, + int set) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + unsigned char buf[64]; + int ret; + + init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ); + cgc.sshdr = &sshdr; + cgc.buflen = pd->mode_offset + 12; + + /* + * caching mode page might not be there, so quiet this command + */ + cgc.quiet = 1; + + ret = pkt_mode_sense(pd, &cgc, GPMODE_WCACHING_PAGE, 0); + if (ret) + return ret; + + buf[pd->mode_offset + 10] |= (!!set << 2); + + cgc.buflen = cgc.cmd[8] = 2 + ((buf[0] << 8) | (buf[1] & 0xff)); + ret = pkt_mode_select(pd, &cgc); + if (ret) { + pkt_err(pd, "write caching control failed\n"); + pkt_dump_sense(pd, &cgc); + } else if (!ret && set) + pkt_notice(pd, "enabled write caching\n"); + return ret; +} + +static int pkt_lock_door(struct pktcdvd_device *pd, int lockflag) +{ + struct packet_command cgc; + + init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); + cgc.cmd[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL; + cgc.cmd[4] = lockflag ? 1 : 0; + return pkt_generic_packet(pd, &cgc); +} + +/* + * Returns drive maximum write speed + */ +static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd, + unsigned *write_speed) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + unsigned char buf[256+18]; + unsigned char *cap_buf; + int ret, offset; + + cap_buf = &buf[sizeof(struct mode_page_header) + pd->mode_offset]; + init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_UNKNOWN); + cgc.sshdr = &sshdr; + + ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0); + if (ret) { + cgc.buflen = pd->mode_offset + cap_buf[1] + 2 + + sizeof(struct mode_page_header); + ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + } + + offset = 20; /* Obsoleted field, used by older drives */ + if (cap_buf[1] >= 28) + offset = 28; /* Current write speed selected */ + if (cap_buf[1] >= 30) { + /* If the drive reports at least one "Logical Unit Write + * Speed Performance Descriptor Block", use the information + * in the first block. (contains the highest speed) + */ + int num_spdb = (cap_buf[30] << 8) + cap_buf[31]; + if (num_spdb > 0) + offset = 34; + } + + *write_speed = (cap_buf[offset] << 8) | cap_buf[offset + 1]; + return 0; +} + +/* These tables from cdrecord - I don't have orange book */ +/* standard speed CD-RW (1-4x) */ +static char clv_to_speed[16] = { + /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ + 0, 2, 4, 6, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; +/* high speed CD-RW (-10x) */ +static char hs_clv_to_speed[16] = { + /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ + 0, 2, 4, 6, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; +/* ultra high speed CD-RW */ +static char us_clv_to_speed[16] = { + /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ + 0, 2, 4, 8, 0, 0,16, 0,24,32,40,48, 0, 0, 0, 0 +}; + +/* + * reads the maximum media speed from ATIP + */ +static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd, + unsigned *speed) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + unsigned char buf[64]; + unsigned int size, st, sp; + int ret; + + init_cdrom_command(&cgc, buf, 2, CGC_DATA_READ); + cgc.sshdr = &sshdr; + cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP; + cgc.cmd[1] = 2; + cgc.cmd[2] = 4; /* READ ATIP */ + cgc.cmd[8] = 2; + ret = pkt_generic_packet(pd, &cgc); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + size = ((unsigned int) buf[0]<<8) + buf[1] + 2; + if (size > sizeof(buf)) + size = sizeof(buf); + + init_cdrom_command(&cgc, buf, size, CGC_DATA_READ); + cgc.sshdr = &sshdr; + cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP; + cgc.cmd[1] = 2; + cgc.cmd[2] = 4; + cgc.cmd[8] = size; + ret = pkt_generic_packet(pd, &cgc); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + + if (!(buf[6] & 0x40)) { + pkt_notice(pd, "disc type is not CD-RW\n"); + return 1; + } + if (!(buf[6] & 0x4)) { + pkt_notice(pd, "A1 values on media are not valid, maybe not CDRW?\n"); + return 1; + } + + st = (buf[6] >> 3) & 0x7; /* disc sub-type */ + + sp = buf[16] & 0xf; /* max speed from ATIP A1 field */ + + /* Info from cdrecord */ + switch (st) { + case 0: /* standard speed */ + *speed = clv_to_speed[sp]; + break; + case 1: /* high speed */ + *speed = hs_clv_to_speed[sp]; + break; + case 2: /* ultra high speed */ + *speed = us_clv_to_speed[sp]; + break; + default: + pkt_notice(pd, "unknown disc sub-type %d\n", st); + return 1; + } + if (*speed) { + pkt_info(pd, "maximum media speed: %d\n", *speed); + return 0; + } else { + pkt_notice(pd, "unknown speed %d for sub-type %d\n", sp, st); + return 1; + } +} + +static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + int ret; + + pkt_dbg(2, pd, "Performing OPC\n"); + + init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); + cgc.sshdr = &sshdr; + cgc.timeout = 60*HZ; + cgc.cmd[0] = GPCMD_SEND_OPC; + cgc.cmd[1] = 1; + ret = pkt_generic_packet(pd, &cgc); + if (ret) + pkt_dump_sense(pd, &cgc); + return ret; +} + +static int pkt_open_write(struct pktcdvd_device *pd) +{ + int ret; + unsigned int write_speed, media_write_speed, read_speed; + + ret = pkt_probe_settings(pd); + if (ret) { + pkt_dbg(2, pd, "failed probe\n"); + return ret; + } + + ret = pkt_set_write_settings(pd); + if (ret) { + pkt_dbg(1, pd, "failed saving write settings\n"); + return -EIO; + } + + pkt_write_caching(pd, USE_WCACHING); + + ret = pkt_get_max_speed(pd, &write_speed); + if (ret) + write_speed = 16 * 177; + switch (pd->mmc3_profile) { + case 0x13: /* DVD-RW */ + case 0x1a: /* DVD+RW */ + case 0x12: /* DVD-RAM */ + pkt_dbg(1, pd, "write speed %ukB/s\n", write_speed); + break; + default: + ret = pkt_media_speed(pd, &media_write_speed); + if (ret) + media_write_speed = 16; + write_speed = min(write_speed, media_write_speed * 177); + pkt_dbg(1, pd, "write speed %ux\n", write_speed / 176); + break; + } + read_speed = write_speed; + + ret = pkt_set_speed(pd, write_speed, read_speed); + if (ret) { + pkt_dbg(1, pd, "couldn't set write speed\n"); + return -EIO; + } + pd->write_speed = write_speed; + pd->read_speed = read_speed; + + ret = pkt_perform_opc(pd); + if (ret) { + pkt_dbg(1, pd, "Optimum Power Calibration failed\n"); + } + + return 0; +} + +/* + * called at open time. + */ +static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) +{ + int ret; + long lba; + struct request_queue *q; + struct block_device *bdev; + + /* + * We need to re-open the cdrom device without O_NONBLOCK to be able + * to read/write from/to it. It is already opened in O_NONBLOCK mode + * so open should not fail. + */ + bdev = blkdev_get_by_dev(pd->bdev->bd_dev, FMODE_READ | FMODE_EXCL, pd); + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); + goto out; + } + + ret = pkt_get_last_written(pd, &lba); + if (ret) { + pkt_err(pd, "pkt_get_last_written failed\n"); + goto out_putdev; + } + + set_capacity(pd->disk, lba << 2); + set_capacity_and_notify(pd->bdev->bd_disk, lba << 2); + + q = bdev_get_queue(pd->bdev); + if (write) { + ret = pkt_open_write(pd); + if (ret) + goto out_putdev; + /* + * Some CDRW drives can not handle writes larger than one packet, + * even if the size is a multiple of the packet size. + */ + blk_queue_max_hw_sectors(q, pd->settings.size); + set_bit(PACKET_WRITABLE, &pd->flags); + } else { + pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); + clear_bit(PACKET_WRITABLE, &pd->flags); + } + + ret = pkt_set_segment_merging(pd, q); + if (ret) + goto out_putdev; + + if (write) { + if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) { + pkt_err(pd, "not enough memory for buffers\n"); + ret = -ENOMEM; + goto out_putdev; + } + pkt_info(pd, "%lukB available on disc\n", lba << 1); + } + + return 0; + +out_putdev: + blkdev_put(bdev, FMODE_READ | FMODE_EXCL); +out: + return ret; +} + +/* + * called when the device is closed. makes sure that the device flushes + * the internal cache before we close. + */ +static void pkt_release_dev(struct pktcdvd_device *pd, int flush) +{ + if (flush && pkt_flush_cache(pd)) + pkt_dbg(1, pd, "not flushing cache\n"); + + pkt_lock_door(pd, 0); + + pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); + blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL); + + pkt_shrink_pktlist(pd); +} + +static struct pktcdvd_device *pkt_find_dev_from_minor(unsigned int dev_minor) +{ + if (dev_minor >= MAX_WRITERS) + return NULL; + + dev_minor = array_index_nospec(dev_minor, MAX_WRITERS); + return pkt_devs[dev_minor]; +} + +static int pkt_open(struct block_device *bdev, fmode_t mode) +{ + struct pktcdvd_device *pd = NULL; + int ret; + + mutex_lock(&pktcdvd_mutex); + mutex_lock(&ctl_mutex); + pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev)); + if (!pd) { + ret = -ENODEV; + goto out; + } + BUG_ON(pd->refcnt < 0); + + pd->refcnt++; + if (pd->refcnt > 1) { + if ((mode & FMODE_WRITE) && + !test_bit(PACKET_WRITABLE, &pd->flags)) { + ret = -EBUSY; + goto out_dec; + } + } else { + ret = pkt_open_dev(pd, mode & FMODE_WRITE); + if (ret) + goto out_dec; + /* + * needed here as well, since ext2 (among others) may change + * the blocksize at mount time + */ + set_blocksize(bdev, CD_FRAMESIZE); + } + + mutex_unlock(&ctl_mutex); + mutex_unlock(&pktcdvd_mutex); + return 0; + +out_dec: + pd->refcnt--; +out: + mutex_unlock(&ctl_mutex); + mutex_unlock(&pktcdvd_mutex); + return ret; +} + +static void pkt_close(struct gendisk *disk, fmode_t mode) +{ + struct pktcdvd_device *pd = disk->private_data; + + mutex_lock(&pktcdvd_mutex); + mutex_lock(&ctl_mutex); + pd->refcnt--; + BUG_ON(pd->refcnt < 0); + if (pd->refcnt == 0) { + int flush = test_bit(PACKET_WRITABLE, &pd->flags); + pkt_release_dev(pd, flush); + } + mutex_unlock(&ctl_mutex); + mutex_unlock(&pktcdvd_mutex); +} + + +static void pkt_end_io_read_cloned(struct bio *bio) +{ + struct packet_stacked_data *psd = bio->bi_private; + struct pktcdvd_device *pd = psd->pd; + + psd->bio->bi_status = bio->bi_status; + bio_put(bio); + bio_endio(psd->bio); + mempool_free(psd, &psd_pool); + pkt_bio_finished(pd); +} + +static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio) +{ + struct bio *cloned_bio = + bio_alloc_clone(pd->bdev, bio, GFP_NOIO, &pkt_bio_set); + struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO); + + psd->pd = pd; + psd->bio = bio; + cloned_bio->bi_private = psd; + cloned_bio->bi_end_io = pkt_end_io_read_cloned; + pd->stats.secs_r += bio_sectors(bio); + pkt_queue_bio(pd, cloned_bio); +} + +static void pkt_make_request_write(struct request_queue *q, struct bio *bio) +{ + struct pktcdvd_device *pd = q->queuedata; + sector_t zone; + struct packet_data *pkt; + int was_empty, blocked_bio; + struct pkt_rb_node *node; + + zone = get_zone(bio->bi_iter.bi_sector, pd); + + /* + * If we find a matching packet in state WAITING or READ_WAIT, we can + * just append this bio to that packet. + */ + spin_lock(&pd->cdrw.active_list_lock); + blocked_bio = 0; + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (pkt->sector == zone) { + spin_lock(&pkt->lock); + if ((pkt->state == PACKET_WAITING_STATE) || + (pkt->state == PACKET_READ_WAIT_STATE)) { + bio_list_add(&pkt->orig_bios, bio); + pkt->write_size += + bio->bi_iter.bi_size / CD_FRAMESIZE; + if ((pkt->write_size >= pkt->frames) && + (pkt->state == PACKET_WAITING_STATE)) { + atomic_inc(&pkt->run_sm); + wake_up(&pd->wqueue); + } + spin_unlock(&pkt->lock); + spin_unlock(&pd->cdrw.active_list_lock); + return; + } else { + blocked_bio = 1; + } + spin_unlock(&pkt->lock); + } + } + spin_unlock(&pd->cdrw.active_list_lock); + + /* + * Test if there is enough room left in the bio work queue + * (queue size >= congestion on mark). + * If not, wait till the work queue size is below the congestion off mark. + */ + spin_lock(&pd->lock); + if (pd->write_congestion_on > 0 + && pd->bio_queue_size >= pd->write_congestion_on) { + struct wait_bit_queue_entry wqe; + + init_wait_var_entry(&wqe, &pd->congested, 0); + for (;;) { + prepare_to_wait_event(__var_waitqueue(&pd->congested), + &wqe.wq_entry, + TASK_UNINTERRUPTIBLE); + if (pd->bio_queue_size <= pd->write_congestion_off) + break; + pd->congested = true; + spin_unlock(&pd->lock); + schedule(); + spin_lock(&pd->lock); + } + } + spin_unlock(&pd->lock); + + /* + * No matching packet found. Store the bio in the work queue. + */ + node = mempool_alloc(&pd->rb_pool, GFP_NOIO); + node->bio = bio; + spin_lock(&pd->lock); + BUG_ON(pd->bio_queue_size < 0); + was_empty = (pd->bio_queue_size == 0); + pkt_rbtree_insert(pd, node); + spin_unlock(&pd->lock); + + /* + * Wake up the worker thread. + */ + atomic_set(&pd->scan_queue, 1); + if (was_empty) { + /* This wake_up is required for correct operation */ + wake_up(&pd->wqueue); + } else if (!list_empty(&pd->cdrw.pkt_free_list) && !blocked_bio) { + /* + * This wake up is not required for correct operation, + * but improves performance in some cases. + */ + wake_up(&pd->wqueue); + } +} + +static void pkt_submit_bio(struct bio *bio) +{ + struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata; + struct bio *split; + + bio = bio_split_to_limits(bio); + + pkt_dbg(2, pd, "start = %6llx stop = %6llx\n", + (unsigned long long)bio->bi_iter.bi_sector, + (unsigned long long)bio_end_sector(bio)); + + /* + * Clone READ bios so we can have our own bi_end_io callback. + */ + if (bio_data_dir(bio) == READ) { + pkt_make_request_read(pd, bio); + return; + } + + if (!test_bit(PACKET_WRITABLE, &pd->flags)) { + pkt_notice(pd, "WRITE for ro device (%llu)\n", + (unsigned long long)bio->bi_iter.bi_sector); + goto end_io; + } + + if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) { + pkt_err(pd, "wrong bio size\n"); + goto end_io; + } + + do { + sector_t zone = get_zone(bio->bi_iter.bi_sector, pd); + sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd); + + if (last_zone != zone) { + BUG_ON(last_zone != zone + pd->settings.size); + + split = bio_split(bio, last_zone - + bio->bi_iter.bi_sector, + GFP_NOIO, &pkt_bio_set); + bio_chain(split, bio); + } else { + split = bio; + } + + pkt_make_request_write(bio->bi_bdev->bd_disk->queue, split); + } while (split != bio); + + return; +end_io: + bio_io_error(bio); +} + +static void pkt_init_queue(struct pktcdvd_device *pd) +{ + struct request_queue *q = pd->disk->queue; + + blk_queue_logical_block_size(q, CD_FRAMESIZE); + blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS); + q->queuedata = pd; +} + +static int pkt_seq_show(struct seq_file *m, void *p) +{ + struct pktcdvd_device *pd = m->private; + char *msg; + int states[PACKET_NUM_STATES]; + + seq_printf(m, "Writer %s mapped to %pg:\n", pd->name, pd->bdev); + + seq_printf(m, "\nSettings:\n"); + seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2); + + if (pd->settings.write_type == 0) + msg = "Packet"; + else + msg = "Unknown"; + seq_printf(m, "\twrite type:\t\t%s\n", msg); + + seq_printf(m, "\tpacket type:\t\t%s\n", pd->settings.fp ? "Fixed" : "Variable"); + seq_printf(m, "\tlink loss:\t\t%d\n", pd->settings.link_loss); + + seq_printf(m, "\ttrack mode:\t\t%d\n", pd->settings.track_mode); + + if (pd->settings.block_mode == PACKET_BLOCK_MODE1) + msg = "Mode 1"; + else if (pd->settings.block_mode == PACKET_BLOCK_MODE2) + msg = "Mode 2"; + else + msg = "Unknown"; + seq_printf(m, "\tblock mode:\t\t%s\n", msg); + + seq_printf(m, "\nStatistics:\n"); + seq_printf(m, "\tpackets started:\t%lu\n", pd->stats.pkt_started); + seq_printf(m, "\tpackets ended:\t\t%lu\n", pd->stats.pkt_ended); + seq_printf(m, "\twritten:\t\t%lukB\n", pd->stats.secs_w >> 1); + seq_printf(m, "\tread gather:\t\t%lukB\n", pd->stats.secs_rg >> 1); + seq_printf(m, "\tread:\t\t\t%lukB\n", pd->stats.secs_r >> 1); + + seq_printf(m, "\nMisc:\n"); + seq_printf(m, "\treference count:\t%d\n", pd->refcnt); + seq_printf(m, "\tflags:\t\t\t0x%lx\n", pd->flags); + seq_printf(m, "\tread speed:\t\t%ukB/s\n", pd->read_speed); + seq_printf(m, "\twrite speed:\t\t%ukB/s\n", pd->write_speed); + seq_printf(m, "\tstart offset:\t\t%lu\n", pd->offset); + seq_printf(m, "\tmode page offset:\t%u\n", pd->mode_offset); + + seq_printf(m, "\nQueue state:\n"); + seq_printf(m, "\tbios queued:\t\t%d\n", pd->bio_queue_size); + seq_printf(m, "\tbios pending:\t\t%d\n", atomic_read(&pd->cdrw.pending_bios)); + seq_printf(m, "\tcurrent sector:\t\t0x%llx\n", (unsigned long long)pd->current_sector); + + pkt_count_states(pd, states); + seq_printf(m, "\tstate:\t\t\ti:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n", + states[0], states[1], states[2], states[3], states[4], states[5]); + + seq_printf(m, "\twrite congestion marks:\toff=%d on=%d\n", + pd->write_congestion_off, + pd->write_congestion_on); + return 0; +} + +static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) +{ + int i; + struct block_device *bdev; + struct scsi_device *sdev; + + if (pd->pkt_dev == dev) { + pkt_err(pd, "recursive setup not allowed\n"); + return -EBUSY; + } + for (i = 0; i < MAX_WRITERS; i++) { + struct pktcdvd_device *pd2 = pkt_devs[i]; + if (!pd2) + continue; + if (pd2->bdev->bd_dev == dev) { + pkt_err(pd, "%pg already setup\n", pd2->bdev); + return -EBUSY; + } + if (pd2->pkt_dev == dev) { + pkt_err(pd, "can't chain pktcdvd devices\n"); + return -EBUSY; + } + } + + bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_NDELAY, NULL); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + sdev = scsi_device_from_queue(bdev->bd_disk->queue); + if (!sdev) { + blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); + return -EINVAL; + } + put_device(&sdev->sdev_gendev); + + /* This is safe, since we have a reference from open(). */ + __module_get(THIS_MODULE); + + pd->bdev = bdev; + set_blocksize(bdev, CD_FRAMESIZE); + + pkt_init_queue(pd); + + atomic_set(&pd->cdrw.pending_bios, 0); + pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name); + if (IS_ERR(pd->cdrw.thread)) { + pkt_err(pd, "can't start kernel thread\n"); + goto out_mem; + } + + proc_create_single_data(pd->name, 0, pkt_proc, pkt_seq_show, pd); + pkt_dbg(1, pd, "writer mapped to %pg\n", bdev); + return 0; + +out_mem: + blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); + /* This is safe: open() is still holding a reference. */ + module_put(THIS_MODULE); + return -ENOMEM; +} + +static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) +{ + struct pktcdvd_device *pd = bdev->bd_disk->private_data; + int ret; + + pkt_dbg(2, pd, "cmd %x, dev %d:%d\n", + cmd, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); + + mutex_lock(&pktcdvd_mutex); + switch (cmd) { + case CDROMEJECT: + /* + * The door gets locked when the device is opened, so we + * have to unlock it or else the eject command fails. + */ + if (pd->refcnt == 1) + pkt_lock_door(pd, 0); + fallthrough; + /* + * forward selected CDROM ioctls to CD-ROM, for UDF + */ + case CDROMMULTISESSION: + case CDROMREADTOCENTRY: + case CDROM_LAST_WRITTEN: + case CDROM_SEND_PACKET: + case SCSI_IOCTL_SEND_COMMAND: + if (!bdev->bd_disk->fops->ioctl) + ret = -ENOTTY; + else + ret = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); + break; + default: + pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd); + ret = -ENOTTY; + } + mutex_unlock(&pktcdvd_mutex); + + return ret; +} + +static unsigned int pkt_check_events(struct gendisk *disk, + unsigned int clearing) +{ + struct pktcdvd_device *pd = disk->private_data; + struct gendisk *attached_disk; + + if (!pd) + return 0; + if (!pd->bdev) + return 0; + attached_disk = pd->bdev->bd_disk; + if (!attached_disk || !attached_disk->fops->check_events) + return 0; + return attached_disk->fops->check_events(attached_disk, clearing); +} + +static char *pkt_devnode(struct gendisk *disk, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "pktcdvd/%s", disk->disk_name); +} + +static const struct block_device_operations pktcdvd_ops = { + .owner = THIS_MODULE, + .submit_bio = pkt_submit_bio, + .open = pkt_open, + .release = pkt_close, + .ioctl = pkt_ioctl, + .compat_ioctl = blkdev_compat_ptr_ioctl, + .check_events = pkt_check_events, + .devnode = pkt_devnode, +}; + +/* + * Set up mapping from pktcdvd device to CD-ROM device. + */ +static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) +{ + int idx; + int ret = -ENOMEM; + struct pktcdvd_device *pd; + struct gendisk *disk; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + for (idx = 0; idx < MAX_WRITERS; idx++) + if (!pkt_devs[idx]) + break; + if (idx == MAX_WRITERS) { + pr_err("max %d writers supported\n", MAX_WRITERS); + ret = -EBUSY; + goto out_mutex; + } + + pd = kzalloc(sizeof(struct pktcdvd_device), GFP_KERNEL); + if (!pd) + goto out_mutex; + + ret = mempool_init_kmalloc_pool(&pd->rb_pool, PKT_RB_POOL_SIZE, + sizeof(struct pkt_rb_node)); + if (ret) + goto out_mem; + + INIT_LIST_HEAD(&pd->cdrw.pkt_free_list); + INIT_LIST_HEAD(&pd->cdrw.pkt_active_list); + spin_lock_init(&pd->cdrw.active_list_lock); + + spin_lock_init(&pd->lock); + spin_lock_init(&pd->iosched.lock); + bio_list_init(&pd->iosched.read_queue); + bio_list_init(&pd->iosched.write_queue); + sprintf(pd->name, DRIVER_NAME"%d", idx); + init_waitqueue_head(&pd->wqueue); + pd->bio_queue = RB_ROOT; + + pd->write_congestion_on = write_congestion_on; + pd->write_congestion_off = write_congestion_off; + + ret = -ENOMEM; + disk = blk_alloc_disk(NUMA_NO_NODE); + if (!disk) + goto out_mem; + pd->disk = disk; + disk->major = pktdev_major; + disk->first_minor = idx; + disk->minors = 1; + disk->fops = &pktcdvd_ops; + disk->flags = GENHD_FL_REMOVABLE | GENHD_FL_NO_PART; + strcpy(disk->disk_name, pd->name); + disk->private_data = pd; + + pd->pkt_dev = MKDEV(pktdev_major, idx); + ret = pkt_new_dev(pd, dev); + if (ret) + goto out_mem2; + + /* inherit events of the host device */ + disk->events = pd->bdev->bd_disk->events; + + ret = add_disk(disk); + if (ret) + goto out_mem2; + + pkt_sysfs_dev_new(pd); + pkt_debugfs_dev_new(pd); + + pkt_devs[idx] = pd; + if (pkt_dev) + *pkt_dev = pd->pkt_dev; + + mutex_unlock(&ctl_mutex); + return 0; + +out_mem2: + put_disk(disk); +out_mem: + mempool_exit(&pd->rb_pool); + kfree(pd); +out_mutex: + mutex_unlock(&ctl_mutex); + pr_err("setup of pktcdvd device failed\n"); + return ret; +} + +/* + * Tear down mapping from pktcdvd device to CD-ROM device. + */ +static int pkt_remove_dev(dev_t pkt_dev) +{ + struct pktcdvd_device *pd; + int idx; + int ret = 0; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + for (idx = 0; idx < MAX_WRITERS; idx++) { + pd = pkt_devs[idx]; + if (pd && (pd->pkt_dev == pkt_dev)) + break; + } + if (idx == MAX_WRITERS) { + pr_debug("dev not setup\n"); + ret = -ENXIO; + goto out; + } + + if (pd->refcnt > 0) { + ret = -EBUSY; + goto out; + } + if (!IS_ERR(pd->cdrw.thread)) + kthread_stop(pd->cdrw.thread); + + pkt_devs[idx] = NULL; + + pkt_debugfs_dev_remove(pd); + pkt_sysfs_dev_remove(pd); + + blkdev_put(pd->bdev, FMODE_READ | FMODE_NDELAY); + + remove_proc_entry(pd->name, pkt_proc); + pkt_dbg(1, pd, "writer unmapped\n"); + + del_gendisk(pd->disk); + put_disk(pd->disk); + + mempool_exit(&pd->rb_pool); + kfree(pd); + + /* This is safe: open() is still holding a reference. */ + module_put(THIS_MODULE); + +out: + mutex_unlock(&ctl_mutex); + return ret; +} + +static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd) +{ + struct pktcdvd_device *pd; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index); + if (pd) { + ctrl_cmd->dev = new_encode_dev(pd->bdev->bd_dev); + ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev); + } else { + ctrl_cmd->dev = 0; + ctrl_cmd->pkt_dev = 0; + } + ctrl_cmd->num_devices = MAX_WRITERS; + + mutex_unlock(&ctl_mutex); +} + +static long pkt_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct pkt_ctrl_command ctrl_cmd; + int ret = 0; + dev_t pkt_dev = 0; + + if (cmd != PACKET_CTRL_CMD) + return -ENOTTY; + + if (copy_from_user(&ctrl_cmd, argp, sizeof(struct pkt_ctrl_command))) + return -EFAULT; + + switch (ctrl_cmd.command) { + case PKT_CTRL_CMD_SETUP: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + ret = pkt_setup_dev(new_decode_dev(ctrl_cmd.dev), &pkt_dev); + ctrl_cmd.pkt_dev = new_encode_dev(pkt_dev); + break; + case PKT_CTRL_CMD_TEARDOWN: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + ret = pkt_remove_dev(new_decode_dev(ctrl_cmd.pkt_dev)); + break; + case PKT_CTRL_CMD_STATUS: + pkt_get_status(&ctrl_cmd); + break; + default: + return -ENOTTY; + } + + if (copy_to_user(argp, &ctrl_cmd, sizeof(struct pkt_ctrl_command))) + return -EFAULT; + return ret; +} + +#ifdef CONFIG_COMPAT +static long pkt_ctl_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return pkt_ctl_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); +} +#endif + +static const struct file_operations pkt_ctl_fops = { + .open = nonseekable_open, + .unlocked_ioctl = pkt_ctl_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = pkt_ctl_compat_ioctl, +#endif + .owner = THIS_MODULE, + .llseek = no_llseek, +}; + +static struct miscdevice pkt_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = DRIVER_NAME, + .nodename = "pktcdvd/control", + .fops = &pkt_ctl_fops +}; + +static int __init pkt_init(void) +{ + int ret; + + mutex_init(&ctl_mutex); + + ret = mempool_init_kmalloc_pool(&psd_pool, PSD_POOL_SIZE, + sizeof(struct packet_stacked_data)); + if (ret) + return ret; + ret = bioset_init(&pkt_bio_set, BIO_POOL_SIZE, 0, 0); + if (ret) { + mempool_exit(&psd_pool); + return ret; + } + + ret = register_blkdev(pktdev_major, DRIVER_NAME); + if (ret < 0) { + pr_err("unable to register block device\n"); + goto out2; + } + if (!pktdev_major) + pktdev_major = ret; + + ret = pkt_sysfs_init(); + if (ret) + goto out; + + pkt_debugfs_init(); + + ret = misc_register(&pkt_misc); + if (ret) { + pr_err("unable to register misc device\n"); + goto out_misc; + } + + pkt_proc = proc_mkdir("driver/"DRIVER_NAME, NULL); + + return 0; + +out_misc: + pkt_debugfs_cleanup(); + pkt_sysfs_cleanup(); +out: + unregister_blkdev(pktdev_major, DRIVER_NAME); +out2: + mempool_exit(&psd_pool); + bioset_exit(&pkt_bio_set); + return ret; +} + +static void __exit pkt_exit(void) +{ + remove_proc_entry("driver/"DRIVER_NAME, NULL); + misc_deregister(&pkt_misc); + + pkt_debugfs_cleanup(); + pkt_sysfs_cleanup(); + + unregister_blkdev(pktdev_major, DRIVER_NAME); + mempool_exit(&psd_pool); + bioset_exit(&pkt_bio_set); +} + +MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives"); +MODULE_AUTHOR("Jens Axboe "); +MODULE_LICENSE("GPL"); + +module_init(pkt_init); +module_exit(pkt_exit); diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h new file mode 100644 index 0000000000000..f9c5ac80d59b3 --- /dev/null +++ b/include/linux/pktcdvd.h @@ -0,0 +1,197 @@ +/* + * Copyright (C) 2000 Jens Axboe + * Copyright (C) 2001-2004 Peter Osterlund + * + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + * + * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and + * DVD-RW devices. + * + */ +#ifndef __PKTCDVD_H +#define __PKTCDVD_H + +#include +#include +#include +#include +#include +#include +#include + +/* default bio write queue congestion marks */ +#define PKT_WRITE_CONGESTION_ON 10000 +#define PKT_WRITE_CONGESTION_OFF 9000 + + +struct packet_settings +{ + __u32 size; /* packet size in (512 byte) sectors */ + __u8 fp; /* fixed packets */ + __u8 link_loss; /* the rest is specified + * as per Mt Fuji */ + __u8 write_type; + __u8 track_mode; + __u8 block_mode; +}; + +/* + * Very crude stats for now + */ +struct packet_stats +{ + unsigned long pkt_started; + unsigned long pkt_ended; + unsigned long secs_w; + unsigned long secs_rg; + unsigned long secs_r; +}; + +struct packet_cdrw +{ + struct list_head pkt_free_list; + struct list_head pkt_active_list; + spinlock_t active_list_lock; /* Serialize access to pkt_active_list */ + struct task_struct *thread; + atomic_t pending_bios; +}; + +/* + * Switch to high speed reading after reading this many kilobytes + * with no interspersed writes. + */ +#define HI_SPEED_SWITCH 512 + +struct packet_iosched +{ + atomic_t attention; /* Set to non-zero when queue processing is needed */ + int writing; /* Non-zero when writing, zero when reading */ + spinlock_t lock; /* Protecting read/write queue manipulations */ + struct bio_list read_queue; + struct bio_list write_queue; + sector_t last_write; /* The sector where the last write ended */ + int successive_reads; +}; + +/* + * 32 buffers of 2048 bytes + */ +#if (PAGE_SIZE % CD_FRAMESIZE) != 0 +#error "PAGE_SIZE must be a multiple of CD_FRAMESIZE" +#endif +#define PACKET_MAX_SIZE 128 +#define FRAMES_PER_PAGE (PAGE_SIZE / CD_FRAMESIZE) +#define PACKET_MAX_SECTORS (PACKET_MAX_SIZE * CD_FRAMESIZE >> 9) + +enum packet_data_state { + PACKET_IDLE_STATE, /* Not used at the moment */ + PACKET_WAITING_STATE, /* Waiting for more bios to arrive, so */ + /* we don't have to do as much */ + /* data gathering */ + PACKET_READ_WAIT_STATE, /* Waiting for reads to fill in holes */ + PACKET_WRITE_WAIT_STATE, /* Waiting for the write to complete */ + PACKET_RECOVERY_STATE, /* Recover after read/write errors */ + PACKET_FINISHED_STATE, /* After write has finished */ + + PACKET_NUM_STATES /* Number of possible states */ +}; + +/* + * Information needed for writing a single packet + */ +struct pktcdvd_device; + +struct packet_data +{ + struct list_head list; + + spinlock_t lock; /* Lock protecting state transitions and */ + /* orig_bios list */ + + struct bio_list orig_bios; /* Original bios passed to pkt_make_request */ + /* that will be handled by this packet */ + int write_size; /* Total size of all bios in the orig_bios */ + /* list, measured in number of frames */ + + struct bio *w_bio; /* The bio we will send to the real CD */ + /* device once we have all data for the */ + /* packet we are going to write */ + sector_t sector; /* First sector in this packet */ + int frames; /* Number of frames in this packet */ + + enum packet_data_state state; /* Current state */ + atomic_t run_sm; /* Incremented whenever the state */ + /* machine needs to be run */ + long sleep_time; /* Set this to non-zero to make the state */ + /* machine run after this many jiffies. */ + + atomic_t io_wait; /* Number of pending IO operations */ + atomic_t io_errors; /* Number of read/write errors during IO */ + + struct bio *r_bios[PACKET_MAX_SIZE]; /* bios to use during data gathering */ + struct page *pages[PACKET_MAX_SIZE / FRAMES_PER_PAGE]; + + int cache_valid; /* If non-zero, the data for the zone defined */ + /* by the sector variable is completely cached */ + /* in the pages[] vector. */ + + int id; /* ID number for debugging */ + struct pktcdvd_device *pd; +}; + +struct pkt_rb_node { + struct rb_node rb_node; + struct bio *bio; +}; + +struct packet_stacked_data +{ + struct bio *bio; /* Original read request bio */ + struct pktcdvd_device *pd; +}; +#define PSD_POOL_SIZE 64 + +struct pktcdvd_device +{ + struct block_device *bdev; /* dev attached */ + dev_t pkt_dev; /* our dev */ + char name[20]; + struct packet_settings settings; + struct packet_stats stats; + int refcnt; /* Open count */ + int write_speed; /* current write speed, kB/s */ + int read_speed; /* current read speed, kB/s */ + unsigned long offset; /* start offset */ + __u8 mode_offset; /* 0 / 8 */ + __u8 type; + unsigned long flags; + __u16 mmc3_profile; + __u32 nwa; /* next writable address */ + __u32 lra; /* last recorded address */ + struct packet_cdrw cdrw; + wait_queue_head_t wqueue; + + spinlock_t lock; /* Serialize access to bio_queue */ + struct rb_root bio_queue; /* Work queue of bios we need to handle */ + int bio_queue_size; /* Number of nodes in bio_queue */ + bool congested; /* Someone is waiting for bio_queue_size + * to drop. */ + sector_t current_sector; /* Keep track of where the elevator is */ + atomic_t scan_queue; /* Set to non-zero when pkt_handle_queue */ + /* needs to be run. */ + mempool_t rb_pool; /* mempool for pkt_rb_node allocations */ + + struct packet_iosched iosched; + struct gendisk *disk; + + int write_congestion_off; + int write_congestion_on; + + struct device *dev; /* sysfs pktcdvd[0-7] dev */ + + struct dentry *dfs_d_root; /* debugfs: devname directory */ + struct dentry *dfs_f_info; /* debugfs: info file */ +}; + +#endif /* __PKTCDVD_H */ diff --git a/include/uapi/linux/pktcdvd.h b/include/uapi/linux/pktcdvd.h new file mode 100644 index 0000000000000..9cbb55d21c94a --- /dev/null +++ b/include/uapi/linux/pktcdvd.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2000 Jens Axboe + * Copyright (C) 2001-2004 Peter Osterlund + * + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + * + * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and + * DVD-RW devices. + * + */ +#ifndef _UAPI__PKTCDVD_H +#define _UAPI__PKTCDVD_H + +#include + +/* + * 1 for normal debug messages, 2 is very verbose. 0 to turn it off. + */ +#define PACKET_DEBUG 1 + +#define MAX_WRITERS 8 + +#define PKT_RB_POOL_SIZE 512 + +/* + * How long we should hold a non-full packet before starting data gathering. + */ +#define PACKET_WAIT_TIME (HZ * 5 / 1000) + +/* + * use drive write caching -- we need deferred error handling to be + * able to successfully recover with this option (drive will return good + * status as soon as the cdb is validated). + */ +#if defined(CONFIG_CDROM_PKTCDVD_WCACHE) +#define USE_WCACHING 1 +#else +#define USE_WCACHING 0 +#endif + +/* + * No user-servicable parts beyond this point -> + */ + +/* + * device types + */ +#define PACKET_CDR 1 +#define PACKET_CDRW 2 +#define PACKET_DVDR 3 +#define PACKET_DVDRW 4 + +/* + * flags + */ +#define PACKET_WRITABLE 1 /* pd is writable */ +#define PACKET_NWA_VALID 2 /* next writable address valid */ +#define PACKET_LRA_VALID 3 /* last recorded address valid */ +#define PACKET_MERGE_SEGS 4 /* perform segment merging to keep */ + /* underlying cdrom device happy */ + +/* + * Disc status -- from READ_DISC_INFO + */ +#define PACKET_DISC_EMPTY 0 +#define PACKET_DISC_INCOMPLETE 1 +#define PACKET_DISC_COMPLETE 2 +#define PACKET_DISC_OTHER 3 + +/* + * write type, and corresponding data block type + */ +#define PACKET_MODE1 1 +#define PACKET_MODE2 2 +#define PACKET_BLOCK_MODE1 8 +#define PACKET_BLOCK_MODE2 10 + +/* + * Last session/border status + */ +#define PACKET_SESSION_EMPTY 0 +#define PACKET_SESSION_INCOMPLETE 1 +#define PACKET_SESSION_RESERVED 2 +#define PACKET_SESSION_COMPLETE 3 + +#define PACKET_MCN "4a656e734178626f65323030300000" + +#undef PACKET_USE_LS + +#define PKT_CTRL_CMD_SETUP 0 +#define PKT_CTRL_CMD_TEARDOWN 1 +#define PKT_CTRL_CMD_STATUS 2 + +struct pkt_ctrl_command { + __u32 command; /* in: Setup, teardown, status */ + __u32 dev_index; /* in/out: Device index */ + __u32 dev; /* in/out: Device nr for cdrw device */ + __u32 pkt_dev; /* in/out: Device nr for packet device */ + __u32 num_devices; /* out: Largest device index + 1 */ + __u32 padding; /* Not used */ +}; + +/* + * packet ioctls + */ +#define PACKET_IOCTL_MAGIC ('X') +#define PACKET_CTRL_CMD _IOWR(PACKET_IOCTL_MAGIC, 1, struct pkt_ctrl_command) + + +#endif /* _UAPI__PKTCDVD_H */ From 6fe6ece398f7431784847e922a2c8c385dc58a35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Wed, 21 Dec 2022 16:24:13 +0100 Subject: [PATCH 0362/1593] Revert "drm/amd/display: Enable Freesync Video Mode by default" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit de05abe6b9d0fe08f65d744f7f75a4cba4df27ad. The bug referenced below was bisected to this commit. There has been no activity toward fixing it in 3 months, so let's revert for now. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2162 Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 27 +++++++++++++++++++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++++---- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6b74df446694b..e3e2e6e3b4859 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -195,6 +195,7 @@ extern int amdgpu_emu_mode; extern uint amdgpu_smu_memory_pool_size; extern int amdgpu_smu_pptable_id; extern uint amdgpu_dc_feature_mask; +extern uint amdgpu_freesync_vid_mode; extern uint amdgpu_dc_debug_mask; extern uint amdgpu_dc_visual_confirm; extern uint amdgpu_dm_abm_level; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b4f2d61ea0d53..1353ffd089882 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -181,6 +181,7 @@ int amdgpu_mes_kiq; int amdgpu_noretry = -1; int amdgpu_force_asic_type = -1; int amdgpu_tmz = -1; /* auto */ +uint amdgpu_freesync_vid_mode; int amdgpu_reset_method = -1; /* auto */ int amdgpu_num_kcq = -1; int amdgpu_smartshift_bias; @@ -879,6 +880,32 @@ module_param_named(backlight, amdgpu_backlight, bint, 0444); MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on)"); module_param_named(tmz, amdgpu_tmz, int, 0444); +/** + * DOC: freesync_video (uint) + * Enable the optimization to adjust front porch timing to achieve seamless + * mode change experience when setting a freesync supported mode for which full + * modeset is not needed. + * + * The Display Core will add a set of modes derived from the base FreeSync + * video mode into the corresponding connector's mode list based on commonly + * used refresh rates and VRR range of the connected display, when users enable + * this feature. From the userspace perspective, they can see a seamless mode + * change experience when the change between different refresh rates under the + * same resolution. Additionally, userspace applications such as Video playback + * can read this modeset list and change the refresh rate based on the video + * frame rate. Finally, the userspace can also derive an appropriate mode for a + * particular refresh rate based on the FreeSync Mode and add it to the + * connector's mode list. + * + * Note: This is an experimental feature. + * + * The default value: 0 (off). + */ +MODULE_PARM_DESC( + freesync_video, + "Enable freesync modesetting optimization feature (0 = off (default), 1 = on)"); +module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); + /** * DOC: reset_method (int) * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 86bc23a67d973..1b7f20a9d4aeb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5835,7 +5835,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, */ DRM_DEBUG_DRIVER("No preferred mode found\n"); } else { - recalculate_timing = is_freesync_video_mode(&mode, aconnector); + recalculate_timing = amdgpu_freesync_vid_mode && + is_freesync_video_mode(&mode, aconnector); if (recalculate_timing) { freesync_mode = get_highest_refresh_rate_mode(aconnector, false); drm_mode_copy(&saved_mode, &mode); @@ -6986,7 +6987,7 @@ static void amdgpu_dm_connector_add_freesync_modes(struct drm_connector *connect struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector); - if (!edid) + if (!(amdgpu_freesync_vid_mode && edid)) return; if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) @@ -8850,7 +8851,8 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, * TODO: Refactor this function to allow this check to work * in all conditions. */ - if (dm_new_crtc_state->stream && + if (amdgpu_freesync_vid_mode && + dm_new_crtc_state->stream && is_timing_unchanged_for_freesync(new_crtc_state, old_crtc_state)) goto skip_modeset; @@ -8885,7 +8887,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, if (!dm_old_crtc_state->stream) goto skip_modeset; - if (dm_new_crtc_state->stream && + if (amdgpu_freesync_vid_mode && dm_new_crtc_state->stream && is_timing_unchanged_for_freesync(new_crtc_state, old_crtc_state)) { new_crtc_state->mode_changed = false; @@ -8897,7 +8899,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, set_freesync_fixed_config(dm_new_crtc_state); goto skip_modeset; - } else if (aconnector && + } else if (amdgpu_freesync_vid_mode && aconnector && is_freesync_video_mode(&new_crtc_state->mode, aconnector)) { struct drm_display_mode *high_mode; From bd0ddcfc83d85bc30c868f2c3457312c7f1ccee2 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 3 Jan 2023 12:00:31 -0600 Subject: [PATCH 0363/1593] Revert "of: fdt: Honor CONFIG_CMDLINE* even without /chosen node" This reverts commit a7d550f82b445cf218b47a2c1a9c56e97ecb8c7a. Some arches (PPC at least) don't call early_init_dt_scan_nodes(), so moving the cmdline processing there breaks them. Reported-by: Geoff Levand Cc: Alexander Sverdlin Tested-by: Geoff Levand Reviewed-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20230103-dt-cmdline-fix-v1-1-7038e88b18b6@kernel.org Signed-off-by: Rob Herring --- drivers/of/fdt.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 02cc4a285cb98..4f88e8bbdd279 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1175,6 +1175,26 @@ int __init early_init_dt_scan_chosen(char *cmdline) if (p != NULL && l > 0) strscpy(cmdline, p, min(l, COMMAND_LINE_SIZE)); + /* + * CONFIG_CMDLINE is meant to be a default in case nothing else + * managed to set the command line, unless CONFIG_CMDLINE_FORCE + * is set in which case we override whatever was found earlier. + */ +#ifdef CONFIG_CMDLINE +#if defined(CONFIG_CMDLINE_EXTEND) + strlcat(cmdline, " ", COMMAND_LINE_SIZE); + strlcat(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE); +#elif defined(CONFIG_CMDLINE_FORCE) + strscpy(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE); +#else + /* No arguments from boot loader, use kernel's cmdl*/ + if (!((char *)cmdline)[0]) + strscpy(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE); +#endif +#endif /* CONFIG_CMDLINE */ + + pr_debug("Command line is: %s\n", (char *)cmdline); + rng_seed = of_get_flat_dt_prop(node, "rng-seed", &l); if (rng_seed && l > 0) { add_bootloader_randomness(rng_seed, l); @@ -1279,26 +1299,6 @@ void __init early_init_dt_scan_nodes(void) if (rc) pr_warn("No chosen node found, continuing without\n"); - /* - * CONFIG_CMDLINE is meant to be a default in case nothing else - * managed to set the command line, unless CONFIG_CMDLINE_FORCE - * is set in which case we override whatever was found earlier. - */ -#ifdef CONFIG_CMDLINE -#if defined(CONFIG_CMDLINE_EXTEND) - strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); - strlcat(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#elif defined(CONFIG_CMDLINE_FORCE) - strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#else - /* No arguments from boot loader, use kernel's cmdl */ - if (!boot_command_line[0]) - strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#endif -#endif /* CONFIG_CMDLINE */ - - pr_debug("Command line is: %s\n", boot_command_line); - /* Setup memory, calling early_init_dt_add_memory_arch */ early_init_dt_scan_memory(); From 064e32dc5b03114d0767893fecdaf7b5dfd8c286 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 3 Jan 2023 12:00:32 -0600 Subject: [PATCH 0364/1593] of: fdt: Honor CONFIG_CMDLINE* even without /chosen node, take 2 I do not read a strict requirement on /chosen node in either ePAPR or in Documentation/devicetree. Help text for CONFIG_CMDLINE and CONFIG_CMDLINE_EXTEND doesn't make their behavior explicitly dependent on the presence of /chosen or the presense of /chosen/bootargs. However the early check for /chosen and bailing out in early_init_dt_scan_chosen() skips CONFIG_CMDLINE handling which is not really related to /chosen node or the particular method of passing cmdline from bootloader. This leads to counterintuitive combinations (assuming CONFIG_CMDLINE_EXTEND=y): a) bootargs="foo", CONFIG_CMDLINE="bar" => cmdline=="foo bar" b) /chosen missing, CONFIG_CMDLINE="bar" => cmdline=="" c) bootargs="", CONFIG_CMDLINE="bar" => cmdline==" bar" Rework early_init_dt_scan_chosen() so that the cmdline config options are always handled. [commit msg written by Alexander Sverdlin] Cc: Alexander Sverdlin Cc: Linus Walleij Cc: Arnd Bergmann Tested-by: Geoff Levand Reviewed-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20230103-dt-cmdline-fix-v1-2-7038e88b18b6@kernel.org Signed-off-by: Rob Herring --- drivers/of/fdt.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 4f88e8bbdd279..f08b25195ae79 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1163,18 +1163,32 @@ int __init early_init_dt_scan_chosen(char *cmdline) if (node < 0) node = fdt_path_offset(fdt, "/chosen@0"); if (node < 0) - return -ENOENT; + /* Handle the cmdline config options even if no /chosen node */ + goto handle_cmdline; chosen_node_offset = node; early_init_dt_check_for_initrd(node); early_init_dt_check_for_elfcorehdr(node); + rng_seed = of_get_flat_dt_prop(node, "rng-seed", &l); + if (rng_seed && l > 0) { + add_bootloader_randomness(rng_seed, l); + + /* try to clear seed so it won't be found. */ + fdt_nop_property(initial_boot_params, node, "rng-seed"); + + /* update CRC check value */ + of_fdt_crc32 = crc32_be(~0, initial_boot_params, + fdt_totalsize(initial_boot_params)); + } + /* Retrieve command line */ p = of_get_flat_dt_prop(node, "bootargs", &l); if (p != NULL && l > 0) strscpy(cmdline, p, min(l, COMMAND_LINE_SIZE)); +handle_cmdline: /* * CONFIG_CMDLINE is meant to be a default in case nothing else * managed to set the command line, unless CONFIG_CMDLINE_FORCE @@ -1195,18 +1209,6 @@ int __init early_init_dt_scan_chosen(char *cmdline) pr_debug("Command line is: %s\n", (char *)cmdline); - rng_seed = of_get_flat_dt_prop(node, "rng-seed", &l); - if (rng_seed && l > 0) { - add_bootloader_randomness(rng_seed, l); - - /* try to clear seed so it won't be found. */ - fdt_nop_property(initial_boot_params, node, "rng-seed"); - - /* update CRC check value */ - of_fdt_crc32 = crc32_be(~0, initial_boot_params, - fdt_totalsize(initial_boot_params)); - } - return 0; } From 5401c3e0992860b11fb4b25796e4c4f1921740df Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Tue, 3 Jan 2023 16:30:21 -0700 Subject: [PATCH 0365/1593] qed: allow sleep in qed_mcp_trace_dump() By default, qed_mcp_cmd_and_union() delays 10us at a time in a loop that can run 500K times, so calls to qed_mcp_nvm_rd_cmd() may block the current thread for over 5s. We observed thread scheduling delays over 700ms in production, with stacktraces pointing to this code as the culprit. qed_mcp_trace_dump() is called from ethtool, so sleeping is permitted. It already can sleep in qed_mcp_halt(), which calls qed_mcp_cmd(). Add a "can sleep" parameter to qed_find_nvram_image() and qed_nvram_read() so they can sleep during qed_mcp_trace_dump(). qed_mcp_trace_get_meta_info() and qed_mcp_trace_read_meta(), called only by qed_mcp_trace_dump(), allow these functions to sleep. I can't tell if the other caller (qed_grc_dump_mcp_hw_dump()) can sleep, so keep b_can_sleep set to false when it calls these functions. An example stacktrace from a custom warning we added to the kernel showing a thread that has not scheduled despite long needing resched: [ 2745.362925,17] ------------[ cut here ]------------ [ 2745.362941,17] WARNING: CPU: 23 PID: 5640 at arch/x86/kernel/irq.c:233 do_IRQ+0x15e/0x1a0() [ 2745.362946,17] Thread not rescheduled for 744 ms after irq 99 [ 2745.362956,17] Modules linked in: ... [ 2745.363339,17] CPU: 23 PID: 5640 Comm: lldpd Tainted: P O 4.4.182+ #202104120910+6d1da174272d.61x [ 2745.363343,17] Hardware name: FOXCONN MercuryB/Quicksilver Controller, BIOS H11P1N09 07/08/2020 [ 2745.363346,17] 0000000000000000 ffff885ec07c3ed8 ffffffff8131eb2f ffff885ec07c3f20 [ 2745.363358,17] ffffffff81d14f64 ffff885ec07c3f10 ffffffff81072ac2 ffff88be98ed0000 [ 2745.363369,17] 0000000000000063 0000000000000174 0000000000000074 0000000000000000 [ 2745.363379,17] Call Trace: [ 2745.363382,17] [] dump_stack+0x8e/0xcf [ 2745.363393,17] [] warn_slowpath_common+0x82/0xc0 [ 2745.363398,17] [] warn_slowpath_fmt+0x4c/0x50 [ 2745.363404,17] [] ? rcu_irq_exit+0xae/0xc0 [ 2745.363408,17] [] do_IRQ+0x15e/0x1a0 [ 2745.363413,17] [] common_interrupt+0x89/0x89 [ 2745.363416,17] [] ? delay_tsc+0x24/0x50 [ 2745.363425,17] [] __udelay+0x34/0x40 [ 2745.363457,17] [] qed_mcp_cmd_and_union+0x36f/0x7d0 [qed] [ 2745.363473,17] [] qed_mcp_nvm_rd_cmd+0x4d/0x90 [qed] [ 2745.363490,17] [] qed_mcp_trace_dump+0x4a7/0x630 [qed] [ 2745.363504,17] [] ? qed_fw_asserts_dump+0x1d6/0x1f0 [qed] [ 2745.363520,17] [] qed_dbg_mcp_trace_get_dump_buf_size+0x37/0x80 [qed] [ 2745.363536,17] [] qed_dbg_feature_size+0x61/0xa0 [qed] [ 2745.363551,17] [] qed_dbg_all_data_size+0x247/0x260 [qed] [ 2745.363560,17] [] qede_get_regs_len+0x30/0x40 [qede] [ 2745.363566,17] [] ethtool_get_drvinfo+0xe3/0x190 [ 2745.363570,17] [] dev_ethtool+0x1362/0x2140 [ 2745.363575,17] [] ? finish_task_switch+0x76/0x260 [ 2745.363580,17] [] ? __schedule+0x3c6/0x9d0 [ 2745.363585,17] [] ? hrtimer_start_range_ns+0x1d0/0x370 [ 2745.363589,17] [] ? dev_get_by_name_rcu+0x6b/0x90 [ 2745.363594,17] [] dev_ioctl+0xe8/0x710 [ 2745.363599,17] [] sock_do_ioctl+0x48/0x60 [ 2745.363603,17] [] sock_ioctl+0x1c7/0x280 [ 2745.363608,17] [] ? seccomp_phase1+0x83/0x220 [ 2745.363612,17] [] do_vfs_ioctl+0x2b3/0x4e0 [ 2745.363616,17] [] SyS_ioctl+0x41/0x70 [ 2745.363619,17] [] entry_SYSCALL_64_fastpath+0x1e/0x79 [ 2745.363622,17] ---[ end trace f6954aa440266421 ]--- Fixes: c965db4446291 ("qed: Add support for debug data collection") Signed-off-by: Caleb Sander Acked-by: Alok Prasad Link: https://lore.kernel.org/r/20230103233021.1457646-1-csander@purestorage.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_debug.c | 28 +++++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index 86ecb080b1536..cdcead614e9fa 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -1832,7 +1832,8 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 image_type, u32 *nvram_offset_bytes, - u32 *nvram_size_bytes) + u32 *nvram_size_bytes, + bool b_can_sleep) { u32 ret_mcp_resp, ret_mcp_param, ret_txn_size; struct mcp_file_att file_att; @@ -1846,7 +1847,8 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn, &ret_mcp_resp, &ret_mcp_param, &ret_txn_size, - (u32 *)&file_att, false); + (u32 *)&file_att, + b_can_sleep); /* Check response */ if (nvm_result || (ret_mcp_resp & FW_MSG_CODE_MASK) != @@ -1873,7 +1875,9 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn, static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 nvram_offset_bytes, - u32 nvram_size_bytes, u32 *ret_buf) + u32 nvram_size_bytes, + u32 *ret_buf, + bool b_can_sleep) { u32 ret_mcp_resp, ret_mcp_param, ret_read_size, bytes_to_copy; s32 bytes_left = nvram_size_bytes; @@ -1899,7 +1903,7 @@ static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn, &ret_mcp_resp, &ret_mcp_param, &ret_read_size, (u32 *)((u8 *)ret_buf + read_offset), - false)) + b_can_sleep)) return DBG_STATUS_NVRAM_READ_FAILED; /* Check response */ @@ -3380,7 +3384,8 @@ static u32 qed_grc_dump_mcp_hw_dump(struct qed_hwfn *p_hwfn, p_ptt, NVM_TYPE_HW_DUMP_OUT, &hw_dump_offset_bytes, - &hw_dump_size_bytes); + &hw_dump_size_bytes, + false); if (status != DBG_STATUS_OK) return 0; @@ -3397,7 +3402,9 @@ static u32 qed_grc_dump_mcp_hw_dump(struct qed_hwfn *p_hwfn, status = qed_nvram_read(p_hwfn, p_ptt, hw_dump_offset_bytes, - hw_dump_size_bytes, dump_buf + offset); + hw_dump_size_bytes, + dump_buf + offset, + false); if (status != DBG_STATUS_OK) { DP_NOTICE(p_hwfn, "Failed to read MCP HW Dump image from NVRAM\n"); @@ -4123,7 +4130,9 @@ static enum dbg_status qed_mcp_trace_get_meta_info(struct qed_hwfn *p_hwfn, return qed_find_nvram_image(p_hwfn, p_ptt, nvram_image_type, - trace_meta_offset, trace_meta_size); + trace_meta_offset, + trace_meta_size, + true); } /* Reads the MCP Trace meta data from NVRAM into the specified buffer */ @@ -4139,7 +4148,10 @@ static enum dbg_status qed_mcp_trace_read_meta(struct qed_hwfn *p_hwfn, /* Read meta data from NVRAM */ status = qed_nvram_read(p_hwfn, p_ptt, - nvram_offset_in_bytes, size_in_bytes, buf); + nvram_offset_in_bytes, + size_in_bytes, + buf, + true); if (status != DBG_STATUS_OK) return status; From 2c02d41d71f90a5168391b6a5f2954112ba2307c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 3 Jan 2023 12:19:17 +0100 Subject: [PATCH 0366/1593] net/ulp: prevent ULP without clone op from entering the LISTEN status When an ULP-enabled socket enters the LISTEN status, the listener ULP data pointer is copied inside the child/accepted sockets by sk_clone_lock(). The relevant ULP can take care of de-duplicating the context pointer via the clone() operation, but only MPTCP and SMC implement such op. Other ULPs may end-up with a double-free at socket disposal time. We can't simply clear the ULP data at clone time, as TLS replaces the socket ops with custom ones assuming a valid TLS ULP context is available. Instead completely prevent clone-less ULP sockets from entering the LISTEN status. Fixes: 734942cc4ea6 ("tcp: ULP infrastructure") Reported-by: slipper Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/4b80c3d1dbe3d0ab072f80450c202d9bc88b4b03.1672740602.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv4/inet_connection_sock.c | 14 ++++++++++++++ net/ipv4/tcp_ulp.c | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 848ffc3e0239c..d1f8375793983 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1200,12 +1200,26 @@ void inet_csk_prepare_forced_close(struct sock *sk) } EXPORT_SYMBOL(inet_csk_prepare_forced_close); +static int inet_ulp_can_listen(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone) + return -EINVAL; + + return 0; +} + int inet_csk_listen_start(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); int err; + err = inet_ulp_can_listen(sk); + if (unlikely(err)) + return err; + reqsk_queue_alloc(&icsk->icsk_accept_queue); sk->sk_ack_backlog = 0; diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 9ae50b1bd8444..05b6077b9f2c3 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -139,6 +139,10 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops) if (sk->sk_socket) clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); + err = -EINVAL; + if (!ulp_ops->clone && sk->sk_state == TCP_LISTEN) + goto out_err; + err = ulp_ops->init(sk); if (err) goto out_err; From 1ac88557447088ccd15eb2f2520ce46d463c8e0b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Jan 2023 19:27:36 +0000 Subject: [PATCH 0367/1593] inet: control sockets should not use current thread task_frag Because ICMP handlers run from softirq contexts, they must not use current thread task_frag. Previously, all sockets allocated by inet_ctl_sock_create() would use the per-socket page fragment, with no chance of recursion. Fixes: 98123866fcf3 ("Treewide: Stop corrupting socket's task_frag") Reported-by: syzbot+bebc6f1acdf4cbb79b03@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Cc: Benjamin Coddington Acked-by: Guillaume Nault Link: https://lore.kernel.org/r/20230103192736.454149-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/af_inet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ab4a06be489b5..6c0ec27899431 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1665,6 +1665,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, if (rc == 0) { *sk = sock->sk; (*sk)->sk_allocation = GFP_ATOMIC; + (*sk)->sk_use_task_frag = false; /* * Unhash it so that IP input processing does not even see it, * we do not wish this socket to see incoming packets. From 7246210ecdd0cda97fa3e3bb15c32c6c2d9a23b5 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Tue, 27 Dec 2022 11:29:28 +0000 Subject: [PATCH 0368/1593] cifs: refcount only the selected iface during interface update When the server interface for a channel is not active anymore, we have the logic to select an alternative interface. However this was not breaking out of the loop as soon as a new alternative was found. As a result, some interfaces may get refcounted unintentionally. There was also a bug in checking if we found an alternate iface. Fixed that too. Fixes: b54034a73baf ("cifs: during reconnect, update interface if necessary") Cc: stable@vger.kernel.org # 5.19+ Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/sess.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 9e7d9f0baa18a..0b842a07e1579 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -292,9 +292,10 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) continue; } kref_get(&iface->refcount); + break; } - if (!list_entry_is_head(iface, &ses->iface_list, iface_head)) { + if (list_entry_is_head(iface, &ses->iface_list, iface_head)) { rc = 1; iface = NULL; cifs_dbg(FYI, "unable to find a suitable iface\n"); From cc7d79d4fad6a4eab3f88c4bb237de72be4478f1 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 22 Dec 2022 12:54:44 +0000 Subject: [PATCH 0369/1593] cifs: fix interface count calculation during refresh The last fix to iface_count did fix the overcounting issue. However, during each refresh, we could end up undercounting the iface_count, if a match was found. Fixing this by doing increments and decrements instead of setting it to 0 before each parsing of server interfaces. Fixes: 096bbeec7bd6 ("smb3: interface count displayed incorrectly") Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0d7e9bcd9f345..e6bcd2baf446a 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -530,7 +530,6 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, p = buf; spin_lock(&ses->iface_lock); - ses->iface_count = 0; /* * Go through iface_list and do kref_put to remove * any unused ifaces. ifaces in use will be removed @@ -540,6 +539,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, iface_head) { iface->is_active = 0; kref_put(&iface->refcount, release_iface); + ses->iface_count--; } spin_unlock(&ses->iface_lock); @@ -618,6 +618,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, /* just get a ref so that it doesn't get picked/freed */ iface->is_active = 1; kref_get(&iface->refcount); + ses->iface_count++; spin_unlock(&ses->iface_lock); goto next_iface; } else if (ret < 0) { From a53da43decaa3936998fa7dce2346855a6942166 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 1 Jan 2023 15:07:09 +0900 Subject: [PATCH 0370/1593] kbuild: fix single *.ko build The single *.ko build is broken since commit f65a486821cf ("kbuild: change module.order to list *.o instead of *.ko"). Fixes: f65a486821cf ("kbuild: change module.order to list *.o instead of *.ko") Reported-by: Marc Kleine-Budde Signed-off-by: Masahiro Yamada Tested-by: Marc Kleine-Budde --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c05b4fb7121e5..dfba294ae7907 100644 --- a/Makefile +++ b/Makefile @@ -1986,7 +1986,7 @@ $(single-no-ko): $(build-dir) # Remove MODORDER when done because it is not the real one. PHONY += single_modules single_modules: $(single-no-ko) modules_prepare - $(Q){ $(foreach m, $(single-ko), echo $(extmod_prefix)$m;) } > $(MODORDER) + $(Q){ $(foreach m, $(single-ko), echo $(extmod_prefix)$(m:%.ko=%.o);) } > $(MODORDER) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost ifneq ($(KBUILD_MODPOST_NOFINAL),1) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modfinal From 735aec59afb18c3e2da0a637037e69ad62dbda6a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 4 Jan 2023 23:04:59 +0900 Subject: [PATCH 0371/1593] kbuild: readd -w option when vmlinux.o or Module.symver is missing Commit 63ffe00d8c93 ("kbuild: Fix running modpost with musl libc") accidentally turned the unresolved symbol warnings into errors when vmlinux.o (for in-tree builds) or Module.symver (for external module builds) is missing. In those cases, unresolved symbols are expected, but the -w option is not set because 'missing-input' is referenced before set. Move $(missing-input) back to the original place. This should be fine for musl libc because vmlinux.o and -w are not added at the same time. With this change, -w may be passed twice, but it is not a big deal. Link: https://lore.kernel.org/all/b56a03b8-2a2a-f833-a5d2-cdc50a7ca2bb@cschramm.eu/ Fixes: 63ffe00d8c93 ("kbuild: Fix running modpost with musl libc") Reported-by: Christopher Schramm Signed-off-by: Masahiro Yamada Tested-by: Samuel Holland --- scripts/Makefile.modpost | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 0ee296cf520c2..43343e13c5425 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -44,6 +44,7 @@ modpost-args = \ $(if $(CONFIG_MODVERSIONS),-m) \ $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \ $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ + $(if $(KBUILD_MODPOST_WARN),-w) \ $(if $(KBUILD_NSDEPS),-d $(MODULES_NSDEPS)) \ $(if $(CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS)$(KBUILD_NSDEPS),-N) \ -o $@ @@ -55,10 +56,6 @@ ifneq ($(findstring i,$(filter-out --%,$(MAKEFLAGS))),) modpost-args += -n endif -ifneq ($(KBUILD_MODPOST_WARN)$(missing-input),) -modpost-args += -w -endif - # Read out modules.order to pass in modpost. # Otherwise, allmodconfig would fail with "Argument list too long". ifdef KBUILD_MODULES @@ -124,6 +121,10 @@ modpost-args += -e $(addprefix -i , $(KBUILD_EXTRA_SYMBOLS)) endif # ($(KBUILD_EXTMOD),) +ifneq ($(missing-input),) +modpost-args += -w +endif + quiet_cmd_modpost = MODPOST $@ cmd_modpost = \ $(if $(missing-input), \ From fe69230f05897b3de758427b574fc98025dfc907 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 4 Jan 2023 14:51:46 +0800 Subject: [PATCH 0372/1593] caif: fix memory leak in cfctrl_linkup_request() When linktype is unknown or kzalloc failed in cfctrl_linkup_request(), pkt is not released. Add release process to error path. Fixes: b482cd2053e3 ("net-caif: add CAIF core protocol stack") Fixes: 8d545c8f958f ("caif: Disconnect without waiting for response") Signed-off-by: Zhengchao Shao Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20230104065146.1153009-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni --- net/caif/cfctrl.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index cc405d8c7c303..8480684f27625 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -269,11 +269,15 @@ int cfctrl_linkup_request(struct cflayer *layer, default: pr_warn("Request setup of bad link type = %d\n", param->linktype); + cfpkt_destroy(pkt); return -EINVAL; } req = kzalloc(sizeof(*req), GFP_KERNEL); - if (!req) + if (!req) { + cfpkt_destroy(pkt); return -ENOMEM; + } + req->client_layer = user_layer; req->cmd = CFCTRL_CMD_LINK_SETUP; req->param = *param; From 634cf6ead93988b0da9ac054521ab63a3ba189db Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Dec 2022 18:02:28 +0100 Subject: [PATCH 0373/1593] fbdev: omapfb: avoid stack overflow warning The dsi_irq_stats structure is a little too big to fit on the stack of a 32-bit task, depending on the specific gcc options: fbdev/omap2/omapfb/dss/dsi.c: In function 'dsi_dump_dsidev_irqs': fbdev/omap2/omapfb/dss/dsi.c:1621:1: error: the frame size of 1064 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] Since this is only a debugfs file, performance is not critical, so just dynamically allocate it, and print an error message in there in place of a failure code when the allocation fails. Signed-off-by: Arnd Bergmann Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/dss/dsi.c | 28 ++++++++++++++-------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c index 54b0f034c2edf..7cddb7b8ae344 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c @@ -1536,22 +1536,28 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev, { struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev); unsigned long flags; - struct dsi_irq_stats stats; + struct dsi_irq_stats *stats; + + stats = kzalloc(sizeof(*stats), GFP_KERNEL); + if (!stats) { + seq_printf(s, "out of memory\n"); + return; + } spin_lock_irqsave(&dsi->irq_stats_lock, flags); - stats = dsi->irq_stats; + *stats = dsi->irq_stats; memset(&dsi->irq_stats, 0, sizeof(dsi->irq_stats)); dsi->irq_stats.last_reset = jiffies; spin_unlock_irqrestore(&dsi->irq_stats_lock, flags); seq_printf(s, "period %u ms\n", - jiffies_to_msecs(jiffies - stats.last_reset)); + jiffies_to_msecs(jiffies - stats->last_reset)); - seq_printf(s, "irqs %d\n", stats.irq_count); + seq_printf(s, "irqs %d\n", stats->irq_count); #define PIS(x) \ - seq_printf(s, "%-20s %10d\n", #x, stats.dsi_irqs[ffs(DSI_IRQ_##x)-1]) + seq_printf(s, "%-20s %10d\n", #x, stats->dsi_irqs[ffs(DSI_IRQ_##x)-1]) seq_printf(s, "-- DSI%d interrupts --\n", dsi->module_id + 1); PIS(VC0); @@ -1575,10 +1581,10 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev, #define PIS(x) \ seq_printf(s, "%-20s %10d %10d %10d %10d\n", #x, \ - stats.vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \ - stats.vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \ - stats.vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \ - stats.vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]); + stats->vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \ + stats->vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \ + stats->vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \ + stats->vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]); seq_printf(s, "-- VC interrupts --\n"); PIS(CS); @@ -1594,7 +1600,7 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev, #define PIS(x) \ seq_printf(s, "%-20s %10d\n", #x, \ - stats.cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]); + stats->cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]); seq_printf(s, "-- CIO interrupts --\n"); PIS(ERRSYNCESC1); @@ -1618,6 +1624,8 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev, PIS(ULPSACTIVENOT_ALL0); PIS(ULPSACTIVENOT_ALL1); #undef PIS + + kfree(stats); } static void dsi1_dump_irqs(struct seq_file *s) From 4b9880dbf3bdba3a7c56445137c3d0e30aaa0a40 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 5 Jan 2023 22:05:04 +1100 Subject: [PATCH 0374/1593] powerpc/vmlinux.lds: Define RUNTIME_DISCARD_EXIT The powerpc linker script explicitly includes .exit.text, because otherwise the link fails due to references from __bug_table and __ex_table. The code is freed (discarded) at runtime along with .init.text and data. That has worked in the past despite powerpc not defining RUNTIME_DISCARD_EXIT because DISCARDS appears late in the powerpc linker script (line 410), and the explicit inclusion of .exit.text earlier (line 280) supersedes the discard. However commit 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv") introduced an earlier use of DISCARD as part of the RO_DATA macro (line 136). With binutils < 2.36 that causes the DISCARD directives later in the script to be applied earlier [1], causing .exit.text to actually be discarded at link time, leading to build errors: '.exit.text' referenced in section '__bug_table' of crypto/algboss.o: defined in discarded section '.exit.text' of crypto/algboss.o '.exit.text' referenced in section '__ex_table' of drivers/nvdimm/core.o: defined in discarded section '.exit.text' of drivers/nvdimm/core.o Fix it by defining RUNTIME_DISCARD_EXIT, which causes the generic DISCARDS macro to not include .exit.text at all. 1: https://lore.kernel.org/lkml/87fscp2v7k.fsf@igel.home/ Fixes: 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230105132349.384666-1-mpe@ellerman.id.au --- arch/powerpc/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8c3862b4c259d..c5ea7d03d5390 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -8,6 +8,7 @@ #define BSS_FIRST_SECTIONS *(.bss.prominit) #define EMITS_PT_NOTE #define RO_EXCEPTION_TABLE_ALIGN 0 +#define RUNTIME_DISCARD_EXIT #define SOFT_MASK_TABLE(align) \ . = ALIGN(align); \ From 07b050f9290ee012a407a0f64151db902a1520f5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 5 Jan 2023 22:28:36 +1100 Subject: [PATCH 0375/1593] powerpc/vmlinux.lds: Don't discard .rela* for relocatable builds Relocatable kernels must not discard relocations, they need to be processed at runtime. As such they are included for CONFIG_RELOCATABLE builds in the powerpc linker script (line 340). However they are also unconditionally discarded later in the script (line 414). Previously that worked because the earlier inclusion superseded the discard. However commit 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv") introduced an earlier use of DISCARD as part of the RO_DATA macro (line 137). With binutils < 2.36 that causes the DISCARD directives later in the script to be applied earlier, causing .rela* to actually be discarded at link time, leading to build warnings and a kernel that doesn't boot: ld: warning: discarding dynamic section .rela.init.rodata Fix it by conditionally discarding .rela* only when CONFIG_RELOCATABLE is disabled. Fixes: 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230105132349.384666-2-mpe@ellerman.id.au --- arch/powerpc/kernel/vmlinux.lds.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index c5ea7d03d5390..a4c6efadc90c1 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -411,9 +411,12 @@ SECTIONS DISCARDS /DISCARD/ : { *(*.EMB.apuinfo) - *(.glink .iplt .plt .rela* .comment) + *(.glink .iplt .plt .comment) *(.gnu.version*) *(.gnu.attributes) *(.eh_frame) +#ifndef CONFIG_RELOCATABLE + *(.rela*) +#endif } } From be5f95c8779e19779dd81927c8574fec5aaba36c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 5 Jan 2023 22:42:59 +1100 Subject: [PATCH 0376/1593] powerpc/vmlinux.lds: Don't discard .comment Although the powerpc linker script mentions .comment in the DISCARD section, that has never actually caused it to be discarded, because the earlier ELF_DETAILS macro (previously STABS_DEBUG) explicitly includes .comment. However commit 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv") introduced an earlier use of DISCARD as part of the RO_DATA macro. With binutils < 2.36 that causes the DISCARD directives later in the script to be applied earlier, causing .comment to actually be discarded. It's confusing to explicitly include and discard .comment, and even more so if the behaviour depends on the toolchain version. So don't discard .comment in order to maintain the existing behaviour in all cases. Fixes: 83a092cf95f2 ("powerpc: Link warning for orphan sections") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230105132349.384666-3-mpe@ellerman.id.au --- arch/powerpc/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index a4c6efadc90c1..958e77a24f85b 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -411,7 +411,7 @@ SECTIONS DISCARDS /DISCARD/ : { *(*.EMB.apuinfo) - *(.glink .iplt .plt .comment) + *(.glink .iplt .plt) *(.gnu.version*) *(.gnu.attributes) *(.eh_frame) From 12521a5d5cb7ff0ad43eadfc9c135d86e1131fa8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 5 Jan 2023 10:49:15 +0000 Subject: [PATCH 0377/1593] io_uring: fix CQ waiting timeout handling Jiffy to ktime CQ waiting conversion broke how we treat timeouts, in particular we rearm it anew every time we get into io_cqring_wait_schedule() without adjusting the timeout. Waiting for 2 CQEs and getting a task_work in the middle may double the timeout value, or even worse in some cases task may wait indefinitely. Cc: stable@vger.kernel.org Fixes: 228339662b398 ("io_uring: don't convert to jiffies for waiting on timeouts") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/f7bffddd71b08f28a877d44d37ac953ddb01590d.1672915663.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 472574192dd63..2ac1cd8d23ea6 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2470,7 +2470,7 @@ int io_run_task_work_sig(struct io_ring_ctx *ctx) /* when returns >0, the caller should retry */ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, - ktime_t timeout) + ktime_t *timeout) { int ret; unsigned long check_cq; @@ -2488,7 +2488,7 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, if (check_cq & BIT(IO_CHECK_CQ_DROPPED_BIT)) return -EBADR; } - if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS)) + if (!schedule_hrtimeout(timeout, HRTIMER_MODE_ABS)) return -ETIME; /* @@ -2564,7 +2564,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, } prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, TASK_INTERRUPTIBLE); - ret = io_cqring_wait_schedule(ctx, &iowq, timeout); + ret = io_cqring_wait_schedule(ctx, &iowq, &timeout); if (__io_cqring_events_user(ctx) >= min_events) break; cond_resched(); From 74c2f81054510d45b813548cb0a1c4ebf87cdd5f Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Mon, 21 Nov 2022 15:36:08 +0800 Subject: [PATCH 0378/1593] arm64/mm: fix incorrect file_map_count for invalid pmd The page table check trigger BUG_ON() unexpectedly when split hugepage: ------------[ cut here ]------------ kernel BUG at mm/page_table_check.c:119! Internal error: Oops - BUG: 00000000f2000800 [#1] SMP Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 7 PID: 210 Comm: transhuge-stres Not tainted 6.1.0-rc3+ #748 Hardware name: linux,dummy-virt (DT) pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : page_table_check_set.isra.0+0x398/0x468 lr : page_table_check_set.isra.0+0x1c0/0x468 [...] Call trace: page_table_check_set.isra.0+0x398/0x468 __page_table_check_pte_set+0x160/0x1c0 __split_huge_pmd_locked+0x900/0x1648 __split_huge_pmd+0x28c/0x3b8 unmap_page_range+0x428/0x858 unmap_single_vma+0xf4/0x1c8 zap_page_range+0x2b0/0x410 madvise_vma_behavior+0xc44/0xe78 do_madvise+0x280/0x698 __arm64_sys_madvise+0x90/0xe8 invoke_syscall.constprop.0+0xdc/0x1d8 do_el0_svc+0xf4/0x3f8 el0_svc+0x58/0x120 el0t_64_sync_handler+0xb8/0xc0 el0t_64_sync+0x19c/0x1a0 [...] On arm64, pmd_leaf() will return true even if the pmd is invalid due to pmd_present_invalid() check. So in pmdp_invalidate() the file_map_count will not only decrease once but also increase once. Then in set_pte_at(), the file_map_count increase again, and so trigger BUG_ON() unexpectedly. Add !pmd_present_invalid() check in pmd_user_accessible_page() to fix the problem. Fixes: 42b2547137f5 ("arm64/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK") Reported-by: Denys Vlasenko Signed-off-by: Liu Shixin Acked-by: Pasha Tatashin Acked-by: David Hildenbrand Reviewed-by: Kefeng Wang Acked-by: Will Deacon Link: https://lore.kernel.org/r/20221121073608.4183459-1-liushixin2@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b4bbeed80fb6e..67fc6fdb03fd2 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -862,7 +862,7 @@ static inline bool pte_user_accessible_page(pte_t pte) static inline bool pmd_user_accessible_page(pmd_t pmd) { - return pmd_leaf(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); + return pmd_leaf(pmd) && !pmd_present_invalid(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); } static inline bool pud_user_accessible_page(pud_t pud) From 730a11f982e61aaef758ab552dfb7c30de79e99b Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Tue, 22 Nov 2022 20:31:37 +0800 Subject: [PATCH 0379/1593] arm64/mm: add pud_user_exec() check in pud_user_accessible_page() Add check for the executable case in pud_user_accessible_page() too like what we did for pte and pmd. Fixes: 42b2547137f5 ("arm64/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK") Suggested-by: Will Deacon Signed-off-by: Liu Shixin Link: https://lore.kernel.org/r/20221122123137.429686-1-liushixin2@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 67fc6fdb03fd2..780973a6cbb61 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -681,7 +681,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define pud_leaf(pud) (pud_present(pud) && !pud_table(pud)) #define pud_valid(pud) pte_valid(pud_pte(pud)) #define pud_user(pud) pte_user(pud_pte(pud)) - +#define pud_user_exec(pud) pte_user_exec(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { @@ -867,7 +867,7 @@ static inline bool pmd_user_accessible_page(pmd_t pmd) static inline bool pud_user_accessible_page(pud_t pud) { - return pud_leaf(pud) && pud_user(pud); + return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud)); } #endif From eb9a85261e297292c4cc44b628c1373c996cedc2 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Wed, 14 Dec 2022 21:59:43 +0800 Subject: [PATCH 0380/1593] arm64: ptrace: Use ARM64_SME to guard the SME register enumerations We currently guard REGSET_{SSVE, ZA} using ARM64_SVE for no good reason. Both enumerations would be pointless without ARM64_SME and create two empty entries in aarch64_regsets[] which would then become part of a process's native regset view (they should be ignored though). Switch to use ARM64_SME instead. Fixes: e12310a0d30f ("arm64/sme: Implement ptrace support for streaming mode SVE registers") Signed-off-by: Zenghui Yu Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20221214135943.379-1-yuzenghui@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 2686ab1576017..0c321ad23cd3a 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1357,7 +1357,7 @@ enum aarch64_regset { #ifdef CONFIG_ARM64_SVE REGSET_SVE, #endif -#ifdef CONFIG_ARM64_SVE +#ifdef CONFIG_ARM64_SME REGSET_SSVE, REGSET_ZA, #endif From 736eedc974eaafbf4360e0ea85fc892cea72a223 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 22 Dec 2022 18:12:49 +0000 Subject: [PATCH 0381/1593] arm64: mte: Fix double-freeing of the temporary tag storage during coredump Commit 16decce22efa ("arm64: mte: Fix the stack frame size warning in mte_dump_tag_range()") moved the temporary tag storage array from the stack to slab but it also introduced an error in double freeing this object. Remove the in-loop freeing. Fixes: 16decce22efa ("arm64: mte: Fix the stack frame size warning in mte_dump_tag_range()") Cc: # 5.18.x Signed-off-by: Catalin Marinas Reported-by: Seth Jenkins Cc: Will Deacon Link: https://lore.kernel.org/r/20221222181251.1345752-2-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/elfcore.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 353009d7f307f..45f0aba1d8752 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -65,7 +65,6 @@ static int mte_dump_tag_range(struct coredump_params *cprm, mte_save_page_tags(page_address(page), tags); put_page(page); if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) { - mte_free_tag_storage(tags); ret = 0; break; } From 19e183b54528f11fafeca60fc6d0821e29ff281e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 22 Dec 2022 18:12:50 +0000 Subject: [PATCH 0382/1593] elfcore: Add a cprm parameter to elf_core_extra_{phdrs,data_size} A subsequent fix for arm64 will use this parameter to parse the vma information from the snapshot created by dump_vma_snapshot() rather than traversing the vma list without the mmap_lock. Fixes: 6dd8b1a0b6cb ("arm64: mte: Dump the MTE tags in the core file") Cc: # 5.18.x Signed-off-by: Catalin Marinas Reported-by: Seth Jenkins Suggested-by: Seth Jenkins Cc: Will Deacon Cc: Eric Biederman Cc: Kees Cook Link: https://lore.kernel.org/r/20221222181251.1345752-3-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/elfcore.c | 4 ++-- arch/ia64/kernel/elfcore.c | 4 ++-- arch/x86/um/elfcore.c | 4 ++-- fs/binfmt_elf.c | 4 ++-- fs/binfmt_elf_fdpic.c | 4 ++-- include/linux/elfcore.h | 8 ++++---- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 45f0aba1d8752..f25519e283a58 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -76,7 +76,7 @@ static int mte_dump_tag_range(struct coredump_params *cprm, return ret; } -Elf_Half elf_core_extra_phdrs(void) +Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm) { struct vm_area_struct *vma; int vma_count = 0; @@ -113,7 +113,7 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) return 1; } -size_t elf_core_extra_data_size(void) +size_t elf_core_extra_data_size(struct coredump_params *cprm) { struct vm_area_struct *vma; size_t data_size = 0; diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c index 94680521fbf91..8895df1215404 100644 --- a/arch/ia64/kernel/elfcore.c +++ b/arch/ia64/kernel/elfcore.c @@ -7,7 +7,7 @@ #include -Elf64_Half elf_core_extra_phdrs(void) +Elf64_Half elf_core_extra_phdrs(struct coredump_params *cprm) { return GATE_EHDR->e_phnum; } @@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm) return 1; } -size_t elf_core_extra_data_size(void) +size_t elf_core_extra_data_size(struct coredump_params *cprm) { const struct elf_phdr *const gate_phdrs = (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c index 48a3eb09d9516..650cdbbdaf45e 100644 --- a/arch/x86/um/elfcore.c +++ b/arch/x86/um/elfcore.c @@ -7,7 +7,7 @@ #include -Elf32_Half elf_core_extra_phdrs(void) +Elf32_Half elf_core_extra_phdrs(struct coredump_params *cprm) { return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0; } @@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm) return 1; } -size_t elf_core_extra_data_size(void) +size_t elf_core_extra_data_size(struct coredump_params *cprm) { if ( vsyscall_ehdr ) { const struct elfhdr *const ehdrp = diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index de63572a94044..9a780fafc5397 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2034,7 +2034,7 @@ static int elf_core_dump(struct coredump_params *cprm) * The number of segs are recored into ELF header as 16bit value. * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. */ - segs = cprm->vma_count + elf_core_extra_phdrs(); + segs = cprm->vma_count + elf_core_extra_phdrs(cprm); /* for notes section */ segs++; @@ -2074,7 +2074,7 @@ static int elf_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); offset += cprm->vma_data_size; - offset += elf_core_extra_data_size(); + offset += elf_core_extra_data_size(cprm); e_shoff = offset; if (e_phnum == PN_XNUM) { diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 096e3520a0b10..a05eafcacfb27 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1509,7 +1509,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) tmp->next = thread_list; thread_list = tmp; - segs = cprm->vma_count + elf_core_extra_phdrs(); + segs = cprm->vma_count + elf_core_extra_phdrs(cprm); /* for notes section */ segs++; @@ -1555,7 +1555,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); offset += cprm->vma_data_size; - offset += elf_core_extra_data_size(); + offset += elf_core_extra_data_size(cprm); e_shoff = offset; if (e_phnum == PN_XNUM) { diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 9ec81290e3c89..bd5560542c799 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -105,14 +105,14 @@ int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu); * Dumping its extra ELF program headers includes all the other information * a debugger needs to easily find how the gate DSO was being used. */ -extern Elf_Half elf_core_extra_phdrs(void); +extern Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm); extern int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset); extern int elf_core_write_extra_data(struct coredump_params *cprm); -extern size_t elf_core_extra_data_size(void); +extern size_t elf_core_extra_data_size(struct coredump_params *cprm); #else -static inline Elf_Half elf_core_extra_phdrs(void) +static inline Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm) { return 0; } @@ -127,7 +127,7 @@ static inline int elf_core_write_extra_data(struct coredump_params *cprm) return 1; } -static inline size_t elf_core_extra_data_size(void) +static inline size_t elf_core_extra_data_size(struct coredump_params *cprm) { return 0; } From 4f4c549feb4ecca95ae9abb88887b941d196f83a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 22 Dec 2022 18:12:51 +0000 Subject: [PATCH 0383/1593] arm64: mte: Avoid the racy walk of the vma list during core dump The MTE coredump code in arch/arm64/kernel/elfcore.c iterates over the vma list without the mmap_lock held. This can race with another process or userfaultfd concurrently modifying the vma list. Change the for_each_mte_vma macro and its callers to instead use the vma snapshot taken by dump_vma_snapshot() and stored in the cprm object. Fixes: 6dd8b1a0b6cb ("arm64: mte: Dump the MTE tags in the core file") Cc: # 5.18.x Signed-off-by: Catalin Marinas Reported-by: Seth Jenkins Suggested-by: Seth Jenkins Cc: Will Deacon Link: https://lore.kernel.org/r/20221222181251.1345752-4-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/elfcore.c | 56 +++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index f25519e283a58..2e94d20c4ac7a 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -8,28 +8,27 @@ #include #include -#define for_each_mte_vma(vmi, vma) \ +#define for_each_mte_vma(cprm, i, m) \ if (system_supports_mte()) \ - for_each_vma(vmi, vma) \ - if (vma->vm_flags & VM_MTE) + for (i = 0, m = cprm->vma_meta; \ + i < cprm->vma_count; \ + i++, m = cprm->vma_meta + i) \ + if (m->flags & VM_MTE) -static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) +static unsigned long mte_vma_tag_dump_size(struct core_vma_metadata *m) { - if (vma->vm_flags & VM_DONTDUMP) - return 0; - - return vma_pages(vma) * MTE_PAGE_TAG_STORAGE; + return (m->dump_size >> PAGE_SHIFT) * MTE_PAGE_TAG_STORAGE; } /* Derived from dump_user_range(); start/end must be page-aligned */ static int mte_dump_tag_range(struct coredump_params *cprm, - unsigned long start, unsigned long end) + unsigned long start, unsigned long len) { int ret = 1; unsigned long addr; void *tags = NULL; - for (addr = start; addr < end; addr += PAGE_SIZE) { + for (addr = start; addr < start + len; addr += PAGE_SIZE) { struct page *page = get_dump_page(addr); /* @@ -78,11 +77,11 @@ static int mte_dump_tag_range(struct coredump_params *cprm, Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm) { - struct vm_area_struct *vma; + int i; + struct core_vma_metadata *m; int vma_count = 0; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) + for_each_mte_vma(cprm, i, m) vma_count++; return vma_count; @@ -90,18 +89,18 @@ Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm) int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) { - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, current->mm, 0); + int i; + struct core_vma_metadata *m; - for_each_mte_vma(vmi, vma) { + for_each_mte_vma(cprm, i, m) { struct elf_phdr phdr; phdr.p_type = PT_AARCH64_MEMTAG_MTE; phdr.p_offset = offset; - phdr.p_vaddr = vma->vm_start; + phdr.p_vaddr = m->start; phdr.p_paddr = 0; - phdr.p_filesz = mte_vma_tag_dump_size(vma); - phdr.p_memsz = vma->vm_end - vma->vm_start; + phdr.p_filesz = mte_vma_tag_dump_size(m); + phdr.p_memsz = m->end - m->start; offset += phdr.p_filesz; phdr.p_flags = 0; phdr.p_align = 0; @@ -115,26 +114,23 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) size_t elf_core_extra_data_size(struct coredump_params *cprm) { - struct vm_area_struct *vma; + int i; + struct core_vma_metadata *m; size_t data_size = 0; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) - data_size += mte_vma_tag_dump_size(vma); + for_each_mte_vma(cprm, i, m) + data_size += mte_vma_tag_dump_size(m); return data_size; } int elf_core_write_extra_data(struct coredump_params *cprm) { - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, current->mm, 0); - - for_each_mte_vma(vmi, vma) { - if (vma->vm_flags & VM_DONTDUMP) - continue; + int i; + struct core_vma_metadata *m; - if (!mte_dump_tag_range(cprm, vma->vm_start, vma->vm_end)) + for_each_mte_vma(cprm, i, m) { + if (!mte_dump_tag_range(cprm, m->start, m->dump_size)) return 0; } From decb17aeb8fa21484a0140c0696dc5a477cc5c57 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 3 Jan 2023 09:50:20 +0000 Subject: [PATCH 0384/1593] KVM: arm64: vgic: Add Apple M2 cpus to the list of broken SEIS implementations I really hoped that Apple had fixed their not-quite-a-vgic implementation when moving from M1 to M2. Alas, it seems they didn't, and running a buggy EFI version results in the vgic generating SErrors outside of the guest and taking the host down. Apply the same workaround as for M1. Yes, this is all a bit crap. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230103095022.3230946-2-maz@kernel.org --- arch/arm64/include/asm/cputype.h | 4 ++++ arch/arm64/kvm/vgic/vgic-v3.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 4e8b66c74ea2b..683ca3af40848 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -124,6 +124,8 @@ #define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025 #define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028 #define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029 +#define APPLE_CPU_PART_M2_BLIZZARD 0x032 +#define APPLE_CPU_PART_M2_AVALANCHE 0x033 #define AMPERE_CPU_PART_AMPERE1 0xAC3 @@ -177,6 +179,8 @@ #define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO) #define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX) #define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) +#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD) +#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 826ff6f2a4e7b..2074521d4a8ce 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -616,6 +616,8 @@ static const struct midr_range broken_seis[] = { MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO), MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX), MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE), {}, }; From ef08c0fadd8a17ebe429b85e23952dac3263ad34 Mon Sep 17 00:00:00 2001 From: junhua huang Date: Wed, 28 Dec 2022 09:54:12 +0800 Subject: [PATCH 0385/1593] arm64/uprobes: change the uprobe_opcode_t typedef to fix the sparse warning After we fixed the uprobe inst endian in aarch_be, the sparse check report the following warning info: sparse warnings: (new ones prefixed by >>) >> kernel/events/uprobes.c:223:25: sparse: sparse: restricted __le32 degrades to integer >> kernel/events/uprobes.c:574:56: sparse: sparse: incorrect type in argument 4 (different base types) @@ expected unsigned int [addressable] [usertype] opcode @@ got restricted __le32 [usertype] @@ kernel/events/uprobes.c:574:56: sparse: expected unsigned int [addressable] [usertype] opcode kernel/events/uprobes.c:574:56: sparse: got restricted __le32 [usertype] >> kernel/events/uprobes.c:1483:32: sparse: sparse: incorrect type in initializer (different base types) @@ expected unsigned int [usertype] insn @@ got restricted __le32 [usertype] @@ kernel/events/uprobes.c:1483:32: sparse: expected unsigned int [usertype] insn kernel/events/uprobes.c:1483:32: sparse: got restricted __le32 [usertype] use the __le32 to u32 for uprobe_opcode_t, to keep the same. Fixes: 60f07e22a73d ("arm64:uprobe fix the uprobe SWBP_INSN in big-endian") Reported-by: kernel test robot Signed-off-by: junhua huang Link: https://lore.kernel.org/r/202212280954121197626@zte.com.cn Signed-off-by: Will Deacon --- arch/arm64/include/asm/uprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index ba4bff5ca6749..2b09495499c61 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -16,7 +16,7 @@ #define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE #define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES -typedef u32 uprobe_opcode_t; +typedef __le32 uprobe_opcode_t; struct arch_uprobe_task { }; From 36d7546b56a254709a38f1904231e1a93f1c5717 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 3 Jan 2023 12:39:33 +0000 Subject: [PATCH 0386/1593] MAINTAINERS: Add Zenghui Yu as a KVM/arm64 reviewer Zenghui has been around for quite some time, and has been instrumental in reviewing the GICv4/4.1 KVM support. I'm delighted that he's agreed to help with the patch review in a more official capacity! Acked-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230103123933.3234865-1-maz@kernel.org --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f61eb221415bd..551544d877a37 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11359,6 +11359,7 @@ R: James Morse R: Alexandru Elisei R: Suzuki K Poulose R: Oliver Upton +R: Zenghui Yu L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kvmarm@lists.linux.dev L: kvmarm@lists.cs.columbia.edu (deprecated, moderated for non-subscribers) From 7ed906e576a782b8272727f39c68a1762ea3ef98 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 3 Jan 2023 12:07:36 +0000 Subject: [PATCH 0387/1593] MAINTAINERS: Remove myself as a KVM/arm64 reviewer Haven't done any meaningful reviews for more than a year, and it doesn't look like I'll be able to do so in the future. Make it official and remove myself from the KVM/arm64 "Reviewers" list. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230103120736.116523-1-alexandru.elisei@arm.com --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 551544d877a37..6a0fc23455bdd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11356,7 +11356,6 @@ F: virt/kvm/* KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64) M: Marc Zyngier R: James Morse -R: Alexandru Elisei R: Suzuki K Poulose R: Oliver Upton R: Zenghui Yu From 031af50045ea97ed4386eb3751ca2c134d0fc911 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 4 Jan 2023 15:16:26 +0000 Subject: [PATCH 0388/1593] arm64: cmpxchg_double*: hazard against entire exchange variable The inline assembly for arm64's cmpxchg_double*() implementations use a +Q constraint to hazard against other accesses to the memory location being exchanged. However, the pointer passed to the constraint is a pointer to unsigned long, and thus the hazard only applies to the first 8 bytes of the location. GCC can take advantage of this, assuming that other portions of the location are unchanged, leading to a number of potential problems. This is similar to what we fixed back in commit: fee960bed5e857eb ("arm64: xchg: hazard against entire exchange variable") ... but we forgot to adjust cmpxchg_double*() similarly at the same time. The same problem applies, as demonstrated with the following test: | struct big { | u64 lo, hi; | } __aligned(128); | | unsigned long foo(struct big *b) | { | u64 hi_old, hi_new; | | hi_old = b->hi; | cmpxchg_double_local(&b->lo, &b->hi, 0x12, 0x34, 0x56, 0x78); | hi_new = b->hi; | | return hi_old ^ hi_new; | } ... which GCC 12.1.0 compiles as: | 0000000000000000 : | 0: d503233f paciasp | 4: aa0003e4 mov x4, x0 | 8: 1400000e b 40 | c: d2800240 mov x0, #0x12 // #18 | 10: d2800681 mov x1, #0x34 // #52 | 14: aa0003e5 mov x5, x0 | 18: aa0103e6 mov x6, x1 | 1c: d2800ac2 mov x2, #0x56 // #86 | 20: d2800f03 mov x3, #0x78 // #120 | 24: 48207c82 casp x0, x1, x2, x3, [x4] | 28: ca050000 eor x0, x0, x5 | 2c: ca060021 eor x1, x1, x6 | 30: aa010000 orr x0, x0, x1 | 34: d2800000 mov x0, #0x0 // #0 <--- BANG | 38: d50323bf autiasp | 3c: d65f03c0 ret | 40: d2800240 mov x0, #0x12 // #18 | 44: d2800681 mov x1, #0x34 // #52 | 48: d2800ac2 mov x2, #0x56 // #86 | 4c: d2800f03 mov x3, #0x78 // #120 | 50: f9800091 prfm pstl1strm, [x4] | 54: c87f1885 ldxp x5, x6, [x4] | 58: ca0000a5 eor x5, x5, x0 | 5c: ca0100c6 eor x6, x6, x1 | 60: aa0600a6 orr x6, x5, x6 | 64: b5000066 cbnz x6, 70 | 68: c8250c82 stxp w5, x2, x3, [x4] | 6c: 35ffff45 cbnz w5, 54 | 70: d2800000 mov x0, #0x0 // #0 <--- BANG | 74: d50323bf autiasp | 78: d65f03c0 ret Notice that at the lines with "BANG" comments, GCC has assumed that the higher 8 bytes are unchanged by the cmpxchg_double() call, and that `hi_old ^ hi_new` can be reduced to a constant zero, for both LSE and LL/SC versions of cmpxchg_double(). This patch fixes the issue by passing a pointer to __uint128_t into the +Q constraint, ensuring that the compiler hazards against the entire 16 bytes being modified. With this change, GCC 12.1.0 compiles the above test as: | 0000000000000000 : | 0: f9400407 ldr x7, [x0, #8] | 4: d503233f paciasp | 8: aa0003e4 mov x4, x0 | c: 1400000f b 48 | 10: d2800240 mov x0, #0x12 // #18 | 14: d2800681 mov x1, #0x34 // #52 | 18: aa0003e5 mov x5, x0 | 1c: aa0103e6 mov x6, x1 | 20: d2800ac2 mov x2, #0x56 // #86 | 24: d2800f03 mov x3, #0x78 // #120 | 28: 48207c82 casp x0, x1, x2, x3, [x4] | 2c: ca050000 eor x0, x0, x5 | 30: ca060021 eor x1, x1, x6 | 34: aa010000 orr x0, x0, x1 | 38: f9400480 ldr x0, [x4, #8] | 3c: d50323bf autiasp | 40: ca0000e0 eor x0, x7, x0 | 44: d65f03c0 ret | 48: d2800240 mov x0, #0x12 // #18 | 4c: d2800681 mov x1, #0x34 // #52 | 50: d2800ac2 mov x2, #0x56 // #86 | 54: d2800f03 mov x3, #0x78 // #120 | 58: f9800091 prfm pstl1strm, [x4] | 5c: c87f1885 ldxp x5, x6, [x4] | 60: ca0000a5 eor x5, x5, x0 | 64: ca0100c6 eor x6, x6, x1 | 68: aa0600a6 orr x6, x5, x6 | 6c: b5000066 cbnz x6, 78 | 70: c8250c82 stxp w5, x2, x3, [x4] | 74: 35ffff45 cbnz w5, 5c | 78: f9400480 ldr x0, [x4, #8] | 7c: d50323bf autiasp | 80: ca0000e0 eor x0, x7, x0 | 84: d65f03c0 ret ... sampling the high 8 bytes before and after the cmpxchg, and performing an EOR, as we'd expect. For backporting, I've tested this atop linux-4.9.y with GCC 5.5.0. Note that linux-4.9.y is oldest currently supported stable release, and mandates GCC 5.1+. Unfortunately I couldn't get a GCC 5.1 binary to run on my machines due to library incompatibilities. I've also used a standalone test to check that we can use a __uint128_t pointer in a +Q constraint at least as far back as GCC 4.8.5 and LLVM 3.9.1. Fixes: 5284e1b4bc8a ("arm64: xchg: Implement cmpxchg_double") Fixes: e9a4b795652f ("arm64: cmpxchg_dbl: patch in lse instructions when supported by the CPU") Reported-by: Boqun Feng Link: https://lore.kernel.org/lkml/Y6DEfQXymYVgL3oJ@boqun-archlinux/ Reported-by: Peter Zijlstra Link: https://lore.kernel.org/lkml/Y6GXoO4qmH9OIZ5Q@hirez.programming.kicks-ass.net/ Signed-off-by: Mark Rutland Cc: stable@vger.kernel.org Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Steve Capper Cc: Will Deacon Link: https://lore.kernel.org/r/20230104151626.3262137-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/atomic_ll_sc.h | 2 +- arch/arm64/include/asm/atomic_lse.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 0890e4f568fb7..cbb3d961123b1 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -315,7 +315,7 @@ __ll_sc__cmpxchg_double##name(unsigned long old1, \ " cbnz %w0, 1b\n" \ " " #mb "\n" \ "2:" \ - : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \ + : "=&r" (tmp), "=&r" (ret), "+Q" (*(__uint128_t *)ptr) \ : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \ : cl); \ \ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 52075e93de6c0..a94d6dacc0292 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -311,7 +311,7 @@ __lse__cmpxchg_double##name(unsigned long old1, \ " eor %[old2], %[old2], %[oldval2]\n" \ " orr %[old1], %[old1], %[old2]" \ : [old1] "+&r" (x0), [old2] "+&r" (x1), \ - [v] "+Q" (*(unsigned long *)ptr) \ + [v] "+Q" (*(__uint128_t *)ptr) \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ : cl); \ From df875276d8c69b27e1ed242d3cee6bc23d3b7f43 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 10 Dec 2022 21:03:49 +0100 Subject: [PATCH 0389/1593] dt-bindings: interconnect: Add UFS clocks to MSM8996 A2NoC MSM8996 A2NoC contains a UFS master, which means the UFS hardware is accessed every time sync_state is called within the interconnect framework. It's all good on devices where this clock is already enabled (most likely from the bootloader), but devices with eMMC storage are rather unlikely to have it like that. Add the missing 2 clocks to the binding. Signed-off-by: Konrad Dybcio Acked-by: Krzysztof Kozlowski Tested-by: Dmitry Baryshkov #db820c Link: https://lore.kernel.org/r/20221210200353.418391-2-konrad.dybcio@linaro.org Signed-off-by: Georgi Djakov --- .../bindings/interconnect/qcom,rpm.yaml | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml b/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml index 4b37aa88a375b..5e6be4e79201e 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml @@ -84,7 +84,6 @@ allOf: - qcom,msm8939-pcnoc - qcom,msm8939-snoc - qcom,msm8996-a1noc - - qcom,msm8996-a2noc - qcom,msm8996-bimc - qcom,msm8996-cnoc - qcom,msm8996-pnoc @@ -186,6 +185,29 @@ allOf: required: - power-domains + - if: + properties: + compatible: + contains: + enum: + - qcom,msm8996-a2noc + + then: + properties: + clock-names: + items: + - const: bus + - const: bus_a + - const: aggre2_ufs_axi + - const: ufs_axi + + clocks: + items: + - description: Bus Clock + - description: Bus A Clock + - description: Aggregate2 NoC UFS AXI Clock + - description: UFS AXI Clock + - if: properties: compatible: From 60426ff08af6a21275d9c879c0dfb09406469868 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 10 Dec 2022 21:03:50 +0100 Subject: [PATCH 0390/1593] interconnect: qcom: msm8996: Provide UFS clocks to A2NoC On eMMC devices the bootloader has no business enabling UFS clocks. That results in a platform hang and hard reboot when trying to vote on paths including MASTER_UFS and since sync_state guarantees that it's done at boot time, this effectively prevents such devices from booting. Fix that. Fixes: 7add937f5222 ("interconnect: qcom: Add MSM8996 interconnect provider driver") Signed-off-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov #db820c Link: https://lore.kernel.org/r/20221210200353.418391-3-konrad.dybcio@linaro.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/msm8996.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/interconnect/qcom/msm8996.c b/drivers/interconnect/qcom/msm8996.c index c2903ae3b3bc3..7ddb1f23fb2a3 100644 --- a/drivers/interconnect/qcom/msm8996.c +++ b/drivers/interconnect/qcom/msm8996.c @@ -33,6 +33,13 @@ static const char * const bus_a0noc_clocks[] = { "aggre0_noc_mpu_cfg" }; +static const char * const bus_a2noc_clocks[] = { + "bus", + "bus_a", + "aggre2_ufs_axi", + "ufs_axi" +}; + static const u16 mas_a0noc_common_links[] = { MSM8996_SLAVE_A0NOC_SNOC }; @@ -1859,6 +1866,8 @@ static const struct qcom_icc_desc msm8996_a2noc = { .type = QCOM_ICC_NOC, .nodes = a2noc_nodes, .num_nodes = ARRAY_SIZE(a2noc_nodes), + .clocks = bus_a2noc_clocks, + .num_clocks = ARRAY_SIZE(bus_a2noc_clocks), .regmap_cfg = &msm8996_a2noc_regmap_config }; From 4be39d5d86c690c60e2afe55787fc5ec4409d0f0 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 10 Dec 2022 21:03:51 +0100 Subject: [PATCH 0391/1593] interconnect: qcom: msm8996: Fix regmap max_register values The device tree reg starts at BUS_BASE + QoS_OFFSET, but the regmap configs in the ICC driver had values suggesting the reg started at BUS_BASE. Shrink them down (where they haven't been already, so for providers where QoS_OFFSET = 0) to make sure they stay within their window. Fixes: 7add937f5222 ("interconnect: qcom: Add MSM8996 interconnect provider driver") Signed-off-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov #db820c Link: https://lore.kernel.org/r/20221210200353.418391-4-konrad.dybcio@linaro.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/msm8996.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/interconnect/qcom/msm8996.c b/drivers/interconnect/qcom/msm8996.c index 7ddb1f23fb2a3..25a1a32bc611f 100644 --- a/drivers/interconnect/qcom/msm8996.c +++ b/drivers/interconnect/qcom/msm8996.c @@ -1813,7 +1813,7 @@ static const struct regmap_config msm8996_a0noc_regmap_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .max_register = 0x9000, + .max_register = 0x6000, .fast_io = true }; @@ -1837,7 +1837,7 @@ static const struct regmap_config msm8996_a1noc_regmap_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .max_register = 0x7000, + .max_register = 0x5000, .fast_io = true }; @@ -1858,7 +1858,7 @@ static const struct regmap_config msm8996_a2noc_regmap_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .max_register = 0xa000, + .max_register = 0x7000, .fast_io = true }; @@ -1886,7 +1886,7 @@ static const struct regmap_config msm8996_bimc_regmap_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .max_register = 0x62000, + .max_register = 0x5a000, .fast_io = true }; @@ -1997,7 +1997,7 @@ static const struct regmap_config msm8996_mnoc_regmap_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .max_register = 0x20000, + .max_register = 0x1c000, .fast_io = true }; From dd42ec8ea5b979edcebbf0ba05807d866884b567 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 10 Dec 2022 21:03:52 +0100 Subject: [PATCH 0392/1593] interconnect: qcom: rpm: Use _optional func for provider clocks It turned out that - very unfortunately - msm8996 needs a binding update, adding 2 more clocks to the A2NoC node. Use the _optional variant of devm_clk_get to make sure old DTs will still probe with newer versions of the driver. Signed-off-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov #db820c Link: https://lore.kernel.org/r/20221210200353.418391-5-konrad.dybcio@linaro.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/icc-rpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c index ba6781f54ab73..df3196f725368 100644 --- a/drivers/interconnect/qcom/icc-rpm.c +++ b/drivers/interconnect/qcom/icc-rpm.c @@ -488,7 +488,7 @@ int qnoc_probe(struct platform_device *pdev) } regmap_done: - ret = devm_clk_bulk_get(dev, qp->num_clks, qp->bus_clks); + ret = devm_clk_bulk_get_optional(dev, qp->num_clks, qp->bus_clks); if (ret) return ret; From 0cab5b4964c7064893e6ff5e81087a9206c63908 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 27 Dec 2022 17:12:04 +0000 Subject: [PATCH 0393/1593] arm64/sme: Fix context switch for SME only systems When refactoring fpsimd_load() to support keeping SVE enabled over syscalls support for systems with SME but not SVE was broken. The code that selects between loading regular FPSIMD and SVE states was guarded by using system_supports_sve() but is also needed to handle the streaming SVE state in SME only systems where that check will be false. Fix this by also checking for system_supports_sme(). Fixes: a0136be443d5 ("arm64/fpsimd: Load FP state based on recorded data type") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20221223-arm64-fix-sme-only-v1-1-938d663f69e5@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index dcc81e7200d40..b6ef1af0122eb 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -385,7 +385,7 @@ static void task_fpsimd_load(void) WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - if (system_supports_sve()) { + if (system_supports_sve() || system_supports_sme()) { switch (current->thread.fp_type) { case FP_STATE_FPSIMD: /* Stop tracking SVE for this task until next use. */ From 7dde62f0687c8856b6c0660066c7ee83a6a6f033 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 27 Dec 2022 17:12:05 +0000 Subject: [PATCH 0394/1593] arm64/signal: Always accept SVE signal frames on SME only systems Currently we reject an attempt to restore a SVE signal frame on a system with SME but not SVE supported. This means that it is not possible to disable streaming mode via signal return as this is configured via the flags in the SVE signal context. Instead accept the signal frame, we will require it to have a vector length of 0 specified and no payload since the task will have no SVE vector length configured. Fixes: 85ed24dad290 ("arm64/sme: Implement streaming SVE signal handling") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20221223-arm64-fix-sme-only-v1-2-938d663f69e5@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index e0d09bf5b01b8..f90ee2dc413cd 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -281,7 +281,12 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) vl = task_get_sme_vl(current); } else { - if (!system_supports_sve()) + /* + * A SME only system use SVE for streaming mode so can + * have a SVE formatted context with a zero VL and no + * payload data. + */ + if (!system_supports_sve() && !system_supports_sme()) return -EINVAL; vl = task_get_sve_vl(current); From f26cd7372160da2eba31061d7943348ab9f2c01d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 27 Dec 2022 17:12:06 +0000 Subject: [PATCH 0395/1593] arm64/signal: Always allocate SVE signal frames on SME only systems Currently we only allocate space for SVE signal frames on systems that support SVE, meaning that SME only systems do not allocate a signal frame for streaming mode SVE state. Change the check so space is allocated if either feature is supported. Fixes: 85ed24dad290 ("arm64/sme: Implement streaming SVE signal handling") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20221223-arm64-fix-sme-only-v1-3-938d663f69e5@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index f90ee2dc413cd..be279fd482480 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -737,7 +737,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, return err; } - if (system_supports_sve()) { + if (system_supports_sve() || system_supports_sme()) { unsigned int vq = 0; if (add_all || test_thread_flag(TIF_SVE) || From 601a27ea09a317d0fe2895df7d875381fb393041 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Wed, 4 Jan 2023 10:05:43 -0800 Subject: [PATCH 0396/1593] xfs: fix extent busy updating In xfs_extent_busy_update_extent() case 6 and 7, whenever bno is modified on extent busy, the relavent length has to be modified accordingly. Signed-off-by: Wengang Wang Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_extent_busy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index ad22a003f9595..f3d328e4a4408 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -236,6 +236,7 @@ xfs_extent_busy_update_extent( * */ busyp->bno = fend; + busyp->length = bend - fend; } else if (bbno < fbno) { /* * Case 8: From b2b50d572135c5c6e10c2ff79cd828d5a8141ef6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Jan 2023 16:37:53 -0800 Subject: [PATCH 0397/1593] block: Remove "select SRCU" Now that the SRCU Kconfig option is unconditionally selected, there is no longer any point in selecting it. Therefore, remove the "select SRCU" Kconfig statements. Signed-off-by: Paul E. McKenney Cc: Jens Axboe Cc: linux-block@vger.kernel.org Signed-off-by: Jens Axboe --- block/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/block/Kconfig b/block/Kconfig index 444c5ab3b67e2..5d9d9c84d5165 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -6,7 +6,6 @@ menuconfig BLOCK bool "Enable the block layer" if EXPERT default y select SBITMAP - select SRCU help Provide block layer support for the kernel. From 38892ea4cefbb6ed3a91e76d3af84a1f8077d2d4 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Wed, 4 Jan 2023 18:36:27 +0900 Subject: [PATCH 0398/1593] spi: dt-bindings: Rename spi-cs-setup-ns to spi-cs-setup-delay-ns Other delay values follow the delay-ns naming convention, so unify the newly introduced spi-cs-setup-ns property for consistency. Also fix a typo while we're here. Fixes: f6c911f3308c ("spi: dt-bindings: Introduce spi-cs-setup-ns property") Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20230104093631.15611-2-marcan@marcan.st Signed-off-by: Mark Brown --- .../devicetree/bindings/spi/spi-peripheral-props.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml index ead2cccf658fd..9a60c0664bbe7 100644 --- a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml +++ b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml @@ -44,9 +44,9 @@ properties: description: Maximum SPI clocking speed of the device in Hz. - spi-cs-setup-ns: + spi-cs-setup-delay-ns: description: - Delay in nanosecods to be introduced by the controller after CS is + Delay in nanoseconds to be introduced by the controller after CS is asserted. spi-rx-bus-width: From e0fe6a31cac84735939c29d1e05055d58325c6c0 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Wed, 4 Jan 2023 18:36:28 +0900 Subject: [PATCH 0399/1593] spi: Rename spi-cs-setup-ns property to spi-cs-setup-delay-ns As mentioned in the corresponding DT binding commit, the naming scheme for delay properties includes "delay" in the name, so let's keep that consistent. Fixes: 33a2fde5f77b ("spi: Introduce spi-cs-setup-ns property") Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20230104093631.15611-3-marcan@marcan.st Signed-off-by: Mark Brown --- drivers/spi/spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 3cc7bb4d03dec..15f174f4e0561 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2310,7 +2310,7 @@ static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi, if (!of_property_read_u32(nc, "spi-max-frequency", &value)) spi->max_speed_hz = value; - if (!of_property_read_u16(nc, "spi-cs-setup-ns", &cs_setup)) { + if (!of_property_read_u16(nc, "spi-cs-setup-delay-ns", &cs_setup)) { spi->cs_setup.value = cs_setup; spi->cs_setup.unit = SPI_DELAY_UNIT_NSECS; } From 6fea87637bf36bd285227f490132e83582ab7513 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 16 Dec 2022 17:12:53 +0800 Subject: [PATCH 0400/1593] drm/amd/pm: correct the reference clock for fan speed(rpm) calculation Correct the reference clock as 25Mhz for SMU13 fan speed calculation. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x, 6.1.x --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index e54b760b875bf..d1f50d42288d2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1261,7 +1261,8 @@ int smu_v13_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t speed) { struct amdgpu_device *adev = smu->adev; - uint32_t tach_period, crystal_clock_freq; + uint32_t crystal_clock_freq = 2500; + uint32_t tach_period; int ret; if (!speed) @@ -1271,7 +1272,6 @@ int smu_v13_0_set_fan_speed_rpm(struct smu_context *smu, if (ret) return ret; - crystal_clock_freq = amdgpu_asic_get_xclk(adev); tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed); WREG32_SOC15(THM, 0, regCG_TACH_CTRL, REG_SET_FIELD(RREG32_SOC15(THM, 0, regCG_TACH_CTRL), From 318ca20893c19ead02845a08204c3f9249bb74cd Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 4 Jan 2023 10:45:01 +0800 Subject: [PATCH 0401/1593] drm/amd/pm: add the missing mapping for PPT feature on SMU13.0.0 and 13.0.7 Then we are able to set a new ppt limit via the hwmon interface(power1_cap). Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x, 6.1.x --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 1 + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 9643b21c636a9..4c20d17e7416e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -213,6 +213,7 @@ static struct cmn2asic_mapping smu_v13_0_0_feature_mask_map[SMU_FEATURE_COUNT] = FEA_MAP(SOC_PCC), [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, + [SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT}, }; static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 5c6c6ad011ca6..e87db7e02e8a5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -192,6 +192,7 @@ static struct cmn2asic_mapping smu_v13_0_7_feature_mask_map[SMU_FEATURE_COUNT] = FEA_MAP(SOC_PCC), [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, + [SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT}, }; static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = { From ed21f6c3fe423b17211fa5a85cd028621a749a2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 5 Jan 2023 10:45:36 +0100 Subject: [PATCH 0402/1593] drm/amdgpu: fix another missing fence reference in the CS code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_sched_job_add_dependency() consumes the references of the gang members. Only triggered by mesh shaders. Signed-off-by: Christian König Fixes: 1728baa7e4e6 ("drm/amdgpu: use scheduler dependencies for CS") Tested-by: Mike Lothian Tested-by: Bert Karwatzki Link: https://patchwork.freedesktop.org/patch/msgid/20230105111703.52695-1-christian.koenig@amd.com Reviewed-by: Alex Deucher Reviewed-by: Luben Tuikov --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8516c814bc9b5..47763ac0d14a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1254,9 +1254,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, continue; fence = &p->jobs[i]->base.s_fence->scheduled; + dma_fence_get(fence); r = drm_sched_job_add_dependency(&leader->base, fence); - if (r) + if (r) { + dma_fence_put(fence); goto error_cleanup; + } } if (p->gang_size > 1) { From 41cc108b2451e0c65e8c9c9f6a3e103b62b60bb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 5 Jan 2023 11:27:16 +0100 Subject: [PATCH 0403/1593] drm/amdgpu: fix missing dma_fence_put in error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the fence can't be added we need to drop the reference. Suggested-by: Bert Karwatzki Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20230105111703.52695-2-christian.koenig@amd.com Reviewed-by: Alex Deucher Reviewed-by: Luben Tuikov --- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index bac7976975bd3..dcd8c066bc1f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -391,8 +391,10 @@ int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job) dma_fence_get(f); r = drm_sched_job_add_dependency(&job->base, f); - if (r) + if (r) { + dma_fence_put(f); return r; + } } return 0; } From e95d50d74b93a767a026f588e8de0b9718a0105e Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 5 Jan 2023 13:23:39 +0200 Subject: [PATCH 0404/1593] lib/scatterlist: Fix to merge contiguous pages into the last SG properly When sg_alloc_append_table_from_pages() calls to pages_are_mergeable() in its 'sgt_append->prv' flow to check whether it can merge contiguous pages into the last SG, it passes the page arguments in the wrong order. The first parameter should be the next candidate page to be merged to the last page and not the opposite. The current code leads to a corrupted SG which resulted in OOPs and unexpected errors when non-contiguous pages are merged wrongly. Fix to pass the page parameters in the right order. Fixes: 1567b49d1a40 ("lib/scatterlist: add check when merging zone device pages") Link: https://lore.kernel.org/r/20230105112339.107969-1-yishaih@nvidia.com Signed-off-by: Yishai Hadas Reviewed-by: Jason Gunthorpe Reviewed-by: Logan Gunthorpe Signed-off-by: Jason Gunthorpe --- lib/scatterlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index a0ad2a7959b5d..f72aa50c6654b 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -476,7 +476,7 @@ int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append, /* Merge contiguous pages into the last SG */ prv_len = sgt_append->prv->length; last_pg = sg_page(sgt_append->prv); - while (n_pages && pages_are_mergeable(last_pg, pages[0])) { + while (n_pages && pages_are_mergeable(pages[0], last_pg)) { if (sgt_append->prv->length + PAGE_SIZE > max_segment) break; sgt_append->prv->length += PAGE_SIZE; From b2d473a6019ef9a54b0156ecdb2e0398c9fa6a24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 2 Jan 2023 17:07:48 +0100 Subject: [PATCH 0405/1593] riscv, kprobes: Stricter c.jr/c.jalr decoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the compressed instruction extension, c.jr, c.jalr, c.mv, and c.add is encoded the following way (each instruction is 16b): ---+-+-----------+-----------+-- 100 0 rs1[4:0]!=0 00000 10 : c.jr 100 1 rs1[4:0]!=0 00000 10 : c.jalr 100 0 rd[4:0]!=0 rs2[4:0]!=0 10 : c.mv 100 1 rd[4:0]!=0 rs2[4:0]!=0 10 : c.add The following logic is used to decode c.jr and c.jalr: insn & 0xf007 == 0x8002 => instruction is an c.jr insn & 0xf007 == 0x9002 => instruction is an c.jalr When 0xf007 is used to mask the instruction, c.mv can be incorrectly decoded as c.jr, and c.add as c.jalr. Correct the decoding by changing the mask from 0xf007 to 0xf07f. Fixes: c22b0bcb1dd0 ("riscv: Add kprobes supported") Signed-off-by: Björn Töpel Reviewed-by: Conor Dooley Reviewed-by: Guo Ren Link: https://lore.kernel.org/r/20230102160748.1307289-1-bjorn@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/probes/simulate-insn.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/probes/simulate-insn.h b/arch/riscv/kernel/probes/simulate-insn.h index cb6ff7dccb92e..de8474146a9b6 100644 --- a/arch/riscv/kernel/probes/simulate-insn.h +++ b/arch/riscv/kernel/probes/simulate-insn.h @@ -31,9 +31,9 @@ __RISCV_INSN_FUNCS(fence, 0x7f, 0x0f); } while (0) __RISCV_INSN_FUNCS(c_j, 0xe003, 0xa001); -__RISCV_INSN_FUNCS(c_jr, 0xf007, 0x8002); +__RISCV_INSN_FUNCS(c_jr, 0xf07f, 0x8002); __RISCV_INSN_FUNCS(c_jal, 0xe003, 0x2001); -__RISCV_INSN_FUNCS(c_jalr, 0xf007, 0x9002); +__RISCV_INSN_FUNCS(c_jalr, 0xf07f, 0x9002); __RISCV_INSN_FUNCS(c_beqz, 0xe003, 0xc001); __RISCV_INSN_FUNCS(c_bnez, 0xe003, 0xe001); __RISCV_INSN_FUNCS(c_ebreak, 0xffff, 0x9002); From b9b916aee6715cd7f3318af6dc360c4729417b94 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 29 Dec 2022 17:05:45 +0000 Subject: [PATCH 0406/1593] riscv: uaccess: fix type of 0 variable on error in get_user() If the get_user(x, ptr) has x as a pointer, then the setting of (x) = 0 is going to produce the following sparse warning, so fix this by forcing the type of 'x' when access_ok() fails. fs/aio.c:2073:21: warning: Using plain integer as NULL pointer Signed-off-by: Ben Dooks Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20221229170545.718264-1-ben-linux@fluff.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 855450bed9f52..ec0cab9fbddd0 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -165,7 +165,7 @@ do { \ might_fault(); \ access_ok(__p, sizeof(*__p)) ? \ __get_user((x), __p) : \ - ((x) = 0, -EFAULT); \ + ((x) = (__force __typeof__(x))0, -EFAULT); \ }) #define __put_user_asm(insn, x, ptr, err) \ From cdf64343f91a1225e9e3d4ce4261962cd41b4ddd Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Fri, 23 Dec 2022 02:10:08 +0000 Subject: [PATCH 0407/1593] dt-bindings: msm: dsi-controller-main: Fix operating-points-v2 constraint The existing msm8916.dtsi does not depend on nor require operating points. Fixes: 4dbe55c97741 ("dt-bindings: msm: dsi: add yaml schemas for DSI bindings") Reviewed-by: Dmitry Baryshkov Acked-by: Krzysztof Kozlowski Signed-off-by: Bryan O'Donoghue Patchwork: https://patchwork.freedesktop.org/patch/515940/ Link: https://lore.kernel.org/r/20221223021025.1646636-2-bryan.odonoghue@linaro.org Signed-off-by: Abhinav Kumar --- .../devicetree/bindings/display/msm/dsi-controller-main.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml index f2c143730a551..55bfe1101d6f4 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml @@ -138,7 +138,6 @@ required: - assigned-clocks - assigned-clock-parents - power-domains - - operating-points-v2 - ports additionalProperties: false From a6f033938beb31f893302a93f83ec0b6460c6cac Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Fri, 23 Dec 2022 02:10:09 +0000 Subject: [PATCH 0408/1593] dt-bindings: msm: dsi-controller-main: Fix power-domain constraint power-domain is required for the sc7180 dispcc GDSC but not every qcom SoC has a similar dependency for example the apq8064. Most Qcom SoC's using mdss-dsi-ctrl seem to have the ability to power-collapse the MDP without collapsing DSI. For example the qcom vendor kernel commit for apq8084, msm8226, msm8916, msm8974. https://review.carbonrom.org/plugins/gitiles/CarbonROM/android_kernel_oneplus_msm8994/+/7b5c011a770daa2811778937ed646237a28a8694 "ARM: dts: msm: add mdss gdsc supply to dsi controller device It is possible for the DSI controller to be active when MDP is power collapsed. DSI controller needs to have it's own vote for mdss gdsc to ensure that gdsc remains on in such cases." This however doesn't appear to be the case for the apq8064 so we shouldn't be marking power-domain as required in yaml checks. Fixes: 4dbe55c97741 ("dt-bindings: msm: dsi: add yaml schemas for DSI bindings") Reviewed-by: Dmitry Baryshkov Acked-by: Krzysztof Kozlowski Signed-off-by: Bryan O'Donoghue Patchwork: https://patchwork.freedesktop.org/patch/515958/ Link: https://lore.kernel.org/r/20221223021025.1646636-3-bryan.odonoghue@linaro.org Signed-off-by: Abhinav Kumar --- .../devicetree/bindings/display/msm/dsi-controller-main.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml index 55bfe1101d6f4..8ba61fef576a5 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml @@ -137,7 +137,6 @@ required: - phys - assigned-clocks - assigned-clock-parents - - power-domains - ports additionalProperties: false From 654ffe4b793b42ed6b5909daff0b91809916d94e Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Fri, 23 Dec 2022 02:10:10 +0000 Subject: [PATCH 0409/1593] dt-bindings: msm: dsi-controller-main: Fix description of core clock There's a typo in describing the core clock as an 'escape' clock. The accurate description is 'core'. Fixes: 4dbe55c97741 ("dt-bindings: msm: dsi: add yaml schemas for DSI bindings") Reviewed-by: Dmitry Baryshkov Acked-by: Krzysztof Kozlowski Signed-off-by: Bryan O'Donoghue Patchwork: https://patchwork.freedesktop.org/patch/515938/ Link: https://lore.kernel.org/r/20221223021025.1646636-4-bryan.odonoghue@linaro.org Signed-off-by: Abhinav Kumar --- .../devicetree/bindings/display/msm/dsi-controller-main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml index 8ba61fef576a5..6e2fd6e9fa7f0 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml @@ -32,7 +32,7 @@ properties: - description: Display byte clock - description: Display byte interface clock - description: Display pixel clock - - description: Display escape clock + - description: Display core clock - description: Display AHB clock - description: Display AXI clock From 00dd060ab3cf95ca6ede7853bc14397014971b5e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 5 Jan 2023 03:47:43 +0200 Subject: [PATCH 0410/1593] drm/msm: another fix for the headless Adreno GPU Fix another oops reproducible when rebooting the board with the Adreno GPU working in the headless mode (e.g. iMX platforms). Unable to handle kernel NULL pointer dereference at virtual address 00000000 when read [00000000] *pgd=74936831, *pte=00000000, *ppte=00000000 Internal error: Oops: 17 [#1] ARM CPU: 0 PID: 51 Comm: reboot Not tainted 6.2.0-rc1-dirty #11 Hardware name: Freescale i.MX53 (Device Tree Support) PC is at msm_atomic_commit_tail+0x50/0x970 LR is at commit_tail+0x9c/0x188 pc : [] lr : [] psr: 600e0013 sp : e0851d30 ip : ee4eb7eb fp : 00090acc r10: 00000058 r9 : c2193014 r8 : c4310000 r7 : c4759380 r6 : 07bef61d r5 : 00000000 r4 : 00000000 r3 : c44cc440 r2 : 00000000 r1 : 00000000 r0 : 00000000 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 74910019 DAC: 00000051 Register r0 information: NULL pointer Register r1 information: NULL pointer Register r2 information: NULL pointer Register r3 information: slab kmalloc-1k start c44cc400 pointer offset 64 size 1024 Register r4 information: NULL pointer Register r5 information: NULL pointer Register r6 information: non-paged memory Register r7 information: slab kmalloc-128 start c4759380 pointer offset 0 size 128 Register r8 information: slab kmalloc-2k start c4310000 pointer offset 0 size 2048 Register r9 information: non-slab/vmalloc memory Register r10 information: non-paged memory Register r11 information: non-paged memory Register r12 information: non-paged memory Process reboot (pid: 51, stack limit = 0xc80046d9) Stack: (0xe0851d30 to 0xe0852000) 1d20: c4759380 fbd77200 000005ff 002b9c70 1d40: c4759380 c4759380 00000000 07bef61d 00000600 c0d6fe7c c2193014 00000058 1d60: 00090acc c067a214 00000000 c4759380 c4310000 00000000 c44cc854 c067a89c 1d80: 00000000 00000000 00000000 c4310468 00000000 c4759380 c4310000 c4310468 1da0: c4310470 c0643258 c4759380 00000000 00000000 c0c4ee24 00000000 c44cc810 1dc0: 00000000 c0c4ee24 00000000 c44cc810 00000000 0347d2a8 e0851e00 e0851e00 1de0: c4759380 c067ad20 c4310000 00000000 c44cc810 c27f8718 c44cc854 c067adb8 1e00: c4933000 00000002 00000001 00000000 00000000 c2130850 00000000 c2130854 1e20: c25fc488 00000000 c0ff162c 00000000 00000001 00000002 00000000 00000000 1e40: c43102c0 c43102c0 00000000 0347d2a8 c44cc810 c44cc814 c2133da8 c06d1a60 1e60: 00000000 00000000 00079028 c2012f24 fee1dead c4933000 00000058 c01431e4 1e80: 01234567 c0143a20 00000000 00000000 00000000 00000000 00000000 00000000 1ea0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1ec0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1ee0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1f00: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1f20: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1f40: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1f60: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1f80: 00000000 00000000 00000000 0347d2a8 00000002 00000004 00000078 00000058 1fa0: c010028c c0100060 00000002 00000004 fee1dead 28121969 01234567 00079028 1fc0: 00000002 00000004 00000078 00000058 0002fdc5 00000000 00000000 00090acc 1fe0: 00000058 becc9c64 b6e97e05 b6e0e5f6 600e0030 fee1dead 00000000 00000000 msm_atomic_commit_tail from commit_tail+0x9c/0x188 commit_tail from drm_atomic_helper_commit+0x160/0x188 drm_atomic_helper_commit from drm_atomic_commit+0xac/0xe0 drm_atomic_commit from drm_atomic_helper_disable_all+0x1b0/0x1c0 drm_atomic_helper_disable_all from drm_atomic_helper_shutdown+0x88/0x140 drm_atomic_helper_shutdown from device_shutdown+0x16c/0x240 device_shutdown from kernel_restart+0x38/0x90 kernel_restart from __do_sys_reboot+0x174/0x224 __do_sys_reboot from ret_fast_syscall+0x0/0x1c Exception stack(0xe0851fa8 to 0xe0851ff0) 1fa0: 00000002 00000004 fee1dead 28121969 01234567 00079028 1fc0: 00000002 00000004 00000078 00000058 0002fdc5 00000000 00000000 00090acc 1fe0: 00000058 becc9c64 b6e97e05 b6e0e5f6 Code: 15922088 1184421c e1500003 1afffff8 (e5953000) ---[ end trace 0000000000000000 ]--- Fixes: 0a58d2ae572a ("drm/msm: Make .remove and .shutdown HW shutdown consistent") Reported-by: kernel test robot Signed-off-by: Dmitry Baryshkov Reviewed-by: Rob Clark Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/516909/ Link: https://lore.kernel.org/r/20230105014743.1478110-1-dmitry.baryshkov@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/msm_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index cffba9ca7ec24..5737364fbb542 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -1279,7 +1279,7 @@ void msm_drv_shutdown(struct platform_device *pdev) * msm_drm_init, drm_dev->registered is used as an indicator that the * shutdown will be successful. */ - if (drm && drm->registered) + if (drm && drm->registered && priv->kms) drm_atomic_helper_shutdown(drm); } From 13ef096e342b00e30b95a90c6c13eee1f0bec4c5 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 2 Jan 2023 11:02:00 +0100 Subject: [PATCH 0411/1593] drm/msm/adreno: Make adreno quirks not overwrite each other So far the adreno quirks have all been assigned with an OR operator, which is problematic, because they were assigned consecutive integer values, which makes checking them with an AND operator kind of no bueno.. Switch to using BIT(n) so that only the quirks that the programmer chose are taken into account when evaluating info->quirks & ADRENO_QUIRK_... Fixes: 370063ee427a ("drm/msm/adreno: Add A540 support") Reviewed-by: Dmitry Baryshkov Reviewed-by: Marijn Suijten Reviewed-by: Rob Clark Signed-off-by: Konrad Dybcio Reviewed-by: Akhil P Oommen Patchwork: https://patchwork.freedesktop.org/patch/516456/ Link: https://lore.kernel.org/r/20230102100201.77286-1-konrad.dybcio@linaro.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 5d4b1c95033ff..b4f9b1343d637 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -29,11 +29,9 @@ enum { ADRENO_FW_MAX, }; -enum adreno_quirks { - ADRENO_QUIRK_TWO_PASS_USE_WFI = 1, - ADRENO_QUIRK_FAULT_DETECT_MASK = 2, - ADRENO_QUIRK_LMLOADKILL_DISABLE = 3, -}; +#define ADRENO_QUIRK_TWO_PASS_USE_WFI BIT(0) +#define ADRENO_QUIRK_FAULT_DETECT_MASK BIT(1) +#define ADRENO_QUIRK_LMLOADKILL_DISABLE BIT(2) struct adreno_rev { uint8_t core; @@ -65,7 +63,7 @@ struct adreno_info { const char *name; const char *fw[ADRENO_FW_MAX]; uint32_t gmem; - enum adreno_quirks quirks; + u64 quirks; struct msm_gpu *(*init)(struct drm_device *dev); const char *zapfw; u32 inactive_period; From f4a75b5933c998e60fd812a7680e0971eb1c7cee Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Fri, 16 Dec 2022 22:33:14 +0530 Subject: [PATCH 0412/1593] drm/msm/a6xx: Avoid gx gbit halt during rpm suspend As per the downstream driver, gx gbif halt is required only during recovery sequence. So lets avoid it during regular rpm suspend. Signed-off-by: Akhil P Oommen Patchwork: https://patchwork.freedesktop.org/patch/515279/ Link: https://lore.kernel.org/r/20221216223253.1.Ice9c47bfeb1fddb8dc377a3491a043a3ee7fca7d@changeid Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 15 +++++++++------ drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 7 +++++++ drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 1 + 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 6484b97c5344f..f3c9600221d48 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -876,7 +876,8 @@ static void a6xx_gmu_rpmh_off(struct a6xx_gmu *gmu) #define GBIF_CLIENT_HALT_MASK BIT(0) #define GBIF_ARB_HALT_MASK BIT(1) -static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu) +static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, + bool gx_off) { struct msm_gpu *gpu = &adreno_gpu->base; @@ -889,9 +890,11 @@ static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu) return; } - /* Halt the gx side of GBIF */ - gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1); - spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1); + if (gx_off) { + /* Halt the gx side of GBIF */ + gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1); + spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1); + } /* Halt new client requests on GBIF */ gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); @@ -929,7 +932,7 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu) /* Halt the gmu cm3 core */ gmu_write(gmu, REG_A6XX_GMU_CM3_SYSRESET, 1); - a6xx_bus_clear_pending_transactions(adreno_gpu); + a6xx_bus_clear_pending_transactions(adreno_gpu, true); /* Reset GPU core blocks */ gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, 1); @@ -1083,7 +1086,7 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu) return; } - a6xx_bus_clear_pending_transactions(adreno_gpu); + a6xx_bus_clear_pending_transactions(adreno_gpu, a6xx_gpu->hung); /* tell the GMU we want to slumber */ ret = a6xx_gmu_notify_slumber(gmu); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index e87196457b9aa..33bba81d5a22b 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1277,6 +1277,12 @@ static void a6xx_recover(struct msm_gpu *gpu) if (hang_debug) a6xx_dump(gpu); + /* + * To handle recovery specific sequences during the rpm suspend we are + * about to trigger + */ + a6xx_gpu->hung = true; + /* Halt SQE first */ gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); @@ -1319,6 +1325,7 @@ static void a6xx_recover(struct msm_gpu *gpu) mutex_unlock(&gpu->active_lock); msm_gpu_hw_init(gpu); + a6xx_gpu->hung = false; } static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index ab853f61db632..eea2e60ce3b7b 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -32,6 +32,7 @@ struct a6xx_gpu { void *llc_slice; void *htw_llc_slice; bool have_mmu500; + bool hung; }; #define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base) From c26de7507d1f5ffa5daf6a4980ef7896889691a9 Mon Sep 17 00:00:00 2001 From: Biao Huang Date: Thu, 5 Jan 2023 09:07:11 +0800 Subject: [PATCH 0413/1593] stmmac: dwmac-mediatek: remove the dwmac_fix_mac_speed In current driver, MAC will always enable 2ns delay in RGMII mode, but that's not the correct usage. Remove the dwmac_fix_mac_speed() in driver, and recommend "rgmii-id" for phy-mode in device tree. Fixes: f2d356a6ab71 ("stmmac: dwmac-mediatek: add support for mt8195") Reviewed-by: Andrew Lunn Signed-off-by: Biao Huang Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-mediatek.c | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index d42e1afb65213..2f7d8e4561d92 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -90,7 +90,6 @@ struct mediatek_dwmac_plat_data { struct mediatek_dwmac_variant { int (*dwmac_set_phy_interface)(struct mediatek_dwmac_plat_data *plat); int (*dwmac_set_delay)(struct mediatek_dwmac_plat_data *plat); - void (*dwmac_fix_mac_speed)(void *priv, unsigned int speed); /* clock ids to be requested */ const char * const *clk_list; @@ -443,32 +442,9 @@ static int mt8195_set_delay(struct mediatek_dwmac_plat_data *plat) return 0; } -static void mt8195_fix_mac_speed(void *priv, unsigned int speed) -{ - struct mediatek_dwmac_plat_data *priv_plat = priv; - - if ((phy_interface_mode_is_rgmii(priv_plat->phy_mode))) { - /* prefer 2ns fixed delay which is controlled by TXC_PHASE_CTRL, - * when link speed is 1Gbps with RGMII interface, - * Fall back to delay macro circuit for 10/100Mbps link speed. - */ - if (speed == SPEED_1000) - regmap_update_bits(priv_plat->peri_regmap, - MT8195_PERI_ETH_CTRL0, - MT8195_RGMII_TXC_PHASE_CTRL | - MT8195_DLY_GTXC_ENABLE | - MT8195_DLY_GTXC_INV | - MT8195_DLY_GTXC_STAGES, - MT8195_RGMII_TXC_PHASE_CTRL); - else - mt8195_set_delay(priv_plat); - } -} - static const struct mediatek_dwmac_variant mt8195_gmac_variant = { .dwmac_set_phy_interface = mt8195_set_interface, .dwmac_set_delay = mt8195_set_delay, - .dwmac_fix_mac_speed = mt8195_fix_mac_speed, .clk_list = mt8195_dwmac_clk_l, .num_clks = ARRAY_SIZE(mt8195_dwmac_clk_l), .dma_bit_mask = 35, @@ -619,8 +595,6 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev, plat->bsp_priv = priv_plat; plat->init = mediatek_dwmac_init; plat->clks_config = mediatek_dwmac_clks_config; - if (priv_plat->variant->dwmac_fix_mac_speed) - plat->fix_mac_speed = priv_plat->variant->dwmac_fix_mac_speed; plat->safety_feat_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->safety_feat_cfg), From eea8ce81fbb544e3caad1a1c876ba1af467b3d3c Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 5 Jan 2023 11:42:49 +0800 Subject: [PATCH 0414/1593] net: usb: cdc_ether: add support for Thales Cinterion PLS62-W modem This modem has 7 interfaces, 5 of them are serial interfaces and are driven by cdc_acm, while 2 of them are wwan interfaces and are driven by cdc_ether: If 0: Abstract (modem) If 1: Abstract (modem) If 2: Abstract (modem) If 3: Abstract (modem) If 4: Abstract (modem) If 5: Ethernet Networking If 6: Ethernet Networking Without this change, the 2 network interfaces will be named to usb0 and usb1, our QA think the names are confusing and filed a bug on it. After applying this change, the name will be wwan0 and wwan1, and they could work well with modem manager. Signed-off-by: Hui Wang Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20230105034249.10433-1-hui.wang@canonical.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/cdc_ether.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 8911cd2ed5343..c140edb4b6482 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -1007,6 +1007,12 @@ static const struct usb_device_id products[] = { USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, +}, { + /* Cinterion PLS62-W modem by GEMALTO/THALES */ + USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x005b, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, }, { /* Cinterion PLS83/PLS63 modem by GEMALTO/THALES */ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0069, USB_CLASS_COMM, From c45219cba101f32853d011da2ba46a1b2338dc4c Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Thu, 5 Jan 2023 13:02:08 +0100 Subject: [PATCH 0415/1593] Revert "arm64: dts: meson-sm1-odroid-hc4: disable unused USB PHY0" This reverts commit 703e84d6615a4a95fb504c8f2e4c9426b86f3930. USB device enumeration was not working on Odroid HC4 as both USB2 PHYs need to be enabled. This is inherited from the GLX USB design [1]. [1]: https://lore.kernel.org/all/20170814224542.18257-1-martin.blumenstingl@googlemail.com/T/ Signed-off-by: Pierre-Olivier Mercier Acked-by: Neil Armstrong Reviewed-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20230105120206.28964-1-nemunaire@nemunai.re Signed-off-by: Neil Armstrong --- arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts index e3486f60645a4..a1f0c38ccadda 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts @@ -131,10 +131,6 @@ }; &usb { - phys = <&usb2_phy1>; - phy-names = "usb2-phy1"; -}; - -&usb2_phy0 { - status = "disabled"; + phys = <&usb2_phy0>, <&usb2_phy1>; + phy-names = "usb2-phy0", "usb2-phy1"; }; From 1a5a23b9bdf6bde0e5185ca834ff6e806cc2aaaf Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 30 Dec 2022 14:54:27 +0800 Subject: [PATCH 0416/1593] usb: fotg210-udc: fix error return code in fotg210_udc_probe() After commit 5f217ccd520f ("fotg210-udc: Support optional external PHY"), the error code is re-assigned to 0 in fotg210_udc_probe(), if allocate or map memory fails after the assignment, it can't return an error code. Set the error code to -ENOMEM to fix this problem. Fixes: 5f217ccd520f ("fotg210-udc: Support optional external PHY") Signed-off-by: Yang Yingliang Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20221230065427.944586-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/fotg210/fotg210-udc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c index 66e1b7ee3346e..87cca81bf4ac9 100644 --- a/drivers/usb/fotg210/fotg210-udc.c +++ b/drivers/usb/fotg210/fotg210-udc.c @@ -1201,6 +1201,8 @@ int fotg210_udc_probe(struct platform_device *pdev) dev_info(dev, "found and initialized PHY\n"); } + ret = -ENOMEM; + for (i = 0; i < FOTG210_MAX_NUM_EP; i++) { fotg210->ep[i] = kzalloc(sizeof(struct fotg210_ep), GFP_KERNEL); if (!fotg210->ep[i]) From 0688773f0710528e1ab302c3d6317e269f2e2e6e Mon Sep 17 00:00:00 2001 From: Patrick Thompson Date: Tue, 20 Dec 2022 15:58:26 -0500 Subject: [PATCH 0417/1593] drm: Add orientation quirk for Lenovo ideapad D330-10IGL Panel is 800x1280 but mounted on a detachable form factor sideways. Signed-off-by: Patrick Thompson Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20221220205826.178008-1-ptf@google.com --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 52d8800a8ab86..3659f0465a724 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -304,6 +304,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGM"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, + }, { /* Lenovo Ideapad D330-10IGL (HD) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGL"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* Lenovo Yoga Book X90F / X91F / X91L */ .matches = { /* Non exact match to match all versions */ From 8a758d98dba380a7d32a98b0840ad707e3036233 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Oct 2022 23:17:06 +0100 Subject: [PATCH 0418/1593] rxrpc: Stash the network namespace pointer in rxrpc_local Stash the network namespace pointer in the rxrpc_local struct in addition to a pointer to the rxrpc-specific net namespace info. Use this to remove some places where the socket is passed as a parameter. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/ar-internal.h | 10 +++++----- net/rxrpc/call_accept.c | 2 +- net/rxrpc/conn_client.c | 2 +- net/rxrpc/local_object.c | 7 ++++--- net/rxrpc/peer_object.c | 23 ++++++++++------------- 5 files changed, 21 insertions(+), 23 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 18092526d3c82..9cf763c338bcd 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -283,7 +283,8 @@ struct rxrpc_local { struct rcu_head rcu; atomic_t active_users; /* Number of users of the local endpoint */ refcount_t ref; /* Number of references to the structure */ - struct rxrpc_net *rxnet; /* The network ns in which this resides */ + struct net *net; /* The network namespace */ + struct rxrpc_net *rxnet; /* Our bits in the network namespace */ struct hlist_node link; struct socket *socket; /* my UDP socket */ struct task_struct *io_thread; @@ -1063,12 +1064,11 @@ void rxrpc_peer_keepalive_worker(struct work_struct *); */ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, const struct sockaddr_rxrpc *); -struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *, - struct sockaddr_rxrpc *, gfp_t); +struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, + struct sockaddr_rxrpc *srx, gfp_t gfp); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t, enum rxrpc_peer_trace); -void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *, - struct rxrpc_peer *); +void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer); void rxrpc_destroy_all_peers(struct rxrpc_net *); struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *, enum rxrpc_peer_trace); struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *, enum rxrpc_peer_trace); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index c02401656fa98..c957e4415cdc4 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -280,7 +280,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, (peer_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); - rxrpc_new_incoming_peer(rx, local, peer); + rxrpc_new_incoming_peer(local, peer); } /* Now allocate and set up the connection */ diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 87efa0373aed3..e4063c4f4bb24 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -378,7 +378,7 @@ static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_sock *rx, _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - cp->peer = rxrpc_lookup_peer(rx, cp->local, srx, gfp); + cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp); if (!cp->peer) goto error; diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 270b63d8f37a6..c0ac2fe07ec48 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -85,7 +85,7 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local, /* * Allocate a new local endpoint. */ -static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, +static struct rxrpc_local *rxrpc_alloc_local(struct net *net, const struct sockaddr_rxrpc *srx) { struct rxrpc_local *local; @@ -94,7 +94,8 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, if (local) { refcount_set(&local->ref, 1); atomic_set(&local->active_users, 1); - local->rxnet = rxnet; + local->net = net; + local->rxnet = rxrpc_net(net); INIT_HLIST_NODE(&local->link); init_rwsem(&local->defrag_sem); init_completion(&local->io_thread_ready); @@ -248,7 +249,7 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, goto found; } - local = rxrpc_alloc_local(rxnet, srx); + local = rxrpc_alloc_local(net, srx); if (!local) goto nomem; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 4eecea2be307b..8d7a715a0bb1c 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -147,10 +147,10 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, * assess the MTU size for the network interface through which this peer is * reached */ -static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx, +static void rxrpc_assess_MTU_size(struct rxrpc_local *local, struct rxrpc_peer *peer) { - struct net *net = sock_net(&rx->sk); + struct net *net = local->net; struct dst_entry *dst; struct rtable *rt; struct flowi fl; @@ -236,11 +236,11 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp, /* * Initialise peer record. */ -static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer, +static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer, unsigned long hash_key) { peer->hash_key = hash_key; - rxrpc_assess_MTU_size(rx, peer); + rxrpc_assess_MTU_size(local, peer); peer->mtu = peer->if_mtu; peer->rtt_last_req = ktime_get_real(); @@ -272,8 +272,7 @@ static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer, /* * Set up a new peer. */ -static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, - struct rxrpc_local *local, +static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, unsigned long hash_key, gfp_t gfp) @@ -285,7 +284,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, peer = rxrpc_alloc_peer(local, gfp, rxrpc_peer_new_client); if (peer) { memcpy(&peer->srx, srx, sizeof(*srx)); - rxrpc_init_peer(rx, peer, hash_key); + rxrpc_init_peer(local, peer, hash_key); } _leave(" = %p", peer); @@ -304,14 +303,13 @@ static void rxrpc_free_peer(struct rxrpc_peer *peer) * since we've already done a search in the list from the non-reentrant context * (the data_ready handler) that is the only place we can add new peers. */ -void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local, - struct rxrpc_peer *peer) +void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) { struct rxrpc_net *rxnet = local->rxnet; unsigned long hash_key; hash_key = rxrpc_peer_hash_key(local, &peer->srx); - rxrpc_init_peer(rx, peer, hash_key); + rxrpc_init_peer(local, peer, hash_key); spin_lock(&rxnet->peer_hash_lock); hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); @@ -322,8 +320,7 @@ void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local, /* * obtain a remote transport endpoint for the specified address */ -struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, - struct rxrpc_local *local, +struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_peer *peer, *candidate; @@ -343,7 +340,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, /* The peer is not yet present in hash - create a candidate * for a new record and then redo the search. */ - candidate = rxrpc_create_peer(rx, local, srx, hash_key, gfp); + candidate = rxrpc_create_peer(local, srx, hash_key, gfp); if (!candidate) { _leave(" = NULL [nomem]"); return NULL; From 5040011d073d3acdeb58af2b64f84e33bb03abd2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 2 Nov 2022 10:24:29 +0000 Subject: [PATCH 0419/1593] rxrpc: Make the local endpoint hold a ref on a connected call Make the local endpoint and it's I/O thread hold a reference on a connected call until that call is disconnected. Without this, we're reliant on either the AF_RXRPC socket to hold a ref (which is dropped when the call is released) or a queued work item to hold a ref (the work item is being replaced with the I/O thread). Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- include/trace/events/rxrpc.h | 3 +++ net/rxrpc/call_object.c | 2 ++ net/rxrpc/conn_client.c | 6 +++--- net/rxrpc/conn_object.c | 25 +++++++++++++++---------- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 5f9dd73895364..b526d982da7ea 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -148,6 +148,7 @@ E_(rxrpc_client_to_idle, "->Idle") #define rxrpc_call_traces \ + EM(rxrpc_call_get_io_thread, "GET iothread") \ EM(rxrpc_call_get_input, "GET input ") \ EM(rxrpc_call_get_kernel_service, "GET krnl-srv") \ EM(rxrpc_call_get_notify_socket, "GET notify ") \ @@ -160,6 +161,7 @@ EM(rxrpc_call_new_prealloc_service, "NEW prealloc") \ EM(rxrpc_call_put_discard_prealloc, "PUT disc-pre") \ EM(rxrpc_call_put_discard_error, "PUT disc-err") \ + EM(rxrpc_call_put_io_thread, "PUT iothread") \ EM(rxrpc_call_put_input, "PUT input ") \ EM(rxrpc_call_put_kernel, "PUT kernel ") \ EM(rxrpc_call_put_poke, "PUT poke ") \ @@ -173,6 +175,7 @@ EM(rxrpc_call_see_activate_client, "SEE act-clnt") \ EM(rxrpc_call_see_connect_failed, "SEE con-fail") \ EM(rxrpc_call_see_connected, "SEE connect ") \ + EM(rxrpc_call_see_disconnected, "SEE disconn ") \ EM(rxrpc_call_see_distribute_error, "SEE dist-err") \ EM(rxrpc_call_see_input, "SEE input ") \ EM(rxrpc_call_see_release, "SEE release ") \ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 89dcf60b11587..239fc3c750790 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -453,6 +453,8 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, BUG(); } + rxrpc_get_call(call, rxrpc_call_get_io_thread); + /* Set the channel for this call. We don't get channel_lock as we're * only defending against the data_ready handler (which we're called * from) and the RESPONSE packet parser (which is only really diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index e4063c4f4bb24..1edd65883c55a 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -725,8 +725,11 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper); + rxrpc_get_call(call, rxrpc_call_get_io_thread); + bundle = rxrpc_prep_call(rx, call, cp, srx, gfp); if (IS_ERR(bundle)) { + rxrpc_put_call(call, rxrpc_call_get_io_thread); ret = PTR_ERR(bundle); goto out; } @@ -820,7 +823,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call _enter("c=%x", call->debug_id); spin_lock(&bundle->channel_lock); - set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); /* Calls that have never actually been assigned a channel can simply be * discarded. @@ -912,8 +914,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call out: spin_unlock(&bundle->channel_lock); - _leave(""); - return; } /* diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 3c8f83dacb2b3..2bd3f62888956 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -178,6 +178,9 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + rxrpc_see_call(call, rxrpc_call_see_disconnected); + call->peer->cong_ssthresh = call->cong_ssthresh; if (!hlist_unhashed(&call->error_link)) { @@ -186,18 +189,20 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) spin_unlock(&call->peer->lock); } - if (rxrpc_is_client_call(call)) - return rxrpc_disconnect_client_call(conn->bundle, call); + if (rxrpc_is_client_call(call)) { + rxrpc_disconnect_client_call(conn->bundle, call); + } else { + spin_lock(&conn->bundle->channel_lock); + __rxrpc_disconnect_call(conn, call); + spin_unlock(&conn->bundle->channel_lock); - spin_lock(&conn->bundle->channel_lock); - __rxrpc_disconnect_call(conn, call); - spin_unlock(&conn->bundle->channel_lock); + conn->idle_timestamp = jiffies; + if (atomic_dec_and_test(&conn->active)) + rxrpc_set_service_reap_timer(conn->rxnet, + jiffies + rxrpc_connection_expiry); + } - set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); - conn->idle_timestamp = jiffies; - if (atomic_dec_and_test(&conn->active)) - rxrpc_set_service_reap_timer(conn->rxnet, - jiffies + rxrpc_connection_expiry); + rxrpc_put_call(call, rxrpc_call_put_io_thread); } /* From 30df927b936b2ef21eb07dce9c141c7897609643 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 8 Oct 2022 14:33:50 +0100 Subject: [PATCH 0420/1593] rxrpc: Separate call retransmission from other conn events Call the rxrpc_conn_retransmit_call() directly from rxrpc_input_packet() rather than calling it via connection event handling. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/ar-internal.h | 2 ++ net/rxrpc/conn_event.c | 34 +++++----------------------------- net/rxrpc/io_thread.c | 2 +- 3 files changed, 8 insertions(+), 30 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9cf763c338bcd..f3b8806e72411 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -897,6 +897,8 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *); /* * conn_event.c */ +void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct sk_buff *skb, + unsigned int channel); void rxrpc_process_connection(struct work_struct *); void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool); int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 480364bcbf855..dfd29882126f6 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -19,9 +19,9 @@ /* * Retransmit terminal ACK or ABORT of the previous call. */ -static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, - struct sk_buff *skb, - unsigned int channel) +void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, + struct sk_buff *skb, + unsigned int channel) { struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL; struct rxrpc_channel *chan; @@ -292,24 +292,6 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial); switch (sp->hdr.type) { - case RXRPC_PACKET_TYPE_DATA: - case RXRPC_PACKET_TYPE_ACK: - rxrpc_conn_retransmit_call(conn, skb, - sp->hdr.cid & RXRPC_CHANNELMASK); - return 0; - - case RXRPC_PACKET_TYPE_BUSY: - /* Just ignore BUSY packets for now. */ - return 0; - - case RXRPC_PACKET_TYPE_ABORT: - conn->error = -ECONNABORTED; - conn->abort_code = skb->priority; - conn->state = RXRPC_CONN_REMOTELY_ABORTED; - set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); - return -ECONNABORTED; - case RXRPC_PACKET_TYPE_CHALLENGE: return conn->security->respond_to_challenge(conn, skb, _abort_code); @@ -504,18 +486,12 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { _leave(" = -ECONNABORTED [%u]", conn->state); - return -ECONNABORTED; + return 0; } _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial); switch (sp->hdr.type) { - case RXRPC_PACKET_TYPE_DATA: - case RXRPC_PACKET_TYPE_ACK: - rxrpc_conn_retransmit_call(conn, skb, - sp->hdr.cid & RXRPC_CHANNELMASK); - return 0; - case RXRPC_PACKET_TYPE_BUSY: /* Just ignore BUSY packets for now. */ return 0; @@ -526,7 +502,7 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) conn->state = RXRPC_CONN_REMOTELY_ABORTED; set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); - return -ECONNABORTED; + return 0; case RXRPC_PACKET_TYPE_CHALLENGE: case RXRPC_PACKET_TYPE_RESPONSE: diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 1ad067d66fb60..0e1a548d35f8e 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -358,7 +358,7 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, sp->hdr.seq, sp->hdr.serial, sp->hdr.flags); - rxrpc_input_conn_packet(conn, skb); + rxrpc_conn_retransmit_call(conn, skb, channel); return 0; } From a343b174b4bdde851033996960bca5ad1394d04b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 12 Oct 2022 22:17:56 +0100 Subject: [PATCH 0421/1593] rxrpc: Only set/transmit aborts in the I/O thread Only set the abort call completion state in the I/O thread and only transmit ABORT packets from there. rxrpc_abort_call() can then be made to actually send the packet. Further, ABORT packets should only be sent if the call has been exposed to the network (ie. at least one attempted DATA transmission has occurred for it). Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- include/trace/events/rxrpc.h | 1 + net/rxrpc/ar-internal.h | 7 ++++++- net/rxrpc/call_event.c | 16 +++++++++++++--- net/rxrpc/call_object.c | 7 ++++--- net/rxrpc/input.c | 6 ++---- net/rxrpc/recvmsg.c | 2 ++ net/rxrpc/sendmsg.c | 29 ++++++++++++++++++++++------- 7 files changed, 50 insertions(+), 18 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index b526d982da7ea..c44cc01de7501 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -17,6 +17,7 @@ * Declare tracing information enums and their string mappings for display. */ #define rxrpc_call_poke_traces \ + EM(rxrpc_call_poke_abort, "Abort") \ EM(rxrpc_call_poke_error, "Error") \ EM(rxrpc_call_poke_idle, "Idle") \ EM(rxrpc_call_poke_start, "Start") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index f3b8806e72411..0cf28a56aec5d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -625,7 +625,10 @@ struct rxrpc_call { unsigned long events; spinlock_t notify_lock; /* Kernel notification lock */ rwlock_t state_lock; /* lock for state transition */ - u32 abort_code; /* Local/remote abort code */ + const char *send_abort_why; /* String indicating why the abort was sent */ + s32 send_abort; /* Abort code to be sent */ + short send_abort_err; /* Error to be associated with the abort */ + s32 abort_code; /* Local/remote abort code */ int error; /* Local error incurred */ enum rxrpc_call_state state; /* current state of call */ enum rxrpc_call_completion completion; /* Call completion condition */ @@ -1146,6 +1149,8 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *, /* * sendmsg.c */ +bool rxrpc_propose_abort(struct rxrpc_call *call, + u32 abort_code, int error, const char *why); int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index b2cf448fb02c0..b7efecf5ccfc0 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -270,9 +270,11 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) { struct rxrpc_txbuf *txb; - if (rxrpc_is_client_call(call) && - !test_bit(RXRPC_CALL_EXPOSED, &call->flags)) + if (!test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + if (list_empty(&call->tx_sendmsg)) + return; rxrpc_expose_client_call(call); + } while ((txb = list_first_entry_or_null(&call->tx_sendmsg, struct rxrpc_txbuf, call_link))) { @@ -336,6 +338,7 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) unsigned long now, next, t; rxrpc_serial_t ackr_serial; bool resend = false, expired = false; + s32 abort_code; rxrpc_see_call(call, rxrpc_call_see_input); @@ -346,6 +349,14 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) if (call->state == RXRPC_CALL_COMPLETE) goto out; + /* Handle abort request locklessly, vs rxrpc_propose_abort(). */ + abort_code = smp_load_acquire(&call->send_abort); + if (abort_code) { + rxrpc_abort_call(call->send_abort_why, call, 0, call->send_abort, + call->send_abort_err); + goto out; + } + if (skb && skb->mark == RXRPC_SKB_MARK_ERROR) goto out; @@ -433,7 +444,6 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) } else { rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); } - rxrpc_send_abort_packet(call); goto out; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 239fc3c750790..298b7c465d7ee 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -430,6 +430,8 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->state = RXRPC_CALL_SERVER_SECURING; call->cong_tstamp = skb->tstamp; + __set_bit(RXRPC_CALL_EXPOSED, &call->flags); + spin_lock(&conn->state_lock); switch (conn->state) { @@ -590,7 +592,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) call = list_entry(rx->to_be_accepted.next, struct rxrpc_call, accept_link); list_del(&call->accept_link); - rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET); + rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET, "SKR"); rxrpc_put_call(call, rxrpc_call_put_release_sock_tba); } @@ -598,8 +600,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) call = list_entry(rx->sock_calls.next, struct rxrpc_call, sock_link); rxrpc_get_call(call, rxrpc_call_get_release_sock); - rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET); - rxrpc_send_abort_packet(call); + rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET, "SKT"); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put_release_sock); } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index d0e20e946e48d..1f03a286620d5 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -12,8 +12,7 @@ static void rxrpc_proto_abort(const char *why, struct rxrpc_call *call, rxrpc_seq_t seq) { - if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) - rxrpc_send_abort_packet(call); + rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG); } /* @@ -1007,8 +1006,7 @@ void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb) case RXRPC_CALL_COMPLETE: break; default: - if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN)) - rxrpc_send_abort_packet(call); + rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN); trace_rxrpc_improper_term(call); break; } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 6ebd6440a2b7c..a4ccdc006d0fe 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -134,6 +134,8 @@ bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, write_lock(&call->state_lock); ret = __rxrpc_abort_call(why, call, seq, abort_code, error); write_unlock(&call->state_lock); + if (ret && test_bit(RXRPC_CALL_EXPOSED, &call->flags)) + rxrpc_send_abort_packet(call); return ret; } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index cde1e65f16b45..dc3c2a834fc8b 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -17,6 +17,26 @@ #include #include "ar-internal.h" +/* + * Propose an abort to be made in the I/O thread. + */ +bool rxrpc_propose_abort(struct rxrpc_call *call, + u32 abort_code, int error, const char *why) +{ + _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); + + if (!call->send_abort && call->state < RXRPC_CALL_COMPLETE) { + call->send_abort_why = why; + call->send_abort_err = error; + /* Request abort locklessly vs rxrpc_input_call_event(). */ + smp_store_release(&call->send_abort, abort_code); + rxrpc_poke_call(call, rxrpc_call_poke_abort); + return true; + } + + return false; +} + /* * Return true if there's sufficient Tx queue space. */ @@ -663,9 +683,8 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) /* it's too late for this call */ ret = -ESHUTDOWN; } else if (p.command == RXRPC_CMD_SEND_ABORT) { + rxrpc_propose_abort(call, p.abort_code, -ECONNABORTED, "CMD"); ret = 0; - if (rxrpc_abort_call("CMD", call, 0, p.abort_code, -ECONNABORTED)) - ret = rxrpc_send_abort_packet(call); } else if (p.command != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; } else { @@ -760,11 +779,7 @@ bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); mutex_lock(&call->user_mutex); - - aborted = rxrpc_abort_call(why, call, 0, abort_code, error); - if (aborted) - rxrpc_send_abort_packet(call); - + aborted = rxrpc_propose_abort(call, abort_code, error, why); mutex_unlock(&call->user_mutex); return aborted; } From 03fc55adf8761c546d72798264b019c9f672c578 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 12 Oct 2022 17:01:25 +0100 Subject: [PATCH 0422/1593] rxrpc: Only disconnect calls in the I/O thread Only perform call disconnection in the I/O thread to reduce the locking requirement. This is the first part of a fix for a race that exists between call connection and call disconnection whereby the data transmission code adds the call to the peer error distribution list after the call has been disconnected (say by the rxrpc socket getting closed). The fix is to complete the process of moving call connection, data transmission and call disconnection into the I/O thread and thus forcibly serialising them. Note that the issue may predate the overhaul to an I/O thread model that were included in the merge window for v6.2, but the timing is very much changed by the change given below. Fixes: cf37b5987508 ("rxrpc: Move DATA transmission into call processor work item") Reported-by: syzbot+c22650d2844392afdcfd@syzkaller.appspotmail.com Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- include/trace/events/rxrpc.h | 1 + net/rxrpc/call_event.c | 7 ++++++- net/rxrpc/call_object.c | 9 +-------- net/rxrpc/input.c | 6 ------ net/rxrpc/recvmsg.c | 1 + 5 files changed, 9 insertions(+), 15 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index c44cc01de7501..eac513668e333 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -18,6 +18,7 @@ */ #define rxrpc_call_poke_traces \ EM(rxrpc_call_poke_abort, "Abort") \ + EM(rxrpc_call_poke_complete, "Compl") \ EM(rxrpc_call_poke_error, "Error") \ EM(rxrpc_call_poke_idle, "Idle") \ EM(rxrpc_call_poke_start, "Start") \ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index b7efecf5ccfc0..b2fc3fa686ece 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -484,8 +484,13 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) } out: - if (call->state == RXRPC_CALL_COMPLETE) + if (call->state == RXRPC_CALL_COMPLETE) { del_timer_sync(&call->timer); + if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) + rxrpc_disconnect_call(call); + if (call->security) + call->security->free_call_crypto(call); + } if (call->acks_hard_ack != call->tx_bottom) rxrpc_shrink_call_tx_buffer(call); _leave(""); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 298b7c465d7ee..13aac3ca03a03 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -50,7 +50,7 @@ void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what) struct rxrpc_local *local = call->local; bool busy; - if (call->state < RXRPC_CALL_COMPLETE) { + if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) { spin_lock_bh(&local->lock); busy = !list_empty(&call->attend_link); trace_rxrpc_poke_call(call, busy, what); @@ -533,13 +533,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), call->flags, rxrpc_call_see_release); - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); - if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); rxrpc_put_call_slot(call); - del_timer_sync(&call->timer); /* Make sure we don't get any more notifications */ write_lock(&rx->recvmsg_lock); @@ -572,10 +569,6 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); - if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) - rxrpc_disconnect_call(call); - if (call->security) - call->security->free_call_crypto(call); _leave(""); } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 1f03a286620d5..bb4beb4453259 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -997,8 +997,6 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb) */ void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb) { - struct rxrpc_connection *conn = call->conn; - switch (READ_ONCE(call->state)) { case RXRPC_CALL_SERVER_AWAIT_ACK: rxrpc_call_completed(call); @@ -1012,8 +1010,4 @@ void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb) } rxrpc_input_call_event(call, skb); - - spin_lock(&conn->bundle->channel_lock); - __rxrpc_disconnect_call(conn, call); - spin_unlock(&conn->bundle->channel_lock); } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index a4ccdc006d0fe..8d5fe65f5951c 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -201,6 +201,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) case RXRPC_CALL_CLIENT_RECV_REPLY: __rxrpc_call_completed(call); write_unlock(&call->state_lock); + rxrpc_poke_call(call, rxrpc_call_poke_complete); break; case RXRPC_CALL_SERVER_RECV_REQUEST: From f2cce89a074e6d2991dddc94f6b6ebe1576b8459 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Oct 2022 09:08:34 +0100 Subject: [PATCH 0423/1593] rxrpc: Implement a mechanism to send an event notification to a connection Provide a means by which an event notification can be sent to a connection through such that the I/O thread can pick it up and handle it rather than doing it in a separate workqueue. This is then used to move the deferred final ACK of a call into the I/O thread rather than a separate work queue as part of the drive to do all transmission from the I/O thread. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- include/trace/events/rxrpc.h | 5 ++--- net/rxrpc/ar-internal.h | 5 +++++ net/rxrpc/conn_event.c | 14 ++++++++++---- net/rxrpc/conn_object.c | 20 +++++++++++++++++++- net/rxrpc/io_thread.c | 19 ++++++++++++++++++- net/rxrpc/local_object.c | 1 + 6 files changed, 55 insertions(+), 9 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index eac513668e333..b969756f97fce 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -111,7 +111,7 @@ EM(rxrpc_conn_get_call_input, "GET inp-call") \ EM(rxrpc_conn_get_conn_input, "GET inp-conn") \ EM(rxrpc_conn_get_idle, "GET idle ") \ - EM(rxrpc_conn_get_poke, "GET poke ") \ + EM(rxrpc_conn_get_poke_timer, "GET poke ") \ EM(rxrpc_conn_get_service_conn, "GET svc-conn") \ EM(rxrpc_conn_new_client, "NEW client ") \ EM(rxrpc_conn_new_service, "NEW service ") \ @@ -126,10 +126,9 @@ EM(rxrpc_conn_put_service_reaped, "PUT svc-reap") \ EM(rxrpc_conn_put_unbundle, "PUT unbundle") \ EM(rxrpc_conn_put_unidle, "PUT unidle ") \ + EM(rxrpc_conn_put_work, "PUT work ") \ EM(rxrpc_conn_queue_challenge, "QUE chall ") \ - EM(rxrpc_conn_queue_retry_work, "QUE retry-wk") \ EM(rxrpc_conn_queue_rx_work, "QUE rx-work ") \ - EM(rxrpc_conn_queue_timer, "QUE timer ") \ EM(rxrpc_conn_see_new_service_conn, "SEE new-svc ") \ EM(rxrpc_conn_see_reap_service, "SEE reap-svc") \ E_(rxrpc_conn_see_work, "SEE work ") diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 0cf28a56aec5d..d82d7f36cdaa0 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -202,6 +202,7 @@ struct rxrpc_host_header { * - max 48 bytes (struct sk_buff::cb) */ struct rxrpc_skb_priv { + struct rxrpc_connection *conn; /* Connection referred to (poke packet) */ u16 offset; /* Offset of data */ u16 len; /* Length of data */ u8 flags; @@ -292,6 +293,7 @@ struct rxrpc_local { struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ struct sk_buff_head rx_queue; /* Received packets */ + struct list_head conn_attend_q; /* Conns requiring immediate attention */ struct list_head call_attend_q; /* Calls requiring immediate attention */ struct rb_root client_bundles; /* Client connection bundles by socket params */ spinlock_t client_bundles_lock; /* Lock for client_bundles */ @@ -441,6 +443,7 @@ struct rxrpc_connection { struct rxrpc_peer *peer; /* Remote endpoint */ struct rxrpc_net *rxnet; /* Network namespace to which call belongs */ struct key *key; /* Security details */ + struct list_head attend_link; /* Link in local->conn_attend_q */ refcount_t ref; atomic_t active; /* Active count for service conns */ @@ -905,6 +908,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct sk_buff *s void rxrpc_process_connection(struct work_struct *); void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool); int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb); +void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb); /* * conn_object.c @@ -912,6 +916,7 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb); extern unsigned int rxrpc_connection_expiry; extern unsigned int rxrpc_closed_conn_expiry; +void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why); struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *, gfp_t); struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *, struct sockaddr_rxrpc *, diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index dfd29882126f6..7a980a32344f1 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -412,10 +412,6 @@ static void rxrpc_do_process_connection(struct rxrpc_connection *conn) if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) rxrpc_secure_connection(conn); - /* Process delayed ACKs whose time has come. */ - if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) - rxrpc_process_delayed_final_acks(conn, false); - /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { @@ -515,3 +511,13 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) return -EPROTO; } } + +/* + * Input a connection event. + */ +void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb) +{ + /* Process delayed ACKs whose time has come. */ + if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) + rxrpc_process_delayed_final_acks(conn, false); +} diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 2bd3f62888956..281f59e356f5d 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -23,12 +23,30 @@ static void rxrpc_clean_up_connection(struct work_struct *work); static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, unsigned long reap_at); +void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) +{ + struct rxrpc_local *local = conn->local; + bool busy; + + if (WARN_ON_ONCE(!local)) + return; + + spin_lock_bh(&local->lock); + busy = !list_empty(&conn->attend_link); + if (!busy) { + rxrpc_get_connection(conn, why); + list_add_tail(&conn->attend_link, &local->conn_attend_q); + } + spin_unlock_bh(&local->lock); + rxrpc_wake_up_io_thread(local); +} + static void rxrpc_connection_timer(struct timer_list *timer) { struct rxrpc_connection *conn = container_of(timer, struct rxrpc_connection, timer); - rxrpc_queue_conn(conn, rxrpc_conn_queue_timer); + rxrpc_poke_conn(conn, rxrpc_conn_get_poke_timer); } /* diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 0e1a548d35f8e..46e58cf5bc96e 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -421,6 +421,7 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, */ int rxrpc_io_thread(void *data) { + struct rxrpc_connection *conn; struct sk_buff_head rx_queue; struct rxrpc_local *local = data; struct rxrpc_call *call; @@ -436,6 +437,20 @@ int rxrpc_io_thread(void *data) for (;;) { rxrpc_inc_stat(local->rxnet, stat_io_loop); + /* Deal with connections that want immediate attention. */ + conn = list_first_entry_or_null(&local->conn_attend_q, + struct rxrpc_connection, + attend_link); + if (conn) { + spin_lock_bh(&local->lock); + list_del_init(&conn->attend_link); + spin_unlock_bh(&local->lock); + + rxrpc_input_conn_event(conn, NULL); + rxrpc_put_connection(conn, rxrpc_conn_put_poke); + continue; + } + /* Deal with calls that want immediate attention. */ if ((call = list_first_entry_or_null(&local->call_attend_q, struct rxrpc_call, @@ -463,6 +478,7 @@ int rxrpc_io_thread(void *data) rxrpc_input_error(local, skb); rxrpc_free_skb(skb, rxrpc_skb_put_error_report); break; + break; default: WARN_ON_ONCE(1); rxrpc_free_skb(skb, rxrpc_skb_put_unknown); @@ -481,7 +497,8 @@ int rxrpc_io_thread(void *data) set_current_state(TASK_INTERRUPTIBLE); should_stop = kthread_should_stop(); if (!skb_queue_empty(&local->rx_queue) || - !list_empty(&local->call_attend_q)) { + !list_empty(&local->call_attend_q) || + !list_empty(&local->conn_attend_q)) { __set_current_state(TASK_RUNNING); continue; } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index c0ac2fe07ec48..8ef6cd8defa45 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -100,6 +100,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct net *net, init_rwsem(&local->defrag_sem); init_completion(&local->io_thread_ready); skb_queue_head_init(&local->rx_queue); + INIT_LIST_HEAD(&local->conn_attend_q); INIT_LIST_HEAD(&local->call_attend_q); local->client_bundles = RB_ROOT; spin_lock_init(&local->client_bundles_lock); From a00ce28b1778fa3576575b43bdb17f60ded38b66 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Oct 2022 09:56:36 +0100 Subject: [PATCH 0424/1593] rxrpc: Clean up connection abort Clean up connection abort, using the connection state_lock to gate access to change that state, and use an rxrpc_call_completion value to indicate the difference between local and remote aborts as these can be pasted directly into the call state. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- include/trace/events/rxrpc.h | 2 + net/rxrpc/ar-internal.h | 46 ++++--- net/rxrpc/call_object.c | 8 +- net/rxrpc/conn_event.c | 233 ++++++++++++----------------------- net/rxrpc/insecure.c | 18 +-- net/rxrpc/output.c | 56 +++++++++ net/rxrpc/proc.c | 10 +- net/rxrpc/rxkad.c | 28 ++--- 8 files changed, 188 insertions(+), 213 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index b969756f97fce..222d0498d23fc 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -111,6 +111,7 @@ EM(rxrpc_conn_get_call_input, "GET inp-call") \ EM(rxrpc_conn_get_conn_input, "GET inp-conn") \ EM(rxrpc_conn_get_idle, "GET idle ") \ + EM(rxrpc_conn_get_poke_abort, "GET pk-abort") \ EM(rxrpc_conn_get_poke_timer, "GET poke ") \ EM(rxrpc_conn_get_service_conn, "GET svc-conn") \ EM(rxrpc_conn_new_client, "NEW client ") \ @@ -128,6 +129,7 @@ EM(rxrpc_conn_put_unidle, "PUT unidle ") \ EM(rxrpc_conn_put_work, "PUT work ") \ EM(rxrpc_conn_queue_challenge, "QUE chall ") \ + EM(rxrpc_conn_queue_retry_work, "QUE retry-wk") \ EM(rxrpc_conn_queue_rx_work, "QUE rx-work ") \ EM(rxrpc_conn_see_new_service_conn, "SEE new-svc ") \ EM(rxrpc_conn_see_reap_service, "SEE reap-svc") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index d82d7f36cdaa0..78bd6fb0bc154 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -263,13 +263,11 @@ struct rxrpc_security { /* respond to a challenge */ int (*respond_to_challenge)(struct rxrpc_connection *, - struct sk_buff *, - u32 *); + struct sk_buff *); /* verify a response */ int (*verify_response)(struct rxrpc_connection *, - struct sk_buff *, - u32 *); + struct sk_buff *); /* clear connection security */ void (*clear)(struct rxrpc_connection *); @@ -367,6 +365,18 @@ struct rxrpc_conn_parameters { u32 security_level; /* Security level selected */ }; +/* + * Call completion condition (state == RXRPC_CALL_COMPLETE). + */ +enum rxrpc_call_completion { + RXRPC_CALL_SUCCEEDED, /* - Normal termination */ + RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ + RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ + RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ + RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ + NR__RXRPC_CALL_COMPLETIONS +}; + /* * Bits in the connection flags. */ @@ -391,6 +401,7 @@ enum rxrpc_conn_flag { */ enum rxrpc_conn_event { RXRPC_CONN_EV_CHALLENGE, /* Send challenge packet */ + RXRPC_CONN_EV_ABORT_CALLS, /* Abort attached calls */ }; /* @@ -403,8 +414,7 @@ enum rxrpc_conn_proto_state { RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */ RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */ RXRPC_CONN_SERVICE, /* Service secured connection */ - RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */ - RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */ + RXRPC_CONN_ABORTED, /* Conn aborted */ RXRPC_CONN__NR_STATES }; @@ -487,7 +497,8 @@ struct rxrpc_connection { unsigned long idle_timestamp; /* Time at which last became idle */ spinlock_t state_lock; /* state-change lock */ enum rxrpc_conn_proto_state state; /* current state of connection */ - u32 abort_code; /* Abort code of connection abort */ + enum rxrpc_call_completion completion; /* Completion condition */ + s32 abort_code; /* Abort code of connection abort */ int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ @@ -561,18 +572,6 @@ enum rxrpc_call_state { NR__RXRPC_CALL_STATES }; -/* - * Call completion condition (state == RXRPC_CALL_COMPLETE). - */ -enum rxrpc_call_completion { - RXRPC_CALL_SUCCEEDED, /* - Normal termination */ - RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ - RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ - RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ - RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ - NR__RXRPC_CALL_COMPLETIONS -}; - /* * Call Tx congestion management modes. */ @@ -905,11 +904,19 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *); */ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct sk_buff *skb, unsigned int channel); +int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb, + s32 abort_code, int err, const char *why); void rxrpc_process_connection(struct work_struct *); void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool); int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb); void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb); +static inline bool rxrpc_is_conn_aborted(const struct rxrpc_connection *conn) +{ + /* Order reading the abort info after the state check. */ + return smp_load_acquire(&conn->state) == RXRPC_CONN_ABORTED; +} + /* * conn_object.c */ @@ -1059,6 +1066,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net) int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb); int rxrpc_send_abort_packet(struct rxrpc_call *); int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *); +void rxrpc_send_conn_abort(struct rxrpc_connection *conn); void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb); void rxrpc_send_keepalive(struct rxrpc_peer *); void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 13aac3ca03a03..666430182dfd6 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -443,14 +443,10 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->state = RXRPC_CALL_SERVER_RECV_REQUEST; break; - case RXRPC_CONN_REMOTELY_ABORTED: - __rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + case RXRPC_CONN_ABORTED: + __rxrpc_set_call_completion(call, conn->completion, conn->abort_code, conn->error); break; - case RXRPC_CONN_LOCALLY_ABORTED: - __rxrpc_abort_call("CON", call, 1, - conn->abort_code, conn->error); - break; default: BUG(); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 7a980a32344f1..753d91a9646fc 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -16,6 +16,60 @@ #include #include "ar-internal.h" +/* + * Set the completion state on an aborted connection. + */ +static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff *skb, + s32 abort_code, int err, + enum rxrpc_call_completion compl) +{ + bool aborted = false; + + if (conn->state != RXRPC_CONN_ABORTED) { + spin_lock(&conn->state_lock); + if (conn->state != RXRPC_CONN_ABORTED) { + conn->abort_code = abort_code; + conn->error = err; + conn->completion = compl; + /* Order the abort info before the state change. */ + smp_store_release(&conn->state, RXRPC_CONN_ABORTED); + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + set_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events); + aborted = true; + } + spin_unlock(&conn->state_lock); + } + + return aborted; +} + +/* + * Mark a socket buffer to indicate that the connection it's on should be aborted. + */ +int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb, + s32 abort_code, int err, const char *why) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + if (rxrpc_set_conn_aborted(conn, skb, abort_code, err, + RXRPC_CALL_LOCALLY_ABORTED)) { + trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber, + sp->hdr.seq, abort_code, err); + rxrpc_poke_conn(conn, rxrpc_conn_get_poke_abort); + } + return -EPROTO; +} + +/* + * Mark a connection as being remotely aborted. + */ +static bool rxrpc_input_conn_abort(struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + return rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED, + RXRPC_CALL_REMOTELY_ABORTED); +} + /* * Retransmit terminal ACK or ABORT of the previous call. */ @@ -146,9 +200,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, /* * pass a connection-level abort onto all calls on that connection */ -static void rxrpc_abort_calls(struct rxrpc_connection *conn, - enum rxrpc_call_completion compl, - rxrpc_serial_t serial) +static void rxrpc_abort_calls(struct rxrpc_connection *conn) { struct rxrpc_call *call; int i; @@ -161,102 +213,17 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, call = rcu_dereference_protected( conn->channels[i].call, lockdep_is_held(&conn->bundle->channel_lock)); - if (call) { - if (compl == RXRPC_CALL_LOCALLY_ABORTED) - trace_rxrpc_abort(call->debug_id, - "CON", call->cid, - call->call_id, 0, - conn->abort_code, - conn->error); - else - trace_rxrpc_rx_abort(call, serial, - conn->abort_code); - rxrpc_set_call_completion(call, compl, + if (call) + rxrpc_set_call_completion(call, + conn->completion, conn->abort_code, conn->error); - } } spin_unlock(&conn->bundle->channel_lock); _leave(""); } -/* - * generate a connection-level abort - */ -static int rxrpc_abort_connection(struct rxrpc_connection *conn, - int error, u32 abort_code) -{ - struct rxrpc_wire_header whdr; - struct msghdr msg; - struct kvec iov[2]; - __be32 word; - size_t len; - u32 serial; - int ret; - - _enter("%d,,%u,%u", conn->debug_id, error, abort_code); - - /* generate a connection-level abort */ - spin_lock(&conn->state_lock); - if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { - spin_unlock(&conn->state_lock); - _leave(" = 0 [already dead]"); - return 0; - } - - conn->error = error; - conn->abort_code = abort_code; - conn->state = RXRPC_CONN_LOCALLY_ABORTED; - set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); - spin_unlock(&conn->state_lock); - - msg.msg_name = &conn->peer->srx.transport; - msg.msg_namelen = conn->peer->srx.transport_len; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - whdr.epoch = htonl(conn->proto.epoch); - whdr.cid = htonl(conn->proto.cid); - whdr.callNumber = 0; - whdr.seq = 0; - whdr.type = RXRPC_PACKET_TYPE_ABORT; - whdr.flags = conn->out_clientflag; - whdr.userStatus = 0; - whdr.securityIndex = conn->security_ix; - whdr._rsvd = 0; - whdr.serviceId = htons(conn->service_id); - - word = htonl(conn->abort_code); - - iov[0].iov_base = &whdr; - iov[0].iov_len = sizeof(whdr); - iov[1].iov_base = &word; - iov[1].iov_len = sizeof(word); - - len = iov[0].iov_len + iov[1].iov_len; - - serial = atomic_inc_return(&conn->serial); - rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial); - whdr.serial = htonl(serial); - - ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len); - if (ret < 0) { - trace_rxrpc_tx_fail(conn->debug_id, serial, ret, - rxrpc_tx_point_conn_abort); - _debug("sendmsg failed: %d", ret); - return -EAGAIN; - } - - trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort); - - conn->peer->last_tx_at = ktime_get_seconds(); - - _leave(" = 0"); - return 0; -} - /* * mark a call as being on a now-secured channel * - must be called with BH's disabled. @@ -278,26 +245,22 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call) * connection-level Rx packet processor */ static int rxrpc_process_event(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); int loop, ret; - if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { - _leave(" = -ECONNABORTED [%u]", conn->state); + if (conn->state == RXRPC_CONN_ABORTED) return -ECONNABORTED; - } _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial); switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_CHALLENGE: - return conn->security->respond_to_challenge(conn, skb, - _abort_code); + return conn->security->respond_to_challenge(conn, skb); case RXRPC_PACKET_TYPE_RESPONSE: - ret = conn->security->verify_response(conn, skb, _abort_code); + ret = conn->security->verify_response(conn, skb); if (ret < 0) return ret; @@ -336,26 +299,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, */ static void rxrpc_secure_connection(struct rxrpc_connection *conn) { - u32 abort_code; - int ret; - - _enter("{%d}", conn->debug_id); - - ASSERT(conn->security_ix != 0); - - if (conn->security->issue_challenge(conn) < 0) { - abort_code = RX_CALL_DEAD; - ret = -ENOMEM; - goto abort; - } - - _leave(""); - return; - -abort: - _debug("abort %d, %d", ret, abort_code); - rxrpc_abort_connection(conn, ret, abort_code); - _leave(" [aborted]"); + if (conn->security->issue_challenge(conn) < 0) + rxrpc_abort_conn(conn, NULL, RX_CALL_DEAD, -ENOMEM, "OOM"); } /* @@ -406,7 +351,6 @@ void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn, bool force) static void rxrpc_do_process_connection(struct rxrpc_connection *conn) { struct sk_buff *skb; - u32 abort_code = RX_PROTOCOL_ERROR; int ret; if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) @@ -416,33 +360,18 @@ static void rxrpc_do_process_connection(struct rxrpc_connection *conn) * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { rxrpc_see_skb(skb, rxrpc_skb_see_conn_work); - ret = rxrpc_process_event(conn, skb, &abort_code); + ret = rxrpc_process_event(conn, skb); switch (ret) { - case -EPROTO: - case -EKEYEXPIRED: - case -EKEYREJECTED: - goto protocol_error; case -ENOMEM: case -EAGAIN: - goto requeue_and_leave; - case -ECONNABORTED: + skb_queue_head(&conn->rx_queue, skb); + rxrpc_queue_conn(conn, rxrpc_conn_queue_retry_work); + break; default: rxrpc_free_skb(skb, rxrpc_skb_put_conn_work); break; } } - - return; - -requeue_and_leave: - skb_queue_head(&conn->rx_queue, skb); - return; - -protocol_error: - if (rxrpc_abort_connection(conn, ret, abort_code) < 0) - goto requeue_and_leave; - rxrpc_free_skb(skb, rxrpc_skb_put_conn_work); - return; } void rxrpc_process_connection(struct work_struct *work) @@ -480,28 +409,25 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { - _leave(" = -ECONNABORTED [%u]", conn->state); - return 0; - } - - _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial); - switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_BUSY: /* Just ignore BUSY packets for now. */ return 0; case RXRPC_PACKET_TYPE_ABORT: - conn->error = -ECONNABORTED; - conn->abort_code = skb->priority; - conn->state = RXRPC_CONN_REMOTELY_ABORTED; - set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); - return 0; + if (rxrpc_is_conn_aborted(conn)) + return true; + rxrpc_input_conn_abort(conn, skb); + rxrpc_abort_calls(conn); + return true; case RXRPC_PACKET_TYPE_CHALLENGE: case RXRPC_PACKET_TYPE_RESPONSE: + if (rxrpc_is_conn_aborted(conn)) { + if (conn->completion == RXRPC_CALL_LOCALLY_ABORTED) + rxrpc_send_conn_abort(conn); + return true; + } rxrpc_post_packet_to_conn(conn, skb); return 0; @@ -517,6 +443,9 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) */ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb) { + if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events)) + rxrpc_abort_calls(conn); + /* Process delayed ACKs whose time has come. */ if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) rxrpc_process_delayed_final_acks(conn, false); diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index 0eb8471bfc536..29dcc7d3f51a7 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -43,25 +43,15 @@ static void none_free_call_crypto(struct rxrpc_call *call) } static int none_respond_to_challenge(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("chall_none")); - return -EPROTO; + return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, "RXN"); } static int none_verify_response(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("resp_none")); - return -EPROTO; + return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, "RXN"); } static void none_clear(struct rxrpc_connection *conn) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 3d8c9f830ee09..8a5ff2c9e0612 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -544,6 +544,62 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) goto done; } +/* + * Transmit a connection-level abort. + */ +void rxrpc_send_conn_abort(struct rxrpc_connection *conn) +{ + struct rxrpc_wire_header whdr; + struct msghdr msg; + struct kvec iov[2]; + __be32 word; + size_t len; + u32 serial; + int ret; + + msg.msg_name = &conn->peer->srx.transport; + msg.msg_namelen = conn->peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + whdr.epoch = htonl(conn->proto.epoch); + whdr.cid = htonl(conn->proto.cid); + whdr.callNumber = 0; + whdr.seq = 0; + whdr.type = RXRPC_PACKET_TYPE_ABORT; + whdr.flags = conn->out_clientflag; + whdr.userStatus = 0; + whdr.securityIndex = conn->security_ix; + whdr._rsvd = 0; + whdr.serviceId = htons(conn->service_id); + + word = htonl(conn->abort_code); + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = &word; + iov[1].iov_len = sizeof(word); + + len = iov[0].iov_len + iov[1].iov_len; + + serial = atomic_inc_return(&conn->serial); + whdr.serial = htonl(serial); + + iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len); + ret = do_udp_sendmsg(conn->local->socket, &msg, len); + if (ret < 0) { + trace_rxrpc_tx_fail(conn->debug_id, serial, ret, + rxrpc_tx_point_conn_abort); + _debug("sendmsg failed: %d", ret); + return; + } + + trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort); + + conn->peer->last_tx_at = ktime_get_seconds(); +} + /* * Reject a packet through the local endpoint. */ diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 3a59591ec0615..63947cce40483 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -17,8 +17,7 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { [RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ", [RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ", [RXRPC_CONN_SERVICE] = "SvSecure", - [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort", - [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort", + [RXRPC_CONN_ABORTED] = "Aborted ", }; /* @@ -143,6 +142,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) { struct rxrpc_connection *conn; struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + const char *state; char lbuff[50], rbuff[50]; if (v == &rxnet->conn_proc_list) { @@ -163,9 +163,11 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) } sprintf(lbuff, "%pISpc", &conn->local->srx.transport); - sprintf(rbuff, "%pISpc", &conn->peer->srx.transport); print: + state = rxrpc_is_conn_aborted(conn) ? + rxrpc_call_completions[conn->completion] : + rxrpc_conn_states[conn->state]; seq_printf(seq, "UDP %-47.47s %-47.47s %4x %08x %s %3u %3d" " %s %08x %08x %08x %08x %08x %08x %08x\n", @@ -176,7 +178,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) rxrpc_conn_is_service(conn) ? "Svc" : "Clt", refcount_read(&conn->ref), atomic_read(&conn->active), - rxrpc_conn_states[conn->state], + state, key_serial(conn->key), atomic_read(&conn->serial), conn->hi_serial, diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index d1233720e05f2..5d2fbc6ec3cf1 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -821,8 +821,7 @@ static int rxkad_encrypt_response(struct rxrpc_connection *conn, * respond to a challenge packet */ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb) { const struct rxrpc_key_token *token; struct rxkad_challenge challenge; @@ -898,7 +897,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); ret = -EPROTO; other_error: - *_abort_code = abort_code; + rxrpc_abort_conn(conn, skb, abort_code, ret, "RXK"); return ret; } @@ -910,8 +909,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, struct sk_buff *skb, void *ticket, size_t ticket_len, struct rxrpc_crypt *_session_key, - time64_t *_expiry, - u32 *_abort_code) + time64_t *_expiry) { struct skcipher_request *req; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -1042,8 +1040,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, abort_code = RXKADBADTICKET; ret = -EPROTO; other_error: - *_abort_code = abort_code; - return ret; + return rxrpc_abort_conn(conn, skb, abort_code, ret, "RXK"); temporary_error: return ret; } @@ -1086,8 +1083,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn, * verify a response */ static int rxkad_verify_response(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb) { struct rxkad_response *response; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -1115,11 +1111,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, abort_code = RXKADNOAUTH; break; } - trace_rxrpc_abort(0, "SVK", - sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - abort_code, PTR_ERR(server_key)); - *_abort_code = abort_code; - return -EPROTO; + return rxrpc_abort_conn(conn, skb, abort_code, + PTR_ERR(server_key), "RXK"); } ret = -ENOMEM; @@ -1168,7 +1161,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, goto temporary_error_free_ticket; ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len, - &session_key, &expiry, _abort_code); + &session_key, &expiry); if (ret < 0) goto temporary_error_free_ticket; @@ -1246,10 +1239,9 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, kfree(ticket); protocol_error: kfree(response); - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); key_put(server_key); - *_abort_code = abort_code; - return -EPROTO; + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); + return rxrpc_abort_conn(conn, skb, abort_code, -EPROTO, "RXK"); temporary_error_free_ticket: kfree(ticket); From 57af281e5389b6fefedb3685f86847cbb0055f75 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2022 21:45:42 +0100 Subject: [PATCH 0425/1593] rxrpc: Tidy up abort generation infrastructure Tidy up the abort generation infrastructure in the following ways: (1) Create an enum and string mapping table to list the reasons an abort might be generated in tracing. (2) Replace the 3-char string with the values from (1) in the places that use that to log the abort source. This gets rid of a memcpy() in the tracepoint. (3) Subsume the rxrpc_rx_eproto tracepoint with the rxrpc_abort tracepoint and use values from (1) to indicate the trace reason. (4) Always make a call to an abort function at the point of the abort rather than stashing the values into variables and using goto to get to a place where it reported. The C optimiser will collapse the calls together as appropriate. The abort functions return a value that can be returned directly if appropriate. Note that this extends into afs also at the points where that generates an abort. To aid with this, the afs sources need to #define RXRPC_TRACE_ONLY_DEFINE_ENUMS before including the rxrpc tracing header because they don't have access to the rxrpc internal structures that some of the tracepoints make use of. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/cmservice.c | 6 +- fs/afs/rxrpc.c | 23 ++- include/net/af_rxrpc.h | 3 +- include/trace/events/rxrpc.h | 140 +++++++++++---- net/rxrpc/ar-internal.h | 51 +++--- net/rxrpc/call_accept.c | 43 +++-- net/rxrpc/call_event.c | 13 +- net/rxrpc/call_object.c | 6 +- net/rxrpc/conn_event.c | 19 +-- net/rxrpc/input.c | 65 ++++--- net/rxrpc/insecure.c | 6 +- net/rxrpc/io_thread.c | 163 ++++++++---------- net/rxrpc/recvmsg.c | 18 +- net/rxrpc/rxkad.c | 321 +++++++++++++++-------------------- net/rxrpc/rxperf.c | 17 +- net/rxrpc/security.c | 14 +- net/rxrpc/sendmsg.c | 20 ++- 17 files changed, 484 insertions(+), 444 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 7dcd59693a0c2..d4ddb20d67320 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -13,6 +13,8 @@ #include "internal.h" #include "afs_cm.h" #include "protocol_yfs.h" +#define RXRPC_TRACE_ONLY_DEFINE_ENUMS +#include static int afs_deliver_cb_init_call_back_state(struct afs_call *); static int afs_deliver_cb_init_call_back_state3(struct afs_call *); @@ -191,7 +193,7 @@ static void afs_cm_destructor(struct afs_call *call) * Abort a service call from within an action function. */ static void afs_abort_service_call(struct afs_call *call, u32 abort_code, int error, - const char *why) + enum rxrpc_abort_reason why) { rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, error, why); @@ -469,7 +471,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0) afs_send_empty_reply(call); else - afs_abort_service_call(call, 1, 1, "K-1"); + afs_abort_service_call(call, 1, 1, afs_abort_probeuuid_negative); afs_put_call(call); _leave(""); diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index c62939e5ea1f0..bd3830bc67006 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -13,6 +13,8 @@ #include "internal.h" #include "afs_cm.h" #include "protocol_yfs.h" +#define RXRPC_TRACE_ONLY_DEFINE_ENUMS +#include struct workqueue_struct *afs_async_calls; @@ -397,7 +399,8 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) error_do_abort: if (ret != -ECONNABORTED) { rxrpc_kernel_abort_call(call->net->socket, rxcall, - RX_USER_ABORT, ret, "KSD"); + RX_USER_ABORT, ret, + afs_abort_send_data_error); } else { len = 0; iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0); @@ -527,7 +530,8 @@ static void afs_deliver_to_call(struct afs_call *call) case -ENOTSUPP: abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, ret, "KIV"); + abort_code, ret, + afs_abort_op_not_supported); goto local_abort; case -EIO: pr_err("kAFS: Call %u in bad state %u\n", @@ -542,12 +546,14 @@ static void afs_deliver_to_call(struct afs_call *call) if (state != AFS_CALL_CL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, ret, "KUM"); + abort_code, ret, + afs_abort_unmarshal_error); goto local_abort; default: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, ret, "KER"); + abort_code, ret, + afs_abort_general_error); goto local_abort; } } @@ -619,7 +625,8 @@ long afs_wait_for_call_to_complete(struct afs_call *call, /* Kill off the call if it's still live. */ _debug("call interrupted"); if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - RX_USER_ABORT, -EINTR, "KWI")) + RX_USER_ABORT, -EINTR, + afs_abort_interrupted)) afs_set_call_complete(call, -EINTR, 0); } } @@ -836,7 +843,8 @@ void afs_send_empty_reply(struct afs_call *call) case -ENOMEM: _debug("oom"); rxrpc_kernel_abort_call(net->socket, call->rxcall, - RXGEN_SS_MARSHAL, -ENOMEM, "KOO"); + RXGEN_SS_MARSHAL, -ENOMEM, + afs_abort_oom); fallthrough; default: _leave(" [error]"); @@ -878,7 +886,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); rxrpc_kernel_abort_call(net->socket, call->rxcall, - RXGEN_SS_MARSHAL, -ENOMEM, "KOO"); + RXGEN_SS_MARSHAL, -ENOMEM, + afs_abort_oom); } _leave(" [error]"); } diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index d5a5ae9263804..ba717eac0229a 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -15,6 +15,7 @@ struct key; struct sock; struct socket; struct rxrpc_call; +enum rxrpc_abort_reason; enum rxrpc_interruptibility { RXRPC_INTERRUPTIBLE, /* Call is interruptible */ @@ -55,7 +56,7 @@ int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, struct iov_iter *, size_t *, bool, u32 *, u16 *); bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, - u32, int, const char *); + u32, int, enum rxrpc_abort_reason); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 222d0498d23fc..caeabd50e049c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -16,6 +16,104 @@ /* * Declare tracing information enums and their string mappings for display. */ +#define rxrpc_abort_reasons \ + /* AFS errors */ \ + EM(afs_abort_general_error, "afs-error") \ + EM(afs_abort_interrupted, "afs-intr") \ + EM(afs_abort_oom, "afs-oom") \ + EM(afs_abort_op_not_supported, "afs-op-notsupp") \ + EM(afs_abort_probeuuid_negative, "afs-probeuuid-neg") \ + EM(afs_abort_send_data_error, "afs-send-data") \ + EM(afs_abort_unmarshal_error, "afs-unmarshal") \ + /* rxperf errors */ \ + EM(rxperf_abort_general_error, "rxperf-error") \ + EM(rxperf_abort_oom, "rxperf-oom") \ + EM(rxperf_abort_op_not_supported, "rxperf-op-notsupp") \ + EM(rxperf_abort_unmarshal_error, "rxperf-unmarshal") \ + /* RxKAD security errors */ \ + EM(rxkad_abort_1_short_check, "rxkad1-short-check") \ + EM(rxkad_abort_1_short_data, "rxkad1-short-data") \ + EM(rxkad_abort_1_short_encdata, "rxkad1-short-encdata") \ + EM(rxkad_abort_1_short_header, "rxkad1-short-hdr") \ + EM(rxkad_abort_2_short_check, "rxkad2-short-check") \ + EM(rxkad_abort_2_short_data, "rxkad2-short-data") \ + EM(rxkad_abort_2_short_header, "rxkad2-short-hdr") \ + EM(rxkad_abort_2_short_len, "rxkad2-short-len") \ + EM(rxkad_abort_bad_checksum, "rxkad2-bad-cksum") \ + EM(rxkad_abort_chall_key_expired, "rxkad-chall-key-exp") \ + EM(rxkad_abort_chall_level, "rxkad-chall-level") \ + EM(rxkad_abort_chall_no_key, "rxkad-chall-nokey") \ + EM(rxkad_abort_chall_short, "rxkad-chall-short") \ + EM(rxkad_abort_chall_version, "rxkad-chall-version") \ + EM(rxkad_abort_resp_bad_callid, "rxkad-resp-bad-callid") \ + EM(rxkad_abort_resp_bad_checksum, "rxkad-resp-bad-cksum") \ + EM(rxkad_abort_resp_bad_param, "rxkad-resp-bad-param") \ + EM(rxkad_abort_resp_call_ctr, "rxkad-resp-call-ctr") \ + EM(rxkad_abort_resp_call_state, "rxkad-resp-call-state") \ + EM(rxkad_abort_resp_key_expired, "rxkad-resp-key-exp") \ + EM(rxkad_abort_resp_key_rejected, "rxkad-resp-key-rej") \ + EM(rxkad_abort_resp_level, "rxkad-resp-level") \ + EM(rxkad_abort_resp_nokey, "rxkad-resp-nokey") \ + EM(rxkad_abort_resp_ooseq, "rxkad-resp-ooseq") \ + EM(rxkad_abort_resp_short, "rxkad-resp-short") \ + EM(rxkad_abort_resp_short_tkt, "rxkad-resp-short-tkt") \ + EM(rxkad_abort_resp_tkt_aname, "rxkad-resp-tk-aname") \ + EM(rxkad_abort_resp_tkt_expired, "rxkad-resp-tk-exp") \ + EM(rxkad_abort_resp_tkt_future, "rxkad-resp-tk-future") \ + EM(rxkad_abort_resp_tkt_inst, "rxkad-resp-tk-inst") \ + EM(rxkad_abort_resp_tkt_len, "rxkad-resp-tk-len") \ + EM(rxkad_abort_resp_tkt_realm, "rxkad-resp-tk-realm") \ + EM(rxkad_abort_resp_tkt_short, "rxkad-resp-tk-short") \ + EM(rxkad_abort_resp_tkt_sinst, "rxkad-resp-tk-sinst") \ + EM(rxkad_abort_resp_tkt_sname, "rxkad-resp-tk-sname") \ + EM(rxkad_abort_resp_unknown_tkt, "rxkad-resp-unknown-tkt") \ + EM(rxkad_abort_resp_version, "rxkad-resp-version") \ + /* rxrpc errors */ \ + EM(rxrpc_abort_call_improper_term, "call-improper-term") \ + EM(rxrpc_abort_call_reset, "call-reset") \ + EM(rxrpc_abort_call_sendmsg, "call-sendmsg") \ + EM(rxrpc_abort_call_sock_release, "call-sock-rel") \ + EM(rxrpc_abort_call_sock_release_tba, "call-sock-rel-tba") \ + EM(rxrpc_abort_call_timeout, "call-timeout") \ + EM(rxrpc_abort_no_service_key, "no-serv-key") \ + EM(rxrpc_abort_nomem, "nomem") \ + EM(rxrpc_abort_service_not_offered, "serv-not-offered") \ + EM(rxrpc_abort_shut_down, "shut-down") \ + EM(rxrpc_abort_unsupported_security, "unsup-sec") \ + EM(rxrpc_badmsg_bad_abort, "bad-abort") \ + EM(rxrpc_badmsg_bad_jumbo, "bad-jumbo") \ + EM(rxrpc_badmsg_short_ack, "short-ack") \ + EM(rxrpc_badmsg_short_ack_info, "short-ack-info") \ + EM(rxrpc_badmsg_short_hdr, "short-hdr") \ + EM(rxrpc_badmsg_unsupported_packet, "unsup-pkt") \ + EM(rxrpc_badmsg_zero_call, "zero-call") \ + EM(rxrpc_badmsg_zero_seq, "zero-seq") \ + EM(rxrpc_badmsg_zero_service, "zero-service") \ + EM(rxrpc_eproto_ackr_outside_window, "ackr-out-win") \ + EM(rxrpc_eproto_ackr_sack_overflow, "ackr-sack-over") \ + EM(rxrpc_eproto_ackr_short_sack, "ackr-short-sack") \ + EM(rxrpc_eproto_ackr_zero, "ackr-zero") \ + EM(rxrpc_eproto_bad_upgrade, "bad-upgrade") \ + EM(rxrpc_eproto_data_after_last, "data-after-last") \ + EM(rxrpc_eproto_different_last, "diff-last") \ + EM(rxrpc_eproto_early_reply, "early-reply") \ + EM(rxrpc_eproto_improper_term, "improper-term") \ + EM(rxrpc_eproto_no_client_call, "no-cl-call") \ + EM(rxrpc_eproto_no_client_conn, "no-cl-conn") \ + EM(rxrpc_eproto_no_service_call, "no-sv-call") \ + EM(rxrpc_eproto_reupgrade, "re-upgrade") \ + EM(rxrpc_eproto_rxnull_challenge, "rxnull-chall") \ + EM(rxrpc_eproto_rxnull_response, "rxnull-resp") \ + EM(rxrpc_eproto_tx_rot_last, "tx-rot-last") \ + EM(rxrpc_eproto_unexpected_ack, "unex-ack") \ + EM(rxrpc_eproto_unexpected_ackall, "unex-ackall") \ + EM(rxrpc_eproto_unexpected_implicit_end, "unex-impl-end") \ + EM(rxrpc_eproto_unexpected_reply, "unex-reply") \ + EM(rxrpc_eproto_wrong_security, "wrong-sec") \ + EM(rxrpc_recvmsg_excess_data, "recvmsg-excess") \ + EM(rxrpc_recvmsg_short_data, "recvmsg-short") \ + E_(rxrpc_sendmsg_late_send, "sendmsg-late") + #define rxrpc_call_poke_traces \ EM(rxrpc_call_poke_abort, "Abort") \ EM(rxrpc_call_poke_complete, "Compl") \ @@ -382,6 +480,7 @@ #define EM(a, b) a, #define E_(a, b) a +enum rxrpc_abort_reason { rxrpc_abort_reasons } __mode(byte); enum rxrpc_bundle_trace { rxrpc_bundle_traces } __mode(byte); enum rxrpc_call_poke_trace { rxrpc_call_poke_traces } __mode(byte); enum rxrpc_call_trace { rxrpc_call_traces } __mode(byte); @@ -410,9 +509,13 @@ enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte); */ #undef EM #undef E_ + +#ifndef RXRPC_TRACE_ONLY_DEFINE_ENUMS + #define EM(a, b) TRACE_DEFINE_ENUM(a); #define E_(a, b) TRACE_DEFINE_ENUM(a); +rxrpc_abort_reasons; rxrpc_bundle_traces; rxrpc_call_poke_traces; rxrpc_call_traces; @@ -663,14 +766,14 @@ TRACE_EVENT(rxrpc_rx_done, ); TRACE_EVENT(rxrpc_abort, - TP_PROTO(unsigned int call_nr, const char *why, u32 cid, u32 call_id, - rxrpc_seq_t seq, int abort_code, int error), + TP_PROTO(unsigned int call_nr, enum rxrpc_abort_reason why, + u32 cid, u32 call_id, rxrpc_seq_t seq, int abort_code, int error), TP_ARGS(call_nr, why, cid, call_id, seq, abort_code, error), TP_STRUCT__entry( __field(unsigned int, call_nr ) - __array(char, why, 4 ) + __field(enum rxrpc_abort_reason, why ) __field(u32, cid ) __field(u32, call_id ) __field(rxrpc_seq_t, seq ) @@ -679,8 +782,8 @@ TRACE_EVENT(rxrpc_abort, ), TP_fast_assign( - memcpy(__entry->why, why, 4); __entry->call_nr = call_nr; + __entry->why = why; __entry->cid = cid; __entry->call_id = call_id; __entry->abort_code = abort_code; @@ -691,7 +794,8 @@ TRACE_EVENT(rxrpc_abort, TP_printk("c=%08x %08x:%08x s=%u a=%d e=%d %s", __entry->call_nr, __entry->cid, __entry->call_id, __entry->seq, - __entry->abort_code, __entry->error, __entry->why) + __entry->abort_code, __entry->error, + __print_symbolic(__entry->why, rxrpc_abort_reasons)) ); TRACE_EVENT(rxrpc_call_complete, @@ -1527,30 +1631,6 @@ TRACE_EVENT(rxrpc_improper_term, __entry->abort_code) ); -TRACE_EVENT(rxrpc_rx_eproto, - TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial, - const char *why), - - TP_ARGS(call, serial, why), - - TP_STRUCT__entry( - __field(unsigned int, call ) - __field(rxrpc_serial_t, serial ) - __field(const char *, why ) - ), - - TP_fast_assign( - __entry->call = call ? call->debug_id : 0; - __entry->serial = serial; - __entry->why = why; - ), - - TP_printk("c=%08x EPROTO %08x %s", - __entry->call, - __entry->serial, - __entry->why) - ); - TRACE_EVENT(rxrpc_connect_call, TP_PROTO(struct rxrpc_call *call), @@ -1848,6 +1928,8 @@ TRACE_EVENT(rxrpc_call_poked, #undef EM #undef E_ + +#endif /* RXRPC_TRACE_ONLY_DEFINE_ENUMS */ #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 78bd6fb0bc154..120ce3ccbb227 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -627,9 +627,10 @@ struct rxrpc_call { unsigned long events; spinlock_t notify_lock; /* Kernel notification lock */ rwlock_t state_lock; /* lock for state transition */ - const char *send_abort_why; /* String indicating why the abort was sent */ + unsigned int send_abort_why; /* Why the abort [enum rxrpc_abort_reason] */ s32 send_abort; /* Abort code to be sent */ short send_abort_err; /* Error to be associated with the abort */ + rxrpc_seq_t send_abort_seq; /* DATA packet that incurred the abort (or 0) */ s32 abort_code; /* Local/remote abort code */ int error; /* Local error incurred */ enum rxrpc_call_state state; /* current state of call */ @@ -818,9 +819,11 @@ extern struct workqueue_struct *rxrpc_workqueue; */ int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); void rxrpc_discard_prealloc(struct rxrpc_sock *); -int rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_peer *, - struct rxrpc_connection *, struct sockaddr_rxrpc *, - struct sk_buff *); +bool rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_peer *peer, + struct rxrpc_connection *conn, + struct sockaddr_rxrpc *peer_srx, + struct sk_buff *skb); void rxrpc_accept_incoming_calls(struct rxrpc_local *); int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long); @@ -840,7 +843,7 @@ void rxrpc_reduce_call_timer(struct rxrpc_call *call, unsigned long now, enum rxrpc_timer_trace why); -void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb); +bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb); /* * call_object.c @@ -905,10 +908,10 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *); void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct sk_buff *skb, unsigned int channel); int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb, - s32 abort_code, int err, const char *why); + s32 abort_code, int err, enum rxrpc_abort_reason why); void rxrpc_process_connection(struct work_struct *); void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool); -int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb); +bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb); void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb); static inline bool rxrpc_is_conn_aborted(const struct rxrpc_connection *conn) @@ -979,12 +982,19 @@ void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *); */ int rxrpc_encap_rcv(struct sock *, struct sk_buff *); void rxrpc_error_report(struct sock *); +bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why, + s32 abort_code, int err); int rxrpc_io_thread(void *data); static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local) { wake_up_process(local->io_thread); } +static inline bool rxrpc_protocol_error(struct sk_buff *skb, enum rxrpc_abort_reason why) +{ + return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EPROTO); +} + /* * insecure.c */ @@ -1108,29 +1118,26 @@ bool __rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion bool rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int); bool __rxrpc_call_completed(struct rxrpc_call *); bool rxrpc_call_completed(struct rxrpc_call *); -bool __rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int); -bool rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int); +bool __rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, + u32 abort_code, int error, enum rxrpc_abort_reason why); +bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, + u32 abort_code, int error, enum rxrpc_abort_reason why); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* * Abort a call due to a protocol error. */ -static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, - struct sk_buff *skb, - const char *eproto_why, - const char *why, - u32 abort_code) +static inline int rxrpc_abort_eproto(struct rxrpc_call *call, + struct sk_buff *skb, + s32 abort_code, + enum rxrpc_abort_reason why) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why); - return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO); + rxrpc_abort_call(call, sp->hdr.seq, abort_code, -EPROTO, why); + return -EPROTO; } -#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \ - __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \ - (abort_why), (abort_code)) - /* * rtt.c */ @@ -1162,8 +1169,8 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *, /* * sendmsg.c */ -bool rxrpc_propose_abort(struct rxrpc_call *call, - u32 abort_code, int error, const char *why); +bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error, + enum rxrpc_abort_reason why); int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index c957e4415cdc4..a132d486dea0d 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -326,11 +326,11 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, * If we want to report an error, we mark the skb with the packet type and * abort code and return false. */ -int rxrpc_new_incoming_call(struct rxrpc_local *local, - struct rxrpc_peer *peer, - struct rxrpc_connection *conn, - struct sockaddr_rxrpc *peer_srx, - struct sk_buff *skb) +bool rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_peer *peer, + struct rxrpc_connection *conn, + struct sockaddr_rxrpc *peer_srx, + struct sk_buff *skb) { const struct rxrpc_security *sec = NULL; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -339,10 +339,9 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local, _enter(""); - /* Don't set up a call for anything other than the first DATA packet. */ - if (sp->hdr.seq != 1 || - sp->hdr.type != RXRPC_PACKET_TYPE_DATA) - return 0; /* Just discard */ + /* Don't set up a call for anything other than a DATA packet. */ + if (sp->hdr.type != RXRPC_PACKET_TYPE_DATA) + return rxrpc_protocol_error(skb, rxrpc_eproto_no_service_call); rcu_read_lock(); @@ -363,16 +362,14 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local, if (!conn) { sec = rxrpc_get_incoming_security(rx, skb); if (!sec) - goto reject; + goto unsupported_security; } spin_lock(&rx->incoming_lock); if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED || rx->sk.sk_state == RXRPC_CLOSE) { - trace_rxrpc_abort(0, "CLS", sp->hdr.cid, sp->hdr.callNumber, - sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN); - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; - skb->priority = RX_INVALID_OPERATION; + rxrpc_direct_abort(skb, rxrpc_abort_shut_down, + RX_INVALID_OPERATION, -ESHUTDOWN); goto no_call; } @@ -413,22 +410,24 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local, _leave(" = %p{%d}", call, call->debug_id); rxrpc_input_call_event(call, skb); rxrpc_put_call(call, rxrpc_call_put_input); - return 0; + return true; unsupported_service: - trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_INVALID_OPERATION, EOPNOTSUPP); - skb->priority = RX_INVALID_OPERATION; - goto reject; + rcu_read_unlock(); + return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered, + RX_INVALID_OPERATION, -EOPNOTSUPP); +unsupported_security: + rcu_read_unlock(); + return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered, + RX_INVALID_OPERATION, -EKEYREJECTED); no_call: spin_unlock(&rx->incoming_lock); -reject: rcu_read_unlock(); _leave(" = f [%u]", skb->mark); - return -EPROTO; + return false; discard: rcu_read_unlock(); - return 0; + return true; } /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index b2fc3fa686ece..695aeb70d1a6b 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -333,7 +333,7 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call) /* * Handle retransmission and deferred ACK/abort generation. */ -void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) +bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) { unsigned long now, next, t; rxrpc_serial_t ackr_serial; @@ -352,8 +352,8 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) /* Handle abort request locklessly, vs rxrpc_propose_abort(). */ abort_code = smp_load_acquire(&call->send_abort); if (abort_code) { - rxrpc_abort_call(call->send_abort_why, call, 0, call->send_abort, - call->send_abort_err); + rxrpc_abort_call(call, 0, call->send_abort, call->send_abort_err, + call->send_abort_why); goto out; } @@ -440,9 +440,11 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) && (int)call->conn->hi_serial - (int)call->rx_serial > 0) { trace_rxrpc_call_reset(call); - rxrpc_abort_call("EXP", call, 0, RX_CALL_DEAD, -ECONNRESET); + rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET, + rxrpc_abort_call_reset); } else { - rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); + rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME, + rxrpc_abort_call_timeout); } goto out; } @@ -494,4 +496,5 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) if (call->acks_hard_ack != call->tx_bottom) rxrpc_shrink_call_tx_buffer(call); _leave(""); + return true; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 666430182dfd6..705f6e26cc75e 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -581,7 +581,8 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) call = list_entry(rx->to_be_accepted.next, struct rxrpc_call, accept_link); list_del(&call->accept_link); - rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET, "SKR"); + rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET, + rxrpc_abort_call_sock_release_tba); rxrpc_put_call(call, rxrpc_call_put_release_sock_tba); } @@ -589,7 +590,8 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) call = list_entry(rx->sock_calls.next, struct rxrpc_call, sock_link); rxrpc_get_call(call, rxrpc_call_get_release_sock); - rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET, "SKT"); + rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET, + rxrpc_abort_call_sock_release); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put_release_sock); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 753d91a9646fc..485d7f0fed2cb 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -47,7 +47,7 @@ static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff * Mark a socket buffer to indicate that the connection it's on should be aborted. */ int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb, - s32 abort_code, int err, const char *why) + s32 abort_code, int err, enum rxrpc_abort_reason why) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -288,8 +288,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return 0; default: - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("bad_conn_pkt")); + WARN_ON_ONCE(1); return -EPROTO; } } @@ -300,7 +299,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, static void rxrpc_secure_connection(struct rxrpc_connection *conn) { if (conn->security->issue_challenge(conn) < 0) - rxrpc_abort_conn(conn, NULL, RX_CALL_DEAD, -ENOMEM, "OOM"); + rxrpc_abort_conn(conn, NULL, RX_CALL_DEAD, -ENOMEM, + rxrpc_abort_nomem); } /* @@ -405,14 +405,14 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, /* * Input a connection-level packet. */ -int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) +bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_BUSY: /* Just ignore BUSY packets for now. */ - return 0; + return true; case RXRPC_PACKET_TYPE_ABORT: if (rxrpc_is_conn_aborted(conn)) @@ -429,12 +429,11 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) return true; } rxrpc_post_packet_to_conn(conn, skb); - return 0; + return true; default: - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("bad_conn_pkt")); - return -EPROTO; + WARN_ON_ONCE(1); + return true; } } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index bb4beb4453259..bd69ff2d90826 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -9,10 +9,10 @@ #include "ar-internal.h" -static void rxrpc_proto_abort(const char *why, - struct rxrpc_call *call, rxrpc_seq_t seq) +static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq, + enum rxrpc_abort_reason why) { - rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG); + rxrpc_abort_call(call, seq, RX_PROTOCOL_ERROR, -EBADMSG, why); } /* @@ -249,8 +249,8 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, * This occurs when we get an ACKALL packet, the first DATA packet of a reply, * or a final ACK packet. */ -static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, - const char *abort_why) +static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, + enum rxrpc_abort_reason abort_why) { unsigned int state; @@ -283,13 +283,12 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, else trace_rxrpc_txqueue(call, rxrpc_txqueue_end); _leave(" = ok"); - return true; + return; bad_state: write_unlock(&call->state_lock); kdebug("end_tx %s", rxrpc_call_states[call->state]); - rxrpc_proto_abort(abort_why, call, call->tx_top); - return false; + rxrpc_proto_abort(call, call->tx_top, abort_why); } /* @@ -311,11 +310,13 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { if (!rxrpc_rotate_tx_window(call, top, &summary)) { - rxrpc_proto_abort("TXL", call, top); + rxrpc_proto_abort(call, top, rxrpc_eproto_early_reply); return false; } } - return rxrpc_end_tx_phase(call, true, "ETD"); + + rxrpc_end_tx_phase(call, true, rxrpc_eproto_unexpected_reply); + return true; } static void rxrpc_input_update_ack_window(struct rxrpc_call *call, @@ -365,17 +366,14 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, if (last) { if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) && - seq + 1 != wtop) { - rxrpc_proto_abort("LSN", call, seq); - return; - } + seq + 1 != wtop) + return rxrpc_proto_abort(call, seq, rxrpc_eproto_different_last); } else { if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && after_eq(seq, wtop)) { pr_warn("Packet beyond last: c=%x q=%x window=%x-%x wlimit=%x\n", call->debug_id, seq, window, wtop, wlimit); - rxrpc_proto_abort("LSA", call, seq); - return; + return rxrpc_proto_abort(call, seq, rxrpc_eproto_data_after_last); } } @@ -583,7 +581,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) goto out_notify; if (!rxrpc_input_split_jumbo(call, skb)) { - rxrpc_proto_abort("VLD", call, sp->hdr.seq); + rxrpc_proto_abort(call, sp->hdr.seq, rxrpc_badmsg_bad_jumbo); goto out_notify; } skb = NULL; @@ -764,7 +762,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) offset = sizeof(struct rxrpc_wire_header); if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0) - return rxrpc_proto_abort("XAK", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack); offset += sizeof(ack); ack_serial = sp->hdr.serial; @@ -844,7 +842,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) ioffset = offset + nr_acks + 3; if (skb->len >= ioffset + sizeof(info) && skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0) - return rxrpc_proto_abort("XAI", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_info); if (nr_acks > 0) skb_condense(skb); @@ -867,7 +865,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_input_ackinfo(call, skb, &info); if (first_soft_ack == 0) - return rxrpc_proto_abort("AK0", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero); /* Ignore ACKs unless we are or have just been transmitting. */ switch (READ_ONCE(call->state)) { @@ -882,20 +880,20 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (before(hard_ack, call->acks_hard_ack) || after(hard_ack, call->tx_top)) - return rxrpc_proto_abort("AKW", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_outside_window); if (nr_acks > call->tx_top - hard_ack) - return rxrpc_proto_abort("AKN", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow); if (after(hard_ack, call->acks_hard_ack)) { if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { - rxrpc_end_tx_phase(call, false, "ETA"); + rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack); return; } } if (nr_acks > 0) { if (offset > (int)skb->len - nr_acks) - return rxrpc_proto_abort("XSA", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack); rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack, nr_acks, &summary); } @@ -917,7 +915,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_ack_summary summary = { 0 }; if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) - rxrpc_end_tx_phase(call, false, "ETL"); + rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ackall); } /* @@ -962,27 +960,23 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb) switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: - rxrpc_input_data(call, skb); - break; + return rxrpc_input_data(call, skb); case RXRPC_PACKET_TYPE_ACK: - rxrpc_input_ack(call, skb); - break; + return rxrpc_input_ack(call, skb); case RXRPC_PACKET_TYPE_BUSY: /* Just ignore BUSY packets from the server; the retry and * lifespan timers will take care of business. BUSY packets * from the client don't make sense. */ - break; + return; case RXRPC_PACKET_TYPE_ABORT: - rxrpc_input_abort(call, skb); - break; + return rxrpc_input_abort(call, skb); case RXRPC_PACKET_TYPE_ACKALL: - rxrpc_input_ackall(call, skb); - break; + return rxrpc_input_ackall(call, skb); default: break; @@ -1004,7 +998,8 @@ void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb) case RXRPC_CALL_COMPLETE: break; default: - rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN); + rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ESHUTDOWN, + rxrpc_eproto_improper_term); trace_rxrpc_improper_term(call); break; } diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index 29dcc7d3f51a7..34353b6e584bc 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -45,13 +45,15 @@ static void none_free_call_crypto(struct rxrpc_call *call) static int none_respond_to_challenge(struct rxrpc_connection *conn, struct sk_buff *skb) { - return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, "RXN"); + return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, + rxrpc_eproto_rxnull_challenge); } static int none_verify_response(struct rxrpc_connection *conn, struct sk_buff *skb) { - return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, "RXN"); + return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, + rxrpc_eproto_rxnull_response); } static void none_clear(struct rxrpc_connection *conn) diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 46e58cf5bc96e..33fd2394c8b3b 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -66,10 +66,32 @@ void rxrpc_error_report(struct sock *sk) rcu_read_unlock(); } +/* + * Directly produce an abort from a packet. + */ +bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why, + s32 abort_code, int err) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + abort_code, err); + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; + skb->priority = abort_code; + return false; +} + +static bool rxrpc_bad_message(struct sk_buff *skb, enum rxrpc_abort_reason why) +{ + return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EBADMSG); +} + +#define just_discard true + /* * Process event packets targeted at a local endpoint. */ -static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb) +static bool rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); char v; @@ -81,22 +103,21 @@ static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb) if (v == 0) rxrpc_send_version_request(local, &sp->hdr, skb); } + + return true; } /* * Extract the wire header from a packet and translate the byte order. */ -static noinline -int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) +static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp, + struct sk_buff *skb) { struct rxrpc_wire_header whdr; /* dig out the RxRPC connection details */ - if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) { - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("bad_hdr")); - return -EBADMSG; - } + if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) + return rxrpc_bad_message(skb, rxrpc_badmsg_short_hdr); memset(sp, 0, sizeof(*sp)); sp->hdr.epoch = ntohl(whdr.epoch); @@ -110,7 +131,7 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) sp->hdr.securityIndex = whdr.securityIndex; sp->hdr._rsvd = ntohs(whdr._rsvd); sp->hdr.serviceId = ntohs(whdr.serviceId); - return 0; + return true; } /* @@ -130,28 +151,28 @@ static bool rxrpc_extract_abort(struct sk_buff *skb) /* * Process packets received on the local endpoint */ -static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) +static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) { struct rxrpc_connection *conn; struct sockaddr_rxrpc peer_srx; struct rxrpc_skb_priv *sp; struct rxrpc_peer *peer = NULL; struct sk_buff *skb = *_skb; - int ret = 0; + bool ret = false; skb_pull(skb, sizeof(struct udphdr)); sp = rxrpc_skb(skb); /* dig out the RxRPC connection details */ - if (rxrpc_extract_header(sp, skb) < 0) - goto bad_message; + if (!rxrpc_extract_header(sp, skb)) + return just_discard; if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { trace_rxrpc_rx_lose(sp); - return 0; + return just_discard; } } @@ -160,28 +181,28 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: if (rxrpc_to_client(sp)) - return 0; - rxrpc_input_version(local, skb); - return 0; + return just_discard; + return rxrpc_input_version(local, skb); case RXRPC_PACKET_TYPE_BUSY: if (rxrpc_to_server(sp)) - return 0; + return just_discard; fallthrough; case RXRPC_PACKET_TYPE_ACK: case RXRPC_PACKET_TYPE_ACKALL: if (sp->hdr.callNumber == 0) - goto bad_message; + return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call); break; case RXRPC_PACKET_TYPE_ABORT: if (!rxrpc_extract_abort(skb)) - return 0; /* Just discard if malformed */ + return just_discard; /* Just discard if malformed */ break; case RXRPC_PACKET_TYPE_DATA: - if (sp->hdr.callNumber == 0 || - sp->hdr.seq == 0) - goto bad_message; + if (sp->hdr.callNumber == 0) + return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call); + if (sp->hdr.seq == 0) + return rxrpc_bad_message(skb, rxrpc_badmsg_zero_seq); /* Unshare the packet so that it can be modified for in-place * decryption. @@ -191,7 +212,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) if (!skb) { rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem); *_skb = NULL; - return 0; + return just_discard; } if (skb != *_skb) { @@ -205,28 +226,28 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) case RXRPC_PACKET_TYPE_CHALLENGE: if (rxrpc_to_server(sp)) - return 0; + return just_discard; break; case RXRPC_PACKET_TYPE_RESPONSE: if (rxrpc_to_client(sp)) - return 0; + return just_discard; break; /* Packet types 9-11 should just be ignored. */ case RXRPC_PACKET_TYPE_PARAMS: case RXRPC_PACKET_TYPE_10: case RXRPC_PACKET_TYPE_11: - return 0; + return just_discard; default: - goto bad_message; + return rxrpc_bad_message(skb, rxrpc_badmsg_unsupported_packet); } if (sp->hdr.serviceId == 0) - goto bad_message; + return rxrpc_bad_message(skb, rxrpc_badmsg_zero_service); if (WARN_ON_ONCE(rxrpc_extract_addr_from_skb(&peer_srx, skb) < 0)) - return true; /* Unsupported address type - discard. */ + return just_discard; /* Unsupported address type. */ if (peer_srx.transport.family != local->srx.transport.family && (peer_srx.transport.family == AF_INET && @@ -234,7 +255,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n", peer_srx.transport.family, local->srx.transport.family); - return true; /* Wrong address type - discard. */ + return just_discard; /* Wrong address type. */ } if (rxrpc_to_client(sp)) { @@ -242,12 +263,8 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) conn = rxrpc_find_client_connection_rcu(local, &peer_srx, skb); conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input); rcu_read_unlock(); - if (!conn) { - trace_rxrpc_abort(0, "NCC", sp->hdr.cid, - sp->hdr.callNumber, sp->hdr.seq, - RXKADINCONSISTENCY, EBADMSG); - goto protocol_error; - } + if (!conn) + return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_conn); ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb); rxrpc_put_connection(conn, rxrpc_conn_put_call_input); @@ -280,19 +297,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) ret = rxrpc_new_incoming_call(local, peer, NULL, &peer_srx, skb); rxrpc_put_peer(peer, rxrpc_peer_put_input); - if (ret < 0) - goto reject_packet; - return 0; - -bad_message: - trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_PROTOCOL_ERROR, EBADMSG); -protocol_error: - skb->priority = RX_PROTOCOL_ERROR; - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; -reject_packet: - rxrpc_reject_packet(local, skb); - return 0; + return ret; } /* @@ -306,21 +311,23 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, struct rxrpc_channel *chan; struct rxrpc_call *call = NULL; unsigned int channel; + bool ret; if (sp->hdr.securityIndex != conn->security_ix) - goto wrong_security; + return rxrpc_direct_abort(skb, rxrpc_eproto_wrong_security, + RXKADINCONSISTENCY, -EBADMSG); if (sp->hdr.serviceId != conn->service_id) { int old_id; if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) - goto reupgrade; + return rxrpc_protocol_error(skb, rxrpc_eproto_reupgrade); + old_id = cmpxchg(&conn->service_id, conn->orig_service_id, sp->hdr.serviceId); - if (old_id != conn->orig_service_id && old_id != sp->hdr.serviceId) - goto reupgrade; + return rxrpc_protocol_error(skb, rxrpc_eproto_bad_upgrade); } if (after(sp->hdr.serial, conn->hi_serial)) @@ -336,19 +343,19 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, /* Ignore really old calls */ if (sp->hdr.callNumber < chan->last_call) - return 0; + return just_discard; if (sp->hdr.callNumber == chan->last_call) { if (chan->call || sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) - return 0; + return just_discard; /* For the previous service call, if completed successfully, we * discard all further packets. */ if (rxrpc_conn_is_service(conn) && chan->last_type == RXRPC_PACKET_TYPE_ACK) - return 0; + return just_discard; /* But otherwise we need to retransmit the final packet from * data cached in the connection record. @@ -359,7 +366,7 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, sp->hdr.serial, sp->hdr.flags); rxrpc_conn_retransmit_call(conn, skb, channel); - return 0; + return just_discard; } rcu_read_lock(); @@ -370,7 +377,8 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, if (sp->hdr.callNumber > chan->call_id) { if (rxrpc_to_client(sp)) { rxrpc_put_call(call, rxrpc_call_put_input); - goto reject_packet; + return rxrpc_protocol_error(skb, + rxrpc_eproto_unexpected_implicit_end); } if (call) { @@ -382,38 +390,14 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, if (!call) { if (rxrpc_to_client(sp)) - goto bad_message; - if (rxrpc_new_incoming_call(conn->local, conn->peer, conn, - peer_srx, skb) == 0) - return 0; - goto reject_packet; + return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_call); + return rxrpc_new_incoming_call(conn->local, conn->peer, conn, + peer_srx, skb); } - rxrpc_input_call_event(call, skb); + ret = rxrpc_input_call_event(call, skb); rxrpc_put_call(call, rxrpc_call_put_input); - return 0; - -wrong_security: - trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RXKADINCONSISTENCY, EBADMSG); - skb->priority = RXKADINCONSISTENCY; - goto post_abort; - -reupgrade: - trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_PROTOCOL_ERROR, EBADMSG); - goto protocol_error; - -bad_message: - trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_PROTOCOL_ERROR, EBADMSG); -protocol_error: - skb->priority = RX_PROTOCOL_ERROR; -post_abort: - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; -reject_packet: - rxrpc_reject_packet(conn->local, skb); - return 0; + return ret; } /* @@ -470,7 +454,8 @@ int rxrpc_io_thread(void *data) switch (skb->mark) { case RXRPC_SKB_MARK_PACKET: skb->priority = 0; - rxrpc_input_packet(local, &skb); + if (!rxrpc_input_packet(local, &skb)) + rxrpc_reject_packet(local, skb); trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_free_skb(skb, rxrpc_skb_put_input); break; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 8d5fe65f5951c..59b521b82aec3 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -117,8 +117,8 @@ bool rxrpc_call_completed(struct rxrpc_call *call) /* * Record that a call is locally aborted. */ -bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, - rxrpc_seq_t seq, u32 abort_code, int error) +bool __rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, + u32 abort_code, int error, enum rxrpc_abort_reason why) { trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, abort_code, error); @@ -126,13 +126,13 @@ bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, abort_code, error); } -bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, - rxrpc_seq_t seq, u32 abort_code, int error) +bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, + u32 abort_code, int error, enum rxrpc_abort_reason why) { bool ret; write_lock(&call->state_lock); - ret = __rxrpc_abort_call(why, call, seq, abort_code, error); + ret = __rxrpc_abort_call(call, seq, abort_code, error, why); write_unlock(&call->state_lock); if (ret && test_bit(RXRPC_CALL_EXPOSED, &call->flags)) rxrpc_send_abort_packet(call); @@ -642,11 +642,15 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, return ret; short_data: - trace_rxrpc_rx_eproto(call, 0, tracepoint_string("short_data")); + trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_short_data, + call->cid, call->call_id, call->rx_consumed, + 0, -EBADMSG); ret = -EBADMSG; goto out; excess_data: - trace_rxrpc_rx_eproto(call, 0, tracepoint_string("excess_data")); + trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_excess_data, + call->cid, call->call_id, call->rx_consumed, + 0, -EMSGSIZE); ret = -EMSGSIZE; goto out; call_complete: diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 5d2fbc6ec3cf1..e52cb8058156f 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -411,18 +411,15 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv; struct scatterlist sg[16]; - bool aborted; u32 data_size, buf; u16 check; int ret; _enter(""); - if (sp->len < 8) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_hdr", "V1H", - RXKADSEALEDINCON); - goto protocol_error; - } + if (sp->len < 8) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_1_short_header); /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. @@ -442,11 +439,9 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, skcipher_request_zero(req); /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_len", "XV1", - RXKADDATALEN); - goto protocol_error; - } + if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) + return rxrpc_abort_eproto(call, skb, RXKADDATALEN, + rxkad_abort_1_short_encdata); sp->offset += sizeof(sechdr); sp->len -= sizeof(sechdr); @@ -456,26 +451,16 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, check = buf >> 16; check ^= seq ^ call->call_id; check &= 0xffff; - if (check != 0) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_check", "V1C", - RXKADSEALEDINCON); - goto protocol_error; - } - - if (data_size > sp->len) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_datalen", "V1L", - RXKADDATALEN); - goto protocol_error; - } + if (check != 0) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_1_short_check); + if (data_size > sp->len) + return rxrpc_abort_eproto(call, skb, RXKADDATALEN, + rxkad_abort_1_short_data); sp->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; - -protocol_error: - if (aborted) - rxrpc_send_abort_packet(call); - return -EPROTO; } /* @@ -490,18 +475,15 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv; struct scatterlist _sg[4], *sg; - bool aborted; u32 data_size, buf; u16 check; int nsg, ret; _enter(",{%d}", sp->len); - if (sp->len < 8) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_hdr", "V2H", - RXKADSEALEDINCON); - goto protocol_error; - } + if (sp->len < 8) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_2_short_header); /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. @@ -513,7 +495,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, } else { sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO); if (!sg) - goto nomem; + return -ENOMEM; } sg_init_table(sg, nsg); @@ -537,11 +519,9 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, kfree(sg); /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_len", "XV2", - RXKADDATALEN); - goto protocol_error; - } + if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) + return rxrpc_abort_eproto(call, skb, RXKADDATALEN, + rxkad_abort_2_short_len); sp->offset += sizeof(sechdr); sp->len -= sizeof(sechdr); @@ -551,30 +531,17 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, check = buf >> 16; check ^= seq ^ call->call_id; check &= 0xffff; - if (check != 0) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_check", "V2C", - RXKADSEALEDINCON); - goto protocol_error; - } + if (check != 0) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_2_short_check); - if (data_size > sp->len) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_datalen", "V2L", - RXKADDATALEN); - goto protocol_error; - } + if (data_size > sp->len) + return rxrpc_abort_eproto(call, skb, RXKADDATALEN, + rxkad_abort_2_short_data); sp->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; - -protocol_error: - if (aborted) - rxrpc_send_abort_packet(call); - return -EPROTO; - -nomem: - _leave(" = -ENOMEM"); - return -ENOMEM; } /* @@ -590,7 +557,6 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) __be32 buf[2]; } crypto __aligned(8); rxrpc_seq_t seq = sp->hdr.seq; - bool aborted; int ret; u16 cksum; u32 x, y; @@ -627,9 +593,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) cksum = 1; /* zero checksums are not permitted */ if (cksum != sp->hdr.cksum) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_csum", "VCK", - RXKADSEALEDINCON); - goto protocol_error; + ret = rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_bad_checksum); + goto out; } switch (call->conn->security_level) { @@ -647,13 +613,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) break; } +out: skcipher_request_free(req); return ret; - -protocol_error: - if (aborted) - rxrpc_send_abort_packet(call); - return -EPROTO; } /* @@ -827,27 +789,24 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, struct rxkad_challenge challenge; struct rxkad_response *resp; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - const char *eproto; - u32 version, nonce, min_level, abort_code; - int ret; + u32 version, nonce, min_level; + int ret = -EPROTO; _enter("{%d,%x}", conn->debug_id, key_serial(conn->key)); - eproto = tracepoint_string("chall_no_key"); - abort_code = RX_PROTOCOL_ERROR; if (!conn->key) - goto protocol_error; + return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, + rxkad_abort_chall_no_key); - abort_code = RXKADEXPIRED; ret = key_validate(conn->key); if (ret < 0) - goto other_error; + return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret, + rxkad_abort_chall_key_expired); - eproto = tracepoint_string("chall_short"); - abort_code = RXKADPACKETSHORT; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &challenge, sizeof(challenge)) < 0) - goto protocol_error; + return rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_chall_short); version = ntohl(challenge.version); nonce = ntohl(challenge.nonce); @@ -855,15 +814,13 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, trace_rxrpc_rx_challenge(conn, sp->hdr.serial, version, nonce, min_level); - eproto = tracepoint_string("chall_ver"); - abort_code = RXKADINCONSISTENCY; if (version != RXKAD_VERSION) - goto protocol_error; + return rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, + rxkad_abort_chall_version); - abort_code = RXKADLEVELFAIL; - ret = -EACCES; if (conn->security_level < min_level) - goto other_error; + return rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EACCES, + rxkad_abort_chall_level); token = conn->key->payload.data[0]; @@ -892,13 +849,6 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad); kfree(resp); return ret; - -protocol_error: - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); - ret = -EPROTO; -other_error: - rxrpc_abort_conn(conn, skb, abort_code, ret, "RXK"); - return ret; } /* @@ -912,16 +862,12 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, time64_t *_expiry) { struct skcipher_request *req; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv, key; struct scatterlist sg[1]; struct in_addr addr; unsigned int life; - const char *eproto; time64_t issue, now; bool little_endian; - int ret; - u32 abort_code; u8 *p, *q, *name, *end; _enter("{%d},{%x}", conn->debug_id, key_serial(server_key)); @@ -933,10 +879,9 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, memcpy(&iv, &server_key->payload.data[2], sizeof(iv)); - ret = -ENOMEM; req = skcipher_request_alloc(server_key->payload.data[0], GFP_NOFS); if (!req) - goto temporary_error; + return -ENOMEM; sg_init_one(&sg[0], ticket, ticket_len); skcipher_request_set_callback(req, 0, NULL, NULL); @@ -947,18 +892,21 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, p = ticket; end = p + ticket_len; -#define Z(field) \ - ({ \ - u8 *__str = p; \ - eproto = tracepoint_string("rxkad_bad_"#field); \ - q = memchr(p, 0, end - p); \ - if (!q || q - p > (field##_SZ)) \ - goto bad_ticket; \ - for (; p < q; p++) \ - if (!isprint(*p)) \ - goto bad_ticket; \ - p++; \ - __str; \ +#define Z(field, fieldl) \ + ({ \ + u8 *__str = p; \ + q = memchr(p, 0, end - p); \ + if (!q || q - p > field##_SZ) \ + return rxrpc_abort_conn( \ + conn, skb, RXKADBADTICKET, -EPROTO, \ + rxkad_abort_resp_tkt_##fieldl); \ + for (; p < q; p++) \ + if (!isprint(*p)) \ + return rxrpc_abort_conn( \ + conn, skb, RXKADBADTICKET, -EPROTO, \ + rxkad_abort_resp_tkt_##fieldl); \ + p++; \ + __str; \ }) /* extract the ticket flags */ @@ -967,20 +915,20 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, p++; /* extract the authentication name */ - name = Z(ANAME); + name = Z(ANAME, aname); _debug("KIV ANAME: %s", name); /* extract the principal's instance */ - name = Z(INST); + name = Z(INST, inst); _debug("KIV INST : %s", name); /* extract the principal's authentication domain */ - name = Z(REALM); + name = Z(REALM, realm); _debug("KIV REALM: %s", name); - eproto = tracepoint_string("rxkad_bad_len"); if (end - p < 4 + 8 + 4 + 2) - goto bad_ticket; + return rxrpc_abort_conn(conn, skb, RXKADBADTICKET, -EPROTO, + rxkad_abort_resp_tkt_short); /* get the IPv4 address of the entity that requested the ticket */ memcpy(&addr, p, sizeof(addr)); @@ -1012,37 +960,23 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, _debug("KIV ISSUE: %llx [%llx]", issue, now); /* check the ticket is in date */ - if (issue > now) { - abort_code = RXKADNOAUTH; - ret = -EKEYREJECTED; - goto other_error; - } - - if (issue < now - life) { - abort_code = RXKADEXPIRED; - ret = -EKEYEXPIRED; - goto other_error; - } + if (issue > now) + return rxrpc_abort_conn(conn, skb, RXKADNOAUTH, -EKEYREJECTED, + rxkad_abort_resp_tkt_future); + if (issue < now - life) + return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, -EKEYEXPIRED, + rxkad_abort_resp_tkt_expired); *_expiry = issue + life; /* get the service name */ - name = Z(SNAME); + name = Z(SNAME, sname); _debug("KIV SNAME: %s", name); /* get the service instance name */ - name = Z(INST); + name = Z(INST, sinst); _debug("KIV SINST: %s", name); return 0; - -bad_ticket: - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); - abort_code = RXKADBADTICKET; - ret = -EPROTO; -other_error: - return rxrpc_abort_conn(conn, skb, abort_code, ret, "RXK"); -temporary_error: - return ret; } /* @@ -1089,10 +1023,9 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt session_key; struct key *server_key; - const char *eproto; time64_t expiry; void *ticket; - u32 abort_code, version, kvno, ticket_len, level; + u32 version, kvno, ticket_len, level; __be32 csum; int ret, i; @@ -1100,19 +1033,18 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, server_key = rxrpc_look_up_server_security(conn, skb, 0, 0); if (IS_ERR(server_key)) { - switch (PTR_ERR(server_key)) { + ret = PTR_ERR(server_key); + switch (ret) { case -ENOKEY: - abort_code = RXKADUNKNOWNKEY; - break; + return rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, ret, + rxkad_abort_resp_nokey); case -EKEYEXPIRED: - abort_code = RXKADEXPIRED; - break; + return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret, + rxkad_abort_resp_key_expired); default: - abort_code = RXKADNOAUTH; - break; + return rxrpc_abort_conn(conn, skb, RXKADNOAUTH, ret, + rxkad_abort_resp_key_rejected); } - return rxrpc_abort_conn(conn, skb, abort_code, - PTR_ERR(server_key), "RXK"); } ret = -ENOMEM; @@ -1120,11 +1052,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, if (!response) goto temporary_error; - eproto = tracepoint_string("rxkad_rsp_short"); - abort_code = RXKADPACKETSHORT; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), - response, sizeof(*response)) < 0) + response, sizeof(*response)) < 0) { + rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_resp_short); goto protocol_error; + } version = ntohl(response->version); ticket_len = ntohl(response->ticket_len); @@ -1132,20 +1065,23 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len); - eproto = tracepoint_string("rxkad_rsp_ver"); - abort_code = RXKADINCONSISTENCY; - if (version != RXKAD_VERSION) + if (version != RXKAD_VERSION) { + rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, + rxkad_abort_resp_version); goto protocol_error; + } - eproto = tracepoint_string("rxkad_rsp_tktlen"); - abort_code = RXKADTICKETLEN; - if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) + if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) { + rxrpc_abort_conn(conn, skb, RXKADTICKETLEN, -EPROTO, + rxkad_abort_resp_tkt_len); goto protocol_error; + } - eproto = tracepoint_string("rxkad_rsp_unkkey"); - abort_code = RXKADUNKNOWNKEY; - if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) + if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) { + rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, -EPROTO, + rxkad_abort_resp_unknown_tkt); goto protocol_error; + } /* extract the kerberos ticket and decrypt and decode it */ ret = -ENOMEM; @@ -1153,12 +1089,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, if (!ticket) goto temporary_error_free_resp; - eproto = tracepoint_string("rxkad_tkt_short"); - abort_code = RXKADPACKETSHORT; - ret = skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response), - ticket, ticket_len); - if (ret < 0) - goto temporary_error_free_ticket; + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response), + ticket, ticket_len) < 0) { + rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_resp_short_tkt); + goto protocol_error; + } ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len, &session_key, &expiry); @@ -1169,56 +1105,66 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, * response */ rxkad_decrypt_response(conn, response, &session_key); - eproto = tracepoint_string("rxkad_rsp_param"); - abort_code = RXKADSEALEDINCON; - if (ntohl(response->encrypted.epoch) != conn->proto.epoch) - goto protocol_error_free; - if (ntohl(response->encrypted.cid) != conn->proto.cid) - goto protocol_error_free; - if (ntohl(response->encrypted.securityIndex) != conn->security_ix) + if (ntohl(response->encrypted.epoch) != conn->proto.epoch || + ntohl(response->encrypted.cid) != conn->proto.cid || + ntohl(response->encrypted.securityIndex) != conn->security_ix) { + rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_bad_param); goto protocol_error_free; + } + csum = response->encrypted.checksum; response->encrypted.checksum = 0; rxkad_calc_response_checksum(response); - eproto = tracepoint_string("rxkad_rsp_csum"); - if (response->encrypted.checksum != csum) + if (response->encrypted.checksum != csum) { + rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_bad_checksum); goto protocol_error_free; + } spin_lock(&conn->bundle->channel_lock); for (i = 0; i < RXRPC_MAXCALLS; i++) { struct rxrpc_call *call; u32 call_id = ntohl(response->encrypted.call_id[i]); - eproto = tracepoint_string("rxkad_rsp_callid"); - if (call_id > INT_MAX) + if (call_id > INT_MAX) { + rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_bad_callid); goto protocol_error_unlock; + } - eproto = tracepoint_string("rxkad_rsp_callctr"); - if (call_id < conn->channels[i].call_counter) + if (call_id < conn->channels[i].call_counter) { + rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_call_ctr); goto protocol_error_unlock; + } - eproto = tracepoint_string("rxkad_rsp_callst"); if (call_id > conn->channels[i].call_counter) { call = rcu_dereference_protected( conn->channels[i].call, lockdep_is_held(&conn->bundle->channel_lock)); - if (call && call->state < RXRPC_CALL_COMPLETE) + if (call && call->state < RXRPC_CALL_COMPLETE) { + rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_call_state); goto protocol_error_unlock; + } conn->channels[i].call_counter = call_id; } } spin_unlock(&conn->bundle->channel_lock); - eproto = tracepoint_string("rxkad_rsp_seq"); - abort_code = RXKADOUTOFSEQUENCE; - if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) + if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) { + rxrpc_abort_conn(conn, skb, RXKADOUTOFSEQUENCE, -EPROTO, + rxkad_abort_resp_ooseq); goto protocol_error_free; + } - eproto = tracepoint_string("rxkad_rsp_level"); - abort_code = RXKADLEVELFAIL; level = ntohl(response->encrypted.level); - if (level > RXRPC_SECURITY_ENCRYPT) + if (level > RXRPC_SECURITY_ENCRYPT) { + rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EPROTO, + rxkad_abort_resp_level); goto protocol_error_free; + } conn->security_level = level; /* create a key to hold the security data and expiration time - after @@ -1240,8 +1186,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, protocol_error: kfree(response); key_put(server_key); - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); - return rxrpc_abort_conn(conn, skb, abort_code, -EPROTO, "RXK"); + return -EPROTO; temporary_error_free_ticket: kfree(ticket); diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c index d33a109e846c1..16dcabb71ebe1 100644 --- a/net/rxrpc/rxperf.c +++ b/net/rxrpc/rxperf.c @@ -10,6 +10,8 @@ #include #include #include +#define RXRPC_TRACE_ONLY_DEFINE_ENUMS +#include MODULE_DESCRIPTION("rxperf test server (afs)"); MODULE_AUTHOR("Red Hat, Inc."); @@ -307,12 +309,14 @@ static void rxperf_deliver_to_call(struct work_struct *work) case -EOPNOTSUPP: abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, - abort_code, ret, "GOP"); + abort_code, ret, + rxperf_abort_op_not_supported); goto call_complete; case -ENOTSUPP: abort_code = RX_USER_ABORT; rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, - abort_code, ret, "GUA"); + abort_code, ret, + rxperf_abort_op_not_supported); goto call_complete; case -EIO: pr_err("Call %u in bad state %u\n", @@ -324,11 +328,13 @@ static void rxperf_deliver_to_call(struct work_struct *work) case -ENOMEM: case -EFAULT: rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, - RXGEN_SS_UNMARSHAL, ret, "GUM"); + RXGEN_SS_UNMARSHAL, ret, + rxperf_abort_unmarshal_error); goto call_complete; default: rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, - RX_CALL_DEAD, ret, "GER"); + RX_CALL_DEAD, ret, + rxperf_abort_general_error); goto call_complete; } } @@ -523,7 +529,8 @@ static int rxperf_process_call(struct rxperf_call *call) if (n == -ENOMEM) rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, - RXGEN_SS_MARSHAL, -ENOMEM, "GOM"); + RXGEN_SS_MARSHAL, -ENOMEM, + rxperf_abort_oom); return n; } diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index ab968f65a4900..78af14694618d 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -144,21 +144,15 @@ const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *rx, sec = rxrpc_security_lookup(sp->hdr.securityIndex); if (!sec) { - trace_rxrpc_abort(0, "SVS", - sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_INVALID_OPERATION, EKEYREJECTED); - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; - skb->priority = RX_INVALID_OPERATION; + rxrpc_direct_abort(skb, rxrpc_abort_unsupported_security, + RX_INVALID_OPERATION, -EKEYREJECTED); return NULL; } if (sp->hdr.securityIndex != RXRPC_SECURITY_NONE && !rx->securities) { - trace_rxrpc_abort(0, "SVR", - sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_INVALID_OPERATION, EKEYREJECTED); - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; - skb->priority = sec->no_key_abort; + rxrpc_direct_abort(skb, rxrpc_abort_no_service_key, + sec->no_key_abort, -EKEYREJECTED); return NULL; } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index dc3c2a834fc8b..d67808b659f13 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -20,14 +20,15 @@ /* * Propose an abort to be made in the I/O thread. */ -bool rxrpc_propose_abort(struct rxrpc_call *call, - u32 abort_code, int error, const char *why) +bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error, + enum rxrpc_abort_reason why) { - _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); + _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why); if (!call->send_abort && call->state < RXRPC_CALL_COMPLETE) { call->send_abort_why = why; call->send_abort_err = error; + call->send_abort_seq = 0; /* Request abort locklessly vs rxrpc_input_call_event(). */ smp_store_release(&call->send_abort, abort_code); rxrpc_poke_call(call, rxrpc_call_poke_abort); @@ -683,7 +684,8 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) /* it's too late for this call */ ret = -ESHUTDOWN; } else if (p.command == RXRPC_CMD_SEND_ABORT) { - rxrpc_propose_abort(call, p.abort_code, -ECONNABORTED, "CMD"); + rxrpc_propose_abort(call, p.abort_code, -ECONNABORTED, + rxrpc_abort_call_sendmsg); ret = 0; } else if (p.command != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; @@ -748,7 +750,9 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, break; default: /* Request phase complete for this client call */ - trace_rxrpc_rx_eproto(call, 0, tracepoint_string("late_send")); + trace_rxrpc_abort(call->debug_id, rxrpc_sendmsg_late_send, + call->cid, call->call_id, call->rx_consumed, + 0, -EPROTO); ret = -EPROTO; break; } @@ -766,17 +770,17 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data); * @call: The call to be aborted * @abort_code: The abort code to stick into the ABORT packet * @error: Local error value - * @why: 3-char string indicating why. + * @why: Indication as to why. * * Allow a kernel service to abort a call, if it's still in an abortable state * and return true if the call was aborted, false if it was already complete. */ bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, - u32 abort_code, int error, const char *why) + u32 abort_code, int error, enum rxrpc_abort_reason why) { bool aborted; - _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); + _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why); mutex_lock(&call->user_mutex); aborted = rxrpc_propose_abort(call, abort_code, error, why); From f06cb29189361353e9ed12df936c8e1d7f69b730 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Oct 2022 22:58:56 +0100 Subject: [PATCH 0426/1593] rxrpc: Make the set of connection IDs per local endpoint Make the set of connection IDs per local endpoint so that endpoints don't cause each other's connections to get dismissed. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/af_rxrpc.c | 8 -------- net/rxrpc/ar-internal.h | 5 +++-- net/rxrpc/conn_client.c | 44 +++++++++++++++++----------------------- net/rxrpc/conn_object.c | 6 +++--- net/rxrpc/local_object.c | 10 +++++++++ 5 files changed, 35 insertions(+), 38 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 7ea576f6ba4bc..6f6a6b77ee846 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -957,16 +957,9 @@ static const struct net_proto_family rxrpc_family_ops = { static int __init af_rxrpc_init(void) { int ret = -1; - unsigned int tmp; BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof_field(struct sk_buff, cb)); - get_random_bytes(&tmp, sizeof(tmp)); - tmp &= 0x3fffffff; - if (tmp == 0) - tmp = 1; - idr_set_cursor(&rxrpc_client_conn_ids, tmp); - ret = -ENOMEM; rxrpc_call_jar = kmem_cache_create( "rxrpc_call_jar", sizeof(struct rxrpc_call), 0, @@ -1062,7 +1055,6 @@ static void __exit af_rxrpc_exit(void) * are released. */ rcu_barrier(); - rxrpc_destroy_client_conn_ids(); destroy_workqueue(rxrpc_workqueue); rxrpc_exit_security(); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 120ce3ccbb227..e9ab06100a211 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -300,6 +300,8 @@ struct rxrpc_local { int debug_id; /* debug ID for printks */ bool dead; bool service_closed; /* Service socket closed */ + struct idr conn_ids; /* List of connection IDs */ + spinlock_t conn_lock; /* Lock for client connection pool */ struct sockaddr_rxrpc srx; /* local address */ }; @@ -887,9 +889,8 @@ static inline bool rxrpc_is_client_call(const struct rxrpc_call *call) extern unsigned int rxrpc_reap_client_connections; extern unsigned long rxrpc_conn_idle_client_expiry; extern unsigned long rxrpc_conn_idle_client_fast_expiry; -extern struct idr rxrpc_client_conn_ids; -void rxrpc_destroy_client_conn_ids(void); +void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local); struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace); void rxrpc_put_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace); int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *, diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 1edd65883c55a..59ce5c08cf574 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -34,12 +34,6 @@ __read_mostly unsigned int rxrpc_reap_client_connections = 900; __read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; -/* - * We use machine-unique IDs for our client connections. - */ -DEFINE_IDR(rxrpc_client_conn_ids); -static DEFINE_SPINLOCK(rxrpc_conn_id_lock); - static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); /* @@ -51,65 +45,65 @@ static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, gfp_t gfp) { - struct rxrpc_net *rxnet = conn->rxnet; + struct rxrpc_local *local = conn->local; int id; _enter(""); idr_preload(gfp); - spin_lock(&rxrpc_conn_id_lock); + spin_lock(&local->conn_lock); - id = idr_alloc_cyclic(&rxrpc_client_conn_ids, conn, + id = idr_alloc_cyclic(&local->conn_ids, conn, 1, 0x40000000, GFP_NOWAIT); if (id < 0) goto error; - spin_unlock(&rxrpc_conn_id_lock); + spin_unlock(&local->conn_lock); idr_preload_end(); - conn->proto.epoch = rxnet->epoch; + conn->proto.epoch = local->rxnet->epoch; conn->proto.cid = id << RXRPC_CIDSHIFT; set_bit(RXRPC_CONN_HAS_IDR, &conn->flags); _leave(" [CID %x]", conn->proto.cid); return 0; error: - spin_unlock(&rxrpc_conn_id_lock); + spin_unlock(&local->conn_lock); idr_preload_end(); _leave(" = %d", id); return id; } /* - * Release a connection ID for a client connection from the global pool. + * Release a connection ID for a client connection. */ -static void rxrpc_put_client_connection_id(struct rxrpc_connection *conn) +static void rxrpc_put_client_connection_id(struct rxrpc_local *local, + struct rxrpc_connection *conn) { if (test_bit(RXRPC_CONN_HAS_IDR, &conn->flags)) { - spin_lock(&rxrpc_conn_id_lock); - idr_remove(&rxrpc_client_conn_ids, - conn->proto.cid >> RXRPC_CIDSHIFT); - spin_unlock(&rxrpc_conn_id_lock); + spin_lock(&local->conn_lock); + idr_remove(&local->conn_ids, conn->proto.cid >> RXRPC_CIDSHIFT); + spin_unlock(&local->conn_lock); } } /* * Destroy the client connection ID tree. */ -void rxrpc_destroy_client_conn_ids(void) +void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local) { struct rxrpc_connection *conn; int id; - if (!idr_is_empty(&rxrpc_client_conn_ids)) { - idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) { + if (!idr_is_empty(&local->conn_ids)) { + idr_for_each_entry(&local->conn_ids, conn, id) { pr_err("AF_RXRPC: Leaked client conn %p {%d}\n", conn, refcount_read(&conn->ref)); } BUG(); } - idr_destroy(&rxrpc_client_conn_ids); + idr_destroy(&local->conn_ids); } /* @@ -225,7 +219,7 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) return conn; error_1: - rxrpc_put_client_connection_id(conn); + rxrpc_put_client_connection_id(bundle->local, conn); error_0: kfree(conn); _leave(" = %d", ret); @@ -257,7 +251,7 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) * times the maximum number of client conns away from the current * allocation point to try and keep the IDs concentrated. */ - id_cursor = idr_get_cursor(&rxrpc_client_conn_ids); + id_cursor = idr_get_cursor(&conn->local->conn_ids); id = conn->proto.cid >> RXRPC_CIDSHIFT; distance = id - id_cursor; if (distance < 0) @@ -982,7 +976,7 @@ void rxrpc_kill_client_conn(struct rxrpc_connection *conn) trace_rxrpc_client(conn, -1, rxrpc_client_cleanup); atomic_dec(&rxnet->nr_client_conns); - rxrpc_put_client_connection_id(conn); + rxrpc_put_client_connection_id(local, conn); } /* diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 281f59e356f5d..2e3f0a222e1b4 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -100,10 +100,10 @@ struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *lo _enter(",%x", sp->hdr.cid & RXRPC_CIDMASK); - /* Look up client connections by connection ID alone as their IDs are - * unique for this machine. + /* Look up client connections by connection ID alone as their + * IDs are unique for this machine. */ - conn = idr_find(&rxrpc_client_conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT); + conn = idr_find(&local->conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT); if (!conn || refcount_read(&conn->ref) == 0) { _debug("no conn"); goto not_found; diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 8ef6cd8defa45..ca8b3ee68b597 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -89,6 +89,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct net *net, const struct sockaddr_rxrpc *srx) { struct rxrpc_local *local; + u32 tmp; local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL); if (local) { @@ -109,6 +110,14 @@ static struct rxrpc_local *rxrpc_alloc_local(struct net *net, local->debug_id = atomic_inc_return(&rxrpc_debug_id); memcpy(&local->srx, srx, sizeof(*srx)); local->srx.srx_service = 0; + idr_init(&local->conn_ids); + get_random_bytes(&tmp, sizeof(tmp)); + tmp &= 0x3fffffff; + if (tmp == 0) + tmp = 1; + idr_set_cursor(&local->conn_ids, tmp); + spin_lock_init(&local->conn_lock); + trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, 1); } @@ -409,6 +418,7 @@ void rxrpc_destroy_local(struct rxrpc_local *local) * local endpoint. */ rxrpc_purge_queue(&local->rx_queue); + rxrpc_destroy_client_conn_ids(local); } /* From 2953d3b8d8fd1188034c54862b74402b0b846695 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 21 Oct 2022 08:54:03 +0100 Subject: [PATCH 0427/1593] rxrpc: Offload the completion of service conn security to the I/O thread Offload the completion of the challenge/response cycle on a service connection to the I/O thread. After the RESPONSE packet has been successfully decrypted and verified by the work queue, offloading the changing of the call states to the I/O thread makes iteration over the conn's channel list simpler. Do this by marking the RESPONSE skbuff and putting it onto the receive queue for the I/O thread to collect. We put it on the front of the queue as we've already received the packet for it. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- include/trace/events/rxrpc.h | 2 ++ net/rxrpc/ar-internal.h | 1 + net/rxrpc/conn_event.c | 46 +++++++++++++++++++++++++----------- net/rxrpc/io_thread.c | 5 ++++ 4 files changed, 40 insertions(+), 14 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index caeabd50e049c..85671f4a77de1 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -126,6 +126,7 @@ #define rxrpc_skb_traces \ EM(rxrpc_skb_eaten_by_unshare, "ETN unshare ") \ EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \ + EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ EM(rxrpc_skb_get_conn_work, "GET conn-work") \ EM(rxrpc_skb_get_local_work, "GET locl-work") \ EM(rxrpc_skb_get_reject_work, "GET rej-work ") \ @@ -135,6 +136,7 @@ EM(rxrpc_skb_new_error_report, "NEW error-rpt") \ EM(rxrpc_skb_new_jumbo_subpacket, "NEW jumbo-sub") \ EM(rxrpc_skb_new_unshared, "NEW unshared ") \ + EM(rxrpc_skb_put_conn_secured, "PUT conn-secd") \ EM(rxrpc_skb_put_conn_work, "PUT conn-work") \ EM(rxrpc_skb_put_error_report, "PUT error-rep") \ EM(rxrpc_skb_put_input, "PUT input ") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e9ab06100a211..e508ec221b75e 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -38,6 +38,7 @@ struct rxrpc_txbuf; enum rxrpc_skb_mark { RXRPC_SKB_MARK_PACKET, /* Received packet */ RXRPC_SKB_MARK_ERROR, /* Error notification */ + RXRPC_SKB_MARK_SERVICE_CONN_SECURED, /* Service connection response has been verified */ RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */ RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */ }; diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 485d7f0fed2cb..b2042702ca9aa 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -248,7 +248,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - int loop, ret; + int ret; if (conn->state == RXRPC_CONN_ABORTED) return -ECONNABORTED; @@ -269,22 +269,21 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, if (ret < 0) return ret; - spin_lock(&conn->bundle->channel_lock); spin_lock(&conn->state_lock); - - if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { + if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) conn->state = RXRPC_CONN_SERVICE; - spin_unlock(&conn->state_lock); - for (loop = 0; loop < RXRPC_MAXCALLS; loop++) - rxrpc_call_is_secure( - rcu_dereference_protected( - conn->channels[loop].call, - lockdep_is_held(&conn->bundle->channel_lock))); - } else { - spin_unlock(&conn->state_lock); - } + spin_unlock(&conn->state_lock); - spin_unlock(&conn->bundle->channel_lock); + if (conn->state == RXRPC_CONN_SERVICE) { + /* Offload call state flipping to the I/O thread. As + * we've already received the packet, put it on the + * front of the queue. + */ + skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED; + rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured); + skb_queue_head(&conn->local->rx_queue, skb); + rxrpc_wake_up_io_thread(conn->local); + } return 0; default: @@ -442,9 +441,28 @@ bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) */ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb) { + unsigned int loop; + if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events)) rxrpc_abort_calls(conn); + switch (skb->mark) { + case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: + if (conn->state != RXRPC_CONN_SERVICE) + break; + + spin_lock(&conn->bundle->channel_lock); + + for (loop = 0; loop < RXRPC_MAXCALLS; loop++) + rxrpc_call_is_secure( + rcu_dereference_protected( + conn->channels[loop].call, + lockdep_is_held(&conn->bundle->channel_lock))); + + spin_unlock(&conn->bundle->channel_lock); + break; + } + /* Process delayed ACKs whose time has come. */ if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) rxrpc_process_delayed_final_acks(conn, false); diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 33fd2394c8b3b..751139b3c1ac9 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -451,6 +451,7 @@ int rxrpc_io_thread(void *data) /* Process received packets and errors. */ if ((skb = __skb_dequeue(&rx_queue))) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); switch (skb->mark) { case RXRPC_SKB_MARK_PACKET: skb->priority = 0; @@ -463,6 +464,10 @@ int rxrpc_io_thread(void *data) rxrpc_input_error(local, skb); rxrpc_free_skb(skb, rxrpc_skb_put_error_report); break; + case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: + rxrpc_input_conn_event(sp->conn, skb); + rxrpc_put_connection(sp->conn, rxrpc_conn_put_poke); + rxrpc_free_skb(skb, rxrpc_skb_put_conn_secured); break; default: WARN_ON_ONCE(1); From 1bab27af6b88b5c811f99de4812b5590f20d1cb7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 21 Oct 2022 09:30:23 +0100 Subject: [PATCH 0428/1593] rxrpc: Set up a connection bundle from a call, not rxrpc_conn_parameters Use the information now stored in struct rxrpc_call to configure the connection bundle and thence the connection, rather than using the rxrpc_conn_parameters struct. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- include/trace/events/rxrpc.h | 3 +- net/rxrpc/af_rxrpc.c | 1 - net/rxrpc/ar-internal.h | 8 +-- net/rxrpc/call_object.c | 4 +- net/rxrpc/conn_client.c | 132 ++++++++++++++++++----------------- net/rxrpc/conn_object.c | 2 +- net/rxrpc/sendmsg.c | 1 - 7 files changed, 76 insertions(+), 75 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 85671f4a77de1..e2f6b79d55170 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -178,7 +178,6 @@ #define rxrpc_peer_traces \ EM(rxrpc_peer_free, "FREE ") \ EM(rxrpc_peer_get_accept, "GET accept ") \ - EM(rxrpc_peer_get_activate_call, "GET act-call") \ EM(rxrpc_peer_get_bundle, "GET bundle ") \ EM(rxrpc_peer_get_client_conn, "GET cln-conn") \ EM(rxrpc_peer_get_input, "GET input ") \ @@ -191,7 +190,6 @@ EM(rxrpc_peer_put_bundle, "PUT bundle ") \ EM(rxrpc_peer_put_call, "PUT call ") \ EM(rxrpc_peer_put_conn, "PUT conn ") \ - EM(rxrpc_peer_put_discard_tmp, "PUT disc-tmp") \ EM(rxrpc_peer_put_input, "PUT input ") \ EM(rxrpc_peer_put_input_error, "PUT inpt-err") \ E_(rxrpc_peer_put_keepalive, "PUT keepaliv") @@ -201,6 +199,7 @@ EM(rxrpc_bundle_get_client_call, "GET clt-call") \ EM(rxrpc_bundle_get_client_conn, "GET clt-conn") \ EM(rxrpc_bundle_get_service_conn, "GET svc-conn") \ + EM(rxrpc_bundle_put_call, "PUT call ") \ EM(rxrpc_bundle_put_conn, "PUT conn ") \ EM(rxrpc_bundle_put_discard, "PUT discard ") \ E_(rxrpc_bundle_new, "NEW ") diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 6f6a6b77ee846..f4e1ffff2ba41 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -328,7 +328,6 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, mutex_unlock(&call->user_mutex); } - rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp); _leave(" = %p", call); return call; } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e508ec221b75e..2740c63331140 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -360,7 +360,6 @@ struct rxrpc_conn_proto { struct rxrpc_conn_parameters { struct rxrpc_local *local; /* Representation of local endpoint */ - struct rxrpc_peer *peer; /* Remote endpoint */ struct key *key; /* Security details */ bool exclusive; /* T if conn is exclusive */ bool upgrade; /* T if service ID can be upgraded */ @@ -428,6 +427,7 @@ struct rxrpc_bundle { struct rxrpc_local *local; /* Representation of local endpoint */ struct rxrpc_peer *peer; /* Remote endpoint */ struct key *key; /* Security details */ + const struct rxrpc_security *security; /* applied security module */ refcount_t ref; atomic_t active; /* Number of active users */ unsigned int debug_id; @@ -593,6 +593,7 @@ enum rxrpc_congest_mode { struct rxrpc_call { struct rcu_head rcu; struct rxrpc_connection *conn; /* connection carrying call */ + struct rxrpc_bundle *bundle; /* Connection bundle to use */ struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_local *local; /* Representation of local endpoint */ struct rxrpc_sock __rcu *socket; /* socket responsible */ @@ -894,11 +895,10 @@ extern unsigned long rxrpc_conn_idle_client_fast_expiry; void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local); struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace); void rxrpc_put_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace); -int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *, - struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, - gfp_t); +int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp); void rxrpc_expose_client_call(struct rxrpc_call *); void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *); +void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); void rxrpc_put_client_conn(struct rxrpc_connection *, enum rxrpc_conn_trace); void rxrpc_discard_expired_client_conns(struct work_struct *); void rxrpc_destroy_all_client_connections(struct rxrpc_net *); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 705f6e26cc75e..835e9781afc6c 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -365,7 +365,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, /* Set up or get a connection record and set the protocol parameters, * including channel number and call ID. */ - ret = rxrpc_connect_call(rx, call, cp, srx, gfp); + ret = rxrpc_connect_call(call, gfp); if (ret < 0) goto error_attached_to_socket; @@ -663,6 +663,8 @@ static void rxrpc_destroy_call(struct work_struct *work) rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned); rxrpc_put_connection(call->conn, rxrpc_conn_put_call); + rxrpc_deactivate_bundle(call->bundle); + rxrpc_put_bundle(call->bundle, rxrpc_bundle_put_call); rxrpc_put_peer(call->peer, rxrpc_peer_put_call); rxrpc_put_local(call->local, rxrpc_local_put_call); call_rcu(&call->rcu, rxrpc_rcu_free_call); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 59ce5c08cf574..c0db7722571e3 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -34,7 +34,10 @@ __read_mostly unsigned int rxrpc_reap_client_connections = 900; __read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; -static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); +static void rxrpc_activate_bundle(struct rxrpc_bundle *bundle) +{ + atomic_inc(&bundle->active); +} /* * Get a connection ID and epoch for a client connection from the global pool. @@ -109,20 +112,21 @@ void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local) /* * Allocate a connection bundle. */ -static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, +static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call, gfp_t gfp) { struct rxrpc_bundle *bundle; bundle = kzalloc(sizeof(*bundle), gfp); if (bundle) { - bundle->local = cp->local; - bundle->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_bundle); - bundle->key = cp->key; - bundle->exclusive = cp->exclusive; - bundle->upgrade = cp->upgrade; - bundle->service_id = cp->service_id; - bundle->security_level = cp->security_level; + bundle->local = call->local; + bundle->peer = rxrpc_get_peer(call->peer, rxrpc_peer_get_bundle); + bundle->key = key_get(call->key); + bundle->security = call->security; + bundle->exclusive = test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags); + bundle->upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags); + bundle->service_id = call->dest_srx.srx_service; + bundle->security_level = call->security_level; refcount_set(&bundle->ref, 1); atomic_set(&bundle->active, 1); spin_lock_init(&bundle->channel_lock); @@ -146,19 +150,23 @@ static void rxrpc_free_bundle(struct rxrpc_bundle *bundle) { trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_free); rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle); + key_put(bundle->key); kfree(bundle); } void rxrpc_put_bundle(struct rxrpc_bundle *bundle, enum rxrpc_bundle_trace why) { - unsigned int id = bundle->debug_id; + unsigned int id; bool dead; int r; - dead = __refcount_dec_and_test(&bundle->ref, &r); - trace_rxrpc_bundle(id, r - 1, why); - if (dead) - rxrpc_free_bundle(bundle); + if (bundle) { + id = bundle->debug_id; + dead = __refcount_dec_and_test(&bundle->ref, &r); + trace_rxrpc_bundle(id, r - 1, why); + if (dead) + rxrpc_free_bundle(bundle); + } } /* @@ -272,20 +280,23 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) * Look up the conn bundle that matches the connection parameters, adding it if * it doesn't yet exist. */ -static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *cp, - gfp_t gfp) +static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp) { static atomic_t rxrpc_bundle_id; struct rxrpc_bundle *bundle, *candidate; - struct rxrpc_local *local = cp->local; + struct rxrpc_local *local = call->local; struct rb_node *p, **pp, *parent; long diff; + bool upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags); _enter("{%px,%x,%u,%u}", - cp->peer, key_serial(cp->key), cp->security_level, cp->upgrade); + call->peer, key_serial(call->key), call->security_level, + upgrade); - if (cp->exclusive) - return rxrpc_alloc_bundle(cp, gfp); + if (test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags)) { + call->bundle = rxrpc_alloc_bundle(call, gfp); + return call->bundle; + } /* First, see if the bundle is already there. */ _debug("search 1"); @@ -294,11 +305,11 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c while (p) { bundle = rb_entry(p, struct rxrpc_bundle, local_node); -#define cmp(X) ((long)bundle->X - (long)cp->X) - diff = (cmp(peer) ?: - cmp(key) ?: - cmp(security_level) ?: - cmp(upgrade)); +#define cmp(X, Y) ((long)(X) - (long)(Y)) + diff = (cmp(bundle->peer, call->peer) ?: + cmp(bundle->key, call->key) ?: + cmp(bundle->security_level, call->security_level) ?: + cmp(bundle->upgrade, upgrade)); #undef cmp if (diff < 0) p = p->rb_left; @@ -311,9 +322,9 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c _debug("not found"); /* It wasn't. We need to add one. */ - candidate = rxrpc_alloc_bundle(cp, gfp); + candidate = rxrpc_alloc_bundle(call, gfp); if (!candidate) - return NULL; + return ERR_PTR(-ENOMEM); _debug("search 2"); spin_lock(&local->client_bundles_lock); @@ -323,11 +334,11 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c parent = *pp; bundle = rb_entry(parent, struct rxrpc_bundle, local_node); -#define cmp(X) ((long)bundle->X - (long)cp->X) - diff = (cmp(peer) ?: - cmp(key) ?: - cmp(security_level) ?: - cmp(upgrade)); +#define cmp(X, Y) ((long)(X) - (long)(Y)) + diff = (cmp(bundle->peer, call->peer) ?: + cmp(bundle->key, call->key) ?: + cmp(bundle->security_level, call->security_level) ?: + cmp(bundle->upgrade, upgrade)); #undef cmp if (diff < 0) pp = &(*pp)->rb_left; @@ -341,19 +352,19 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id); rb_link_node(&candidate->local_node, parent, pp); rb_insert_color(&candidate->local_node, &local->client_bundles); - rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call); + call->bundle = rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call); spin_unlock(&local->client_bundles_lock); - _leave(" = %u [new]", candidate->debug_id); - return candidate; + _leave(" = B=%u [new]", call->bundle->debug_id); + return call->bundle; found_bundle_free: rxrpc_free_bundle(candidate); found_bundle: - rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call); - atomic_inc(&bundle->active); + call->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call); + rxrpc_activate_bundle(bundle); spin_unlock(&local->client_bundles_lock); - _leave(" = %u [found]", bundle->debug_id); - return bundle; + _leave(" = B=%u [found]", call->bundle->debug_id); + return call->bundle; } /* @@ -362,31 +373,25 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c * If we return with a connection, the call will be on its waiting list. It's * left to the caller to assign a channel and wake up the call. */ -static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) +static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_call *call, gfp_t gfp) { struct rxrpc_bundle *bundle; _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp); - if (!cp->peer) + call->peer = rxrpc_lookup_peer(call->local, &call->dest_srx, gfp); + if (!call->peer) goto error; call->tx_last_sent = ktime_get_real(); - call->cong_ssthresh = cp->peer->cong_ssthresh; + call->cong_ssthresh = call->peer->cong_ssthresh; if (call->cong_cwnd >= call->cong_ssthresh) call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; else call->cong_mode = RXRPC_CALL_SLOW_START; - if (cp->upgrade) - __set_bit(RXRPC_CALL_UPGRADE, &call->flags); /* Find the client connection bundle. */ - bundle = rxrpc_look_up_bundle(cp, gfp); + bundle = rxrpc_look_up_bundle(call, gfp); if (!bundle) goto error; @@ -449,7 +454,7 @@ static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp) if (old) trace_rxrpc_client(old, -1, rxrpc_client_replace); candidate->bundle_shift = shift; - atomic_inc(&bundle->active); + rxrpc_activate_bundle(bundle); bundle->conns[i] = candidate; for (j = 0; j < RXRPC_MAXCALLS; j++) set_bit(shift + j, &bundle->avail_chans); @@ -541,7 +546,6 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, rxrpc_see_call(call, rxrpc_call_see_activate_client); list_del_init(&call->chan_wait_link); - call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_activate_call); call->conn = rxrpc_get_connection(conn, rxrpc_conn_get_activate_call); call->cid = conn->proto.cid | channel; call->call_id = call_id; @@ -705,14 +709,11 @@ static int rxrpc_wait_for_channel(struct rxrpc_bundle *bundle, * find a connection for a call * - called in process context with IRQs enabled */ -int rxrpc_connect_call(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) +int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp) { struct rxrpc_bundle *bundle; - struct rxrpc_net *rxnet = cp->local->rxnet; + struct rxrpc_local *local = call->local; + struct rxrpc_net *rxnet = local->rxnet; int ret = 0; _enter("{%d,%lx},", call->debug_id, call->user_call_ID); @@ -721,7 +722,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, rxrpc_get_call(call, rxrpc_call_get_io_thread); - bundle = rxrpc_prep_call(rx, call, cp, srx, gfp); + bundle = rxrpc_prep_call(call, gfp); if (IS_ERR(bundle)) { rxrpc_put_call(call, rxrpc_call_get_io_thread); ret = PTR_ERR(bundle); @@ -738,9 +739,6 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, /* Paired with the write barrier in rxrpc_activate_one_channel(). */ smp_rmb(); -out_put_bundle: - rxrpc_deactivate_bundle(bundle); - rxrpc_put_bundle(bundle, rxrpc_bundle_get_client_call); out: _leave(" = %d", ret); return ret; @@ -758,7 +756,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed); rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); rxrpc_disconnect_client_call(bundle, call); - goto out_put_bundle; + goto out; } /* @@ -945,11 +943,15 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) /* * Drop the active count on a bundle. */ -static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) +void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) { - struct rxrpc_local *local = bundle->local; + struct rxrpc_local *local; bool need_put = false; + if (!bundle) + return; + + local = bundle->local; if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) { if (!bundle->exclusive) { _debug("erase bundle"); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 2e3f0a222e1b4..2a7d5378300cc 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -208,7 +208,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) } if (rxrpc_is_client_call(call)) { - rxrpc_disconnect_client_call(conn->bundle, call); + rxrpc_disconnect_client_call(call->bundle, call); } else { spin_lock(&conn->bundle->channel_lock); __rxrpc_disconnect_call(conn, call); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index d67808b659f13..2a003c3a9897e 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -564,7 +564,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, atomic_inc_return(&rxrpc_debug_id)); /* The socket is now unlocked */ - rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp); _leave(" = %p\n", call); return call; } From 0b9bb322f13d486d5b8630264ccbfb4794bb43a9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Oct 2022 00:16:55 +0100 Subject: [PATCH 0429/1593] rxrpc: Split out the call state changing functions into their own file Split out the functions that change the state of an rxrpc call into their own file. The idea being to remove anything to do with changing the state of a call directly from the rxrpc sendmsg() and recvmsg() paths and have all that done in the I/O thread only, with the ultimate aim of removing the state lock entirely. Moving the code out of sendmsg.c and recvmsg.c makes that easier to manage. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/Makefile | 1 + net/rxrpc/ar-internal.h | 26 ++++++++---- net/rxrpc/call_state.c | 89 +++++++++++++++++++++++++++++++++++++++++ net/rxrpc/recvmsg.c | 81 ------------------------------------- 4 files changed, 108 insertions(+), 89 deletions(-) create mode 100644 net/rxrpc/call_state.c diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index e76d3459d78ee..ac5caf5a48e16 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -10,6 +10,7 @@ rxrpc-y := \ call_accept.o \ call_event.o \ call_object.o \ + call_state.o \ conn_client.o \ conn_event.o \ conn_object.o \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 2740c63331140..203e0354d86bb 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -885,6 +885,24 @@ static inline bool rxrpc_is_client_call(const struct rxrpc_call *call) return !rxrpc_is_service_call(call); } +/* + * call_state.c + */ +bool __rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error); +bool rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error); +bool __rxrpc_call_completed(struct rxrpc_call *call); +bool rxrpc_call_completed(struct rxrpc_call *call); +bool __rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, + u32 abort_code, int error, enum rxrpc_abort_reason why); +bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, + u32 abort_code, int error, enum rxrpc_abort_reason why); + /* * conn_client.c */ @@ -1116,14 +1134,6 @@ extern const struct seq_operations rxrpc_local_seq_ops; * recvmsg.c */ void rxrpc_notify_socket(struct rxrpc_call *); -bool __rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int); -bool rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int); -bool __rxrpc_call_completed(struct rxrpc_call *); -bool rxrpc_call_completed(struct rxrpc_call *); -bool __rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, - u32 abort_code, int error, enum rxrpc_abort_reason why); -bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, - u32 abort_code, int error, enum rxrpc_abort_reason why); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* diff --git a/net/rxrpc/call_state.c b/net/rxrpc/call_state.c new file mode 100644 index 0000000000000..8fbb2112ed7e2 --- /dev/null +++ b/net/rxrpc/call_state.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Call state changing functions. + * + * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include "ar-internal.h" + +/* + * Transition a call to the complete state. + */ +bool __rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + if (call->state < RXRPC_CALL_COMPLETE) { + call->abort_code = abort_code; + call->error = error; + call->completion = compl; + call->state = RXRPC_CALL_COMPLETE; + trace_rxrpc_call_complete(call); + wake_up(&call->waitq); + rxrpc_notify_socket(call); + return true; + } + return false; +} + +bool rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + bool ret = false; + + if (call->state < RXRPC_CALL_COMPLETE) { + write_lock(&call->state_lock); + ret = __rxrpc_set_call_completion(call, compl, abort_code, error); + write_unlock(&call->state_lock); + } + return ret; +} + +/* + * Record that a call successfully completed. + */ +bool __rxrpc_call_completed(struct rxrpc_call *call) +{ + return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); +} + +bool rxrpc_call_completed(struct rxrpc_call *call) +{ + bool ret = false; + + if (call->state < RXRPC_CALL_COMPLETE) { + write_lock(&call->state_lock); + ret = __rxrpc_call_completed(call); + write_unlock(&call->state_lock); + } + return ret; +} + +/* + * Record that a call is locally aborted. + */ +bool __rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, + u32 abort_code, int error, enum rxrpc_abort_reason why) +{ + trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, + abort_code, error); + return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, + abort_code, error); +} + +bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, + u32 abort_code, int error, enum rxrpc_abort_reason why) +{ + bool ret; + + write_lock(&call->state_lock); + ret = __rxrpc_abort_call(call, seq, abort_code, error, why); + write_unlock(&call->state_lock); + if (ret && test_bit(RXRPC_CALL_EXPOSED, &call->flags)) + rxrpc_send_abort_packet(call); + return ret; +} diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 59b521b82aec3..ff08f917ecda2 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -58,87 +58,6 @@ void rxrpc_notify_socket(struct rxrpc_call *call) _leave(""); } -/* - * Transition a call to the complete state. - */ -bool __rxrpc_set_call_completion(struct rxrpc_call *call, - enum rxrpc_call_completion compl, - u32 abort_code, - int error) -{ - if (call->state < RXRPC_CALL_COMPLETE) { - call->abort_code = abort_code; - call->error = error; - call->completion = compl; - call->state = RXRPC_CALL_COMPLETE; - trace_rxrpc_call_complete(call); - wake_up(&call->waitq); - rxrpc_notify_socket(call); - return true; - } - return false; -} - -bool rxrpc_set_call_completion(struct rxrpc_call *call, - enum rxrpc_call_completion compl, - u32 abort_code, - int error) -{ - bool ret = false; - - if (call->state < RXRPC_CALL_COMPLETE) { - write_lock(&call->state_lock); - ret = __rxrpc_set_call_completion(call, compl, abort_code, error); - write_unlock(&call->state_lock); - } - return ret; -} - -/* - * Record that a call successfully completed. - */ -bool __rxrpc_call_completed(struct rxrpc_call *call) -{ - return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); -} - -bool rxrpc_call_completed(struct rxrpc_call *call) -{ - bool ret = false; - - if (call->state < RXRPC_CALL_COMPLETE) { - write_lock(&call->state_lock); - ret = __rxrpc_call_completed(call); - write_unlock(&call->state_lock); - } - return ret; -} - -/* - * Record that a call is locally aborted. - */ -bool __rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, - u32 abort_code, int error, enum rxrpc_abort_reason why) -{ - trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, - abort_code, error); - return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, - abort_code, error); -} - -bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, - u32 abort_code, int error, enum rxrpc_abort_reason why) -{ - bool ret; - - write_lock(&call->state_lock); - ret = __rxrpc_abort_call(call, seq, abort_code, error, why); - write_unlock(&call->state_lock); - if (ret && test_bit(RXRPC_CALL_EXPOSED, &call->flags)) - rxrpc_send_abort_packet(call); - return ret; -} - /* * Pass a call terminating message to userspace. */ From d41b3f5b96881809c73f86e3ca436c9426610b7a Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 Dec 2022 15:32:32 +0000 Subject: [PATCH 0430/1593] rxrpc: Wrap accesses to get call state to put the barrier in one place Wrap accesses to get the state of a call from outside of the I/O thread in a single place so that the barrier needed to order wrt the error code and abort code is in just that place. Also use a barrier when setting the call state and again when reading the call state such that the auxiliary completion info (error code, abort code) can be read without taking a read lock on the call state lock. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/af_rxrpc.c | 2 +- net/rxrpc/ar-internal.h | 16 ++++++++++++++++ net/rxrpc/call_state.c | 3 ++- net/rxrpc/recvmsg.c | 12 ++++++------ net/rxrpc/sendmsg.c | 29 +++++++++++++---------------- 5 files changed, 38 insertions(+), 24 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index f4e1ffff2ba41..61c30d0f67356 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -379,7 +379,7 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call); bool rxrpc_kernel_check_life(const struct socket *sock, const struct rxrpc_call *call) { - return call->state != RXRPC_CALL_COMPLETE; + return !rxrpc_call_is_complete(call); } EXPORT_SYMBOL(rxrpc_kernel_check_life); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 203e0354d86bb..9e992487649c4 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -903,6 +903,22 @@ bool __rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, u32 abort_code, int error, enum rxrpc_abort_reason why); +static inline enum rxrpc_call_state rxrpc_call_state(const struct rxrpc_call *call) +{ + /* Order read ->state before read ->error. */ + return smp_load_acquire(&call->state); +} + +static inline bool rxrpc_call_is_complete(const struct rxrpc_call *call) +{ + return rxrpc_call_state(call) == RXRPC_CALL_COMPLETE; +} + +static inline bool rxrpc_call_has_failed(const struct rxrpc_call *call) +{ + return rxrpc_call_is_complete(call) && call->completion != RXRPC_CALL_SUCCEEDED; +} + /* * conn_client.c */ diff --git a/net/rxrpc/call_state.c b/net/rxrpc/call_state.c index 8fbb2112ed7e2..649fb9e5d1af5 100644 --- a/net/rxrpc/call_state.c +++ b/net/rxrpc/call_state.c @@ -19,7 +19,8 @@ bool __rxrpc_set_call_completion(struct rxrpc_call *call, call->abort_code = abort_code; call->error = error; call->completion = compl; - call->state = RXRPC_CALL_COMPLETE; + /* Allow reader of completion state to operate locklessly */ + smp_store_release(&call->state, RXRPC_CALL_COMPLETE); trace_rxrpc_call_complete(call); wake_up(&call->waitq); rxrpc_notify_socket(call); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index ff08f917ecda2..7bf36a8839ecc 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -89,7 +89,7 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp); break; default: - pr_err("Invalid terminal call state %u\n", call->state); + pr_err("Invalid terminal call state %u\n", call->completion); BUG(); break; } @@ -111,7 +111,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh); - if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) + if (rxrpc_call_state(call) == RXRPC_CALL_CLIENT_RECV_REPLY) rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack); write_lock(&call->state_lock); @@ -210,7 +210,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, rx_pkt_offset = call->rx_pkt_offset; rx_pkt_len = call->rx_pkt_len; - if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) { + if (rxrpc_call_state(call) >= RXRPC_CALL_SERVER_ACK_REQUEST) { seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1; ret = 1; goto done; @@ -416,7 +416,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_namelen = len; } - switch (READ_ONCE(call->state)) { + switch (rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_RECV_REPLY: case RXRPC_CALL_SERVER_RECV_REQUEST: case RXRPC_CALL_SERVER_ACK_REQUEST: @@ -436,7 +436,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (ret < 0) goto error_unlock_call; - if (call->state == RXRPC_CALL_COMPLETE) { + if (rxrpc_call_is_complete(call)) { ret = rxrpc_recvmsg_term(call, msg); if (ret < 0) goto error_unlock_call; @@ -516,7 +516,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, mutex_lock(&call->user_mutex); - switch (READ_ONCE(call->state)) { + switch (rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_RECV_REPLY: case RXRPC_CALL_SERVER_RECV_REQUEST: case RXRPC_CALL_SERVER_ACK_REQUEST: diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 2a003c3a9897e..f0b5822f3e04c 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -25,7 +25,7 @@ bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error, { _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why); - if (!call->send_abort && call->state < RXRPC_CALL_COMPLETE) { + if (!call->send_abort && !rxrpc_call_is_complete(call)) { call->send_abort_why = why; call->send_abort_err = error; call->send_abort_seq = 0; @@ -60,7 +60,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, if (rxrpc_check_tx_space(call, NULL)) return 0; - if (call->state >= RXRPC_CALL_COMPLETE) + if (rxrpc_call_is_complete(call)) return call->error; if (signal_pending(current)) @@ -95,7 +95,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, if (rxrpc_check_tx_space(call, &tx_win)) return 0; - if (call->state >= RXRPC_CALL_COMPLETE) + if (rxrpc_call_is_complete(call)) return call->error; if (timeout == 0 && @@ -124,7 +124,7 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, if (rxrpc_check_tx_space(call, NULL)) return 0; - if (call->state >= RXRPC_CALL_COMPLETE) + if (rxrpc_call_is_complete(call)) return call->error; trace_rxrpc_txqueue(call, rxrpc_txqueue_wait); @@ -273,7 +273,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, ret = -EPIPE; if (sk->sk_shutdown & SEND_SHUTDOWN) goto maybe_error; - state = READ_ONCE(call->state); + state = rxrpc_call_state(call); ret = -ESHUTDOWN; if (state >= RXRPC_CALL_COMPLETE) goto maybe_error; @@ -350,7 +350,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, /* check for the far side aborting the call or a network error * occurring */ - if (call->state == RXRPC_CALL_COMPLETE) + if (rxrpc_call_is_complete(call)) goto call_terminated; /* add the packet to the send queue if it's now full */ @@ -375,12 +375,9 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, success: ret = copied; - if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) { - read_lock(&call->state_lock); - if (call->error < 0) - ret = call->error; - read_unlock(&call->state_lock); - } + if (rxrpc_call_is_complete(call) && + call->error < 0) + ret = call->error; out: call->tx_pending = txb; _leave(" = %d", ret); @@ -618,10 +615,10 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) return PTR_ERR(call); /* ... and we have the call lock. */ ret = 0; - if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) + if (rxrpc_call_is_complete(call)) goto out_put_unlock; } else { - switch (READ_ONCE(call->state)) { + switch (rxrpc_call_state(call)) { case RXRPC_CALL_UNINITIALISED: case RXRPC_CALL_CLIENT_AWAIT_CONN: case RXRPC_CALL_SERVER_PREALLOC: @@ -675,7 +672,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) break; } - state = READ_ONCE(call->state); + state = rxrpc_call_state(call); _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, state, call->conn); @@ -735,7 +732,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); - switch (READ_ONCE(call->state)) { + switch (rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_SERVER_ACK_REQUEST: case RXRPC_CALL_SERVER_SEND_REPLY: From 2d689424b6184535890c251f937ccf815fde9cd2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Nov 2022 08:35:36 +0000 Subject: [PATCH 0431/1593] rxrpc: Move call state changes from sendmsg to I/O thread Move all the call state changes that are made in rxrpc_sendmsg() to the I/O thread. This is a step towards removing the call state lock. This requires the switch to the RXRPC_CALL_CLIENT_AWAIT_REPLY and RXRPC_CALL_SERVER_SEND_REPLY states to be done when the last packet is decanted from ->tx_sendmsg to ->tx_buffer in the I/O thread, not when it is added to ->tx_sendmsg by sendmsg(). Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- Documentation/networking/rxrpc.rst | 4 +- net/rxrpc/call_event.c | 50 +++++++++++++++++++++- net/rxrpc/sendmsg.c | 69 ++++++------------------------ 3 files changed, 63 insertions(+), 60 deletions(-) diff --git a/Documentation/networking/rxrpc.rst b/Documentation/networking/rxrpc.rst index 39494a6ea739c..e1af54424192b 100644 --- a/Documentation/networking/rxrpc.rst +++ b/Documentation/networking/rxrpc.rst @@ -880,8 +880,8 @@ The kernel interface functions are as follows: notify_end_rx can be NULL or it can be used to specify a function to be called when the call changes state to end the Tx phase. This function is - called with the call-state spinlock held to prevent any reply or final ACK - from being delivered first. + called with a spinlock held to prevent the last DATA packet from being + transmitted until the function returns. (#) Receive data from a call:: diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 695aeb70d1a6b..2e3c01060d59a 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -251,6 +251,50 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) _leave(""); } +/* + * Start transmitting the reply to a service. This cancels the need to ACK the + * request if we haven't yet done so. + */ +static void rxrpc_begin_service_reply(struct rxrpc_call *call) +{ + unsigned long now; + + write_lock(&call->state_lock); + + if (call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { + now = jiffies; + call->state = RXRPC_CALL_SERVER_SEND_REPLY; + WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET); + if (call->ackr_reason == RXRPC_ACK_DELAY) + call->ackr_reason = 0; + trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); + } + + write_unlock(&call->state_lock); +} + +/* + * Close the transmission phase. After this point there is no more data to be + * transmitted in the call. + */ +static void rxrpc_close_tx_phase(struct rxrpc_call *call) +{ + _debug("________awaiting reply/ACK__________"); + + write_lock(&call->state_lock); + switch (call->state) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + break; + case RXRPC_CALL_SERVER_SEND_REPLY: + call->state = RXRPC_CALL_SERVER_AWAIT_ACK; + break; + default: + break; + } + write_unlock(&call->state_lock); +} + static bool rxrpc_tx_window_has_space(struct rxrpc_call *call) { unsigned int winsize = min_t(unsigned int, call->tx_winsize, @@ -285,6 +329,9 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) call->tx_top = txb->seq; list_add_tail(&txb->call_link, &call->tx_buffer); + if (txb->wire.flags & RXRPC_LAST_PACKET) + rxrpc_close_tx_phase(call); + rxrpc_transmit_one(call, txb); if (!rxrpc_tx_window_has_space(call)) @@ -298,12 +345,11 @@ static void rxrpc_transmit_some_data(struct rxrpc_call *call) case RXRPC_CALL_SERVER_ACK_REQUEST: if (list_empty(&call->tx_sendmsg)) return; + rxrpc_begin_service_reply(call); fallthrough; case RXRPC_CALL_SERVER_SEND_REPLY: - case RXRPC_CALL_SERVER_AWAIT_ACK: case RXRPC_CALL_CLIENT_SEND_REQUEST: - case RXRPC_CALL_CLIENT_AWAIT_REPLY: if (!rxrpc_tx_window_has_space(call)) return; if (list_empty(&call->tx_sendmsg)) { diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index f0b5822f3e04c..0428528abbf49 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -189,7 +189,6 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, struct rxrpc_txbuf *txb, rxrpc_notify_end_tx_t notify_end_tx) { - unsigned long now; rxrpc_seq_t seq = txb->seq; bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags), poke; @@ -212,36 +211,10 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, poke = list_empty(&call->tx_sendmsg); list_add_tail(&txb->call_link, &call->tx_sendmsg); call->tx_prepared = seq; + if (last) + rxrpc_notify_end_tx(rx, call, notify_end_tx); spin_unlock(&call->tx_lock); - if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { - _debug("________awaiting reply/ACK__________"); - write_lock(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_CLIENT_SEND_REQUEST: - call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; - rxrpc_notify_end_tx(rx, call, notify_end_tx); - break; - case RXRPC_CALL_SERVER_ACK_REQUEST: - call->state = RXRPC_CALL_SERVER_SEND_REPLY; - now = jiffies; - WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET); - if (call->ackr_reason == RXRPC_ACK_DELAY) - call->ackr_reason = 0; - trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); - if (!last) - break; - fallthrough; - case RXRPC_CALL_SERVER_SEND_REPLY: - call->state = RXRPC_CALL_SERVER_AWAIT_ACK; - rxrpc_notify_end_tx(rx, call, notify_end_tx); - break; - default: - break; - } - write_unlock(&call->state_lock); - } - if (poke) rxrpc_poke_call(call, rxrpc_call_poke_start); } @@ -280,8 +253,13 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, ret = -EPROTO; if (state != RXRPC_CALL_CLIENT_SEND_REQUEST && state != RXRPC_CALL_SERVER_ACK_REQUEST && - state != RXRPC_CALL_SERVER_SEND_REPLY) + state != RXRPC_CALL_SERVER_SEND_REPLY) { + /* Request phase complete for this client call */ + trace_rxrpc_abort(call->debug_id, rxrpc_sendmsg_late_send, + call->cid, call->call_id, call->rx_consumed, + 0, -EPROTO); goto maybe_error; + } ret = -EMSGSIZE; if (call->tx_total_len != -1) { @@ -573,7 +551,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) __releases(&rx->sk.sk_lock.slock) { - enum rxrpc_call_state state; struct rxrpc_call *call; unsigned long now, j; bool dropped_lock = false; @@ -672,11 +649,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) break; } - state = rxrpc_call_state(call); - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, state, call->conn); - - if (state >= RXRPC_CALL_COMPLETE) { + if (rxrpc_call_is_complete(call)) { /* it's too late for this call */ ret = -ESHUTDOWN; } else if (p.command == RXRPC_CMD_SEND_ABORT) { @@ -722,7 +695,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, bool dropped_lock = false; int ret; - _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]); + _enter("{%d},", call->debug_id); ASSERTCMP(msg->msg_name, ==, NULL); ASSERTCMP(msg->msg_control, ==, NULL); @@ -732,26 +705,10 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); - switch (rxrpc_call_state(call)) { - case RXRPC_CALL_CLIENT_SEND_REQUEST: - case RXRPC_CALL_SERVER_ACK_REQUEST: - case RXRPC_CALL_SERVER_SEND_REPLY: - ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len, - notify_end_tx, &dropped_lock); - break; - case RXRPC_CALL_COMPLETE: - read_lock(&call->state_lock); + ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len, + notify_end_tx, &dropped_lock); + if (ret == -ESHUTDOWN) ret = call->error; - read_unlock(&call->state_lock); - break; - default: - /* Request phase complete for this client call */ - trace_rxrpc_abort(call->debug_id, rxrpc_sendmsg_late_send, - call->cid, call->call_id, call->rx_consumed, - 0, -EPROTO); - ret = -EPROTO; - break; - } if (!dropped_lock) mutex_unlock(&call->user_mutex); From 93368b6bd58ac49d804fdc9ab041a6dc89ebf1cc Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 26 Oct 2022 23:43:00 +0100 Subject: [PATCH 0432/1593] rxrpc: Move call state changes from recvmsg to I/O thread Move the call state changes that are made in rxrpc_recvmsg() to the I/O thread. This means that, thenceforth, only the I/O thread does this and the call state lock can be removed. This requires the Rx phase to be ended when the last packet is received, not when it is processed. Since this now changes the rxrpc call state to SUCCEEDED before we've consumed all the data from it, rxrpc_kernel_check_life() mustn't say the call is dead until the recvmsg queue is empty (unless the call has failed). Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/rxrpc.c | 1 + net/rxrpc/af_rxrpc.c | 10 ++- net/rxrpc/ar-internal.h | 3 +- net/rxrpc/input.c | 38 ++++++++- net/rxrpc/recvmsg.c | 168 +++++++++++++++------------------------- 5 files changed, 109 insertions(+), 111 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index bd3830bc67006..7817e2b860e5e 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -909,6 +909,7 @@ int afs_extract_data(struct afs_call *call, bool want_more) ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter, &call->iov_len, want_more, &remote_abort, &call->service_id); + trace_afs_receive_data(call, call->iter, want_more, ret); if (ret == 0 || ret == -EAGAIN) return ret; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 61c30d0f67356..cf200e4e0eae1 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -373,13 +373,17 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call); * @sock: The socket the call is on * @call: The call to check * - * Allow a kernel service to find out whether a call is still alive - - * ie. whether it has completed. + * Allow a kernel service to find out whether a call is still alive - whether + * it has completed successfully and all received data has been consumed. */ bool rxrpc_kernel_check_life(const struct socket *sock, const struct rxrpc_call *call) { - return !rxrpc_call_is_complete(call); + if (!rxrpc_call_is_complete(call)) + return true; + if (call->completion != RXRPC_CALL_SUCCEEDED) + return false; + return !skb_queue_empty(&call->recvmsg_queue); } EXPORT_SYMBOL(rxrpc_kernel_check_life); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9e992487649c4..8612734397361 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -545,7 +545,8 @@ enum rxrpc_call_flag { RXRPC_CALL_KERNEL, /* The call was made by the kernel */ RXRPC_CALL_UPGRADE, /* Service upgrade was requested for the call */ RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */ - RXRPC_CALL_RX_IS_IDLE, /* Reception is idle - send an ACK */ + RXRPC_CALL_RX_IS_IDLE, /* recvmsg() is idle - send an ACK */ + RXRPC_CALL_RECVMSG_READ_ALL, /* recvmsg() read all of the received data */ }; /* diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index bd69ff2d90826..6eb21425f41fe 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -319,6 +319,41 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) return true; } +/* + * End the packet reception phase. + */ +static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) +{ + rxrpc_seq_t whigh = READ_ONCE(call->rx_highest_seq); + + _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); + + trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh); + + if (rxrpc_call_state(call) == RXRPC_CALL_CLIENT_RECV_REPLY) + rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack); + + write_lock(&call->state_lock); + + switch (call->state) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + __rxrpc_call_completed(call); + write_unlock(&call->state_lock); + break; + + case RXRPC_CALL_SERVER_RECV_REQUEST: + call->state = RXRPC_CALL_SERVER_ACK_REQUEST; + call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; + write_unlock(&call->state_lock); + rxrpc_propose_delay_ACK(call, serial, + rxrpc_propose_ack_processing_op); + break; + default: + write_unlock(&call->state_lock); + break; + } +} + static void rxrpc_input_update_ack_window(struct rxrpc_call *call, rxrpc_seq_t window, rxrpc_seq_t wtop) { @@ -337,8 +372,9 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb, __skb_queue_tail(&call->recvmsg_queue, skb); rxrpc_input_update_ack_window(call, window, wtop); - trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq); + if (last) + rxrpc_end_rx_phase(call, sp->hdr.serial); } /* diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 7bf36a8839ecc..dd54ceee7bcc8 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -100,42 +100,6 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) return ret; } -/* - * End the packet reception phase. - */ -static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) -{ - rxrpc_seq_t whigh = READ_ONCE(call->rx_highest_seq); - - _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); - - trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh); - - if (rxrpc_call_state(call) == RXRPC_CALL_CLIENT_RECV_REPLY) - rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack); - - write_lock(&call->state_lock); - - switch (call->state) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - __rxrpc_call_completed(call); - write_unlock(&call->state_lock); - rxrpc_poke_call(call, rxrpc_call_poke_complete); - break; - - case RXRPC_CALL_SERVER_RECV_REQUEST: - call->state = RXRPC_CALL_SERVER_ACK_REQUEST; - call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; - write_unlock(&call->state_lock); - rxrpc_propose_delay_ACK(call, serial, - rxrpc_propose_ack_processing_op); - break; - default: - write_unlock(&call->state_lock); - break; - } -} - /* * Discard a packet we've used up and advance the Rx window by one. */ @@ -166,10 +130,9 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) trace_rxrpc_receive(call, last ? rxrpc_receive_rotate_last : rxrpc_receive_rotate, serial, call->rx_consumed); - if (last) { - rxrpc_end_rx_phase(call, serial); - return; - } + + if (last) + set_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags); /* Check to see if there's an ACK that needs sending. */ acked = atomic_add_return(call->rx_consumed - old_consumed, @@ -194,7 +157,8 @@ static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb) /* * Deliver messages to a call. This keeps processing packets until the buffer * is filled and we find either more DATA (returns 0) or the end of the DATA - * (returns 1). If more packets are required, it returns -EAGAIN. + * (returns 1). If more packets are required, it returns -EAGAIN and if the + * call has failed it returns -EIO. */ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, struct msghdr *msg, struct iov_iter *iter, @@ -210,7 +174,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, rx_pkt_offset = call->rx_pkt_offset; rx_pkt_len = call->rx_pkt_len; - if (rxrpc_call_state(call) >= RXRPC_CALL_SERVER_ACK_REQUEST) { + if (rxrpc_call_has_failed(call)) { + seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1; + ret = -EIO; + goto done; + } + + if (test_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags)) { seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1; ret = 1; goto done; @@ -234,14 +204,15 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (rx_pkt_offset == 0) { ret2 = rxrpc_verify_data(call, skb); - rx_pkt_offset = sp->offset; - rx_pkt_len = sp->len; trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq, - rx_pkt_offset, rx_pkt_len, ret2); + sp->offset, sp->len, ret2); if (ret2 < 0) { + kdebug("verify = %d", ret2); ret = ret2; goto out; } + rx_pkt_offset = sp->offset; + rx_pkt_len = sp->len; } else { trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq, rx_pkt_offset, rx_pkt_len, 0); @@ -416,36 +387,36 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_namelen = len; } - switch (rxrpc_call_state(call)) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - case RXRPC_CALL_SERVER_RECV_REQUEST: - case RXRPC_CALL_SERVER_ACK_REQUEST: - ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len, - flags, &copied); - if (ret == -EAGAIN) - ret = 0; - - if (!skb_queue_empty(&call->recvmsg_queue)) - rxrpc_notify_socket(call); - break; - default: + ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len, + flags, &copied); + if (ret == -EAGAIN) ret = 0; - break; - } - + if (ret == -EIO) + goto call_failed; if (ret < 0) goto error_unlock_call; - if (rxrpc_call_is_complete(call)) { - ret = rxrpc_recvmsg_term(call, msg); - if (ret < 0) - goto error_unlock_call; - if (!(flags & MSG_PEEK)) - rxrpc_release_call(rx, call); - msg->msg_flags |= MSG_EOR; - ret = 1; - } + if (rxrpc_call_is_complete(call) && + skb_queue_empty(&call->recvmsg_queue)) + goto call_complete; + if (rxrpc_call_has_failed(call)) + goto call_failed; + rxrpc_notify_socket(call); + goto not_yet_complete; + +call_failed: + rxrpc_purge_queue(&call->recvmsg_queue); +call_complete: + ret = rxrpc_recvmsg_term(call, msg); + if (ret < 0) + goto error_unlock_call; + if (!(flags & MSG_PEEK)) + rxrpc_release_call(rx, call); + msg->msg_flags |= MSG_EOR; + ret = 1; + +not_yet_complete: if (ret == 0) msg->msg_flags |= MSG_MORE; else @@ -508,49 +479,34 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, size_t offset = 0; int ret; - _enter("{%d,%s},%zu,%d", - call->debug_id, rxrpc_call_states[call->state], - *_len, want_more); - - ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_SECURING); + _enter("{%d},%zu,%d", call->debug_id, *_len, want_more); mutex_lock(&call->user_mutex); - switch (rxrpc_call_state(call)) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - case RXRPC_CALL_SERVER_RECV_REQUEST: - case RXRPC_CALL_SERVER_ACK_REQUEST: - ret = rxrpc_recvmsg_data(sock, call, NULL, iter, - *_len, 0, &offset); - *_len -= offset; - if (ret < 0) - goto out; - - /* We can only reach here with a partially full buffer if we - * have reached the end of the data. We must otherwise have a - * full buffer or have been given -EAGAIN. - */ - if (ret == 1) { - if (iov_iter_count(iter) > 0) - goto short_data; - if (!want_more) - goto read_phase_complete; - ret = 0; - goto out; - } - - if (!want_more) - goto excess_data; + ret = rxrpc_recvmsg_data(sock, call, NULL, iter, *_len, 0, &offset); + *_len -= offset; + if (ret == -EIO) + goto call_failed; + if (ret < 0) goto out; - case RXRPC_CALL_COMPLETE: - goto call_complete; - - default: - ret = -EINPROGRESS; + /* We can only reach here with a partially full buffer if we have + * reached the end of the data. We must otherwise have a full buffer + * or have been given -EAGAIN. + */ + if (ret == 1) { + if (iov_iter_count(iter) > 0) + goto short_data; + if (!want_more) + goto read_phase_complete; + ret = 0; goto out; } + if (!want_more) + goto excess_data; + goto out; + read_phase_complete: ret = 1; out: @@ -572,7 +528,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, 0, -EMSGSIZE); ret = -EMSGSIZE; goto out; -call_complete: +call_failed: *_abort = call->abort_code; ret = call->error; if (call->completion == RXRPC_CALL_SUCCEEDED) { From 96b4059f43ce69e9c590f77d6ce3e99888d5cfe6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Oct 2022 11:25:55 +0100 Subject: [PATCH 0433/1593] rxrpc: Remove call->state_lock All the setters of call->state are now in the I/O thread and thus the state lock is now unnecessary. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/ar-internal.h | 33 +++++++++----- net/rxrpc/call_accept.c | 2 +- net/rxrpc/call_event.c | 42 ++++++++---------- net/rxrpc/call_object.c | 30 ++++++------- net/rxrpc/call_state.c | 87 ++++++++++++++----------------------- net/rxrpc/conn_client.c | 10 ++--- net/rxrpc/conn_event.c | 11 ++--- net/rxrpc/input.c | 96 ++++++++++++++++++----------------------- net/rxrpc/output.c | 4 +- net/rxrpc/proc.c | 6 ++- net/rxrpc/rxkad.c | 2 +- net/rxrpc/sendmsg.c | 3 -- 12 files changed, 142 insertions(+), 184 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 8612734397361..751b1903fd6e1 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -631,14 +631,13 @@ struct rxrpc_call { unsigned long flags; unsigned long events; spinlock_t notify_lock; /* Kernel notification lock */ - rwlock_t state_lock; /* lock for state transition */ unsigned int send_abort_why; /* Why the abort [enum rxrpc_abort_reason] */ s32 send_abort; /* Abort code to be sent */ short send_abort_err; /* Error to be associated with the abort */ rxrpc_seq_t send_abort_seq; /* DATA packet that incurred the abort (or 0) */ s32 abort_code; /* Local/remote abort code */ int error; /* Local error incurred */ - enum rxrpc_call_state state; /* current state of call */ + enum rxrpc_call_state _state; /* Current state of call (needs barrier) */ enum rxrpc_call_completion completion; /* Call completion condition */ refcount_t ref; u8 security_ix; /* Security type */ @@ -889,25 +888,37 @@ static inline bool rxrpc_is_client_call(const struct rxrpc_call *call) /* * call_state.c */ -bool __rxrpc_set_call_completion(struct rxrpc_call *call, - enum rxrpc_call_completion compl, - u32 abort_code, - int error); bool rxrpc_set_call_completion(struct rxrpc_call *call, enum rxrpc_call_completion compl, u32 abort_code, int error); -bool __rxrpc_call_completed(struct rxrpc_call *call); bool rxrpc_call_completed(struct rxrpc_call *call); -bool __rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, - u32 abort_code, int error, enum rxrpc_abort_reason why); bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, u32 abort_code, int error, enum rxrpc_abort_reason why); +void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion compl, + int error); + +static inline void rxrpc_set_call_state(struct rxrpc_call *call, + enum rxrpc_call_state state) +{ + /* Order write of completion info before write of ->state. */ + smp_store_release(&call->_state, state); +} + +static inline enum rxrpc_call_state __rxrpc_call_state(const struct rxrpc_call *call) +{ + return call->_state; /* Only inside I/O thread */ +} + +static inline bool __rxrpc_call_is_complete(const struct rxrpc_call *call) +{ + return __rxrpc_call_state(call) == RXRPC_CALL_COMPLETE; +} static inline enum rxrpc_call_state rxrpc_call_state(const struct rxrpc_call *call) { - /* Order read ->state before read ->error. */ - return smp_load_acquire(&call->state); + /* Order read ->state before read of completion info. */ + return smp_load_acquire(&call->_state); } static inline bool rxrpc_call_is_complete(const struct rxrpc_call *call) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index a132d486dea0d..3fbf2fcaaf9e2 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -99,7 +99,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, if (!call) return -ENOMEM; call->flags |= (1 << RXRPC_CALL_IS_SERVICE); - call->state = RXRPC_CALL_SERVER_PREALLOC; + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_PREALLOC); __set_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events); trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 2e3c01060d59a..1abdef15debce 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -257,20 +257,13 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) */ static void rxrpc_begin_service_reply(struct rxrpc_call *call) { - unsigned long now; - - write_lock(&call->state_lock); - - if (call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { - now = jiffies; - call->state = RXRPC_CALL_SERVER_SEND_REPLY; - WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET); - if (call->ackr_reason == RXRPC_ACK_DELAY) - call->ackr_reason = 0; - trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); - } + unsigned long now = jiffies; - write_unlock(&call->state_lock); + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SEND_REPLY); + WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET); + if (call->ackr_reason == RXRPC_ACK_DELAY) + call->ackr_reason = 0; + trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); } /* @@ -281,18 +274,16 @@ static void rxrpc_close_tx_phase(struct rxrpc_call *call) { _debug("________awaiting reply/ACK__________"); - write_lock(&call->state_lock); - switch (call->state) { + switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: - call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_REPLY); break; case RXRPC_CALL_SERVER_SEND_REPLY: - call->state = RXRPC_CALL_SERVER_AWAIT_ACK; + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_AWAIT_ACK); break; default: break; } - write_unlock(&call->state_lock); } static bool rxrpc_tx_window_has_space(struct rxrpc_call *call) @@ -341,7 +332,7 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) static void rxrpc_transmit_some_data(struct rxrpc_call *call) { - switch (call->state) { + switch (__rxrpc_call_state(call)) { case RXRPC_CALL_SERVER_ACK_REQUEST: if (list_empty(&call->tx_sendmsg)) return; @@ -390,9 +381,10 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) //printk("\n--------------------\n"); _enter("{%d,%s,%lx}", - call->debug_id, rxrpc_call_states[call->state], call->events); + call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)], + call->events); - if (call->state == RXRPC_CALL_COMPLETE) + if (__rxrpc_call_is_complete(call)) goto out; /* Handle abort request locklessly, vs rxrpc_propose_abort(). */ @@ -415,7 +407,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) } t = READ_ONCE(call->expect_req_by); - if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST && + if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST && time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now); expired = true; @@ -499,7 +491,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_lost_ack); - if (resend && call->state != RXRPC_CALL_CLIENT_RECV_REPLY) + if (resend && __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY) rxrpc_resend(call, NULL); if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags)) @@ -511,7 +503,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_propose_ack_input_data); /* Make sure the timer is restarted */ - if (call->state != RXRPC_CALL_COMPLETE) { + if (!__rxrpc_call_is_complete(call)) { next = call->expect_rx_by; #define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; } @@ -532,7 +524,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) } out: - if (call->state == RXRPC_CALL_COMPLETE) { + if (__rxrpc_call_is_complete(call)) { del_timer_sync(&call->timer); if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) rxrpc_disconnect_call(call); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 835e9781afc6c..c94161acf3c41 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -69,7 +69,7 @@ static void rxrpc_call_timer_expired(struct timer_list *t) _enter("%d", call->debug_id); - if (call->state < RXRPC_CALL_COMPLETE) { + if (!__rxrpc_call_is_complete(call)) { trace_rxrpc_timer_expired(call, jiffies); rxrpc_poke_call(call, rxrpc_call_poke_timer); } @@ -162,7 +162,6 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, init_waitqueue_head(&call->waitq); spin_lock_init(&call->notify_lock); spin_lock_init(&call->tx_lock); - rwlock_init(&call->state_lock); refcount_set(&call->ref, 1); call->debug_id = debug_id; call->tx_total_len = -1; @@ -211,7 +210,6 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, now = ktime_get_real(); call->acks_latest_ts = now; call->cong_tstamp = now; - call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; call->dest_srx = *srx; call->interruptibility = p->interruptibility; call->tx_total_len = p->tx_total_len; @@ -227,11 +225,13 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, ret = rxrpc_init_client_call_security(call); if (ret < 0) { - __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); + rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, ret); rxrpc_put_call(call, rxrpc_call_put_discard_error); return ERR_PTR(ret); } + rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_CONN); + trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), p->user_call_ID, rxrpc_call_new_client); @@ -384,8 +384,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, error_dup_user_ID: write_unlock(&rx->call_lock); release_sock(&rx->sk); - __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - RX_CALL_DEAD, -EEXIST); + rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EEXIST); trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), 0, rxrpc_call_see_userid_exists); rxrpc_release_call(rx, call); @@ -403,8 +402,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), ret, rxrpc_call_see_connect_failed); set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); - __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - RX_CALL_DEAD, ret); + rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, ret); _leave(" = c=%08x [err]", call->debug_id); return call; } @@ -427,25 +425,25 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->call_id = sp->hdr.callNumber; call->dest_srx.srx_service = sp->hdr.serviceId; call->cid = sp->hdr.cid; - call->state = RXRPC_CALL_SERVER_SECURING; call->cong_tstamp = skb->tstamp; __set_bit(RXRPC_CALL_EXPOSED, &call->flags); + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); spin_lock(&conn->state_lock); switch (conn->state) { case RXRPC_CONN_SERVICE_UNSECURED: case RXRPC_CONN_SERVICE_CHALLENGING: - call->state = RXRPC_CALL_SERVER_SECURING; + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); break; case RXRPC_CONN_SERVICE: - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); break; case RXRPC_CONN_ABORTED: - __rxrpc_set_call_completion(call, conn->completion, - conn->abort_code, conn->error); + rxrpc_set_call_completion(call, conn->completion, + conn->abort_code, conn->error); break; default: BUG(); @@ -614,7 +612,7 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace why) dead = __refcount_dec_and_test(&call->ref, &r); trace_rxrpc_call(debug_id, r - 1, 0, why); if (dead) { - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); + ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE); if (!list_empty(&call->link)) { spin_lock(&rxnet->call_lock); @@ -677,7 +675,7 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) { memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); + ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); del_timer(&call->timer); @@ -715,7 +713,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", call, refcount_read(&call->ref), - rxrpc_call_states[call->state], + rxrpc_call_states[__rxrpc_call_state(call)], call->flags, call->events); spin_unlock(&rxnet->call_lock); diff --git a/net/rxrpc/call_state.c b/net/rxrpc/call_state.c index 649fb9e5d1af5..27dc1242b7125 100644 --- a/net/rxrpc/call_state.c +++ b/net/rxrpc/call_state.c @@ -10,81 +10,60 @@ /* * Transition a call to the complete state. */ -bool __rxrpc_set_call_completion(struct rxrpc_call *call, +bool rxrpc_set_call_completion(struct rxrpc_call *call, enum rxrpc_call_completion compl, u32 abort_code, int error) { - if (call->state < RXRPC_CALL_COMPLETE) { - call->abort_code = abort_code; - call->error = error; - call->completion = compl; - /* Allow reader of completion state to operate locklessly */ - smp_store_release(&call->state, RXRPC_CALL_COMPLETE); - trace_rxrpc_call_complete(call); - wake_up(&call->waitq); - rxrpc_notify_socket(call); - return true; - } - return false; -} - -bool rxrpc_set_call_completion(struct rxrpc_call *call, - enum rxrpc_call_completion compl, - u32 abort_code, - int error) -{ - bool ret = false; + if (__rxrpc_call_state(call) == RXRPC_CALL_COMPLETE) + return false; - if (call->state < RXRPC_CALL_COMPLETE) { - write_lock(&call->state_lock); - ret = __rxrpc_set_call_completion(call, compl, abort_code, error); - write_unlock(&call->state_lock); - } - return ret; + call->abort_code = abort_code; + call->error = error; + call->completion = compl; + /* Allow reader of completion state to operate locklessly */ + rxrpc_set_call_state(call, RXRPC_CALL_COMPLETE); + trace_rxrpc_call_complete(call); + wake_up(&call->waitq); + rxrpc_notify_socket(call); + return true; } /* * Record that a call successfully completed. */ -bool __rxrpc_call_completed(struct rxrpc_call *call) -{ - return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); -} - bool rxrpc_call_completed(struct rxrpc_call *call) { - bool ret = false; - - if (call->state < RXRPC_CALL_COMPLETE) { - write_lock(&call->state_lock); - ret = __rxrpc_call_completed(call); - write_unlock(&call->state_lock); - } - return ret; + return rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); } /* * Record that a call is locally aborted. */ -bool __rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, - u32 abort_code, int error, enum rxrpc_abort_reason why) +bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, + u32 abort_code, int error, enum rxrpc_abort_reason why) { trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, abort_code, error); - return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, - abort_code, error); + if (!rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, + abort_code, error)) + return false; + if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) + rxrpc_send_abort_packet(call); + return true; } -bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, - u32 abort_code, int error, enum rxrpc_abort_reason why) +/* + * Record that a call errored out before even getting off the ground, thereby + * setting the state to allow it to be destroyed. + */ +void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion compl, + int error) { - bool ret; - - write_lock(&call->state_lock); - ret = __rxrpc_abort_call(call, seq, abort_code, error, why); - write_unlock(&call->state_lock); - if (ret && test_bit(RXRPC_CALL_EXPOSED, &call->flags)) - rxrpc_send_abort_packet(call); - return ret; + call->abort_code = RX_CALL_DEAD; + call->error = error; + call->completion = compl; + call->_state = RXRPC_CALL_COMPLETE; + trace_rxrpc_call_complete(call); + __set_bit(RXRPC_CALL_RELEASED, &call->flags); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index c0db7722571e3..8b5ea68dc47e7 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -553,9 +553,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, trace_rxrpc_connect_call(call); - write_lock(&call->state_lock); - call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; - write_unlock(&call->state_lock); + rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_SEND_REQUEST); /* Paired with the read barrier in rxrpc_connect_call(). This orders * cid and epoch in the connection wrt to call_id without the need to @@ -687,7 +685,7 @@ static int rxrpc_wait_for_channel(struct rxrpc_bundle *bundle, set_current_state(TASK_UNINTERRUPTIBLE); break; } - if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_AWAIT_CONN) + if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) break; if ((call->interruptibility == RXRPC_INTERRUPTIBLE || call->interruptibility == RXRPC_PREINTERRUPTIBLE) && @@ -729,7 +727,7 @@ int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp) goto out; } - if (call->state == RXRPC_CALL_CLIENT_AWAIT_CONN) { + if (rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_CONN) { ret = rxrpc_wait_for_channel(bundle, call, gfp); if (ret < 0) goto wait_failed; @@ -748,7 +746,7 @@ int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp) list_del_init(&call->chan_wait_link); spin_unlock(&bundle->channel_lock); - if (call->state != RXRPC_CALL_CLIENT_AWAIT_CONN) { + if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) { ret = 0; goto granted_channel; } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index b2042702ca9aa..8d0b9ff0a5e15 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -230,14 +230,9 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn) */ static void rxrpc_call_is_secure(struct rxrpc_call *call) { - _enter("%p", call); - if (call) { - write_lock(&call->state_lock); - if (call->state == RXRPC_CALL_SERVER_SECURING) { - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; - rxrpc_notify_socket(call); - } - write_unlock(&call->state_lock); + if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) { + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); + rxrpc_notify_socket(call); } } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 6eb21425f41fe..367927a998815 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -184,7 +184,7 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) if (call->cong_mode != RXRPC_CALL_SLOW_START && call->cong_mode != RXRPC_CALL_CONGEST_AVOIDANCE) return; - if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) + if (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_REPLY) return; rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8)); @@ -252,43 +252,31 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, enum rxrpc_abort_reason abort_why) { - unsigned int state; - ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); - write_lock(&call->state_lock); - - state = call->state; - switch (state) { + switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: - if (reply_begun) - call->state = state = RXRPC_CALL_CLIENT_RECV_REPLY; - else - call->state = state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + if (reply_begun) { + rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_RECV_REPLY); + trace_rxrpc_txqueue(call, rxrpc_txqueue_end); + break; + } + + rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_REPLY); + trace_rxrpc_txqueue(call, rxrpc_txqueue_await_reply); break; case RXRPC_CALL_SERVER_AWAIT_ACK: - __rxrpc_call_completed(call); - state = call->state; + rxrpc_call_completed(call); + trace_rxrpc_txqueue(call, rxrpc_txqueue_end); break; default: - goto bad_state; + kdebug("end_tx %s", rxrpc_call_states[__rxrpc_call_state(call)]); + rxrpc_proto_abort(call, call->tx_top, abort_why); + break; } - - write_unlock(&call->state_lock); - if (state == RXRPC_CALL_CLIENT_AWAIT_REPLY) - trace_rxrpc_txqueue(call, rxrpc_txqueue_await_reply); - else - trace_rxrpc_txqueue(call, rxrpc_txqueue_end); - _leave(" = ok"); - return; - -bad_state: - write_unlock(&call->state_lock); - kdebug("end_tx %s", rxrpc_call_states[call->state]); - rxrpc_proto_abort(call, call->tx_top, abort_why); } /* @@ -303,7 +291,7 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) if (call->ackr_reason) { now = jiffies; timo = now + MAX_JIFFY_OFFSET; - WRITE_ONCE(call->resend_at, timo); + WRITE_ONCE(call->delay_ack_at, timo); trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); } @@ -326,30 +314,23 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) { rxrpc_seq_t whigh = READ_ONCE(call->rx_highest_seq); - _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); + _enter("%d,%s", call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)]); trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh); - if (rxrpc_call_state(call) == RXRPC_CALL_CLIENT_RECV_REPLY) - rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack); - - write_lock(&call->state_lock); - - switch (call->state) { + switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_RECV_REPLY: - __rxrpc_call_completed(call); - write_unlock(&call->state_lock); + rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack); + rxrpc_call_completed(call); break; case RXRPC_CALL_SERVER_RECV_REQUEST: - call->state = RXRPC_CALL_SERVER_ACK_REQUEST; + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_ACK_REQUEST); call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; - write_unlock(&call->state_lock); - rxrpc_propose_delay_ACK(call, serial, - rxrpc_propose_ack_processing_op); + rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_processing_op); break; + default: - write_unlock(&call->state_lock); break; } } @@ -583,7 +564,6 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - enum rxrpc_call_state state; rxrpc_serial_t serial = sp->hdr.serial; rxrpc_seq_t seq0 = sp->hdr.seq; @@ -591,11 +571,20 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) atomic64_read(&call->ackr_window), call->rx_highest_seq, skb->len, seq0); - state = READ_ONCE(call->state); - if (state >= RXRPC_CALL_COMPLETE) + if (__rxrpc_call_is_complete(call)) return; - if (state == RXRPC_CALL_SERVER_RECV_REQUEST) { + switch (__rxrpc_call_state(call)) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + /* Received data implicitly ACKs all of the request + * packets we sent when we're acting as a client. + */ + if (!rxrpc_receiving_reply(call)) + goto out_notify; + break; + + case RXRPC_CALL_SERVER_RECV_REQUEST: { unsigned long timo = READ_ONCE(call->next_req_timo); unsigned long now, expect_req_by; @@ -606,15 +595,12 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_reduce_call_timer(call, expect_req_by, now, rxrpc_timer_set_for_idle); } + break; } - /* Received data implicitly ACKs all of the request packets we sent - * when we're acting as a client. - */ - if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST || - state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && - !rxrpc_receiving_reply(call)) - goto out_notify; + default: + break; + } if (!rxrpc_input_split_jumbo(call, skb)) { rxrpc_proto_abort(call, sp->hdr.seq, rxrpc_badmsg_bad_jumbo); @@ -904,7 +890,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero); /* Ignore ACKs unless we are or have just been transmitting. */ - switch (READ_ONCE(call->state)) { + switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: case RXRPC_CALL_SERVER_SEND_REPLY: @@ -1027,7 +1013,7 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb) */ void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb) { - switch (READ_ONCE(call->state)) { + switch (__rxrpc_call_state(call)) { case RXRPC_CALL_SERVER_AWAIT_ACK: rxrpc_call_completed(call); fallthrough; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 8a5ff2c9e0612..a9746be296347 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -261,7 +261,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) rxrpc_tx_point_call_ack); rxrpc_tx_backoff(call, ret); - if (call->state < RXRPC_CALL_COMPLETE) { + if (!__rxrpc_call_is_complete(call)) { if (ret < 0) rxrpc_cancel_rtt_probe(call, serial, rtt_slot); rxrpc_set_keepalive(call); @@ -723,7 +723,7 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer) static inline void rxrpc_instant_resend(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { - if (call->state < RXRPC_CALL_COMPLETE) + if (!__rxrpc_call_is_complete(call)) kdebug("resend"); } diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 63947cce40483..c39ef94602ed7 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -50,6 +50,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) struct rxrpc_local *local; struct rxrpc_call *call; struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + enum rxrpc_call_state state; unsigned long timeout = 0; rxrpc_seq_t acks_hard_ack; char lbuff[50], rbuff[50]; @@ -74,7 +75,8 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) sprintf(rbuff, "%pISpc", &call->dest_srx.transport); - if (call->state != RXRPC_CALL_SERVER_PREALLOC) { + state = rxrpc_call_state(call); + if (state != RXRPC_CALL_SERVER_PREALLOC) { timeout = READ_ONCE(call->expect_rx_by); timeout -= jiffies; } @@ -91,7 +93,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call->call_id, rxrpc_is_service_call(call) ? "Svc" : "Clt", refcount_read(&call->ref), - rxrpc_call_states[call->state], + rxrpc_call_states[state], call->abort_code, call->debug_id, acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack, diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index e52cb8058156f..dfb01e7b90fbb 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -1143,7 +1143,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, call = rcu_dereference_protected( conn->channels[i].call, lockdep_is_held(&conn->bundle->channel_lock)); - if (call && call->state < RXRPC_CALL_COMPLETE) { + if (call && !__rxrpc_call_is_complete(call)) { rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, rxkad_abort_resp_call_state); goto protocol_error_unlock; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0428528abbf49..a5d0005b7ce52 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -702,9 +702,6 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, mutex_lock(&call->user_mutex); - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len, notify_end_tx, &dropped_lock); if (ret == -ESHUTDOWN) From 0d6bf319bc5aba4535bb46e1b607973688a2248a Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 2 Nov 2022 16:46:13 +0000 Subject: [PATCH 0434/1593] rxrpc: Move the client conn cache management to the I/O thread Move the management of the client connection cache to the I/O thread rather than managing it from the namespace as an aggregate across all the local endpoints within the namespace. This will allow a load of locking to be got rid of in a future patch as only the I/O thread will be looking at the this. The downside is that the total number of cached connections on the system can get higher because the limit is now per-local rather than per-netns. We can, however, keep the number of client conns in use across the entire netfs and use that to reduce the expiration time of idle connection. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/ar-internal.h | 17 ++++---- net/rxrpc/conn_client.c | 92 ++++++++++++++-------------------------- net/rxrpc/conn_object.c | 1 - net/rxrpc/io_thread.c | 4 ++ net/rxrpc/local_object.c | 17 ++++++++ net/rxrpc/net_ns.c | 17 -------- 6 files changed, 62 insertions(+), 86 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 751b1903fd6e1..de84061a54472 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -76,13 +76,7 @@ struct rxrpc_net { bool live; - bool kill_all_client_conns; atomic_t nr_client_conns; - spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ - struct mutex client_conn_discard_lock; /* Prevent multiple discarders */ - struct list_head idle_client_conns; - struct work_struct client_conn_reaper; - struct timer_list client_conn_reap_timer; struct hlist_head local_endpoints; struct mutex local_mutex; /* Lock for ->local_endpoints */ @@ -294,8 +288,16 @@ struct rxrpc_local { struct sk_buff_head rx_queue; /* Received packets */ struct list_head conn_attend_q; /* Conns requiring immediate attention */ struct list_head call_attend_q; /* Calls requiring immediate attention */ + struct rb_root client_bundles; /* Client connection bundles by socket params */ spinlock_t client_bundles_lock; /* Lock for client_bundles */ + bool kill_all_client_conns; + spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ + struct list_head idle_client_conns; + struct timer_list client_conn_reap_timer; + unsigned long client_conn_flags; +#define RXRPC_CLIENT_CONN_REAP_TIMER 0 /* The client conn reap timer expired */ + spinlock_t lock; /* access lock */ rwlock_t services_lock; /* lock for services list */ int debug_id; /* debug ID for printks */ @@ -946,8 +948,7 @@ void rxrpc_expose_client_call(struct rxrpc_call *); void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *); void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); void rxrpc_put_client_conn(struct rxrpc_connection *, enum rxrpc_conn_trace); -void rxrpc_discard_expired_client_conns(struct work_struct *); -void rxrpc_destroy_all_client_connections(struct rxrpc_net *); +void rxrpc_discard_expired_client_conns(struct rxrpc_local *local); void rxrpc_clean_up_local_conns(struct rxrpc_local *); /* diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 8b5ea68dc47e7..ebb43f65ebc51 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -578,17 +578,17 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, */ static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connection *conn) { - struct rxrpc_net *rxnet = bundle->local->rxnet; + struct rxrpc_local *local = bundle->local; bool drop_ref; if (!list_empty(&conn->cache_link)) { drop_ref = false; - spin_lock(&rxnet->client_conn_cache_lock); + spin_lock(&local->client_conn_cache_lock); if (!list_empty(&conn->cache_link)) { list_del_init(&conn->cache_link); drop_ref = true; } - spin_unlock(&rxnet->client_conn_cache_lock); + spin_unlock(&local->client_conn_cache_lock); if (drop_ref) rxrpc_put_connection(conn, rxrpc_conn_put_unidle); } @@ -710,14 +710,10 @@ static int rxrpc_wait_for_channel(struct rxrpc_bundle *bundle, int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp) { struct rxrpc_bundle *bundle; - struct rxrpc_local *local = call->local; - struct rxrpc_net *rxnet = local->rxnet; int ret = 0; _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper); - rxrpc_get_call(call, rxrpc_call_get_io_thread); bundle = rxrpc_prep_call(call, gfp); @@ -787,14 +783,14 @@ void rxrpc_expose_client_call(struct rxrpc_call *call) /* * Set the reap timer. */ -static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet) +static void rxrpc_set_client_reap_timer(struct rxrpc_local *local) { - if (!rxnet->kill_all_client_conns) { + if (!local->kill_all_client_conns) { unsigned long now = jiffies; unsigned long reap_at = now + rxrpc_conn_idle_client_expiry; - if (rxnet->live) - timer_reduce(&rxnet->client_conn_reap_timer, reap_at); + if (local->rxnet->live) + timer_reduce(&local->client_conn_reap_timer, reap_at); } } @@ -805,7 +801,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call { struct rxrpc_connection *conn; struct rxrpc_channel *chan = NULL; - struct rxrpc_net *rxnet = bundle->local->rxnet; + struct rxrpc_local *local = bundle->local; unsigned int channel; bool may_reuse; u32 cid; @@ -895,11 +891,11 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call conn->idle_timestamp = jiffies; rxrpc_get_connection(conn, rxrpc_conn_get_idle); - spin_lock(&rxnet->client_conn_cache_lock); - list_move_tail(&conn->cache_link, &rxnet->idle_client_conns); - spin_unlock(&rxnet->client_conn_cache_lock); + spin_lock(&local->client_conn_cache_lock); + list_move_tail(&conn->cache_link, &local->idle_client_conns); + spin_unlock(&local->client_conn_cache_lock); - rxrpc_set_client_reap_timer(rxnet); + rxrpc_set_client_reap_timer(local); } out: @@ -986,42 +982,34 @@ void rxrpc_kill_client_conn(struct rxrpc_connection *conn) * This may be called from conn setup or from a work item so cannot be * considered non-reentrant. */ -void rxrpc_discard_expired_client_conns(struct work_struct *work) +void rxrpc_discard_expired_client_conns(struct rxrpc_local *local) { struct rxrpc_connection *conn; - struct rxrpc_net *rxnet = - container_of(work, struct rxrpc_net, client_conn_reaper); unsigned long expiry, conn_expires_at, now; unsigned int nr_conns; _enter(""); - if (list_empty(&rxnet->idle_client_conns)) { + if (list_empty(&local->idle_client_conns)) { _leave(" [empty]"); return; } - /* Don't double up on the discarding */ - if (!mutex_trylock(&rxnet->client_conn_discard_lock)) { - _leave(" [already]"); - return; - } - /* We keep an estimate of what the number of conns ought to be after * we've discarded some so that we don't overdo the discarding. */ - nr_conns = atomic_read(&rxnet->nr_client_conns); + nr_conns = atomic_read(&local->rxnet->nr_client_conns); next: - spin_lock(&rxnet->client_conn_cache_lock); + spin_lock(&local->client_conn_cache_lock); - if (list_empty(&rxnet->idle_client_conns)) + if (list_empty(&local->idle_client_conns)) goto out; - conn = list_entry(rxnet->idle_client_conns.next, + conn = list_entry(local->idle_client_conns.next, struct rxrpc_connection, cache_link); - if (!rxnet->kill_all_client_conns) { + if (!local->kill_all_client_conns) { /* If the number of connections is over the reap limit, we * expedite discard by reducing the expiry timeout. We must, * however, have at least a short grace period to be able to do @@ -1044,7 +1032,7 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work) trace_rxrpc_client(conn, -1, rxrpc_client_discard); list_del_init(&conn->cache_link); - spin_unlock(&rxnet->client_conn_cache_lock); + spin_unlock(&local->client_conn_cache_lock); rxrpc_unbundle_conn(conn); /* Drop the ->cache_link ref */ @@ -1062,32 +1050,11 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work) * then things get messier. */ _debug("not yet"); - if (!rxnet->kill_all_client_conns) - timer_reduce(&rxnet->client_conn_reap_timer, conn_expires_at); + if (!local->kill_all_client_conns) + timer_reduce(&local->client_conn_reap_timer, conn_expires_at); out: - spin_unlock(&rxnet->client_conn_cache_lock); - mutex_unlock(&rxnet->client_conn_discard_lock); - _leave(""); -} - -/* - * Preemptively destroy all the client connection records rather than waiting - * for them to time out - */ -void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet) -{ - _enter(""); - - spin_lock(&rxnet->client_conn_cache_lock); - rxnet->kill_all_client_conns = true; - spin_unlock(&rxnet->client_conn_cache_lock); - - del_timer_sync(&rxnet->client_conn_reap_timer); - - if (!rxrpc_queue_work(&rxnet->client_conn_reaper)) - _debug("destroy: queue failed"); - + spin_unlock(&local->client_conn_cache_lock); _leave(""); } @@ -1097,14 +1064,19 @@ void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet) void rxrpc_clean_up_local_conns(struct rxrpc_local *local) { struct rxrpc_connection *conn, *tmp; - struct rxrpc_net *rxnet = local->rxnet; LIST_HEAD(graveyard); _enter(""); - spin_lock(&rxnet->client_conn_cache_lock); + spin_lock(&local->client_conn_cache_lock); + local->kill_all_client_conns = true; + spin_unlock(&local->client_conn_cache_lock); + + del_timer_sync(&local->client_conn_reap_timer); + + spin_lock(&local->client_conn_cache_lock); - list_for_each_entry_safe(conn, tmp, &rxnet->idle_client_conns, + list_for_each_entry_safe(conn, tmp, &local->idle_client_conns, cache_link) { if (conn->local == local) { atomic_dec(&conn->active); @@ -1113,7 +1085,7 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *local) } } - spin_unlock(&rxnet->client_conn_cache_lock); + spin_unlock(&local->client_conn_cache_lock); while (!list_empty(&graveyard)) { conn = list_entry(graveyard.next, diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 2a7d5378300cc..3d8c1dc6a82ae 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -470,7 +470,6 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) _enter(""); atomic_dec(&rxnet->nr_conns); - rxrpc_destroy_all_client_connections(rxnet); del_timer_sync(&rxnet->service_conn_reap_timer); rxrpc_queue_work(&rxnet->service_conn_reaper); diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 751139b3c1ac9..a299cc34c1403 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -435,6 +435,10 @@ int rxrpc_io_thread(void *data) continue; } + if (test_and_clear_bit(RXRPC_CLIENT_CONN_REAP_TIMER, + &local->client_conn_flags)) + rxrpc_discard_expired_client_conns(local); + /* Deal with calls that want immediate attention. */ if ((call = list_first_entry_or_null(&local->call_attend_q, struct rxrpc_call, diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index ca8b3ee68b597..9bc8d08ca12cf 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -82,6 +82,16 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local, } } +static void rxrpc_client_conn_reap_timeout(struct timer_list *timer) +{ + struct rxrpc_local *local = + container_of(timer, struct rxrpc_local, client_conn_reap_timer); + + if (local->kill_all_client_conns && + test_and_set_bit(RXRPC_CLIENT_CONN_REAP_TIMER, &local->client_conn_flags)) + rxrpc_wake_up_io_thread(local); +} + /* * Allocate a new local endpoint. */ @@ -103,8 +113,15 @@ static struct rxrpc_local *rxrpc_alloc_local(struct net *net, skb_queue_head_init(&local->rx_queue); INIT_LIST_HEAD(&local->conn_attend_q); INIT_LIST_HEAD(&local->call_attend_q); + local->client_bundles = RB_ROOT; spin_lock_init(&local->client_bundles_lock); + local->kill_all_client_conns = false; + spin_lock_init(&local->client_conn_cache_lock); + INIT_LIST_HEAD(&local->idle_client_conns); + timer_setup(&local->client_conn_reap_timer, + rxrpc_client_conn_reap_timeout, 0); + spin_lock_init(&local->lock); rwlock_init(&local->services_lock); local->debug_id = atomic_inc_return(&rxrpc_debug_id); diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 5905530e2f33b..a0319c040c25d 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -10,15 +10,6 @@ unsigned int rxrpc_net_id; -static void rxrpc_client_conn_reap_timeout(struct timer_list *timer) -{ - struct rxrpc_net *rxnet = - container_of(timer, struct rxrpc_net, client_conn_reap_timer); - - if (rxnet->live) - rxrpc_queue_work(&rxnet->client_conn_reaper); -} - static void rxrpc_service_conn_reap_timeout(struct timer_list *timer) { struct rxrpc_net *rxnet = @@ -63,14 +54,6 @@ static __net_init int rxrpc_init_net(struct net *net) rxrpc_service_conn_reap_timeout, 0); atomic_set(&rxnet->nr_client_conns, 0); - rxnet->kill_all_client_conns = false; - spin_lock_init(&rxnet->client_conn_cache_lock); - mutex_init(&rxnet->client_conn_discard_lock); - INIT_LIST_HEAD(&rxnet->idle_client_conns); - INIT_WORK(&rxnet->client_conn_reaper, - rxrpc_discard_expired_client_conns); - timer_setup(&rxnet->client_conn_reap_timer, - rxrpc_client_conn_reap_timeout, 0); INIT_HLIST_HEAD(&rxnet->local_endpoints); mutex_init(&rxnet->local_mutex); From 9d35d880e0e4a3ab32d8c12f9e4d76198aadd42d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 19 Oct 2022 09:45:43 +0100 Subject: [PATCH 0435/1593] rxrpc: Move client call connection to the I/O thread Move the connection setup of client calls to the I/O thread so that a whole load of locking and barrierage can be eliminated. This necessitates the app thread waiting for connection to complete before it can begin encrypting data. This also completes the fix for a race that exists between call connection and call disconnection whereby the data transmission code adds the call to the peer error distribution list after the call has been disconnected (say by the rxrpc socket getting closed). The fix is to complete the process of moving call connection, data transmission and call disconnection into the I/O thread and thus forcibly serialising them. Note that the issue may predate the overhaul to an I/O thread model that were included in the merge window for v6.2, but the timing is very much changed by the change given below. Fixes: cf37b5987508 ("rxrpc: Move DATA transmission into call processor work item") Reported-by: syzbot+c22650d2844392afdcfd@syzkaller.appspotmail.com Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- include/trace/events/rxrpc.h | 5 +- net/rxrpc/ar-internal.h | 22 +- net/rxrpc/call_object.c | 58 +++- net/rxrpc/call_state.c | 2 +- net/rxrpc/conn_client.c | 533 ++++++++--------------------------- net/rxrpc/conn_event.c | 49 +--- net/rxrpc/conn_object.c | 19 +- net/rxrpc/conn_service.c | 1 - net/rxrpc/io_thread.c | 13 +- net/rxrpc/local_object.c | 6 +- net/rxrpc/proc.c | 1 + net/rxrpc/rxkad.c | 21 +- net/rxrpc/security.c | 33 +-- net/rxrpc/sendmsg.c | 64 +++++ 14 files changed, 297 insertions(+), 530 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e2f6b79d55170..283db0ea3db4b 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -218,7 +218,6 @@ EM(rxrpc_conn_put_call, "PUT call ") \ EM(rxrpc_conn_put_call_input, "PUT inp-call") \ EM(rxrpc_conn_put_conn_input, "PUT inp-conn") \ - EM(rxrpc_conn_put_discard, "PUT discard ") \ EM(rxrpc_conn_put_discard_idle, "PUT disc-idl") \ EM(rxrpc_conn_put_local_dead, "PUT loc-dead") \ EM(rxrpc_conn_put_noreuse, "PUT noreuse ") \ @@ -240,12 +239,11 @@ EM(rxrpc_client_chan_activate, "ChActv") \ EM(rxrpc_client_chan_disconnect, "ChDisc") \ EM(rxrpc_client_chan_pass, "ChPass") \ - EM(rxrpc_client_chan_wait_failed, "ChWtFl") \ EM(rxrpc_client_cleanup, "Clean ") \ EM(rxrpc_client_discard, "Discar") \ - EM(rxrpc_client_duplicate, "Duplic") \ EM(rxrpc_client_exposed, "Expose") \ EM(rxrpc_client_replace, "Replac") \ + EM(rxrpc_client_queue_new_call, "Q-Call") \ EM(rxrpc_client_to_active, "->Actv") \ E_(rxrpc_client_to_idle, "->Idle") @@ -273,6 +271,7 @@ EM(rxrpc_call_put_sendmsg, "PUT sendmsg ") \ EM(rxrpc_call_put_unnotify, "PUT unnotify") \ EM(rxrpc_call_put_userid_exists, "PUT u-exists") \ + EM(rxrpc_call_put_userid, "PUT user-id ") \ EM(rxrpc_call_see_accept, "SEE accept ") \ EM(rxrpc_call_see_activate_client, "SEE act-clnt") \ EM(rxrpc_call_see_connect_failed, "SEE con-fail") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index de84061a54472..007258538beeb 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -292,7 +292,6 @@ struct rxrpc_local { struct rb_root client_bundles; /* Client connection bundles by socket params */ spinlock_t client_bundles_lock; /* Lock for client_bundles */ bool kill_all_client_conns; - spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ struct list_head idle_client_conns; struct timer_list client_conn_reap_timer; unsigned long client_conn_flags; @@ -304,7 +303,8 @@ struct rxrpc_local { bool dead; bool service_closed; /* Service socket closed */ struct idr conn_ids; /* List of connection IDs */ - spinlock_t conn_lock; /* Lock for client connection pool */ + struct list_head new_client_calls; /* Newly created client calls need connection */ + spinlock_t client_call_lock; /* Lock for ->new_client_calls */ struct sockaddr_rxrpc srx; /* local address */ }; @@ -385,7 +385,6 @@ enum rxrpc_call_completion { * Bits in the connection flags. */ enum rxrpc_conn_flag { - RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */ RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */ RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */ @@ -413,6 +412,7 @@ enum rxrpc_conn_event { */ enum rxrpc_conn_proto_state { RXRPC_CONN_UNUSED, /* Connection not yet attempted */ + RXRPC_CONN_CLIENT_UNSECURED, /* Client connection needs security init */ RXRPC_CONN_CLIENT, /* Client connection */ RXRPC_CONN_SERVICE_PREALLOC, /* Service connection preallocation */ RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */ @@ -436,11 +436,9 @@ struct rxrpc_bundle { u32 security_level; /* Security level selected */ u16 service_id; /* Service ID for this connection */ bool try_upgrade; /* True if the bundle is attempting upgrade */ - bool alloc_conn; /* True if someone's getting a conn */ bool exclusive; /* T if conn is exclusive */ bool upgrade; /* T if service ID can be upgraded */ - short alloc_error; /* Error from last conn allocation */ - spinlock_t channel_lock; + unsigned short alloc_error; /* Error from last conn allocation */ struct rb_node local_node; /* Node in local->client_conns */ struct list_head waiting_calls; /* Calls waiting for channels */ unsigned long avail_chans; /* Mask of available channels */ @@ -468,7 +466,7 @@ struct rxrpc_connection { unsigned char act_chans; /* Mask of active channels */ struct rxrpc_channel { unsigned long final_ack_at; /* Time at which to issue final ACK */ - struct rxrpc_call __rcu *call; /* Active call */ + struct rxrpc_call *call; /* Active call */ unsigned int call_debug_id; /* call->debug_id */ u32 call_id; /* ID of current call */ u32 call_counter; /* Call ID counter */ @@ -489,6 +487,7 @@ struct rxrpc_connection { struct list_head link; /* link in master connection list */ struct sk_buff_head rx_queue; /* received conn-level packets */ + struct mutex security_lock; /* Lock for security management */ const struct rxrpc_security *security; /* applied security module */ union { struct { @@ -619,7 +618,7 @@ struct rxrpc_call { struct work_struct destroyer; /* In-process-context destroyer */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ - struct list_head chan_wait_link; /* Link in conn->bundle->waiting_calls */ + struct list_head wait_link; /* Link in local->new_client_calls */ struct hlist_node error_link; /* link in error distribution list */ struct list_head accept_link; /* Link in rx->acceptq */ struct list_head recvmsg_link; /* Link in rx->recvmsg_q */ @@ -866,6 +865,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct sockaddr_rxrpc *, struct rxrpc_call_params *, gfp_t, unsigned int); +void rxrpc_start_call_timer(struct rxrpc_call *call); void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, struct sk_buff *); void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); @@ -905,6 +905,7 @@ static inline void rxrpc_set_call_state(struct rxrpc_call *call, { /* Order write of completion info before write of ->state. */ smp_store_release(&call->_state, state); + wake_up(&call->waitq); } static inline enum rxrpc_call_state __rxrpc_call_state(const struct rxrpc_call *call) @@ -940,10 +941,11 @@ extern unsigned int rxrpc_reap_client_connections; extern unsigned long rxrpc_conn_idle_client_expiry; extern unsigned long rxrpc_conn_idle_client_fast_expiry; -void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local); +void rxrpc_purge_client_connections(struct rxrpc_local *local); struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace); void rxrpc_put_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace); -int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp); +int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp); +void rxrpc_connect_client_calls(struct rxrpc_local *local); void rxrpc_expose_client_call(struct rxrpc_call *); void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *); void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index c94161acf3c41..3ded5a24627c5 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -150,7 +150,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, timer_setup(&call->timer, rxrpc_call_timer_expired, 0); INIT_WORK(&call->destroyer, rxrpc_destroy_call); INIT_LIST_HEAD(&call->link); - INIT_LIST_HEAD(&call->chan_wait_link); + INIT_LIST_HEAD(&call->wait_link); INIT_LIST_HEAD(&call->accept_link); INIT_LIST_HEAD(&call->recvmsg_link); INIT_LIST_HEAD(&call->sock_link); @@ -242,7 +242,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, /* * Initiate the call ack/resend/expiry timer. */ -static void rxrpc_start_call_timer(struct rxrpc_call *call) +void rxrpc_start_call_timer(struct rxrpc_call *call) { unsigned long now = jiffies; unsigned long j = now + MAX_JIFFY_OFFSET; @@ -286,6 +286,39 @@ static void rxrpc_put_call_slot(struct rxrpc_call *call) up(limiter); } +/* + * Start the process of connecting a call. We obtain a peer and a connection + * bundle, but the actual association of a call with a connection is offloaded + * to the I/O thread to simplify locking. + */ +static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp) +{ + struct rxrpc_local *local = call->local; + int ret = 0; + + _enter("{%d,%lx},", call->debug_id, call->user_call_ID); + + call->peer = rxrpc_lookup_peer(local, &call->dest_srx, gfp); + if (!call->peer) + goto error; + + ret = rxrpc_look_up_bundle(call, gfp); + if (ret < 0) + goto error; + + trace_rxrpc_client(NULL, -1, rxrpc_client_queue_new_call); + rxrpc_get_call(call, rxrpc_call_get_io_thread); + spin_lock(&local->client_call_lock); + list_add_tail(&call->wait_link, &local->new_client_calls); + spin_unlock(&local->client_call_lock); + rxrpc_wake_up_io_thread(local); + return 0; + +error: + __set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + return ret; +} + /* * Set up a call for the given parameters. * - Called with the socket lock held, which it must release. @@ -369,10 +402,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, if (ret < 0) goto error_attached_to_socket; - rxrpc_see_call(call, rxrpc_call_see_connected); - - rxrpc_start_call_timer(call); - _leave(" = %p [new]", call); return call; @@ -387,22 +416,20 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EEXIST); trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), 0, rxrpc_call_see_userid_exists); - rxrpc_release_call(rx, call); mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put_userid_exists); _leave(" = -EEXIST"); return ERR_PTR(-EEXIST); /* We got an error, but the call is attached to the socket and is in - * need of release. However, we might now race with recvmsg() when - * completing the call queues it. Return 0 from sys_sendmsg() and + * need of release. However, we might now race with recvmsg() when it + * completion notifies the socket. Return 0 from sys_sendmsg() and * leave the error to recvmsg() to deal with. */ error_attached_to_socket: trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), ret, rxrpc_call_see_connect_failed); - set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); - rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, ret); + rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); _leave(" = c=%08x [err]", call->debug_id); return call; } @@ -460,7 +487,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, chan = sp->hdr.cid & RXRPC_CHANNELMASK; conn->channels[chan].call_counter = call->call_id; conn->channels[chan].call_id = call->call_id; - rcu_assign_pointer(conn->channels[chan].call, call); + conn->channels[chan].call = call; spin_unlock(&conn->state_lock); spin_lock(&conn->peer->lock); @@ -520,7 +547,7 @@ static void rxrpc_cleanup_ring(struct rxrpc_call *call) void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; - bool put = false; + bool put = false, putu = false; _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref)); @@ -555,7 +582,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { rb_erase(&call->sock_node, &rx->calls); memset(&call->sock_node, 0xdd, sizeof(call->sock_node)); - rxrpc_put_call(call, rxrpc_call_put_userid_exists); + putu = true; } list_del(&call->sock_link); @@ -563,6 +590,9 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); + if (putu) + rxrpc_put_call(call, rxrpc_call_put_userid); + _leave(""); } diff --git a/net/rxrpc/call_state.c b/net/rxrpc/call_state.c index 27dc1242b7125..6afb54373ebbf 100644 --- a/net/rxrpc/call_state.c +++ b/net/rxrpc/call_state.c @@ -65,5 +65,5 @@ void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion comp call->completion = compl; call->_state = RXRPC_CALL_COMPLETE; trace_rxrpc_call_complete(call); - __set_bit(RXRPC_CALL_RELEASED, &call->flags); + WARN_ON_ONCE(__test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags)); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index ebb43f65ebc51..981ca5b98bcb9 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -39,61 +39,19 @@ static void rxrpc_activate_bundle(struct rxrpc_bundle *bundle) atomic_inc(&bundle->active); } -/* - * Get a connection ID and epoch for a client connection from the global pool. - * The connection struct pointer is then recorded in the idr radix tree. The - * epoch doesn't change until the client is rebooted (or, at least, unless the - * module is unloaded). - */ -static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, - gfp_t gfp) -{ - struct rxrpc_local *local = conn->local; - int id; - - _enter(""); - - idr_preload(gfp); - spin_lock(&local->conn_lock); - - id = idr_alloc_cyclic(&local->conn_ids, conn, - 1, 0x40000000, GFP_NOWAIT); - if (id < 0) - goto error; - - spin_unlock(&local->conn_lock); - idr_preload_end(); - - conn->proto.epoch = local->rxnet->epoch; - conn->proto.cid = id << RXRPC_CIDSHIFT; - set_bit(RXRPC_CONN_HAS_IDR, &conn->flags); - _leave(" [CID %x]", conn->proto.cid); - return 0; - -error: - spin_unlock(&local->conn_lock); - idr_preload_end(); - _leave(" = %d", id); - return id; -} - /* * Release a connection ID for a client connection. */ static void rxrpc_put_client_connection_id(struct rxrpc_local *local, struct rxrpc_connection *conn) { - if (test_bit(RXRPC_CONN_HAS_IDR, &conn->flags)) { - spin_lock(&local->conn_lock); - idr_remove(&local->conn_ids, conn->proto.cid >> RXRPC_CIDSHIFT); - spin_unlock(&local->conn_lock); - } + idr_remove(&local->conn_ids, conn->proto.cid >> RXRPC_CIDSHIFT); } /* * Destroy the client connection ID tree. */ -void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local) +static void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local) { struct rxrpc_connection *conn; int id; @@ -129,7 +87,6 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call, bundle->security_level = call->security_level; refcount_set(&bundle->ref, 1); atomic_set(&bundle->active, 1); - spin_lock_init(&bundle->channel_lock); INIT_LIST_HEAD(&bundle->waiting_calls); trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new); } @@ -169,69 +126,68 @@ void rxrpc_put_bundle(struct rxrpc_bundle *bundle, enum rxrpc_bundle_trace why) } } +/* + * Get rid of outstanding client connection preallocations when a local + * endpoint is destroyed. + */ +void rxrpc_purge_client_connections(struct rxrpc_local *local) +{ + rxrpc_destroy_client_conn_ids(local); +} + /* * Allocate a client connection. */ static struct rxrpc_connection * -rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) +rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle) { struct rxrpc_connection *conn; - struct rxrpc_net *rxnet = bundle->local->rxnet; - int ret; + struct rxrpc_local *local = bundle->local; + struct rxrpc_net *rxnet = local->rxnet; + int id; _enter(""); - conn = rxrpc_alloc_connection(rxnet, gfp); - if (!conn) { - _leave(" = -ENOMEM"); + conn = rxrpc_alloc_connection(rxnet, GFP_ATOMIC | __GFP_NOWARN); + if (!conn) return ERR_PTR(-ENOMEM); + + id = idr_alloc_cyclic(&local->conn_ids, conn, 1, 0x40000000, + GFP_ATOMIC | __GFP_NOWARN); + if (id < 0) { + kfree(conn); + return ERR_PTR(id); } refcount_set(&conn->ref, 1); - conn->bundle = bundle; - conn->local = bundle->local; - conn->peer = bundle->peer; - conn->key = bundle->key; + conn->proto.cid = id << RXRPC_CIDSHIFT; + conn->proto.epoch = local->rxnet->epoch; + conn->out_clientflag = RXRPC_CLIENT_INITIATED; + conn->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn); + conn->local = rxrpc_get_local(bundle->local, rxrpc_local_get_client_conn); + conn->peer = rxrpc_get_peer(bundle->peer, rxrpc_peer_get_client_conn); + conn->key = key_get(bundle->key); + conn->security = bundle->security; conn->exclusive = bundle->exclusive; conn->upgrade = bundle->upgrade; conn->orig_service_id = bundle->service_id; conn->security_level = bundle->security_level; - conn->out_clientflag = RXRPC_CLIENT_INITIATED; - conn->state = RXRPC_CONN_CLIENT; + conn->state = RXRPC_CONN_CLIENT_UNSECURED; conn->service_id = conn->orig_service_id; - ret = rxrpc_get_client_connection_id(conn, gfp); - if (ret < 0) - goto error_0; - - ret = rxrpc_init_client_conn_security(conn); - if (ret < 0) - goto error_1; + if (conn->security == &rxrpc_no_security) + conn->state = RXRPC_CONN_CLIENT; atomic_inc(&rxnet->nr_conns); write_lock(&rxnet->conn_lock); list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); write_unlock(&rxnet->conn_lock); - rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn); - rxrpc_get_peer(conn->peer, rxrpc_peer_get_client_conn); - rxrpc_get_local(conn->local, rxrpc_local_get_client_conn); - key_get(conn->key); - - trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref), - rxrpc_conn_new_client); + rxrpc_see_connection(conn, rxrpc_conn_new_client); atomic_inc(&rxnet->nr_client_conns); trace_rxrpc_client(conn, -1, rxrpc_client_alloc); - _leave(" = %p", conn); return conn; - -error_1: - rxrpc_put_client_connection_id(bundle->local, conn); -error_0: - kfree(conn); - _leave(" = %d", ret); - return ERR_PTR(ret); } /* @@ -249,7 +205,8 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags)) goto dont_reuse; - if (conn->state != RXRPC_CONN_CLIENT || + if ((conn->state != RXRPC_CONN_CLIENT_UNSECURED && + conn->state != RXRPC_CONN_CLIENT) || conn->proto.epoch != rxnet->epoch) goto mark_dont_reuse; @@ -280,7 +237,7 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) * Look up the conn bundle that matches the connection parameters, adding it if * it doesn't yet exist. */ -static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp) +int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp) { static atomic_t rxrpc_bundle_id; struct rxrpc_bundle *bundle, *candidate; @@ -295,7 +252,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t if (test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags)) { call->bundle = rxrpc_alloc_bundle(call, gfp); - return call->bundle; + return call->bundle ? 0 : -ENOMEM; } /* First, see if the bundle is already there. */ @@ -324,7 +281,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t /* It wasn't. We need to add one. */ candidate = rxrpc_alloc_bundle(call, gfp); if (!candidate) - return ERR_PTR(-ENOMEM); + return -ENOMEM; _debug("search 2"); spin_lock(&local->client_bundles_lock); @@ -355,7 +312,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t call->bundle = rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call); spin_unlock(&local->client_bundles_lock); _leave(" = B=%u [new]", call->bundle->debug_id); - return call->bundle; + return 0; found_bundle_free: rxrpc_free_bundle(candidate); @@ -364,160 +321,77 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t rxrpc_activate_bundle(bundle); spin_unlock(&local->client_bundles_lock); _leave(" = B=%u [found]", call->bundle->debug_id); - return call->bundle; -} - -/* - * Create or find a client bundle to use for a call. - * - * If we return with a connection, the call will be on its waiting list. It's - * left to the caller to assign a channel and wake up the call. - */ -static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_call *call, gfp_t gfp) -{ - struct rxrpc_bundle *bundle; - - _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - - call->peer = rxrpc_lookup_peer(call->local, &call->dest_srx, gfp); - if (!call->peer) - goto error; - - call->tx_last_sent = ktime_get_real(); - call->cong_ssthresh = call->peer->cong_ssthresh; - if (call->cong_cwnd >= call->cong_ssthresh) - call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; - else - call->cong_mode = RXRPC_CALL_SLOW_START; - - /* Find the client connection bundle. */ - bundle = rxrpc_look_up_bundle(call, gfp); - if (!bundle) - goto error; - - /* Get this call queued. Someone else may activate it whilst we're - * lining up a new connection, but that's fine. - */ - spin_lock(&bundle->channel_lock); - list_add_tail(&call->chan_wait_link, &bundle->waiting_calls); - spin_unlock(&bundle->channel_lock); - - _leave(" = [B=%x]", bundle->debug_id); - return bundle; - -error: - _leave(" = -ENOMEM"); - return ERR_PTR(-ENOMEM); + return 0; } /* * Allocate a new connection and add it into a bundle. */ -static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp) - __releases(bundle->channel_lock) +static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, + unsigned int slot) { - struct rxrpc_connection *candidate = NULL, *old = NULL; - bool conflict; - int i; - - _enter(""); - - conflict = bundle->alloc_conn; - if (!conflict) - bundle->alloc_conn = true; - spin_unlock(&bundle->channel_lock); - if (conflict) { - _leave(" [conf]"); - return; - } - - candidate = rxrpc_alloc_client_connection(bundle, gfp); - - spin_lock(&bundle->channel_lock); - bundle->alloc_conn = false; + struct rxrpc_connection *conn, *old; + unsigned int shift = slot * RXRPC_MAXCALLS; + unsigned int i; - if (IS_ERR(candidate)) { - bundle->alloc_error = PTR_ERR(candidate); - spin_unlock(&bundle->channel_lock); - _leave(" [err %ld]", PTR_ERR(candidate)); - return; + old = bundle->conns[slot]; + if (old) { + bundle->conns[slot] = NULL; + trace_rxrpc_client(old, -1, rxrpc_client_replace); + rxrpc_put_connection(old, rxrpc_conn_put_noreuse); } - bundle->alloc_error = 0; - - for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) { - unsigned int shift = i * RXRPC_MAXCALLS; - int j; - - old = bundle->conns[i]; - if (!rxrpc_may_reuse_conn(old)) { - if (old) - trace_rxrpc_client(old, -1, rxrpc_client_replace); - candidate->bundle_shift = shift; - rxrpc_activate_bundle(bundle); - bundle->conns[i] = candidate; - for (j = 0; j < RXRPC_MAXCALLS; j++) - set_bit(shift + j, &bundle->avail_chans); - candidate = NULL; - break; - } - - old = NULL; + conn = rxrpc_alloc_client_connection(bundle); + if (IS_ERR(conn)) { + bundle->alloc_error = PTR_ERR(conn); + return false; } - spin_unlock(&bundle->channel_lock); - - if (candidate) { - _debug("discard C=%x", candidate->debug_id); - trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate); - rxrpc_put_connection(candidate, rxrpc_conn_put_discard); - } - - rxrpc_put_connection(old, rxrpc_conn_put_noreuse); - _leave(""); + rxrpc_activate_bundle(bundle); + conn->bundle_shift = shift; + bundle->conns[slot] = conn; + for (i = 0; i < RXRPC_MAXCALLS; i++) + set_bit(shift + i, &bundle->avail_chans); + return true; } /* * Add a connection to a bundle if there are no usable connections or we have * connections waiting for extra capacity. */ -static void rxrpc_maybe_add_conn(struct rxrpc_bundle *bundle, gfp_t gfp) +static bool rxrpc_bundle_has_space(struct rxrpc_bundle *bundle) { - struct rxrpc_call *call; - int i, usable; + int slot = -1, i, usable; _enter(""); - spin_lock(&bundle->channel_lock); + bundle->alloc_error = 0; /* See if there are any usable connections. */ usable = 0; - for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) + for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) { if (rxrpc_may_reuse_conn(bundle->conns[i])) usable++; - - if (!usable && !list_empty(&bundle->waiting_calls)) { - call = list_first_entry(&bundle->waiting_calls, - struct rxrpc_call, chan_wait_link); - if (test_bit(RXRPC_CALL_UPGRADE, &call->flags)) - bundle->try_upgrade = true; + else if (slot == -1) + slot = i; } + if (!usable && bundle->upgrade) + bundle->try_upgrade = true; + if (!usable) goto alloc_conn; if (!bundle->avail_chans && !bundle->try_upgrade && - !list_empty(&bundle->waiting_calls) && usable < ARRAY_SIZE(bundle->conns)) goto alloc_conn; - spin_unlock(&bundle->channel_lock); _leave(""); - return; + return usable; alloc_conn: - return rxrpc_add_conn_to_bundle(bundle, gfp); + return slot >= 0 ? rxrpc_add_conn_to_bundle(bundle, slot) : false; } /* @@ -531,11 +405,13 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, struct rxrpc_channel *chan = &conn->channels[channel]; struct rxrpc_bundle *bundle = conn->bundle; struct rxrpc_call *call = list_entry(bundle->waiting_calls.next, - struct rxrpc_call, chan_wait_link); + struct rxrpc_call, wait_link); u32 call_id = chan->call_counter + 1; _enter("C=%x,%u", conn->debug_id, channel); + list_del_init(&call->wait_link); + trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); /* Cancel the final ACK on the previous call if it hasn't been sent yet @@ -545,65 +421,50 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, clear_bit(conn->bundle_shift + channel, &bundle->avail_chans); rxrpc_see_call(call, rxrpc_call_see_activate_client); - list_del_init(&call->chan_wait_link); call->conn = rxrpc_get_connection(conn, rxrpc_conn_get_activate_call); call->cid = conn->proto.cid | channel; call->call_id = call_id; call->dest_srx.srx_service = conn->service_id; - - trace_rxrpc_connect_call(call); - - rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_SEND_REQUEST); - - /* Paired with the read barrier in rxrpc_connect_call(). This orders - * cid and epoch in the connection wrt to call_id without the need to - * take the channel_lock. - * - * We provisionally assign a callNumber at this point, but we don't - * confirm it until the call is about to be exposed. - * - * TODO: Pair with a barrier in the data_ready handler when that looks - * at the call ID through a connection channel. - */ - smp_wmb(); + call->cong_ssthresh = call->peer->cong_ssthresh; + if (call->cong_cwnd >= call->cong_ssthresh) + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + else + call->cong_mode = RXRPC_CALL_SLOW_START; chan->call_id = call_id; chan->call_debug_id = call->debug_id; - rcu_assign_pointer(chan->call, call); + chan->call = call; + + rxrpc_see_call(call, rxrpc_call_see_connected); + trace_rxrpc_connect_call(call); + call->tx_last_sent = ktime_get_real(); + rxrpc_start_call_timer(call); + rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_SEND_REQUEST); wake_up(&call->waitq); } /* * Remove a connection from the idle list if it's on it. */ -static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connection *conn) +static void rxrpc_unidle_conn(struct rxrpc_connection *conn) { - struct rxrpc_local *local = bundle->local; - bool drop_ref; - if (!list_empty(&conn->cache_link)) { - drop_ref = false; - spin_lock(&local->client_conn_cache_lock); - if (!list_empty(&conn->cache_link)) { - list_del_init(&conn->cache_link); - drop_ref = true; - } - spin_unlock(&local->client_conn_cache_lock); - if (drop_ref) - rxrpc_put_connection(conn, rxrpc_conn_put_unidle); + list_del_init(&conn->cache_link); + rxrpc_put_connection(conn, rxrpc_conn_put_unidle); } } /* - * Assign channels and callNumbers to waiting calls with channel_lock - * held by caller. + * Assign channels and callNumbers to waiting calls. */ -static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle) +static void rxrpc_activate_channels(struct rxrpc_bundle *bundle) { struct rxrpc_connection *conn; unsigned long avail, mask; unsigned int channel, slot; + trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans); + if (bundle->try_upgrade) mask = 1; else @@ -623,7 +484,7 @@ static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle) if (bundle->try_upgrade) set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags); - rxrpc_unidle_conn(bundle, conn); + rxrpc_unidle_conn(conn); channel &= (RXRPC_MAXCALLS - 1); conn->act_chans |= 1 << channel; @@ -632,125 +493,24 @@ static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle) } /* - * Assign channels and callNumbers to waiting calls. - */ -static void rxrpc_activate_channels(struct rxrpc_bundle *bundle) -{ - _enter("B=%x", bundle->debug_id); - - trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans); - - if (!bundle->avail_chans) - return; - - spin_lock(&bundle->channel_lock); - rxrpc_activate_channels_locked(bundle); - spin_unlock(&bundle->channel_lock); - _leave(""); -} - -/* - * Wait for a callNumber and a channel to be granted to a call. - */ -static int rxrpc_wait_for_channel(struct rxrpc_bundle *bundle, - struct rxrpc_call *call, gfp_t gfp) -{ - DECLARE_WAITQUEUE(myself, current); - int ret = 0; - - _enter("%d", call->debug_id); - - if (!gfpflags_allow_blocking(gfp)) { - rxrpc_maybe_add_conn(bundle, gfp); - rxrpc_activate_channels(bundle); - ret = bundle->alloc_error ?: -EAGAIN; - goto out; - } - - add_wait_queue_exclusive(&call->waitq, &myself); - for (;;) { - rxrpc_maybe_add_conn(bundle, gfp); - rxrpc_activate_channels(bundle); - ret = bundle->alloc_error; - if (ret < 0) - break; - - switch (call->interruptibility) { - case RXRPC_INTERRUPTIBLE: - case RXRPC_PREINTERRUPTIBLE: - set_current_state(TASK_INTERRUPTIBLE); - break; - case RXRPC_UNINTERRUPTIBLE: - default: - set_current_state(TASK_UNINTERRUPTIBLE); - break; - } - if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) - break; - if ((call->interruptibility == RXRPC_INTERRUPTIBLE || - call->interruptibility == RXRPC_PREINTERRUPTIBLE) && - signal_pending(current)) { - ret = -ERESTARTSYS; - break; - } - schedule(); - } - remove_wait_queue(&call->waitq, &myself); - __set_current_state(TASK_RUNNING); - -out: - _leave(" = %d", ret); - return ret; -} - -/* - * find a connection for a call - * - called in process context with IRQs enabled + * Connect waiting channels (called from the I/O thread). */ -int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp) +void rxrpc_connect_client_calls(struct rxrpc_local *local) { - struct rxrpc_bundle *bundle; - int ret = 0; - - _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - - rxrpc_get_call(call, rxrpc_call_get_io_thread); - - bundle = rxrpc_prep_call(call, gfp); - if (IS_ERR(bundle)) { - rxrpc_put_call(call, rxrpc_call_get_io_thread); - ret = PTR_ERR(bundle); - goto out; - } - - if (rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_CONN) { - ret = rxrpc_wait_for_channel(bundle, call, gfp); - if (ret < 0) - goto wait_failed; - } - -granted_channel: - /* Paired with the write barrier in rxrpc_activate_one_channel(). */ - smp_rmb(); + struct rxrpc_call *call; -out: - _leave(" = %d", ret); - return ret; + while ((call = list_first_entry_or_null(&local->new_client_calls, + struct rxrpc_call, wait_link)) + ) { + struct rxrpc_bundle *bundle = call->bundle; -wait_failed: - spin_lock(&bundle->channel_lock); - list_del_init(&call->chan_wait_link); - spin_unlock(&bundle->channel_lock); + spin_lock(&local->client_call_lock); + list_move_tail(&call->wait_link, &bundle->waiting_calls); + spin_unlock(&local->client_call_lock); - if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) { - ret = 0; - goto granted_channel; + if (rxrpc_bundle_has_space(bundle)) + rxrpc_activate_channels(bundle); } - - trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed); - rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); - rxrpc_disconnect_client_call(bundle, call); - goto out; } /* @@ -808,8 +568,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call _enter("c=%x", call->debug_id); - spin_lock(&bundle->channel_lock); - /* Calls that have never actually been assigned a channel can simply be * discarded. */ @@ -818,8 +576,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call _debug("call is waiting"); ASSERTCMP(call->call_id, ==, 0); ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); - list_del_init(&call->chan_wait_link); - goto out; + list_del_init(&call->wait_link); + return; } cid = call->cid; @@ -827,10 +585,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call chan = &conn->channels[channel]; trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); - if (rcu_access_pointer(chan->call) != call) { - spin_unlock(&bundle->channel_lock); - BUG(); - } + if (WARN_ON(chan->call != call)) + return; may_reuse = rxrpc_may_reuse_conn(conn); @@ -851,16 +607,15 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call trace_rxrpc_client(conn, channel, rxrpc_client_to_active); bundle->try_upgrade = false; if (may_reuse) - rxrpc_activate_channels_locked(bundle); + rxrpc_activate_channels(bundle); } - } /* See if we can pass the channel directly to another call. */ if (may_reuse && !list_empty(&bundle->waiting_calls)) { trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); rxrpc_activate_one_channel(conn, channel); - goto out; + return; } /* Schedule the final ACK to be transmitted in a short while so that it @@ -878,7 +633,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call } /* Deactivate the channel. */ - rcu_assign_pointer(chan->call, NULL); + chan->call = NULL; set_bit(conn->bundle_shift + channel, &conn->bundle->avail_chans); conn->act_chans &= ~(1 << channel); @@ -891,15 +646,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call conn->idle_timestamp = jiffies; rxrpc_get_connection(conn, rxrpc_conn_get_idle); - spin_lock(&local->client_conn_cache_lock); list_move_tail(&conn->cache_link, &local->idle_client_conns); - spin_unlock(&local->client_conn_cache_lock); rxrpc_set_client_reap_timer(local); } - -out: - spin_unlock(&bundle->channel_lock); } /* @@ -909,7 +659,6 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) { struct rxrpc_bundle *bundle = conn->bundle; unsigned int bindex; - bool need_drop = false; int i; _enter("C=%x", conn->debug_id); @@ -917,18 +666,13 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) rxrpc_process_delayed_final_acks(conn, true); - spin_lock(&bundle->channel_lock); bindex = conn->bundle_shift / RXRPC_MAXCALLS; if (bundle->conns[bindex] == conn) { _debug("clear slot %u", bindex); bundle->conns[bindex] = NULL; for (i = 0; i < RXRPC_MAXCALLS; i++) clear_bit(conn->bundle_shift + i, &bundle->avail_chans); - need_drop = true; - } - spin_unlock(&bundle->channel_lock); - - if (need_drop) { + rxrpc_put_client_connection_id(bundle->local, conn); rxrpc_deactivate_bundle(bundle); rxrpc_put_connection(conn, rxrpc_conn_put_unbundle); } @@ -990,24 +734,16 @@ void rxrpc_discard_expired_client_conns(struct rxrpc_local *local) _enter(""); - if (list_empty(&local->idle_client_conns)) { - _leave(" [empty]"); - return; - } - /* We keep an estimate of what the number of conns ought to be after * we've discarded some so that we don't overdo the discarding. */ nr_conns = atomic_read(&local->rxnet->nr_client_conns); next: - spin_lock(&local->client_conn_cache_lock); - - if (list_empty(&local->idle_client_conns)) - goto out; - - conn = list_entry(local->idle_client_conns.next, - struct rxrpc_connection, cache_link); + conn = list_first_entry_or_null(&local->idle_client_conns, + struct rxrpc_connection, cache_link); + if (!conn) + return; if (!local->kill_all_client_conns) { /* If the number of connections is over the reap limit, we @@ -1032,8 +768,6 @@ void rxrpc_discard_expired_client_conns(struct rxrpc_local *local) trace_rxrpc_client(conn, -1, rxrpc_client_discard); list_del_init(&conn->cache_link); - spin_unlock(&local->client_conn_cache_lock); - rxrpc_unbundle_conn(conn); /* Drop the ->cache_link ref */ rxrpc_put_connection(conn, rxrpc_conn_put_discard_idle); @@ -1053,8 +787,6 @@ void rxrpc_discard_expired_client_conns(struct rxrpc_local *local) if (!local->kill_all_client_conns) timer_reduce(&local->client_conn_reap_timer, conn_expires_at); -out: - spin_unlock(&local->client_conn_cache_lock); _leave(""); } @@ -1063,34 +795,19 @@ void rxrpc_discard_expired_client_conns(struct rxrpc_local *local) */ void rxrpc_clean_up_local_conns(struct rxrpc_local *local) { - struct rxrpc_connection *conn, *tmp; - LIST_HEAD(graveyard); + struct rxrpc_connection *conn; _enter(""); - spin_lock(&local->client_conn_cache_lock); local->kill_all_client_conns = true; - spin_unlock(&local->client_conn_cache_lock); del_timer_sync(&local->client_conn_reap_timer); - spin_lock(&local->client_conn_cache_lock); - - list_for_each_entry_safe(conn, tmp, &local->idle_client_conns, - cache_link) { - if (conn->local == local) { - atomic_dec(&conn->active); - trace_rxrpc_client(conn, -1, rxrpc_client_discard); - list_move(&conn->cache_link, &graveyard); - } - } - - spin_unlock(&local->client_conn_cache_lock); - - while (!list_empty(&graveyard)) { - conn = list_entry(graveyard.next, - struct rxrpc_connection, cache_link); + while ((conn = list_first_entry_or_null(&local->idle_client_conns, + struct rxrpc_connection, cache_link))) { list_del_init(&conn->cache_link); + atomic_dec(&conn->active); + trace_rxrpc_client(conn, -1, rxrpc_client_discard); rxrpc_unbundle_conn(conn); rxrpc_put_connection(conn, rxrpc_conn_put_local_dead); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 8d0b9ff0a5e15..44414e724415e 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -100,9 +100,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, /* If the last call got moved on whilst we were waiting to run, just * ignore this packet. */ - call_id = READ_ONCE(chan->last_call); - /* Sync with __rxrpc_disconnect_call() */ - smp_rmb(); + call_id = chan->last_call; if (skb && call_id != sp->hdr.callNumber) return; @@ -119,9 +117,12 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, iov[2].iov_base = &ack_info; iov[2].iov_len = sizeof(ack_info); + serial = atomic_inc_return(&conn->serial); + pkt.whdr.epoch = htonl(conn->proto.epoch); pkt.whdr.cid = htonl(conn->proto.cid | channel); pkt.whdr.callNumber = htonl(call_id); + pkt.whdr.serial = htonl(serial); pkt.whdr.seq = 0; pkt.whdr.type = chan->last_type; pkt.whdr.flags = conn->out_clientflag; @@ -158,31 +159,15 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, iov[0].iov_len += sizeof(pkt.ack); len += sizeof(pkt.ack) + 3 + sizeof(ack_info); ioc = 3; - break; - - default: - return; - } - - /* Resync with __rxrpc_disconnect_call() and check that the last call - * didn't get advanced whilst we were filling out the packets. - */ - smp_rmb(); - if (READ_ONCE(chan->last_call) != call_id) - return; - - serial = atomic_inc_return(&conn->serial); - pkt.whdr.serial = htonl(serial); - switch (chan->last_type) { - case RXRPC_PACKET_TYPE_ABORT: - break; - case RXRPC_PACKET_TYPE_ACK: trace_rxrpc_tx_ack(chan->call_debug_id, serial, ntohl(pkt.ack.firstPacket), ntohl(pkt.ack.serial), pkt.ack.reason, 0); break; + + default: + return; } ret = kernel_sendmsg(conn->local->socket, &msg, iov, ioc, len); @@ -207,12 +192,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn) _enter("{%d},%x", conn->debug_id, conn->abort_code); - spin_lock(&conn->bundle->channel_lock); - for (i = 0; i < RXRPC_MAXCALLS; i++) { - call = rcu_dereference_protected( - conn->channels[i].call, - lockdep_is_held(&conn->bundle->channel_lock)); + call = conn->channels[i].call; if (call) rxrpc_set_call_completion(call, conn->completion, @@ -220,7 +201,6 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn) conn->error); } - spin_unlock(&conn->bundle->channel_lock); _leave(""); } @@ -316,9 +296,7 @@ void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn, bool force) if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags)) continue; - smp_rmb(); /* vs rxrpc_disconnect_client_call */ - ack_at = READ_ONCE(chan->final_ack_at); - + ack_at = chan->final_ack_at; if (time_before(j, ack_at) && !force) { if (time_before(ack_at, next_j)) { next_j = ack_at; @@ -446,15 +424,8 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb) if (conn->state != RXRPC_CONN_SERVICE) break; - spin_lock(&conn->bundle->channel_lock); - for (loop = 0; loop < RXRPC_MAXCALLS; loop++) - rxrpc_call_is_secure( - rcu_dereference_protected( - conn->channels[loop].call, - lockdep_is_held(&conn->bundle->channel_lock))); - - spin_unlock(&conn->bundle->channel_lock); + rxrpc_call_is_secure(conn->channels[loop].call); break; } diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 3d8c1dc6a82ae..ac85d4644a3c3 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -67,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet, INIT_WORK(&conn->destructor, rxrpc_clean_up_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); + mutex_init(&conn->security_lock); skb_queue_head_init(&conn->rx_queue); conn->rxnet = rxnet; conn->security = &rxrpc_no_security; @@ -157,7 +158,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, _enter("%d,%x", conn->debug_id, call->cid); - if (rcu_access_pointer(chan->call) == call) { + if (chan->call == call) { /* Save the result of the call so that we can repeat it if necessary * through the channel, whilst disposing of the actual call record. */ @@ -177,12 +178,9 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, break; } - /* Sync with rxrpc_conn_retransmit(). */ - smp_wmb(); chan->last_call = chan->call_id; chan->call_id = chan->call_counter; - - rcu_assign_pointer(chan->call, NULL); + chan->call = NULL; } _leave(""); @@ -210,10 +208,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) if (rxrpc_is_client_call(call)) { rxrpc_disconnect_client_call(call->bundle, call); } else { - spin_lock(&conn->bundle->channel_lock); __rxrpc_disconnect_call(conn, call); - spin_unlock(&conn->bundle->channel_lock); - conn->idle_timestamp = jiffies; if (atomic_dec_and_test(&conn->active)) rxrpc_set_service_reap_timer(conn->rxnet, @@ -316,10 +311,10 @@ static void rxrpc_clean_up_connection(struct work_struct *work) container_of(work, struct rxrpc_connection, destructor); struct rxrpc_net *rxnet = conn->rxnet; - ASSERT(!rcu_access_pointer(conn->channels[0].call) && - !rcu_access_pointer(conn->channels[1].call) && - !rcu_access_pointer(conn->channels[2].call) && - !rcu_access_pointer(conn->channels[3].call)); + ASSERT(!conn->channels[0].call && + !conn->channels[1].call && + !conn->channels[2].call && + !conn->channels[3].call); ASSERT(list_empty(&conn->cache_link)); del_timer_sync(&conn->timer); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 2a55a88b2a5b7..f30323de82bd1 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -11,7 +11,6 @@ static struct rxrpc_bundle rxrpc_service_dummy_bundle = { .ref = REFCOUNT_INIT(1), .debug_id = UINT_MAX, - .channel_lock = __SPIN_LOCK_UNLOCKED(&rxrpc_service_dummy_bundle.channel_lock), }; /* diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index a299cc34c1403..9e9dfb2fc559b 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -369,10 +369,7 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, return just_discard; } - rcu_read_lock(); - call = rxrpc_try_get_call(rcu_dereference(chan->call), - rxrpc_call_get_input); - rcu_read_unlock(); + call = rxrpc_try_get_call(chan->call, rxrpc_call_get_input); if (sp->hdr.callNumber > chan->call_id) { if (rxrpc_to_client(sp)) { @@ -453,6 +450,9 @@ int rxrpc_io_thread(void *data) continue; } + if (!list_empty(&local->new_client_calls)) + rxrpc_connect_client_calls(local); + /* Process received packets and errors. */ if ((skb = __skb_dequeue(&rx_queue))) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -492,7 +492,10 @@ int rxrpc_io_thread(void *data) should_stop = kthread_should_stop(); if (!skb_queue_empty(&local->rx_queue) || !list_empty(&local->call_attend_q) || - !list_empty(&local->conn_attend_q)) { + !list_empty(&local->conn_attend_q) || + !list_empty(&local->new_client_calls) || + test_bit(RXRPC_CLIENT_CONN_REAP_TIMER, + &local->client_conn_flags)) { __set_current_state(TASK_RUNNING); continue; } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 9bc8d08ca12cf..b8eaca5d9f221 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -117,7 +117,6 @@ static struct rxrpc_local *rxrpc_alloc_local(struct net *net, local->client_bundles = RB_ROOT; spin_lock_init(&local->client_bundles_lock); local->kill_all_client_conns = false; - spin_lock_init(&local->client_conn_cache_lock); INIT_LIST_HEAD(&local->idle_client_conns); timer_setup(&local->client_conn_reap_timer, rxrpc_client_conn_reap_timeout, 0); @@ -133,7 +132,8 @@ static struct rxrpc_local *rxrpc_alloc_local(struct net *net, if (tmp == 0) tmp = 1; idr_set_cursor(&local->conn_ids, tmp); - spin_lock_init(&local->conn_lock); + INIT_LIST_HEAD(&local->new_client_calls); + spin_lock_init(&local->client_call_lock); trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, 1); } @@ -435,7 +435,7 @@ void rxrpc_destroy_local(struct rxrpc_local *local) * local endpoint. */ rxrpc_purge_queue(&local->rx_queue); - rxrpc_destroy_client_conn_ids(local); + rxrpc_purge_client_connections(local); } /* diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index c39ef94602ed7..750158a085cdd 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -12,6 +12,7 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { [RXRPC_CONN_UNUSED] = "Unused ", + [RXRPC_CONN_CLIENT_UNSECURED] = "ClUnsec ", [RXRPC_CONN_CLIENT] = "Client ", [RXRPC_CONN_SERVICE_PREALLOC] = "SvPrealc", [RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ", diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index dfb01e7b90fbb..1bf571a66e020 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -1122,36 +1122,31 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, goto protocol_error_free; } - spin_lock(&conn->bundle->channel_lock); for (i = 0; i < RXRPC_MAXCALLS; i++) { - struct rxrpc_call *call; u32 call_id = ntohl(response->encrypted.call_id[i]); + u32 counter = READ_ONCE(conn->channels[i].call_counter); if (call_id > INT_MAX) { rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, rxkad_abort_resp_bad_callid); - goto protocol_error_unlock; + goto protocol_error_free; } - if (call_id < conn->channels[i].call_counter) { + if (call_id < counter) { rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, rxkad_abort_resp_call_ctr); - goto protocol_error_unlock; + goto protocol_error_free; } - if (call_id > conn->channels[i].call_counter) { - call = rcu_dereference_protected( - conn->channels[i].call, - lockdep_is_held(&conn->bundle->channel_lock)); - if (call && !__rxrpc_call_is_complete(call)) { + if (call_id > counter) { + if (conn->channels[i].call) { rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, rxkad_abort_resp_call_state); - goto protocol_error_unlock; + goto protocol_error_free; } conn->channels[i].call_counter = call_id; } } - spin_unlock(&conn->bundle->channel_lock); if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) { rxrpc_abort_conn(conn, skb, RXKADOUTOFSEQUENCE, -EPROTO, @@ -1179,8 +1174,6 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _leave(" = 0"); return 0; -protocol_error_unlock: - spin_unlock(&conn->bundle->channel_lock); protocol_error_free: kfree(ticket); protocol_error: diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 78af14694618d..cd66634dffe69 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -97,38 +97,31 @@ int rxrpc_init_client_call_security(struct rxrpc_call *call) */ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) { - const struct rxrpc_security *sec; struct rxrpc_key_token *token; struct key *key = conn->key; - int ret; + int ret = 0; _enter("{%d},{%x}", conn->debug_id, key_serial(key)); - if (!key) - return 0; - - ret = key_validate(key); - if (ret < 0) - return ret; - for (token = key->payload.data[0]; token; token = token->next) { - sec = rxrpc_security_lookup(token->security_index); - if (sec) + if (token->security_index == conn->security->security_index) goto found; } return -EKEYREJECTED; found: - conn->security = sec; - - ret = conn->security->init_connection_security(conn, token); - if (ret < 0) { - conn->security = &rxrpc_no_security; - return ret; + mutex_lock(&conn->security_lock); + if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) { + ret = conn->security->init_connection_security(conn, token); + if (ret == 0) { + spin_lock(&conn->state_lock); + if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) + conn->state = RXRPC_CONN_CLIENT; + spin_unlock(&conn->state_lock); + } } - - _leave(" = 0"); - return 0; + mutex_unlock(&conn->security_lock); + return ret; } /* diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index a5d0005b7ce52..da49fcf1c4567 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -38,6 +38,60 @@ bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error, return false; } +/* + * Wait for a call to become connected. Interruption here doesn't cause the + * call to be aborted. + */ +static int rxrpc_wait_to_be_connected(struct rxrpc_call *call, long *timeo) +{ + DECLARE_WAITQUEUE(myself, current); + int ret = 0; + + _enter("%d", call->debug_id); + + if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) + return call->error; + + add_wait_queue_exclusive(&call->waitq, &myself); + + for (;;) { + ret = call->error; + if (ret < 0) + break; + + switch (call->interruptibility) { + case RXRPC_INTERRUPTIBLE: + case RXRPC_PREINTERRUPTIBLE: + set_current_state(TASK_INTERRUPTIBLE); + break; + case RXRPC_UNINTERRUPTIBLE: + default: + set_current_state(TASK_UNINTERRUPTIBLE); + break; + } + if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) { + ret = call->error; + break; + } + if ((call->interruptibility == RXRPC_INTERRUPTIBLE || + call->interruptibility == RXRPC_PREINTERRUPTIBLE) && + signal_pending(current)) { + ret = sock_intr_errno(*timeo); + break; + } + *timeo = schedule_timeout(*timeo); + } + + remove_wait_queue(&call->waitq, &myself); + __set_current_state(TASK_RUNNING); + + if (ret == 0 && rxrpc_call_is_complete(call)) + ret = call->error; + + _leave(" = %d", ret); + return ret; +} + /* * Return true if there's sufficient Tx queue space. */ @@ -239,6 +293,16 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + ret = rxrpc_wait_to_be_connected(call, &timeo); + if (ret < 0) + return ret; + + if (call->conn->state == RXRPC_CONN_CLIENT_UNSECURED) { + ret = rxrpc_init_client_conn_security(call->conn); + if (ret < 0) + return ret; + } + /* this should be in poll */ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); From c244c092f1ed2acfb5af3d3da81e22367d3dd733 Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Thu, 5 Jan 2023 06:02:51 +0000 Subject: [PATCH 0436/1593] tipc: fix unexpected link reset due to discovery messages This unexpected behavior is observed: node 1 | node 2 ------ | ------ link is established | link is established reboot | link is reset up | send discovery message receive discovery message | link is established | link is established send discovery message | | receive discovery message | link is reset (unexpected) | send reset message link is reset | It is due to delayed re-discovery as described in function tipc_node_check_dest(): "this link endpoint has already reset and re-established contact with the peer, before receiving a discovery message from that node." However, commit 598411d70f85 has changed the condition for calling tipc_node_link_down() which was the acceptance of new media address. This commit fixes this by restoring the old and correct behavior. Fixes: 598411d70f85 ("tipc: make resetting of links non-atomic") Acked-by: Jon Maloy Signed-off-by: Tung Nguyen Signed-off-by: David S. Miller --- net/tipc/node.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 49ddc484c4fe7..5e000fde80676 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1179,8 +1179,9 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool addr_match = false; bool sign_match = false; bool link_up = false; + bool link_is_reset = false; bool accept_addr = false; - bool reset = true; + bool reset = false; char *if_name; unsigned long intv; u16 session; @@ -1200,14 +1201,14 @@ void tipc_node_check_dest(struct net *net, u32 addr, /* Prepare to validate requesting node's signature and media address */ l = le->link; link_up = l && tipc_link_is_up(l); + link_is_reset = l && tipc_link_is_reset(l); addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr)); sign_match = (signature == n->signature); /* These three flags give us eight permutations: */ if (sign_match && addr_match && link_up) { - /* All is fine. Do nothing. */ - reset = false; + /* All is fine. Ignore requests. */ /* Peer node is not a container/local namespace */ if (!n->peer_hash_mix) n->peer_hash_mix = hash_mixes; @@ -1232,6 +1233,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, */ accept_addr = true; *respond = true; + reset = true; } else if (!sign_match && addr_match && link_up) { /* Peer node rebooted. Two possibilities: * - Delayed re-discovery; this link endpoint has already @@ -1263,6 +1265,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, n->signature = signature; accept_addr = true; *respond = true; + reset = true; } if (!accept_addr) @@ -1291,6 +1294,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, tipc_link_fsm_evt(l, LINK_RESET_EVT); if (n->state == NODE_FAILINGOVER) tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); + link_is_reset = tipc_link_is_reset(l); le->link = l; n->link_cnt++; tipc_node_calculate_timer(n, l); @@ -1303,7 +1307,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: tipc_node_write_unlock(n); - if (reset && l && !tipc_link_is_reset(l)) + if (reset && !link_is_reset) tipc_node_link_down(n, b->identity, false); tipc_node_put(n); } From 83c7423d1eb6806d13c521d1002cc1a012111719 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Dec 2022 17:45:51 +0100 Subject: [PATCH 0437/1593] udf: Fix extension of the last extent in the file When extending the last extent in the file within the last block, we wrongly computed the length of the last extent. This is mostly a cosmetical problem since the extent does not contain any data and the length will be fixed up by following operations but still. Fixes: 1f3868f06855 ("udf: Fix extending file within last block") Signed-off-by: Jan Kara --- fs/udf/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 1d7c2a812fc19..f3e988928d1d8 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -595,7 +595,7 @@ static void udf_do_extend_final_block(struct inode *inode, */ if (new_elen <= (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) return; - added_bytes = (last_ext->extLength & UDF_EXTENT_LENGTH_MASK) - new_elen; + added_bytes = new_elen - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); last_ext->extLength += added_bytes; UDF_I(inode)->i_lenExtents += added_bytes; From 23970a1c9475b305770fd37bebfec7a10f263787 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 30 Dec 2022 12:53:41 -0500 Subject: [PATCH 0438/1593] udf: initialize newblock to 0 The clang build reports this error fs/udf/inode.c:805:6: error: variable 'newblock' is used uninitialized whenever 'if' condition is true [-Werror,-Wsometimes-uninitialized] if (*err < 0) ^~~~~~~~ newblock is never set before error handling jump. Initialize newblock to 0 and remove redundant settings. Fixes: d8b39db5fab8 ("udf: Handle error when adding extent to a file") Reported-by: Nathan Chancellor Signed-off-by: Tom Rix Signed-off-by: Jan Kara Message-Id: <20221230175341.1629734-1-trix@redhat.com> --- fs/udf/inode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index f3e988928d1d8..34e416327dd4e 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -684,7 +684,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, struct kernel_lb_addr eloc, tmpeloc; int c = 1; loff_t lbcount = 0, b_off = 0; - udf_pblk_t newblocknum, newblock; + udf_pblk_t newblocknum, newblock = 0; sector_t offset = 0; int8_t etype; struct udf_inode_info *iinfo = UDF_I(inode); @@ -787,7 +787,6 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, ret = udf_do_extend_file(inode, &prev_epos, laarr, hole_len); if (ret < 0) { *err = ret; - newblock = 0; goto out_free; } c = 0; @@ -852,7 +851,6 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, goal, err); if (!newblocknum) { *err = -ENOSPC; - newblock = 0; goto out_free; } if (isBeyondEOF) From 2a33ad4a0ba5a527b92aeef9a313aefec197fe28 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Mon, 2 Jan 2023 22:00:42 +0530 Subject: [PATCH 0439/1593] HID: amd_sfh: Fix warning unwind goto Return directly instead of using existing goto will not cleanup previously allocated resources. Hence replace return with goto to fix warning unwind goto which cleanups previously allocated resources. Fixes: 93ce5e0231d7 ("HID: amd_sfh: Implement SFH1.1 functionality") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Basavaraj Natikar Signed-off-by: Jiri Kosina --- drivers/hid/amd-sfh-hid/amd_sfh_client.c | 2 +- drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index ab125f79408f2..1fb0f7105fb21 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -282,7 +282,7 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) } rc = mp2_ops->get_rep_desc(cl_idx, cl_data->report_descr[i]); if (rc) - return rc; + goto cleanup; mp2_ops->start(privdata, info); status = amd_sfh_wait_for_response (privdata, cl_data->sensor_idx[i], SENSOR_ENABLED); diff --git a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c index 4da2f9f62aba3..a1d6e08fab7d4 100644 --- a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c +++ b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c @@ -160,7 +160,7 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata) } rc = mp2_ops->get_rep_desc(cl_idx, cl_data->report_descr[i]); if (rc) - return rc; + goto cleanup; writel(0, privdata->mmio + AMD_P2C_MSG(0)); mp2_ops->start(privdata, info); From e498a04443240c15c3c857165f7b652b87f4fd96 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 3 Jan 2023 13:17:46 +0100 Subject: [PATCH 0440/1593] usb: dwc3: xilinx: include linux/gpio/consumer.h The newly added gpio consumer calls cause a build failure in configurations that fail to include the right header implicitly: drivers/usb/dwc3/dwc3-xilinx.c: In function 'dwc3_xlnx_init_zynqmp': drivers/usb/dwc3/dwc3-xilinx.c:207:22: error: implicit declaration of function 'devm_gpiod_get_optional'; did you mean 'devm_clk_get_optional'? [-Werror=implicit-function-declaration] 207 | reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); | ^~~~~~~~~~~~~~~~~~~~~~~ | devm_clk_get_optional Fixes: ca05b38252d7 ("usb: dwc3: xilinx: Add gpio-reset support") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230103121755.956027-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-xilinx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c index 8607d4c23283c..0745e9f11b2ef 100644 --- a/drivers/usb/dwc3/dwc3-xilinx.c +++ b/drivers/usb/dwc3/dwc3-xilinx.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include From c4e3ef5685393c5051b52cf1e94b8891d49793ab Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 8 Dec 2022 16:50:35 -0800 Subject: [PATCH 0441/1593] usb: dwc3: gadget: Ignore End Transfer delay on teardown If we delay sending End Transfer for Setup TRB to be prepared, we need to check if the End Transfer was in preparation for a driver teardown/soft-disconnect. In those cases, just send the End Transfer command without delay. In the case of soft-disconnect, there's a very small chance the command may not go through immediately. But should it happen, the Setup TRB will be prepared during the polling of the controller halted state, allowing the command to go through then. In the case of disabling endpoint due to reconfiguration (e.g. set_interface(alt-setting) or usb reset), then it's driven by the host. Typically the host wouldn't immediately cancel the control request and send another control transfer to trigger the End Transfer command timeout. Fixes: 4db0fbb60136 ("usb: dwc3: gadget: Don't delay End Transfer on delayed_status") Cc: stable@vger.kernel.org Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/f1617a323e190b9cc408fb8b65456e32b5814113.1670546756.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 789976567f9f0..89dcfac01235f 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1727,6 +1727,7 @@ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool int else if (!ret) dep->flags |= DWC3_EP_END_TRANSFER_PENDING; + dep->flags &= ~DWC3_EP_DELAY_STOP; return ret; } @@ -3732,8 +3733,10 @@ void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, if (dep->number <= 1 && dwc->ep0state != EP0_DATA_PHASE) return; + if (interrupt && (dep->flags & DWC3_EP_DELAY_STOP)) + return; + if (!(dep->flags & DWC3_EP_TRANSFER_STARTED) || - (dep->flags & DWC3_EP_DELAY_STOP) || (dep->flags & DWC3_EP_END_TRANSFER_PENDING)) return; From 0d4d52361b6c29bf771acd4fa461f06d78fb2fac Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 3 Jan 2023 15:11:07 +0100 Subject: [PATCH 0442/1593] s390/debug: add _ASM_S390_ prefix to header guard Using DEBUG_H without a prefix is very generic and inconsistent with other header guards in arch/s390/include/asm. In fact it collides with the same name in the ath9k wireless driver though that depends on !S390 via disabled wireless support. Let's just use a consistent header guard name and prevent possible future trouble. Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/include/asm/debug.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 77f24262c25c1..ac665b9670c5d 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -4,8 +4,8 @@ * * Copyright IBM Corp. 1999, 2020 */ -#ifndef DEBUG_H -#define DEBUG_H +#ifndef _ASM_S390_DEBUG_H +#define _ASM_S390_DEBUG_H #include #include @@ -487,4 +487,4 @@ void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas); #endif /* MODULE */ -#endif /* DEBUG_H */ +#endif /* _ASM_S390_DEBUG_H */ From 45d619bdaf799196d702a9ae464b07066d6db2f9 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 7 Dec 2022 17:15:19 +0100 Subject: [PATCH 0443/1593] s390: expicitly align _edata and _end symbols on page boundary Symbols _edata and _end in the linker script are the only unaligned expicitly on page boundary. Although _end is aligned implicitly by BSS_SECTION macro that is still inconsistent and could lead to a bug if a tool or function would assume that _edata is as aligned as others. For example, vmem_map_init() function does not align symbols _etext, _einittext etc. Should these symbols be unaligned as well, the size of ranges to update were short on one page. Instead of fixing every occurrence of this kind in the code and external tools just force the alignment on these two symbols. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/kernel/vmlinux.lds.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 5ea3830af0ccf..f81d96710595a 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -79,6 +79,7 @@ SECTIONS _end_amode31_refs = .; } + . = ALIGN(PAGE_SIZE); _edata = .; /* End of data section */ /* will be freed after init */ @@ -193,6 +194,7 @@ SECTIONS BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE) + . = ALIGN(PAGE_SIZE); _end = . ; /* From a494398bde273143c2352dd373cad8211f7d94b2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 5 Jan 2023 12:13:06 +0900 Subject: [PATCH 0444/1593] s390: define RUNTIME_DISCARD_EXIT to fix link error with GNU ld < 2.36 Nathan Chancellor reports that the s390 vmlinux fails to link with GNU ld < 2.36 since commit 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv"). It happens for defconfig, or more specifically for CONFIG_EXPOLINE=y. $ s390x-linux-gnu-ld --version | head -n1 GNU ld (GNU Binutils for Debian) 2.35.2 $ make -s ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- allnoconfig $ ./scripts/config -e CONFIG_EXPOLINE $ make -s ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- olddefconfig $ make -s ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- `.exit.text' referenced in section `.s390_return_reg' of drivers/base/dd.o: defined in discarded section `.exit.text' of drivers/base/dd.o make[1]: *** [scripts/Makefile.vmlinux:34: vmlinux] Error 1 make: *** [Makefile:1252: vmlinux] Error 2 arch/s390/kernel/vmlinux.lds.S wants to keep EXIT_TEXT: .exit.text : { EXIT_TEXT } But, at the same time, EXIT_TEXT is thrown away by DISCARD because s390 does not define RUNTIME_DISCARD_EXIT. I still do not understand why the latter wins after 99cb0d917ffa, but defining RUNTIME_DISCARD_EXIT seems correct because the comment line in arch/s390/kernel/vmlinux.lds.S says: /* * .exit.text is discarded at runtime, not link time, * to deal with references from __bug_table */ Nathan also found that binutils commit 21401fc7bf67 ("Duplicate output sections in scripts") cured this issue, so we cannot reproduce it with binutils 2.36+, but it is better to not rely on it. Fixes: 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv") Link: https://lore.kernel.org/all/Y7Jal56f6UBh1abE@dev-arch.thelio-3990X/ Reported-by: Nathan Chancellor Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20230105031306.1455409-1-masahiroy@kernel.org Signed-off-by: Heiko Carstens --- arch/s390/kernel/vmlinux.lds.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index f81d96710595a..cbf9c1b0beda4 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -17,6 +17,8 @@ /* Handle ro_after_init data on our own. */ #define RO_AFTER_INIT_DATA +#define RUNTIME_DISCARD_EXIT + #define EMITS_PT_NOTE #include From f3dc61cde80d48751999c4cb46daf3b2185e6895 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 25 Nov 2022 10:18:26 +0000 Subject: [PATCH 0445/1593] firmware/psci: Fix MEM_PROTECT_RANGE function numbers PSCI v1.1 offers 32-bit and 64-bit variants of the MEM_PROTECT_RANGE call using function identifier 20. Fix the incorrect definitions of the MEM_PROTECT_CHECK_RANGE calls in the PSCI UAPI header. Cc: Dmitry Baryshkov Cc: Lorenzo Pieralisi Cc: Arnd Bergmann Fixes: 3137f2e60098 ("firmware/psci: Add debugfs support to ease debugging") Acked-by: Marc Zyngier Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20221125101826.22404-1-will@kernel.org Signed-off-by: Will Deacon --- include/uapi/linux/psci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 3511095c2702b..42a40ad3fb622 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -58,7 +58,7 @@ #define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) #define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19) -#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(19) +#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(20) #define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12) #define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13) @@ -67,7 +67,7 @@ #define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17) #define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) -#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(19) +#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(20) /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff From cef139299fd86098c6e3dbd389d1d0b2462d7710 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 5 Jan 2023 09:08:34 +0000 Subject: [PATCH 0446/1593] firmware/psci: Don't register with debugfs if PSCI isn't available Contrary to popular belief, PSCI is not a universal property of an ARM/arm64 system. There is a garden variety of systems out there that don't (or even cannot) implement it. I'm the first one deplore such a situation, but hey... On such systems, a "cat /sys/kernel/debug/psci" results in fireworks, as no invocation callback is registered. Check for the invoke_psci_fn and psci_ops.get_version pointers before registering with the debugfs subsystem, avoiding the issue altogether. Fixes: 3137f2e60098 ("firmware/psci: Add debugfs support to ease debugging") Reported-by: Hector Martin Signed-off-by: Marc Zyngier Cc: Dmitry Baryshkov Cc: Mark Brown Cc: Ulf Hansson Cc: Arnd Bergmann Cc: Mark Rutland Cc: Lorenzo Pieralisi Reviewed-by: Hector Martin Acked-by: Dmitry Baryshkov Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230105090834.630238-1-maz@kernel.org Signed-off-by: Will Deacon --- drivers/firmware/psci/psci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index e7bcfca4159f6..447ee4ea5c903 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -440,6 +440,9 @@ static const struct file_operations psci_debugfs_ops = { static int __init psci_debugfs_init(void) { + if (!invoke_psci_fn || !psci_ops.get_version) + return 0; + return PTR_ERR_OR_ZERO(debugfs_create_file("psci", 0444, NULL, NULL, &psci_debugfs_ops)); } From 5db568e748f6fb7d0d2e1bff4c2698ad4f50b982 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 2 Jan 2023 11:46:51 +0530 Subject: [PATCH 0447/1593] arm64: errata: Workaround possible Cortex-A715 [ESR|FAR]_ELx corruption If a Cortex-A715 cpu sees a page mapping permissions change from executable to non-executable, it may corrupt the ESR_ELx and FAR_ELx registers, on the next instruction abort caused by permission fault. Only user-space does executable to non-executable permission transition via mprotect() system call which calls ptep_modify_prot_start() and ptep_modify _prot_commit() helpers, while changing the page mapping. The platform code can override these helpers via __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION. Work around the problem via doing a break-before-make TLB invalidation, for all executable user space mappings, that go through mprotect() system call. This overrides ptep_modify_prot_start() and ptep_modify_prot_commit(), via defining HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION on the platform thus giving an opportunity to intercept user space exec mappings, and do the necessary TLB invalidation. Similar interceptions are also implemented for HugeTLB. Cc: Catalin Marinas Cc: Will Deacon Cc: Jonathan Corbet Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Catalin Marinas Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20230102061651.34745-1-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 2 ++ arch/arm64/Kconfig | 16 ++++++++++++++++ arch/arm64/include/asm/hugetlb.h | 9 +++++++++ arch/arm64/include/asm/pgtable.h | 9 +++++++++ arch/arm64/kernel/cpu_errata.c | 7 +++++++ arch/arm64/mm/hugetlbpage.c | 21 +++++++++++++++++++++ arch/arm64/mm/mmu.c | 21 +++++++++++++++++++++ arch/arm64/tools/cpucaps | 1 + 8 files changed, 86 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 808ade4cc008a..ec5f889d76819 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -120,6 +120,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A715 | #2645198 | ARM64_ERRATUM_2645198 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 03934808b2ed0..cf6d1cd8b6dc5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -972,6 +972,22 @@ config ARM64_ERRATUM_2457168 If unsure, say Y. +config ARM64_ERRATUM_2645198 + bool "Cortex-A715: 2645198: Workaround possible [ESR|FAR]_ELx corruption" + default y + help + This option adds the workaround for ARM Cortex-A715 erratum 2645198. + + If a Cortex-A715 cpu sees a page mapping permissions change from executable + to non-executable, it may corrupt the ESR_ELx and FAR_ELx registers on the + next instruction abort caused by permission fault. + + Only user-space does executable to non-executable permission transition via + mprotect() system call. Workaround the problem by doing a break-before-make + TLB invalidation, for all changes to executable user space mappings. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index d20f5da2d76fa..6a4a1ab8eb238 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -49,6 +49,15 @@ extern pte_t huge_ptep_get(pte_t *ptep); void __init arm64_hugetlb_cma_reserve(void); +#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start +extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + +#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit +extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t new_pte); + #include #endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 780973a6cbb61..02ed5c0adf97a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1093,6 +1093,15 @@ static inline bool pud_sect_supported(void) } +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +#define ptep_modify_prot_start ptep_modify_prot_start +extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + +#define ptep_modify_prot_commit ptep_modify_prot_commit +extern void ptep_modify_prot_commit(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t new_pte); #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 89ac00084f38a..307faa2b4395e 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -661,6 +661,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus), }, #endif +#ifdef CONFIG_ARM64_ERRATUM_2645198 + { + .desc = "ARM erratum 2645198", + .capability = ARM64_WORKAROUND_2645198, + ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A715) + }, +#endif #ifdef CONFIG_ARM64_ERRATUM_2077057 { .desc = "ARM erratum 2077057", diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 35e9a468d13e6..95364e8bdc194 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -559,3 +559,24 @@ bool __init arch_hugetlb_valid_size(unsigned long size) { return __hugetlb_valid_size(size); } + +pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ + if (IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198) && + cpus_have_const_cap(ARM64_WORKAROUND_2645198)) { + /* + * Break-before-make (BBM) is required for all user space mappings + * when the permission changes from executable to non-executable + * in cases where cpu is affected with errata #2645198. + */ + if (pte_user_exec(READ_ONCE(*ptep))) + return huge_ptep_clear_flush(vma, addr, ptep); + } + return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); +} + +void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t pte) +{ + set_huge_pte_at(vma->vm_mm, addr, ptep, pte); +} diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 14c87e8d69d83..d77c9f56b7b43 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1630,3 +1630,24 @@ static int __init prevent_bootmem_remove_init(void) } early_initcall(prevent_bootmem_remove_init); #endif + +pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ + if (IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198) && + cpus_have_const_cap(ARM64_WORKAROUND_2645198)) { + /* + * Break-before-make (BBM) is required for all user space mappings + * when the permission changes from executable to non-executable + * in cases where cpu is affected with errata #2645198. + */ + if (pte_user_exec(READ_ONCE(*ptep))) + return ptep_clear_flush(vma, addr, ptep); + } + return ptep_get_and_clear(vma->vm_mm, addr, ptep); +} + +void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t pte) +{ + set_pte_at(vma->vm_mm, addr, ptep, pte); +} diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index a86ee376920a0..dfeb2c51e2573 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -71,6 +71,7 @@ WORKAROUND_2038923 WORKAROUND_2064142 WORKAROUND_2077057 WORKAROUND_2457168 +WORKAROUND_2645198 WORKAROUND_2658417 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE From d3178e8a434b58678d99257c0387810a24042fb6 Mon Sep 17 00:00:00 2001 From: Hao Sun Date: Wed, 4 Jan 2023 09:47:09 +0800 Subject: [PATCH 0448/1593] bpf: Skip invalid kfunc call in backtrack_insn The verifier skips invalid kfunc call in check_kfunc_call(), which would be captured in fixup_kfunc_call() if such insn is not eliminated by dead code elimination. However, this can lead to the following warning in backtrack_insn(), also see [1]: ------------[ cut here ]------------ verifier backtracking bug WARNING: CPU: 6 PID: 8646 at kernel/bpf/verifier.c:2756 backtrack_insn kernel/bpf/verifier.c:2756 __mark_chain_precision kernel/bpf/verifier.c:3065 mark_chain_precision kernel/bpf/verifier.c:3165 adjust_reg_min_max_vals kernel/bpf/verifier.c:10715 check_alu_op kernel/bpf/verifier.c:10928 do_check kernel/bpf/verifier.c:13821 [inline] do_check_common kernel/bpf/verifier.c:16289 [...] So make backtracking conservative with this by returning ENOTSUPP. [1] https://lore.kernel.org/bpf/CACkBjsaXNceR8ZjkLG=dT3P=4A8SBsg0Z5h5PWLryF5=ghKq=g@mail.gmail.com/ Reported-by: syzbot+4da3ff23081bafe74fc2@syzkaller.appspotmail.com Signed-off-by: Hao Sun Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20230104014709.9375-1-sunhao.th@gmail.com --- kernel/bpf/verifier.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 85f96c1e9f62f..c4c0985daac02 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2748,6 +2748,12 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, */ if (insn->src_reg == 0 && is_callback_calling_function(insn->imm)) return -ENOTSUPP; + /* kfunc with imm==0 is invalid and fixup_kfunc_call will + * catch this error later. Make backtracking conservative + * with ENOTSUPP. + */ + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0) + return -ENOTSUPP; /* regular helper call sets R0 */ *reg_mask &= ~1; if (*reg_mask & 0x3f) { From 26658868354963afbff672ad6f7a85c44c311975 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Thu, 29 Dec 2022 16:16:47 +0100 Subject: [PATCH 0449/1593] dt-bindings: soc: qcom: apr: Make qcom,protection-domain optional again The protection domain functionality exists only in SoCs starting from MSM8998 [1], while the APR bindings are also used on older platforms. Commit 41288c305836 ("ASoC: dt-bindings: qcom,apr: Split services to shared schema") made the "qcom,protection-domain" required but it should remain optional to avoid dtbs_check warnings on older platforms, e.g.: arch/arm64/boot/dts/qcom/apq8096-db820c.dtb: apr: service@3: 'qcom,protection-domain' is a required property From schema: Documentation/devicetree/bindings/soc/qcom/qcom,apr.yaml [1]: https://lore.kernel.org/all/20200312120842.21991-1-sibis@codeaurora.org/ Fixes: 41288c305836 ("ASoC: dt-bindings: qcom,apr: Split services to shared schema") Signed-off-by: Stephan Gerhold Reviewed-by: Krzysztof Kozlowski Reviewed-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221229151648.19839-2-stephan@gerhold.net --- .../devicetree/bindings/soc/qcom/qcom,apr-services.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,apr-services.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,apr-services.yaml index 290555426c39c..bdf482db32aac 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,apr-services.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,apr-services.yaml @@ -39,8 +39,8 @@ properties: qcom,protection-domain: $ref: /schemas/types.yaml#/definitions/string-array description: | - Protection domain service name and path for APR service - possible values are:: + Protection domain service name and path for APR service (if supported). + Possible values are:: "avs/audio", "msm/adsp/audio_pd". "kernel/elf_loader", "msm/modem/wlan_pd". "tms/servreg", "msm/adsp/audio_pd". @@ -49,6 +49,5 @@ properties: required: - reg - - qcom,protection-domain additionalProperties: true From 599d41fb8ea8bd2a99ca9525dd69405020e43dda Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Thu, 29 Dec 2022 16:16:48 +0100 Subject: [PATCH 0450/1593] soc: qcom: apr: Make qcom,protection-domain optional again APR should not fail if the service device tree node does not have the qcom,protection-domain property, since this functionality does not exist on older platforms such as MSM8916 and MSM8996. Ignore -EINVAL (returned when the property does not exist) to fix a regression on 6.2-rc1 that prevents audio from working: qcom,apr remoteproc0:smd-edge.apr_audio_svc.-1.-1: Failed to read second value of qcom,protection-domain qcom,apr remoteproc0:smd-edge.apr_audio_svc.-1.-1: Failed to add apr 3 svc Fixes: 6d7860f5750d ("soc: qcom: apr: Add check for idr_alloc and of_property_read_string_index") Signed-off-by: Stephan Gerhold Reviewed-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221229151648.19839-3-stephan@gerhold.net --- drivers/soc/qcom/apr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/soc/qcom/apr.c b/drivers/soc/qcom/apr.c index cd44f17dad3d0..d51abb462ae5d 100644 --- a/drivers/soc/qcom/apr.c +++ b/drivers/soc/qcom/apr.c @@ -461,9 +461,10 @@ static int apr_add_device(struct device *dev, struct device_node *np, goto out; } + /* Protection domain is optional, it does not exist on older platforms */ ret = of_property_read_string_index(np, "qcom,protection-domain", 1, &adev->service_path); - if (ret < 0) { + if (ret < 0 && ret != -EINVAL) { dev_err(dev, "Failed to read second value of qcom,protection-domain\n"); goto out; } From 0b3a551fa58b4da941efeb209b3770868e2eddd7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 5 Jan 2023 14:55:56 -0500 Subject: [PATCH 0451/1593] nfsd: fix handling of cached open files in nfsd4_open codepath Commit fb70bf124b05 ("NFSD: Instantiate a struct file when creating a regular NFSv4 file") added the ability to cache an open fd over a compound. There are a couple of problems with the way this currently works: It's racy, as a newly-created nfsd_file can end up with its PENDING bit cleared while the nf is hashed, and the nf_file pointer is still zeroed out. Other tasks can find it in this state and they expect to see a valid nf_file, and can oops if nf_file is NULL. Also, there is no guarantee that we'll end up creating a new nfsd_file if one is already in the hash. If an extant entry is in the hash with a valid nf_file, nfs4_get_vfs_file will clobber its nf_file pointer with the value of op_file and the old nf_file will leak. Fix both issues by making a new nfsd_file_acquirei_opened variant that takes an optional file pointer. If one is present when this is called, we'll take a new reference to it instead of trying to open the file. If the nfsd_file already has a valid nf_file, we'll just ignore the optional file and pass the nfsd_file back as-is. Also rework the tracepoints a bit to allow for an "opened" variant and don't try to avoid counting acquisitions in the case where we already have a cached open file. Fixes: fb70bf124b05 ("NFSD: Instantiate a struct file when creating a regular NFSv4 file") Cc: Trond Myklebust Reported-by: Stanislav Saner Reported-and-Tested-by: Ruben Vestergaard Reported-and-Tested-by: Torkil Svensgaard Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 40 ++++++++++++++++++---------------- fs/nfsd/filecache.h | 5 +++-- fs/nfsd/nfs4state.c | 16 ++++---------- fs/nfsd/trace.h | 52 ++++++++++++--------------------------------- 4 files changed, 42 insertions(+), 71 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 45b2c9e3f6360..0ef0703490144 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1071,8 +1071,8 @@ nfsd_file_is_cached(struct inode *inode) static __be32 nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf, - bool open, bool want_gc) + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf, bool want_gc) { struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_FULL, @@ -1147,8 +1147,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); out: if (status == nfs_ok) { - if (open) - this_cpu_inc(nfsd_file_acquisitions); + this_cpu_inc(nfsd_file_acquisitions); *pnf = nf; } else { if (refcount_dec_and_test(&nf->nf_ref)) @@ -1158,20 +1157,23 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, out_status: put_cred(key.cred); - if (open) - trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); + trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); return status; open_file: trace_nfsd_file_alloc(nf); nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); if (nf->nf_mark) { - if (open) { + if (file) { + get_file(file); + nf->nf_file = file; + status = nfs_ok; + trace_nfsd_file_opened(nf, status); + } else { status = nfsd_open_verified(rqstp, fhp, may_flags, &nf->nf_file); trace_nfsd_file_open(nf, status); - } else - status = nfs_ok; + } } else status = nfserr_jukebox; /* @@ -1207,7 +1209,7 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true); } /** @@ -1228,28 +1230,30 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); } /** - * nfsd_file_create - Get a struct nfsd_file, do not open + * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file * @rqstp: the RPC transaction being executed * @fhp: the NFS filehandle of the file just created * @may_flags: NFSD_MAY_ settings for the file + * @file: cached, already-open file (may be NULL) * @pnf: OUT: new or found "struct nfsd_file" object * - * The nfsd_file_object returned by this API is reference-counted - * but not garbage-collected. The object is released immediately - * one RCU grace period after the final nfsd_file_put(). + * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist, + * and @file is non-NULL, use it to instantiate a new nfsd_file instead of + * opening a new one. * * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in * network byte order is returned. */ __be32 -nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf) +nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); } /* diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index b7efb2c3ddb18..41516a4263ea5 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -60,7 +60,8 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); -__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **nfp); +__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **nfp); int nfsd_file_cache_stats_show(struct seq_file *m, void *v); #endif /* _FS_NFSD_FILECACHE_H */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e1e85c21f12bc..313f666d53578 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5262,18 +5262,10 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, if (!fp->fi_fds[oflag]) { spin_unlock(&fp->fi_lock); - if (!open->op_filp) { - status = nfsd_file_acquire(rqstp, cur_fh, access, &nf); - if (status != nfs_ok) - goto out_put_access; - } else { - status = nfsd_file_create(rqstp, cur_fh, access, &nf); - if (status != nfs_ok) - goto out_put_access; - nf->nf_file = open->op_filp; - open->op_filp = NULL; - trace_nfsd_file_create(rqstp, access, nf); - } + status = nfsd_file_acquire_opened(rqstp, cur_fh, access, + open->op_filp, &nf); + if (status != nfs_ok) + goto out_put_access; spin_lock(&fp->fi_lock); if (!fp->fi_fds[oflag]) { diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index c852ae8eaf371..8f9c82d9e075b 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -981,43 +981,6 @@ TRACE_EVENT(nfsd_file_acquire, ) ); -TRACE_EVENT(nfsd_file_create, - TP_PROTO( - const struct svc_rqst *rqstp, - unsigned int may_flags, - const struct nfsd_file *nf - ), - - TP_ARGS(rqstp, may_flags, nf), - - TP_STRUCT__entry( - __field(const void *, nf_inode) - __field(const void *, nf_file) - __field(unsigned long, may_flags) - __field(unsigned long, nf_flags) - __field(unsigned long, nf_may) - __field(unsigned int, nf_ref) - __field(u32, xid) - ), - - TP_fast_assign( - __entry->nf_inode = nf->nf_inode; - __entry->nf_file = nf->nf_file; - __entry->may_flags = may_flags; - __entry->nf_flags = nf->nf_flags; - __entry->nf_may = nf->nf_may; - __entry->nf_ref = refcount_read(&nf->nf_ref); - __entry->xid = be32_to_cpu(rqstp->rq_xid); - ), - - TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", - __entry->xid, __entry->nf_inode, - show_nfsd_may_flags(__entry->may_flags), - __entry->nf_ref, show_nf_flags(__entry->nf_flags), - show_nfsd_may_flags(__entry->nf_may), __entry->nf_file - ) -); - TRACE_EVENT(nfsd_file_insert_err, TP_PROTO( const struct svc_rqst *rqstp, @@ -1079,8 +1042,8 @@ TRACE_EVENT(nfsd_file_cons_err, ) ); -TRACE_EVENT(nfsd_file_open, - TP_PROTO(struct nfsd_file *nf, __be32 status), +DECLARE_EVENT_CLASS(nfsd_file_open_class, + TP_PROTO(const struct nfsd_file *nf, __be32 status), TP_ARGS(nf, status), TP_STRUCT__entry( __field(void *, nf_inode) /* cannot be dereferenced */ @@ -1104,6 +1067,17 @@ TRACE_EVENT(nfsd_file_open, __entry->nf_file) ) +#define DEFINE_NFSD_FILE_OPEN_EVENT(name) \ +DEFINE_EVENT(nfsd_file_open_class, name, \ + TP_PROTO( \ + const struct nfsd_file *nf, \ + __be32 status \ + ), \ + TP_ARGS(nf, status)) + +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open); +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened); + TRACE_EVENT(nfsd_file_is_cached, TP_PROTO( const struct inode *inode, From 7827c81f0248e3c2f40d438b020f3d222f002171 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 6 Jan 2023 12:43:37 -0500 Subject: [PATCH 0452/1593] Revert "SUNRPC: Use RMW bitops in single-threaded hot paths" The premise that "Once an svc thread is scheduled and executing an RPC, no other processes will touch svc_rqst::rq_flags" is false. svc_xprt_enqueue() examines the RQ_BUSY flag in scheduled nfsd threads when determining which thread to wake up next. Found via KCSAN. Fixes: 28df0988815f ("SUNRPC: Use RMW bitops in single-threaded hot paths") Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 7 +++---- fs/nfsd/nfs4xdr.c | 2 +- net/sunrpc/auth_gss/svcauth_gss.c | 4 ++-- net/sunrpc/svc.c | 6 +++--- net/sunrpc/svc_xprt.c | 2 +- net/sunrpc/svcsock.c | 8 ++++---- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- 7 files changed, 15 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index bd880d55f565b..9b81d012666e2 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -937,7 +937,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * the client wants us to do more in this compound: */ if (!nfsd4_last_compound_op(rqstp)) - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* check stateid */ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, @@ -2607,12 +2607,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) cstate->minorversion = args->minorversion; fh_init(current_fh, NFS4_FHSIZE); fh_init(save_fh, NFS4_FHSIZE); - /* * Don't use the deferral mechanism for NFSv4; compounds make it * too hard to avoid non-idempotency problems. */ - __clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); /* * According to RFC3010, this takes precedence over all other errors. @@ -2734,7 +2733,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) out: cstate->status = status; /* Reset deferral mechanism for RPC deferrals */ - __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); return rpc_success; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ebb4d02a42ce6..97edb32be77f1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2523,7 +2523,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) - __clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); return true; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 148bb0a7fa5b4..acb822b23af1b 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -923,7 +923,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g * rejecting the server-computed MIC in this somewhat rare case, * do not use splice with the GSS integrity service. */ - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* Did we already verify the signature on the original pass through? */ if (rqstp->rq_deferred) @@ -990,7 +990,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs int pad, remaining_len, offset; u32 rseqno; - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); priv_len = svc_getnl(&buf->head[0]); if (rqstp->rq_deferred) { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 8f1b596db33fc..b1da586c44765 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1243,10 +1243,10 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto err_short_len; /* Will be turned off by GSS integrity and privacy services */ - __set_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + set_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* Will be turned off only when NFSv4 Sessions are used */ - __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); - __clear_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + clear_bit(RQ_DROPME, &rqstp->rq_flags); svc_putu32(resv, rqstp->rq_xid); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 2106003645a78..c2ce125380080 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1238,7 +1238,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) trace_svc_defer(rqstp); svc_xprt_get(rqstp->rq_xprt); dr->xprt = rqstp->rq_xprt; - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); dr->handle.revisit = svc_revisit; return &dr->handle; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 2fc98fea59b46..e833103f46291 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -298,9 +298,9 @@ static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs) static void svc_sock_secure_port(struct svc_rqst *rqstp) { if (svc_port_is_privileged(svc_addr(rqstp))) - __set_bit(RQ_SECURE, &rqstp->rq_flags); + set_bit(RQ_SECURE, &rqstp->rq_flags); else - __clear_bit(RQ_SECURE, &rqstp->rq_flags); + clear_bit(RQ_SECURE, &rqstp->rq_flags); } /* @@ -1008,9 +1008,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags)) - __set_bit(RQ_LOCAL, &rqstp->rq_flags); + set_bit(RQ_LOCAL, &rqstp->rq_flags); else - __clear_bit(RQ_LOCAL, &rqstp->rq_flags); + clear_bit(RQ_LOCAL, &rqstp->rq_flags); p = (__be32 *)rqstp->rq_arg.head[0].iov_base; calldir = p[1]; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 199fa012f18a8..94b20fb471356 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -602,7 +602,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) static void svc_rdma_secure_port(struct svc_rqst *rqstp) { - __set_bit(RQ_SECURE, &rqstp->rq_flags); + set_bit(RQ_SECURE, &rqstp->rq_flags); } static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt) From ec64efc4966edf19fa1bc398a26bddfbadc1605f Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 5 Dec 2022 17:44:59 +0000 Subject: [PATCH 0453/1593] dt-bindings: riscv: fix underscore requirement for multi-letter extensions The RISC-V ISA Manual allows the first multi-letter extension to avoid a leading underscore. Underscores are only required between multi-letter extensions. The dt-binding does not validate that a multi-letter extension is canonically ordered, as that'd need an even worse regex than is here, but it should not fail validation for valid ISA strings. Allow the first multi-letter extension to appear immediately after the single-letter extensions. Link: https://github.com/riscv/riscv-isa-manual/releases/tag/riscv-unpriv-pdf-from-asciidoc-15112022 # Chapter 29.5 Fixes: 299824e68bd0 ("dt-bindings: riscv: add new riscv,isa strings for emulators") Acked-by: Guo Ren Signed-off-by: Conor Dooley Acked-by: Rob Herring Reviewed-by: Palmer Dabbelt Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20221205174459.60195-2-conor@kernel.org Signed-off-by: Palmer Dabbelt --- Documentation/devicetree/bindings/riscv/cpus.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index 90a7cabf58feb..97659bb718119 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -80,7 +80,7 @@ properties: insensitive, letters in the riscv,isa string must be all lowercase to simplify parsing. $ref: "/schemas/types.yaml#/definitions/string" - pattern: ^rv(?:64|32)imaf?d?q?c?b?v?k?h?(?:_[hsxz](?:[a-z])+)*$ + pattern: ^rv(?:64|32)imaf?d?q?c?b?v?k?h?(?:[hsxz](?:[a-z])+)?(?:_[hsxz](?:[a-z])+)*$ # RISC-V requires 'timebase-frequency' in /cpus, so disallow it here timebase-frequency: false From a943385aa80151c6b2611d3a1cf8338af2b257a1 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 5 Dec 2022 17:45:00 +0000 Subject: [PATCH 0454/1593] dt-bindings: riscv: fix single letter canonical order I used the wikipedia table for ordering extensions when updating the pattern here in commit 299824e68bd0 ("dt-bindings: riscv: add new riscv,isa strings for emulators"). Unfortunately that table did not match canonical order, as defined by the RISC-V ISA Manual, which defines extension ordering in (what is currently) Table 41, "Standard ISA extension names". Fix things up by re-sorting v (vector) and adding p (packed-simd) & j (dynamic languages). The e (reduced integer) and g (general) extensions are still intentionally left out. Link: https://github.com/riscv/riscv-isa-manual/releases/tag/riscv-unpriv-pdf-from-asciidoc-15112022 # Chapter 29.5 Fixes: 299824e68bd0 ("dt-bindings: riscv: add new riscv,isa strings for emulators") Acked-by: Guo Ren Reviewed-by: Heiko Stuebner Reviewed-by: Palmer Dabbelt Acked-by: Palmer Dabbelt Signed-off-by: Conor Dooley Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221205174459.60195-3-conor@kernel.org Signed-off-by: Palmer Dabbelt --- Documentation/devicetree/bindings/riscv/cpus.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index 97659bb718119..d4148418350c6 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -80,7 +80,7 @@ properties: insensitive, letters in the riscv,isa string must be all lowercase to simplify parsing. $ref: "/schemas/types.yaml#/definitions/string" - pattern: ^rv(?:64|32)imaf?d?q?c?b?v?k?h?(?:[hsxz](?:[a-z])+)?(?:_[hsxz](?:[a-z])+)*$ + pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[a-z])+)?(?:_[hsxz](?:[a-z])+)*$ # RISC-V requires 'timebase-frequency' in /cpus, so disallow it here timebase-frequency: false From 0283189e8f3d0917e2ac399688df85211f48447b Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 4 Jan 2023 10:47:39 -0700 Subject: [PATCH 0455/1593] docs: Fix the docs build with Sphinx 6.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sphinx 6.0 removed the execfile_() function, which we use as part of the configuration process. They *did* warn us... Just open-code the functionality as is done in Sphinx itself. Tested (using SPHINX_CONF, since this code is only executed with an alternative config file) on various Sphinx versions from 2.5 through 6.0. Reported-by: Martin Liška Cc: stable@vger.kernel.org Signed-off-by: Jonathan Corbet --- Documentation/sphinx/load_config.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/sphinx/load_config.py b/Documentation/sphinx/load_config.py index eeb394b39e2cc..8b416bfd75ac1 100644 --- a/Documentation/sphinx/load_config.py +++ b/Documentation/sphinx/load_config.py @@ -3,7 +3,7 @@ import os import sys -from sphinx.util.pycompat import execfile_ +from sphinx.util.osutil import fs_encoding # ------------------------------------------------------------------------------ def loadConfig(namespace): @@ -48,7 +48,9 @@ def loadConfig(namespace): sys.stdout.write("load additional sphinx-config: %s\n" % config_file) config = namespace.copy() config['__file__'] = config_file - execfile_(config_file, config) + with open(config_file, 'rb') as f: + code = compile(f.read(), fs_encoding, 'exec') + exec(code, config) del config['__file__'] namespace.update(config) else: From 31abfdda65279a860b10a98038135501e4fc00a1 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 4 Jan 2023 13:59:16 -0700 Subject: [PATCH 0456/1593] docs: Deprecate use of Sphinx < 2.4.x The Sphinx 2.4 release is three years old, and it is becoming increasingly difficult to even find a system with an sufficiently archaic Python installation that can run versions older than that. I can no longer test changes against anything prior to 2.4.x. Move toward raising our minimum Sphinx requirement to 2.4.x so we can delete some older support code and claim to support a range of versions that we can actually test. In the absence of screams, the actual removal of support can happen later in 2023. Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/conf.py b/Documentation/conf.py index a5c45df0bd839..44899be7b2cca 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -31,6 +31,12 @@ def have_command(cmd): # Get Sphinx version major, minor, patch = sphinx.version_info[:3] +# +# Warn about older versions that we don't want to support for much +# longer. +# +if (major < 2) or (major == 2 and minor < 4): + print('WARNING: support for Sphinx < 2.4 will be removed soon.') # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the From a3d81bc1eaef48e34dd0b9b48eefed9e02a06451 Mon Sep 17 00:00:00 2001 From: Hao Sun Date: Fri, 6 Jan 2023 16:48:38 +0800 Subject: [PATCH 0457/1593] bpf: Skip task with pid=1 in send_signal_common() The following kernel panic can be triggered when a task with pid=1 attaches a prog that attempts to send killing signal to itself, also see [1] for more details: Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b CPU: 3 PID: 1 Comm: systemd Not tainted 6.1.0-09652-g59fe41b5255f #148 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x100/0x178 lib/dump_stack.c:106 panic+0x2c4/0x60f kernel/panic.c:275 do_exit.cold+0x63/0xe4 kernel/exit.c:789 do_group_exit+0xd4/0x2a0 kernel/exit.c:950 get_signal+0x2460/0x2600 kernel/signal.c:2858 arch_do_signal_or_restart+0x78/0x5d0 arch/x86/kernel/signal.c:306 exit_to_user_mode_loop kernel/entry/common.c:168 [inline] exit_to_user_mode_prepare+0x15f/0x250 kernel/entry/common.c:203 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:296 do_syscall_64+0x44/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x63/0xcd So skip task with pid=1 in bpf_send_signal_common() to avoid the panic. [1] https://lore.kernel.org/bpf/20221222043507.33037-1-sunhao.th@gmail.com Signed-off-by: Hao Sun Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20230106084838.12690-1-sunhao.th@gmail.com --- kernel/trace/bpf_trace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 3bbd3f0c810c8..f47274de012b5 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -848,6 +848,9 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type) return -EPERM; if (unlikely(!nmi_uaccess_okay())) return -EPERM; + /* Task should not be pid=1 to avoid kernel panic. */ + if (unlikely(is_global_init(current))) + return -EPERM; if (irqs_disabled()) { /* Do an early check on signal validity. Otherwise, From cb7a95af78d29442b8294683eca4897544b8ef46 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 4 Jan 2023 11:06:28 -0800 Subject: [PATCH 0458/1593] hfs/hfsplus: avoid WARN_ON() for sanity check, use proper error handling Commit 55d1cbbbb29e ("hfs/hfsplus: use WARN_ON for sanity check") fixed a build warning by turning a comment into a WARN_ON(), but it turns out that syzbot then complains because it can trigger said warning with a corrupted hfs image. The warning actually does warn about a bad situation, but we are much better off just handling it as the error it is. So rather than warn about us doing bad things, stop doing the bad things and return -EIO. While at it, also fix a memory leak that was introduced by an earlier fix for a similar syzbot warning situation, and add a check for one case that historically wasn't handled at all (ie neither comment nor subsequent WARN_ON). Reported-by: syzbot+7bb7cd3595533513a9e7@syzkaller.appspotmail.com Fixes: 55d1cbbbb29e ("hfs/hfsplus: use WARN_ON for sanity check") Fixes: 8d824e69d9f3 ("hfs: fix OOB Read in __hfs_brec_find") Link: https://lore.kernel.org/lkml/000000000000dbce4e05f170f289@google.com/ Tested-by: Michael Schmitz Cc: Arnd Bergmann Cc: Matthew Wilcox Cc: Viacheslav Dubeyko Signed-off-by: Linus Torvalds --- fs/hfs/inode.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 9c329a365e750..3a155c1d810ef 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -458,15 +458,16 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) /* panic? */ return -EIO; + res = -EIO; if (HFS_I(main_inode)->cat_key.CName.len > HFS_NAMELEN) - return -EIO; + goto out; fd.search_key->cat = HFS_I(main_inode)->cat_key; if (hfs_brec_find(&fd)) - /* panic? */ goto out; if (S_ISDIR(main_inode->i_mode)) { - WARN_ON(fd.entrylength < sizeof(struct hfs_cat_dir)); + if (fd.entrylength < sizeof(struct hfs_cat_dir)) + goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_dir)); if (rec.type != HFS_CDR_DIR || @@ -479,6 +480,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_dir)); } else if (HFS_IS_RSRC(inode)) { + if (fd.entrylength < sizeof(struct hfs_cat_file)) + goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); hfs_inode_write_fork(inode, rec.file.RExtRec, @@ -486,7 +489,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); } else { - WARN_ON(fd.entrylength < sizeof(struct hfs_cat_file)); + if (fd.entrylength < sizeof(struct hfs_cat_file)) + goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); if (rec.type != HFS_CDR_FIL || @@ -503,9 +507,10 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); } + res = 0; out: hfs_find_exit(&fd); - return 0; + return res; } static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, From 1382999aa0548a171a272ca817f6c38e797c458c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 6 Jan 2023 04:01:56 +0100 Subject: [PATCH 0459/1593] tpm: Allow system suspend to continue when TPM suspend fails TPM 1 is sometimes broken across system suspends, due to races or locking issues or something else that haven't been diagnosed or fixed yet, most likely having to do with concurrent reads from the TPM's hardware random number generator driver. These issues prevent the system from actually suspending, with errors like: tpm tpm0: A TPM error (28) occurred continue selftest ... tpm tpm0: A TPM error (28) occurred attempting get random ... tpm tpm0: Error (28) sending savestate before suspend tpm_tis 00:08: PM: __pnp_bus_suspend(): tpm_pm_suspend+0x0/0x80 returns 28 tpm_tis 00:08: PM: dpm_run_callback(): pnp_bus_suspend+0x0/0x10 returns 28 tpm_tis 00:08: PM: failed to suspend: error 28 PM: Some devices failed to suspend, or early wake event detected This issue was partially fixed by 23393c646142 ("char: tpm: Protect tpm_pm_suspend with locks"), in a last minute 6.1 commit that Linus took directly because the TPM maintainers weren't available. However, it seems like this just addresses the most common cases of the bug, rather than addressing it entirely. So there are more things to fix still, apparently. In lieu of actually fixing the underlying bug, just allow system suspend to continue, so that laptops still go to sleep fine. Later, this can be reverted when the real bug is fixed. Link: https://lore.kernel.org/lkml/7cbe96cf-e0b5-ba63-d1b4-f63d2e826efa@suse.cz/ Cc: stable@vger.kernel.org # 6.1+ Reported-by: Vlastimil Babka Suggested-by: Linus Torvalds Acked-by: Luigi Semenzato Cc: Peter Huewe Cc: Jarkko Sakkinen Cc: James Bottomley Cc: Johannes Altmanninger Signed-off-by: Jason A. Donenfeld Signed-off-by: Linus Torvalds --- drivers/char/tpm/tpm-interface.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index d69905233aff2..7e513b7718320 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -412,7 +412,9 @@ int tpm_pm_suspend(struct device *dev) } suspended: - return rc; + if (rc) + dev_err(dev, "Ignoring error %d while suspending\n", rc); + return 0; } EXPORT_SYMBOL_GPL(tpm_pm_suspend); From b4e9b8763e417db31c7088103cc557d55cb7a8f5 Mon Sep 17 00:00:00 2001 From: Angela Czubak Date: Thu, 5 Jan 2023 21:31:07 +0530 Subject: [PATCH 0460/1593] octeontx2-af: Fix LMAC config in cgx_lmac_rx_tx_enable PF netdev can request AF to enable or disable reception and transmission on assigned CGX::LMAC. The current code instead of disabling or enabling 'reception and transmission' also disables/enable the LMAC. This patch fixes this issue. Fixes: 1435f66a28b4 ("octeontx2-af: CGX Rx/Tx enable/disable mbox handlers") Signed-off-by: Angela Czubak Signed-off-by: Hariprasad Kelam Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20230105160107.17638-1-hkelam@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 4 ++-- drivers/net/ethernet/marvell/octeontx2/af/cgx.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index b2b71fe80d61c..724df6398bbe2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -774,9 +774,9 @@ int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable) cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); if (enable) - cfg |= CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN; + cfg |= DATA_PKT_RX_EN | DATA_PKT_TX_EN; else - cfg &= ~(CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN); + cfg &= ~(DATA_PKT_RX_EN | DATA_PKT_TX_EN); cgx_write(cgx, lmac_id, CGXX_CMRX_CFG, cfg); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index fb2d37676d84e..5a20d93004c71 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -26,7 +26,6 @@ #define CMR_P2X_SEL_SHIFT 59ULL #define CMR_P2X_SEL_NIX0 1ULL #define CMR_P2X_SEL_NIX1 2ULL -#define CMR_EN BIT_ULL(55) #define DATA_PKT_TX_EN BIT_ULL(53) #define DATA_PKT_RX_EN BIT_ULL(54) #define CGX_LMAC_TYPE_SHIFT 40 From 4414c1f5c7a375eaa108676a56e12cc8234eb647 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 6 Jan 2023 23:30:57 -0800 Subject: [PATCH 0461/1593] xtensa: drop unused members of struct thread_struct bad_vaddr, bad_uaddr and error_code fields are set but never read by the xtensa arch-specific code. Drop them. Also drop the commented out info field. Signed-off-by: Max Filippov --- arch/xtensa/include/asm/processor.h | 9 --------- arch/xtensa/kernel/traps.c | 2 -- arch/xtensa/mm/fault.c | 4 ---- 3 files changed, 15 deletions(-) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 228e4dff5fb2d..a6d09fe048311 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -154,11 +154,6 @@ struct thread_struct { unsigned long ra; /* kernel's a0: return address and window call size */ unsigned long sp; /* kernel's a1: stack pointer */ - /* struct xtensa_cpuinfo info; */ - - unsigned long bad_vaddr; /* last user fault */ - unsigned long bad_uaddr; /* last kernel fault accessing user space */ - unsigned long error_code; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct perf_event *ptrace_bp[XCHAL_NUM_IBREAK]; struct perf_event *ptrace_wp[XCHAL_NUM_DBREAK]; @@ -176,10 +171,6 @@ struct thread_struct { { \ ra: 0, \ sp: sizeof(init_stack) + (long) &init_stack, \ - /*info: {0}, */ \ - bad_vaddr: 0, \ - bad_uaddr: 0, \ - error_code: 0, \ } diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 0c25e035ff107..cd98366a9b238 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -362,8 +362,6 @@ static void do_unaligned_user(struct pt_regs *regs) __die_if_kernel("Unhandled unaligned exception in kernel", regs, SIGKILL); - current->thread.bad_vaddr = regs->excvaddr; - current->thread.error_code = -3; pr_info_ratelimited("Unaligned memory access to %08lx in '%s' " "(pid = %d, pc = %#010lx)\n", regs->excvaddr, current->comm, diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 8c781b05c0bdd..faf7cf35a0ee3 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -206,8 +206,6 @@ void do_page_fault(struct pt_regs *regs) bad_area: mmap_read_unlock(mm); if (user_mode(regs)) { - current->thread.bad_vaddr = address; - current->thread.error_code = is_write; force_sig_fault(SIGSEGV, code, (void *) address); return; } @@ -232,7 +230,6 @@ void do_page_fault(struct pt_regs *regs) /* Send a sigbus, regardless of whether we were in kernel * or user mode. */ - current->thread.bad_vaddr = address; force_sig_fault(SIGBUS, BUS_ADRERR, (void *) address); /* Kernel mode? Handle exceptions or die */ @@ -252,7 +249,6 @@ bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) if ((entry = search_exception_tables(regs->pc)) != NULL) { pr_debug("%s: Exception at pc=%#010lx (%lx)\n", current->comm, regs->pc, entry->fixup); - current->thread.bad_uaddr = address; regs->pc = entry->fixup; return; } From 42f229c350f57a8e825f7591e17cbc5c87e50235 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 6 Jan 2023 13:03:18 +0000 Subject: [PATCH 0462/1593] rxrpc: Fix incoming call setup race An incoming call can race with rxrpc socket destruction, leading to a leaked call. This may result in an oops when the call timer eventually expires: BUG: kernel NULL pointer dereference, address: 0000000000000874 RIP: 0010:_raw_spin_lock_irqsave+0x2a/0x50 Call Trace: try_to_wake_up+0x59/0x550 ? __local_bh_enable_ip+0x37/0x80 ? rxrpc_poke_call+0x52/0x110 [rxrpc] ? rxrpc_poke_call+0x110/0x110 [rxrpc] ? rxrpc_poke_call+0x110/0x110 [rxrpc] call_timer_fn+0x24/0x120 with a warning in the kernel log looking something like: rxrpc: Call 00000000ba5e571a still in use (1,SvAwtACK,1061d,0)! incurred during rmmod of rxrpc. The 1061d is the call flags: RECVMSG_READ_ALL, RX_HEARD, BEGAN_RX_TIMER, RX_LAST, EXPOSED, IS_SERVICE, RELEASED but no DISCONNECTED flag (0x800), so it's an incoming (service) call and it's still connected. The race appears to be that: (1) rxrpc_new_incoming_call() consults the service struct, checks sk_state and allocates a call - then pauses, possibly for an interrupt. (2) rxrpc_release_sock() sets RXRPC_CLOSE, nulls the service pointer, discards the prealloc and releases all calls attached to the socket. (3) rxrpc_new_incoming_call() resumes, launching the new call, including its timer and attaching it to the socket. Fix this by read-locking local->services_lock to access the AF_RXRPC socket providing the service rather than RCU in rxrpc_new_incoming_call(). There's no real need to use RCU here as local->services_lock is only write-locked by the socket side in two places: when binding and when shutting down. Fixes: 5e6ef4f1017c ("rxrpc: Make the I/O thread take over the call and local processor work") Reported-by: Marc Dionne Signed-off-by: David Howells cc: linux-afs@lists.infradead.org --- net/rxrpc/af_rxrpc.c | 8 ++++---- net/rxrpc/ar-internal.h | 2 +- net/rxrpc/call_accept.c | 14 +++++++------- net/rxrpc/security.c | 6 +++--- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index cf200e4e0eae1..ebbd4a1c3f86e 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -155,10 +155,10 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) if (service_id) { write_lock(&local->services_lock); - if (rcu_access_pointer(local->service)) + if (local->service) goto service_in_use; rx->local = local; - rcu_assign_pointer(local->service, rx); + local->service = rx; write_unlock(&local->services_lock); rx->sk.sk_state = RXRPC_SERVER_BOUND; @@ -875,9 +875,9 @@ static int rxrpc_release_sock(struct sock *sk) sk->sk_state = RXRPC_CLOSE; - if (rx->local && rcu_access_pointer(rx->local->service) == rx) { + if (rx->local && rx->local->service == rx) { write_lock(&rx->local->services_lock); - rcu_assign_pointer(rx->local->service, NULL); + rx->local->service = NULL; write_unlock(&rx->local->services_lock); } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 007258538beeb..433060cade038 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -283,7 +283,7 @@ struct rxrpc_local { struct socket *socket; /* my UDP socket */ struct task_struct *io_thread; struct completion io_thread_ready; /* Indication that the I/O thread started */ - struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */ + struct rxrpc_sock *service; /* Service(s) listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ struct sk_buff_head rx_queue; /* Received packets */ struct list_head conn_attend_q; /* Conns requiring immediate attention */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 3fbf2fcaaf9e2..3e8689fdc4371 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -343,13 +343,13 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local, if (sp->hdr.type != RXRPC_PACKET_TYPE_DATA) return rxrpc_protocol_error(skb, rxrpc_eproto_no_service_call); - rcu_read_lock(); + read_lock(&local->services_lock); /* Weed out packets to services we're not offering. Packets that would * begin a call are explicitly rejected and the rest are just * discarded. */ - rx = rcu_dereference(local->service); + rx = local->service; if (!rx || (sp->hdr.serviceId != rx->srx.srx_service && sp->hdr.serviceId != rx->second_service) ) { @@ -399,7 +399,7 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local, spin_unlock(&conn->state_lock); spin_unlock(&rx->incoming_lock); - rcu_read_unlock(); + read_unlock(&local->services_lock); if (hlist_unhashed(&call->error_link)) { spin_lock(&call->peer->lock); @@ -413,20 +413,20 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local, return true; unsupported_service: - rcu_read_unlock(); + read_unlock(&local->services_lock); return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered, RX_INVALID_OPERATION, -EOPNOTSUPP); unsupported_security: - rcu_read_unlock(); + read_unlock(&local->services_lock); return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered, RX_INVALID_OPERATION, -EKEYREJECTED); no_call: spin_unlock(&rx->incoming_lock); - rcu_read_unlock(); + read_unlock(&local->services_lock); _leave(" = f [%u]", skb->mark); return false; discard: - rcu_read_unlock(); + read_unlock(&local->services_lock); return true; } diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index cd66634dffe69..cb8dd1d3b1d49 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -178,9 +178,9 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn, sprintf(kdesc, "%u:%u", sp->hdr.serviceId, sp->hdr.securityIndex); - rcu_read_lock(); + read_lock(&conn->local->services_lock); - rx = rcu_dereference(conn->local->service); + rx = conn->local->service; if (!rx) goto out; @@ -202,6 +202,6 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn, } out: - rcu_read_unlock(); + read_unlock(&conn->local->services_lock); return key; } From ea44242bbfcde2993fb27ec7c3ad5ab5cc39e438 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Thu, 15 Dec 2022 12:09:25 +0800 Subject: [PATCH 0463/1593] scsi: hisi_sas: Fix tag freeing for reserved tags The reserved tags were put in the lower region of the tagset in commit f7d190a94e35 ("scsi: hisi_sas: Put reserved tags in lower region of tagset"). However, only the allocate function was changed, freeing was not handled. This resulted in a failure to boot: [ 33.467345] hisi_sas_v3_hw 0000:b4:02.0: task exec: failed[-132]! [ 33.473413] sas: Executing internal abort failed 5000000000000603 (-132) [ 33.480088] hisi_sas_v3_hw 0000:b4:02.0: I_T nexus reset: internal abort (-132) [ 33.657336] hisi_sas_v3_hw 0000:b4:02.0: task exec: failed[-132]! [ 33.663403] ata7.00: failed to IDENTIFY (I/O error, err_mask=0x40) [ 35.787344] hisi_sas_v3_hw 0000:b4:04.0: task exec: failed[-132]! [ 35.793411] sas: Executing internal abort failed 5000000000000703 (-132) [ 35.800084] hisi_sas_v3_hw 0000:b4:04.0: I_T nexus reset: internal abort (-132) [ 35.977335] hisi_sas_v3_hw 0000:b4:04.0: task exec: failed[-132]! [ 35.983403] ata10.00: failed to IDENTIFY (I/O error, err_mask=0x40) [ 35.989643] ata10.00: revalidation failed (errno=-5) Fixes: f7d190a94e35 ("scsi: hisi_sas: Put reserved tags in lower region of tagset") Cc: John Garry Cc: Xiang Chen Signed-off-by: Jason Yan Reviewed-by: John Garry Acked-by: Xiang Chen Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 41ba22f6c7f05..e9c2d306ed873 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -162,7 +162,7 @@ static void hisi_sas_slot_index_clear(struct hisi_hba *hisi_hba, int slot_idx) static void hisi_sas_slot_index_free(struct hisi_hba *hisi_hba, int slot_idx) { if (hisi_hba->hw->slot_index_alloc || - slot_idx >= HISI_SAS_UNRESERVED_IPTT) { + slot_idx < HISI_SAS_RESERVED_IPTT) { spin_lock(&hisi_hba->lock); hisi_sas_slot_index_clear(hisi_hba, slot_idx); spin_unlock(&hisi_hba->lock); From a67aad57d9aee41180aff36e54cb72fe4b8d5a5a Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Wed, 21 Dec 2022 01:52:03 +0000 Subject: [PATCH 0464/1593] scsi: libsas: Grab the ATA port lock in sas_ata_device_link_abort() Grab the ATA port lock in sas_ata_device_link_abort() before calling ata_link_abort() as outlined in this function's locking requirements. Fixes: 44112922674b ("scsi: libsas: Add sas_ata_device_link_abort()") Signed-off-by: Xingui Yang Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_ata.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 1ccce706167a5..5e80225b53082 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -889,7 +889,9 @@ void sas_ata_device_link_abort(struct domain_device *device, bool force_reset) { struct ata_port *ap = device->sata_dev.ap; struct ata_link *link = &ap->link; + unsigned long flags; + spin_lock_irqsave(ap->lock, flags); device->sata_dev.fis[2] = ATA_ERR | ATA_DRDY; /* tf status */ device->sata_dev.fis[3] = ATA_ABORTED; /* tf error */ @@ -897,6 +899,7 @@ void sas_ata_device_link_abort(struct domain_device *device, bool force_reset) if (force_reset) link->eh_info.action |= ATA_EH_RESET; ata_link_abort(link); + spin_unlock_irqrestore(ap->lock, flags); } EXPORT_SYMBOL_GPL(sas_ata_device_link_abort); From 7fb3ff22ad8772bbf0e3ce1ef3eb7b09f431807f Mon Sep 17 00:00:00 2001 From: Yair Podemsky Date: Wed, 30 Nov 2022 14:51:21 +0200 Subject: [PATCH 0465/1593] sched/core: Fix arch_scale_freq_tick() on tickless systems In order for the scheduler to be frequency invariant we measure the ratio between the maximum CPU frequency and the actual CPU frequency. During long tickless periods of time the calculations that keep track of that might overflow, in the function scale_freq_tick(): if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) goto error; eventually forcing the kernel to disable the feature for all CPUs, and show the warning message: "Scheduler frequency invariance went wobbly, disabling!". Let's avoid that by limiting the frequency invariant calculations to CPUs with regular tick. Fixes: e2b0d619b400 ("x86, sched: check for counters overflow in frequency invariant accounting") Suggested-by: "Peter Zijlstra (Intel)" Signed-off-by: Yair Podemsky Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Acked-by: Giovanni Gherdovich Link: https://lore.kernel.org/r/20221130125121.34407-1-ypodemsk@redhat.com --- kernel/sched/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 25b582b6ee5f7..965d813c28ad7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5504,7 +5504,9 @@ void scheduler_tick(void) unsigned long thermal_pressure; u64 resched_latency; - arch_scale_freq_tick(); + if (housekeeping_cpu(cpu, HK_TYPE_TICK)) + arch_scale_freq_tick(); + sched_clock_tick(); rq_lock(rq, &rf); From eaf2213ba563b2d74a1f2c13a6b258273f689802 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 7 Jan 2023 16:47:41 +0900 Subject: [PATCH 0466/1593] tomoyo: fix broken dependency on *.conf.default If *.conf.default is updated, builtin-policy.h should be rebuilt, but this does not work when compiled with O= option. [Without this commit] $ touch security/tomoyo/policy/exception_policy.conf.default $ make O=/tmp security/tomoyo/ make[1]: Entering directory '/tmp' GEN Makefile CALL /home/masahiro/ref/linux/scripts/checksyscalls.sh DESCEND objtool make[1]: Leaving directory '/tmp' [With this commit] $ touch security/tomoyo/policy/exception_policy.conf.default $ make O=/tmp security/tomoyo/ make[1]: Entering directory '/tmp' GEN Makefile CALL /home/masahiro/ref/linux/scripts/checksyscalls.sh DESCEND objtool POLICY security/tomoyo/builtin-policy.h CC security/tomoyo/common.o AR security/tomoyo/built-in.a make[1]: Leaving directory '/tmp' $(srctree)/ is essential because $(wildcard ) does not follow VPATH. Fixes: f02dee2d148b ("tomoyo: Do not generate empty policy files") Signed-off-by: Masahiro Yamada Signed-off-by: Tetsuo Handa --- security/tomoyo/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/tomoyo/Makefile b/security/tomoyo/Makefile index cca5a3012fee2..221eaadffb09c 100644 --- a/security/tomoyo/Makefile +++ b/security/tomoyo/Makefile @@ -10,7 +10,7 @@ endef quiet_cmd_policy = POLICY $@ cmd_policy = ($(call do_policy,profile); $(call do_policy,exception_policy); $(call do_policy,domain_policy); $(call do_policy,manager); $(call do_policy,stat)) >$@ -$(obj)/builtin-policy.h: $(wildcard $(obj)/policy/*.conf $(src)/policy/*.conf.default) FORCE +$(obj)/builtin-policy.h: $(wildcard $(obj)/policy/*.conf $(srctree)/$(src)/policy/*.conf.default) FORCE $(call if_changed,policy) $(obj)/common.o: $(obj)/builtin-policy.h From df4840c1b880136fea97a1d64724995778c2475f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 7 Jan 2023 16:47:42 +0900 Subject: [PATCH 0467/1593] tomoyo: avoid unneeded creation of builtin-policy.h When CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING=y, builtin-policy.h is unneeded. Signed-off-by: Masahiro Yamada Signed-off-by: Tetsuo Handa --- security/tomoyo/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/security/tomoyo/Makefile b/security/tomoyo/Makefile index 221eaadffb09c..1b18a02ccd2e6 100644 --- a/security/tomoyo/Makefile +++ b/security/tomoyo/Makefile @@ -13,4 +13,6 @@ quiet_cmd_policy = POLICY $@ $(obj)/builtin-policy.h: $(wildcard $(obj)/policy/*.conf $(srctree)/$(src)/policy/*.conf.default) FORCE $(call if_changed,policy) +ifndef CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING $(obj)/common.o: $(obj)/builtin-policy.h +endif From c0f7ae27539fbac267384a7bfc58296ea7550d52 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Mon, 26 Dec 2022 16:40:43 +0200 Subject: [PATCH 0468/1593] MAINTAINERS: Update email of Tudor Ambarus My professional email will change and the microchip one will bounce after mid-november of 2022. Update the MAINTAINERS file, the YAML bindings, MODULE_AUTHOR entries and author mentions, and add an entry in the .mailmap file. Signed-off-by: Tudor Ambarus Acked-by: Rob Herring Acked-by: Pratyush Yadav Acked-by: Mark Brown Acked-by: Nicolas Ferre Acked-by: Herbert Xu Acked-by: Krzysztof Kozlowski Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20221226144043.367706-1-tudor.ambarus@linaro.org --- .mailmap | 1 + .../bindings/crypto/atmel,at91sam9g46-aes.yaml | 2 +- .../bindings/crypto/atmel,at91sam9g46-sha.yaml | 2 +- .../bindings/crypto/atmel,at91sam9g46-tdes.yaml | 2 +- .../devicetree/bindings/spi/atmel,at91rm9200-spi.yaml | 2 +- .../devicetree/bindings/spi/atmel,quadspi.yaml | 2 +- MAINTAINERS | 10 +++++----- drivers/crypto/atmel-ecc.c | 4 ++-- drivers/crypto/atmel-i2c.c | 4 ++-- drivers/crypto/atmel-i2c.h | 2 +- 10 files changed, 16 insertions(+), 15 deletions(-) diff --git a/.mailmap b/.mailmap index ccba4cf0d8938..562f70d3b6a54 100644 --- a/.mailmap +++ b/.mailmap @@ -422,6 +422,7 @@ Tony Luck TripleX Chung TripleX Chung Tsuneo Yoshioka +Tudor Ambarus Tycho Andersen Tzung-Bi Shih Uwe Kleine-König diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml index 0ccaab16dc614..0b7383b3106b5 100644 --- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml +++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Advanced Encryption Standard (AES) HW cryptographic accelerator maintainers: - - Tudor Ambarus + - Tudor Ambarus properties: compatible: diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml index 5163c51b4547b..ee2ffb0343251 100644 --- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml +++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Secure Hash Algorithm (SHA) HW cryptographic accelerator maintainers: - - Tudor Ambarus + - Tudor Ambarus properties: compatible: diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml index fcc5adf03cadb..3d6ed24b1b006 100644 --- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml +++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Triple Data Encryption Standard (TDES) HW cryptographic accelerator maintainers: - - Tudor Ambarus + - Tudor Ambarus properties: compatible: diff --git a/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml b/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml index 4dd973e341e6c..6c57dd6c3a361 100644 --- a/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml +++ b/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel SPI device maintainers: - - Tudor Ambarus + - Tudor Ambarus allOf: - $ref: spi-controller.yaml# diff --git a/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml b/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml index 1d493add4053d..b0d99bc105352 100644 --- a/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml +++ b/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Quad Serial Peripheral Interface (QSPI) maintainers: - - Tudor Ambarus + - Tudor Ambarus allOf: - $ref: spi-controller.yaml# diff --git a/MAINTAINERS b/MAINTAINERS index 7f86d02cb427a..36c500e34508a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13620,7 +13620,7 @@ F: arch/microblaze/ MICROCHIP AT91 DMA DRIVERS M: Ludovic Desroches -M: Tudor Ambarus +M: Tudor Ambarus L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: dmaengine@vger.kernel.org S: Supported @@ -13665,7 +13665,7 @@ F: Documentation/devicetree/bindings/media/microchip,csi2dc.yaml F: drivers/media/platform/microchip/microchip-csi2dc.c MICROCHIP ECC DRIVER -M: Tudor Ambarus +M: Tudor Ambarus L: linux-crypto@vger.kernel.org S: Maintained F: drivers/crypto/atmel-ecc.* @@ -13762,7 +13762,7 @@ S: Maintained F: drivers/mmc/host/atmel-mci.c MICROCHIP NAND DRIVER -M: Tudor Ambarus +M: Tudor Ambarus L: linux-mtd@lists.infradead.org S: Supported F: Documentation/devicetree/bindings/mtd/atmel-nand.txt @@ -13814,7 +13814,7 @@ S: Supported F: drivers/power/reset/at91-sama5d2_shdwc.c MICROCHIP SPI DRIVER -M: Tudor Ambarus +M: Tudor Ambarus S: Supported F: drivers/spi/spi-atmel.* @@ -19665,7 +19665,7 @@ F: drivers/clk/spear/ F: drivers/pinctrl/spear/ SPI NOR SUBSYSTEM -M: Tudor Ambarus +M: Tudor Ambarus M: Pratyush Yadav R: Michael Walle L: linux-mtd@lists.infradead.org diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c index 53100fb9b07bd..12205e2b53b45 100644 --- a/drivers/crypto/atmel-ecc.c +++ b/drivers/crypto/atmel-ecc.c @@ -3,7 +3,7 @@ * Microchip / Atmel ECC (I2C) driver. * * Copyright (c) 2017, Microchip Technology Inc. - * Author: Tudor Ambarus + * Author: Tudor Ambarus */ #include @@ -411,6 +411,6 @@ static void __exit atmel_ecc_exit(void) module_init(atmel_ecc_init); module_exit(atmel_ecc_exit); -MODULE_AUTHOR("Tudor Ambarus "); +MODULE_AUTHOR("Tudor Ambarus"); MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c index 81ce09bedda8f..55bff1e131426 100644 --- a/drivers/crypto/atmel-i2c.c +++ b/drivers/crypto/atmel-i2c.c @@ -3,7 +3,7 @@ * Microchip / Atmel ECC (I2C) driver. * * Copyright (c) 2017, Microchip Technology Inc. - * Author: Tudor Ambarus + * Author: Tudor Ambarus */ #include @@ -390,6 +390,6 @@ static void __exit atmel_i2c_exit(void) module_init(atmel_i2c_init); module_exit(atmel_i2c_exit); -MODULE_AUTHOR("Tudor Ambarus "); +MODULE_AUTHOR("Tudor Ambarus"); MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h index 48929efe2a5bf..35f7857a7f7cc 100644 --- a/drivers/crypto/atmel-i2c.h +++ b/drivers/crypto/atmel-i2c.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2017, Microchip Technology Inc. - * Author: Tudor Ambarus + * Author: Tudor Ambarus */ #ifndef __ATMEL_I2C_H__ From 5304930dbae82d259bcf7e5611db7c81e7a42eff Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 7 Jan 2023 10:15:35 -0500 Subject: [PATCH 0469/1593] NFSD: Use set_bit(RQ_DROPME) The premise that "Once an svc thread is scheduled and executing an RPC, no other processes will touch svc_rqst::rq_flags" is false. svc_xprt_enqueue() examines the RQ_BUSY flag in scheduled nfsd threads when determining which thread to wake up next. Fixes: 9315564747cb ("NFSD: Use only RQ_DROPME to signal the need to drop a reply") Signed-off-by: Chuck Lever --- fs/nfsd/nfsproc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index a5570cf75f3fd..9744443c39652 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } @@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } From 43d5f5d63699724d47f0d9e0eae516a260d232b4 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 6 Jan 2023 13:44:56 +0000 Subject: [PATCH 0470/1593] riscv: dts: sifive: fu740: fix size of pcie 32bit memory The 32-bit memory resource is needed for non-prefetchable memory allocations on the PCIe bus, however with some cards (such as the SM768) the system fails to allocate memory from this. Checking the allocation against the datasheet, it looks like there has been a mis-calcualation of the resource for the first memory region (0x0060090000..0x0070ffffff) which in the data-sheet for the fu740 (v1p2) is from 0x0060000000..0x007fffffff. Changing this to allocate from 0x0060090000..0x007fffffff fixes the probing issues. Fixes: ae80d5148085 ("riscv: dts: Add PCIe support for the SiFive FU740-C000 SoC") Cc: Paul Walmsley Cc: Greentime Hu Signed-off-by: Ben Dooks Cc: stable@vger.kernel.org Tested-by: Ron Economos # from IRC Reviewed-by: Conor Dooley Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/sifive/fu740-c000.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi index 43bed6c0a84fe..5235fd1c9cb67 100644 --- a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi @@ -328,7 +328,7 @@ bus-range = <0x0 0xff>; ranges = <0x81000000 0x0 0x60080000 0x0 0x60080000 0x0 0x10000>, /* I/O */ <0x82000000 0x0 0x60090000 0x0 0x60090000 0x0 0xff70000>, /* mem */ - <0x82000000 0x0 0x70000000 0x0 0x70000000 0x0 0x1000000>, /* mem */ + <0x82000000 0x0 0x70000000 0x0 0x70000000 0x0 0x10000000>, /* mem */ <0xc3000000 0x20 0x00000000 0x20 0x00000000 0x20 0x00000000>; /* mem prefetchable */ num-lanes = <0x8>; interrupts = <56>, <57>, <58>, <59>, <60>, <61>, <62>, <63>, <64>; From c9c1d6d82091f05b4dabf2c624bdaeba19bdf893 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 3 Jan 2023 15:03:48 +0200 Subject: [PATCH 0471/1593] iio: imu: st_lsm6dsx: fix build when CONFIG_IIO_TRIGGERED_BUFFER=m The following kernel linkage error: st_lsm6dsx_core.o: in function `st_lsm6dsx_sw_buffers_setup': st_lsm6dsx_core.c:2578: undefined reference to `devm_iio_triggered_buffer_setup_ext' is caused by the fact that the object owning devm_iio_triggered_buffer_setup_ext() (drivers/iio/buffer/industrialio-triggered-buffer.o) is allowed to be built as module when its user (drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c) is built-in. The st_lsm6dsx driver already has a "select IIO_BUFFER", so add another select for IIO_TRIGGERED_BUFFER, to make that option follow what is set for the st_lsm6dsx driver. This is similar to what other iio drivers do. Fixes: 2cfb2180c3e8 ("iio: imu: st_lsm6dsx: introduce sw trigger support") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230103130348.1733467-1-vladimir.oltean@nxp.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/imu/st_lsm6dsx/Kconfig b/drivers/iio/imu/st_lsm6dsx/Kconfig index f6660847fb58d..8c16cdacf2f21 100644 --- a/drivers/iio/imu/st_lsm6dsx/Kconfig +++ b/drivers/iio/imu/st_lsm6dsx/Kconfig @@ -4,6 +4,7 @@ config IIO_ST_LSM6DSX tristate "ST_LSM6DSx driver for STM 6-axis IMU MEMS sensors" depends on (I2C || SPI || I3C) select IIO_BUFFER + select IIO_TRIGGERED_BUFFER select IIO_KFIFO_BUF select IIO_ST_LSM6DSX_I2C if (I2C) select IIO_ST_LSM6DSX_SPI if (SPI_MASTER) From 115d9d77bb0f9152c60b6e8646369fa7f6167593 Mon Sep 17 00:00:00 2001 From: Aaron Thompson Date: Fri, 6 Jan 2023 22:22:44 +0000 Subject: [PATCH 0472/1593] mm: Always release pages to the buddy allocator in memblock_free_late(). If CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, memblock_free_pages() only releases pages to the buddy allocator if they are not in the deferred range. This is correct for free pages (as defined by for_each_free_mem_pfn_range_in_zone()) because free pages in the deferred range will be initialized and released as part of the deferred init process. memblock_free_pages() is called by memblock_free_late(), which is used to free reserved ranges after memblock_free_all() has run. All pages in reserved ranges have been initialized at that point, and accordingly, those pages are not touched by the deferred init process. This means that currently, if the pages that memblock_free_late() intends to release are in the deferred range, they will never be released to the buddy allocator. They will forever be reserved. In addition, memblock_free_pages() calls kmsan_memblock_free_pages(), which is also correct for free pages but is not correct for reserved pages. KMSAN metadata for reserved pages is initialized by kmsan_init_shadow(), which runs shortly before memblock_free_all(). For both of these reasons, memblock_free_pages() should only be called for free pages, and memblock_free_late() should call __free_pages_core() directly instead. One case where this issue can occur in the wild is EFI boot on x86_64. The x86 EFI code reserves all EFI boot services memory ranges via memblock_reserve() and frees them later via memblock_free_late() (efi_reserve_boot_services() and efi_free_boot_services(), respectively). If any of those ranges happens to fall within the deferred init range, the pages will not be released and that memory will be unavailable. For example, on an Amazon EC2 t3.micro VM (1 GB) booting via EFI: v6.2-rc2: # grep -E 'Node|spanned|present|managed' /proc/zoneinfo Node 0, zone DMA spanned 4095 present 3999 managed 3840 Node 0, zone DMA32 spanned 246652 present 245868 managed 178867 v6.2-rc2 + patch: # grep -E 'Node|spanned|present|managed' /proc/zoneinfo Node 0, zone DMA spanned 4095 present 3999 managed 3840 Node 0, zone DMA32 spanned 246652 present 245868 managed 222816 # +43,949 pages Fixes: 3a80a7fa7989 ("mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set") Signed-off-by: Aaron Thompson Link: https://lore.kernel.org/r/01010185892de53e-e379acfb-7044-4b24-b30a-e2657c1ba989-000000@us-west-2.amazonses.com Signed-off-by: Mike Rapoport (IBM) --- mm/memblock.c | 8 +++++++- tools/testing/memblock/internal.h | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index d036c7861310c..685e30e6d27c5 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1640,7 +1640,13 @@ void __init memblock_free_late(phys_addr_t base, phys_addr_t size) end = PFN_DOWN(base + size); for (; cursor < end; cursor++) { - memblock_free_pages(pfn_to_page(cursor), cursor, 0); + /* + * Reserved pages are always initialized by the end of + * memblock_free_all() (by memmap_init() and, if deferred + * initialization is enabled, memmap_init_reserved_pages()), so + * these pages can be released directly to the buddy allocator. + */ + __free_pages_core(pfn_to_page(cursor), 0); totalram_pages_inc(); } } diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index fdb7f5db73082..85973e55489e7 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -15,6 +15,10 @@ bool mirrored_kernelcore = false; struct page {}; +void __free_pages_core(struct page *page, unsigned int order) +{ +} + void memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order) { From e6db6f9398dadcbc06318a133d4c44a2d3844e61 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 8 Jan 2023 10:39:17 -0700 Subject: [PATCH 0473/1593] io_uring/io-wq: only free worker if it was allocated for creation We have two types of task_work based creation, one is using an existing worker to setup a new one (eg when going to sleep and we have no free workers), and the other is allocating a new worker. Only the latter should be freed when we cancel task_work creation for a new worker. Fixes: af82425c6a2d ("io_uring/io-wq: free worker if task_work creation is canceled") Reported-by: syzbot+d56ec896af3637bdb7e4@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- io_uring/io-wq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 992dcd9f8c4cf..411bb2d1acd45 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1230,7 +1230,12 @@ static void io_wq_cancel_tw_create(struct io_wq *wq) worker = container_of(cb, struct io_worker, create_work); io_worker_cancel_cb(worker); - kfree(worker); + /* + * Only the worker continuation helper has worker allocated and + * hence needs freeing. + */ + if (cb->func == create_worker_cont) + kfree(worker); } } From b7bfaa761d760e72a969d116517eaa12e404c262 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 8 Jan 2023 11:49:43 -0600 Subject: [PATCH 0474/1593] Linux 6.2-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dfba294ae7907..460716314fb3c 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 2 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From f71eaf2708be7831428eacae7db25d8ec6b8b4c5 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Wed, 23 Nov 2022 09:42:00 +0000 Subject: [PATCH 0475/1593] bus: sunxi-rsb: Fix error handling in sunxi_rsb_init() The sunxi_rsb_init() returns the platform_driver_register() directly without checking its return value, if platform_driver_register() failed, the sunxi_rsb_bus is not unregistered. Fix by unregister sunxi_rsb_bus when platform_driver_register() failed. Fixes: d787dcdb9c8f ("bus: sunxi-rsb: Add driver for Allwinner Reduced Serial Bus") Signed-off-by: Yuan Can Reviewed-by: Jernej Skrabec Link: https://lore.kernel.org/r/20221123094200.12036-1-yuancan@huawei.com Signed-off-by: Jernej Skrabec --- drivers/bus/sunxi-rsb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 3aa91aed3bf73..226e87b85116e 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -857,7 +857,13 @@ static int __init sunxi_rsb_init(void) return ret; } - return platform_driver_register(&sunxi_rsb_driver); + ret = platform_driver_register(&sunxi_rsb_driver); + if (ret) { + bus_unregister(&sunxi_rsb_bus); + return ret; + } + + return 0; } module_init(sunxi_rsb_init); From e3c9405ec56b6b0a75d993427bfc7f4194f73754 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 7 Jan 2023 15:37:47 +0100 Subject: [PATCH 0476/1593] docs: kbuild: remove mention to dropped $(objtree) feature Commit 8d613a1d048c ("kbuild: drop $(objtree)/ prefix support for clean-files") dropped support for prefixing with $(objtree). Thus update the documentation to match that change. Signed-off-by: Miguel Ojeda Signed-off-by: Masahiro Yamada --- Documentation/kbuild/makefiles.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index 6b7368d1f5163..38bc74eaa5474 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -1042,7 +1042,7 @@ $(clean-files). When executing "make clean", the file "crc32table.h" will be deleted. Kbuild will assume files to be in the same relative directory as the -Makefile, except if prefixed with $(objtree). +Makefile. To exclude certain files or directories from make clean, use the $(no-clean-files) variable. From b409ea4534204d4e6da3017d8713ce338f44b489 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 8 Jan 2023 21:35:57 +0900 Subject: [PATCH 0477/1593] init/version-timestamp.c: remove unneeded #include The kbuild test robot detected this by 'make versioncheck'. Fixes: 2df8220cc511 ("kbuild: build init/built-in.a just once") Reported-by: kernel test robot Signed-off-by: Masahiro Yamada --- init/version-timestamp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/init/version-timestamp.c b/init/version-timestamp.c index 179e93bae5390..043cbf80a766d 100644 --- a/init/version-timestamp.c +++ b/init/version-timestamp.c @@ -2,7 +2,6 @@ #include #include -#include #include #include #include From 49e4d04f0486117ac57a97890eb1db6d52bf82b3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Jan 2023 10:34:10 -1000 Subject: [PATCH 0478/1593] block: Drop spurious might_sleep() from blk_put_queue() Dan reports the following smatch detected the following: block/blk-cgroup.c:1863 blkcg_schedule_throttle() warn: sleeping in atomic context caused by blkcg_schedule_throttle() calling blk_put_queue() in an non-sleepable context. blk_put_queue() acquired might_sleep() in 63f93fd6fa57 ("block: mark blk_put_queue as potentially blocking") which transferred the might_sleep() from blk_free_queue(). blk_free_queue() acquired might_sleep() in e8c7d14ac6c3 ("block: revert back to synchronous request_queue removal") while turning request_queue removal synchronous. However, this isn't necessary as nothing in the free path actually requires sleeping. It's pretty unusual to require a sleeping context in a put operation and it's not needed in the first place. Let's drop it. Signed-off-by: Tejun Heo Reported-by: Dan Carpenter Link: https://lkml.kernel.org/r/Y7g3L6fntnTtOm63@kili Cc: Christoph Hellwig Cc: Luis Chamberlain Fixes: e8c7d14ac6c3 ("block: revert back to synchronous request_queue removal") # v5.9+ Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/Y7iFwjN+XzWvLv3y@slm.duckdns.org Signed-off-by: Jens Axboe --- block/blk-core.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 9321767470dc5..b5098355d8b27 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -283,12 +283,9 @@ static void blk_free_queue(struct request_queue *q) * * Decrements the refcount of the request_queue and free it when the refcount * reaches 0. - * - * Context: Can sleep. */ void blk_put_queue(struct request_queue *q) { - might_sleep(); if (refcount_dec_and_test(&q->refs)) blk_free_queue(q); } From b0355dbbf13c0052931dd14c38c789efed64d3de Mon Sep 17 00:00:00 2001 From: Benedict Wong Date: Thu, 5 Jan 2023 21:28:12 +0000 Subject: [PATCH 0479/1593] Fix XFRM-I support for nested ESP tunnels This change adds support for nested IPsec tunnels by ensuring that XFRM-I verifies existing policies before decapsulating a subsequent policies. Addtionally, this clears the secpath entries after policies are verified, ensuring that previous tunnels with no-longer-valid do not pollute subsequent policy checks. This is necessary especially for nested tunnels, as the IP addresses, protocol and ports may all change, thus not matching the previous policies. In order to ensure that packets match the relevant inbound templates, the xfrm_policy_check should be done before handing off to the inner XFRM protocol to decrypt and decapsulate. Notably, raw ESP/AH packets did not perform policy checks inherently, whereas all other encapsulated packets (UDP, TCP encapsulated) do policy checks after calling xfrm_input handling in the respective encapsulation layer. Test: Verified with additional Android Kernel Unit tests Signed-off-by: Benedict Wong Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface_core.c | 54 +++++++++++++++++++++++++++++++--- net/xfrm/xfrm_policy.c | 3 ++ 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 1f99dc4690271..35279c220bd78 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -310,6 +310,52 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) skb->mark = 0; } +static int xfrmi_input(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type, unsigned short family) +{ + struct sec_path *sp; + + sp = skb_sec_path(skb); + if (sp && (sp->len || sp->olen) && + !xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) + goto discard; + + XFRM_SPI_SKB_CB(skb)->family = family; + if (family == AF_INET) { + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; + } else { + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + } + + return xfrm_input(skb, nexthdr, spi, encap_type); +discard: + kfree_skb(skb); + return 0; +} + +static int xfrmi4_rcv(struct sk_buff *skb) +{ + return xfrmi_input(skb, ip_hdr(skb)->protocol, 0, 0, AF_INET); +} + +static int xfrmi6_rcv(struct sk_buff *skb) +{ + return xfrmi_input(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], + 0, 0, AF_INET6); +} + +static int xfrmi4_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) +{ + return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET); +} + +static int xfrmi6_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) +{ + return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET6); +} + static int xfrmi_rcv_cb(struct sk_buff *skb, int err) { const struct xfrm_mode *inner_mode; @@ -945,8 +991,8 @@ static struct pernet_operations xfrmi_net_ops = { }; static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = { - .handler = xfrm6_rcv, - .input_handler = xfrm_input, + .handler = xfrmi6_rcv, + .input_handler = xfrmi6_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, @@ -996,8 +1042,8 @@ static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = { #endif static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = { - .handler = xfrm4_rcv, - .input_handler = xfrm_input, + .handler = xfrmi4_rcv, + .input_handler = xfrmi4_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi4_err, .priority = 10, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index e9eb82c5457d0..ed0976f8e42b7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3742,6 +3742,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, goto reject; } + if (if_id) + secpath_reset(skb); + xfrm_pols_put(pols, npols); return 1; } From 37c1785609833e626d344047a84e272b7879b2c3 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 5 Jan 2023 17:01:41 +0800 Subject: [PATCH 0480/1593] x86/xen: Remove the unused function p2m_index() The function p2m_index is defined in the p2m.c file, but not called elsewhere, so remove this unused function. arch/x86/xen/p2m.c:137:24: warning: unused function 'p2m_index'. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3557 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20230105090141.36248-1-jiapeng.chong@linux.alibaba.com Signed-off-by: Juergen Gross --- arch/x86/xen/p2m.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 58db86f7b3846..9bdc3b656b2c4 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -134,11 +134,6 @@ static inline unsigned p2m_mid_index(unsigned long pfn) return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; } -static inline unsigned p2m_index(unsigned long pfn) -{ - return pfn % P2M_PER_PAGE; -} - static void p2m_top_mfn_init(unsigned long *top) { unsigned i; From c0dccad87cf68fc6012aec7567e354353097ec1a Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 30 Nov 2022 17:36:02 +0100 Subject: [PATCH 0481/1593] hvc/xen: lock console list traversal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The currently lockless access to the xen console list in vtermno_to_xencons() is incorrect, as additions and removals from the list can happen anytime, and as such the traversal of the list to get the private console data for a given termno needs to happen with the lock held. Note users that modify the list already do so with the lock taken. Adjust current lock takers to use the _irq{save,restore} helpers, since the context in which vtermno_to_xencons() is called can have interrupts disabled. Use the _irq{save,restore} set of helpers to switch the current callers to disable interrupts in the locked region. I haven't checked if existing users could instead use the _irq variant, as I think it's safer to use _irq{save,restore} upfront. While there switch from using list_for_each_entry_safe to list_for_each_entry: the current entry cursor won't be removed as part of the code in the loop body, so using the _safe variant is pointless. Fixes: 02e19f9c7cac ('hvc_xen: implement multiconsole support') Signed-off-by: Roger Pau Monné Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/20221130163611.14686-1-roger.pau@citrix.com Signed-off-by: Juergen Gross --- drivers/tty/hvc/hvc_xen.c | 46 ++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index c879f922c716b..5bddb2f5e9318 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -52,17 +52,22 @@ static DEFINE_SPINLOCK(xencons_lock); static struct xencons_info *vtermno_to_xencons(int vtermno) { - struct xencons_info *entry, *n, *ret = NULL; + struct xencons_info *entry, *ret = NULL; + unsigned long flags; - if (list_empty(&xenconsoles)) - return NULL; + spin_lock_irqsave(&xencons_lock, flags); + if (list_empty(&xenconsoles)) { + spin_unlock_irqrestore(&xencons_lock, flags); + return NULL; + } - list_for_each_entry_safe(entry, n, &xenconsoles, list) { + list_for_each_entry(entry, &xenconsoles, list) { if (entry->vtermno == vtermno) { ret = entry; break; } } + spin_unlock_irqrestore(&xencons_lock, flags); return ret; } @@ -223,7 +228,7 @@ static int xen_hvm_console_init(void) { int r; uint64_t v = 0; - unsigned long gfn; + unsigned long gfn, flags; struct xencons_info *info; if (!xen_hvm_domain()) @@ -258,9 +263,9 @@ static int xen_hvm_console_init(void) goto err; info->vtermno = HVC_COOKIE; - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_add_tail(&info->list, &xenconsoles); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; err: @@ -283,6 +288,7 @@ static int xencons_info_pv_init(struct xencons_info *info, int vtermno) static int xen_pv_console_init(void) { struct xencons_info *info; + unsigned long flags; if (!xen_pv_domain()) return -ENODEV; @@ -299,9 +305,9 @@ static int xen_pv_console_init(void) /* already configured */ return 0; } - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); xencons_info_pv_init(info, HVC_COOKIE); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; } @@ -309,6 +315,7 @@ static int xen_pv_console_init(void) static int xen_initial_domain_console_init(void) { struct xencons_info *info; + unsigned long flags; if (!xen_initial_domain()) return -ENODEV; @@ -323,9 +330,9 @@ static int xen_initial_domain_console_init(void) info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0, false); info->vtermno = HVC_COOKIE; - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_add_tail(&info->list, &xenconsoles); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; } @@ -380,10 +387,12 @@ static void xencons_free(struct xencons_info *info) static int xen_console_remove(struct xencons_info *info) { + unsigned long flags; + xencons_disconnect_backend(info); - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_del(&info->list); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); if (info->xbdev != NULL) xencons_free(info); else { @@ -464,6 +473,7 @@ static int xencons_probe(struct xenbus_device *dev, { int ret, devid; struct xencons_info *info; + unsigned long flags; devid = dev->nodename[strlen(dev->nodename) - 1] - '0'; if (devid == 0) @@ -482,9 +492,9 @@ static int xencons_probe(struct xenbus_device *dev, ret = xencons_connect_backend(dev, info); if (ret < 0) goto error; - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_add_tail(&info->list, &xenconsoles); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; @@ -584,10 +594,12 @@ static int __init xen_hvc_init(void) info->hvc = hvc_alloc(HVC_COOKIE, info->irq, ops, 256); if (IS_ERR(info->hvc)) { + unsigned long flags; + r = PTR_ERR(info->hvc); - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_del(&info->list); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); if (info->irq) unbind_from_irqhandler(info->irq, NULL); kfree(info); From f57034cedeb6e00256313a2a6ee67f974d709b0b Mon Sep 17 00:00:00 2001 From: Oleksii Moisieiev Date: Tue, 20 Dec 2022 14:50:13 +0000 Subject: [PATCH 0482/1593] xen/pvcalls: free active map buffer on pvcalls_front_free_map Data buffer for active map is allocated in alloc_active_ring and freed in free_active_ring function, which is used only for the error cleanup. pvcalls_front_release is calling pvcalls_front_free_map which ends foreign access for this buffer, but doesn't free allocated pages. Call free_active_ring to clean all allocated resources. Signed-off-by: Oleksii Moisieiev Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/6a762ee32dd655cbb09a4aa0e2307e8919761311.1671531297.git.oleksii_moisieiev@epam.com Signed-off-by: Juergen Gross --- drivers/xen/pvcalls-front.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index 5328f4d35f25e..d5d589bda243d 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -225,6 +225,8 @@ static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id) return IRQ_HANDLED; } +static void free_active_ring(struct sock_mapping *map); + static void pvcalls_front_free_map(struct pvcalls_bedata *bedata, struct sock_mapping *map) { @@ -240,7 +242,7 @@ static void pvcalls_front_free_map(struct pvcalls_bedata *bedata, for (i = 0; i < (1 << PVCALLS_RING_ORDER); i++) gnttab_end_foreign_access(map->active.ring->ref[i], NULL); gnttab_end_foreign_access(map->active.ref, NULL); - free_page((unsigned long)map->active.ring); + free_active_ring(map); kfree(map); } From 2f440c4f04ca28e3ddf4bb6f3d25f7613abe2873 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 16 Dec 2022 01:05:52 +0100 Subject: [PATCH 0483/1593] arm64: dts: imx8mm: Reinstate GPIO watchdog always-running property on eDM SBC The GPIO watchdog property name is 'always-running', not 'always-enabled'. Use the correct property name and reinstate it into the DT. Fixes: eff6b33c9ce9 ("arm64: dts: imx8mm: Remove watchdog always-enabled property from eDM SBC") Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-data-modul-edm-sbc.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-data-modul-edm-sbc.dts b/arch/arm64/boot/dts/freescale/imx8mm-data-modul-edm-sbc.dts index 752f409a30b16..9889319d4f045 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-data-modul-edm-sbc.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-data-modul-edm-sbc.dts @@ -88,6 +88,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_watchdog_gpio>; compatible = "linux,wdt-gpio"; + always-running; gpios = <&gpio1 8 GPIO_ACTIVE_HIGH>; hw_algo = "level"; /* Reset triggers in 2..3 seconds */ From a5a36720c3f650f859f5e9535dd62d06f13f4f3b Mon Sep 17 00:00:00 2001 From: "Ivan T. Ivanov" Date: Fri, 6 Jan 2023 15:19:05 +0200 Subject: [PATCH 0484/1593] brcmfmac: Prefer DT board type over DMI board type The introduction of support for Apple board types inadvertently changed the precedence order, causing hybrid SMBIOS+DT platforms to look up the firmware using the DMI information instead of the device tree compatible to generate the board type. Revert back to the old behavior, as affected platforms use firmwares named after the DT compatible. Fixes: 7682de8b3351 ("wifi: brcmfmac: of: Fetch Apple properties") [1] https://bugzilla.opensuse.org/show_bug.cgi?id=1206697#c13 Cc: stable@vger.kernel.org Signed-off-by: Ivan T. Ivanov Reviewed-by: Hector Martin Reviewed-by: Arend van Spriel Tested-by: Peter Robinson Signed-off-by: David S. Miller --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c index a83699de01ec3..fdd0c9abc1a10 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c @@ -79,7 +79,8 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type, /* Apple ARM64 platforms have their own idea of board type, passed in * via the device tree. They also have an antenna SKU parameter */ - if (!of_property_read_string(np, "brcm,board-type", &prop)) + err = of_property_read_string(np, "brcm,board-type", &prop); + if (!err) settings->board_type = prop; if (!of_property_read_string(np, "apple,antenna-sku", &prop)) @@ -87,7 +88,7 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type, /* Set board-type to the first string of the machine compatible prop */ root = of_find_node_by_path("/"); - if (root && !settings->board_type) { + if (root && err) { char *board_type; const char *tmp; From fca053893e8d5be8173c92876c6329cbee78b880 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 2 Jan 2023 11:04:02 -0300 Subject: [PATCH 0485/1593] arm64: dts: imx8m-venice: Remove incorrect 'uart-has-rtscts' The following build warnings are seen when running: make dtbs_check DT_SCHEMA_FILES=fsl-imx-uart.yaml arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dtb: serial@30860000: cts-gpios: False schema does not allow [[33, 3, 1]] From schema: Documentation/devicetree/bindings/serial/fsl-imx-uart.yaml arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dtb: serial@30860000: rts-gpios: False schema does not allow [[33, 5, 1]] From schema: Documentation/devicetree/bindings/serial/fsl-imx-uart.yaml ... The imx8m Venice Gateworks boards do not expose the UART RTS and CTS as native UART pins, so 'uart-has-rtscts' should not be used. Using 'uart-has-rtscts' with 'rts-gpios' is an invalid combination detected by serial.yaml. Fix the problem by removing the incorrect 'uart-has-rtscts' property. Fixes: 27c8f4ccc1b9 ("arm64: dts: imx8mm-venice-gw72xx-0x: add dt overlays for serial modes") Fixes: d9a9a7cf32c9 ("arm64: dts: imx8m{m,n}-venice-*: add missing uart-has-rtscts property to UARTs") Fixes: 870f645b396b ("arm64: dts: imx8mp-venice-gw74xx: add WiFi/BT module support") Signed-off-by: Fabio Estevam Acked-by: Tim Harvey Signed-off-by: Shawn Guo --- .../boot/dts/freescale/imx8mm-venice-gw72xx-0x-rs232-rts.dtso | 1 - .../boot/dts/freescale/imx8mm-venice-gw73xx-0x-rs232-rts.dtso | 1 - arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi | 1 - arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts | 3 --- arch/arm64/boot/dts/freescale/imx8mm-venice-gw7902.dts | 3 --- arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts | 1 - arch/arm64/boot/dts/freescale/imx8mn-venice-gw7902.dts | 1 - arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts | 1 - 8 files changed, 12 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx-0x-rs232-rts.dtso b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx-0x-rs232-rts.dtso index 3ea73a6886ff4..f6ad1a4b8b665 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx-0x-rs232-rts.dtso +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx-0x-rs232-rts.dtso @@ -33,7 +33,6 @@ pinctrl-0 = <&pinctrl_uart2>; rts-gpios = <&gpio5 29 GPIO_ACTIVE_LOW>; cts-gpios = <&gpio5 28 GPIO_ACTIVE_LOW>; - uart-has-rtscts; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx-0x-rs232-rts.dtso b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx-0x-rs232-rts.dtso index 2fa635e1c1a82..1f8ea20dfafcb 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx-0x-rs232-rts.dtso +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx-0x-rs232-rts.dtso @@ -33,7 +33,6 @@ pinctrl-0 = <&pinctrl_uart2>; rts-gpios = <&gpio5 29 GPIO_ACTIVE_LOW>; cts-gpios = <&gpio5 28 GPIO_ACTIVE_LOW>; - uart-has-rtscts; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi index 244ef8d6cc688..7761d5671cb13 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi @@ -222,7 +222,6 @@ pinctrl-0 = <&pinctrl_uart3>, <&pinctrl_bten>; cts-gpios = <&gpio5 8 GPIO_ACTIVE_LOW>; rts-gpios = <&gpio5 9 GPIO_ACTIVE_LOW>; - uart-has-rtscts; status = "okay"; bluetooth { diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts index 6433c205f8dde..64b366e83fa14 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts @@ -733,7 +733,6 @@ dtr-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>; dsr-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>; dcd-gpios = <&gpio1 11 GPIO_ACTIVE_LOW>; - uart-has-rtscts; status = "okay"; }; @@ -749,7 +748,6 @@ pinctrl-0 = <&pinctrl_uart3>, <&pinctrl_uart3_gpio>; cts-gpios = <&gpio4 10 GPIO_ACTIVE_LOW>; rts-gpios = <&gpio4 9 GPIO_ACTIVE_LOW>; - uart-has-rtscts; status = "okay"; }; @@ -758,7 +756,6 @@ pinctrl-0 = <&pinctrl_uart4>, <&pinctrl_uart4_gpio>; cts-gpios = <&gpio5 11 GPIO_ACTIVE_LOW>; rts-gpios = <&gpio5 12 GPIO_ACTIVE_LOW>; - uart-has-rtscts; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7902.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7902.dts index 32872b0b1aaf3..e8bc1fccc47be 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7902.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7902.dts @@ -664,7 +664,6 @@ pinctrl-0 = <&pinctrl_uart1>, <&pinctrl_uart1_gpio>; rts-gpios = <&gpio4 10 GPIO_ACTIVE_LOW>; cts-gpios = <&gpio4 24 GPIO_ACTIVE_LOW>; - uart-has-rtscts; status = "okay"; }; @@ -681,7 +680,6 @@ pinctrl-0 = <&pinctrl_uart3>, <&pinctrl_uart3_gpio>; rts-gpios = <&gpio2 1 GPIO_ACTIVE_LOW>; cts-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>; - uart-has-rtscts; status = "okay"; bluetooth { @@ -699,7 +697,6 @@ dtr-gpios = <&gpio4 3 GPIO_ACTIVE_LOW>; dsr-gpios = <&gpio4 4 GPIO_ACTIVE_LOW>; dcd-gpios = <&gpio4 6 GPIO_ACTIVE_LOW>; - uart-has-rtscts; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts index 8ce562246a08e..acc2ba8e00a88 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts @@ -581,7 +581,6 @@ dtr-gpios = <&gpio1 0 GPIO_ACTIVE_LOW>; dsr-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>; dcd-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>; - uart-has-rtscts; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-venice-gw7902.dts b/arch/arm64/boot/dts/freescale/imx8mn-venice-gw7902.dts index b9444e4a3d2d6..7c12518dbc963 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-venice-gw7902.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-venice-gw7902.dts @@ -643,7 +643,6 @@ pinctrl-0 = <&pinctrl_uart3>, <&pinctrl_uart3_gpio>; rts-gpios = <&gpio2 1 GPIO_ACTIVE_LOW>; cts-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>; - uart-has-rtscts; status = "okay"; bluetooth { diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts index ceeca4966fc5c..8eb7d5ee38daa 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts @@ -623,7 +623,6 @@ pinctrl-0 = <&pinctrl_uart3>, <&pinctrl_uart3_gpio>; cts-gpios = <&gpio3 21 GPIO_ACTIVE_LOW>; rts-gpios = <&gpio3 22 GPIO_ACTIVE_LOW>; - uart-has-rtscts; status = "okay"; bluetooth { From 60ea6f00c57dae5e7ba2c52ed407cb24fdb11ebe Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 6 Jan 2023 07:25:01 -0600 Subject: [PATCH 0486/1593] net: ipa: correct IPA v4.7 IMEM offset Commit b310de784bacd ("net: ipa: add IPA v4.7 support") was merged despite an unresolved comment made by Konrad Dybcio. Konrad observed that the IMEM region specified for IPA v4.7 did not match that used downstream for the SM7225 SoC. In "lagoon.dtsi" present in a Sony Xperia source tree, a ipa_smmu_ap node was defined with a "qcom,additional-mapping" property that defined the IPA IMEM area starting at offset 0x146a8000 (not 0x146a9000 that was committed). The IPA v4.7 target system used for testing uses the SM7225 SoC, so we'll adhere what the downstream code specifies is the address of the IMEM region used for IPA. Link: https://lore.kernel.org/linux-arm-msm/20221208211529.757669-1-elder@linaro.org Fixes: b310de784bac ("net: ipa: add IPA v4.7 support") Tested-by: Luca Weiss Signed-off-by: Alex Elder Reviewed-by: Konrad Dybcio Signed-off-by: David S. Miller --- drivers/net/ipa/data/ipa_data-v4.7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/data/ipa_data-v4.7.c b/drivers/net/ipa/data/ipa_data-v4.7.c index 7552c400961eb..b83390c486158 100644 --- a/drivers/net/ipa/data/ipa_data-v4.7.c +++ b/drivers/net/ipa/data/ipa_data-v4.7.c @@ -357,7 +357,7 @@ static const struct ipa_mem ipa_mem_local_data[] = { static const struct ipa_mem_data ipa_mem_data = { .local_count = ARRAY_SIZE(ipa_mem_local_data), .local = ipa_mem_local_data, - .imem_addr = 0x146a9000, + .imem_addr = 0x146a8000, .imem_size = 0x00002000, .smem_id = 497, .smem_size = 0x00009000, From 2ab6478d1266b522a0a6ce3697914d63529f9e7a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 5 Jan 2023 15:22:29 -0800 Subject: [PATCH 0487/1593] mlxsw: spectrum_router: Replace 0-length array with flexible array Zero-length arrays are deprecated[1]. Replace struct mlxsw_sp_nexthop_group_info's "nexthops" 0-length array with a flexible array. Detected with GCC 13, using -fstrict-flex-arrays=3: drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c: In function 'mlxsw_sp_nexthop_group_hash_obj': drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:3278:38: warning: array subscript i is outside array bounds of 'struct mlxsw_sp_nexthop[0]' [-Warray-bounds=] 3278 | val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed); | ^~~~~~~~~~~~ drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:2954:33: note: while referencing 'nexthops' 2954 | struct mlxsw_sp_nexthop nexthops[0]; | ^~~~~~~~ [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays Cc: Ido Schimmel Cc: Petr Machata Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: "Gustavo A. R. Silva" Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Ido Schimmel Reviewed-by: Gustavo A. R. Silva Tested-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c22c3ac4e2a18..09e32778b012d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2951,7 +2951,7 @@ struct mlxsw_sp_nexthop_group_info { gateway:1, /* routes using the group use a gateway */ is_resilient:1; struct list_head list; /* member in nh_res_grp_list */ - struct mlxsw_sp_nexthop nexthops[0]; + struct mlxsw_sp_nexthop nexthops[]; #define nh_rif nexthops[0].rif }; From 9dab880d675b9d0dd56c6428e4e8352a3339371d Mon Sep 17 00:00:00 2001 From: Minsuk Kang Date: Fri, 6 Jan 2023 17:23:44 +0900 Subject: [PATCH 0488/1593] nfc: pn533: Wait for out_urb's completion in pn533_usb_send_frame() Fix a use-after-free that occurs in hcd when in_urb sent from pn533_usb_send_frame() is completed earlier than out_urb. Its callback frees the skb data in pn533_send_async_complete() that is used as a transfer buffer of out_urb. Wait before sending in_urb until the callback of out_urb is called. To modify the callback of out_urb alone, separate the complete function of out_urb and ack_urb. Found by a modified version of syzkaller. BUG: KASAN: use-after-free in dummy_timer Call Trace: memcpy (mm/kasan/shadow.c:65) dummy_perform_transfer (drivers/usb/gadget/udc/dummy_hcd.c:1352) transfer (drivers/usb/gadget/udc/dummy_hcd.c:1453) dummy_timer (drivers/usb/gadget/udc/dummy_hcd.c:1972) arch_static_branch (arch/x86/include/asm/jump_label.h:27) static_key_false (include/linux/jump_label.h:207) timer_expire_exit (include/trace/events/timer.h:127) call_timer_fn (kernel/time/timer.c:1475) expire_timers (kernel/time/timer.c:1519) __run_timers (kernel/time/timer.c:1790) run_timer_softirq (kernel/time/timer.c:1803) Fixes: c46ee38620a2 ("NFC: pn533: add NXP pn533 nfc device driver") Signed-off-by: Minsuk Kang Signed-off-by: David S. Miller --- drivers/nfc/pn533/usb.c | 44 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index 6f71ac72012ea..ed9c5e2cf3ad4 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -153,10 +153,17 @@ static int pn533_usb_send_ack(struct pn533 *dev, gfp_t flags) return usb_submit_urb(phy->ack_urb, flags); } +struct pn533_out_arg { + struct pn533_usb_phy *phy; + struct completion done; +}; + static int pn533_usb_send_frame(struct pn533 *dev, struct sk_buff *out) { struct pn533_usb_phy *phy = dev->phy; + struct pn533_out_arg arg; + void *cntx; int rc; if (phy->priv == NULL) @@ -168,10 +175,17 @@ static int pn533_usb_send_frame(struct pn533 *dev, print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1, out->data, out->len, false); + init_completion(&arg.done); + cntx = phy->out_urb->context; + phy->out_urb->context = &arg; + rc = usb_submit_urb(phy->out_urb, GFP_KERNEL); if (rc) return rc; + wait_for_completion(&arg.done); + phy->out_urb->context = cntx; + if (dev->protocol_type == PN533_PROTO_REQ_RESP) { /* request for response for sent packet directly */ rc = pn533_submit_urb_for_response(phy, GFP_KERNEL); @@ -408,7 +422,31 @@ static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy) return arg.rc; } -static void pn533_send_complete(struct urb *urb) +static void pn533_out_complete(struct urb *urb) +{ + struct pn533_out_arg *arg = urb->context; + struct pn533_usb_phy *phy = arg->phy; + + switch (urb->status) { + case 0: + break; /* success */ + case -ECONNRESET: + case -ENOENT: + dev_dbg(&phy->udev->dev, + "The urb has been stopped (status %d)\n", + urb->status); + break; + case -ESHUTDOWN: + default: + nfc_err(&phy->udev->dev, + "Urb failure (status %d)\n", + urb->status); + } + + complete(&arg->done); +} + +static void pn533_ack_complete(struct urb *urb) { struct pn533_usb_phy *phy = urb->context; @@ -496,10 +534,10 @@ static int pn533_usb_probe(struct usb_interface *interface, usb_fill_bulk_urb(phy->out_urb, phy->udev, usb_sndbulkpipe(phy->udev, out_endpoint), - NULL, 0, pn533_send_complete, phy); + NULL, 0, pn533_out_complete, phy); usb_fill_bulk_urb(phy->ack_urb, phy->udev, usb_sndbulkpipe(phy->udev, out_endpoint), - NULL, 0, pn533_send_complete, phy); + NULL, 0, pn533_ack_complete, phy); switch (id->driver_info) { case PN533_DEVICE_STD: From 7871f54e3deed68a27111dda162c4fe9b9c65f8f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Jan 2023 14:25:23 +0000 Subject: [PATCH 0489/1593] gro: take care of DODGY packets Jaroslav reported a recent throughput regression with virtio_net caused by blamed commit. It is unclear if DODGY GSO packets coming from user space can be accepted by GRO engine in the future with minimal changes, and if there is any expected gain from it. In the meantime, make sure to detect and flush DODGY packets. Fixes: 5eddb24901ee ("gro: add support of (hw)gro packets to gro stack") Signed-off-by: Eric Dumazet Reported-and-bisected-by: Jaroslav Pulchart Cc: Coco Li Cc: Paolo Abeni Signed-off-by: David S. Miller --- net/core/gro.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/gro.c b/net/core/gro.c index fd8c6a7e8d3e2..506f83d715f87 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -505,8 +505,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->count = 1; if (unlikely(skb_is_gso(skb))) { NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs; - /* Only support TCP at the moment. */ - if (!skb_is_gso_tcp(skb)) + /* Only support TCP and non DODGY users. */ + if (!skb_is_gso_tcp(skb) || + (skb_shinfo(skb)->gso_type & SKB_GSO_DODGY)) NAPI_GRO_CB(skb)->flush = 1; } From 76761babaa984fce8ecd87d87a68d920f24df438 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 6 Jan 2023 21:15:07 +0100 Subject: [PATCH 0490/1593] net: lan966x: Allow to add rules in TCAM even if not enabled The blamed commit implemented the vcap_operations to allow to add an entry in the TCAM. One of the callbacks is to validate the supported keysets. If the TCAM lookup was not enabled, then this will return failure so no entries could be added. This doesn't make much sense, as you can enable at a later point the TCAM. Therefore change it such to allow entries in TCAM even it is not enabled. Fixes: 4426b78c626d ("net: lan966x: Add port keyset config and callback interface") Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c index d8dc9fbb81e1a..a54c0426a35f3 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c @@ -95,10 +95,7 @@ lan966x_vcap_is2_get_port_keysets(struct net_device *dev, int lookup, bool found = false; u32 val; - /* Check if the port keyset selection is enabled */ val = lan_rd(lan966x, ANA_VCAP_S2_CFG(port->chip_port)); - if (!ANA_VCAP_S2_CFG_ENA_GET(val)) - return -ENOENT; /* Collect all keysets for the port in a list */ if (l3_proto == ETH_P_ALL) From 7d6ceeb1875cc08dc3d1e558e191434d94840cd5 Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Sat, 7 Jan 2023 04:40:20 +0100 Subject: [PATCH 0491/1593] af_unix: selftest: Fix the size of the parameter to connect() Adjust size parameter in connect() to match the type of the parameter, to fix "No such file or directory" error in selftests/net/af_unix/ test_oob_unix.c:127. The existing code happens to work provided that the autogenerated pathname is shorter than sizeof (struct sockaddr), which is why it hasn't been noticed earlier. Visible from the trace excerpt: bind(3, {sa_family=AF_UNIX, sun_path="unix_oob_453059"}, 110) = 0 clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fa6a6577a10) = 453060 [pid ] connect(6, {sa_family=AF_UNIX, sun_path="unix_oob_45305"}, 16) = -1 ENOENT (No such file or directory) BUG: The filename is trimmed to sizeof (struct sockaddr). Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Shuah Khan Cc: Kuniyuki Iwashima Cc: Florian Westphal Reviewed-by: Florian Westphal Fixes: 314001f0bf92 ("af_unix: Add OOB support") Signed-off-by: Mirsad Goran Todorovac Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- tools/testing/selftests/net/af_unix/test_unix_oob.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c index b57e91e1c3f28..532459a15067c 100644 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -124,7 +124,7 @@ void producer(struct sockaddr_un *consumer_addr) wait_for_signal(pipefd[0]); if (connect(cfd, (struct sockaddr *)consumer_addr, - sizeof(struct sockaddr)) != 0) { + sizeof(*consumer_addr)) != 0) { perror("Connect failed"); kill(0, SIGTERM); exit(1); From 23257cfc1cb7202fd0065e9f4a6a0aac1c04c4a9 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 29 Dec 2022 14:10:30 +0200 Subject: [PATCH 0492/1593] thunderbolt: Do not call PM runtime functions in tb_retimer_scan() We cannot call PM runtime functions in tb_retimer_scan() because it will also be called when retimers are scanned from userspace (happens when there is no device connected on ChromeOS for instance) and at the same USB4 port runtime resume hook. This leads to hang because neither can proceed. Fix this by runtime resuming USB4 ports in tb_scan_port() instead. This makes sure the ports are runtime PM active when retimers are added under it while avoiding the reported hang as well. Reported-by: Utkarsh Patel Fixes: 1e56c88adecc ("thunderbolt: Runtime resume USB4 port when retimers are scanned") Cc: stable@vger.kernel.org Acked-by: Yehezkel Bernat Signed-off-by: Mika Westerberg --- drivers/thunderbolt/retimer.c | 17 +++-------------- drivers/thunderbolt/tb.c | 20 +++++++++++++++----- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/thunderbolt/retimer.c b/drivers/thunderbolt/retimer.c index 6ebe7a2886ec7..56008eb91e2e4 100644 --- a/drivers/thunderbolt/retimer.c +++ b/drivers/thunderbolt/retimer.c @@ -427,13 +427,6 @@ int tb_retimer_scan(struct tb_port *port, bool add) { u32 status[TB_MAX_RETIMER_INDEX + 1] = {}; int ret, i, last_idx = 0; - struct usb4_port *usb4; - - usb4 = port->usb4; - if (!usb4) - return 0; - - pm_runtime_get_sync(&usb4->dev); /* * Send broadcast RT to make sure retimer indices facing this @@ -441,7 +434,7 @@ int tb_retimer_scan(struct tb_port *port, bool add) */ ret = usb4_port_enumerate_retimers(port); if (ret) - goto out; + return ret; /* * Enable sideband channel for each retimer. We can do this @@ -471,11 +464,11 @@ int tb_retimer_scan(struct tb_port *port, bool add) break; } - ret = 0; if (!last_idx) - goto out; + return 0; /* Add on-board retimers if they do not exist already */ + ret = 0; for (i = 1; i <= last_idx; i++) { struct tb_retimer *rt; @@ -489,10 +482,6 @@ int tb_retimer_scan(struct tb_port *port, bool add) } } -out: - pm_runtime_mark_last_busy(&usb4->dev); - pm_runtime_put_autosuspend(&usb4->dev); - return ret; } diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 4628458044270..3f1ab30c4fb15 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -628,11 +628,15 @@ static void tb_scan_port(struct tb_port *port) * Downstream switch is reachable through two ports. * Only scan on the primary port (link_nr == 0). */ + + if (port->usb4) + pm_runtime_get_sync(&port->usb4->dev); + if (tb_wait_for_port(port, false) <= 0) - return; + goto out_rpm_put; if (port->remote) { tb_port_dbg(port, "port already has a remote\n"); - return; + goto out_rpm_put; } tb_retimer_scan(port, true); @@ -647,12 +651,12 @@ static void tb_scan_port(struct tb_port *port) */ if (PTR_ERR(sw) == -EIO || PTR_ERR(sw) == -EADDRNOTAVAIL) tb_scan_xdomain(port); - return; + goto out_rpm_put; } if (tb_switch_configure(sw)) { tb_switch_put(sw); - return; + goto out_rpm_put; } /* @@ -681,7 +685,7 @@ static void tb_scan_port(struct tb_port *port) if (tb_switch_add(sw)) { tb_switch_put(sw); - return; + goto out_rpm_put; } /* Link the switches using both links if available */ @@ -733,6 +737,12 @@ static void tb_scan_port(struct tb_port *port) tb_add_dp_resources(sw); tb_scan_switch(sw); + +out_rpm_put: + if (port->usb4) { + pm_runtime_mark_last_busy(&port->usb4->dev); + pm_runtime_put_autosuspend(&port->usb4->dev); + } } static void tb_deactivate_and_free_tunnel(struct tb_tunnel *tunnel) From e8ff07fb33026c5c1bb5b81293496faba5d68059 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 20 May 2022 13:35:19 +0300 Subject: [PATCH 0493/1593] thunderbolt: Use correct function to calculate maximum USB3 link rate We need to take minimum of both sides of the USB3 link into consideration, not just the downstream port. Fix this by calling tb_usb3_max_link_rate() instead. Fixes: 0bd680cd900c ("thunderbolt: Add USB3 bandwidth management") Cc: stable@vger.kernel.org Acked-by: Yehezkel Bernat Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c index 2c3cf7fc33571..1fc3c29b24f83 100644 --- a/drivers/thunderbolt/tunnel.c +++ b/drivers/thunderbolt/tunnel.c @@ -1275,7 +1275,7 @@ static void tb_usb3_reclaim_available_bandwidth(struct tb_tunnel *tunnel, return; } else if (!ret) { /* Use maximum link rate if the link valid is not set */ - ret = usb4_usb3_port_max_link_rate(tunnel->src_port); + ret = tb_usb3_max_link_rate(tunnel->dst_port, tunnel->src_port); if (ret < 0) { tb_tunnel_warn(tunnel, "failed to read maximum link rate\n"); return; From 84ee211c83212f4d35b56e0603acdcc41f860f1b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 8 Sep 2022 09:45:22 +0300 Subject: [PATCH 0494/1593] thunderbolt: Disable XDomain lane 1 only in software connection manager When firmware connection manager is in use we should not touch the lane adapter (well or any) configuration space so do this only when we know that the software connection manager is active. Fixes: 8e1de7042596 ("thunderbolt: Add support for XDomain lane bonding") Cc: stable@vger.kernel.org Acked-by: Yehezkel Bernat Signed-off-by: Mika Westerberg --- drivers/thunderbolt/xdomain.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c index cfa83486c9dad..3c51e47dd86b7 100644 --- a/drivers/thunderbolt/xdomain.c +++ b/drivers/thunderbolt/xdomain.c @@ -1419,12 +1419,19 @@ static int tb_xdomain_get_properties(struct tb_xdomain *xd) * registered, we notify the userspace that it has changed. */ if (!update) { - struct tb_port *port; + /* + * Now disable lane 1 if bonding was not enabled. Do + * this only if bonding was possible at the beginning + * (that is we are the connection manager and there are + * two lanes). + */ + if (xd->bonding_possible) { + struct tb_port *port; - /* Now disable lane 1 if bonding was not enabled */ - port = tb_port_at(xd->route, tb_xdomain_parent(xd)); - if (!port->bonded) - tb_port_disable(port->dual_link_port); + port = tb_port_at(xd->route, tb_xdomain_parent(xd)); + if (!port->bonded) + tb_port_disable(port->dual_link_port); + } if (device_add(&xd->dev)) { dev_err(&xd->dev, "failed to add XDomain device\n"); From f2193bb2ee6e21d9c2da10ea3ce63b94aea69341 Mon Sep 17 00:00:00 2001 From: Xu Panda Date: Fri, 23 Dec 2022 10:40:56 +0800 Subject: [PATCH 0495/1593] parisc: pdc_stable: use strscpy() to instead of strncpy() The implementation of strscpy() is more robust and safer. That's now the recommended way to copy NUL-terminated strings. Signed-off-by: Xu Panda Signed-off-by: Yang Yang Signed-off-by: Helge Deller --- drivers/parisc/pdc_stable.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index d6af5726ddf35..2a18f7ba2398b 100644 --- a/drivers/parisc/pdc_stable.c +++ b/drivers/parisc/pdc_stable.c @@ -274,8 +274,7 @@ pdcspath_hwpath_write(struct pdcspath_entry *entry, const char *buf, size_t coun /* We'll use a local copy of buf */ count = min_t(size_t, count, sizeof(in)-1); - strncpy(in, buf, count); - in[count] = '\0'; + strscpy(in, buf, count + 1); /* Let's clean up the target. 0xff is a blank pattern */ memset(&hwpath, 0xff, sizeof(hwpath)); @@ -388,8 +387,7 @@ pdcspath_layer_write(struct pdcspath_entry *entry, const char *buf, size_t count /* We'll use a local copy of buf */ count = min_t(size_t, count, sizeof(in)-1); - strncpy(in, buf, count); - in[count] = '\0'; + strscpy(in, buf, count + 1); /* Let's clean up the target. 0 is a blank pattern */ memset(&layers, 0, sizeof(layers)); @@ -756,8 +754,7 @@ static ssize_t pdcs_auto_write(struct kobject *kobj, /* We'll use a local copy of buf */ count = min_t(size_t, count, sizeof(in)-1); - strncpy(in, buf, count); - in[count] = '\0'; + strscpy(in, buf, count + 1); /* Current flags are stored in primary boot path entry */ pathentry = &pdcspath_entry_primary; From 5d1335dabb3c493a3d6d5b233953b6ac7b6c1ff2 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 19 Dec 2022 20:56:36 +0100 Subject: [PATCH 0496/1593] parisc: Fix return code of pdc_iodc_print() There is an off-by-one if the printed string includes a new-line char. Cc: stable@vger.kernel.org Signed-off-by: Helge Deller --- arch/parisc/kernel/firmware.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 4dfe1f49c5c8b..6817892a2c585 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -1303,7 +1303,7 @@ static char iodc_dbuf[4096] __page_aligned_bss; */ int pdc_iodc_print(const unsigned char *str, unsigned count) { - unsigned int i; + unsigned int i, found = 0; unsigned long flags; count = min_t(unsigned int, count, sizeof(iodc_dbuf)); @@ -1315,6 +1315,7 @@ int pdc_iodc_print(const unsigned char *str, unsigned count) iodc_dbuf[i+0] = '\r'; iodc_dbuf[i+1] = '\n'; i += 2; + found = 1; goto print; default: iodc_dbuf[i] = str[i]; @@ -1330,7 +1331,7 @@ int pdc_iodc_print(const unsigned char *str, unsigned count) __pa(pdc_result), 0, __pa(iodc_dbuf), i, 0); spin_unlock_irqrestore(&pdc_lock, flags); - return i; + return i - found; } #if !defined(BOOTLOADER) From 5b3fc9988d1e4be268d834c2aaae85c75fa34253 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Tue, 7 Jun 2022 21:23:29 +0200 Subject: [PATCH 0497/1593] fbdev: aty128fb: Use backlight helper Instead of retrieving the backlight brightness in struct backlight_properties manually, and then checking whether the backlight should be on at all, use backlight_get_brightness() which does all this and insulates this from future changes. Signed-off-by: Stephen Kitt Cc: Paul Mackerras Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Reviewed-by: Sam Ravnborg Reviewed-by: Daniel Thompson Signed-off-by: Helge Deller --- drivers/video/fbdev/aty/aty128fb.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/aty/aty128fb.c b/drivers/video/fbdev/aty/aty128fb.c index dd31b9d7d337c..36a9ac05a340f 100644 --- a/drivers/video/fbdev/aty/aty128fb.c +++ b/drivers/video/fbdev/aty/aty128fb.c @@ -1766,12 +1766,10 @@ static int aty128_bl_update_status(struct backlight_device *bd) unsigned int reg = aty_ld_le32(LVDS_GEN_CNTL); int level; - if (bd->props.power != FB_BLANK_UNBLANK || - bd->props.fb_blank != FB_BLANK_UNBLANK || - !par->lcd_on) + if (!par->lcd_on) level = 0; else - level = bd->props.brightness; + level = backlight_get_brightness(bd); reg |= LVDS_BL_MOD_EN | LVDS_BLON; if (level > 0) { From 1535ec976414d2ea8d29d40da6b5b99c91e0f2bf Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Tue, 7 Jun 2022 21:23:30 +0200 Subject: [PATCH 0498/1593] fbdev: atyfb: Use backlight helper Instead of retrieving the backlight brightness in struct backlight_properties manually, and then checking whether the backlight should be on at all, use backlight_get_brightness() which does all this and insulates this from future changes. Signed-off-by: Stephen Kitt Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Reviewed-by: Daniel Thompson Signed-off-by: Helge Deller --- drivers/video/fbdev/aty/atyfb_base.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index d59215a4992e0..b02e4e645035c 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -2219,13 +2219,7 @@ static int aty_bl_update_status(struct backlight_device *bd) { struct atyfb_par *par = bl_get_data(bd); unsigned int reg = aty_ld_lcd(LCD_MISC_CNTL, par); - int level; - - if (bd->props.power != FB_BLANK_UNBLANK || - bd->props.fb_blank != FB_BLANK_UNBLANK) - level = 0; - else - level = bd->props.brightness; + int level = backlight_get_brightness(bd); reg |= (BLMOD_EN | BIASMOD_EN); if (level > 0) { From c28509ef9c221378a586e72643fc43102f5512e9 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Tue, 7 Jun 2022 21:23:31 +0200 Subject: [PATCH 0499/1593] fbdev: radeon: Use backlight helper Instead of retrieving the backlight brightness in struct backlight_properties manually, and then checking whether the backlight should be on at all, use backlight_get_brightness() which does all this and insulates this from future changes. Signed-off-by: Stephen Kitt Cc: Benjamin Herrenschmidt Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Reviewed-by: Daniel Thompson Signed-off-by: Helge Deller --- drivers/video/fbdev/aty/radeon_backlight.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/video/fbdev/aty/radeon_backlight.c b/drivers/video/fbdev/aty/radeon_backlight.c index d2c1263ad2600..427adc838f77e 100644 --- a/drivers/video/fbdev/aty/radeon_backlight.c +++ b/drivers/video/fbdev/aty/radeon_backlight.c @@ -57,11 +57,7 @@ static int radeon_bl_update_status(struct backlight_device *bd) * backlight. This provides some greater power saving and the display * is useless without backlight anyway. */ - if (bd->props.power != FB_BLANK_UNBLANK || - bd->props.fb_blank != FB_BLANK_UNBLANK) - level = 0; - else - level = bd->props.brightness; + level = backlight_get_brightness(bd); del_timer_sync(&rinfo->lvds_timer); radeon_engine_idle(); From 973fcf37c267f937fbec656f4643571d7bd09a37 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Tue, 7 Jun 2022 21:23:32 +0200 Subject: [PATCH 0500/1593] fbdev: mx3fb: Use backlight helper Instead of retrieving the backlight brightness in struct backlight_properties manually, and then checking whether the backlight should be on at all, use backlight_get_brightness() which does all this and insulates this from future changes. Signed-off-by: Stephen Kitt Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Reviewed-by: Daniel Thompson Signed-off-by: Helge Deller --- drivers/video/fbdev/mx3fb.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/video/fbdev/mx3fb.c b/drivers/video/fbdev/mx3fb.c index b945b68984b97..76771e126d0af 100644 --- a/drivers/video/fbdev/mx3fb.c +++ b/drivers/video/fbdev/mx3fb.c @@ -283,12 +283,7 @@ static int mx3fb_bl_get_brightness(struct backlight_device *bl) static int mx3fb_bl_update_status(struct backlight_device *bl) { struct mx3fb_data *fbd = bl_get_data(bl); - int brightness = bl->props.brightness; - - if (bl->props.power != FB_BLANK_UNBLANK) - brightness = 0; - if (bl->props.fb_blank != FB_BLANK_UNBLANK) - brightness = 0; + int brightness = backlight_get_brightness(bl); fbd->backlight_level = (fbd->backlight_level & ~0xFF) | brightness; From 1cc17590ddfec590d7301f5e4cf6183ea19afa25 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Tue, 7 Jun 2022 21:23:33 +0200 Subject: [PATCH 0501/1593] fbdev: nvidia: Use backlight helper Instead of retrieving the backlight brightness in struct backlight_properties manually, and then checking whether the backlight should be on at all, use backlight_get_brightness() which does all this and insulates this from future changes. Signed-off-by: Stephen Kitt Cc: Antonino Daplas Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Reviewed-by: Daniel Thompson Signed-off-by: Helge Deller --- drivers/video/fbdev/nvidia/nv_backlight.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/video/fbdev/nvidia/nv_backlight.c b/drivers/video/fbdev/nvidia/nv_backlight.c index 2ce53529f6361..503a7a683855a 100644 --- a/drivers/video/fbdev/nvidia/nv_backlight.c +++ b/drivers/video/fbdev/nvidia/nv_backlight.c @@ -49,17 +49,11 @@ static int nvidia_bl_update_status(struct backlight_device *bd) { struct nvidia_par *par = bl_get_data(bd); u32 tmp_pcrt, tmp_pmc, fpcontrol; - int level; + int level = backlight_get_brightness(bd); if (!par->FlatPanel) return 0; - if (bd->props.power != FB_BLANK_UNBLANK || - bd->props.fb_blank != FB_BLANK_UNBLANK) - level = 0; - else - level = bd->props.brightness; - tmp_pmc = NV_RD32(par->PMC, 0x10F0) & 0x0000FFFF; tmp_pcrt = NV_RD32(par->PCRTC0, 0x081C) & 0xFFFFFFFC; fpcontrol = NV_RD32(par->PRAMDAC, 0x0848) & 0xCFFFFFCC; From 450afd92d9cb177e0337c9ac26677765aeeff81a Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Tue, 7 Jun 2022 21:23:34 +0200 Subject: [PATCH 0502/1593] fbdev: omapfb: panel-dsi-cm: Use backlight helper Instead of retrieving the backlight brightness in struct backlight_properties manually, and then checking whether the backlight should be on at all, use backlight_get_brightness() which does all this and insulates this from future changes. Signed-off-by: Stephen Kitt Cc: Helge Deller Cc: linux-omap@vger.kernel.org Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Reviewed-by: Daniel Thompson Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c index 4fc4b26a8d304..ba94a0a7bd4fc 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c @@ -331,13 +331,7 @@ static int dsicm_bl_update_status(struct backlight_device *dev) struct panel_drv_data *ddata = dev_get_drvdata(&dev->dev); struct omap_dss_device *in = ddata->in; int r; - int level; - - if (dev->props.fb_blank == FB_BLANK_UNBLANK && - dev->props.power == FB_BLANK_UNBLANK) - level = dev->props.brightness; - else - level = 0; + int level = backlight_get_brightness(dev); dev_dbg(&ddata->pdev->dev, "update brightness to %d\n", level); From 8791906e667e44eddd62b341196f93911da8a578 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Tue, 7 Jun 2022 21:23:35 +0200 Subject: [PATCH 0503/1593] fbdev: riva: Use backlight helper Instead of retrieving the backlight brightness in struct backlight_properties manually, and then checking whether the backlight should be on at all, use backlight_get_brightness() which does all this and insulates this from future changes. Signed-off-by: Stephen Kitt Cc: Antonino Daplas Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Reviewed-by: Daniel Thompson Signed-off-by: Helge Deller --- drivers/video/fbdev/riva/fbdev.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/video/fbdev/riva/fbdev.c b/drivers/video/fbdev/riva/fbdev.c index 644278146d3b7..41edc6e794603 100644 --- a/drivers/video/fbdev/riva/fbdev.c +++ b/drivers/video/fbdev/riva/fbdev.c @@ -293,13 +293,7 @@ static int riva_bl_update_status(struct backlight_device *bd) { struct riva_par *par = bl_get_data(bd); U032 tmp_pcrt, tmp_pmc; - int level; - - if (bd->props.power != FB_BLANK_UNBLANK || - bd->props.fb_blank != FB_BLANK_UNBLANK) - level = 0; - else - level = bd->props.brightness; + int level = backlight_get_brightness(bd); tmp_pmc = NV_RD32(par->riva.PMC, 0x10F0) & 0x0000FFFF; tmp_pcrt = NV_RD32(par->riva.PCRTC0, 0x081C) & 0xFFFFFFFC; From cfb47bf5a470bdd80e8ac2f7b2f3a34563ecd4ea Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 4 Jan 2023 16:07:40 -0500 Subject: [PATCH 0504/1593] arm64: dts: freescale: imx8dxl: fix sc_pwrkey's property name linux,keycode linux,keycode should be "linux,keycodes" according binding-doc Documentation/devicetree/bindings/input/fsl,scu-key.yaml Fixes: f537ee7f1e76 ("arm64: dts: freescale: add i.MX8DXL SoC support") Signed-off-by: Frank Li Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8dxl.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8dxl.dtsi b/arch/arm64/boot/dts/freescale/imx8dxl.dtsi index 0c64b91946212..214f21bd0cb49 100644 --- a/arch/arm64/boot/dts/freescale/imx8dxl.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8dxl.dtsi @@ -164,7 +164,7 @@ sc_pwrkey: keys { compatible = "fsl,imx8qxp-sc-key", "fsl,imx-sc-key"; - linux,keycode = ; + linux,keycodes = ; wakeup-source; }; From 74905e3de8adf0e6b5d7f455dcd32cdec13dfb6c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 9 Nov 2022 06:03:03 -0500 Subject: [PATCH 0505/1593] KVM: nSVM: clarify recalc_intercepts() wrt CR8 The mysterious comment "We only want the cr8 intercept bits of L1" dates back to basically the introduction of nested SVM, back when the handling of "less typical" hypervisors was very haphazard. With the development of kvm-unit-tests for interrupt handling, the same code grew another vmcb_clr_intercept for the interrupt window (VINTR) vmexit, this time with a comment that is at least decent. It turns out however that the same comment applies to the CR8 write intercept, which is also a "recheck if an interrupt should be injected" intercept. The CR8 read intercept instead has not been used by KVM for 14 years (commit 649d68643ebf, "KVM: SVM: sync TPR value to V_TPR field in the VMCB"), so do not bother clearing it and let one comment describe both CR8 write and VINTR handling. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index bc9cd7086fa97..add65dd597569 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -138,15 +138,13 @@ void recalc_intercepts(struct vcpu_svm *svm) c->intercepts[i] = h->intercepts[i]; if (g->int_ctl & V_INTR_MASKING_MASK) { - /* We only want the cr8 intercept bits of L1 */ - vmcb_clr_intercept(c, INTERCEPT_CR8_READ); - vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE); - /* - * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not - * affect any interrupt we may want to inject; therefore, - * interrupt window vmexits are irrelevant to L0. + * Once running L2 with HF_VINTR_MASK, EFLAGS.IF and CR8 + * does not affect any interrupt we may want to inject; + * therefore, writes to CR8 are irrelevant to L0, as are + * interrupt window vmexits. */ + vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE); vmcb_clr_intercept(c, INTERCEPT_VINTR); } From 45e966fcca03ecdcccac7cb236e16eea38cc18af Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 22 Oct 2022 04:17:53 -0400 Subject: [PATCH 0506/1593] KVM: x86: Do not return host topology information from KVM_GET_SUPPORTED_CPUID Passing the host topology to the guest is almost certainly wrong and will confuse the scheduler. In addition, several fields of these CPUID leaves vary on each processor; it is simply impossible to return the right values from KVM_GET_SUPPORTED_CPUID in such a way that they can be passed to KVM_SET_CPUID2. The values that will most likely prevent confusion are all zeroes. Userspace will have to override it anyway if it wishes to present a specific topology to the guest. Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 14 ++++++++++++++ arch/x86/kvm/cpuid.c | 32 ++++++++++++++++---------------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index deb494f759ed3..d8ea37dfddf42 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8310,6 +8310,20 @@ CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by ``KVM_GET_SUPPORTED_CPUID`` It can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` is present and the kernel has enabled in-kernel emulation of the local APIC. +CPU topology +~~~~~~~~~~~~ + +Several CPUID values include topology information for the host CPU: +0x0b and 0x1f for Intel systems, 0x8000001e for AMD systems. Different +versions of KVM return different values for this information and userspace +should not rely on it. Currently they return all zeroes. + +If userspace wishes to set up a guest topology, it should be careful that +the values of these three leaves differ for each CPU. In particular, +the APIC ID is found in EDX for all subleaves of 0x0b and 0x1f, and in EAX +for 0x8000001e; the latter also encodes the core id and node id in bits +7:0 of EBX and ECX respectively. + Obsolete ioctls and capabilities ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b14653b61470c..596061c1610e6 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -770,16 +770,22 @@ struct kvm_cpuid_array { int nent; }; +static struct kvm_cpuid_entry2 *get_next_cpuid(struct kvm_cpuid_array *array) +{ + if (array->nent >= array->maxnent) + return NULL; + + return &array->entries[array->nent++]; +} + static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array, u32 function, u32 index) { - struct kvm_cpuid_entry2 *entry; + struct kvm_cpuid_entry2 *entry = get_next_cpuid(array); - if (array->nent >= array->maxnent) + if (!entry) return NULL; - entry = &array->entries[array->nent++]; - memset(entry, 0, sizeof(*entry)); entry->function = function; entry->index = index; @@ -956,22 +962,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->edx = edx.full; break; } - /* - * Per Intel's SDM, the 0x1f is a superset of 0xb, - * thus they can be handled by common code. - */ case 0x1f: case 0xb: /* - * Populate entries until the level type (ECX[15:8]) of the - * previous entry is zero. Note, CPUID EAX.{0x1f,0xb}.0 is - * the starting entry, filled by the primary do_host_cpuid(). + * No topology; a valid topology is indicated by the presence + * of subleaf 1. */ - for (i = 1; entry->ecx & 0xff00; ++i) { - entry = do_host_cpuid(array, function, i); - if (!entry) - goto out; - } + entry->eax = entry->ebx = entry->ecx = 0; break; case 0xd: { u64 permitted_xcr0 = kvm_caps.supported_xcr0 & xstate_get_guest_group_perm(); @@ -1202,6 +1199,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->ebx = entry->ecx = entry->edx = 0; break; case 0x8000001e: + /* Do not return host topology information. */ + entry->eax = entry->ebx = entry->ecx = 0; + entry->edx = 0; /* reserved */ break; case 0x8000001F: if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) { From 87ca4f9efbd7cc649ff43b87970888f2812945b8 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 30 Dec 2022 23:11:19 -0500 Subject: [PATCH 0507/1593] sched/core: Fix use-after-free bug in dup_user_cpus_ptr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 07ec77a1d4e8 ("sched: Allow task CPU affinity to be restricted on asymmetric systems"), the setting and clearing of user_cpus_ptr are done under pi_lock for arm64 architecture. However, dup_user_cpus_ptr() accesses user_cpus_ptr without any lock protection. Since sched_setaffinity() can be invoked from another process, the process being modified may be undergoing fork() at the same time. When racing with the clearing of user_cpus_ptr in __set_cpus_allowed_ptr_locked(), it can lead to user-after-free and possibly double-free in arm64 kernel. Commit 8f9ea86fdf99 ("sched: Always preserve the user requested cpumask") fixes this problem as user_cpus_ptr, once set, will never be cleared in a task's lifetime. However, this bug was re-introduced in commit 851a723e45d1 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()") which allows the clearing of user_cpus_ptr in do_set_cpus_allowed(). This time, it will affect all arches. Fix this bug by always clearing the user_cpus_ptr of the newly cloned/forked task before the copying process starts and check the user_cpus_ptr state of the source task under pi_lock. Note to stable, this patch won't be applicable to stable releases. Just copy the new dup_user_cpus_ptr() function over. Fixes: 07ec77a1d4e8 ("sched: Allow task CPU affinity to be restricted on asymmetric systems") Fixes: 851a723e45d1 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()") Reported-by: David Wang 王标 Signed-off-by: Waiman Long Signed-off-by: Ingo Molnar Reviewed-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221231041120.440785-2-longman@redhat.com --- kernel/sched/core.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 965d813c28ad7..f9f6e5413dcf9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2612,19 +2612,43 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node) { + cpumask_t *user_mask; unsigned long flags; - if (!src->user_cpus_ptr) + /* + * Always clear dst->user_cpus_ptr first as their user_cpus_ptr's + * may differ by now due to racing. + */ + dst->user_cpus_ptr = NULL; + + /* + * This check is racy and losing the race is a valid situation. + * It is not worth the extra overhead of taking the pi_lock on + * every fork/clone. + */ + if (data_race(!src->user_cpus_ptr)) return 0; - dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node); - if (!dst->user_cpus_ptr) + user_mask = kmalloc_node(cpumask_size(), GFP_KERNEL, node); + if (!user_mask) return -ENOMEM; - /* Use pi_lock to protect content of user_cpus_ptr */ + /* + * Use pi_lock to protect content of user_cpus_ptr + * + * Though unlikely, user_cpus_ptr can be reset to NULL by a concurrent + * do_set_cpus_allowed(). + */ raw_spin_lock_irqsave(&src->pi_lock, flags); - cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); + if (src->user_cpus_ptr) { + swap(dst->user_cpus_ptr, user_mask); + cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); + } raw_spin_unlock_irqrestore(&src->pi_lock, flags); + + if (unlikely(user_mask)) + kfree(user_mask); + return 0; } From 9a5418bc48babb313d2a62df29ebe21ce8c06c59 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 30 Dec 2022 23:11:20 -0500 Subject: [PATCH 0508/1593] sched/core: Use kfree_rcu() in do_set_cpus_allowed() Commit 851a723e45d1 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()") may call kfree() if user_cpus_ptr was previously set. Unfortunately, some of the callers of do_set_cpus_allowed() may have pi_lock held when calling it. So the following splats may be printed especially when running with a PREEMPT_RT kernel: WARNING: possible circular locking dependency detected BUG: sleeping function called from invalid context To avoid these problems, kfree_rcu() is used instead. An internal cpumask_rcuhead union is created for the sole purpose of facilitating the use of kfree_rcu() to free the cpumask. Since user_cpus_ptr is not being used in non-SMP configs, the newly introduced alloc_user_cpus_ptr() helper will return NULL in this case and sched_setaffinity() is modified to handle this special case. Fixes: 851a723e45d1 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()") Suggested-by: Peter Zijlstra Signed-off-by: Waiman Long Signed-off-by: Ingo Molnar Reviewed-by: Peter Zijlstra Link: https://lore.kernel.org/r/20221231041120.440785-3-longman@redhat.com --- kernel/sched/core.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f9f6e5413dcf9..bb1ee6d7bddea 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2604,9 +2604,29 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) .user_mask = NULL, .flags = SCA_USER, /* clear the user requested mask */ }; + union cpumask_rcuhead { + cpumask_t cpumask; + struct rcu_head rcu; + }; __do_set_cpus_allowed(p, &ac); - kfree(ac.user_mask); + + /* + * Because this is called with p->pi_lock held, it is not possible + * to use kfree() here (when PREEMPT_RT=y), therefore punt to using + * kfree_rcu(). + */ + kfree_rcu((union cpumask_rcuhead *)ac.user_mask, rcu); +} + +static cpumask_t *alloc_user_cpus_ptr(int node) +{ + /* + * See do_set_cpus_allowed() above for the rcu_head usage. + */ + int size = max_t(int, cpumask_size(), sizeof(struct rcu_head)); + + return kmalloc_node(size, GFP_KERNEL, node); } int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, @@ -2629,7 +2649,7 @@ int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, if (data_race(!src->user_cpus_ptr)) return 0; - user_mask = kmalloc_node(cpumask_size(), GFP_KERNEL, node); + user_mask = alloc_user_cpus_ptr(node); if (!user_mask) return -ENOMEM; @@ -3605,6 +3625,11 @@ static inline bool rq_has_pinned_tasks(struct rq *rq) return false; } +static inline cpumask_t *alloc_user_cpus_ptr(int node) +{ + return NULL; +} + #endif /* !CONFIG_SMP */ static void @@ -8265,8 +8290,8 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) if (retval) goto out_put_task; - user_mask = kmalloc(cpumask_size(), GFP_KERNEL); - if (!user_mask) { + user_mask = alloc_user_cpus_ptr(NUMA_NO_NODE); + if (IS_ENABLED(CONFIG_SMP) && !user_mask) { retval = -ENOMEM; goto out_put_task; } From 01f2ea5bcf89dbd7a6530dbce7f2fb4e327e7006 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 4 Jan 2023 12:13:46 -0800 Subject: [PATCH 0509/1593] perf/x86/cstate: Add Meteor Lake support Meteor Lake is Intel's successor to Raptor lake. From the perspective of Intel cstate residency counters, there is nothing changed compared with Raptor lake. Share adl_cstates with Raptor lake. Update the comments for Meteor Lake. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Reviewed-by: Andi Kleen Link: https://lore.kernel.org/r/20230104201349.1451191-6-kan.liang@linux.intel.com --- arch/x86/events/intel/cstate.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index a2834bc93149a..3019fb1926e35 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -41,6 +41,7 @@ * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL + * MTL * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 @@ -51,50 +52,50 @@ * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL,RPL,SPR + * TGL,TNT,RKL,ADL,RPL,SPR,MTL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML, - * ICL,TGL,RKL,ADL,RPL + * ICL,TGL,RKL,ADL,RPL,MTL * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, - * RPL,SPR + * RPL,SPR,MTL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, * GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL, - * ADL,RPL + * ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL,RPL,SPR + * TGL,TNT,RKL,ADL,RPL,SPR,MTL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL, - * KBL,CML,ICL,TGL,RKL,ADL,RPL + * KBL,CML,ICL,TGL,RKL,ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL,RPL + * ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL,RPL + * ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, - * TNT,RKL,ADL,RPL + * TNT,RKL,ADL,RPL,MTL * Scope: Package (physical package) * */ @@ -686,6 +687,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &adl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); From 6887a4d3aede084bf08b70fbc9736c69fce05d7f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 4 Jan 2023 12:13:47 -0800 Subject: [PATCH 0510/1593] perf/x86/msr: Add Meteor Lake support Meteor Lake is Intel's successor to Raptor lake. PPERF and SMI_COUNT MSRs are also supported. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Reviewed-by: Andi Kleen Link: https://lore.kernel.org/r/20230104201349.1451191-7-kan.liang@linux.intel.com --- arch/x86/events/msr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index ecced3a52668a..074150d28fa82 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -107,6 +107,8 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE_P: case INTEL_FAM6_RAPTORLAKE_S: + case INTEL_FAM6_METEORLAKE: + case INTEL_FAM6_METEORLAKE_L: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; From 69ced4160969025821f2999ff92163ed26568f1c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 6 Jan 2023 08:04:48 -0800 Subject: [PATCH 0511/1593] perf/x86/msr: Add Emerald Rapids The same as Sapphire Rapids, the SMI_COUNT MSR is also supported on Emerald Rapids. Add Emerald Rapids model. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230106160449.3566477-3-kan.liang@linux.intel.com --- arch/x86/events/msr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 074150d28fa82..c65d8906cbcf4 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -69,6 +69,7 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_BROADWELL_G: case INTEL_FAM6_BROADWELL_X: case INTEL_FAM6_SAPPHIRERAPIDS_X: + case INTEL_FAM6_EMERALDRAPIDS_X: case INTEL_FAM6_ATOM_SILVERMONT: case INTEL_FAM6_ATOM_SILVERMONT_D: From 5268a2842066c227e6ccd94bac562f1e1000244f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 6 Jan 2023 08:04:49 -0800 Subject: [PATCH 0512/1593] perf/x86/intel/uncore: Add Emerald Rapids From the perspective of the uncore PMU, the new Emerald Rapids is the same as the Sapphire Rapids. The only difference is the event list, which will be supported in the perf tool later. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230106160449.3566477-4-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 6f1ccc57a6921..459b1aafd4d4a 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1833,6 +1833,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), {}, }; From 18bba1843fc7f264f58c9345d00827d082f9c558 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 9 Jan 2023 12:41:46 +0100 Subject: [PATCH 0513/1593] efi: rt-wrapper: Add missing include Add the missing #include of asm/assembler.h, which is where the ldr_l macro is defined. Fixes: ff7a167961d1b97e ("arm64: efi: Execute runtime services from a dedicated stack") Signed-off-by: Ard Biesheuvel --- arch/arm64/kernel/efi-rt-wrapper.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index a00886410537d..d872d18101d83 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -4,6 +4,7 @@ */ #include +#include SYM_FUNC_START(__efi_rt_asm_wrapper) stp x29, x30, [sp, #-112]! From c8c37bc514514999e62a17e95160ed9ebf75ca8d Mon Sep 17 00:00:00 2001 From: Lareine Khawaly Date: Wed, 21 Dec 2022 19:59:00 +0000 Subject: [PATCH 0514/1593] i2c: designware: use casting of u64 in clock multiplication to avoid overflow In functions i2c_dw_scl_lcnt() and i2c_dw_scl_hcnt() may have overflow by depending on the values of the given parameters including the ic_clk. For example in our use case where ic_clk is larger than one million, multiplication of ic_clk * 4700 will result in 32 bit overflow. Add cast of u64 to the calculation to avoid multiplication overflow, and use the corresponding define for divide. Fixes: 2373f6b9744d ("i2c-designware: split of i2c-designware.c into core and bus specific parts") Signed-off-by: Lareine Khawaly Signed-off-by: Hanna Hawa Reviewed-by: Andy Shevchenko Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-common.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c index a3240ece55b2b..581e02cc979a0 100644 --- a/drivers/i2c/busses/i2c-designware-common.c +++ b/drivers/i2c/busses/i2c-designware-common.c @@ -351,7 +351,8 @@ u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset) * * If your hardware is free from tHD;STA issue, try this one. */ - return DIV_ROUND_CLOSEST(ic_clk * tSYMBOL, MICRO) - 8 + offset; + return DIV_ROUND_CLOSEST_ULL((u64)ic_clk * tSYMBOL, MICRO) - + 8 + offset; else /* * Conditional expression: @@ -367,7 +368,8 @@ u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset) * The reason why we need to take into account "tf" here, * is the same as described in i2c_dw_scl_lcnt(). */ - return DIV_ROUND_CLOSEST(ic_clk * (tSYMBOL + tf), MICRO) - 3 + offset; + return DIV_ROUND_CLOSEST_ULL((u64)ic_clk * (tSYMBOL + tf), MICRO) - + 3 + offset; } u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset) @@ -383,7 +385,8 @@ u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset) * account the fall time of SCL signal (tf). Default tf value * should be 0.3 us, for safety. */ - return DIV_ROUND_CLOSEST(ic_clk * (tLOW + tf), MICRO) - 1 + offset; + return DIV_ROUND_CLOSEST_ULL((u64)ic_clk * (tLOW + tf), MICRO) - + 1 + offset; } int i2c_dw_set_sda_hold(struct dw_i2c_dev *dev) From 75507a319876aba88932e2c7dab58b6c22d89f6b Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 19 Dec 2022 13:01:45 +0000 Subject: [PATCH 0515/1593] i2c: designware: Fix unbalanced suspended flag Ensure that i2c_mark_adapter_suspended() is always balanced by a call to i2c_mark_adapter_resumed(). dw_i2c_plat_resume() must always be called, so that i2c_mark_adapter_resumed() is called. This is not compatible with DPM_FLAG_MAY_SKIP_RESUME, so remove the flag. Since the controller is always resumed on system resume the dw_i2c_plat_complete() callback is redundant and has been removed. The unbalanced suspended flag was introduced by commit c57813b8b288 ("i2c: designware: Lock the adapter while setting the suspended flag") Before that commit, the system and runtime PM used the same functions. The DPM_FLAG_MAY_SKIP_RESUME was used to skip the system resume if the driver had been in runtime-suspend. If system resume was skipped, the suspended flag would be cleared by the next runtime resume. The check of the suspended flag was _after_ the call to pm_runtime_get_sync() in i2c_dw_xfer(). So either a system resume or a runtime resume would clear the flag before it was checked. Having introduced the unbalanced suspended flag with that commit, a further commit 80704a84a9f8 ("i2c: designware: Use the i2c_mark_adapter_suspended/resumed() helpers") changed from using a local suspended flag to using the i2c_mark_adapter_suspended/resumed() functions. These use a flag that is checked by I2C core code before issuing the transfer to the bus driver, so there was no opportunity for the bus driver to runtime resume itself before the flag check. Signed-off-by: Richard Fitzgerald Fixes: c57813b8b288 ("i2c: designware: Lock the adapter while setting the suspended flag") Reviewed-by: Hans de Goede Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-platdrv.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index ba043b5473936..74182db03a88b 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -351,13 +351,11 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) if (dev->flags & ACCESS_NO_IRQ_SUSPEND) { dev_pm_set_driver_flags(&pdev->dev, - DPM_FLAG_SMART_PREPARE | - DPM_FLAG_MAY_SKIP_RESUME); + DPM_FLAG_SMART_PREPARE); } else { dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_SMART_PREPARE | - DPM_FLAG_SMART_SUSPEND | - DPM_FLAG_MAY_SKIP_RESUME); + DPM_FLAG_SMART_SUSPEND); } device_enable_async_suspend(&pdev->dev); @@ -419,21 +417,8 @@ static int dw_i2c_plat_prepare(struct device *dev) */ return !has_acpi_companion(dev); } - -static void dw_i2c_plat_complete(struct device *dev) -{ - /* - * The device can only be in runtime suspend at this point if it has not - * been resumed throughout the ending system suspend/resume cycle, so if - * the platform firmware might mess up with it, request the runtime PM - * framework to resume it. - */ - if (pm_runtime_suspended(dev) && pm_resume_via_firmware()) - pm_request_resume(dev); -} #else #define dw_i2c_plat_prepare NULL -#define dw_i2c_plat_complete NULL #endif #ifdef CONFIG_PM @@ -483,7 +468,6 @@ static int __maybe_unused dw_i2c_plat_resume(struct device *dev) static const struct dev_pm_ops dw_i2c_dev_pm_ops = { .prepare = dw_i2c_plat_prepare, - .complete = dw_i2c_plat_complete, SET_LATE_SYSTEM_SLEEP_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume) SET_RUNTIME_PM_OPS(dw_i2c_plat_runtime_suspend, dw_i2c_plat_runtime_resume, NULL) }; From 80f8be7af03ffe90dc4df998b16bfa212afbdde9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 8 Jan 2023 22:47:26 +0900 Subject: [PATCH 0516/1593] tomoyo: Omit use of bin2c bin2c was, as its name implies, introduced to convert a binary file to C code. However, I did not see any good reason ever for using this tool because using the .incbin directive is much faster, and often results in simpler code. Most of the uses of bin2c have been killed, for example: - 13610aa908dc ("kernel/configs: use .incbin directive to embed config_data.gz") - 4c0f032d4963 ("s390/purgatory: Omit use of bin2c") security/tomoyo/Makefile has even less reason for using bin2c because the policy files are text data. So, sed is enough for converting them to C string literals, and what is nicer, generates human-readable builtin-policy.h. This is the last user of bin2c. After this commit lands, bin2c will be removed. Signed-off-by: Masahiro Yamada [penguin-kernel: Update sed script to also escape backslash and quote ] Signed-off-by: Tetsuo Handa --- security/tomoyo/Kconfig | 1 - security/tomoyo/Makefile | 15 ++++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/security/tomoyo/Kconfig b/security/tomoyo/Kconfig index b9f867100a9ff..772d2ab58fd14 100644 --- a/security/tomoyo/Kconfig +++ b/security/tomoyo/Kconfig @@ -7,7 +7,6 @@ config SECURITY_TOMOYO select SECURITY_PATH select SECURITY_NETWORK select SRCU - select BUILD_BIN2C default n help This selects TOMOYO Linux, pathname-based access control. diff --git a/security/tomoyo/Makefile b/security/tomoyo/Makefile index 1b18a02ccd2e6..884ff155edc39 100644 --- a/security/tomoyo/Makefile +++ b/security/tomoyo/Makefile @@ -2,13 +2,14 @@ obj-y = audit.o common.o condition.o domain.o environ.o file.o gc.o group.o load_policy.o memory.o mount.o network.o realpath.o securityfs_if.o tomoyo.o util.o targets += builtin-policy.h -define do_policy -echo "static char tomoyo_builtin_$(1)[] __initdata ="; \ -$(objtree)/scripts/bin2c <$(firstword $(wildcard $(obj)/policy/$(1).conf $(srctree)/$(src)/policy/$(1).conf.default) /dev/null); \ -echo ";" -endef -quiet_cmd_policy = POLICY $@ - cmd_policy = ($(call do_policy,profile); $(call do_policy,exception_policy); $(call do_policy,domain_policy); $(call do_policy,manager); $(call do_policy,stat)) >$@ + +quiet_cmd_policy = POLICY $@ + cmd_policy = { \ + $(foreach x, profile exception_policy domain_policy manager stat, \ + printf 'static char tomoyo_builtin_$x[] __initdata =\n'; \ + sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- $(firstword $(filter %/$x.conf %/$x.conf.default, $^) /dev/null); \ + printf '\t"";\n';) \ + } > $@ $(obj)/builtin-policy.h: $(wildcard $(obj)/policy/*.conf $(srctree)/$(src)/policy/*.conf.default) FORCE $(call if_changed,policy) From f993d24a948d22710148dae2c48b8a87155f6cb9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 7 Jan 2023 01:10:23 +0900 Subject: [PATCH 0517/1593] s390: fix -Wundef warning for CONFIG_KERNEL_ZSTD Since commit 80b6093b55e3 ("kbuild: add -Wundef to KBUILD_CPPFLAGS for W=1 builds"), building with W=1 detects misuse of #(el)if. $ make W=1 ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- [snip] arch/s390/boot/decompressor.c:28:7: warning: "CONFIG_KERNEL_ZSTD" is not defined, evaluates to 0 [-Wundef] 28 | #elif CONFIG_KERNEL_ZSTD | ^~~~~~~~~~~~~~~~~~ This issue has been hidden because arch/s390/boot/Makefile overwrites KBUILD_CFLAGS, dropping -Wundef. CONFIG_KERNEL_ZSTD is a bool option. #elif defined() should be used. The line #ifdef CONFIG_KERNEL_BZIP2 is fine, but I changed it for consistency. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20230106161024.2373602-1-masahiroy@kernel.org Signed-off-by: Heiko Carstens --- arch/s390/boot/decompressor.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c index e27c2140d6206..8dcd7af2911a0 100644 --- a/arch/s390/boot/decompressor.c +++ b/arch/s390/boot/decompressor.c @@ -23,9 +23,9 @@ #define memmove memmove #define memzero(s, n) memset((s), 0, (n)) -#ifdef CONFIG_KERNEL_BZIP2 +#if defined(CONFIG_KERNEL_BZIP2) #define BOOT_HEAP_SIZE 0x400000 -#elif CONFIG_KERNEL_ZSTD +#elif defined(CONFIG_KERNEL_ZSTD) #define BOOT_HEAP_SIZE 0x30000 #else #define BOOT_HEAP_SIZE 0x10000 From 3a73746b267e5c6a87c9ad26f8c6a48e44da609c Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Tue, 20 Dec 2022 17:08:47 +0900 Subject: [PATCH 0518/1593] RDMA/rxe: Fix inaccurate constants in rxe_type_info ibv_query_device() has reported incorrect device attributes, which are actually not used by the device. Make the constants correspond with the attributes shown to users. Fixes: 3ccffe8abf2f ("RDMA/rxe: Move max_elem into rxe_type_info") Fixes: 3225717f6dfa ("RDMA/rxe: Replace red-black trees by xarrays") Link: https://lore.kernel.org/r/20221220080848.253785-1-matsuda-daisuke@fujitsu.com Signed-off-by: Daisuke Matsuda Reviewed-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_pool.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index f50620f5a0a14..1151c0b5cceab 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -23,16 +23,16 @@ static const struct rxe_type_info { .size = sizeof(struct rxe_ucontext), .elem_offset = offsetof(struct rxe_ucontext, elem), .min_index = 1, - .max_index = UINT_MAX, - .max_elem = UINT_MAX, + .max_index = RXE_MAX_UCONTEXT, + .max_elem = RXE_MAX_UCONTEXT, }, [RXE_TYPE_PD] = { .name = "pd", .size = sizeof(struct rxe_pd), .elem_offset = offsetof(struct rxe_pd, elem), .min_index = 1, - .max_index = UINT_MAX, - .max_elem = UINT_MAX, + .max_index = RXE_MAX_PD, + .max_elem = RXE_MAX_PD, }, [RXE_TYPE_AH] = { .name = "ah", @@ -40,7 +40,7 @@ static const struct rxe_type_info { .elem_offset = offsetof(struct rxe_ah, elem), .min_index = RXE_MIN_AH_INDEX, .max_index = RXE_MAX_AH_INDEX, - .max_elem = RXE_MAX_AH_INDEX - RXE_MIN_AH_INDEX + 1, + .max_elem = RXE_MAX_AH, }, [RXE_TYPE_SRQ] = { .name = "srq", @@ -49,7 +49,7 @@ static const struct rxe_type_info { .cleanup = rxe_srq_cleanup, .min_index = RXE_MIN_SRQ_INDEX, .max_index = RXE_MAX_SRQ_INDEX, - .max_elem = RXE_MAX_SRQ_INDEX - RXE_MIN_SRQ_INDEX + 1, + .max_elem = RXE_MAX_SRQ, }, [RXE_TYPE_QP] = { .name = "qp", @@ -58,7 +58,7 @@ static const struct rxe_type_info { .cleanup = rxe_qp_cleanup, .min_index = RXE_MIN_QP_INDEX, .max_index = RXE_MAX_QP_INDEX, - .max_elem = RXE_MAX_QP_INDEX - RXE_MIN_QP_INDEX + 1, + .max_elem = RXE_MAX_QP, }, [RXE_TYPE_CQ] = { .name = "cq", @@ -66,8 +66,8 @@ static const struct rxe_type_info { .elem_offset = offsetof(struct rxe_cq, elem), .cleanup = rxe_cq_cleanup, .min_index = 1, - .max_index = UINT_MAX, - .max_elem = UINT_MAX, + .max_index = RXE_MAX_CQ, + .max_elem = RXE_MAX_CQ, }, [RXE_TYPE_MR] = { .name = "mr", @@ -76,7 +76,7 @@ static const struct rxe_type_info { .cleanup = rxe_mr_cleanup, .min_index = RXE_MIN_MR_INDEX, .max_index = RXE_MAX_MR_INDEX, - .max_elem = RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX + 1, + .max_elem = RXE_MAX_MR, }, [RXE_TYPE_MW] = { .name = "mw", @@ -85,7 +85,7 @@ static const struct rxe_type_info { .cleanup = rxe_mw_cleanup, .min_index = RXE_MIN_MW_INDEX, .max_index = RXE_MAX_MW_INDEX, - .max_elem = RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX + 1, + .max_elem = RXE_MAX_MW, }, }; From 1aefe5c177c1922119afb4ee443ddd6ac3140b37 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Tue, 20 Dec 2022 17:08:48 +0900 Subject: [PATCH 0519/1593] RDMA/rxe: Prevent faulty rkey generation If you create MRs more than 0x10000 times after loading the module, responder starts to reply NAKs for RDMA/Atomic operations because of rkey violation detected in check_rkey(). The root cause is that rkeys are incremented each time a new MR is created and the value overflows into the range reserved for MWs. This commit also increases the value of RXE_MAX_MW that has been limited unlike other parameters. Fixes: 0994a1bcd5f7 ("RDMA/rxe: Bump up default maximum values used via uverbs") Link: https://lore.kernel.org/r/20221220080848.253785-2-matsuda-daisuke@fujitsu.com Signed-off-by: Daisuke Matsuda Tested-by: Li Zhijian Reviewed-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_param.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index a754fc902e3d1..7b41d79e72b2d 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -98,11 +98,11 @@ enum rxe_device_param { RXE_MAX_SRQ = DEFAULT_MAX_VALUE - RXE_MIN_SRQ_INDEX, RXE_MIN_MR_INDEX = 0x00000001, - RXE_MAX_MR_INDEX = DEFAULT_MAX_VALUE, - RXE_MAX_MR = DEFAULT_MAX_VALUE - RXE_MIN_MR_INDEX, - RXE_MIN_MW_INDEX = 0x00010001, - RXE_MAX_MW_INDEX = 0x00020000, - RXE_MAX_MW = 0x00001000, + RXE_MAX_MR_INDEX = DEFAULT_MAX_VALUE >> 1, + RXE_MAX_MR = RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX, + RXE_MIN_MW_INDEX = RXE_MAX_MR_INDEX + 1, + RXE_MAX_MW_INDEX = DEFAULT_MAX_VALUE, + RXE_MAX_MW = RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX, RXE_MAX_PKT_PER_ACK = 64, From 6f9aba7f0d74e8dd085f9ac11fe8f3fdd7fde4ca Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 5 Jan 2023 17:34:36 +0530 Subject: [PATCH 0520/1593] perf tests bpf prologue: Fix bpf-script-test-prologue test compile issue with clang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While running 'perf test' for bpf, observed that "BPF prologue generation" test case fails to compile with clang. Logs below from powerpc: :33:2: error: use of undeclared identifier 'fmode_t' fmode_t f_mode = (fmode_t)_f_mode; ^ :37:6: error: use of undeclared identifier 'f_mode'; did you mean '_f_mode'? if (f_mode & FMODE_WRITE) ^~~~~~ _f_mode :30:60: note: '_f_mode' declared here int bpf_func__null_lseek(void *ctx, int err, unsigned long _f_mode, ^ 2 errors generated. The test code tests/bpf-script-test-prologue.c uses fmode_t. And the error above is for "fmode_t" which is defined in include/linux/types.h as part of kernel build directory: "/lib/modules//build" that comes from kernel devel [ soft link to /usr/src/ ]. Clang picks this header file from "-working-directory" build option that specifies this build folder. But the commit 14e4b9f4289aed2c ("perf trace: Raw augmented syscalls fix libbpf 1.0+ compatibility") changed the include directory to use: "/usr/include". Post this change, types.h from /usr/include/ is getting picked upwhich doesn’t contain definition of "fmode_t" and hence fails to compile. Compilation command before this commit: /usr/bin/clang -D__KERNEL__ -D__NR_CPUS__=72 -DLINUX_VERSION_CODE=0x50e00 -xc -I/root/lib/perf/include/bpf -nostdinc -I./arch/powerpc/include -I./arch/powerpc/include/generated -I./include -I./arch/powerpc/include/uapi -I./arch/powerpc/include/generated/uapi -I./include/uapi -I./include/generated/uapi -include ./include/linux/compiler-version.h -include ./include/linux/kconfig.h -Wno-unused-value -Wno-pointer-sign -working-directory /lib/modules//build -c - -target bpf -g -O2 -o - Compilation command after this commit: /usr/bin/clang -D__KERNEL__ -D__NR_CPUS__=72 -DLINUX_VERSION_CODE=0x50e00 -xc -I/usr/include/ -nostdinc -I./arch/powerpc/include -I./arch/powerpc/include/generated -I./include -I./arch/powerpc/include/uapi -I./arch/powerpc/include/generated/uapi -I./include/uapi -I./include/generated/uapi -include ./include/linux/compiler-version.h -include ./include/linux/kconfig.h -Wno-unused-value -Wno-pointer-sign -working-directory /lib/modules//build -c - -target bpf -g -O2 -o - The difference is addition of -I/usr/include/ in the first line which is causing the error. Fix this by adding typedef for "fmode_t" in the testcase to solve the compile issue. Fixes: 14e4b9f4289aed2c ("perf trace: Raw augmented syscalls fix libbpf 1.0+ compatibility") Signed-off-by: Athira Jajeev Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Disha Goel Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: linuxppc-dev@lists.ozlabs.org Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Namhyung Kim Link: https://lore.kernel.org/linux-perf-users/20230105120436.92051-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf-script-test-prologue.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c index bd83d364cf30d..91778b5c6125d 100644 --- a/tools/perf/tests/bpf-script-test-prologue.c +++ b/tools/perf/tests/bpf-script-test-prologue.c @@ -20,6 +20,8 @@ # undef if #endif +typedef unsigned int __bitwise fmode_t; + #define FMODE_READ 0x1 #define FMODE_WRITE 0x2 From 2ece0930ac5662bccce0ba4c59b84c98d2437200 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Mon, 2 Jan 2023 22:39:55 +0530 Subject: [PATCH 0521/1593] i2c: designware-pci: Add new PCI IDs for AMD NAVI GPU Add additional supported PCI IDs for latest AMD NAVI GPU card which has an integrated Type-C controller and designware I2C with PCI interface. Signed-off-by: Basavaraj Natikar Tested-by: Sanath S Reviewed-by: Andy Shevchenko Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-pcidrv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c index e499f96506c52..782fe1ef3ca10 100644 --- a/drivers/i2c/busses/i2c-designware-pcidrv.c +++ b/drivers/i2c/busses/i2c-designware-pcidrv.c @@ -396,6 +396,8 @@ static const struct pci_device_id i2_designware_pci_ids[] = { { PCI_VDEVICE(ATI, 0x73a4), navi_amd }, { PCI_VDEVICE(ATI, 0x73e4), navi_amd }, { PCI_VDEVICE(ATI, 0x73c4), navi_amd }, + { PCI_VDEVICE(ATI, 0x7444), navi_amd }, + { PCI_VDEVICE(ATI, 0x7464), navi_amd }, { 0,} }; MODULE_DEVICE_TABLE(pci, i2_designware_pci_ids); From 78a4471fa1a76a8bef4919105de67660a89a1e9b Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 7 Jan 2023 13:32:29 +0100 Subject: [PATCH 0522/1593] i2c: mxs: suppress probe-deferral error message During boot of I2SE Duckbill the kernel log contains a confusing error: Failed to request dma This is caused by i2c-mxs tries to request a not yet available DMA channel (-EPROBE_DEFER). So suppress this message by using dev_err_probe(). Signed-off-by: Stefan Wahren Reviewed-by: Fabio Estevam Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mxs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c index 5af5cffc444ef..d113bed795452 100644 --- a/drivers/i2c/busses/i2c-mxs.c +++ b/drivers/i2c/busses/i2c-mxs.c @@ -826,8 +826,8 @@ static int mxs_i2c_probe(struct platform_device *pdev) /* Setup the DMA */ i2c->dmach = dma_request_chan(dev, "rx-tx"); if (IS_ERR(i2c->dmach)) { - dev_err(dev, "Failed to request dma\n"); - return PTR_ERR(i2c->dmach); + return dev_err_probe(dev, PTR_ERR(i2c->dmach), + "Failed to request dma\n"); } platform_set_drvdata(pdev, i2c); From 476fdcdaaae7b06c780cdfc234c704107f16c529 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 23 Dec 2022 10:20:11 +0100 Subject: [PATCH 0523/1593] drm/i915: Reserve enough fence slot for i915_vma_unbind_async MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A nested dma_resv_reserve_fences(1) will not reserve slot from the 2nd call onwards and folowing dma_resv_add_fence() might hit the "BUG_ON(fobj->num_fences >= fobj->max_fences)" check. I915 hit above nested dma_resv case in ttm_bo_handle_move_mem() with async unbind: dma_resv_reserve_fences() from --> ttm_bo_handle_move_mem() dma_resv_reserve_fences() from --> i915_vma_unbind_async() dma_resv_add_fence() from --> i915_vma_unbind_async() dma_resv_add_fence() from -->ttm_bo_move_accel_cleanup() Resolve this by adding an extra fence in i915_vma_unbind_async(). Suggested-by: Thomas Hellström Fixes: 2f6b90da9192 ("drm/i915: Use vma resources for async unbinding") Cc: # v5.18+ Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20221223092011.11657-1-nirmoy.das@intel.com (cherry picked from commit 4f0755c2faf7388616109717facc5bbde6850e60) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_vma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 3a33be5401ed2..135390d975b6f 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -2116,7 +2116,7 @@ int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm) if (!obj->mm.rsgt) return -EBUSY; - err = dma_resv_reserve_fences(obj->base.resv, 1); + err = dma_resv_reserve_fences(obj->base.resv, 2); if (err) return -EBUSY; From afce71ff6daa9c0f852df0727fe32c6fb107f0fa Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 3 Jan 2023 15:49:46 -0800 Subject: [PATCH 0524/1593] drm/i915: Fix potential context UAFs gem_context_register() makes the context visible to userspace, and which point a separate thread can trigger the I915_GEM_CONTEXT_DESTROY ioctl. So we need to ensure that nothing uses the ctx ptr after this. And we need to ensure that adding the ctx to the xarray is the *last* thing that gem_context_register() does with the ctx pointer. Signed-off-by: Rob Clark Fixes: eb4dedae920a ("drm/i915/gem: Delay tracking the GEM context until it is registered") Fixes: a4c1cdd34e2c ("drm/i915/gem: Delay context creation (v3)") Fixes: 49bd54b390c2 ("drm/i915: Track all user contexts per client") Cc: # v5.10+ Reviewed-by: Tvrtko Ursulin Reviewed-by: Andi Shyti [tursulin: Stable and fixes tags add/tidy.] Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20230103234948.1218393-1-robdclark@gmail.com (cherry picked from commit bed4b455cf5374e68879be56971c1da563bcd90c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 24 +++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 7f2831efc798b..6250de9b9196c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1688,6 +1688,10 @@ void i915_gem_init__contexts(struct drm_i915_private *i915) init_contexts(&i915->gem.contexts); } +/* + * Note that this implicitly consumes the ctx reference, by placing + * the ctx in the context_xa. + */ static void gem_context_register(struct i915_gem_context *ctx, struct drm_i915_file_private *fpriv, u32 id) @@ -1703,10 +1707,6 @@ static void gem_context_register(struct i915_gem_context *ctx, snprintf(ctx->name, sizeof(ctx->name), "%s[%d]", current->comm, pid_nr(ctx->pid)); - /* And finally expose ourselves to userspace via the idr */ - old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL); - WARN_ON(old); - spin_lock(&ctx->client->ctx_lock); list_add_tail_rcu(&ctx->client_link, &ctx->client->ctx_list); spin_unlock(&ctx->client->ctx_lock); @@ -1714,6 +1714,10 @@ static void gem_context_register(struct i915_gem_context *ctx, spin_lock(&i915->gem.contexts.lock); list_add_tail(&ctx->link, &i915->gem.contexts.list); spin_unlock(&i915->gem.contexts.lock); + + /* And finally expose ourselves to userspace via the idr */ + old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL); + WARN_ON(old); } int i915_gem_context_open(struct drm_i915_private *i915, @@ -2199,14 +2203,22 @@ finalize_create_context_locked(struct drm_i915_file_private *file_priv, if (IS_ERR(ctx)) return ctx; + /* + * One for the xarray and one for the caller. We need to grab + * the reference *prior* to making the ctx visble to userspace + * in gem_context_register(), as at any point after that + * userspace can try to race us with another thread destroying + * the context under our feet. + */ + i915_gem_context_get(ctx); + gem_context_register(ctx, file_priv, id); old = xa_erase(&file_priv->proto_context_xa, id); GEM_BUG_ON(old != pc); proto_context_close(file_priv->dev_priv, pc); - /* One for the xarray and one for the caller */ - return i915_gem_context_get(ctx); + return ctx; } struct i915_gem_context * From 4e4ff23a35ee3a145fbc8378ecfeaab2d235cddd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 9 Jan 2023 15:47:25 +0000 Subject: [PATCH 0525/1593] arm64/mm: Define dummy pud_user_exec() when using 2-level page-table With only two levels of page-table, the generic 'pud_*' macros are implemented using dummy operations in pgtable-nopmd.h. Since commit 730a11f982e6 ("arm64/mm: add pud_user_exec() check in pud_user_accessible_page()"), pud_user_accessible_page() unconditionally calls pud_user_exec(), which is an arm64-specific helper and therefore isn't defined by pgtable-nopmd.h. This results in a build failure for configurations with only two levels of page table: arch/arm64/include/asm/pgtable.h: In function 'pud_user_accessible_page': >> arch/arm64/include/asm/pgtable.h:870:51: error: implicit declaration of function 'pud_user_exec'; did you mean 'pmd_user_exec'? [-Werror=implicit-function-declaration] 870 | return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud)); | ^~~~~~~~~~~~~ | pmd_user_exec Fix the problem by defining pud_user_exec() as pud_user() in this case. Link: https://lore.kernel.org/r/202301080515.z6zEksU4-lkp@intel.com Reported-by: kernel test robot Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 02ed5c0adf97a..65e78999c75d7 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -730,6 +730,7 @@ static inline pmd_t *pud_pgtable(pud_t pud) #else #define pud_page_paddr(pud) ({ BUILD_BUG(); 0; }) +#define pud_user_exec(pud) pud_user(pud) /* Always 0 with folding */ /* Match pmd_offset folding in */ #define pmd_set_fixmap(addr) NULL From 68a63a412d18bd2e2577c8928139f92541afa7a6 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 9 Jan 2023 12:27:43 +0000 Subject: [PATCH 0526/1593] arm64: Fix build with CC=clang, CONFIG_FTRACE=y and CONFIG_STACK_TRACER=y commit 45bd8951806e ("arm64: Improve HAVE_DYNAMIC_FTRACE_WITH_REGS selection for clang") fixed the build with the above combination by splitting HAVE_DYNAMIC_FTRACE_WITH_REGS into separate checks for Clang and GCC. commit 26299b3f6ba2 ("ftrace: arm64: move from REGS to ARGS") added the GCC only check "-fpatchable-function-entry=2" back in unconditionally which breaks the build. Remove the unconditional check, because the conditional ones were also updated to _ARGS in the above commit, so they work correctly on their own. Fixes: 26299b3f6ba2 ("ftrace: arm64: move from REGS to ARGS") Signed-off-by: James Clark Link: https://lore.kernel.org/r/20230109122744.1904852-1-james.clark@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cf6d1cd8b6dc5..c5ccca26a4087 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -184,8 +184,6 @@ config ARM64 select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE - select HAVE_DYNAMIC_FTRACE_WITH_ARGS \ - if $(cc-option,-fpatchable-function-entry=2) select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \ if DYNAMIC_FTRACE_WITH_ARGS select HAVE_EFFICIENT_UNALIGNED_ACCESS From cd2d0d45a3d2c199344305a1e8fed42347cf4bec Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 9 Jan 2023 09:30:53 +0100 Subject: [PATCH 0527/1593] ARM: pxa: enable PXA310/PXA320 for DT-only build After commit b5aaaa666a85 ("ARM: pxa: add Kconfig dependencies for ATAGS based boards"), the default PXA build no longer includes support for the board files that are considered unused. As a side-effect of this, the PXA310 and PXA320 support is not built into the kernel any more, even though it should work in principle as long as the symbols are enabled. As Robert points out, there are dts files for zylonite and cm-x300, though those have not made it into the mainline kernel. Link: https://lore.kernel.org/linux-arm-kernel/m2sfglh02h.fsf@free.fr/ Reported-by: Robert Jarzmik Signed-off-by: Arnd Bergmann --- arch/arm/mach-pxa/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig index b90d98bae68d7..03e25af6f48c9 100644 --- a/arch/arm/mach-pxa/Kconfig +++ b/arch/arm/mach-pxa/Kconfig @@ -45,6 +45,8 @@ config MACH_PXA27X_DT config MACH_PXA3XX_DT bool "Support PXA3xx platforms from device tree" select CPU_PXA300 + select CPU_PXA310 + select CPU_PXA320 select PINCTRL select POWER_SUPPLY select PXA3xx From cad90e5381d840cf2296aaac9b3eff71a30b7c5b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 20 Dec 2022 20:13:23 +1000 Subject: [PATCH 0528/1593] objtool: Tolerate STT_NOTYPE symbols at end of section Hand-written asm often contains non-function symbols in executable sections. _end symbols for finding the size of instruction blocks for runtime processing is one such usage. optprobe_template_end is one example that causes the warning: objtool: optprobe_template_end(): can't find starting instruction This is because the symbol happens to be at the end of the file (and therefore end of a section in the object file). So ignore end-of-section STT_NOTYPE symbols instead of bailing out because an instruction can't be found. While we're here, add a more descriptive warning for STT_FUNC symbols found at the end of a section. [ This also solves a PowerPC regression reported by Sathvika Vasireddy. ] Reported-by: Naveen N. Rao Reported-by: Sathvika Vasireddy Signed-off-by: Nicholas Piggin Signed-off-by: Ingo Molnar Acked-by: Sathvika Vasireddy Link: https://lore.kernel.org/r/20221220101323.3119939-1-npiggin@gmail.com --- tools/objtool/check.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4350be739f4fa..4b7c8b33069e5 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -427,6 +427,15 @@ static int decode_instructions(struct objtool_file *file) if (func->type != STT_NOTYPE && func->type != STT_FUNC) continue; + if (func->offset == sec->sh.sh_size) { + /* Heuristic: likely an "end" symbol */ + if (func->type == STT_NOTYPE) + continue; + WARN("%s(): STT_FUNC at end of section", + func->name); + return -1; + } + if (func->return_thunk || func->alias != func) continue; From feaf75658783a919410f8c2039dbc24b6a29603d Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 9 Jan 2023 08:54:37 +0100 Subject: [PATCH 0529/1593] nolibc: fix fd_set type The kernel uses unsigned long for the fd_set bitmap, but nolibc use u32. This works fine on little endian machines, but fails on big endian. Convert to unsigned long to fix this. Signed-off-by: Sven Schnelle Signed-off-by: Willy Tarreau Signed-off-by: Paul E. McKenney --- tools/include/nolibc/types.h | 53 ++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index 959997034e553..300e0ff1cd58c 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -89,39 +89,46 @@ #define EXIT_SUCCESS 0 #define EXIT_FAILURE 1 +#define FD_SETIDXMASK (8 * sizeof(unsigned long)) +#define FD_SETBITMASK (8 * sizeof(unsigned long)-1) + /* for select() */ typedef struct { - uint32_t fd32[(FD_SETSIZE + 31) / 32]; + unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK]; } fd_set; -#define FD_CLR(fd, set) do { \ - fd_set *__set = (set); \ - int __fd = (fd); \ - if (__fd >= 0) \ - __set->fd32[__fd / 32] &= ~(1U << (__fd & 31)); \ +#define FD_CLR(fd, set) do { \ + fd_set *__set = (set); \ + int __fd = (fd); \ + if (__fd >= 0) \ + __set->fds[__fd / FD_SETIDXMASK] &= \ + ~(1U << (__fd & FX_SETBITMASK)); \ } while (0) -#define FD_SET(fd, set) do { \ - fd_set *__set = (set); \ - int __fd = (fd); \ - if (__fd >= 0) \ - __set->fd32[__fd / 32] |= 1U << (__fd & 31); \ +#define FD_SET(fd, set) do { \ + fd_set *__set = (set); \ + int __fd = (fd); \ + if (__fd >= 0) \ + __set->fds[__fd / FD_SETIDXMASK] |= \ + 1 << (__fd & FD_SETBITMASK); \ } while (0) -#define FD_ISSET(fd, set) ({ \ - fd_set *__set = (set); \ - int __fd = (fd); \ - int __r = 0; \ - if (__fd >= 0) \ - __r = !!(__set->fd32[__fd / 32] & 1U << (__fd & 31)); \ - __r; \ +#define FD_ISSET(fd, set) ({ \ + fd_set *__set = (set); \ + int __fd = (fd); \ + int __r = 0; \ + if (__fd >= 0) \ + __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \ +1U << (__fd & FD_SET_BITMASK)); \ + __r; \ }) -#define FD_ZERO(set) do { \ - fd_set *__set = (set); \ - int __idx; \ - for (__idx = 0; __idx < (FD_SETSIZE+31) / 32; __idx ++) \ - __set->fd32[__idx] = 0; \ +#define FD_ZERO(set) do { \ + fd_set *__set = (set); \ + int __idx; \ + int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\ + for (__idx = 0; __idx < __size; __idx++) \ + __set->fds[__idx] = 0; \ } while (0) /* for poll() */ From 16f5cea74179b5795af7ce359971f5128d10f80e Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Mon, 9 Jan 2023 08:54:38 +0100 Subject: [PATCH 0530/1593] tools/nolibc: Fix S_ISxxx macros The mode field has the type encoded as an value in a field, not as a bit mask. Mask the mode with S_IFMT instead of each type to test. Otherwise, false positives are possible: eg S_ISDIR will return true for block devices because S_IFDIR = 0040000 and S_IFBLK = 0060000 since mode is masked with S_IFDIR instead of S_IFMT. These macros now match the similar definitions in tools/include/uapi/linux/stat.h. Signed-off-by: Warner Losh Signed-off-by: Willy Tarreau Signed-off-by: Paul E. McKenney --- tools/include/nolibc/types.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index 300e0ff1cd58c..f1d64fca7cf05 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -26,13 +26,13 @@ #define S_IFSOCK 0140000 #define S_IFMT 0170000 -#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR) -#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR) -#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK) -#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG) -#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO) -#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK) -#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK) +#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) +#define S_ISCHR(mode) (((mode) & S_IFMT) == S_IFCHR) +#define S_ISBLK(mode) (((mode) & S_IFMT) == S_IFBLK) +#define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG) +#define S_ISFIFO(mode) (((mode) & S_IFMT) == S_IFIFO) +#define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK) +#define S_ISSOCK(mode) (((mode) & S_IFMT) == S_IFSOCK) /* dirent types */ #define DT_UNKNOWN 0x0 From 184177c3d6e023da934761e198c281344d7dd65b Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 9 Jan 2023 08:54:39 +0100 Subject: [PATCH 0531/1593] tools/nolibc: restore mips branch ordering in the _start block Depending on the compiler used and the optimization options, the sbrk() test was crashing, both on real hardware (mips-24kc) and in qemu. One such example is kernel.org toolchain in version 11.3 optimizing at -Os. Inspecting the sys_brk() call shows the following code: 0040047c : 40047c: 24020fcd li v0,4045 400480: 27bdffe0 addiu sp,sp,-32 400484: 0000000c syscall 400488: 27bd0020 addiu sp,sp,32 40048c: 10e00001 beqz a3,400494 400490: 00021023 negu v0,v0 400494: 03e00008 jr ra It is obviously wrong, the "negu" instruction is placed in beqz's delayed slot, and worse, there's no nop nor instruction after the return, so the next function's first instruction (addiu sip,sip,-32) will also be executed as part of the delayed slot that follows the return. This is caused by the ".set noreorder" directive in the _start block, that applies to the whole program. The compiler emits code without the delayed slots and relies on the compiler to swap instructions when this option is not set. Removing the option would require to change the startup code in a way that wouldn't make it look like the resulting code, which would not be easy to debug. Instead let's just save the default ordering before changing it, and restore it at the end of the _start block. Now the code is correct: 0040047c : 40047c: 24020fcd li v0,4045 400480: 27bdffe0 addiu sp,sp,-32 400484: 0000000c syscall 400488: 10e00002 beqz a3,400494 40048c: 27bd0020 addiu sp,sp,32 400490: 00021023 negu v0,v0 400494: 03e00008 jr ra 400498: 00000000 nop Fixes: 66b6f755ad45 ("rcutorture: Import a copy of nolibc") #5.0 Signed-off-by: Willy Tarreau Signed-off-by: Paul E. McKenney --- tools/include/nolibc/arch-mips.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 5fc5b8029bff9..7380093ba9e7d 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -192,6 +192,7 @@ struct sys_stat_struct { __asm__ (".section .text\n" ".weak __start\n" ".set nomips16\n" + ".set push\n" ".set noreorder\n" ".option pic0\n" ".ent __start\n" @@ -210,6 +211,7 @@ __asm__ (".section .text\n" "li $v0, 4001\n" // NR_exit == 4001 "syscall\n" ".end __start\n" + ".set pop\n" ""); #endif // _NOLIBC_ARCH_MIPS_H From 55abdd1f5e1e07418bf4a46c233a92f83cb5ae97 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 9 Jan 2023 08:54:40 +0100 Subject: [PATCH 0532/1593] tools/nolibc: fix missing includes causing build issues at -O0 After the nolibc includes were split to facilitate portability from standard libcs, programs that include only what they need may miss some symbols which are needed by libgcc. This is the case for raise() which is needed by the divide by zero code in some architectures for example. Regardless, being able to include only the apparently needed files is convenient. Instead of trying to move all exported definitions to a single file, since this can change over time, this patch takes another approach consisting in including the nolibc header at the end of all standard include files. This way their types and functions are already known at the moment of inclusion, and including any single one of them is sufficient to bring all the required ones. Signed-off-by: Willy Tarreau Signed-off-by: Paul E. McKenney --- tools/include/nolibc/ctype.h | 3 +++ tools/include/nolibc/errno.h | 3 +++ tools/include/nolibc/signal.h | 3 +++ tools/include/nolibc/stdio.h | 3 +++ tools/include/nolibc/stdlib.h | 3 +++ tools/include/nolibc/string.h | 3 +++ tools/include/nolibc/sys.h | 2 ++ tools/include/nolibc/time.h | 3 +++ tools/include/nolibc/types.h | 3 +++ tools/include/nolibc/unistd.h | 3 +++ 10 files changed, 29 insertions(+) diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h index e3000b2992d7b..6f90706d06442 100644 --- a/tools/include/nolibc/ctype.h +++ b/tools/include/nolibc/ctype.h @@ -96,4 +96,7 @@ int ispunct(int c) return isgraph(c) && !isalnum(c); } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_CTYPE_H */ diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h index 06893d6dfb7a6..9dc4919c769b7 100644 --- a/tools/include/nolibc/errno.h +++ b/tools/include/nolibc/errno.h @@ -24,4 +24,7 @@ static int errno; */ #define MAX_ERRNO 4095 +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_ERRNO_H */ diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h index ef47e71e2be37..137552216e469 100644 --- a/tools/include/nolibc/signal.h +++ b/tools/include/nolibc/signal.h @@ -19,4 +19,7 @@ int raise(int signal) return sys_kill(sys_getpid(), signal); } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_SIGNAL_H */ diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index a3cebc4bc3ac4..96ac8afc5aeed 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -303,4 +303,7 @@ void perror(const char *msg) fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno); } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_STDIO_H */ diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h index 92378c4b96605..a24000d1e8222 100644 --- a/tools/include/nolibc/stdlib.h +++ b/tools/include/nolibc/stdlib.h @@ -419,4 +419,7 @@ char *u64toa(uint64_t in) return itoa_buffer; } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_STDLIB_H */ diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h index ad97c0d522b8e..0932db3817d2c 100644 --- a/tools/include/nolibc/string.h +++ b/tools/include/nolibc/string.h @@ -285,4 +285,7 @@ char *strrchr(const char *s, int c) return (char *)ret; } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_STRING_H */ diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index ce3ee03aa6794..78473d34e27cd 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -1243,5 +1243,7 @@ ssize_t write(int fd, const void *buf, size_t count) return ret; } +/* make sure to include all global symbols */ +#include "nolibc.h" #endif /* _NOLIBC_SYS_H */ diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index d18b7661fdd71..84655361b9ad2 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -25,4 +25,7 @@ time_t time(time_t *tptr) return tv.tv_sec; } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_TIME_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index f1d64fca7cf05..fbbc0e68c001b 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -209,4 +209,7 @@ struct stat { }) #endif +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_TYPES_H */ diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h index 1c25e20ee3606..1cfcd52106a42 100644 --- a/tools/include/nolibc/unistd.h +++ b/tools/include/nolibc/unistd.h @@ -51,4 +51,7 @@ int tcsetpgrp(int fd, pid_t pid) return ioctl(fd, TIOCSPGRP, &pid); } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_UNISTD_H */ From 1bfbe1f3e96720daf185f03d101f072d69753f88 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 9 Jan 2023 08:54:41 +0100 Subject: [PATCH 0533/1593] tools/nolibc: prevent gcc from making memset() loop over itself When building on ARM in thumb mode with gcc-11.3 at -O2 or -O3, nolibc-test segfaults during the select() tests. It turns out that at this level, gcc recognizes an opportunity for using memset() to zero the fd_set, but it miscompiles it because it also recognizes a memset pattern as well, and decides to call memset() from the memset() code: 000122bc : 122bc: b510 push {r4, lr} 122be: 0004 movs r4, r0 122c0: 2a00 cmp r2, #0 122c2: d003 beq.n 122cc 122c4: 23ff movs r3, #255 ; 0xff 122c6: 4019 ands r1, r3 122c8: f7ff fff8 bl 122bc 122cc: 0020 movs r0, r4 122ce: bd10 pop {r4, pc} Simply placing an empty asm() statement inside the loop suffices to avoid this. Signed-off-by: Willy Tarreau Signed-off-by: Paul E. McKenney --- tools/include/nolibc/string.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h index 0932db3817d2c..fffdaf6ff4673 100644 --- a/tools/include/nolibc/string.h +++ b/tools/include/nolibc/string.h @@ -88,8 +88,11 @@ void *memset(void *dst, int b, size_t len) { char *p = dst; - while (len--) + while (len--) { + /* prevent gcc from recognizing memset() here */ + asm volatile(""); *(p++) = b; + } return dst; } From 00b18da4089330196906b9fe075c581c17eb726c Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 9 Jan 2023 08:54:42 +0100 Subject: [PATCH 0534/1593] tools/nolibc: fix the O_* fcntl/open macro definitions for riscv When RISCV port was imported in 5.2, the O_* macros were taken with their octal value and written as-is in hex, resulting in the getdents64() to fail in nolibc-test. Fixes: 582e84f7b779 ("tool headers nolibc: add RISCV support") #5.2 Signed-off-by: Willy Tarreau Signed-off-by: Paul E. McKenney --- tools/include/nolibc/arch-riscv.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h index ba04771cb3a34..a3bdd9803f8cb 100644 --- a/tools/include/nolibc/arch-riscv.h +++ b/tools/include/nolibc/arch-riscv.h @@ -11,13 +11,13 @@ #define O_RDONLY 0 #define O_WRONLY 1 #define O_RDWR 2 -#define O_CREAT 0x100 -#define O_EXCL 0x200 -#define O_NOCTTY 0x400 -#define O_TRUNC 0x1000 -#define O_APPEND 0x2000 -#define O_NONBLOCK 0x4000 -#define O_DIRECTORY 0x200000 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x10000 struct sys_stat_struct { unsigned long st_dev; /* Device. */ From 29cf28235e3e57e0af01ae29db57a75f87a2ada8 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 6 Jan 2023 20:28:30 -0300 Subject: [PATCH 0535/1593] cifs: fix file info setting in cifs_query_path_info() We missed to set file info when CIFSSMBQPathInfo() returned 0, thus leaving cifs_open_info_data::fi unset. Fix this by setting cifs_open_info_data::fi when either CIFSSMBQPathInfo() or SMBQueryInformation() succeed. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216881 Fixes: 76894f3e2f71 ("cifs: improve symlink handling for smb2+") Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/smb1ops.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 50480751e521c..5fe2c2f8ef417 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -562,17 +562,20 @@ static int cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) { rc = SMBQueryInformation(xid, tcon, full_path, &fi, cifs_sb->local_nls, cifs_remap(cifs_sb)); - if (!rc) - move_cifs_info_to_smb2(&data->fi, &fi); *adjustTZ = true; } - if (!rc && (le32_to_cpu(fi.Attributes) & ATTR_REPARSE)) { + if (!rc) { int tmprc; int oplock = 0; struct cifs_fid fid; struct cifs_open_parms oparms; + move_cifs_info_to_smb2(&data->fi, &fi); + + if (!(le32_to_cpu(fi.Attributes) & ATTR_REPARSE)) + return 0; + oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = FILE_READ_ATTRIBUTES; From ba5d4c1596cada37793d405dd18d695cd3508902 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 6 Jan 2023 13:34:36 -0300 Subject: [PATCH 0536/1593] cifs: fix file info setting in cifs_open_file() In cifs_open_file(), @buf must hold a pointer to a cifs_open_info_data structure which is passed by cifs_nt_open(), so assigning @buf directly to @fi was obviously wrong. Fix this by passing a valid FILE_ALL_INFO structure to SMBLegacyOpen() and CIFS_open(), and then copy the set structure to the corresponding cifs_open_info_data::fi field with move_cifs_info_to_smb2() helper. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216889 Fixes: 76894f3e2f71 ("cifs: improve symlink handling for smb2+") Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/smb1ops.c | 54 ++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 5fe2c2f8ef417..4cb364454e130 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -719,17 +719,25 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path, static int cifs_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, void *buf) { - FILE_ALL_INFO *fi = buf; + struct cifs_open_info_data *data = buf; + FILE_ALL_INFO fi = {}; + int rc; if (!(oparms->tcon->ses->capabilities & CAP_NT_SMBS)) - return SMBLegacyOpen(xid, oparms->tcon, oparms->path, - oparms->disposition, - oparms->desired_access, - oparms->create_options, - &oparms->fid->netfid, oplock, fi, - oparms->cifs_sb->local_nls, - cifs_remap(oparms->cifs_sb)); - return CIFS_open(xid, oparms, oplock, fi); + rc = SMBLegacyOpen(xid, oparms->tcon, oparms->path, + oparms->disposition, + oparms->desired_access, + oparms->create_options, + &oparms->fid->netfid, oplock, &fi, + oparms->cifs_sb->local_nls, + cifs_remap(oparms->cifs_sb)); + else + rc = CIFS_open(xid, oparms, oplock, &fi); + + if (!rc && data) + move_cifs_info_to_smb2(&data->fi, &fi); + + return rc; } static void @@ -1053,7 +1061,7 @@ cifs_make_node(unsigned int xid, struct inode *inode, struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct inode *newinode = NULL; int rc = -EPERM; - FILE_ALL_INFO *buf = NULL; + struct cifs_open_info_data buf = {}; struct cifs_io_parms io_parms; __u32 oplock = 0; struct cifs_fid fid; @@ -1085,14 +1093,14 @@ cifs_make_node(unsigned int xid, struct inode *inode, cifs_sb->local_nls, cifs_remap(cifs_sb)); if (rc) - goto out; + return rc; rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, xid); if (rc == 0) d_instantiate(dentry, newinode); - goto out; + return rc; } /* @@ -1100,19 +1108,13 @@ cifs_make_node(unsigned int xid, struct inode *inode, * support block and char device (no socket & fifo) */ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) - goto out; + return rc; if (!S_ISCHR(mode) && !S_ISBLK(mode)) - goto out; + return rc; cifs_dbg(FYI, "sfu compat create special file\n"); - buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); - if (buf == NULL) { - rc = -ENOMEM; - goto out; - } - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; @@ -1127,21 +1129,21 @@ cifs_make_node(unsigned int xid, struct inode *inode, oplock = REQ_OPLOCK; else oplock = 0; - rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, buf); + rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf); if (rc) - goto out; + return rc; /* * BB Do not bother to decode buf since no local inode yet to put * timestamps in, but we can reuse it safely. */ - pdev = (struct win_dev *)buf; + pdev = (struct win_dev *)&buf.fi; io_parms.pid = current->tgid; io_parms.tcon = tcon; io_parms.offset = 0; io_parms.length = sizeof(struct win_dev); - iov[1].iov_base = buf; + iov[1].iov_base = &buf.fi; iov[1].iov_len = sizeof(struct win_dev); if (S_ISCHR(mode)) { memcpy(pdev->type, "IntxCHR", 8); @@ -1160,8 +1162,8 @@ cifs_make_node(unsigned int xid, struct inode *inode, d_drop(dentry); /* FIXME: add code here to set EAs */ -out: - kfree(buf); + + cifs_free_open_info(&buf); return rc; } From 75b58cf5ce21650c1b3b88d8310f3415ce905018 Mon Sep 17 00:00:00 2001 From: Klaus Kudielka Date: Sat, 7 Jan 2023 15:41:48 +0100 Subject: [PATCH 0537/1593] Revert "ARM: dts: armada-38x: Fix compatible string for gpios" This reverts commit c4de4667f15d04ef5920bacf41e514ec7d1ef03d, which causes a regression on Turris Omnia (Armada 385): GPIO interrupts cease to work, ending up in the DSA switch being non-functional. The blamed commit is incorrect in the first place: If compatible = "marvell,armadaxp-gpio", the second (address, size) pair of the reg property must to point to the per-CPU interrupt registers <0x18800 0x30> / <0x18840 0x30>, and not to the blink enable registers <0x181c0 0x08> / <0x181c8 0x08>. But even fixing that leaves the GPIO interrupts broken on the Omnia. Furthermore: Commit 5f79c651e81e explains very well, why the gpio-mvebu driver does not work reliably with per-CPU interrupts. Commit 988c8c0cd04d deprecates compatible = marvell,armadaxp-gpio for this reason. Fixes: c4de4667f15d ("ARM: dts: armada-38x: Fix compatible string for gpios") Reported-by: Klaus Kudielka Link: https://lore.kernel.org/r/f24474e70c1a4e9692bd596ef6d97ceda9511245.camel@gmail.com/ Signed-off-by: Klaus Kudielka Reviewed-by: Andrew Lunn Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/armada-38x.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index 12933eff419ff..446861b6b17b2 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi +++ b/arch/arm/boot/dts/armada-38x.dtsi @@ -304,7 +304,7 @@ }; gpio0: gpio@18100 { - compatible = "marvell,armadaxp-gpio", + compatible = "marvell,armada-370-gpio", "marvell,orion-gpio"; reg = <0x18100 0x40>, <0x181c0 0x08>; reg-names = "gpio", "pwm"; @@ -323,7 +323,7 @@ }; gpio1: gpio@18140 { - compatible = "marvell,armadaxp-gpio", + compatible = "marvell,armada-370-gpio", "marvell,orion-gpio"; reg = <0x18140 0x40>, <0x181c8 0x08>; reg-names = "gpio", "pwm"; From a4d5d2ef5b1f6cddee0fec35d86404df148b9074 Mon Sep 17 00:00:00 2001 From: Klaus Kudielka Date: Sat, 7 Jan 2023 15:41:49 +0100 Subject: [PATCH 0538/1593] Revert "ARM: dts: armada-39x: Fix compatible string for gpios" This reverts commit d10886a4e6f85ee18d47a1066a52168461370ded. If compatible = "marvell,armadaxp-gpio", the reg property requires a second (address, size) pair, which points to the per-CPU interrupt registers <0x18800 0x30> / <0x18840 0x30>. Furthermore: Commit 5f79c651e81e explains very well, why the gpio-mvebu driver does not work reliably with per-CPU interrupts. Commit 988c8c0cd04d deprecates compatible = marvell,armadaxp-gpio for this reason. Signed-off-by: Klaus Kudielka Reviewed-by: Andrew Lunn Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/armada-39x.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/armada-39x.dtsi b/arch/arm/boot/dts/armada-39x.dtsi index 1e05208d9f341..9d1cac49c022f 100644 --- a/arch/arm/boot/dts/armada-39x.dtsi +++ b/arch/arm/boot/dts/armada-39x.dtsi @@ -213,7 +213,7 @@ }; gpio0: gpio@18100 { - compatible = "marvell,armadaxp-gpio", "marvell,orion-gpio"; + compatible = "marvell,orion-gpio"; reg = <0x18100 0x40>; ngpios = <32>; gpio-controller; @@ -227,7 +227,7 @@ }; gpio1: gpio@18140 { - compatible = "marvell,armadaxp-gpio", "marvell,orion-gpio"; + compatible = "marvell,orion-gpio"; reg = <0x18140 0x40>; ngpios = <28>; gpio-controller; From ee4202db16e59e792f570092ad1f440f2d31ea03 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Mon, 9 Jan 2023 21:02:38 +0100 Subject: [PATCH 0539/1593] fbdev: atmel_lcdfb: Rework backlight status updates Instead of checking the state of various backlight_properties fields against the memorised state in atmel_lcdfb_info.bl_power, atmel_bl_update_status() should retrieve the desired state using backlight_get_brightness (which takes into account the power state, blanking etc.). This means the explicit checks using props.fb_blank and props.power can be dropped. The backlight framework ensures that backlight is never negative, so the test before reading the brightness from the hardware always ends up false and the whole block can be removed. The framework retrieves the brightness from the hardware through atmel_bl_get_brightness() when necessary. As a result, bl_power in struct atmel_lcdfb_info is no longer necessary, so remove that while we're at it. Since we only ever care about reading the current state in backlight_properties, drop the updates at the end of the function. Signed-off-by: Stephen Kitt Acked-by: Sam Ravnborg Signed-off-by: Helge Deller --- drivers/video/fbdev/atmel_lcdfb.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c index 1fc8de4ecbebf..8187a7c4f9106 100644 --- a/drivers/video/fbdev/atmel_lcdfb.c +++ b/drivers/video/fbdev/atmel_lcdfb.c @@ -49,7 +49,6 @@ struct atmel_lcdfb_info { struct clk *lcdc_clk; struct backlight_device *backlight; - u8 bl_power; u8 saved_lcdcon; u32 pseudo_palette[16]; @@ -109,22 +108,7 @@ static u32 contrast_ctr = ATMEL_LCDC_PS_DIV8 static int atmel_bl_update_status(struct backlight_device *bl) { struct atmel_lcdfb_info *sinfo = bl_get_data(bl); - int power = sinfo->bl_power; - int brightness = bl->props.brightness; - - /* REVISIT there may be a meaningful difference between - * fb_blank and power ... there seem to be some cases - * this doesn't handle correctly. - */ - if (bl->props.fb_blank != sinfo->bl_power) - power = bl->props.fb_blank; - else if (bl->props.power != sinfo->bl_power) - power = bl->props.power; - - if (brightness < 0 && power == FB_BLANK_UNBLANK) - brightness = lcdc_readl(sinfo, ATMEL_LCDC_CONTRAST_VAL); - else if (power != FB_BLANK_UNBLANK) - brightness = 0; + int brightness = backlight_get_brightness(bl); lcdc_writel(sinfo, ATMEL_LCDC_CONTRAST_VAL, brightness); if (contrast_ctr & ATMEL_LCDC_POL_POSITIVE) @@ -133,8 +117,6 @@ static int atmel_bl_update_status(struct backlight_device *bl) else lcdc_writel(sinfo, ATMEL_LCDC_CONTRAST_CTR, contrast_ctr); - bl->props.fb_blank = bl->props.power = sinfo->bl_power = power; - return 0; } @@ -155,8 +137,6 @@ static void init_backlight(struct atmel_lcdfb_info *sinfo) struct backlight_properties props; struct backlight_device *bl; - sinfo->bl_power = FB_BLANK_UNBLANK; - if (sinfo->backlight) return; From 2ec1b17f745b08526220f3c169d2eb9799a9be39 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 6 Jan 2023 09:39:49 -0700 Subject: [PATCH 0540/1593] cxl: fix cxl_report_and_clear() RAS UE addr mis-assignment 'addr' that contains RAS UE register address is re-assigned to RAS_CAP_CONTROL offset if there are multiple UE errors. Use different addr variable to avoid the reassignment mistake. Fixes: 2905cb5236cb ("cxl/pci: Add (hopeful) error handling support") Reported-by: Jonathan Cameron Signed-off-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/167302318779.580155.15233596744650706167.stgit@djiang5-mobl3.local Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 33083a522fd1c..258004f34281a 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -554,8 +554,11 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) /* If multiple errors, log header points to first error from ctrl reg */ if (hweight32(status) > 1) { - addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET; - fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr))); + void __iomem *rcc_addr = + cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET; + + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, + readl(rcc_addr))); } else { fe = status; } From 101ca8d05913b7d1e6e8b9dd792193d4082fff86 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Mon, 2 Jan 2023 17:06:30 -0600 Subject: [PATCH 0541/1593] rtc: efi: Enable SET/GET WAKEUP services as optional The current implementation of rtc-efi is expecting all the 4 time services GET{SET}_TIME{WAKEUP} must be supported by UEFI firmware. As per the EFI_RT_PROPERTIES_TABLE, the platform specific implementations can choose to enable selective time services based on the RTC device capabilities. This patch does the following changes to provide GET/SET RTC services on platforms that do not support the WAKEUP feature. 1) Relax time services cap check when creating a platform device. 2) Clear RTC_FEATURE_ALARM bit in the absence of WAKEUP services. 3) Conditional alarm entries in '/proc/driver/rtc'. Cc: # v6.0+ Signed-off-by: Shanker Donthineni Link: https://lore.kernel.org/r/20230102230630.192911-1-sdonthineni@nvidia.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-efi.c | 48 ++++++++++++++++++++++++------------------- include/linux/efi.h | 3 ++- 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c index e991cccdb6e9c..1e8bc6cc1e12d 100644 --- a/drivers/rtc/rtc-efi.c +++ b/drivers/rtc/rtc-efi.c @@ -188,9 +188,10 @@ static int efi_set_time(struct device *dev, struct rtc_time *tm) static int efi_procfs(struct device *dev, struct seq_file *seq) { - efi_time_t eft, alm; - efi_time_cap_t cap; - efi_bool_t enabled, pending; + efi_time_t eft, alm; + efi_time_cap_t cap; + efi_bool_t enabled, pending; + struct rtc_device *rtc = dev_get_drvdata(dev); memset(&eft, 0, sizeof(eft)); memset(&alm, 0, sizeof(alm)); @@ -213,23 +214,25 @@ static int efi_procfs(struct device *dev, struct seq_file *seq) /* XXX fixme: convert to string? */ seq_printf(seq, "Timezone\t: %u\n", eft.timezone); - seq_printf(seq, - "Alarm Time\t: %u:%u:%u.%09u\n" - "Alarm Date\t: %u-%u-%u\n" - "Alarm Daylight\t: %u\n" - "Enabled\t\t: %s\n" - "Pending\t\t: %s\n", - alm.hour, alm.minute, alm.second, alm.nanosecond, - alm.year, alm.month, alm.day, - alm.daylight, - enabled == 1 ? "yes" : "no", - pending == 1 ? "yes" : "no"); - - if (eft.timezone == EFI_UNSPECIFIED_TIMEZONE) - seq_puts(seq, "Timezone\t: unspecified\n"); - else - /* XXX fixme: convert to string? */ - seq_printf(seq, "Timezone\t: %u\n", alm.timezone); + if (test_bit(RTC_FEATURE_ALARM, rtc->features)) { + seq_printf(seq, + "Alarm Time\t: %u:%u:%u.%09u\n" + "Alarm Date\t: %u-%u-%u\n" + "Alarm Daylight\t: %u\n" + "Enabled\t\t: %s\n" + "Pending\t\t: %s\n", + alm.hour, alm.minute, alm.second, alm.nanosecond, + alm.year, alm.month, alm.day, + alm.daylight, + enabled == 1 ? "yes" : "no", + pending == 1 ? "yes" : "no"); + + if (eft.timezone == EFI_UNSPECIFIED_TIMEZONE) + seq_puts(seq, "Timezone\t: unspecified\n"); + else + /* XXX fixme: convert to string? */ + seq_printf(seq, "Timezone\t: %u\n", alm.timezone); + } /* * now prints the capabilities @@ -269,7 +272,10 @@ static int __init efi_rtc_probe(struct platform_device *dev) rtc->ops = &efi_rtc_ops; clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features); - set_bit(RTC_FEATURE_ALARM_WAKEUP_ONLY, rtc->features); + if (efi_rt_services_supported(EFI_RT_SUPPORTED_WAKEUP_SERVICES)) + set_bit(RTC_FEATURE_ALARM_WAKEUP_ONLY, rtc->features); + else + clear_bit(RTC_FEATURE_ALARM, rtc->features); device_init_wakeup(&dev->dev, true); diff --git a/include/linux/efi.h b/include/linux/efi.h index 4b27519143f56..98598bd1d2fa5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -668,7 +668,8 @@ extern struct efi { #define EFI_RT_SUPPORTED_ALL 0x3fff -#define EFI_RT_SUPPORTED_TIME_SERVICES 0x000f +#define EFI_RT_SUPPORTED_TIME_SERVICES 0x0003 +#define EFI_RT_SUPPORTED_WAKEUP_SERVICES 0x000c #define EFI_RT_SUPPORTED_VARIABLE_SERVICES 0x0070 extern struct mm_struct efi_mm; From 51b3802e79606b2326f4a8cac0f2766a2e15338b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 4 Jan 2023 10:56:12 +0100 Subject: [PATCH 0542/1593] dt-bindings: rtc: qcom-pm8xxx: allow 'wakeup-source' property The RTC can be used as a wakeup source on at least some platforms so allow it to be described as such. Signed-off-by: Johan Hovold Acked-by: Rob Herring Link: https://lore.kernel.org/r/20230104095612.6756-1-johan+linaro@kernel.org Signed-off-by: Alexandre Belloni --- Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml index 0a7aa29563c1c..21c8ea08ff0a2 100644 --- a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml @@ -40,6 +40,8 @@ properties: description: Indicates that the setting of RTC time is allowed by the host CPU. + wakeup-source: true + required: - compatible - reg From 0be7ed8e7eb15282b5d0f6fdfea884db594ea9bf Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Wed, 4 Jan 2023 17:09:02 -0500 Subject: [PATCH 0543/1593] drm/amdgpu: Fix potential NULL dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix potential NULL dereference, in the case when "man", the resource manager might be NULL, when/if we print debug information. Cc: Alex Deucher Cc: Christian König Cc: AMD Graphics Cc: Dan Carpenter Cc: kernel test robot Fixes: 7554886daa31ea ("drm/amdgpu: Fix size validation for non-exclusive domains (v4)") Signed-off-by: Luben Tuikov Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 4e684c2afc709..25a68d8888e0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -470,8 +470,9 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, return true; fail: - DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, - man->size); + if (man) + DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, + man->size); return false; } From 1923bc5a56daeeabd7e9093bad2febcd6af2416a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 27 Dec 2022 15:49:17 -0600 Subject: [PATCH 0544/1593] drm/amd: Delay removal of the firmware framebuffer Removing the firmware framebuffer from the driver means that even if the driver doesn't support the IP blocks in a GPU it will no longer be functional after the driver fails to initialize. This change will ensure that unsupported IP blocks at least cause the driver to work with the EFI framebuffer. Cc: stable@vger.kernel.org Suggested-by: Alex Deucher Reviewed-by: Alex Deucher Reviewed-by: Lijo Lazar Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 ------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index afe6af9c01385..2f28a8c02f641 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -90,6 +91,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); #define AMDGPU_MAX_RETRY_LIMIT 2 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL) +static const struct drm_driver amdgpu_kms_driver; + const char *amdgpu_asic_name[] = { "TAHITI", "PITCAIRN", @@ -3687,6 +3690,11 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + /* Get rid of things like offb */ + r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); + if (r) + return r; + /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1353ffd089882..cd4caaa295282 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -23,7 +23,6 @@ */ #include -#include #include #include #include @@ -2122,11 +2121,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, } #endif - /* Get rid of things like offb */ - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver); - if (ret) - return ret; - adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev); if (IS_ERR(adev)) return PTR_ERR(adev); From 99f1a36c90a7524972be5a028424c57fa17753ee Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Fri, 6 Jan 2023 14:04:15 +0800 Subject: [PATCH 0545/1593] drm/amdgpu: Fixed bug on error when unloading amdgpu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed bug on error when unloading amdgpu. The error message is as follows: [ 377.706202] kernel BUG at drivers/gpu/drm/drm_buddy.c:278! [ 377.706215] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 377.706222] CPU: 4 PID: 8610 Comm: modprobe Tainted: G IOE 6.0.0-thomas #1 [ 377.706231] Hardware name: ASUS System Product Name/PRIME Z390-A, BIOS 2004 11/02/2021 [ 377.706238] RIP: 0010:drm_buddy_free_block+0x26/0x30 [drm_buddy] [ 377.706264] Code: 00 00 00 90 0f 1f 44 00 00 48 8b 0e 89 c8 25 00 0c 00 00 3d 00 04 00 00 75 10 48 8b 47 18 48 d3 e0 48 01 47 28 e9 fa fe ff ff <0f> 0b 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 54 55 48 89 f5 53 [ 377.706282] RSP: 0018:ffffad2dc4683cb8 EFLAGS: 00010287 [ 377.706289] RAX: 0000000000000000 RBX: ffff8b1743bd5138 RCX: 0000000000000000 [ 377.706297] RDX: ffff8b1743bd5160 RSI: ffff8b1743bd5c78 RDI: ffff8b16d1b25f70 [ 377.706304] RBP: ffff8b1743bd59e0 R08: 0000000000000001 R09: 0000000000000001 [ 377.706311] R10: ffff8b16c8572400 R11: ffffad2dc4683cf0 R12: ffff8b16d1b25f70 [ 377.706318] R13: ffff8b16d1b25fd0 R14: ffff8b1743bd59c0 R15: ffff8b16d1b25f70 [ 377.706325] FS: 00007fec56c72c40(0000) GS:ffff8b1836500000(0000) knlGS:0000000000000000 [ 377.706334] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 377.706340] CR2: 00007f9b88c1ba50 CR3: 0000000110450004 CR4: 00000000003706e0 [ 377.706347] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 377.706354] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 377.706361] Call Trace: [ 377.706365] [ 377.706369] drm_buddy_free_list+0x2a/0x60 [drm_buddy] [ 377.706376] amdgpu_vram_mgr_fini+0xea/0x180 [amdgpu] [ 377.706572] amdgpu_ttm_fini+0x12e/0x1a0 [amdgpu] [ 377.706650] amdgpu_bo_fini+0x22/0x90 [amdgpu] [ 377.706727] gmc_v11_0_sw_fini+0x26/0x30 [amdgpu] [ 377.706821] amdgpu_device_fini_sw+0xa1/0x3c0 [amdgpu] [ 377.706897] amdgpu_driver_release_kms+0x12/0x30 [amdgpu] [ 377.706975] drm_dev_release+0x20/0x40 [drm] [ 377.707006] release_nodes+0x35/0xb0 [ 377.707014] devres_release_all+0x8b/0xc0 [ 377.707020] device_unbind_cleanup+0xe/0x70 [ 377.707027] device_release_driver_internal+0xee/0x160 [ 377.707033] driver_detach+0x44/0x90 [ 377.707039] bus_remove_driver+0x55/0xe0 [ 377.707045] pci_unregister_driver+0x3b/0x90 [ 377.707052] amdgpu_exit+0x11/0x6c [amdgpu] [ 377.707194] __x64_sys_delete_module+0x142/0x2b0 [ 377.707201] ? fpregs_assert_state_consistent+0x22/0x50 [ 377.707208] ? exit_to_user_mode_prepare+0x3e/0x190 [ 377.707215] do_syscall_64+0x38/0x90 [ 377.707221] entry_SYSCALL_64_after_hwframe+0x63/0xcd Signed-off-by: YiPeng Chai Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index faa12146635cf..9fa1d814508a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -882,7 +882,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) kfree(rsv); list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) { - drm_buddy_free_list(&mgr->mm, &rsv->blocks); + drm_buddy_free_list(&mgr->mm, &rsv->allocated); kfree(rsv); } drm_buddy_fini(&mgr->mm); From f58985620f55580a07d40062c4115d8c9cf6ae27 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Wed, 7 Dec 2022 08:55:02 +0000 Subject: [PATCH 0546/1593] ice: Fix potential memory leak in ice_gnss_tty_write() The ice_gnss_tty_write() return directly if the write_buf alloc failed, leaking the cmd_buf. Fix by free cmd_buf if write_buf alloc failed. Fixes: d6b98c8d242a ("ice: add write functionality for GNSS TTY") Signed-off-by: Yuan Can Reviewed-by: Leon Romanovsky Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_gnss.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index b5a7f246d230f..a1915551c69a8 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -363,6 +363,7 @@ ice_gnss_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) /* Send the data out to a hardware port */ write_buf = kzalloc(sizeof(*write_buf), GFP_KERNEL); if (!write_buf) { + kfree(cmd_buf); err = -ENOMEM; goto exit; } From 40543b3d9d2c13227ecd3aa90a713c201d1d7f09 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Thu, 8 Dec 2022 21:35:52 +0800 Subject: [PATCH 0547/1593] ice: Add check for kzalloc Add the check for the return value of kzalloc in order to avoid NULL pointer dereference. Moreover, use the goto-label to share the clean code. Fixes: d6b98c8d242a ("ice: add write functionality for GNSS TTY") Signed-off-by: Jiasheng Jiang Reviewed-by: Jiri Pirko Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_gnss.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index a1915551c69a8..43e199b5b513b 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -461,6 +461,9 @@ static struct tty_driver *ice_gnss_create_tty_driver(struct ice_pf *pf) for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) { pf->gnss_tty_port[i] = kzalloc(sizeof(*pf->gnss_tty_port[i]), GFP_KERNEL); + if (!pf->gnss_tty_port[i]) + goto err_out; + pf->gnss_serial[i] = NULL; tty_port_init(pf->gnss_tty_port[i]); @@ -470,21 +473,23 @@ static struct tty_driver *ice_gnss_create_tty_driver(struct ice_pf *pf) err = tty_register_driver(tty_driver); if (err) { dev_err(dev, "Failed to register TTY driver err=%d\n", err); - - for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) { - tty_port_destroy(pf->gnss_tty_port[i]); - kfree(pf->gnss_tty_port[i]); - } - kfree(ttydrv_name); - tty_driver_kref_put(pf->ice_gnss_tty_driver); - - return NULL; + goto err_out; } for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) dev_info(dev, "%s%d registered\n", ttydrv_name, i); return tty_driver; + +err_out: + while (i--) { + tty_port_destroy(pf->gnss_tty_port[i]); + kfree(pf->gnss_tty_port[i]); + } + kfree(ttydrv_name); + tty_driver_kref_put(pf->ice_gnss_tty_driver); + + return NULL; } /** From daec8b54e9445566d003ac46e267e73755b5f55a Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 12 Dec 2022 14:59:52 +0100 Subject: [PATCH 0548/1593] ARM: footbridge: drop unnecessary inclusion isa-rtc.c doesn't use any definition from bcd.h, remove its inclusion Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20221212135953.212303-1-alexandre.belloni@bootlin.com Signed-off-by: Arnd Bergmann --- arch/arm/mach-footbridge/isa-rtc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/mach-footbridge/isa-rtc.c b/arch/arm/mach-footbridge/isa-rtc.c index b8f741a3a37ea..237b828dd2f10 100644 --- a/arch/arm/mach-footbridge/isa-rtc.c +++ b/arch/arm/mach-footbridge/isa-rtc.c @@ -20,7 +20,6 @@ #include #include -#include #include #include "common.h" From febb985c06cb6f5fac63598c0bffd4fd823d110d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 Jan 2023 14:46:10 -0700 Subject: [PATCH 0549/1593] io_uring/poll: add hash if ready poll request can't complete inline If we don't, then we may lose access to it completely, leading to a request leak. This will eventually stall the ring exit process as well. Cc: stable@vger.kernel.org Fixes: 49f1c68e048f ("io_uring: optimise submission side poll_refs") Reported-and-tested-by: syzbot+6c95df01470a47fc3af4@syzkaller.appspotmail.com Link: https://lore.kernel.org/io-uring/0000000000009f829805f1ce87b2@google.com/ Suggested-by: Pavel Begunkov Signed-off-by: Jens Axboe --- io_uring/poll.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/io_uring/poll.c b/io_uring/poll.c index ee7da6150ec41..cf6a70bd54e09 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -533,6 +533,14 @@ static bool io_poll_can_finish_inline(struct io_kiocb *req, return pt->owning || io_poll_get_ownership(req); } +static void io_poll_add_hash(struct io_kiocb *req) +{ + if (req->flags & REQ_F_HASH_LOCKED) + io_poll_req_insert_locked(req); + else + io_poll_req_insert(req); +} + /* * Returns 0 when it's handed over for polling. The caller owns the requests if * it returns non-zero, but otherwise should not touch it. Negative values @@ -591,18 +599,17 @@ static int __io_arm_poll_handler(struct io_kiocb *req, if (mask && ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) { - if (!io_poll_can_finish_inline(req, ipt)) + if (!io_poll_can_finish_inline(req, ipt)) { + io_poll_add_hash(req); return 0; + } io_poll_remove_entries(req); ipt->result_mask = mask; /* no one else has access to the req, forget about the ref */ return 1; } - if (req->flags & REQ_F_HASH_LOCKED) - io_poll_req_insert_locked(req); - else - io_poll_req_insert(req); + io_poll_add_hash(req); if (mask && (poll->events & EPOLLET) && io_poll_can_finish_inline(req, ipt)) { From 5a41237ad1d4b62008f93163af1d9b1da90729d8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 9 Jan 2023 17:04:49 -0600 Subject: [PATCH 0550/1593] gcc: disable -Warray-bounds for gcc-11 too We had already disabled this warning for gcc-12 due to bugs in the value range analysis, but it turns out we end up having some similar problems with gcc-11.3 too, so let's disable it there too. Older gcc versions end up being increasingly less relevant, and hopefully clang and newer version of gcc (ie gcc-13) end up working reliably enough that we still get the build coverage even when we disable this for some versions. Link: https://lore.kernel.org/all/20221227002941.GA2691687@roeck-us.net/ Link: https://lore.kernel.org/all/D8BDBF66-E44C-45D4-9758-BAAA4F0C1998@kernel.org/ Cc: Kees Cook Cc: Vlastimil Babka Cc: Guenter Roeck Signed-off-by: Linus Torvalds --- init/Kconfig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 7e5c3ddc341de..0958846b005e1 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -894,13 +894,17 @@ config CC_IMPLICIT_FALLTHROUGH default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5) default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough) -# Currently, disable gcc-12 array-bounds globally. +# Currently, disable gcc-11,12 array-bounds globally. # We may want to target only particular configurations some day. +config GCC11_NO_ARRAY_BOUNDS + def_bool y + config GCC12_NO_ARRAY_BOUNDS def_bool y config CC_NO_ARRAY_BOUNDS bool + default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC_VERSION < 120000 && GCC11_NO_ARRAY_BOUNDS default y if CC_IS_GCC && GCC_VERSION >= 120000 && GCC_VERSION < 130000 && GCC12_NO_ARRAY_BOUNDS # From 52531258318ed59a2dc5a43df2eaf0eb1d65438e Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 16 Dec 2022 15:33:55 -0800 Subject: [PATCH 0551/1593] drm/virtio: Fix GEM handle creation UAF Userspace can guess the handle value and try to race GEM object creation with handle close, resulting in a use-after-free if we dereference the object after dropping the handle's reference. For that reason, dropping the handle's reference must be done *after* we are done dereferencing the object. Signed-off-by: Rob Clark Reviewed-by: Chia-I Wu Fixes: 62fb7a5e1096 ("virtio-gpu: add 3d/virgl support") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20221216233355.542197-2-robdclark@gmail.com --- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 5d05093014ac3..9f4a90493aeac 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -358,10 +358,18 @@ static int virtio_gpu_resource_create_ioctl(struct drm_device *dev, void *data, drm_gem_object_release(obj); return ret; } - drm_gem_object_put(obj); rc->res_handle = qobj->hw_res_handle; /* similiar to a VM address */ rc->bo_handle = handle; + + /* + * The handle owns the reference now. But we must drop our + * remaining reference *after* we no longer need to dereference + * the obj. Otherwise userspace could guess the handle and + * race closing it from another thread. + */ + drm_gem_object_put(obj); + return 0; } @@ -723,11 +731,18 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, drm_gem_object_release(obj); return ret; } - drm_gem_object_put(obj); rc_blob->res_handle = bo->hw_res_handle; rc_blob->bo_handle = handle; + /* + * The handle owns the reference now. But we must drop our + * remaining reference *after* we no longer need to dereference + * the obj. Otherwise userspace could guess the handle and + * race closing it from another thread. + */ + drm_gem_object_put(obj); + return 0; } From a309c7194e8a2f8bd4539b9449917913f6c2cd50 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 7 Dec 2022 12:29:07 -0500 Subject: [PATCH 0552/1593] drm/vmwgfx: Remove rcu locks from user resources User resource lookups used rcu to avoid two extra atomics. Unfortunately the rcu paths were buggy and it was easy to make the driver crash by submitting command buffers from two different threads. Because the lookups never show up in performance profiles replace them with a regular spin lock which fixes the races in accesses to those shared resources. Fixes kernel oops'es in IGT's vmwgfx execution_buffer stress test and seen crashes with apps using shared resources. Fixes: e14c02e6b699 ("drm/vmwgfx: Look up objects without taking a reference") Signed-off-by: Zack Rusin Reviewed-by: Martin Krastev Reviewed-by: Maaz Mombasawala Link: https://patchwork.freedesktop.org/patch/msgid/20221207172907.959037-1-zack@kde.org --- drivers/gpu/drm/vmwgfx/ttm_object.c | 41 +----- drivers/gpu/drm/vmwgfx/ttm_object.h | 14 -- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 38 ----- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 18 +-- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 176 +++++++++++------------ drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 33 ----- 6 files changed, 87 insertions(+), 233 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c b/drivers/gpu/drm/vmwgfx/ttm_object.c index 932b125ebf3d6..ddf8373c1d779 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.c +++ b/drivers/gpu/drm/vmwgfx/ttm_object.c @@ -254,40 +254,6 @@ void ttm_base_object_unref(struct ttm_base_object **p_base) kref_put(&base->refcount, ttm_release_base); } -/** - * ttm_base_object_noref_lookup - look up a base object without reference - * @tfile: The struct ttm_object_file the object is registered with. - * @key: The object handle. - * - * This function looks up a ttm base object and returns a pointer to it - * without refcounting the pointer. The returned pointer is only valid - * until ttm_base_object_noref_release() is called, and the object - * pointed to by the returned pointer may be doomed. Any persistent usage - * of the object requires a refcount to be taken using kref_get_unless_zero(). - * Iff this function returns successfully it needs to be paired with - * ttm_base_object_noref_release() and no sleeping- or scheduling functions - * may be called inbetween these function callse. - * - * Return: A pointer to the object if successful or NULL otherwise. - */ -struct ttm_base_object * -ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key) -{ - struct vmwgfx_hash_item *hash; - int ret; - - rcu_read_lock(); - ret = ttm_tfile_find_ref_rcu(tfile, key, &hash); - if (ret) { - rcu_read_unlock(); - return NULL; - } - - __release(RCU); - return hlist_entry(hash, struct ttm_ref_object, hash)->obj; -} -EXPORT_SYMBOL(ttm_base_object_noref_lookup); - struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile, uint64_t key) { @@ -295,15 +261,16 @@ struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile, struct vmwgfx_hash_item *hash; int ret; - rcu_read_lock(); - ret = ttm_tfile_find_ref_rcu(tfile, key, &hash); + spin_lock(&tfile->lock); + ret = ttm_tfile_find_ref(tfile, key, &hash); if (likely(ret == 0)) { base = hlist_entry(hash, struct ttm_ref_object, hash)->obj; if (!kref_get_unless_zero(&base->refcount)) base = NULL; } - rcu_read_unlock(); + spin_unlock(&tfile->lock); + return base; } diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.h b/drivers/gpu/drm/vmwgfx/ttm_object.h index f0ebbe340ad69..8098a3846bae3 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.h +++ b/drivers/gpu/drm/vmwgfx/ttm_object.h @@ -307,18 +307,4 @@ extern int ttm_prime_handle_to_fd(struct ttm_object_file *tfile, #define ttm_prime_object_kfree(__obj, __prime) \ kfree_rcu(__obj, __prime.base.rhead) -struct ttm_base_object * -ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key); - -/** - * ttm_base_object_noref_release - release a base object pointer looked up - * without reference - * - * Releases a base object pointer looked up with ttm_base_object_noref_lookup(). - */ -static inline void ttm_base_object_noref_release(void) -{ - __acquire(RCU); - rcu_read_unlock(); -} #endif diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 321c551784a14..aa1cd5126a321 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -715,44 +715,6 @@ int vmw_user_bo_lookup(struct drm_file *filp, return 0; } -/** - * vmw_user_bo_noref_lookup - Look up a vmw user buffer object without reference - * @filp: The TTM object file the handle is registered with. - * @handle: The user buffer object handle. - * - * This function looks up a struct vmw_bo and returns a pointer to the - * struct vmw_buffer_object it derives from without refcounting the pointer. - * The returned pointer is only valid until vmw_user_bo_noref_release() is - * called, and the object pointed to by the returned pointer may be doomed. - * Any persistent usage of the object requires a refcount to be taken using - * ttm_bo_reference_unless_doomed(). Iff this function returns successfully it - * needs to be paired with vmw_user_bo_noref_release() and no sleeping- - * or scheduling functions may be called in between these function calls. - * - * Return: A struct vmw_buffer_object pointer if successful or negative - * error pointer on failure. - */ -struct vmw_buffer_object * -vmw_user_bo_noref_lookup(struct drm_file *filp, u32 handle) -{ - struct vmw_buffer_object *vmw_bo; - struct ttm_buffer_object *bo; - struct drm_gem_object *gobj = drm_gem_object_lookup(filp, handle); - - if (!gobj) { - DRM_ERROR("Invalid buffer object handle 0x%08lx.\n", - (unsigned long)handle); - return ERR_PTR(-ESRCH); - } - vmw_bo = gem_to_vmw_bo(gobj); - bo = ttm_bo_get_unless_zero(&vmw_bo->base); - vmw_bo = vmw_buffer_object(bo); - drm_gem_object_put(gobj); - - return vmw_bo; -} - - /** * vmw_bo_fence_single - Utility function to fence a single TTM buffer * object without unreserving it. diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index b062b020b3782..5acbf5849b270 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -830,12 +830,7 @@ extern int vmw_user_resource_lookup_handle( uint32_t handle, const struct vmw_user_resource_conv *converter, struct vmw_resource **p_res); -extern struct vmw_resource * -vmw_user_resource_noref_lookup_handle(struct vmw_private *dev_priv, - struct ttm_object_file *tfile, - uint32_t handle, - const struct vmw_user_resource_conv * - converter); + extern int vmw_stream_claim_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int vmw_stream_unref_ioctl(struct drm_device *dev, void *data, @@ -874,15 +869,6 @@ static inline bool vmw_resource_mob_attached(const struct vmw_resource *res) return !RB_EMPTY_NODE(&res->mob_node); } -/** - * vmw_user_resource_noref_release - release a user resource pointer looked up - * without reference - */ -static inline void vmw_user_resource_noref_release(void) -{ - ttm_base_object_noref_release(); -} - /** * Buffer object helper functions - vmwgfx_bo.c */ @@ -934,8 +920,6 @@ extern void vmw_bo_unmap(struct vmw_buffer_object *vbo); extern void vmw_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_resource *mem); extern void vmw_bo_swap_notify(struct ttm_buffer_object *bo); -extern struct vmw_buffer_object * -vmw_user_bo_noref_lookup(struct drm_file *filp, u32 handle); /** * vmw_bo_adjust_prio - Adjust the buffer object eviction priority diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index a5379f6fb5ab1..a44d53e33cdb1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -290,20 +290,26 @@ static void vmw_execbuf_rcache_update(struct vmw_res_cache_entry *rcache, rcache->valid_handle = 0; } +enum vmw_val_add_flags { + vmw_val_add_flag_none = 0, + vmw_val_add_flag_noctx = 1 << 0, +}; + /** - * vmw_execbuf_res_noref_val_add - Add a resource described by an unreferenced - * rcu-protected pointer to the validation list. + * vmw_execbuf_res_val_add - Add a resource to the validation list. * * @sw_context: Pointer to the software context. * @res: Unreferenced rcu-protected pointer to the resource. * @dirty: Whether to change dirty status. + * @flags: specifies whether to use the context or not * * Returns: 0 on success. Negative error code on failure. Typical error codes * are %-EINVAL on inconsistency and %-ESRCH if the resource was doomed. */ -static int vmw_execbuf_res_noref_val_add(struct vmw_sw_context *sw_context, - struct vmw_resource *res, - u32 dirty) +static int vmw_execbuf_res_val_add(struct vmw_sw_context *sw_context, + struct vmw_resource *res, + u32 dirty, + u32 flags) { struct vmw_private *dev_priv = res->dev_priv; int ret; @@ -318,24 +324,30 @@ static int vmw_execbuf_res_noref_val_add(struct vmw_sw_context *sw_context, if (dirty) vmw_validation_res_set_dirty(sw_context->ctx, rcache->private, dirty); - vmw_user_resource_noref_release(); return 0; } - priv_size = vmw_execbuf_res_size(dev_priv, res_type); - ret = vmw_validation_add_resource(sw_context->ctx, res, priv_size, - dirty, (void **)&ctx_info, - &first_usage); - vmw_user_resource_noref_release(); - if (ret) - return ret; + if ((flags & vmw_val_add_flag_noctx) != 0) { + ret = vmw_validation_add_resource(sw_context->ctx, res, 0, dirty, + (void **)&ctx_info, NULL); + if (ret) + return ret; - if (priv_size && first_usage) { - ret = vmw_cmd_ctx_first_setup(dev_priv, sw_context, res, - ctx_info); - if (ret) { - VMW_DEBUG_USER("Failed first usage context setup.\n"); + } else { + priv_size = vmw_execbuf_res_size(dev_priv, res_type); + ret = vmw_validation_add_resource(sw_context->ctx, res, priv_size, + dirty, (void **)&ctx_info, + &first_usage); + if (ret) return ret; + + if (priv_size && first_usage) { + ret = vmw_cmd_ctx_first_setup(dev_priv, sw_context, res, + ctx_info); + if (ret) { + VMW_DEBUG_USER("Failed first usage context setup.\n"); + return ret; + } } } @@ -343,43 +355,6 @@ static int vmw_execbuf_res_noref_val_add(struct vmw_sw_context *sw_context, return 0; } -/** - * vmw_execbuf_res_noctx_val_add - Add a non-context resource to the resource - * validation list if it's not already on it - * - * @sw_context: Pointer to the software context. - * @res: Pointer to the resource. - * @dirty: Whether to change dirty status. - * - * Returns: Zero on success. Negative error code on failure. - */ -static int vmw_execbuf_res_noctx_val_add(struct vmw_sw_context *sw_context, - struct vmw_resource *res, - u32 dirty) -{ - struct vmw_res_cache_entry *rcache; - enum vmw_res_type res_type = vmw_res_type(res); - void *ptr; - int ret; - - rcache = &sw_context->res_cache[res_type]; - if (likely(rcache->valid && rcache->res == res)) { - if (dirty) - vmw_validation_res_set_dirty(sw_context->ctx, - rcache->private, dirty); - return 0; - } - - ret = vmw_validation_add_resource(sw_context->ctx, res, 0, dirty, - &ptr, NULL); - if (ret) - return ret; - - vmw_execbuf_rcache_update(rcache, res, ptr); - - return 0; -} - /** * vmw_view_res_val_add - Add a view and the surface it's pointing to to the * validation list @@ -398,13 +373,13 @@ static int vmw_view_res_val_add(struct vmw_sw_context *sw_context, * First add the resource the view is pointing to, otherwise it may be * swapped out when the view is validated. */ - ret = vmw_execbuf_res_noctx_val_add(sw_context, vmw_view_srf(view), - vmw_view_dirtying(view)); + ret = vmw_execbuf_res_val_add(sw_context, vmw_view_srf(view), + vmw_view_dirtying(view), vmw_val_add_flag_noctx); if (ret) return ret; - return vmw_execbuf_res_noctx_val_add(sw_context, view, - VMW_RES_DIRTY_NONE); + return vmw_execbuf_res_val_add(sw_context, view, VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); } /** @@ -475,8 +450,9 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv, if (IS_ERR(res)) continue; - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_SET); + ret = vmw_execbuf_res_val_add(sw_context, res, + VMW_RES_DIRTY_SET, + vmw_val_add_flag_noctx); if (unlikely(ret != 0)) return ret; } @@ -490,9 +466,9 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv, if (vmw_res_type(entry->res) == vmw_res_view) ret = vmw_view_res_val_add(sw_context, entry->res); else - ret = vmw_execbuf_res_noctx_val_add - (sw_context, entry->res, - vmw_binding_dirtying(entry->bt)); + ret = vmw_execbuf_res_val_add(sw_context, entry->res, + vmw_binding_dirtying(entry->bt), + vmw_val_add_flag_noctx); if (unlikely(ret != 0)) break; } @@ -658,7 +634,8 @@ vmw_cmd_res_check(struct vmw_private *dev_priv, { struct vmw_res_cache_entry *rcache = &sw_context->res_cache[res_type]; struct vmw_resource *res; - int ret; + int ret = 0; + bool needs_unref = false; if (p_res) *p_res = NULL; @@ -683,17 +660,18 @@ vmw_cmd_res_check(struct vmw_private *dev_priv, if (ret) return ret; - res = vmw_user_resource_noref_lookup_handle - (dev_priv, sw_context->fp->tfile, *id_loc, converter); - if (IS_ERR(res)) { + ret = vmw_user_resource_lookup_handle + (dev_priv, sw_context->fp->tfile, *id_loc, converter, &res); + if (ret != 0) { VMW_DEBUG_USER("Could not find/use resource 0x%08x.\n", (unsigned int) *id_loc); - return PTR_ERR(res); + return ret; } + needs_unref = true; - ret = vmw_execbuf_res_noref_val_add(sw_context, res, dirty); + ret = vmw_execbuf_res_val_add(sw_context, res, dirty, vmw_val_add_flag_none); if (unlikely(ret != 0)) - return ret; + goto res_check_done; if (rcache->valid && rcache->res == res) { rcache->valid_handle = true; @@ -708,7 +686,11 @@ vmw_cmd_res_check(struct vmw_private *dev_priv, if (p_res) *p_res = res; - return 0; +res_check_done: + if (needs_unref) + vmw_resource_unreference(&res); + + return ret; } /** @@ -1171,9 +1153,9 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, int ret; vmw_validation_preload_bo(sw_context->ctx); - vmw_bo = vmw_user_bo_noref_lookup(sw_context->filp, handle); - if (IS_ERR(vmw_bo)) { - VMW_DEBUG_USER("Could not find or use MOB buffer.\n"); + ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo); + if (ret != 0) { + drm_dbg(&dev_priv->drm, "Could not find or use MOB buffer.\n"); return PTR_ERR(vmw_bo); } ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, true, false); @@ -1225,9 +1207,9 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, int ret; vmw_validation_preload_bo(sw_context->ctx); - vmw_bo = vmw_user_bo_noref_lookup(sw_context->filp, handle); - if (IS_ERR(vmw_bo)) { - VMW_DEBUG_USER("Could not find or use GMR region.\n"); + ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo); + if (ret != 0) { + drm_dbg(&dev_priv->drm, "Could not find or use GMR region.\n"); return PTR_ERR(vmw_bo); } ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, false, false); @@ -2025,8 +2007,9 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv, res = vmw_shader_lookup(vmw_context_res_man(ctx), cmd->body.shid, cmd->body.type); if (!IS_ERR(res)) { - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, + VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (unlikely(ret != 0)) return ret; @@ -2273,8 +2256,9 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv, return PTR_ERR(res); } - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, + VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (ret) return ret; } @@ -2777,8 +2761,8 @@ static int vmw_cmd_dx_bind_shader(struct vmw_private *dev_priv, return PTR_ERR(res); } - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (ret) { VMW_DEBUG_USER("Error creating resource validation node.\n"); return ret; @@ -3098,8 +3082,8 @@ static int vmw_cmd_dx_bind_streamoutput(struct vmw_private *dev_priv, vmw_dx_streamoutput_set_size(res, cmd->body.sizeInBytes); - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (ret) { DRM_ERROR("Error creating resource validation node.\n"); return ret; @@ -3148,8 +3132,8 @@ static int vmw_cmd_dx_set_streamoutput(struct vmw_private *dev_priv, return 0; } - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (ret) { DRM_ERROR("Error creating resource validation node.\n"); return ret; @@ -4066,22 +4050,26 @@ static int vmw_execbuf_tie_context(struct vmw_private *dev_priv, if (ret) return ret; - res = vmw_user_resource_noref_lookup_handle + ret = vmw_user_resource_lookup_handle (dev_priv, sw_context->fp->tfile, handle, - user_context_converter); - if (IS_ERR(res)) { + user_context_converter, &res); + if (ret != 0) { VMW_DEBUG_USER("Could not find or user DX context 0x%08x.\n", (unsigned int) handle); - return PTR_ERR(res); + return ret; } - ret = vmw_execbuf_res_noref_val_add(sw_context, res, VMW_RES_DIRTY_SET); - if (unlikely(ret != 0)) + ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_SET, + vmw_val_add_flag_none); + if (unlikely(ret != 0)) { + vmw_resource_unreference(&res); return ret; + } sw_context->dx_ctx_node = vmw_execbuf_info_from_res(sw_context, res); sw_context->man = vmw_context_res_man(res); + vmw_resource_unreference(&res); return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index f66caa540e146..c7d645e5ec7bf 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -281,39 +281,6 @@ int vmw_user_resource_lookup_handle(struct vmw_private *dev_priv, return ret; } -/** - * vmw_user_resource_noref_lookup_handle - lookup a struct resource from a - * TTM user-space handle and perform basic type checks - * - * @dev_priv: Pointer to a device private struct - * @tfile: Pointer to a struct ttm_object_file identifying the caller - * @handle: The TTM user-space handle - * @converter: Pointer to an object describing the resource type - * - * If the handle can't be found or is associated with an incorrect resource - * type, -EINVAL will be returned. - */ -struct vmw_resource * -vmw_user_resource_noref_lookup_handle(struct vmw_private *dev_priv, - struct ttm_object_file *tfile, - uint32_t handle, - const struct vmw_user_resource_conv - *converter) -{ - struct ttm_base_object *base; - - base = ttm_base_object_noref_lookup(tfile, handle); - if (!base) - return ERR_PTR(-ESRCH); - - if (unlikely(ttm_base_object_type(base) != converter->object_type)) { - ttm_base_object_noref_release(); - return ERR_PTR(-EINVAL); - } - - return converter->base_obj_to_res(base); -} - /* * Helper function that looks either a surface or bo. * From 9e17f99220d111ea031b44153fdfe364b0024ff2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sat, 7 Jan 2023 19:10:04 +0200 Subject: [PATCH 0553/1593] net/sched: act_mpls: Fix warning during failed attribute validation The 'TCA_MPLS_LABEL' attribute is of 'NLA_U32' type, but has a validation type of 'NLA_VALIDATE_FUNCTION'. This is an invalid combination according to the comment above 'struct nla_policy': " Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN: NLA_BINARY Validation function called for the attribute. All other Unused - but note that it's a union " This can trigger the warning [1] in nla_get_range_unsigned() when validation of the attribute fails. Despite being of 'NLA_U32' type, the associated 'min'/'max' fields in the policy are negative as they are aliased by the 'validate' field. Fix by changing the attribute type to 'NLA_BINARY' which is consistent with the above comment and all other users of NLA_POLICY_VALIDATE_FN(). As a result, move the length validation to the validation function. No regressions in MPLS tests: # ./tdc.py -f tc-tests/actions/mpls.json [...] # echo $? 0 [1] WARNING: CPU: 0 PID: 17743 at lib/nlattr.c:118 nla_get_range_unsigned+0x1d8/0x1e0 lib/nlattr.c:117 Modules linked in: CPU: 0 PID: 17743 Comm: syz-executor.0 Not tainted 6.1.0-rc8 #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-48-gd9c812dda519-prebuilt.qemu.org 04/01/2014 RIP: 0010:nla_get_range_unsigned+0x1d8/0x1e0 lib/nlattr.c:117 [...] Call Trace: __netlink_policy_dump_write_attr+0x23d/0x990 net/netlink/policy.c:310 netlink_policy_dump_write_attr+0x22/0x30 net/netlink/policy.c:411 netlink_ack_tlv_fill net/netlink/af_netlink.c:2454 [inline] netlink_ack+0x546/0x760 net/netlink/af_netlink.c:2506 netlink_rcv_skb+0x1b7/0x240 net/netlink/af_netlink.c:2546 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:6109 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x5e9/0x6b0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x739/0x860 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0x38f/0x500 net/socket.c:2482 ___sys_sendmsg net/socket.c:2536 [inline] __sys_sendmsg+0x197/0x230 net/socket.c:2565 __do_sys_sendmsg net/socket.c:2574 [inline] __se_sys_sendmsg net/socket.c:2572 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2572 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Link: https://lore.kernel.org/netdev/CAO4mrfdmjvRUNbDyP0R03_DrD_eFCLCguz6OxZ2TYRSv0K9gxA@mail.gmail.com/ Fixes: 2a2ea50870ba ("net: sched: add mpls manipulation actions to TC") Reported-by: Wei Chen Tested-by: Wei Chen Signed-off-by: Ido Schimmel Reviewed-by: Alexander Duyck Link: https://lore.kernel.org/r/20230107171004.608436-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/sched/act_mpls.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index ff47ce4d39685..6b26bdb999d77 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -134,6 +134,11 @@ static int valid_label(const struct nlattr *attr, { const u32 *label = nla_data(attr); + if (nla_len(attr) != sizeof(*label)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid MPLS label length"); + return -EINVAL; + } + if (*label & ~MPLS_LABEL_MASK || *label == MPLS_LABEL_IMPLNULL) { NL_SET_ERR_MSG_MOD(extack, "MPLS label out of range"); return -EINVAL; @@ -145,7 +150,8 @@ static int valid_label(const struct nlattr *attr, static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = { [TCA_MPLS_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_mpls)), [TCA_MPLS_PROTO] = { .type = NLA_U16 }, - [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_U32, valid_label), + [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, + valid_label), [TCA_MPLS_TC] = NLA_POLICY_RANGE(NLA_U8, 0, 7), [TCA_MPLS_TTL] = NLA_POLICY_MIN(NLA_U8, 1), [TCA_MPLS_BOS] = NLA_POLICY_RANGE(NLA_U8, 0, 1), From ef01f4e25c1760920e2c94f1c232350277ace69b Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 6 Jan 2023 10:43:59 -0500 Subject: [PATCH 0554/1593] bpf: restore the ebpf program ID for BPF_AUDIT_UNLOAD and PERF_BPF_EVENT_PROG_UNLOAD When changing the ebpf program put() routines to support being called from within IRQ context the program ID was reset to zero prior to calling the perf event and audit UNLOAD record generators, which resulted in problems as the ebpf program ID was bogus (always zero). This patch addresses this problem by removing an unnecessary call to bpf_prog_free_id() in __bpf_prog_offload_destroy() and adjusting __bpf_prog_put() to only call bpf_prog_free_id() after audit and perf have finished their bpf program unload tasks in bpf_prog_put_deferred(). For the record, no one can determine, or remember, why it was necessary to free the program ID, and remove it from the IDR, prior to executing bpf_prog_put_deferred(); regardless, both Stanislav and Alexei agree that the approach in this patch should be safe. It is worth noting that when moving the bpf_prog_free_id() call, the do_idr_lock parameter was forced to true as the ebpf devs determined this was the correct as the do_idr_lock should always be true. The do_idr_lock parameter will be removed in a follow-up patch, but it was kept here to keep the patch small in an effort to ease any stable backports. I also modified the bpf_audit_prog() logic used to associate the AUDIT_BPF record with other associated records, e.g. @ctx != NULL. Instead of keying off the operation, it now keys off the execution context, e.g. '!in_irg && !irqs_disabled()', which is much more appropriate and should help better connect the UNLOAD operations with the associated audit state (other audit records). Cc: stable@vger.kernel.org Fixes: d809e134be7a ("bpf: Prepare bpf_prog_put() to be called from irq context.") Reported-by: Burn Alting Reported-by: Jiri Olsa Suggested-by: Stanislav Fomichev Suggested-by: Alexei Starovoitov Signed-off-by: Paul Moore Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230106154400.74211-1-paul@paul-moore.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/offload.c | 3 --- kernel/bpf/syscall.c | 6 ++---- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 13e4efc971e6d..190d9f9dc9870 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -216,9 +216,6 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog) if (offload->dev_state) offload->offdev->ops->destroy(prog); - /* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */ - bpf_prog_free_id(prog, true); - list_del_init(&offload->offloads); kfree(offload); prog->aux->offload = NULL; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 64131f88c5537..61bb19e81b9c8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1972,7 +1972,7 @@ static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) return; if (audit_enabled == AUDIT_OFF) return; - if (op == BPF_AUDIT_LOAD) + if (!in_irq() && !irqs_disabled()) ctx = audit_context(); ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); if (unlikely(!ab)) @@ -2067,6 +2067,7 @@ static void bpf_prog_put_deferred(struct work_struct *work) prog = aux->prog; perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); + bpf_prog_free_id(prog, true); __bpf_prog_put_noref(prog, true); } @@ -2075,9 +2076,6 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) struct bpf_prog_aux *aux = prog->aux; if (atomic64_dec_and_test(&aux->refcnt)) { - /* bpf_prog_free_id() must be called first */ - bpf_prog_free_id(prog, do_idr_lock); - if (in_irq() || irqs_disabled()) { INIT_WORK(&aux->work, bpf_prog_put_deferred); schedule_work(&aux->work); From e7895f017b79410bf4591396a733b876dc1e0e9d Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 6 Jan 2023 10:44:00 -0500 Subject: [PATCH 0555/1593] bpf: remove the do_idr_lock parameter from bpf_prog_free_id() It was determined that the do_idr_lock parameter to bpf_prog_free_id() was not necessary as it should always be true. Suggested-by: Stanislav Fomichev Signed-off-by: Paul Moore Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230106154400.74211-2-paul@paul-moore.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- kernel/bpf/syscall.c | 20 ++++++-------------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3de24cfb7a3de..634d37a599fa7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1832,7 +1832,7 @@ void bpf_prog_inc(struct bpf_prog *prog); struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); -void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); +void bpf_prog_free_id(struct bpf_prog *prog); void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); struct btf_field *btf_record_find(const struct btf_record *rec, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 61bb19e81b9c8..ecca9366c7a6f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2001,7 +2001,7 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog) return id > 0 ? 0 : id; } -void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) +void bpf_prog_free_id(struct bpf_prog *prog) { unsigned long flags; @@ -2013,18 +2013,10 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) if (!prog->aux->id) return; - if (do_idr_lock) - spin_lock_irqsave(&prog_idr_lock, flags); - else - __acquire(&prog_idr_lock); - + spin_lock_irqsave(&prog_idr_lock, flags); idr_remove(&prog_idr, prog->aux->id); prog->aux->id = 0; - - if (do_idr_lock) - spin_unlock_irqrestore(&prog_idr_lock, flags); - else - __release(&prog_idr_lock); + spin_unlock_irqrestore(&prog_idr_lock, flags); } static void __bpf_prog_put_rcu(struct rcu_head *rcu) @@ -2067,11 +2059,11 @@ static void bpf_prog_put_deferred(struct work_struct *work) prog = aux->prog; perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); - bpf_prog_free_id(prog, true); + bpf_prog_free_id(prog); __bpf_prog_put_noref(prog, true); } -static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) +static void __bpf_prog_put(struct bpf_prog *prog) { struct bpf_prog_aux *aux = prog->aux; @@ -2087,7 +2079,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) void bpf_prog_put(struct bpf_prog *prog) { - __bpf_prog_put(prog, true); + __bpf_prog_put(prog); } EXPORT_SYMBOL_GPL(bpf_prog_put); From 08f0adb193c008de640fde34a2e00a666c01d77c Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Tue, 10 Jan 2023 11:12:52 +0800 Subject: [PATCH 0556/1593] cpufreq: armada-37xx: stop using 0 as NULL pointer Use NULL for NULL pointer to fix the following sparse warning: drivers/cpufreq/armada-37xx-cpufreq.c:448:32: sparse: warning: Using plain integer as NULL pointer Signed-off-by: Miles Chen Signed-off-by: Viresh Kumar --- drivers/cpufreq/armada-37xx-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index c10fc33b29b18..b74289a95a171 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -445,7 +445,7 @@ static int __init armada37xx_cpufreq_driver_init(void) return -ENODEV; } - clk = clk_get(cpu_dev, 0); + clk = clk_get(cpu_dev, NULL); if (IS_ERR(clk)) { dev_err(cpu_dev, "Cannot get clock for CPU0\n"); return PTR_ERR(clk); From 2ea26b4de6f42b74a5f1701de41efa6bc9f12666 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 8 Jan 2023 20:37:53 +0100 Subject: [PATCH 0557/1593] Revert "r8169: disable detection of chip version 36" This reverts commit 42666b2c452ce87894786aae05e3fad3cfc6cb59. This chip version seems to be very rare, but it exits in consumer devices, see linked report. Link: https://stackoverflow.com/questions/75049473/cant-setup-a-wired-network-in-archlinux-fresh-install Fixes: 42666b2c452c ("r8169: disable detection of chip version 36") Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/42e9674c-d5d0-a65a-f578-e5c74f244739@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 24592d9725230..dadd61bccfe72 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1996,10 +1996,7 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii) /* 8168F family. */ { 0x7c8, 0x488, RTL_GIGA_MAC_VER_38 }, - /* It seems this chip version never made it to - * the wild. Let's disable detection. - * { 0x7cf, 0x481, RTL_GIGA_MAC_VER_36 }, - */ + { 0x7cf, 0x481, RTL_GIGA_MAC_VER_36 }, { 0x7cf, 0x480, RTL_GIGA_MAC_VER_35 }, /* 8168E family. */ From 17b3222e943701c408d6e03357967829aaea0550 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 13 Nov 2022 00:04:17 +0200 Subject: [PATCH 0558/1593] net/mlx5: DR, Fix 'stack frame size exceeds limit' error in dr_rule If the kernel configuration asks the compiler to check frame limit of 1K, dr_rule_create_rule_nic exceed this limit: "stack frame size (1184) exceeds limit (1024)" Fixing this issue by checking configured frame limit and using the optimization STE array only for cases with the usual 2K (or larger) stack size warning. Fixes: b9b81e1e9382 ("net/mlx5: DR, For short chains of STEs, avoid allocating ste_arr dynamically") Reported-by: kernel test robot Signed-off-by: Yevgeny Kliteynik --- .../ethernet/mellanox/mlx5/core/steering/dr_rule.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index 74cbe53ee9dbb..b851141e03de3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -3,7 +3,12 @@ #include "dr_types.h" +#if defined(CONFIG_FRAME_WARN) && (CONFIG_FRAME_WARN < 2048) +/* don't try to optimize STE allocation if the stack is too constaraining */ +#define DR_RULE_MAX_STES_OPTIMIZED 0 +#else #define DR_RULE_MAX_STES_OPTIMIZED 5 +#endif #define DR_RULE_MAX_STE_CHAIN_OPTIMIZED (DR_RULE_MAX_STES_OPTIMIZED + DR_ACTION_MAX_STES) static int dr_rule_append_to_miss_list(struct mlx5dr_domain *dmn, @@ -1218,10 +1223,7 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, mlx5dr_domain_nic_unlock(nic_dmn); - if (unlikely(!hw_ste_arr_is_opt)) - kfree(hw_ste_arr); - - return 0; + goto out; free_rule: dr_rule_clean_rule_members(rule, nic_rule); @@ -1238,6 +1240,7 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, free_hw_ste: mlx5dr_domain_nic_unlock(nic_dmn); +out: if (unlikely(!hw_ste_arr_is_opt)) kfree(hw_ste_arr); From e0bf81bf0d3d4747c146e0bf44774d3d881d7137 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Tue, 16 Aug 2022 23:19:11 +0300 Subject: [PATCH 0559/1593] net/mlx5: check attr pointer validity before dereferencing it Fix attr pointer validity checks after it was already dereferenced. Fixes: cb0d54cbf948 ("net/mlx5e: Fix wrong source vport matching on tunnel rule") Signed-off-by: Ariel Levkovich Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e455b215c7088..75b77dd2392b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -143,7 +143,7 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, if (mlx5_esw_indir_table_decap_vport(attr)) vport = mlx5_esw_indir_table_decap_vport(attr); - if (attr && !attr->chain && esw_attr->int_port) + if (!attr->chain && esw_attr && esw_attr->int_port) metadata = mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port); else From 5e72f3f1c558019082cfeedeed73748f35d780c6 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Tue, 30 Aug 2022 00:32:30 +0300 Subject: [PATCH 0560/1593] net/mlx5e: TC, Keep mod hdr actions after mod hdr alloc When offloading TC NIC rule which has mod_hdr action, the mod_hdr actions list is freed upon mod_hdr allocation. In the new format of handling multi table actions and CT in particular, the mod_hdr actions list is still relevant when setting the pre and post rules and therefore, freeing the list may cause adding rules which don't set the FTE_ID. Therefore, the mod_hdr actions list needs to be kept for the pre/post flows as well and should be left for these handler to be freed. Fixes: 8300f225268b ("net/mlx5e: Create new flow attr for multi table actions") Signed-off-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9af2aa2922f5d..dbadaf1664879 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1301,7 +1301,6 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); if (err) return err; } @@ -1359,8 +1358,10 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, } mutex_unlock(&tc->t_lock); - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); mlx5e_detach_mod_hdr(priv, flow); + } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) mlx5_fc_destroy(priv->mdev, attr->counter); From da2e552b469a0cd130ff70a88ccc4139da428a65 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 28 Nov 2022 19:05:47 +0200 Subject: [PATCH 0561/1593] net/mlx5: Fix command stats access after free Command may fail while driver is reloading and can't accept FW commands till command interface is reinitialized. Such command failure is being logged to command stats. This results in NULL pointer access as command stats structure is being freed and reallocated during mlx5 devlink reload (see kernel log below). Fix it by making command stats statically allocated on driver probe. Kernel log: [ 2394.808802] BUG: unable to handle kernel paging request at 000000000002a9c0 [ 2394.810610] PGD 0 P4D 0 [ 2394.811811] Oops: 0002 [#1] SMP NOPTI ... [ 2394.815482] RIP: 0010:native_queued_spin_lock_slowpath+0x183/0x1d0 ... [ 2394.829505] Call Trace: [ 2394.830667] _raw_spin_lock_irq+0x23/0x26 [ 2394.831858] cmd_status_err+0x55/0x110 [mlx5_core] [ 2394.833020] mlx5_access_reg+0xe7/0x150 [mlx5_core] [ 2394.834175] mlx5_query_port_ptys+0x78/0xa0 [mlx5_core] [ 2394.835337] mlx5e_ethtool_get_link_ksettings+0x74/0x590 [mlx5_core] [ 2394.836454] ? kmem_cache_alloc_trace+0x140/0x1c0 [ 2394.837562] __rh_call_get_link_ksettings+0x33/0x100 [ 2394.838663] ? __rtnl_unlock+0x25/0x50 [ 2394.839755] __ethtool_get_link_ksettings+0x72/0x150 [ 2394.840862] duplex_show+0x6e/0xc0 [ 2394.841963] dev_attr_show+0x1c/0x40 [ 2394.843048] sysfs_kf_seq_show+0x9b/0x100 [ 2394.844123] seq_read+0x153/0x410 [ 2394.845187] vfs_read+0x91/0x140 [ 2394.846226] ksys_read+0x4f/0xb0 [ 2394.847234] do_syscall_64+0x5b/0x1a0 [ 2394.848228] entry_SYSCALL_64_after_hwframe+0x65/0xca Fixes: 34f46ae0d4b3 ("net/mlx5: Add command failures data to debugfs") Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 13 ++----------- include/linux/mlx5/driver.h | 2 +- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index d3ca745d107d6..c837103a9ee33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -2176,15 +2176,9 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) return -EINVAL; } - cmd->stats = kvcalloc(MLX5_CMD_OP_MAX, sizeof(*cmd->stats), GFP_KERNEL); - if (!cmd->stats) - return -ENOMEM; - cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); - if (!cmd->pool) { - err = -ENOMEM; - goto dma_pool_err; - } + if (!cmd->pool) + return -ENOMEM; err = alloc_cmd_page(dev, cmd); if (err) @@ -2268,8 +2262,6 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) err_free_pool: dma_pool_destroy(cmd->pool); -dma_pool_err: - kvfree(cmd->stats); return err; } @@ -2282,7 +2274,6 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) destroy_msg_cache(dev); free_cmd_page(dev, cmd); dma_pool_destroy(cmd->pool); - kvfree(cmd->stats); } void mlx5_cmd_set_state(struct mlx5_core_dev *dev, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d476255c9a3f0..76ef2e4fde38d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -315,7 +315,7 @@ struct mlx5_cmd { struct mlx5_cmd_debug dbg; struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES]; int checksum_disabled; - struct mlx5_cmd_stats *stats; + struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; }; struct mlx5_cmd_mailbox { From ab4b01bfdaa69492fb36484026b0a0f0af02d75a Mon Sep 17 00:00:00 2001 From: Roy Novich Date: Wed, 4 Jan 2023 11:16:21 +0200 Subject: [PATCH 0562/1593] net/mlx5e: Verify dev is present for fix features ndo The native NIC port net device instance is being used as Uplink representor. While changing profiles private resources are not available, fix features ndo does not check if the netdev is present. Add driver protection to verify private resources are ready. Fixes: 7a9fb35e8c3a ("net/mlx5e: Do not reload ethernet ports when changing eswitch mode") Signed-off-by: Roy Novich Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cff5f2e29e1e2..abcc614b6191b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4084,6 +4084,9 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, struct mlx5e_vlan_table *vlan; struct mlx5e_params *params; + if (!netif_device_present(netdev)) + return features; + vlan = mlx5e_fs_get_vlan(priv->fs); mutex_lock(&priv->state_lock); params = &priv->channels.params; From 806a8df7126a8c05d60411eeb81057c2a8bbe7a7 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 15 Dec 2022 13:02:38 +0200 Subject: [PATCH 0563/1593] net/mlx5e: IPoIB, Block queue count configuration when sub interfaces are present PKEY sub interfaces share the receive queues with the parent interface. While setting the sub interface queue count is not supported, it is currently possible to change the number of queues of the parent interface. Thus we can end up with inconsistent queue sizes between the parent and its sub interfaces. This change disallows setting the queue count on the parent interface when sub interfaces are present. This is achieved by introducing an explicit reference to the parent netdev in the mlx5i_priv of the child interface. An additional counter is also required on the parent side to detect when sub interfaces are attached and for proper cleanup. The rtnl lock is taken during the ethtool op and the sub interface ndo_init/uninit ops. There is no race here around counting the sub interfaces, reading the sub interfaces and setting the number of channels. The ASSERT_RTNL was added to document that. Fixes: be98737a4faa ("net/mlx5e: Use dynamic per-channel allocations in stats") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/ipoib/ethtool.c | 16 +++++++- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 38 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/ipoib/ipoib.h | 6 +++ .../mellanox/mlx5/core/ipoib/ipoib_vlan.c | 9 ++--- 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index c247cca154e9c..eff92dc0927c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -90,9 +90,21 @@ static void mlx5i_get_ringparam(struct net_device *dev, static int mlx5i_set_channels(struct net_device *dev, struct ethtool_channels *ch) { - struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv = netdev_priv(dev); + struct mlx5e_priv *epriv = mlx5i_epriv(dev); + + /* rtnl lock protects from race between this ethtool op and sub + * interface ndo_init/uninit. + */ + ASSERT_RTNL(); + if (ipriv->num_sub_interfaces > 0) { + mlx5_core_warn(epriv->mdev, + "can't change number of channels for interfaces with sub interfaces (%u)\n", + ipriv->num_sub_interfaces); + return -EINVAL; + } - return mlx5e_ethtool_set_channels(priv, ch); + return mlx5e_ethtool_set_channels(epriv, ch); } static void mlx5i_get_channels(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 2c73c8445e630..911cf4d239645 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -160,6 +160,44 @@ void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_dropped = sstats->tx_queue_dropped; } +struct net_device *mlx5i_parent_get(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv, *parent_ipriv; + struct net_device *parent_dev; + int parent_ifindex; + + ipriv = priv->ppriv; + + parent_ifindex = netdev->netdev_ops->ndo_get_iflink(netdev); + parent_dev = dev_get_by_index(dev_net(netdev), parent_ifindex); + if (!parent_dev) + return NULL; + + parent_ipriv = netdev_priv(parent_dev); + + ASSERT_RTNL(); + parent_ipriv->num_sub_interfaces++; + + ipriv->parent_dev = parent_dev; + + return parent_dev; +} + +void mlx5i_parent_put(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv, *parent_ipriv; + + ipriv = priv->ppriv; + parent_ipriv = netdev_priv(ipriv->parent_dev); + + ASSERT_RTNL(); + parent_ipriv->num_sub_interfaces--; + + dev_put(ipriv->parent_dev); +} + int mlx5i_init_underlay_qp(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 99d46fda9f82f..f3f2af972020a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -54,9 +54,11 @@ struct mlx5i_priv { struct rdma_netdev rn; /* keep this first */ u32 qpn; bool sub_interface; + u32 num_sub_interfaces; u32 qkey; u16 pkey_index; struct mlx5i_pkey_qpn_ht *qpn_htbl; + struct net_device *parent_dev; char *mlx5e_priv[]; }; @@ -117,5 +119,9 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more); void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); +/* Reference management for child to parent interfaces. */ +struct net_device *mlx5i_parent_get(struct net_device *netdev); +void mlx5i_parent_put(struct net_device *netdev); + #endif /* CONFIG_MLX5_CORE_IPOIB */ #endif /* __MLX5E_IPOB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 4d9c9e49645c9..28795fb6bccc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -158,21 +158,19 @@ static int mlx5i_pkey_dev_init(struct net_device *dev) struct mlx5e_priv *priv = mlx5i_epriv(dev); struct mlx5i_priv *ipriv, *parent_ipriv; struct net_device *parent_dev; - int parent_ifindex; ipriv = priv->ppriv; - /* Get QPN to netdevice hash table from parent */ - parent_ifindex = dev->netdev_ops->ndo_get_iflink(dev); - parent_dev = dev_get_by_index(dev_net(dev), parent_ifindex); + /* Link to parent */ + parent_dev = mlx5i_parent_get(dev); if (!parent_dev) { mlx5_core_warn(priv->mdev, "failed to get parent device\n"); return -EINVAL; } + /* Get QPN to netdevice hash table from parent */ parent_ipriv = netdev_priv(parent_dev); ipriv->qpn_htbl = parent_ipriv->qpn_htbl; - dev_put(parent_dev); return mlx5i_dev_init(dev); } @@ -184,6 +182,7 @@ static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static void mlx5i_pkey_dev_cleanup(struct net_device *netdev) { + mlx5i_parent_put(netdev); return mlx5i_dev_cleanup(netdev); } From 31c70bfe58ef09fe36327ddcced9143a16e9e83d Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Fri, 25 Nov 2022 17:51:19 +0200 Subject: [PATCH 0564/1593] net/mlx5e: IPoIB, Block PKEY interfaces with less rx queues than parent A user is able to configure an arbitrary number of rx queues when creating an interface via netlink. This doesn't work for child PKEY interfaces because the child interface uses the parent receive channels. Although the child shares the parent's receive channels, the number of rx queues is important for the channel_stats array: the parent's rx channel index is used to access the child's channel_stats. So the array has to be at least as large as the parent's rx queue size for the counting to work correctly and to prevent out of bound accesses. This patch checks for the mentioned scenario and returns an error when trying to create the interface. The error is propagated to the user. Fixes: be98737a4faa ("net/mlx5e: Use dynamic per-channel allocations in stats") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 28795fb6bccc3..03e681297937f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -168,6 +168,15 @@ static int mlx5i_pkey_dev_init(struct net_device *dev) return -EINVAL; } + if (dev->num_rx_queues < parent_dev->real_num_rx_queues) { + mlx5_core_warn(priv->mdev, + "failed to create child device with rx queues [%d] less than parent's [%d]\n", + dev->num_rx_queues, + parent_dev->real_num_rx_queues); + mlx5i_parent_put(dev); + return -EINVAL; + } + /* Get QPN to netdevice hash table from parent */ parent_ipriv = netdev_priv(parent_dev); ipriv->qpn_htbl = parent_ipriv->qpn_htbl; From b5e23931c45a2f99f60a2f2b98a9e4d5a62a5b13 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 23 Nov 2022 16:59:13 +0200 Subject: [PATCH 0565/1593] net/mlx5e: IPoIB, Fix child PKEY interface stats on rx path The current code always does the accounting using the stats from the parent interface (linked in the rq). This doesn't work when there are child interfaces configured. Fix this behavior by always using the stats from the child interface priv. This will also work for parent only interfaces: the child (netdev) and parent netdev (rq->netdev) will point to the same thing. Fixes: be98737a4faa ("net/mlx5e: Use dynamic per-channel allocations in stats") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index c8820ab221694..3df455f6b1685 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -2419,7 +2419,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, priv = mlx5i_epriv(netdev); tstamp = &priv->tstamp; - stats = rq->stats; + stats = &priv->channel_stats[rq->ix]->rq; flags_rqpn = be32_to_cpu(cqe->flags_rqpn); g = (flags_rqpn >> 28) & 3; From 2414c9b7a29d237c9c40abd62853dbc08b4ba7df Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Wed, 7 Dec 2022 11:05:02 +0000 Subject: [PATCH 0566/1593] net/mlx5e: TC, ignore match level for post meter rules The post meter table only matches on reg_c5. As such, the inner/outer match levels are irrelevant for the match critieria. The cited patch only sets the outer criteria to none, thus setting the inner match level for encapsulated packets. This caused rules with police action on tunnel devices to not find an existing flow group for the match criteria, thus failing to offload the rule. Set both the inner and outer match levels to none for post_meter rules. Fixes: 0d8c38d44f33 ("net/mlx5e: TC, init post meter rules with branching attributes") Signed-off-by: Oz Shlomo Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c index 8d7d761482d27..50b60fd009467 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c @@ -127,6 +127,7 @@ mlx5e_post_meter_add_rule(struct mlx5e_priv *priv, attr->counter = act_counter; attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; + attr->inner_match_level = MLX5_MATCH_NONE; attr->outer_match_level = MLX5_MATCH_NONE; attr->chain = 0; attr->prio = 0; From c09502d54dc109530ccb9a8910ab286d0745f119 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Thu, 8 Dec 2022 08:31:48 +0000 Subject: [PATCH 0567/1593] net/mlx5e: TC, Restore pkt rate policing support The offending commit removed the support for all packet rate metering. Restore the pkt rate metering support by removing the restriction. Fixes: 3603f26633e7 ("net/mlx5e: TC, allow meter jump control action") Signed-off-by: Oz Shlomo Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c index 512d431489228..c4378afdec09e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c @@ -34,12 +34,6 @@ static int police_act_validate(const struct flow_action_entry *act, return -EOPNOTSUPP; } - if (act->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, - "QoS offload not support packets per second"); - return -EOPNOTSUPP; - } - return 0; } From 3099d2e62f9061ed57798e68e285d35837be686f Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 8 Jan 2023 12:53:02 +0200 Subject: [PATCH 0568/1593] net/mlx5e: Fix memory leak on updating vport counters When updating statistics driver queries the vport's counters. On fail, add error path releasing the allocated buffer avoiding memory leak. Fixes: 64b68e369649 ("net/mlx5: Refactor and expand rep vport stat group") Signed-off-by: Aya Levin Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 75b9e1528fd29..7d90e5b728548 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -191,7 +191,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep) if (err) { netdev_warn(priv->netdev, "vport %d error %d reading stats\n", rep->vport, err); - return; + goto out; } #define MLX5_GET_CTR(p, x) \ @@ -241,6 +241,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep) rep_stats->tx_vport_rdma_multicast_bytes = MLX5_GET_CTR(out, received_ib_multicast.octets); +out: kvfree(out); } From fe91d57277eef8bb4aca05acfa337b4a51d0bba4 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 5 Dec 2022 14:26:09 -0800 Subject: [PATCH 0569/1593] net/mlx5: Fix ptp max frequency adjustment range .max_adj of ptp_clock_info acts as an absolute value for the amount in ppb that can be set for a single call of .adjfine. This means that a single call to .getfine cannot be greater than .max_adj or less than -(.max_adj). Provides correct value for max frequency adjustment value supported by devices. Fixes: 3d8c38af1493 ("net/mlx5e: Add PTP Hardware Clock (PHC) support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 69cfe60c558a7..69318b1432688 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -681,7 +681,7 @@ static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, static const struct ptp_clock_info mlx5_ptp_clock_info = { .owner = THIS_MODULE, .name = "mlx5_ptp", - .max_adj = 100000000, + .max_adj = 50000000, .n_alarm = 0, .n_ext_ts = 0, .n_per_out = 0, From d515d63cae2cd186acf40deaa8ef33067bb7f637 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Tue, 27 Dec 2022 04:54:09 +0200 Subject: [PATCH 0570/1593] net/mlx5e: Don't support encap rules with gbp option Previously, encap rules with gbp option would be offloaded by mistake but driver does not support gbp option offload. To fix this issue, check if the encap rule has gbp option and don't offload the rule Fixes: d8f9dfae49ce ("net: sched: allow flower to match vxlan options") Signed-off-by: Gavin Li Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index fd07c4cbfd1d2..1f62c702b6255 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -88,6 +88,8 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], struct udphdr *udp = (struct udphdr *)(buf); struct vxlanhdr *vxh; + if (tun_key->tun_flags & TUNNEL_VXLAN_OPT) + return -EOPNOTSUPP; vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); *ip_proto = IPPROTO_UDP; From cd4f186dc110a0f0b3484048b8ecd42ee006ca6d Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 15 Dec 2022 15:33:38 +0200 Subject: [PATCH 0571/1593] net/mlx5: E-switch, Coverity: overlapping copy When a capability is set via port function caps callbacks, a memcpy() is performed in which the source and the target are the same address, e.g.: the copy is redundant. Hence, Remove it. Discovered by Coverity. Fixes: 7db98396ef45 ("net/mlx5: E-Switch, Implement devlink port function cmds to control RoCE") Fixes: e5b9642a33be ("net/mlx5: E-Switch, Implement devlink port function cmds to control migratable") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 75b77dd2392b1..c981fa77f4398 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -4143,8 +4143,6 @@ int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable, } hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - memcpy(hca_caps, MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability), - MLX5_UN_SZ_BYTES(hca_cap_union)); MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, 1); err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport, @@ -4236,8 +4234,6 @@ int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable, } hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - memcpy(hca_caps, MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability), - MLX5_UN_SZ_BYTES(hca_cap_union)); MLX5_SET(cmd_hca_cap, hca_caps, roce, enable); err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num, From f5e1ed04aa2ea665a796f0109091ca3f2b01024a Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Wed, 14 Dec 2022 16:34:13 +0200 Subject: [PATCH 0572/1593] net/mlx5e: Fix macsec ssci attribute handling in offload path Currently when macsec offload is set with extended packet number (epn) enabled, the driver wrongly deduce the short secure channel identifier (ssci) from the salt instead of the stand alone ssci attribute as it should, consequently creating a mismatch between the kernel and driver's ssci values. Fix by using the ssci value from the relevant attribute. Fixes: 4411a6c0abd3 ("net/mlx5e: Support MACsec offload extended packet number (EPN)") Signed-off-by: Emeel Hakim Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 9369a580743e1..cf7b3bb54c861 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -62,6 +62,7 @@ struct mlx5e_macsec_sa { u32 enc_key_id; u32 next_pn; sci_t sci; + ssci_t ssci; salt_t salt; struct rhash_head hash; @@ -499,10 +500,11 @@ mlx5e_macsec_get_macsec_device_context(const struct mlx5e_macsec *macsec, } static void update_macsec_epn(struct mlx5e_macsec_sa *sa, const struct macsec_key *key, - const pn_t *next_pn_halves) + const pn_t *next_pn_halves, ssci_t ssci) { struct mlx5e_macsec_epn_state *epn_state = &sa->epn_state; + sa->ssci = ssci; sa->salt = key->salt; epn_state->epn_enabled = 1; epn_state->epn_msb = next_pn_halves->upper; @@ -550,7 +552,8 @@ static int mlx5e_macsec_add_txsa(struct macsec_context *ctx) tx_sa->assoc_num = assoc_num; if (secy->xpn) - update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves); + update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves, + ctx_tx_sa->ssci); err = mlx5_create_encryption_key(mdev, ctx->sa.key, secy->key_len, MLX5_ACCEL_OBJ_MACSEC_KEY, @@ -945,7 +948,8 @@ static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx) rx_sa->fs_id = rx_sc->sc_xarray_element->fs_id; if (ctx->secy->xpn) - update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves); + update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves, + ctx_rx_sa->ssci); err = mlx5_create_encryption_key(mdev, ctx->sa.key, ctx->secy->key_len, MLX5_ACCEL_OBJ_MACSEC_KEY, From 9828994ac492e8e7de47fe66097b7e665328f348 Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Sun, 11 Dec 2022 13:22:23 +0200 Subject: [PATCH 0573/1593] net/mlx5e: Fix macsec possible null dereference when updating MAC security entity (SecY) Upon updating MAC security entity (SecY) in hw offload path, the macsec security association (SA) initialization routine is called. In case of extended packet number (epn) is enabled the salt and ssci attributes are retrieved using the MACsec driver rx_sa context which is unavailable when updating a SecY property such as encoding-sa hence the null dereference. Fix by using the provided SA to set those attributes. Fixes: 4411a6c0abd3 ("net/mlx5e: Support MACsec offload extended packet number (EPN)") Signed-off-by: Emeel Hakim Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index cf7b3bb54c861..7f6b940830b31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -359,7 +359,6 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_macsec_obj_attrs obj_attrs; union mlx5e_macsec_rule *macsec_rule; - struct macsec_key *key; int err; obj_attrs.next_pn = sa->next_pn; @@ -369,13 +368,9 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, obj_attrs.aso_pdn = macsec->aso.pdn; obj_attrs.epn_state = sa->epn_state; - key = (is_tx) ? &ctx->sa.tx_sa->key : &ctx->sa.rx_sa->key; - if (sa->epn_state.epn_enabled) { - obj_attrs.ssci = (is_tx) ? cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci) : - cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci); - - memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt)); + obj_attrs.ssci = cpu_to_be32((__force u32)sa->ssci); + memcpy(&obj_attrs.salt, &sa->salt, sizeof(sa->salt)); } obj_attrs.replay_window = ctx->secy->replay_window; From aa96d6aa7563ec2948195d1f5892cb7a2caa88e3 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Wed, 4 Jan 2023 18:21:49 +0900 Subject: [PATCH 0574/1593] nvme-apple: add NVME_QUIRK_IDENTIFY_CNS quirk to fix regression From the get-go, this driver and the ANS syslog have been complaining about namespace identification. In 6.2-rc1, commit 811f4de0344d ("nvme: avoid fallback to sequential scan due to transient issues") regressed the driver by no longer allowing fallback to sequential namespace scans, leaving us with no namespaces. It turns out that the real problem is that this controller claiming NVMe 1.1 compat is treating the CNS field as a binary field, as in NVMe 1.0. This already has a quirk, NVME_QUIRK_IDENTIFY_CNS, so set it for the controller to fix all this nonsense (including other errors triggered by other CNS commands). Fixes: 811f4de0344d ("nvme: avoid fallback to sequential scan due to transient issues") Fixes: 5bd2927aceba ("nvme-apple: Add initial Apple SoC NVMe driver") Signed-off-by: Hector Martin Reviewed-by: Sven Peter Signed-off-by: Christoph Hellwig --- drivers/nvme/host/apple.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index e36aeb50b4edc..bf1c60edb7f9a 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1493,7 +1493,7 @@ static int apple_nvme_probe(struct platform_device *pdev) } ret = nvme_init_ctrl(&anv->ctrl, anv->dev, &nvme_ctrl_ops, - NVME_QUIRK_SKIP_CID_GEN); + NVME_QUIRK_SKIP_CID_GEN | NVME_QUIRK_IDENTIFY_CNS); if (ret) { dev_err_probe(dev, ret, "Failed to initialize nvme_ctrl"); goto put_dev; From 453116a441f0e556fe8900c99a632529e48f18c9 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Wed, 4 Jan 2023 19:16:42 +0900 Subject: [PATCH 0575/1593] nvme-pci: add NVME_QUIRK_IDENTIFY_CNS quirk to Apple T2 controllers This mirrors the quirk added to Apple Silicon controllers in apple.c. These controllers do not support the Active NS ID List command and behave identically to the SoC version judging by existing user reports/syslogs, so will need the same fix. This quirk reverts back to NVMe 1.0 behavior and disables the broken commands. Fixes: 811f4de0344d ("nvme: avoid fallback to sequential scan due to transient issues") Signed-off-by: Hector Martin Tested-by: Orlando Chamberlain Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b13baccedb4a9..91f8adcf6056e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3495,7 +3495,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_128_BYTES_SQES | NVME_QUIRK_SHARED_TAGS | - NVME_QUIRK_SKIP_CID_GEN }, + NVME_QUIRK_SKIP_CID_GEN | + NVME_QUIRK_IDENTIFY_CNS }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { 0, } }; From 09113abfb6ba981cb7cb4960b230251afd8966dc Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Thu, 29 Dec 2022 10:37:31 -0800 Subject: [PATCH 0576/1593] nvme-pci: fix error handling in nvme_pci_enable() There are two issues in nvme_pci_enable(): 1) If pci_alloc_irq_vectors() fails, device is left enabled. Fix this by adding a goto disable statement. 2) nvme_pci_configure_admin_queue could return -ENODEV, in this case, we will need to free IRQ properly. Otherwise the following warning could be triggered: [ 5.286752] WARNING: CPU: 0 PID: 33 at kernel/irq/irqdomain.c:253 irq_domain_remove+0x12d/0x140 [ 5.290547] Call Trace: [ 5.290626] [ 5.290695] msi_remove_device_irq_domain+0xc9/0xf0 [ 5.290843] msi_device_data_release+0x15/0x80 [ 5.290978] release_nodes+0x58/0x90 [ 5.293788] WARNING: CPU: 0 PID: 33 at kernel/irq/msi.c:276 msi_device_data_release+0x76/0x80 [ 5.297573] Call Trace: [ 5.297651] [ 5.297719] release_nodes+0x58/0x90 [ 5.297831] devres_release_all+0xef/0x140 [ 5.298339] device_unbind_cleanup+0x11/0xc0 [ 5.298479] really_probe+0x296/0x320 Fixes: a6ee7f19ebfd ("nvme-pci: call nvme_pci_configure_admin_queue from nvme_pci_enable") Co-developed-by: Keith Busch Signed-off-by: Tong Zhang Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 91f8adcf6056e..a2553b7d9bb8e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2533,7 +2533,7 @@ static int nvme_pci_enable(struct nvme_dev *dev) */ result = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); if (result < 0) - return result; + goto disable; dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP); @@ -2586,8 +2586,13 @@ static int nvme_pci_enable(struct nvme_dev *dev) pci_enable_pcie_error_reporting(pdev); pci_save_state(pdev); - return nvme_pci_configure_admin_queue(dev); + result = nvme_pci_configure_admin_queue(dev); + if (result) + goto free_irq; + return result; + free_irq: + pci_free_irq_vectors(pdev); disable: pci_disable_device(pdev); return result; From 2fa1dc8637b5f3397f22b2771f8a4fb8712c10dc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 8 Jan 2023 07:45:30 +0100 Subject: [PATCH 0577/1593] nvme: remove __nvme_ioctl Open code __nvme_ioctl in the two callers to make future changes that pass down additional paramters in the ioctl path easier. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Kanchan Joshi Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/ioctl.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index a8639919237e6..c12b7c445fc0b 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -695,28 +695,26 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, } } -static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg, - fmode_t mode) -{ - if (is_ctrl_ioctl(cmd)) - return nvme_ctrl_ioctl(ns->ctrl, cmd, arg, mode); - return nvme_ns_ioctl(ns, cmd, arg, mode); -} - int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct nvme_ns *ns = bdev->bd_disk->private_data; + void __user *argp = (void __user *)arg; - return __nvme_ioctl(ns, cmd, (void __user *)arg, mode); + if (is_ctrl_ioctl(cmd)) + return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode); + return nvme_ns_ioctl(ns, cmd, argp, mode); } long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct nvme_ns *ns = container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); + void __user *argp = (void __user *)arg; - return __nvme_ioctl(ns, cmd, (void __user *)arg, file->f_mode); + if (is_ctrl_ioctl(cmd)) + return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, file->f_mode); + return nvme_ns_ioctl(ns, cmd, argp, file->f_mode); } static int nvme_uring_cmd_checks(unsigned int issue_flags) From 7b7fdb8e2dbc15ad4e81a328f1c60d1691c6d6be Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 8 Jan 2023 07:53:03 +0100 Subject: [PATCH 0578/1593] nvme: replace the "bool vec" arguments with flags in the ioctl path To prepare for passing down more information, replace the boolean vec argument with a more extensible flags one. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Kanchan Joshi Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/ioctl.c | 53 +++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index c12b7c445fc0b..999ebc1b70005 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -8,6 +8,10 @@ #include #include "nvme.h" +enum { + NVME_IOCTL_VEC = (1 << 0), +}; + static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, fmode_t mode) { @@ -150,7 +154,7 @@ static struct request *nvme_alloc_user_request(struct request_queue *q, static int nvme_map_user_request(struct request *req, u64 ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd, - bool vec) + unsigned int flags) { struct request_queue *q = req->q; struct nvme_ns *ns = q->queuedata; @@ -163,7 +167,7 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, struct iov_iter iter; /* fixedbufs is only for non-vectored io */ - if (WARN_ON_ONCE(vec)) + if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) return -EINVAL; ret = io_uring_cmd_import_fixed(ubuffer, bufflen, rq_data_dir(req), &iter, ioucmd); @@ -172,8 +176,8 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); } else { ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), - bufflen, GFP_KERNEL, vec, 0, 0, - rq_data_dir(req)); + bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0, + 0, rq_data_dir(req)); } if (ret) @@ -203,9 +207,9 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, } static int nvme_submit_user_cmd(struct request_queue *q, - struct nvme_command *cmd, u64 ubuffer, - unsigned bufflen, void __user *meta_buffer, unsigned meta_len, - u32 meta_seed, u64 *result, unsigned timeout, bool vec) + struct nvme_command *cmd, u64 ubuffer, unsigned bufflen, + void __user *meta_buffer, unsigned meta_len, u32 meta_seed, + u64 *result, unsigned timeout, unsigned int flags) { struct nvme_ctrl *ctrl; struct request *req; @@ -221,7 +225,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, req->timeout = timeout; if (ubuffer && bufflen) { ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, - meta_len, meta_seed, &meta, NULL, vec); + meta_len, meta_seed, &meta, NULL, flags); if (ret) return ret; } @@ -304,10 +308,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.apptag = cpu_to_le16(io.apptag); c.rw.appmask = cpu_to_le16(io.appmask); - return nvme_submit_user_cmd(ns->queue, &c, - io.addr, length, - metadata, meta_len, lower_32_bits(io.slba), NULL, 0, - false); + return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata, + meta_len, lower_32_bits(io.slba), NULL, 0, 0); } static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, @@ -360,9 +362,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, timeout = msecs_to_jiffies(cmd.timeout_ms); status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, - cmd.addr, cmd.data_len, - nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, - 0, &result, timeout, false); + cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), + cmd.metadata_len, 0, &result, timeout, 0); if (status >= 0) { if (put_user(result, &ucmd->result)) @@ -373,8 +374,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, } static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - struct nvme_passthru_cmd64 __user *ucmd, bool vec, - fmode_t mode) + struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags, + fmode_t mode) { struct nvme_passthru_cmd64 cmd; struct nvme_command c; @@ -408,9 +409,8 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, timeout = msecs_to_jiffies(cmd.timeout_ms); status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, - cmd.addr, cmd.data_len, - nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, - 0, &cmd.result, timeout, vec); + cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), + cmd.metadata_len, 0, &cmd.result, timeout, flags); if (status >= 0) { if (put_user(cmd.result, &ucmd->result)) @@ -643,7 +643,7 @@ static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, case NVME_IOCTL_ADMIN_CMD: return nvme_user_cmd(ctrl, NULL, argp, mode); case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp, false, mode); + return nvme_user_cmd64(ctrl, NULL, argp, 0, mode); default: return sed_ioctl(ctrl->opal_dev, cmd, argp); } @@ -670,6 +670,8 @@ struct nvme_user_io32 { static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp, fmode_t mode) { + unsigned int flags = 0; + switch (cmd) { case NVME_IOCTL_ID: force_successful_syscall_return(); @@ -686,10 +688,11 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, #endif case NVME_IOCTL_SUBMIT_IO: return nvme_submit_io(ns, argp); - case NVME_IOCTL_IO64_CMD: - return nvme_user_cmd64(ns->ctrl, ns, argp, false, mode); case NVME_IOCTL_IO64_CMD_VEC: - return nvme_user_cmd64(ns->ctrl, ns, argp, true, mode); + flags |= NVME_IOCTL_VEC; + fallthrough; + case NVME_IOCTL_IO64_CMD: + return nvme_user_cmd64(ns->ctrl, ns, argp, flags, mode); default: return -ENOTTY; } @@ -962,7 +965,7 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd, case NVME_IOCTL_ADMIN_CMD: return nvme_user_cmd(ctrl, NULL, argp, file->f_mode); case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp, false, file->f_mode); + return nvme_user_cmd64(ctrl, NULL, argp, 0, file->f_mode); case NVME_IOCTL_IO_CMD: return nvme_dev_user_cmd(ctrl, argp, file->f_mode); case NVME_IOCTL_RESET: From 313c08c72ee7f87c54e34baec5cc4f4005e8800d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 8 Jan 2023 07:56:54 +0100 Subject: [PATCH 0579/1593] nvme: don't allow unprivileged passthrough on partitions Passthrough commands can always access the entire device, and thus submitting them on partitions is an privelege escalation. In hindsight we should have never allowed any passthrough commands on partitions, but it's probably too late to change that decision now. Fixes: e4fbcf32c860 ("nvme: identify-namespace without CAP_SYS_ADMIN") Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Kanchan Joshi Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/ioctl.c | 47 ++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 999ebc1b70005..06f52db34be9b 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -10,16 +10,24 @@ enum { NVME_IOCTL_VEC = (1 << 0), + NVME_IOCTL_PARTITION = (1 << 1), }; static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, - fmode_t mode) + unsigned int flags, fmode_t mode) { u32 effects; if (capable(CAP_SYS_ADMIN)) return true; + /* + * Do not allow unprivileged passthrough on partitions, as that allows an + * escape from the containment of the partition. + */ + if (flags & NVME_IOCTL_PARTITION) + return false; + /* * Do not allow unprivileged processes to send vendor specific or fabrics * commands as we can't be sure about their effects. @@ -327,7 +335,8 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, } static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - struct nvme_passthru_cmd __user *ucmd, fmode_t mode) + struct nvme_passthru_cmd __user *ucmd, unsigned int flags, + fmode_t mode) { struct nvme_passthru_cmd cmd; struct nvme_command c; @@ -355,7 +364,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(cmd.cdw14); c.common.cdw15 = cpu_to_le32(cmd.cdw15); - if (!nvme_cmd_allowed(ns, &c, mode)) + if (!nvme_cmd_allowed(ns, &c, 0, mode)) return -EACCES; if (cmd.timeout_ms) @@ -402,7 +411,7 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(cmd.cdw14); c.common.cdw15 = cpu_to_le32(cmd.cdw15); - if (!nvme_cmd_allowed(ns, &c, mode)) + if (!nvme_cmd_allowed(ns, &c, flags, mode)) return -EACCES; if (cmd.timeout_ms) @@ -571,7 +580,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); - if (!nvme_cmd_allowed(ns, &c, ioucmd->file->f_mode)) + if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode)) return -EACCES; d.metadata = READ_ONCE(cmd->metadata); @@ -641,7 +650,7 @@ static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, { switch (cmd) { case NVME_IOCTL_ADMIN_CMD: - return nvme_user_cmd(ctrl, NULL, argp, mode); + return nvme_user_cmd(ctrl, NULL, argp, 0, mode); case NVME_IOCTL_ADMIN64_CMD: return nvme_user_cmd64(ctrl, NULL, argp, 0, mode); default: @@ -668,16 +677,14 @@ struct nvme_user_io32 { #endif /* COMPAT_FOR_U64_ALIGNMENT */ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, - void __user *argp, fmode_t mode) + void __user *argp, unsigned int flags, fmode_t mode) { - unsigned int flags = 0; - switch (cmd) { case NVME_IOCTL_ID: force_successful_syscall_return(); return ns->head->ns_id; case NVME_IOCTL_IO_CMD: - return nvme_user_cmd(ns->ctrl, ns, argp, mode); + return nvme_user_cmd(ns->ctrl, ns, argp, flags, mode); /* * struct nvme_user_io can have different padding on some 32-bit ABIs. * Just accept the compat version as all fields that are used are the @@ -703,10 +710,14 @@ int nvme_ioctl(struct block_device *bdev, fmode_t mode, { struct nvme_ns *ns = bdev->bd_disk->private_data; void __user *argp = (void __user *)arg; + unsigned int flags = 0; + + if (bdev_is_partition(bdev)) + flags |= NVME_IOCTL_PARTITION; if (is_ctrl_ioctl(cmd)) return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode); - return nvme_ns_ioctl(ns, cmd, argp, mode); + return nvme_ns_ioctl(ns, cmd, argp, flags, mode); } long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -717,7 +728,7 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (is_ctrl_ioctl(cmd)) return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, file->f_mode); - return nvme_ns_ioctl(ns, cmd, argp, file->f_mode); + return nvme_ns_ioctl(ns, cmd, argp, 0, file->f_mode); } static int nvme_uring_cmd_checks(unsigned int issue_flags) @@ -807,6 +818,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, void __user *argp = (void __user *)arg; struct nvme_ns *ns; int srcu_idx, ret = -EWOULDBLOCK; + unsigned int flags = 0; + + if (bdev_is_partition(bdev)) + flags |= NVME_IOCTL_PARTITION; srcu_idx = srcu_read_lock(&head->srcu); ns = nvme_find_path(head); @@ -822,7 +837,7 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, mode); - ret = nvme_ns_ioctl(ns, cmd, argp, mode); + ret = nvme_ns_ioctl(ns, cmd, argp, flags, mode); out_unlock: srcu_read_unlock(&head->srcu, srcu_idx); return ret; @@ -847,7 +862,7 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, file->f_mode); - ret = nvme_ns_ioctl(ns, cmd, argp, file->f_mode); + ret = nvme_ns_ioctl(ns, cmd, argp, 0, file->f_mode); out_unlock: srcu_read_unlock(&head->srcu, srcu_idx); return ret; @@ -946,7 +961,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, kref_get(&ns->kref); up_read(&ctrl->namespaces_rwsem); - ret = nvme_user_cmd(ctrl, ns, argp, mode); + ret = nvme_user_cmd(ctrl, ns, argp, 0, mode); nvme_put_ns(ns); return ret; @@ -963,7 +978,7 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case NVME_IOCTL_ADMIN_CMD: - return nvme_user_cmd(ctrl, NULL, argp, file->f_mode); + return nvme_user_cmd(ctrl, NULL, argp, 0, file->f_mode); case NVME_IOCTL_ADMIN64_CMD: return nvme_user_cmd64(ctrl, NULL, argp, 0, file->f_mode); case NVME_IOCTL_IO_CMD: From c7c0644ead24c59cc5e0f2ff0ade89b21783614a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Jan 2023 17:02:05 +0000 Subject: [PATCH 0580/1593] MAINTAINERS: stop nvme matching for nvmem files The nvme patterns detect all include files starting with nvme, which also picks up the nvmem subsystem header files. Fix this by using a more specific pattern. Signed-off-by: Russell King (Oracle) [hch: switched to a purely inclusive pattern instead of excluding nvmem*] Signed-off-by: Christoph Hellwig --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3ef137fea4f6d..b9800f5cca6d6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14830,7 +14830,8 @@ T: git://git.infradead.org/nvme.git F: Documentation/nvme/ F: drivers/nvme/host/ F: drivers/nvme/common/ -F: include/linux/nvme* +F: include/linux/nvme.h +F: include/linux/nvme-*.h F: include/uapi/linux/nvme_ioctl.h NVM EXPRESS FABRICS AUTHENTICATION From 6acd87d50998ef0afafc441613aeaf5a8f5c9eff Mon Sep 17 00:00:00 2001 From: Siddh Raman Pant Date: Fri, 9 Dec 2022 15:51:51 +0530 Subject: [PATCH 0581/1593] erofs/zmap.c: Fix incorrect offset calculation Effective offset to add to length was being incorrectly calculated, which resulted in iomap->length being set to 0, triggering a WARN_ON in iomap_iter_done(). Fix that, and describe it in comments. This was reported as a crash by syzbot under an issue about a warning encountered in iomap_iter_done(), but unrelated to erofs. C reproducer: https://syzkaller.appspot.com/text?tag=ReproC&x=1037a6b2880000 Kernel config: https://syzkaller.appspot.com/text?tag=KernelConfig&x=e2021a61197ebe02 Dashboard link: https://syzkaller.appspot.com/bug?extid=a8e049cd3abd342936b6 Reported-by: syzbot+a8e049cd3abd342936b6@syzkaller.appspotmail.com Suggested-by: Gao Xiang Signed-off-by: Siddh Raman Pant Reviewed-by: Gao Xiang Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20221209102151.311049-1-code@siddh.me Signed-off-by: Gao Xiang --- fs/erofs/zmap.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 0150570c33aae..98fb90b9af715 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -793,12 +793,16 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset, iomap->type = IOMAP_HOLE; iomap->addr = IOMAP_NULL_ADDR; /* - * No strict rule how to describe extents for post EOF, yet - * we need do like below. Otherwise, iomap itself will get + * No strict rule on how to describe extents for post EOF, yet + * we need to do like below. Otherwise, iomap itself will get * into an endless loop on post EOF. + * + * Calculate the effective offset by subtracting extent start + * (map.m_la) from the requested offset, and add it to length. + * (NB: offset >= map.m_la always) */ if (iomap->offset >= inode->i_size) - iomap->length = length + map.m_la - offset; + iomap->length = length + offset - map.m_la; } iomap->flags = 0; return 0; From 12724ba38992bd045e92a9a88a868a530f89d13e Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 10 Jan 2023 15:49:27 +0800 Subject: [PATCH 0582/1593] erofs: fix kvcalloc() misuse with __GFP_NOFAIL As reported by syzbot [1], kvcalloc() cannot work with __GFP_NOFAIL. Let's use kcalloc() instead. [1] https://lore.kernel.org/r/0000000000007796bd05f1852ec2@google.com Reported-by: syzbot+c3729cda01706a04fb98@syzkaller.appspotmail.com Fixes: fe3e5914e6dc ("erofs: try to leave (de)compressed_pages on stack if possible") Fixes: 4f05687fd703 ("erofs: introduce struct z_erofs_decompress_backend") Reviewed-by: Chao Yu Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20230110074927.41651-1-hsiangkao@linux.alibaba.com --- fs/erofs/zdata.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index ccf7c55d477fe..5200bb86e2643 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1032,12 +1032,12 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, if (!be->decompressed_pages) be->decompressed_pages = - kvcalloc(be->nr_pages, sizeof(struct page *), - GFP_KERNEL | __GFP_NOFAIL); + kcalloc(be->nr_pages, sizeof(struct page *), + GFP_KERNEL | __GFP_NOFAIL); if (!be->compressed_pages) be->compressed_pages = - kvcalloc(pclusterpages, sizeof(struct page *), - GFP_KERNEL | __GFP_NOFAIL); + kcalloc(pclusterpages, sizeof(struct page *), + GFP_KERNEL | __GFP_NOFAIL); z_erofs_parse_out_bvecs(be); err2 = z_erofs_parse_in_bvecs(be, &overlapped); @@ -1085,7 +1085,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, } if (be->compressed_pages < be->onstack_pages || be->compressed_pages >= be->onstack_pages + Z_EROFS_ONSTACK_PAGES) - kvfree(be->compressed_pages); + kfree(be->compressed_pages); z_erofs_fill_other_copies(be, err); for (i = 0; i < be->nr_pages; ++i) { @@ -1104,7 +1104,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, } if (be->decompressed_pages != be->onstack_pages) - kvfree(be->decompressed_pages); + kfree(be->decompressed_pages); pcl->length = 0; pcl->partial = true; From e59370b2e96eb8e7e057a2a16e999ff385a3f2fb Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sun, 8 Jan 2023 16:45:41 +0100 Subject: [PATCH 0583/1593] selftests/net: l2_tos_ttl_inherit.sh: Set IPv6 addresses with "nodad". The ping command can run before DAD completes. In that case, ping may fail and break the selftest. We don't need DAD here since we're working on isolated device pairs. Fixes: b690842d12fd ("selftests/net: test l2 tunnel TOS/TTL inheriting") Signed-off-by: Guillaume Nault Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/l2_tos_ttl_inherit.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh index dca1e6f777a89..e2574b08eabc9 100755 --- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -137,8 +137,8 @@ setup() { if [ "$type" = "gre" ]; then type="ip6gretap" fi - ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0 - $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1 + ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0 nodad + $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1 nodad ip link add name tep0 type $type $local_addr1 \ remote fdd1:ced0:5d88:3fce::2 tos $test_tos ttl $test_ttl \ $vxlan $geneve @@ -170,8 +170,8 @@ setup() { ip addr add 198.19.0.1/24 brd + dev ${parent}0 $ns ip addr add 198.19.0.2/24 brd + dev ${parent}1 elif [ "$inner" = "6" ]; then - ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0 - $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1 + ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0 nodad + $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1 nodad fi } From c53cb00f7983a5474f2d36967f84908b85af9159 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sun, 8 Jan 2023 16:45:46 +0100 Subject: [PATCH 0584/1593] selftests/net: l2_tos_ttl_inherit.sh: Run tests in their own netns. This selftest currently runs half in the current namespace and half in a netns of its own. Therefore, the test can fail if the current namespace is already configured with incompatible parameters (for example if it already has a veth0 interface). Adapt the script to put both ends of the veth pair in their own netns. Now veth0 is created in NS0 instead of the current namespace, while veth1 is set up in NS1 (instead of the 'testing' netns). The user visible netns names are randomised to minimise the risk of conflicts with already existing namespaces. The cleanup() function doesn't need to remove the virtual interface anymore: deleting NS0 and NS1 automatically removes the virtual interfaces they contained. We can remove $ns, which was only used to run ip commands in the 'testing' netns (let's use the builtin "-netns" option instead). However, we still need a similar functionality as ping and tcpdump now need to run in NS0. So we now have $RUN_NS0 for that. Fixes: b690842d12fd ("selftests/net: test l2 tunnel TOS/TTL inheriting") Reported-by: Mirsad Goran Todorovac Tested-by: Mirsad Goran Todorovac Signed-off-by: Guillaume Nault Signed-off-by: Paolo Abeni --- .../selftests/net/l2_tos_ttl_inherit.sh | 162 ++++++++++-------- 1 file changed, 93 insertions(+), 69 deletions(-) diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh index e2574b08eabc9..cf56680d598f2 100755 --- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -25,6 +25,11 @@ expected_tos="0x00" expected_ttl="0" failed=false +readonly NS0=$(mktemp -u ns0-XXXXXXXX) +readonly NS1=$(mktemp -u ns1-XXXXXXXX) + +RUN_NS0="ip netns exec ${NS0}" + get_random_tos() { # Get a random hex tos value between 0x00 and 0xfc, a multiple of 4 echo "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 1)\ @@ -61,7 +66,6 @@ setup() { local vlan="$5" local test_tos="0x00" local test_ttl="0" - local ns="ip netns exec testing" # We don't want a test-tos of 0x00, # because this is the value that we get when no tos is set. @@ -94,14 +98,15 @@ setup() { printf "│%7s │%6s │%6s │%13s │%13s │%6s │" \ "$type" "$outer" "$inner" "$tos" "$ttl" "$vlan" - # Create 'testing' netns, veth pair and connect main ns with testing ns - ip netns add testing - ip link add type veth - ip link set veth1 netns testing - ip link set veth0 up - $ns ip link set veth1 up - ip addr flush dev veth0 - $ns ip addr flush dev veth1 + # Create netns NS0 and NS1 and connect them with a veth pair + ip netns add "${NS0}" + ip netns add "${NS1}" + ip link add name veth0 netns "${NS0}" type veth \ + peer name veth1 netns "${NS1}" + ip -netns "${NS0}" link set dev veth0 up + ip -netns "${NS1}" link set dev veth1 up + ip -netns "${NS0}" address flush dev veth0 + ip -netns "${NS1}" address flush dev veth1 local local_addr1="" local local_addr2="" @@ -127,51 +132,59 @@ setup() { if [ "$type" = "gre" ]; then type="gretap" fi - ip addr add 198.18.0.1/24 dev veth0 - $ns ip addr add 198.18.0.2/24 dev veth1 - ip link add name tep0 type $type $local_addr1 remote \ - 198.18.0.2 tos $test_tos ttl $test_ttl $vxlan $geneve - $ns ip link add name tep1 type $type $local_addr2 remote \ - 198.18.0.1 tos $test_tos ttl $test_ttl $vxlan $geneve + ip -netns "${NS0}" address add 198.18.0.1/24 dev veth0 + ip -netns "${NS1}" address add 198.18.0.2/24 dev veth1 + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote 198.18.0.2 tos $test_tos ttl $test_ttl \ + $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote 198.18.0.1 tos $test_tos ttl $test_ttl \ + $vxlan $geneve elif [ "$outer" = "6" ]; then if [ "$type" = "gre" ]; then type="ip6gretap" fi - ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0 nodad - $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1 nodad - ip link add name tep0 type $type $local_addr1 \ - remote fdd1:ced0:5d88:3fce::2 tos $test_tos ttl $test_ttl \ - $vxlan $geneve - $ns ip link add name tep1 type $type $local_addr2 \ - remote fdd1:ced0:5d88:3fce::1 tos $test_tos ttl $test_ttl \ - $vxlan $geneve + ip -netns "${NS0}" address add fdd1:ced0:5d88:3fce::1/64 \ + dev veth0 nodad + ip -netns "${NS1}" address add fdd1:ced0:5d88:3fce::2/64 \ + dev veth1 nodad + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote fdd1:ced0:5d88:3fce::2 tos $test_tos \ + ttl $test_ttl $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote fdd1:ced0:5d88:3fce::1 tos $test_tos \ + ttl $test_ttl $vxlan $geneve fi # Bring L2-tunnel link up and create VLAN on top - ip link set tep0 up - $ns ip link set tep1 up - ip addr flush dev tep0 - $ns ip addr flush dev tep1 + ip -netns "${NS0}" link set tep0 up + ip -netns "${NS1}" link set tep1 up + ip -netns "${NS0}" address flush dev tep0 + ip -netns "${NS1}" address flush dev tep1 local parent if $vlan; then parent="vlan99-" - ip link add link tep0 name ${parent}0 type vlan id 99 - $ns ip link add link tep1 name ${parent}1 type vlan id 99 - ip link set ${parent}0 up - $ns ip link set ${parent}1 up - ip addr flush dev ${parent}0 - $ns ip addr flush dev ${parent}1 + ip -netns "${NS0}" link add link tep0 name ${parent}0 \ + type vlan id 99 + ip -netns "${NS1}" link add link tep1 name ${parent}1 \ + type vlan id 99 + ip -netns "${NS0}" link set dev ${parent}0 up + ip -netns "${NS1}" link set dev ${parent}1 up + ip -netns "${NS0}" address flush dev ${parent}0 + ip -netns "${NS1}" address flush dev ${parent}1 else parent="tep" fi # Assign inner IPv4/IPv6 addresses if [ "$inner" = "4" ] || [ "$inner" = "other" ]; then - ip addr add 198.19.0.1/24 brd + dev ${parent}0 - $ns ip addr add 198.19.0.2/24 brd + dev ${parent}1 + ip -netns "${NS0}" address add 198.19.0.1/24 brd + dev ${parent}0 + ip -netns "${NS1}" address add 198.19.0.2/24 brd + dev ${parent}1 elif [ "$inner" = "6" ]; then - ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0 nodad - $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1 nodad + ip -netns "${NS0}" address add fdd4:96cf:4eae:443b::1/64 \ + dev ${parent}0 nodad + ip -netns "${NS1}" address add fdd4:96cf:4eae:443b::2/64 \ + dev ${parent}1 nodad fi } @@ -192,10 +205,10 @@ verify() { ping_dst="198.19.0.3" # Generates ARPs which are not IPv4/IPv6 fi if [ "$tos_ttl" = "inherit" ]; then - ping -i 0.1 $ping_dst -Q "$expected_tos" -t "$expected_ttl" \ - 2>/dev/null 1>&2 & ping_pid="$!" + ${RUN_NS0} ping -i 0.1 $ping_dst -Q "$expected_tos" \ + -t "$expected_ttl" 2>/dev/null 1>&2 & ping_pid="$!" else - ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" + ${RUN_NS0} ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" fi local tunnel_type_offset tunnel_type_proto req_proto_offset req_offset if [ "$type" = "gre" ]; then @@ -216,10 +229,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x01 and \ - ip[$req_offset] = 0x08 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x01 and \ + ip[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "6" ]; then req_proto_offset="44" req_offset="78" @@ -231,10 +246,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x3a and \ - ip[$req_offset] = 0x80 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x3a and \ + ip[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "other" ]; then req_proto_offset="36" req_offset="45" @@ -250,11 +267,13 @@ verify() { expected_tos="0x00" expected_ttl="64" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x08 and \ - ip[$((req_proto_offset + 1))] = 0x06 and \ - ip[$req_offset] = 0x01 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x08 and \ + ip[$((req_proto_offset + 1))] = 0x06 and \ + ip[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" fi elif [ "$outer" = "6" ]; then if [ "$type" = "gre" ]; then @@ -273,10 +292,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x01 and \ - ip6[$req_offset] = 0x08 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x01 and \ + ip6[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "6" ]; then local req_proto_offset="72" local req_offset="106" @@ -288,10 +309,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x3a and \ - ip6[$req_offset] = 0x80 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x3a and \ + ip6[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "other" ]; then local req_proto_offset="64" local req_offset="73" @@ -307,11 +330,13 @@ verify() { expected_tos="0x00" expected_ttl="64" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x08 and \ - ip6[$((req_proto_offset + 1))] = 0x06 and \ - ip6[$req_offset] = 0x01 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x08 and \ + ip6[$((req_proto_offset + 1))] = 0x06 and \ + ip6[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" fi fi kill -9 $ping_pid @@ -351,9 +376,8 @@ verify() { } cleanup() { - ip link del veth0 2>/dev/null - ip netns del testing 2>/dev/null - ip link del tep0 2>/dev/null + ip netns del "${NS0}" 2>/dev/null + ip netns del "${NS1}" 2>/dev/null } printf "┌────────┬───────┬───────┬──────────────┬" From d68ff8ad3351b8fc8d6f14b9a4f5cc8ba3e8bd13 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sun, 8 Jan 2023 16:45:50 +0100 Subject: [PATCH 0585/1593] selftests/net: l2_tos_ttl_inherit.sh: Ensure environment cleanup on failure. Use 'set -e' and an exit handler to stop the script if a command fails and ensure the test environment is cleaned up in any case. Also, handle the case where the script is interrupted by SIGINT. The only command that's expected to fail is 'wait $ping_pid', since it's killed by the script. Handle this case with '|| true' to make it play well with 'set -e'. Finally, return the Kselftest SKIP code (4) when the script breaks because of an environment problem or a command line failure. The 0 and 1 return codes should now reliably indicate that all tests have been run (0: all tests run and passed, 1: all tests run but at least one failed, 4: test script didn't run completely). Fixes: b690842d12fd ("selftests/net: test l2 tunnel TOS/TTL inheriting") Reported-by: Mirsad Goran Todorovac Tested-by: Mirsad Goran Todorovac Signed-off-by: Guillaume Nault Signed-off-by: Paolo Abeni --- .../selftests/net/l2_tos_ttl_inherit.sh | 40 +++++++++++++++++-- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh index cf56680d598f2..f11756e7df2f9 100755 --- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -12,13 +12,16 @@ # In addition this script also checks if forcing a specific field in the # outer header is working. +# Return 4 by default (Kselftest SKIP code) +ERR=4 + if [ "$(id -u)" != "0" ]; then echo "Please run as root." - exit 0 + exit $ERR fi if ! which tcpdump > /dev/null 2>&1; then echo "No tcpdump found. Required for this test." - exit 0 + exit $ERR fi expected_tos="0x00" @@ -340,7 +343,7 @@ verify() { fi fi kill -9 $ping_pid - wait $ping_pid 2>/dev/null + wait $ping_pid 2>/dev/null || true result="FAIL" if [ "$outer" = "4" ]; then captured_ttl="$(get_field "ttl" "$out")" @@ -380,6 +383,31 @@ cleanup() { ip netns del "${NS1}" 2>/dev/null } +exit_handler() { + # Don't exit immediately if one of the intermediate commands fails. + # We might be called at the end of the script, when the network + # namespaces have already been deleted. So cleanup() may fail, but we + # still need to run until 'exit $ERR' or the script won't return the + # correct error code. + set +e + + cleanup + + exit $ERR +} + +# Restore the default SIGINT handler (just in case) and exit. +# The exit handler will take care of cleaning everything up. +interrupted() { + trap - INT + + exit $ERR +} + +set -e +trap exit_handler EXIT +trap interrupted INT + printf "┌────────┬───────┬───────┬──────────────┬" printf "──────────────┬───────┬────────┐\n" for type in gre vxlan geneve; do @@ -409,6 +437,10 @@ done printf "└────────┴───────┴───────┴──────────────┴" printf "──────────────┴───────┴────────┘\n" +# All tests done. +# Set ERR appropriately: it will be returned by the exit handler. if $failed; then - exit 1 + ERR=1 +else + ERR=0 fi From 80502ffab2fa92ba9777e381efea2efddc348d13 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Thu, 15 Dec 2022 15:54:02 +1300 Subject: [PATCH 0586/1593] arm64: dts: marvell: AC5/AC5X: Fix address for UART1 The correct address offset is 0x12100. Fixes: 31be791e26cf ("arm64: dts: marvell: Add UART1-3 for AC5/AC5X") Signed-off-by: Chris Packham Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/ac5-98dx25xx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/marvell/ac5-98dx25xx.dtsi b/arch/arm64/boot/dts/marvell/ac5-98dx25xx.dtsi index 7308f7b6b22c9..8bce640691387 100644 --- a/arch/arm64/boot/dts/marvell/ac5-98dx25xx.dtsi +++ b/arch/arm64/boot/dts/marvell/ac5-98dx25xx.dtsi @@ -98,7 +98,7 @@ uart1: serial@12100 { compatible = "snps,dw-apb-uart"; - reg = <0x11000 0x100>; + reg = <0x12100 0x100>; reg-shift = <2>; interrupts = ; reg-io-width = <1>; From 53da7aec32982f5ee775b69dce06d63992ce4af3 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Mon, 9 Jan 2023 11:43:25 +0530 Subject: [PATCH 0587/1593] octeontx2-pf: Fix resource leakage in VF driver unbind resources allocated like mcam entries to support the Ntuple feature and hash tables for the tc feature are not getting freed in driver unbind. This patch fixes the issue. Fixes: 2da489432747 ("octeontx2-pf: devlink params support to set mcam entry count") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Link: https://lore.kernel.org/r/20230109061325.21395-1-hkelam@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 86653bb8e403a..7f8ffbf79cf74 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -758,6 +758,8 @@ static void otx2vf_remove(struct pci_dev *pdev) if (vf->otx2_wq) destroy_workqueue(vf->otx2_wq); otx2_ptp_destroy(vf); + otx2_mcam_flow_del(vf); + otx2_shutdown_tc(vf); otx2vf_disable_mbox_intr(vf); otx2_detach_resources(&vf->mbox); if (test_bit(CN10K_LMTST, &vf->hw.cap_flag)) From 0afec5e9cea732cb47014655685a2a47fb180c31 Mon Sep 17 00:00:00 2001 From: Yonatan Nachum Date: Mon, 9 Jan 2023 13:37:11 +0000 Subject: [PATCH 0588/1593] RDMA/core: Fix ib block iterator counter overflow When registering a new DMA MR after selecting the best aligned page size for it, we iterate over the given sglist to split each entry to smaller, aligned to the selected page size, DMA blocks. In given circumstances where the sg entry and page size fit certain sizes and the sg entry is not aligned to the selected page size, the total size of the aligned pages we need to cover the sg entry is >= 4GB. Under this circumstances, while iterating page aligned blocks, the counter responsible for counting how much we advanced from the start of the sg entry is overflowed because its type is u32 and we pass 4GB in size. This can lead to an infinite loop inside the iterator function because the overflow prevents the counter to be larger than the size of the sg entry. Fix the presented problem by changing the advancement condition to eliminate overflow. Backtrace: [ 192.374329] efa_reg_user_mr_dmabuf [ 192.376783] efa_register_mr [ 192.382579] pgsz_bitmap 0xfffff000 rounddown 0x80000000 [ 192.386423] pg_sz [0x80000000] umem_length[0xc0000000] [ 192.392657] start 0x0 length 0xc0000000 params.page_shift 31 params.page_num 3 [ 192.399559] hp_cnt[3], pages_in_hp[524288] [ 192.403690] umem->sgt_append.sgt.nents[1] [ 192.407905] number entries: [1], pg_bit: [31] [ 192.411397] biter->__sg_nents [1] biter->__sg [0000000008b0c5d8] [ 192.415601] biter->__sg_advance [665837568] sg_dma_len[3221225472] [ 192.419823] biter->__sg_nents [1] biter->__sg [0000000008b0c5d8] [ 192.423976] biter->__sg_advance [2813321216] sg_dma_len[3221225472] [ 192.428243] biter->__sg_nents [1] biter->__sg [0000000008b0c5d8] [ 192.432397] biter->__sg_advance [665837568] sg_dma_len[3221225472] Fixes: a808273a495c ("RDMA/verbs: Add a DMA iterator to return aligned contiguous memory blocks") Signed-off-by: Yonatan Nachum Link: https://lore.kernel.org/r/20230109133711.13678-1-ynachum@amazon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/verbs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 26b021f43ba40..11b1c1603aeb4 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2957,15 +2957,18 @@ EXPORT_SYMBOL(__rdma_block_iter_start); bool __rdma_block_iter_next(struct ib_block_iter *biter) { unsigned int block_offset; + unsigned int sg_delta; if (!biter->__sg_nents || !biter->__sg) return false; biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance; block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1); - biter->__sg_advance += BIT_ULL(biter->__pg_bit) - block_offset; + sg_delta = BIT_ULL(biter->__pg_bit) - block_offset; - if (biter->__sg_advance >= sg_dma_len(biter->__sg)) { + if (sg_dma_len(biter->__sg) - biter->__sg_advance > sg_delta) { + biter->__sg_advance += sg_delta; + } else { biter->__sg_advance = 0; biter->__sg = sg_next(biter->__sg); biter->__sg_nents--; From 0a0a6e80472c98947d73c3d13bcd7d101895f55d Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 9 Jan 2023 12:31:11 -0500 Subject: [PATCH 0589/1593] IB/hfi1: Reject a zero-length user expected buffer A zero length user buffer makes no sense and the code does not handle it correctly. Instead, reject a zero length as invalid. Fixes: 97736f36dbeb ("IB/hfi1: Validate page aligned for a given virtual addres") Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/167328547120.1472310.6362802432127399257.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 186d302912606..3c609b11e71ca 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -256,6 +256,8 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, if (!PAGE_ALIGNED(tinfo->vaddr)) return -EINVAL; + if (tinfo->length == 0) + return -EINVAL; tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL); if (!tidbuf) From ecf91551cdd2925ed6d9a9d99074fa5f67b90596 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 9 Jan 2023 12:31:16 -0500 Subject: [PATCH 0590/1593] IB/hfi1: Reserve user expected TIDs To avoid a race, reserve the number of user expected TIDs before setup. Fixes: 7e7a436ecb6e ("staging/hfi1: Add TID entry program function body") Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/167328547636.1472310.7419712824785353905.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 3c609b11e71ca..d7487555d109a 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -282,16 +282,13 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, /* Find sets of physically contiguous pages */ tidbuf->n_psets = find_phys_blocks(tidbuf, pinned); - /* - * We don't need to access this under a lock since tid_used is per - * process and the same process cannot be in hfi1_user_exp_rcv_clear() - * and hfi1_user_exp_rcv_setup() at the same time. - */ + /* Reserve the number of expected tids to be used. */ spin_lock(&fd->tid_lock); if (fd->tid_used + tidbuf->n_psets > fd->tid_limit) pageset_count = fd->tid_limit - fd->tid_used; else pageset_count = tidbuf->n_psets; + fd->tid_used += pageset_count; spin_unlock(&fd->tid_lock); if (!pageset_count) @@ -400,10 +397,11 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, nomem: hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx, mapped_pages, ret); + /* adjust reserved tid_used to actual count */ + spin_lock(&fd->tid_lock); + fd->tid_used -= pageset_count - tididx; + spin_unlock(&fd->tid_lock); if (tididx) { - spin_lock(&fd->tid_lock); - fd->tid_used += tididx; - spin_unlock(&fd->tid_lock); tinfo->tidcnt = tididx; tinfo->length = mapped_pages * PAGE_SIZE; From e0c4a422f5246abefbf7c178ef99a1f2dc3c5f62 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 9 Jan 2023 12:31:21 -0500 Subject: [PATCH 0591/1593] IB/hfi1: Fix expected receive setup error exit issues Fix three error exit issues in expected receive setup. Re-arrange error exits to increase readability. Issues and fixes: 1. Possible missed page unpin if tidlist copyout fails and not all pinned pages where made part of a TID. Fix: Unpin the unused pages. 2. Return success with unset return values tidcnt and length when no pages were pinned. Fix: Return -ENOSPC if no pages were pinned. 3. Return success with unset return values tidcnt and length when no rcvarray entries available. Fix: Return -ENOSPC if no rcvarray entries are available. Fixes: 7e7a436ecb6e ("staging/hfi1: Add TID entry program function body") Fixes: 97736f36dbeb ("IB/hfi1: Validate page aligned for a given virtual addres") Fixes: f404ca4c7ea8 ("IB/hfi1: Refactor hfi_user_exp_rcv_setup() IOCTL") Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/167328548150.1472310.1492305874804187634.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 83 ++++++++++++++--------- 1 file changed, 50 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index d7487555d109a..88df8ca4bb577 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -268,15 +268,14 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets), GFP_KERNEL); if (!tidbuf->psets) { - kfree(tidbuf); - return -ENOMEM; + ret = -ENOMEM; + goto fail_release_mem; } pinned = pin_rcv_pages(fd, tidbuf); if (pinned <= 0) { - kfree(tidbuf->psets); - kfree(tidbuf); - return pinned; + ret = (pinned < 0) ? pinned : -ENOSPC; + goto fail_unpin; } /* Find sets of physically contiguous pages */ @@ -291,14 +290,16 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, fd->tid_used += pageset_count; spin_unlock(&fd->tid_lock); - if (!pageset_count) - goto bail; + if (!pageset_count) { + ret = -ENOSPC; + goto fail_unreserve; + } ngroups = pageset_count / dd->rcv_entries.group_size; tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL); if (!tidlist) { ret = -ENOMEM; - goto nomem; + goto fail_unreserve; } tididx = 0; @@ -394,44 +395,60 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, } unlock: mutex_unlock(&uctxt->exp_mutex); -nomem: hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx, mapped_pages, ret); + + /* fail if nothing was programmed, set error if none provided */ + if (tididx == 0) { + if (ret >= 0) + ret = -ENOSPC; + goto fail_unreserve; + } + /* adjust reserved tid_used to actual count */ spin_lock(&fd->tid_lock); fd->tid_used -= pageset_count - tididx; spin_unlock(&fd->tid_lock); - if (tididx) { - tinfo->tidcnt = tididx; - tinfo->length = mapped_pages * PAGE_SIZE; - if (copy_to_user(u64_to_user_ptr(tinfo->tidlist), - tidlist, sizeof(tidlist[0]) * tididx)) { - /* - * On failure to copy to the user level, we need to undo - * everything done so far so we don't leak resources. - */ - tinfo->tidlist = (unsigned long)&tidlist; - hfi1_user_exp_rcv_clear(fd, tinfo); - tinfo->tidlist = 0; - ret = -EFAULT; - goto bail; - } + /* unpin all pages not covered by a TID */ + unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages, + false); + + tinfo->tidcnt = tididx; + tinfo->length = mapped_pages * PAGE_SIZE; + + if (copy_to_user(u64_to_user_ptr(tinfo->tidlist), + tidlist, sizeof(tidlist[0]) * tididx)) { + ret = -EFAULT; + goto fail_unprogram; } - /* - * If not everything was mapped (due to insufficient RcvArray entries, - * for example), unpin all unmapped pages so we can pin them nex time. - */ - if (mapped_pages != pinned) - unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, - (pinned - mapped_pages), false); -bail: + kfree(tidbuf->pages); kfree(tidbuf->psets); + kfree(tidbuf); kfree(tidlist); + return 0; + +fail_unprogram: + /* unprogram, unmap, and unpin all allocated TIDs */ + tinfo->tidlist = (unsigned long)tidlist; + hfi1_user_exp_rcv_clear(fd, tinfo); + tinfo->tidlist = 0; + pinned = 0; /* nothing left to unpin */ + pageset_count = 0; /* nothing left reserved */ +fail_unreserve: + spin_lock(&fd->tid_lock); + fd->tid_used -= pageset_count; + spin_unlock(&fd->tid_lock); +fail_unpin: + if (pinned > 0) + unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false); +fail_release_mem: kfree(tidbuf->pages); + kfree(tidbuf->psets); kfree(tidbuf); - return ret > 0 ? 0 : ret; + kfree(tidlist); + return ret; } int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, From 1c7edde1b5720ddb0aff5ca8c7f605a0f92526eb Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 9 Jan 2023 12:31:26 -0500 Subject: [PATCH 0592/1593] IB/hfi1: Immediately remove invalid memory from hardware When a user expected receive page is unmapped, it should be immediately removed from hardware rather than depend on a reaction from user space. Fixes: 2677a7680e77 ("IB/hfi1: Fix memory leak during unexpected shutdown") Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/167328548663.1472310.7871808081861622659.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 43 +++++++++++++++-------- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 1 + 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 88df8ca4bb577..f402af1e29035 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -28,8 +28,9 @@ static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *, unsigned int start, u16 count, u32 *tidlist, unsigned int *tididx, unsigned int *pmapped); -static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, - struct tid_group **grp); +static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo); +static void __clear_tid_node(struct hfi1_filedata *fd, + struct tid_rb_node *node); static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); static const struct mmu_interval_notifier_ops tid_mn_ops = { @@ -469,7 +470,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, mutex_lock(&uctxt->exp_mutex); for (tididx = 0; tididx < tinfo->tidcnt; tididx++) { - ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL); + ret = unprogram_rcvarray(fd, tidinfo[tididx]); if (ret) { hfi1_cdbg(TID, "Failed to unprogram rcv array %d", ret); @@ -723,6 +724,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, } node->fdata = fd; + mutex_init(&node->invalidate_mutex); node->phys = page_to_phys(pages[0]); node->npages = npages; node->rcventry = rcventry; @@ -762,8 +764,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, return -EFAULT; } -static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, - struct tid_group **grp) +static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; @@ -786,9 +787,6 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, if (!node || node->rcventry != (uctxt->expected_base + rcventry)) return -EBADF; - if (grp) - *grp = node->grp; - if (fd->use_mn) mmu_interval_notifier_remove(&node->notifier); cacheless_tid_rb_remove(fd, node); @@ -796,23 +794,34 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, return 0; } -static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) +static void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; + mutex_lock(&node->invalidate_mutex); + if (node->freed) + goto done; + node->freed = true; + trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry, node->npages, node->notifier.interval_tree.start, node->phys, node->dma_addr); - /* - * Make sure device has seen the write before we unpin the - * pages. - */ + /* Make sure device has seen the write before pages are unpinned */ hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0); unpin_rcv_pages(fd, NULL, node, 0, node->npages, true); +done: + mutex_unlock(&node->invalidate_mutex); +} + +static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) +{ + struct hfi1_ctxtdata *uctxt = fd->uctxt; + + __clear_tid_node(fd, node); node->grp->used--; node->grp->map &= ~(1 << (node->rcventry - node->grp->base)); @@ -871,10 +880,16 @@ static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, if (node->freed) return true; + /* take action only if unmapping */ + if (range->event != MMU_NOTIFY_UNMAP) + return true; + trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->notifier.interval_tree.start, node->rcventry, node->npages, node->dma_addr); - node->freed = true; + + /* clear the hardware rcvarray entry */ + __clear_tid_node(fdata, node); spin_lock(&fdata->invalid_lock); if (fdata->invalid_tid_idx < uctxt->expected_count) { diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 8c53e416bf843..2ddb3dac7d916 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -27,6 +27,7 @@ struct tid_user_buf { struct tid_rb_node { struct mmu_interval_notifier notifier; struct hfi1_filedata *fdata; + struct mutex invalidate_mutex; /* covers hw removal */ unsigned long phys; struct tid_group *grp; u32 rcventry; From b3deec25847bda34e34d5d7be02f633caf000bd8 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 9 Jan 2023 12:31:31 -0500 Subject: [PATCH 0593/1593] IB/hfi1: Remove user expected buffer invalidate race During setup, there is a possible race between a page invalidate and hardware programming. Add a covering invalidate over the user target range during setup. If anything within that range is invalidated during setup, fail the setup. Once set up, each TID will have its own invalidate callback and invalidate. Fixes: 3889551db212 ("RDMA/hfi1: Use mmu_interval_notifier_insert for user_exp_rcv") Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/167328549178.1472310.9867497376936699488.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 58 +++++++++++++++++++++-- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 2 + 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index f402af1e29035..b02f2f0809c81 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -23,6 +23,9 @@ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, const struct mmu_notifier_range *range, unsigned long cur_seq); +static bool tid_cover_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq); static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *, struct tid_group *grp, unsigned int start, u16 count, @@ -36,6 +39,9 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); static const struct mmu_interval_notifier_ops tid_mn_ops = { .invalidate = tid_rb_invalidate, }; +static const struct mmu_interval_notifier_ops tid_cover_ops = { + .invalidate = tid_cover_invalidate, +}; /* * Initialize context and file private data needed for Expected @@ -254,6 +260,7 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, tididx = 0, mapped, mapped_pages = 0; u32 *tidlist = NULL; struct tid_user_buf *tidbuf; + unsigned long mmu_seq = 0; if (!PAGE_ALIGNED(tinfo->vaddr)) return -EINVAL; @@ -264,6 +271,7 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, if (!tidbuf) return -ENOMEM; + mutex_init(&tidbuf->cover_mutex); tidbuf->vaddr = tinfo->vaddr; tidbuf->length = tinfo->length; tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets), @@ -273,6 +281,16 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, goto fail_release_mem; } + if (fd->use_mn) { + ret = mmu_interval_notifier_insert( + &tidbuf->notifier, current->mm, + tidbuf->vaddr, tidbuf->npages * PAGE_SIZE, + &tid_cover_ops); + if (ret) + goto fail_release_mem; + mmu_seq = mmu_interval_read_begin(&tidbuf->notifier); + } + pinned = pin_rcv_pages(fd, tidbuf); if (pinned <= 0) { ret = (pinned < 0) ? pinned : -ENOSPC; @@ -415,6 +433,20 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages, false); + if (fd->use_mn) { + /* check for an invalidate during setup */ + bool fail = false; + + mutex_lock(&tidbuf->cover_mutex); + fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq); + mutex_unlock(&tidbuf->cover_mutex); + + if (fail) { + ret = -EBUSY; + goto fail_unprogram; + } + } + tinfo->tidcnt = tididx; tinfo->length = mapped_pages * PAGE_SIZE; @@ -424,6 +456,8 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, goto fail_unprogram; } + if (fd->use_mn) + mmu_interval_notifier_remove(&tidbuf->notifier); kfree(tidbuf->pages); kfree(tidbuf->psets); kfree(tidbuf); @@ -442,6 +476,8 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, fd->tid_used -= pageset_count; spin_unlock(&fd->tid_lock); fail_unpin: + if (fd->use_mn) + mmu_interval_notifier_remove(&tidbuf->notifier); if (pinned > 0) unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false); fail_release_mem: @@ -740,11 +776,6 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, &tid_mn_ops); if (ret) goto out_unmap; - /* - * FIXME: This is in the wrong order, the notifier should be - * established before the pages are pinned by pin_rcv_pages. - */ - mmu_interval_read_begin(&node->notifier); } fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node; @@ -919,6 +950,23 @@ static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, return true; } +static bool tid_cover_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct tid_user_buf *tidbuf = + container_of(mni, struct tid_user_buf, notifier); + + /* take action only if unmapping */ + if (range->event == MMU_NOTIFY_UNMAP) { + mutex_lock(&tidbuf->cover_mutex); + mmu_interval_set_seq(mni, cur_seq); + mutex_unlock(&tidbuf->cover_mutex); + } + + return true; +} + static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, struct tid_rb_node *tnode) { diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 2ddb3dac7d916..f8ee997d0050e 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -16,6 +16,8 @@ struct tid_pageset { }; struct tid_user_buf { + struct mmu_interval_notifier notifier; + struct mutex cover_mutex; unsigned long vaddr; unsigned long length; unsigned int npages; From 3bd68b32c911a3a610ad782bb04d1a7bfc440638 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 9 Jan 2023 09:06:03 +0100 Subject: [PATCH 0594/1593] drm/amdgpu: fix pipeline sync v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a potential memory leak of dma_fence objects in the CS code as well as glitches in firefox because of missing pipeline sync. v2: use the scheduler instead of the fence context Signed-off-by: Christian König Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2323 Tested-by: Michal Kubecek mkubecek@suse.cz Tested-by: Vlastimil Babka Acked-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20230109130120.73389-1-christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 46 +++++++++++++++++--------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 47763ac0d14a5..7b5ce00f06026 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -61,6 +61,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, amdgpu_ctx_put(p->ctx); return -ECANCELED; } + + amdgpu_sync_create(&p->sync); return 0; } @@ -452,18 +454,6 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p, } r = amdgpu_sync_fence(&p->sync, fence); - if (r) - goto error; - - /* - * When we have an explicit dependency it might be necessary to insert a - * pipeline sync to make sure that all caches etc are flushed and the - * next job actually sees the results from the previous one. - */ - if (fence->context == p->gang_leader->base.entity->fence_context) - r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); - -error: dma_fence_put(fence); return r; } @@ -1188,10 +1178,19 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct drm_gpu_scheduler *sched; struct amdgpu_bo_list_entry *e; + struct dma_fence *fence; unsigned int i; int r; + r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); + if (r) { + if (r != -ERESTARTSYS) + DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); + return r; + } + list_for_each_entry(e, &p->validated, tv.head) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); struct dma_resv *resv = bo->tbo.base.resv; @@ -1211,10 +1210,24 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) return r; } - r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); - if (r && r != -ERESTARTSYS) - DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); - return r; + sched = p->gang_leader->base.entity->rq->sched; + while ((fence = amdgpu_sync_get_fence(&p->sync))) { + struct drm_sched_fence *s_fence = to_drm_sched_fence(fence); + + /* + * When we have an dependency it might be necessary to insert a + * pipeline sync to make sure that all caches etc are flushed and the + * next job actually sees the results from the previous one + * before we start executing on the same scheduler ring. + */ + if (!s_fence || s_fence->sched != sched) + continue; + + r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); + if (r) + return r; + } + return 0; } static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) @@ -1347,6 +1360,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser) { unsigned i; + amdgpu_sync_free(&parser->sync); for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); kfree(parser->post_deps[i].chain); From 526970be53d5dd60122141540142fb0eeb0b22d8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Jan 2023 11:44:51 +0100 Subject: [PATCH 0595/1593] sh/mm: Fix pmd_t for real Because typing is hard... Fixes: 0862ff059c9e ("sh/mm: Make pmd_t similar to pte_t") Reported-by: Guenter Roeck Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds --- arch/sh/include/asm/pgtable-3level.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index a889a3a938bab..d1ce73f3bd85e 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -28,7 +28,7 @@ #define pmd_ERROR(e) \ printk("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) -typedef struct { +typedef union { struct { unsigned long pmd_low; unsigned long pmd_high; From 7c6dd961d0c8e7e8f9fdc65071fb09ece702e18d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Jan 2023 12:15:40 +0100 Subject: [PATCH 0596/1593] x86/boot: Avoid using Intel mnemonics in AT&T syntax asm With 'GNU assembler (GNU Binutils for Debian) 2.39.90.20221231' the build now reports: arch/x86/realmode/rm/../../boot/bioscall.S: Assembler messages: arch/x86/realmode/rm/../../boot/bioscall.S:35: Warning: found `movsd'; assuming `movsl' was meant arch/x86/realmode/rm/../../boot/bioscall.S:70: Warning: found `movsd'; assuming `movsl' was meant arch/x86/boot/bioscall.S: Assembler messages: arch/x86/boot/bioscall.S:35: Warning: found `movsd'; assuming `movsl' was meant arch/x86/boot/bioscall.S:70: Warning: found `movsd'; assuming `movsl' was meant Which is due to: PR gas/29525 Note that with the dropped CMPSD and MOVSD Intel Syntax string insn templates taking operands, mixed IsString/non-IsString template groups (with memory operands) cannot occur anymore. With that maybe_adjust_templates() becomes unnecessary (and is hence being removed). More details: https://sourceware.org/bugzilla/show_bug.cgi?id=29525 Borislav Petkov further explains: " the particular problem here is is that the 'd' suffix is "conflicting" in the sense that you can have SSE mnemonics like movsD %xmm... and the same thing also for string ops (which is the case here) so apparently the agreement in binutils land is to use the always accepted suffixes 'l' or 'q' and phase out 'd' slowly... " Fixes: 7a734e7dd93b ("x86, setup: "glove box" BIOS calls -- infrastructure") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/Y71I3Ex2pvIxMpsP@hirez.programming.kicks-ass.net --- arch/x86/boot/bioscall.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S index 5521ea12f44e0..aa9b964575843 100644 --- a/arch/x86/boot/bioscall.S +++ b/arch/x86/boot/bioscall.S @@ -32,7 +32,7 @@ intcall: movw %dx, %si movw %sp, %di movw $11, %cx - rep; movsd + rep; movsl /* Pop full state from the stack */ popal @@ -67,7 +67,7 @@ intcall: jz 4f movw %sp, %si movw $11, %cx - rep; movsd + rep; movsl 4: addw $44, %sp /* Restore state and return */ From e66b7920aa5ac5b1a1997a454004ba9246a3c005 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 30 Dec 2022 21:07:47 +0100 Subject: [PATCH 0597/1593] wifi: mac80211: fix initialization of rx->link and rx->link_sta There are some codepaths that do not initialize rx->link_sta properly. This causes a crash in places which assume that rx->link_sta is valid if rx->sta is valid. One known instance is triggered by __ieee80211_rx_h_amsdu being called from fast-rx. It results in a crash like this one: BUG: kernel NULL pointer dereference, address: 00000000000000a8 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] PREEMPT SMP PTI CPU: 1 PID: 506 Comm: mt76-usb-rx phy Tainted: G E 6.1.0-debian64x+1.7 #3 Hardware name: ZOTAC ZBOX-ID92/ZBOX-IQ01/ZBOX-ID92/ZBOX-IQ01, BIOS B220P007 05/21/2014 RIP: 0010:ieee80211_deliver_skb+0x62/0x1f0 [mac80211] Code: 00 48 89 04 24 e8 9e a7 c3 df 89 c0 48 03 1c c5 a0 ea 39 a1 4c 01 6b 08 48 ff 03 48 83 7d 28 00 74 11 48 8b 45 30 48 63 55 44 <48> 83 84 d0 a8 00 00 00 01 41 8b 86 c0 11 00 00 8d 50 fd 83 fa 01 RSP: 0018:ffff999040803b10 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffffb9903f496480 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff999040803ce0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8d21828ac900 R13: 000000000000004a R14: ffff8d2198ed89c0 R15: ffff8d2198ed8000 FS: 0000000000000000(0000) GS:ffff8d24afe80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000a8 CR3: 0000000429810002 CR4: 00000000001706e0 Call Trace: __ieee80211_rx_h_amsdu+0x1b5/0x240 [mac80211] ? ieee80211_prepare_and_rx_handle+0xcdd/0x1320 [mac80211] ? __local_bh_enable_ip+0x3b/0xa0 ieee80211_prepare_and_rx_handle+0xcdd/0x1320 [mac80211] ? prepare_transfer+0x109/0x1a0 [xhci_hcd] ieee80211_rx_list+0xa80/0xda0 [mac80211] mt76_rx_complete+0x207/0x2e0 [mt76] mt76_rx_poll_complete+0x357/0x5a0 [mt76] mt76u_rx_worker+0x4f5/0x600 [mt76_usb] ? mt76_get_min_avg_rssi+0x140/0x140 [mt76] __mt76_worker_fn+0x50/0x80 [mt76] kthread+0xed/0x120 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x22/0x30 Since the initialization of rx->link and rx->link_sta is rather convoluted and duplicated in many places, clean it up by using a helper function to set it. Fixes: ccdde7c74ffd ("wifi: mac80211: properly implement MLO key handling") Fixes: b320d6c456ff ("wifi: mac80211: use correct rx link_sta instead of default") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20221230200747.19040-1-nbd@nbd.name [remove unnecessary rx->sta->sta.mlo check] Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 222 +++++++++++++++++++++------------------------- 1 file changed, 99 insertions(+), 123 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7e3ab6e1b28f3..c518287b2356b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4049,6 +4049,58 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) #undef CALL_RXH } +static bool +ieee80211_rx_is_valid_sta_link_id(struct ieee80211_sta *sta, u8 link_id) +{ + if (!sta->mlo) + return false; + + return !!(sta->valid_links & BIT(link_id)); +} + +static bool ieee80211_rx_data_set_link(struct ieee80211_rx_data *rx, + u8 link_id) +{ + rx->link_id = link_id; + rx->link = rcu_dereference(rx->sdata->link[link_id]); + + if (!rx->sta) + return rx->link; + + if (!ieee80211_rx_is_valid_sta_link_id(&rx->sta->sta, link_id)) + return false; + + rx->link_sta = rcu_dereference(rx->sta->link[link_id]); + + return rx->link && rx->link_sta; +} + +static bool ieee80211_rx_data_set_sta(struct ieee80211_rx_data *rx, + struct ieee80211_sta *pubsta, + int link_id) +{ + struct sta_info *sta; + + sta = container_of(pubsta, struct sta_info, sta); + + rx->link_id = link_id; + rx->sta = sta; + + if (sta) { + rx->local = sta->sdata->local; + if (!rx->sdata) + rx->sdata = sta->sdata; + rx->link_sta = &sta->deflink; + } + + if (link_id < 0) + rx->link = &rx->sdata->deflink; + else if (!ieee80211_rx_data_set_link(rx, link_id)) + return false; + + return true; +} + /* * This function makes calls into the RX path, therefore * it has to be invoked under RCU read lock. @@ -4057,16 +4109,19 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) { struct sk_buff_head frames; struct ieee80211_rx_data rx = { - .sta = sta, - .sdata = sta->sdata, - .local = sta->local, /* This is OK -- must be QoS data frame */ .security_idx = tid, .seqno_idx = tid, - .link_id = -1, }; struct tid_ampdu_rx *tid_agg_rx; - u8 link_id; + int link_id = -1; + + /* FIXME: statistics won't be right with this */ + if (sta->sta.valid_links) + link_id = ffs(sta->sta.valid_links) - 1; + + if (!ieee80211_rx_data_set_sta(&rx, &sta->sta, link_id)) + return; tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); if (!tid_agg_rx) @@ -4086,10 +4141,6 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) }; drv_event_callback(rx.local, rx.sdata, &event); } - /* FIXME: statistics won't be right with this */ - link_id = sta->sta.valid_links ? ffs(sta->sta.valid_links) - 1 : 0; - rx.link = rcu_dereference(sta->sdata->link[link_id]); - rx.link_sta = rcu_dereference(sta->link[link_id]); ieee80211_rx_handlers(&rx, &frames); } @@ -4105,7 +4156,6 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid, /* This is OK -- must be QoS data frame */ .security_idx = tid, .seqno_idx = tid, - .link_id = -1, }; int i, diff; @@ -4116,10 +4166,8 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid, sta = container_of(pubsta, struct sta_info, sta); - rx.sta = sta; - rx.sdata = sta->sdata; - rx.link = &rx.sdata->deflink; - rx.local = sta->local; + if (!ieee80211_rx_data_set_sta(&rx, pubsta, -1)) + return; rcu_read_lock(); tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); @@ -4506,15 +4554,6 @@ void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->sta_mtx); } -static bool -ieee80211_rx_is_valid_sta_link_id(struct ieee80211_sta *sta, u8 link_id) -{ - if (!sta->mlo) - return false; - - return !!(sta->valid_links & BIT(link_id)); -} - static void ieee80211_rx_8023(struct ieee80211_rx_data *rx, struct ieee80211_fast_rx *fast_rx, int orig_len) @@ -4625,7 +4664,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, struct sk_buff *skb = rx->skb; struct ieee80211_hdr *hdr = (void *)skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - struct sta_info *sta = rx->sta; int orig_len = skb->len; int hdrlen = ieee80211_hdrlen(hdr->frame_control); int snap_offs = hdrlen; @@ -4637,7 +4675,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; } addrs __aligned(2); - struct link_sta_info *link_sta; struct ieee80211_sta_rx_stats *stats; /* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write @@ -4740,18 +4777,10 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, drop: dev_kfree_skb(skb); - if (rx->link_id >= 0) { - link_sta = rcu_dereference(sta->link[rx->link_id]); - if (!link_sta) - return true; - } else { - link_sta = &sta->deflink; - } - if (fast_rx->uses_rss) - stats = this_cpu_ptr(link_sta->pcpu_rx_stats); + stats = this_cpu_ptr(rx->link_sta->pcpu_rx_stats); else - stats = &link_sta->rx_stats; + stats = &rx->link_sta->rx_stats; stats->dropped++; return true; @@ -4769,8 +4798,8 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_hdr *hdr = (void *)skb->data; - struct link_sta_info *link_sta = NULL; - struct ieee80211_link_data *link; + struct link_sta_info *link_sta = rx->link_sta; + struct ieee80211_link_data *link = rx->link; rx->skb = skb; @@ -4792,35 +4821,6 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, if (!ieee80211_accept_frame(rx)) return false; - if (rx->link_id >= 0) { - link = rcu_dereference(rx->sdata->link[rx->link_id]); - - /* we might race link removal */ - if (!link) - return true; - rx->link = link; - - if (rx->sta) { - rx->link_sta = - rcu_dereference(rx->sta->link[rx->link_id]); - if (!rx->link_sta) - return true; - } - } else { - if (rx->sta) - rx->link_sta = &rx->sta->deflink; - - rx->link = &sdata->deflink; - } - - if (unlikely(!is_multicast_ether_addr(hdr->addr1) && - rx->link_id >= 0 && rx->sta && rx->sta->sta.mlo)) { - link_sta = rcu_dereference(rx->sta->link[rx->link_id]); - - if (WARN_ON_ONCE(!link_sta)) - return true; - } - if (!consume) { struct skb_shared_hwtstamps *shwt; @@ -4840,7 +4840,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, shwt->hwtstamp = skb_hwtstamps(skb)->hwtstamp; } - if (unlikely(link_sta)) { + if (unlikely(rx->sta && rx->sta->sta.mlo)) { /* translate to MLD addresses */ if (ether_addr_equal(link->conf->addr, hdr->addr1)) ether_addr_copy(hdr->addr1, rx->sdata->vif.addr); @@ -4870,6 +4870,7 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw, struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_fast_rx *fast_rx; struct ieee80211_rx_data rx; + int link_id = -1; memset(&rx, 0, sizeof(rx)); rx.skb = skb; @@ -4886,12 +4887,8 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw, if (!pubsta) goto drop; - rx.sta = container_of(pubsta, struct sta_info, sta); - rx.sdata = rx.sta->sdata; - - if (status->link_valid && - !ieee80211_rx_is_valid_sta_link_id(pubsta, status->link_id)) - goto drop; + if (status->link_valid) + link_id = status->link_id; /* * TODO: Should the frame be dropped if the right link_id is not @@ -4900,19 +4897,8 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw, * link_id is used only for stats purpose and updating the stats on * the deflink is fine? */ - if (status->link_valid) - rx.link_id = status->link_id; - - if (rx.link_id >= 0) { - struct ieee80211_link_data *link; - - link = rcu_dereference(rx.sdata->link[rx.link_id]); - if (!link) - goto drop; - rx.link = link; - } else { - rx.link = &rx.sdata->deflink; - } + if (!ieee80211_rx_data_set_sta(&rx, pubsta, link_id)) + goto drop; fast_rx = rcu_dereference(rx.sta->fast_rx); if (!fast_rx) @@ -4930,6 +4916,8 @@ static bool ieee80211_rx_for_interface(struct ieee80211_rx_data *rx, { struct link_sta_info *link_sta; struct ieee80211_hdr *hdr = (void *)skb->data; + struct sta_info *sta; + int link_id = -1; /* * Look up link station first, in case there's a @@ -4939,24 +4927,19 @@ static bool ieee80211_rx_for_interface(struct ieee80211_rx_data *rx, */ link_sta = link_sta_info_get_bss(rx->sdata, hdr->addr2); if (link_sta) { - rx->sta = link_sta->sta; - rx->link_id = link_sta->link_id; + sta = link_sta->sta; + link_id = link_sta->link_id; } else { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - rx->sta = sta_info_get_bss(rx->sdata, hdr->addr2); - if (rx->sta) { - if (status->link_valid && - !ieee80211_rx_is_valid_sta_link_id(&rx->sta->sta, - status->link_id)) - return false; - - rx->link_id = status->link_valid ? status->link_id : -1; - } else { - rx->link_id = -1; - } + sta = sta_info_get_bss(rx->sdata, hdr->addr2); + if (status->link_valid) + link_id = status->link_id; } + if (!ieee80211_rx_data_set_sta(rx, &sta->sta, link_id)) + return false; + return ieee80211_prepare_and_rx_handle(rx, skb, consume); } @@ -5015,19 +4998,15 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (ieee80211_is_data(fc)) { struct sta_info *sta, *prev_sta; - u8 link_id = status->link_id; + int link_id = -1; - if (pubsta) { - rx.sta = container_of(pubsta, struct sta_info, sta); - rx.sdata = rx.sta->sdata; + if (status->link_valid) + link_id = status->link_id; - if (status->link_valid && - !ieee80211_rx_is_valid_sta_link_id(pubsta, link_id)) + if (pubsta) { + if (!ieee80211_rx_data_set_sta(&rx, pubsta, link_id)) goto out; - if (status->link_valid) - rx.link_id = status->link_id; - /* * In MLO connection, fetch the link_id using addr2 * when the driver does not pass link_id in status. @@ -5045,7 +5024,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (!link_sta) goto out; - rx.link_id = link_sta->link_id; + ieee80211_rx_data_set_link(&rx, link_sta->link_id); } if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) @@ -5061,30 +5040,27 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, continue; } - if ((status->link_valid && - !ieee80211_rx_is_valid_sta_link_id(&prev_sta->sta, - link_id)) || - (!status->link_valid && prev_sta->sta.mlo)) + rx.sdata = prev_sta->sdata; + if (!ieee80211_rx_data_set_sta(&rx, &prev_sta->sta, + link_id)) + goto out; + + if (!status->link_valid && prev_sta->sta.mlo) continue; - rx.link_id = status->link_valid ? link_id : -1; - rx.sta = prev_sta; - rx.sdata = prev_sta->sdata; ieee80211_prepare_and_rx_handle(&rx, skb, false); prev_sta = sta; } if (prev_sta) { - if ((status->link_valid && - !ieee80211_rx_is_valid_sta_link_id(&prev_sta->sta, - link_id)) || - (!status->link_valid && prev_sta->sta.mlo)) + rx.sdata = prev_sta->sdata; + if (!ieee80211_rx_data_set_sta(&rx, &prev_sta->sta, + link_id)) goto out; - rx.link_id = status->link_valid ? link_id : -1; - rx.sta = prev_sta; - rx.sdata = prev_sta->sdata; + if (!status->link_valid && prev_sta->sta.mlo) + goto out; if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) return; From 4444bc2116aecdcde87dce80373540adc8bd478b Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Fri, 30 Dec 2022 13:18:49 +0100 Subject: [PATCH 0598/1593] wifi: mac80211: Proper mark iTXQs for resumption When a running wake_tx_queue() call is aborted due to a hw queue stop the corresponding iTXQ is not always correctly marked for resumption: wake_tx_push_queue() can stops the queue run without setting @IEEE80211_TXQ_STOP_NETIF_TX. Without the @IEEE80211_TXQ_STOP_NETIF_TX flag __ieee80211_wake_txqs() will not schedule a new queue run and remaining frames in the queue get stuck till another frame is queued to it. Fix the issue for all drivers - also the ones with custom wake_tx_queue callbacks - by moving the logic into ieee80211_tx_dequeue() and drop the redundant @txqs_stopped. @IEEE80211_TXQ_STOP_NETIF_TX is also renamed to @IEEE80211_TXQ_DIRTY to better describe the flag. Fixes: c850e31f79f0 ("wifi: mac80211: add internal handler for wake_tx_queue") Signed-off-by: Alexander Wetzel Link: https://lore.kernel.org/r/20221230121850.218810-1-alexander@wetzel-home.de Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ---- net/mac80211/debugfs_sta.c | 5 +++-- net/mac80211/driver-ops.h | 2 +- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/tx.c | 20 +++++++++++------- net/mac80211/util.c | 42 +++----------------------------------- 6 files changed, 21 insertions(+), 54 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 689da327ce2e8..e3235b9c02c21 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1832,8 +1832,6 @@ struct ieee80211_vif_cfg { * @drv_priv: data area for driver use, will always be aligned to * sizeof(void \*). * @txq: the multicast data TX queue - * @txqs_stopped: per AC flag to indicate that intermediate TXQs are stopped, - * protected by fq->lock. * @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see * &enum ieee80211_offload_flags. * @mbssid_tx_vif: Pointer to the transmitting interface if MBSSID is enabled. @@ -1863,8 +1861,6 @@ struct ieee80211_vif { bool probe_req_reg; bool rx_mcast_action_reg; - bool txqs_stopped[IEEE80211_NUM_ACS]; - struct ieee80211_vif *mbssid_tx_vif; /* must be last */ diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 7a3d7893e19d6..f1914bf39f0e6 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -167,7 +167,7 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf, continue; txqi = to_txq_info(sta->sta.txq[i]); p += scnprintf(p, bufsz + buf - p, - "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n", + "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s%s)\n", txqi->txq.tid, txqi->txq.ac, txqi->tin.backlog_bytes, @@ -182,7 +182,8 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf, txqi->flags, test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ? "STOP" : "RUN", test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags) ? " AMPDU" : "", - test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags) ? " NO-AMSDU" : ""); + test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags) ? " NO-AMSDU" : "", + test_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) ? " DIRTY" : ""); } rcu_read_unlock(); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 809bad53e15b6..5d13a3dfd3664 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1199,7 +1199,7 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, /* In reconfig don't transmit now, but mark for waking later */ if (local->in_reconfig) { - set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txq->flags); + set_bit(IEEE80211_TXQ_DIRTY, &txq->flags); return; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 63ff0d2524b65..d16606e84e22d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -838,7 +838,7 @@ enum txq_info_flags { IEEE80211_TXQ_STOP, IEEE80211_TXQ_AMPDU, IEEE80211_TXQ_NO_AMSDU, - IEEE80211_TXQ_STOP_NETIF_TX, + IEEE80211_TXQ_DIRTY, }; /** diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2171cd1ca807e..178043f844893 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3783,6 +3783,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, struct ieee80211_tx_data tx; ieee80211_tx_result r; struct ieee80211_vif *vif = txq->vif; + int q = vif->hw_queue[txq->ac]; + bool q_stopped; WARN_ON_ONCE(softirq_count() == 0); @@ -3790,16 +3792,20 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, return NULL; begin: - spin_lock_bh(&fq->lock); + spin_lock(&local->queue_stop_reason_lock); + q_stopped = local->queue_stop_reasons[q]; + spin_unlock(&local->queue_stop_reason_lock); - if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags) || - test_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags)) - goto out; + if (unlikely(q_stopped)) { + /* mark for waking later */ + set_bit(IEEE80211_TXQ_DIRTY, &txqi->flags); + return NULL; + } - if (vif->txqs_stopped[txq->ac]) { - set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags); + spin_lock_bh(&fq->lock); + + if (unlikely(test_bit(IEEE80211_TXQ_STOP, &txqi->flags))) goto out; - } /* Make sure fragments stay together. */ skb = __skb_dequeue(&txqi->frags); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 6f5407038459d..261ac667887f8 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -292,22 +292,12 @@ static void wake_tx_push_queue(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_txq *queue) { - int q = sdata->vif.hw_queue[queue->ac]; struct ieee80211_tx_control control = { .sta = queue->sta, }; struct sk_buff *skb; - unsigned long flags; - bool q_stopped; while (1) { - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - q_stopped = local->queue_stop_reasons[q]; - spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - - if (q_stopped) - break; - skb = ieee80211_tx_dequeue(&local->hw, queue); if (!skb) break; @@ -347,8 +337,6 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac) local_bh_disable(); spin_lock(&fq->lock); - sdata->vif.txqs_stopped[ac] = false; - if (!test_bit(SDATA_STATE_RUNNING, &sdata->state)) goto out; @@ -370,7 +358,7 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac) if (ac != txq->ac) continue; - if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX, + if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY, &txqi->flags)) continue; @@ -385,7 +373,7 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac) txqi = to_txq_info(vif->txq); - if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags) || + if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) || (ps && atomic_read(&ps->num_sta_ps)) || ac != vif->txq->ac) goto out; @@ -517,8 +505,6 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, bool refcounted) { struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata; - int n_acs = IEEE80211_NUM_ACS; trace_stop_queue(local, queue, reason); @@ -530,29 +516,7 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, else local->q_stop_reasons[queue][reason]++; - if (__test_and_set_bit(reason, &local->queue_stop_reasons[queue])) - return; - - if (local->hw.queues < IEEE80211_NUM_ACS) - n_acs = 1; - - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - int ac; - - if (!sdata->dev) - continue; - - for (ac = 0; ac < n_acs; ac++) { - if (sdata->vif.hw_queue[ac] == queue || - sdata->vif.cab_queue == queue) { - spin_lock(&local->fq.lock); - sdata->vif.txqs_stopped[ac] = true; - spin_unlock(&local->fq.lock); - } - } - } - rcu_read_unlock(); + set_bit(reason, &local->queue_stop_reasons[queue]); } void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, From 69403bad97aa0162e3d7911b27e25abe774093df Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Fri, 30 Dec 2022 13:18:50 +0100 Subject: [PATCH 0599/1593] wifi: mac80211: sdata can be NULL during AMPDU start ieee80211_tx_ba_session_handle_start() may get NULL for sdata when a deauthentication is ongoing. Here a trace triggering the race with the hostapd test multi_ap_fronthaul_on_ap: (gdb) list *drv_ampdu_action+0x46 0x8b16 is in drv_ampdu_action (net/mac80211/driver-ops.c:396). 391 int ret = -EOPNOTSUPP; 392 393 might_sleep(); 394 395 sdata = get_bss_sdata(sdata); 396 if (!check_sdata_in_driver(sdata)) 397 return -EIO; 398 399 trace_drv_ampdu_action(local, sdata, params); 400 wlan0: moving STA 02:00:00:00:03:00 to state 3 wlan0: associated wlan0: deauthenticating from 02:00:00:00:03:00 by local choice (Reason: 3=DEAUTH_LEAVING) wlan3.sta1: Open BA session requested for 02:00:00:00:00:00 tid 0 wlan3.sta1: dropped frame to 02:00:00:00:00:00 (unauthorized port) wlan0: moving STA 02:00:00:00:03:00 to state 2 wlan0: moving STA 02:00:00:00:03:00 to state 1 wlan0: Removed STA 02:00:00:00:03:00 wlan0: Destroyed STA 02:00:00:00:03:00 BUG: unable to handle page fault for address: fffffffffffffb48 PGD 11814067 P4D 11814067 PUD 11816067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 2 PID: 133397 Comm: kworker/u16:1 Tainted: G W 6.1.0-rc8-wt+ #59 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-20220807_005459-localhost 04/01/2014 Workqueue: phy3 ieee80211_ba_session_work [mac80211] RIP: 0010:drv_ampdu_action+0x46/0x280 [mac80211] Code: 53 48 89 f3 be 89 01 00 00 e8 d6 43 bf ef e8 21 46 81 f0 83 bb a0 1b 00 00 04 75 0e 48 8b 9b 28 0d 00 00 48 81 eb 10 0e 00 00 <8b> 93 58 09 00 00 f6 c2 20 0f 84 3b 01 00 00 8b 05 dd 1c 0f 00 85 RSP: 0018:ffffc900025ebd20 EFLAGS: 00010287 RAX: 0000000000000000 RBX: fffffffffffff1f0 RCX: ffff888102228240 RDX: 0000000080000000 RSI: ffffffff918c5de0 RDI: ffff888102228b40 RBP: ffffc900025ebd40 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000001 R11: 0000000000000000 R12: ffff888118c18ec0 R13: 0000000000000000 R14: ffffc900025ebd60 R15: ffff888018b7efb8 FS: 0000000000000000(0000) GS:ffff88817a600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffffffffffffb48 CR3: 0000000105228006 CR4: 0000000000170ee0 Call Trace: ieee80211_tx_ba_session_handle_start+0xd0/0x190 [mac80211] ieee80211_ba_session_work+0xff/0x2e0 [mac80211] process_one_work+0x29f/0x620 worker_thread+0x4d/0x3d0 ? process_one_work+0x620/0x620 kthread+0xfb/0x120 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x22/0x30 Signed-off-by: Alexander Wetzel Link: https://lore.kernel.org/r/20221230121850.218810-2-alexander@wetzel-home.de Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 6 +++++- net/mac80211/driver-ops.c | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 9c40f8d3bce8c..3dbb724d7dc47 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -491,7 +491,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; struct ieee80211_local *local = sta->local; - struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_sub_if_data *sdata; struct ieee80211_ampdu_params params = { .sta = &sta->sta, .action = IEEE80211_AMPDU_TX_START, @@ -521,6 +521,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) */ synchronize_net(); + sdata = sta->sdata; params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, ¶ms); tid_tx->ssn = params.ssn; @@ -534,6 +535,9 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) */ set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state); } else if (ret) { + if (!sdata) + return; + ht_dbg(sdata, "BA request denied - HW unavailable for %pM tid %d\n", sta->sta.addr, tid); diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index d737db4e07e24..cfb09e4aed4d3 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -392,6 +392,9 @@ int drv_ampdu_action(struct ieee80211_local *local, might_sleep(); + if (!sdata) + return -EIO; + sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) return -EIO; From 592234e941f1addaa598601c9227e3b72d608625 Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Fri, 6 Jan 2023 23:31:41 +0100 Subject: [PATCH 0600/1593] wifi: mac80211: Fix iTXQ AMPDU fragmentation handling mac80211 must not enable aggregation wile transmitting a fragmented MPDU. Enforce that for mac80211 internal TX queues (iTXQs). Reported-by: kernel test robot Link: https://lore.kernel.org/oe-lkp/202301021738.7cd3e6ae-oliver.sang@intel.com Signed-off-by: Alexander Wetzel Link: https://lore.kernel.org/r/20230106223141.98696-1-alexander@wetzel-home.de Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 2 -- net/mac80211/ht.c | 31 +++++++++++++++++++++++++++++++ net/mac80211/tx.c | 18 ++++++------------ 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 3dbb724d7dc47..f9514bacbd4a1 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -511,8 +511,6 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) */ clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); - ieee80211_agg_stop_txq(sta, tid); - /* * Make sure no packets are being processed. This ensures that * we have a valid starting sequence number and that in-flight diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 83bc41346ae7f..5315ab7502802 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -391,6 +391,37 @@ void ieee80211_ba_session_work(struct work_struct *work) tid_tx = sta->ampdu_mlme.tid_start_tx[tid]; if (!blocked && tid_tx) { + struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); + struct ieee80211_sub_if_data *sdata = + vif_to_sdata(txqi->txq.vif); + struct fq *fq = &sdata->local->fq; + + spin_lock_bh(&fq->lock); + + /* Allow only frags to be dequeued */ + set_bit(IEEE80211_TXQ_STOP, &txqi->flags); + + if (!skb_queue_empty(&txqi->frags)) { + /* Fragmented Tx is ongoing, wait for it to + * finish. Reschedule worker to retry later. + */ + + spin_unlock_bh(&fq->lock); + spin_unlock_bh(&sta->lock); + + /* Give the task working on the txq a chance + * to send out the queued frags + */ + synchronize_net(); + + mutex_unlock(&sta->ampdu_mlme.mtx); + + ieee80211_queue_work(&sdata->local->hw, work); + return; + } + + spin_unlock_bh(&fq->lock); + /* * Assign it over to the normal tid_tx array * where it "goes live". diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 178043f844893..defe97a31724d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1129,7 +1129,6 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, struct sk_buff *purge_skb = NULL; if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) { - info->flags |= IEEE80211_TX_CTL_AMPDU; reset_agg_timer = true; } else if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { /* @@ -1161,7 +1160,6 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, if (!tid_tx) { /* do nothing, let packet pass through */ } else if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) { - info->flags |= IEEE80211_TX_CTL_AMPDU; reset_agg_timer = true; } else { queued = true; @@ -3677,8 +3675,7 @@ static void __ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, info->band = fast_tx->band; info->control.vif = &sdata->vif; info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT | - IEEE80211_TX_CTL_DONTFRAG | - (ampdu ? IEEE80211_TX_CTL_AMPDU : 0); + IEEE80211_TX_CTL_DONTFRAG; info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT | u32_encode_bits(IEEE80211_LINK_UNSPECIFIED, IEEE80211_TX_CTRL_MLO_LINK); @@ -3804,9 +3801,6 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, spin_lock_bh(&fq->lock); - if (unlikely(test_bit(IEEE80211_TXQ_STOP, &txqi->flags))) - goto out; - /* Make sure fragments stay together. */ skb = __skb_dequeue(&txqi->frags); if (unlikely(skb)) { @@ -3816,6 +3810,9 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, IEEE80211_SKB_CB(skb)->control.flags &= ~IEEE80211_TX_INTCFL_NEED_TXPROCESSING; } else { + if (unlikely(test_bit(IEEE80211_TXQ_STOP, &txqi->flags))) + goto out; + skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func); } @@ -3866,9 +3863,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, } if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags)) - info->flags |= IEEE80211_TX_CTL_AMPDU; - else - info->flags &= ~IEEE80211_TX_CTL_AMPDU; + info->flags |= (IEEE80211_TX_CTL_AMPDU | + IEEE80211_TX_CTL_DONTFRAG); if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) { if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { @@ -4602,8 +4598,6 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, info = IEEE80211_SKB_CB(skb); memset(info, 0, sizeof(*info)); - if (tid_tx) - info->flags |= IEEE80211_TX_CTL_AMPDU; info->hw_queue = sdata->vif.hw_queue[queue]; From 0eb38842ada035d71bb06fb9116f26f24ee0f998 Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Wed, 21 Dec 2022 10:56:16 -0800 Subject: [PATCH 0601/1593] wifi: mac80211: reset multiple BSSID options in stop_ap() Reset multiple BSSID options when all AP related configurations are reset in ieee80211_stop_ap(). Stale values result in HWSIM test failures (e.g. p2p_group_cli_invalid), if run after 'he_ap_ema'. Reported-by: Jouni Malinen Signed-off-by: Aloka Dixit Link: https://lore.kernel.org/r/20221221185616.11514-1-quic_alokad@quicinc.com Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8f9a2ab502b38..672eff6f5d328 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -147,6 +147,7 @@ static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata, link_conf->bssid_index = 0; link_conf->nontransmitted = false; link_conf->ema_ap = false; + link_conf->bssid_indicator = 0; if (sdata->vif.type != NL80211_IFTYPE_AP || !params.tx_wdev) return -EINVAL; @@ -1511,6 +1512,12 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, kfree(link_conf->ftmr_params); link_conf->ftmr_params = NULL; + sdata->vif.mbssid_tx_vif = NULL; + link_conf->bssid_index = 0; + link_conf->nontransmitted = false; + link_conf->ema_ap = false; + link_conf->bssid_indicator = 0; + __sta_info_flush(sdata, true); ieee80211_free_keys(sdata, true); From fa22b51ace8aa106267636f36170e940e676809c Mon Sep 17 00:00:00 2001 From: Sriram R Date: Thu, 8 Dec 2022 09:30:50 +0530 Subject: [PATCH 0602/1593] mac80211: Fix MLO address translation for multiple bss case When multiple interfaces are present in the local interface list, new skb copy is taken before rx processing except for the first interface. The address translation happens each time only on the original skb since the hdr pointer is not updated properly to the newly created skb. As a result frames start to drop in userspace when address based checks or search fails. Signed-off-by: Sriram R Link: https://lore.kernel.org/r/20221208040050.25922-1-quic_srirrama@quicinc.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c518287b2356b..c6562a6d25035 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4838,6 +4838,9 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, */ shwt = skb_hwtstamps(rx->skb); shwt->hwtstamp = skb_hwtstamps(skb)->hwtstamp; + + /* Update the hdr pointer to the new skb for translation below */ + hdr = (struct ieee80211_hdr *)rx->skb->data; } if (unlikely(rx->sta && rx->sta->sta.mlo)) { From f216033d770f7ca0eda491fe01a9f02e7af59576 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 14 Dec 2022 14:03:26 +0100 Subject: [PATCH 0603/1593] wifi: mac80211: fix MLO + AP_VLAN check Instead of preventing adding AP_VLAN to MLO enabled APs, this check was preventing adding more than one 4-addr AP_VLAN regardless of the MLO status. Fix this by adding missing extra checks. Fixes: ae960ee90bb1 ("wifi: mac80211: prevent VLANs on MLDs") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20221214130326.37756-1-nbd@nbd.name Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d49a5906a9435..e20c3fe9a0b19 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -364,7 +364,9 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, /* No support for VLAN with MLO yet */ if (iftype == NL80211_IFTYPE_AP_VLAN && - nsdata->wdev.use_4addr) + sdata->wdev.use_4addr && + nsdata->vif.type == NL80211_IFTYPE_AP && + nsdata->vif.valid_links) return -EOPNOTSUPP; /* From c2337a40e04dde1692b5b0a46ecc59f89aaba8a1 Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Mon, 14 Nov 2022 11:40:08 +0100 Subject: [PATCH 0604/1593] s390/kexec: fix ipl report address for kdump This commit addresses the following erroneous situation with file-based kdump executed on a system with a valid IPL report. On s390, a kdump kernel, its initrd and IPL report if present are loaded into a special and reserved on boot memory region - crashkernel. When a system crashes and kdump was activated before, the purgatory code is entered first which swaps the crashkernel and [0 - crashkernel size] memory regions. Only after that the kdump kernel is entered. For this reason, the pointer to an IPL report in lowcore must point to the IPL report after the swap and not to the address of the IPL report that was located in crashkernel memory region before the swap. Failing to do so, makes the kdump's decompressor try to read memory from the crashkernel memory region which already contains the production's kernel memory. The situation described above caused spontaneous kdump failures/hangs on systems where the Secure IPL is activated because on such systems an IPL report is always present. In that case kdump's decompressor tried to parse an IPL report which frequently lead to illegal memory accesses because an IPL report contains addresses to various data. Cc: Fixes: 99feaa717e55 ("s390/kexec_file: Create ipl report and pass to next kernel") Reviewed-by: Vasily Gorbik Signed-off-by: Alexander Egorenkov Signed-off-by: Heiko Carstens --- arch/s390/kernel/machine_kexec_file.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index fc6d5f58debeb..2df94d32140c4 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -187,8 +187,6 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->memsz = ALIGN(data->memsz, PAGE_SIZE); buf.mem = data->memsz; - if (image->type == KEXEC_TYPE_CRASH) - buf.mem += crashk_res.start; ptr = (void *)ipl_cert_list_addr; end = ptr + ipl_cert_list_size; @@ -225,6 +223,9 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr); *lc_ipl_parmblock_ptr = (__u32)buf.mem; + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; + ret = kexec_add_buffer(&buf); out: return ret; From 33e24f0738b922b6f5f4118dbdc26cac8400d7b9 Mon Sep 17 00:00:00 2001 From: Jarrah Gosbell Date: Wed, 7 Dec 2022 11:32:13 +0000 Subject: [PATCH 0605/1593] arm64: dts: rockchip: reduce thermal limits on rk3399-pinephone-pro While this device uses the rk3399 it is also enclosed in a tight package and cooled through the screen and back case. The default rk3399 thermal limits can result in a burnt screen. These lower limits have resulted in the existing burn not expanding and will hopefully result in future devices not experiencing the issue. Signed-off-by: Jarrah Gosbell Link: https://lore.kernel.org/r/20221207113212.8216-1-kernel@undef.tools Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts index 04403a76238b8..a0795a2b1cb16 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts @@ -104,6 +104,13 @@ }; }; +&cpu_alert0 { + temperature = <65000>; +}; +&cpu_alert1 { + temperature = <68000>; +}; + &cpu_l0 { cpu-supply = <&vdd_cpu_l>; }; From d891f2b724b39a2a41e3ad7b57110193993242ff Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 6 Jan 2023 07:13:19 -0800 Subject: [PATCH 0606/1593] perf build: Properly guard libbpf includes Including libbpf header files should be guarded by HAVE_LIBBPF_SUPPORT. In bpf_counter.h, move the skeleton utilities under HAVE_BPF_SKEL. Fixes: d6a735ef3277c45f ("perf bpf_counter: Move common functions to bpf_counter.h") Reported-by: Mike Leach Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Tested-by: Jiri Olsa Tested-by: Mike Leach Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20230105172243.7238-1-mike.leach@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 2 ++ tools/perf/util/bpf_counter.h | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 86e06f136f402..d21fe0f32a6de 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -16,7 +16,9 @@ #include "util/record.h" #include +#ifdef HAVE_LIBBPF_SUPPORT #include +#endif #include "util/bpf_map.h" #include "util/rlimit.h" #include "builtin.h" diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h index 4dbf26408b692..c6d21c07b14cd 100644 --- a/tools/perf/util/bpf_counter.h +++ b/tools/perf/util/bpf_counter.h @@ -4,9 +4,12 @@ #include #include + +#ifdef HAVE_LIBBPF_SUPPORT #include #include #include +#endif struct evsel; struct target; @@ -87,6 +90,8 @@ static inline void set_max_rlimit(void) setrlimit(RLIMIT_MEMLOCK, &rinf); } +#ifdef HAVE_BPF_SKEL + static inline __u32 bpf_link_get_id(int fd) { struct bpf_link_info link_info = { .id = 0, }; @@ -127,5 +132,6 @@ static inline int bperf_trigger_reading(int prog_fd, int cpu) return bpf_prog_test_run_opts(prog_fd, &opts); } +#endif /* HAVE_BPF_SKEL */ #endif /* __PERF_BPF_COUNTER_H */ From b3719108ae60169eda5c941ca5e1be1faa371c57 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sun, 8 Jan 2023 14:23:59 +0800 Subject: [PATCH 0607/1593] perf kmem: Support legacy tracepoints Commit 11e9734bcb6a7361 ("mm/slab_common: unify NUMA and UMA version of tracepoints") removed tracepoints 'kmalloc_node' and 'kmem_cache_alloc_node', we need to consider the tool should be backward compatible. If it detect the tracepoint "kmem:kmalloc_node", this patch enables the legacy tracepoints, otherwise, it will ignore them. Fixes: 11e9734bcb6a7361 ("mm/slab_common: unify NUMA and UMA version of tracepoints") Reported-by: Ravi Bangoria Reviewed-by: James Clark Signed-off-by: Leo Yan Cc: Alexander Shishkin Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vlastimil Babka Link: https://lore.kernel.org/r/20230108062400.250690-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kmem.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index e20656c431a4b..50a3df5dc18a8 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1824,6 +1824,19 @@ static int parse_line_opt(const struct option *opt __maybe_unused, return 0; } +static bool slab_legacy_tp_is_exposed(void) +{ + /* + * The tracepoints "kmem:kmalloc_node" and + * "kmem:kmem_cache_alloc_node" have been removed on the latest + * kernel, if the tracepoint "kmem:kmalloc_node" is existed it + * means the tool is running on an old kernel, we need to + * rollback to support these legacy tracepoints. + */ + return IS_ERR(trace_event__tp_format("kmem", "kmalloc_node")) ? + false : true; +} + static int __cmd_record(int argc, const char **argv) { const char * const record_args[] = { @@ -1831,22 +1844,28 @@ static int __cmd_record(int argc, const char **argv) }; const char * const slab_events[] = { "-e", "kmem:kmalloc", - "-e", "kmem:kmalloc_node", "-e", "kmem:kfree", "-e", "kmem:kmem_cache_alloc", - "-e", "kmem:kmem_cache_alloc_node", "-e", "kmem:kmem_cache_free", }; + const char * const slab_legacy_events[] = { + "-e", "kmem:kmalloc_node", + "-e", "kmem:kmem_cache_alloc_node", + }; const char * const page_events[] = { "-e", "kmem:mm_page_alloc", "-e", "kmem:mm_page_free", }; unsigned int rec_argc, i, j; const char **rec_argv; + unsigned int slab_legacy_tp_exposed = slab_legacy_tp_is_exposed(); rec_argc = ARRAY_SIZE(record_args) + argc - 1; - if (kmem_slab) + if (kmem_slab) { rec_argc += ARRAY_SIZE(slab_events); + if (slab_legacy_tp_exposed) + rec_argc += ARRAY_SIZE(slab_legacy_events); + } if (kmem_page) rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */ @@ -1861,6 +1880,10 @@ static int __cmd_record(int argc, const char **argv) if (kmem_slab) { for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++) rec_argv[i] = strdup(slab_events[j]); + if (slab_legacy_tp_exposed) { + for (j = 0; j < ARRAY_SIZE(slab_legacy_events); j++, i++) + rec_argv[i] = strdup(slab_legacy_events[j]); + } } if (kmem_page) { rec_argv[i++] = strdup("-g"); From dce088ab0d51ae3b14fb2bd608e9c649aadfe5dc Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sun, 8 Jan 2023 14:24:00 +0800 Subject: [PATCH 0608/1593] perf kmem: Support field "node" in evsel__process_alloc_event() coping with recent tracepoint restructuring Commit 11e9734bcb6a7361 ("mm/slab_common: unify NUMA and UMA version of tracepoints") adds the field "node" into the tracepoints 'kmalloc' and 'kmem_cache_alloc', so this patch modifies the event process function to support the field "node". If field "node" is detected by checking function evsel__field(), it stats the cross allocation. When the "node" value is NUMA_NO_NODE (-1), it means the memory can be allocated from any memory node, in this case, we don't account it as a cross allocation. Fixes: 11e9734bcb6a7361 ("mm/slab_common: unify NUMA and UMA version of tracepoints") Reported-by: Ravi Bangoria Reviewed-by: James Clark Signed-off-by: Leo Yan Cc: Alexander Shishkin Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vlastimil Babka Link: https://lore.kernel.org/r/20230108062400.250690-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kmem.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 50a3df5dc18a8..8ae0a15352936 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -26,6 +26,7 @@ #include "util/string2.h" #include +#include #include #include #include @@ -185,22 +186,33 @@ static int evsel__process_alloc_event(struct evsel *evsel, struct perf_sample *s total_allocated += bytes_alloc; nr_allocs++; - return 0; -} -static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_sample *sample) -{ - int ret = evsel__process_alloc_event(evsel, sample); + /* + * Commit 11e9734bcb6a ("mm/slab_common: unify NUMA and UMA + * version of tracepoints") adds the field "node" into the + * tracepoints 'kmalloc' and 'kmem_cache_alloc'. + * + * The legacy tracepoints 'kmalloc_node' and 'kmem_cache_alloc_node' + * also contain the field "node". + * + * If the tracepoint contains the field "node" the tool stats the + * cross allocation. + */ + if (evsel__field(evsel, "node")) { + int node1, node2; - if (!ret) { - int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}), - node2 = evsel__intval(evsel, sample, "node"); + node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}); + node2 = evsel__intval(evsel, sample, "node"); - if (node1 != node2) + /* + * If the field "node" is NUMA_NO_NODE (-1), we don't take it + * as a cross allocation. + */ + if ((node2 != NUMA_NO_NODE) && (node1 != node2)) nr_cross_allocs++; } - return ret; + return 0; } static int ptr_cmp(void *, void *); @@ -1369,8 +1381,8 @@ static int __cmd_kmem(struct perf_session *session) /* slab allocator */ { "kmem:kmalloc", evsel__process_alloc_event, }, { "kmem:kmem_cache_alloc", evsel__process_alloc_event, }, - { "kmem:kmalloc_node", evsel__process_alloc_node_event, }, - { "kmem:kmem_cache_alloc_node", evsel__process_alloc_node_event, }, + { "kmem:kmalloc_node", evsel__process_alloc_event, }, + { "kmem:kmem_cache_alloc_node", evsel__process_alloc_event, }, { "kmem:kfree", evsel__process_free_event, }, { "kmem:kmem_cache_free", evsel__process_free_event, }, /* page allocator */ From 213b760fbc69f2d6aed8f64f006a17869f0d8da4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 9 Jan 2023 11:59:30 -0300 Subject: [PATCH 0609/1593] perf tools: Don't install libtraceevent plugins as its not anymore in the kernel sources While doing 'make -C tools/perf build-test' one can notice error messages while trying to install libtraceevent plugins, stop doing that as libtraceevent isn't anymore a homie. These are the warnings dealt with: make_install_prefix_slash_O: make install prefix=/tmp/krava/ failed to find: /tmp/krava/etc/bash_completion.d/perf failed to find: /tmp/krava/lib64/traceevent/plugins/plugin_cfg80211.so failed to find: /tmp/krava/lib64/traceevent/plugins/plugin_scsi.so failed to find: /tmp/krava/lib64/traceevent/plugins/plugin_xen.so failed to find: /tmp/krava/lib64/traceevent/plugins/plugin_function.so failed to find: /tmp/krava/lib64/traceevent/plugins/plugin_sched_switch.so failed to find: /tmp/krava/lib64/traceevent/plugins/plugin_mac80211.so failed to find: /tmp/krava/lib64/traceevent/plugins/plugin_kvm.so failed to find: /tmp/krava/lib64/traceevent/plugins/plugin_kmem.so failed to find: /tmp/krava/lib64/traceevent/plugins/plugin_hrtimer.so failed to find: /tmp/krava/lib64/traceevent/plugins/plugin_jbd2.so Fixes: 4171925aa9f3f7bf ("tools lib traceevent: Remove libtraceevent") Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Link: http://lore.kernel.org/lkml/Y7xXz+TSpiCbQGjw@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 8 -------- tools/perf/tests/make | 12 ------------ 2 files changed, 20 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index c2504c39bdcb8..156440ea01914 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1314,14 +1314,6 @@ tip_instdir_SQ = $(subst ','\'',$(tip_instdir)) export perfexec_instdir_SQ -# If we install to $(HOME) we keep the traceevent default: -# $(HOME)/.traceevent/plugins -# Otherwise we install plugins into the global $(libdir). -ifdef DESTDIR -plugindir=$(libdir)/traceevent/plugins -plugindir_SQ= $(subst ','\'',$(plugindir)) -endif - print_var = $(eval $(print_var_code)) $(info $(MSG)) define print_var_code MSG = $(shell printf '...%40s: %s' $(1) $($(1))) diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 05e818a8bbad1..009d6efb673ce 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -222,19 +222,7 @@ installed_files_bin := bin/perf installed_files_bin += etc/bash_completion.d/perf installed_files_bin += libexec/perf-core/perf-archive -installed_files_plugins := $(lib)/traceevent/plugins/plugin_cfg80211.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_scsi.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_xen.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_function.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_sched_switch.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_mac80211.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_kvm.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_kmem.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_hrtimer.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_jbd2.so - installed_files_all := $(installed_files_bin) -installed_files_all += $(installed_files_plugins) test_make_install := $(call test_dest_files,$(installed_files_all)) test_make_install_O := $(call test_dest_files,$(installed_files_all)) From f00eccb447762c99675f3f5b0311a1216135af95 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 6 Jan 2023 07:13:20 -0800 Subject: [PATCH 0610/1593] perf build: Fix build error when NO_LIBBPF=1 The $(LIBBPF) target should only be a dependency of prepare if the static version of libbpf is needed. Add a new LIBBPF_STATIC variable that is set by Makefile.config. Use LIBBPF_STATIC to determine whether the CFLAGS, etc. need updating and for adding $(LIBBPF) as a prepare dependency. As Makefile.config isn't loaded for "clean" as a target, always set LIBBPF_OUTPUT regardless of whether it is needed for $(LIBBPF). This is done to minimize conditional logic for $(LIBBPF)-clean. This issue and an original fix was reported by Mike Leach in: https://lore.kernel.org/lkml/20230105172243.7238-1-mike.leach@linaro.org/ Fixes: 746bd29e348f99b4 ("perf build: Use tools/lib headers from install path") Reported-by: Mike Leach Signed-off-by: Ian Rogers Tested-by: Jiri Olsa Cc: Alexander Shishkin Cc: bpf@vger.kernel.org Cc: Ian Rogers Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20230106151320.619514-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 ++ tools/perf/Makefile.perf | 21 ++++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 156440ea01914..9962ae23ab8c5 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -602,6 +602,8 @@ ifndef NO_LIBELF dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif else + # Libbpf will be built as a static library from tools/lib/bpf. + LIBBPF_STATIC := 1 CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 1e32c93b80429..b7d9c42062300 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -303,10 +303,12 @@ ifneq ($(OUTPUT),) else LIBBPF_OUTPUT = $(CURDIR)/libbpf endif -LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) -LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include -LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a -CFLAGS += -I$(LIBBPF_OUTPUT)/include +ifdef LIBBPF_STATIC + LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) + LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include + LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a + CFLAGS += -I$(LIBBPF_OUTPUT)/include +endif ifneq ($(OUTPUT),) LIBSUBCMD_OUTPUT = $(abspath $(OUTPUT))/libsubcmd @@ -393,10 +395,8 @@ endif export PERL_PATH PERFLIBS = $(LIBAPI) $(LIBPERF) $(LIBSUBCMD) $(LIBSYMBOL) -ifndef NO_LIBBPF - ifndef LIBBPF_DYNAMIC - PERFLIBS += $(LIBBPF) - endif +ifdef LIBBPF_STATIC + PERFLIBS += $(LIBBPF) endif # We choose to avoid "if .. else if .. else .. endif endif" @@ -756,12 +756,15 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(arch_errno_name_array) \ $(sync_file_range_arrays) \ $(LIBAPI) \ - $(LIBBPF) \ $(LIBPERF) \ $(LIBSUBCMD) \ $(LIBSYMBOL) \ bpf-skel +ifdef LIBBPF_STATIC +prepare: $(LIBBPF) +endif + $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ From eef034ac6690118c88f357b00e2b3239c9d8575d Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 10 Jan 2023 13:49:30 +0100 Subject: [PATCH 0611/1593] affs: initialize fsdata in affs_truncate() When aops->write_begin() does not initialize fsdata, KMSAN may report an error passing the latter to aops->write_end(). Fix this by unconditionally initializing fsdata. Fixes: f2b6a16eb8f5 ("fs: affs convert to new aops") Suggested-by: Eric Biggers Signed-off-by: Alexander Potapenko Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/affs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/affs/file.c b/fs/affs/file.c index cefa222f7881c..8daeed31e1af9 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -880,7 +880,7 @@ affs_truncate(struct inode *inode) if (inode->i_size > AFFS_I(inode)->mmu_private) { struct address_space *mapping = inode->i_mapping; struct page *page; - void *fsdata; + void *fsdata = NULL; loff_t isize = inode->i_size; int res; From 1692bffec674551163a7a4be32f59fdde04ecd27 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 25 Nov 2022 15:41:34 +0100 Subject: [PATCH 0612/1593] arm64: dts: rockchip: drop unused LED mode property from rk3328-roc-cc GPIO LEDs do not have a 'mode' property: rockchip/rk3328-roc-pc.dtb: leds: led-0: Unevaluated properties are not allowed ('mode' was unexpected) Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221125144135.477144-1-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts index aa22a0c222655..5d5d9574088ca 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts @@ -96,7 +96,6 @@ linux,default-trigger = "heartbeat"; gpios = <&rk805 1 GPIO_ACTIVE_LOW>; default-state = "on"; - mode = <0x23>; }; user_led: led-1 { @@ -104,7 +103,6 @@ linux,default-trigger = "mmc1"; gpios = <&rk805 0 GPIO_ACTIVE_LOW>; default-state = "off"; - mode = <0x05>; }; }; }; From 14292a4ae1a37a7c97fda59b85cd625c77165ddf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Jan 2023 11:46:37 -0300 Subject: [PATCH 0613/1593] perf bpf: Avoid build breakage with libbpf < 0.8.0 + LIBBPF_DYNAMIC=1 In 746bd29e348f99b4 ("perf build: Use tools/lib headers from install path") we stopped having the tools/lib/ directory from the kernel sources in the header include path unconditionally, which breaks the build on systems with older versions of libbpf-devel, in this case 0.7.0 as some of the structures and function declarations present in the newer version of libbpf included in the kernel sources (tools/lib/bpf) are not anymore used, just the ones in the system libbpf. So instead of trying to provide alternative functions when the libbpf-bpf_program__set_insns feature test fails, fail a LIBBPF_DYNAMIC=1 build (requesting the use of the system's libbpf) and emit this build error message: $ make LIBBPF_DYNAMIC=1 -C tools/perf Makefile.config:593: *** Error: libbpf devel library needs to be >= 0.8.0 to build with LIBBPF_DYNAMIC, update or build statically with the version that comes with the kernel sources. Stop. $ For v6.3 these tests will be revamped and we'll require libbpf 1.0 as a minimal version for using LIBBPF_DYNAMIC=1, most distros should have it by now or at v6.3 time. Fixes: 746bd29e348f99b4 ("perf build: Use tools/lib headers from install path") Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/CAP-5=fVa51_URGsdDFVTzpyGmdDRj_Dj2EKPuDHNQ0BYgMSzUA@mail.gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 9962ae23ab8c5..5b87846759036 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -589,6 +589,8 @@ ifndef NO_LIBELF $(call feature_check,libbpf-bpf_program__set_insns) ifeq ($(feature-libbpf-bpf_program__set_insns), 1) CFLAGS += -DHAVE_LIBBPF_BPF_PROGRAM__SET_INSNS + else + dummy := $(error Error: libbpf devel library needs to be >= 0.8.0 to build with LIBBPF_DYNAMIC, update or build statically with the version that comes with the kernel sources); endif $(call feature_check,libbpf-btf__raw_data) ifeq ($(feature-libbpf-btf__raw_data), 1) From 0b693c8f8b88d50114caaa4d2337932d4d172631 Mon Sep 17 00:00:00 2001 From: Chukun Pan Date: Mon, 19 Dec 2022 18:10:52 +0800 Subject: [PATCH 0614/1593] arm64: dts: rockchip: remove unsupported property from sdmmc2 for rock-3a 'supports-sdio' is not part of the DT binding and not supported by the Linux driver. Signed-off-by: Chukun Pan Link: https://lore.kernel.org/r/20221219101052.7899-1-amadeus@jmu.edu.cn Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts index a1c5fdf7d68f1..5af11acb5c16c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts @@ -737,7 +737,6 @@ }; &sdmmc2 { - supports-sdio; bus-width = <4>; disable-wp; cap-sd-highspeed; From 380cd3a34b7f9825a60ccb045611af9cb4533b70 Mon Sep 17 00:00:00 2001 From: Dominik Kobinski Date: Fri, 30 Dec 2022 20:48:45 +0100 Subject: [PATCH 0615/1593] arm64: dts: msm8994-angler: fix the memory map Add reserved regions for memory hole and tz app mem to prevent rebooting. Also enable cont_splash_mem, it is the same as the generic 8994 one. Reported-by: Petr Vorel Signed-off-by: Dominik Kobinski Reviewed-by: Petr Vorel Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221230194845.57780-1-dominikkobinski314@gmail.com --- .../qcom/msm8994-huawei-angler-rev-101.dts | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8994-huawei-angler-rev-101.dts b/arch/arm64/boot/dts/qcom/msm8994-huawei-angler-rev-101.dts index 85abff0e9b3f7..7b0f62144c3ee 100644 --- a/arch/arm64/boot/dts/qcom/msm8994-huawei-angler-rev-101.dts +++ b/arch/arm64/boot/dts/qcom/msm8994-huawei-angler-rev-101.dts @@ -9,9 +9,6 @@ #include "msm8994.dtsi" -/* Angler's firmware does not report where the memory is allocated */ -/delete-node/ &cont_splash_mem; - / { model = "Huawei Nexus 6P"; compatible = "huawei,angler", "qcom,msm8994"; @@ -28,6 +25,22 @@ chosen { stdout-path = "serial0:115200n8"; }; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + tzapp_mem: tzapp@4800000 { + reg = <0 0x04800000 0 0x1900000>; + no-map; + }; + + removed_region: reserved@6300000 { + reg = <0 0x06300000 0 0xD00000>; + no-map; + }; + }; }; &blsp1_uart2 { From 6049aae52392539e505bfb8ccbcff3c26f1d2f0b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 17 Dec 2022 17:05:41 +0100 Subject: [PATCH 0616/1593] PM: AVS: qcom-cpr: Fix an error handling path in cpr_probe() If an error occurs after a successful pm_genpd_init() call, it should be undone by a corresponding pm_genpd_remove(). Add the missing call in the error handling path, as already done in the remove function. Fixes: bf6910abf548 ("power: avs: Add support for CPR (Core Power Reduction)") Signed-off-by: Christophe JAILLET Reviewed-by: Ulf Hansson Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/0f520597dbad89ab99c217c8986912fa53eaf5f9.1671293108.git.christophe.jaillet@wanadoo.fr --- drivers/soc/qcom/cpr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/soc/qcom/cpr.c b/drivers/soc/qcom/cpr.c index e9b854ed1bdfd..144ea68e0920a 100644 --- a/drivers/soc/qcom/cpr.c +++ b/drivers/soc/qcom/cpr.c @@ -1708,12 +1708,16 @@ static int cpr_probe(struct platform_device *pdev) ret = of_genpd_add_provider_simple(dev->of_node, &drv->pd); if (ret) - return ret; + goto err_remove_genpd; platform_set_drvdata(pdev, drv); cpr_debugfs_init(drv); return 0; + +err_remove_genpd: + pm_genpd_remove(&drv->pd); + return ret; } static int cpr_remove(struct platform_device *pdev) From 2bd5ab93335bf2c4d22c8db427822ae637ed8dc3 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 19 Dec 2022 14:19:17 +0100 Subject: [PATCH 0617/1593] arm64: dts: qcom: msm8992: Don't use sfpb mutex MSM8992 uses the same mutex hardware as MSM8994. This was wrong from the start, but never presented as an issue until the sfpb compatible was given different driver data. Fixes: 6a6d1978f9c0 ("arm64: dts: msm8992 SoC and LG Bullhead (Nexus 5X) support") Reported-by: Eugene Lepshy Signed-off-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221219131918.446587-1-konrad.dybcio@linaro.org --- arch/arm64/boot/dts/qcom/msm8992.dtsi | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8992.dtsi b/arch/arm64/boot/dts/qcom/msm8992.dtsi index 10adb4986ef1b..02fc3795dbfd7 100644 --- a/arch/arm64/boot/dts/qcom/msm8992.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8992.dtsi @@ -37,10 +37,6 @@ compatible = "qcom,rpmcc-msm8992", "qcom,rpmcc"; }; -&tcsr_mutex { - compatible = "qcom,sfpb-mutex"; -}; - &timer { interrupts = , , From 69876bc6fd4de3ad2dc7826fe269e91fa2c1807f Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 19 Dec 2022 14:19:18 +0100 Subject: [PATCH 0618/1593] arm64: dts: qcom: msm8992-libra: Fix the memory map The memory map was wrong. Fix it to prevent the device from randomly rebooting. Fixes: 0f5cdb31e850 ("arm64: dts: qcom: Add Xiaomi Libra (Mi 4C) device tree") Signed-off-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221219131918.446587-2-konrad.dybcio@linaro.org --- .../boot/dts/qcom/msm8992-xiaomi-libra.dts | 77 +++++++++++++++---- 1 file changed, 60 insertions(+), 17 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts b/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts index b242c272d2af1..fcca1ba94da69 100644 --- a/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts +++ b/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts @@ -11,6 +11,12 @@ #include #include +/delete-node/ &adsp_mem; +/delete-node/ &audio_mem; +/delete-node/ &mpss_mem; +/delete-node/ &peripheral_region; +/delete-node/ &rmtfs_mem; + / { model = "Xiaomi Mi 4C"; compatible = "xiaomi,libra", "qcom,msm8992"; @@ -70,25 +76,67 @@ #size-cells = <2>; ranges; - /* This is for getting crash logs using Android downstream kernels */ - ramoops@dfc00000 { - compatible = "ramoops"; - reg = <0x0 0xdfc00000 0x0 0x40000>; - console-size = <0x10000>; - record-size = <0x10000>; - ftrace-size = <0x10000>; - pmsg-size = <0x20000>; + memory_hole: hole@6400000 { + reg = <0 0x06400000 0 0x600000>; + no-map; + }; + + memory_hole2: hole2@6c00000 { + reg = <0 0x06c00000 0 0x2400000>; + no-map; + }; + + mpss_mem: mpss@9000000 { + reg = <0 0x09000000 0 0x5a00000>; + no-map; + }; + + tzapp: tzapp@ea00000 { + reg = <0 0x0ea00000 0 0x1900000>; + no-map; + }; + + mdm_rfsa_mem: mdm-rfsa@ca0b0000 { + reg = <0 0xca0b0000 0 0x10000>; + no-map; + }; + + rmtfs_mem: rmtfs@ca100000 { + compatible = "qcom,rmtfs-mem"; + reg = <0 0xca100000 0 0x180000>; + no-map; + + qcom,client-id = <1>; }; - modem_region: modem_region@9000000 { - reg = <0x0 0x9000000 0x0 0x5a00000>; + audio_mem: audio@cb400000 { + reg = <0 0xcb000000 0 0x400000>; + no-mem; + }; + + qseecom_mem: qseecom@cb400000 { + reg = <0 0xcb400000 0 0x1c00000>; + no-mem; + }; + + adsp_rfsa_mem: adsp-rfsa@cd000000 { + reg = <0 0xcd000000 0 0x10000>; no-map; }; - tzapp: modem_region@ea00000 { - reg = <0x0 0xea00000 0x0 0x1900000>; + sensor_rfsa_mem: sensor-rfsa@cd010000 { + reg = <0 0xcd010000 0 0x10000>; no-map; }; + + ramoops@dfc00000 { + compatible = "ramoops"; + reg = <0 0xdfc00000 0 0x40000>; + console-size = <0x10000>; + record-size = <0x10000>; + ftrace-size = <0x10000>; + pmsg-size = <0x20000>; + }; }; }; @@ -130,11 +178,6 @@ status = "okay"; }; -&peripheral_region { - reg = <0x0 0x7400000 0x0 0x1c00000>; - no-map; -}; - &pm8994_spmi_regulators { VDD_APC0: s8 { regulator-min-microvolt = <680000>; From d3de5616d36462a646f5b360ba82d3b09ff668eb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 12 Dec 2022 17:13:38 +0100 Subject: [PATCH 0619/1593] drm/i915/gt: Reset twice After applying an engine reset, on some platforms like Jasperlake, we occasionally detect that the engine state is not cleared until shortly after the resume. As we try to resume the engine with volatile internal state, the first request fails with a spurious CS event (it looks like it reports a lite-restore to the hung context, instead of the expected idle->active context switch). Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org Cc: Mika Kuoppala Signed-off-by: Andi Shyti Reviewed-by: Gwan-gyeong Mun Link: https://patchwork.freedesktop.org/patch/msgid/20221212161338.1007659-1-andi.shyti@linux.intel.com (cherry picked from commit 3db9d590557da3aa2c952f2fecd3e9b703dad790) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_reset.c | 34 ++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 24736ebee17c2..78dc5e493c622 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -278,6 +278,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) { struct intel_uncore *uncore = gt->uncore; + int loops = 2; int err; /* @@ -285,18 +286,39 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) * for fifo space for the write or forcewake the chip for * the read */ - intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask); + do { + intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask); - /* Wait for the device to ack the reset requests */ - err = __intel_wait_for_register_fw(uncore, - GEN6_GDRST, hw_domain_mask, 0, - 500, 0, - NULL); + /* + * Wait for the device to ack the reset requests. + * + * On some platforms, e.g. Jasperlake, we see that the + * engine register state is not cleared until shortly after + * GDRST reports completion, causing a failure as we try + * to immediately resume while the internal state is still + * in flux. If we immediately repeat the reset, the second + * reset appears to serialise with the first, and since + * it is a no-op, the registers should retain their reset + * value. However, there is still a concern that upon + * leaving the second reset, the internal engine state + * is still in flux and not ready for resuming. + */ + err = __intel_wait_for_register_fw(uncore, GEN6_GDRST, + hw_domain_mask, 0, + 2000, 0, + NULL); + } while (err == 0 && --loops); if (err) GT_TRACE(gt, "Wait for 0x%08x engines reset failed\n", hw_domain_mask); + /* + * As we have observed that the engine state is still volatile + * after GDRST is acked, impose a small delay to let everything settle. + */ + udelay(50); + return err; } From 90b926e68f500844dff16b5bcea178dc55cf580a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 10 Jan 2023 07:54:27 +0100 Subject: [PATCH 0620/1593] x86/pat: Fix pat_x_mtrr_type() for MTRR disabled case Since 72cbc8f04fe2 ("x86/PAT: Have pat_enabled() properly reflect state when running on Xen") PAT can be enabled without MTRR. This has resulted in problems e.g. for a SEV-SNP guest running under Hyper-V, when trying to establish a new mapping via memremap() with WB caching mode, as pat_x_mtrr_type() will call mtrr_type_lookup(), which in turn is returning MTRR_TYPE_INVALID due to MTRR being disabled in this configuration. The result is a mapping with UC- caching, leading to severe performance degradation. Fix that by handling MTRR_TYPE_INVALID the same way as MTRR_TYPE_WRBACK in pat_x_mtrr_type() because MTRR_TYPE_INVALID means MTRRs are disabled. [ bp: Massage commit message. ] Fixes: 72cbc8f04fe2 ("x86/PAT: Have pat_enabled() properly reflect state when running on Xen") Reported-by: Michael Kelley (LINUX) Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Michael Kelley Tested-by: Michael Kelley Cc: Link: https://lore.kernel.org/r/20230110065427.20767-1-jgross@suse.com --- arch/x86/mm/pat/memtype.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 46de9cf5c91d2..fb4b1b5e0deab 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -387,7 +387,8 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, u8 mtrr_type, uniform; mtrr_type = mtrr_type_lookup(start, end, &uniform); - if (mtrr_type != MTRR_TYPE_WRBACK) + if (mtrr_type != MTRR_TYPE_WRBACK && + mtrr_type != MTRR_TYPE_INVALID) return _PAGE_CACHE_MODE_UC_MINUS; return _PAGE_CACHE_MODE_WB; From ea97cbebaf861d99c3e892275147e6fca6d2c1ca Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 10 Jan 2023 10:24:52 -0700 Subject: [PATCH 0621/1593] io_uring/fdinfo: include locked hash table in fdinfo output A previous commit split the hash table for polled requests into two parts, but didn't get the fdinfo output updated. This means that it's less useful for debugging, as we may think a given request is not pending poll. Fix this up by dumping the locked hash table contents too. Fixes: 9ca9fb24d5fe ("io_uring: mutex locked poll hashing") Signed-off-by: Jens Axboe --- io_uring/fdinfo.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 2e04850a657b0..882bd56b01ed0 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -170,12 +170,11 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, xa_for_each(&ctx->personalities, index, cred) io_uring_show_cred(m, index, cred); } - if (has_lock) - mutex_unlock(&ctx->uring_lock); seq_puts(m, "PollList:\n"); for (i = 0; i < (1U << ctx->cancel_table.hash_bits); i++) { struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i]; + struct io_hash_bucket *hbl = &ctx->cancel_table_locked.hbs[i]; struct io_kiocb *req; spin_lock(&hb->lock); @@ -183,8 +182,17 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, seq_printf(m, " op=%d, task_works=%d\n", req->opcode, task_work_pending(req->task)); spin_unlock(&hb->lock); + + if (!has_lock) + continue; + hlist_for_each_entry(req, &hbl->list, hash_node) + seq_printf(m, " op=%d, task_works=%d\n", req->opcode, + task_work_pending(req->task)); } + if (has_lock) + mutex_unlock(&ctx->uring_lock); + seq_puts(m, "CqOverflowList:\n"); spin_lock(&ctx->completion_lock); list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) { From 895c0747f726bb50c9b7a805613a61d1b6f9fa06 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 10 Jan 2023 17:44:27 +0100 Subject: [PATCH 0622/1593] vfio/type1: Respect IOMMU reserved regions in vfio_test_domain_fgsp() Since commit cbf7827bc5dc ("iommu/s390: Fix potential s390_domain aperture shrinking") the s390 IOMMU driver uses reserved regions for the system provided DMA ranges of PCI devices. Previously it reduced the size of the IOMMU aperture and checked it on each mapping operation. On current machines the system denies use of DMA addresses below 2^32 for all PCI devices. Usually mapping IOVAs in a reserved regions is harmless until a DMA actually tries to utilize the mapping. However on s390 there is a virtual PCI device called ISM which is implemented in firmware and used for cross LPAR communication. Unlike real PCI devices this device does not use the hardware IOMMU but inspects IOMMU translation tables directly on IOTLB flush (s390 RPCIT instruction). If it detects IOVA mappings outside the allowed ranges it goes into an error state. This error state then causes the device to be unavailable to the KVM guest. Analysing this we found that vfio_test_domain_fgsp() maps 2 pages at DMA address 0 irrespective of the IOMMUs reserved regions. Even if usually harmless this seems wrong in the general case so instead go through the freshly updated IOVA list and try to find a range that isn't reserved, and fits 2 pages, is PAGE_SIZE * 2 aligned. If found use that for testing for fine grained super pages. Fixes: af029169b8fd ("vfio/type1: Check reserved region conflict and update iova list") Signed-off-by: Niklas Schnelle Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230110164427.4051938-2-schnelle@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 23c24fe98c00d..2209372f236db 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1856,24 +1856,33 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, * significantly boosts non-hugetlbfs mappings and doesn't seem to hurt when * hugetlbfs is in use. */ -static void vfio_test_domain_fgsp(struct vfio_domain *domain) +static void vfio_test_domain_fgsp(struct vfio_domain *domain, struct list_head *regions) { - struct page *pages; int ret, order = get_order(PAGE_SIZE * 2); + struct vfio_iova *region; + struct page *pages; + dma_addr_t start; pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); if (!pages) return; - ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2, - IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE); - if (!ret) { - size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE); + list_for_each_entry(region, regions, list) { + start = ALIGN(region->start, PAGE_SIZE * 2); + if (start >= region->end || (region->end - start < PAGE_SIZE * 2)) + continue; - if (unmapped == PAGE_SIZE) - iommu_unmap(domain->domain, PAGE_SIZE, PAGE_SIZE); - else - domain->fgsp = true; + ret = iommu_map(domain->domain, start, page_to_phys(pages), PAGE_SIZE * 2, + IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE); + if (!ret) { + size_t unmapped = iommu_unmap(domain->domain, start, PAGE_SIZE); + + if (unmapped == PAGE_SIZE) + iommu_unmap(domain->domain, start + PAGE_SIZE, PAGE_SIZE); + else + domain->fgsp = true; + } + break; } __free_pages(pages, order); @@ -2326,7 +2335,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, } } - vfio_test_domain_fgsp(domain); + vfio_test_domain_fgsp(domain, &iova_copy); /* replay mappings on new domains */ ret = vfio_iommu_replay(iommu, domain); From fe1f0714385fbcf76b0cbceb02b7277d842014fc Mon Sep 17 00:00:00 2001 From: Peter Newman Date: Tue, 20 Dec 2022 17:11:23 +0100 Subject: [PATCH 0623/1593] x86/resctrl: Fix task CLOSID/RMID update race When the user moves a running task to a new rdtgroup using the task's file interface or by deleting its rdtgroup, the resulting change in CLOSID/RMID must be immediately propagated to the PQR_ASSOC MSR on the task(s) CPUs. x86 allows reordering loads with prior stores, so if the task starts running between a task_curr() check that the CPU hoisted before the stores in the CLOSID/RMID update then it can start running with the old CLOSID/RMID until it is switched again because __rdtgroup_move_task() failed to determine that it needs to be interrupted to obtain the new CLOSID/RMID. Refer to the diagram below: CPU 0 CPU 1 ----- ----- __rdtgroup_move_task(): curr <- t1->cpu->rq->curr __schedule(): rq->curr <- t1 resctrl_sched_in(): t1->{closid,rmid} -> {1,1} t1->{closid,rmid} <- {2,2} if (curr == t1) // false IPI(t1->cpu) A similar race impacts rdt_move_group_tasks(), which updates tasks in a deleted rdtgroup. In both cases, use smp_mb() to order the task_struct::{closid,rmid} stores before the loads in task_curr(). In particular, in the rdt_move_group_tasks() case, simply execute an smp_mb() on every iteration with a matching task. It is possible to use a single smp_mb() in rdt_move_group_tasks(), but this would require two passes and a means of remembering which task_structs were updated in the first loop. However, benchmarking results below showed too little performance impact in the simple approach to justify implementing the two-pass approach. Times below were collected using `perf stat` to measure the time to remove a group containing a 1600-task, parallel workload. CPU: Intel(R) Xeon(R) Platinum P-8136 CPU @ 2.00GHz (112 threads) # mkdir /sys/fs/resctrl/test # echo $$ > /sys/fs/resctrl/test/tasks # perf bench sched messaging -g 40 -l 100000 task-clock time ranges collected using: # perf stat rmdir /sys/fs/resctrl/test Baseline: 1.54 - 1.60 ms smp_mb() every matching task: 1.57 - 1.67 ms [ bp: Massage commit message. ] Fixes: ae28d1aae48a ("x86/resctrl: Use an IPI instead of task_work_add() to update PQR_ASSOC MSR") Fixes: 0efc89be9471 ("x86/intel_rdt: Update task closid immediately on CPU in rmdir and unmount") Signed-off-by: Peter Newman Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Reviewed-by: Babu Moger Cc: Link: https://lore.kernel.org/r/20221220161123.432120-1-peternewman@google.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index e5a48f05e7876..5993da21d8225 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -580,8 +580,10 @@ static int __rdtgroup_move_task(struct task_struct *tsk, /* * Ensure the task's closid and rmid are written before determining if * the task is current that will decide if it will be interrupted. + * This pairs with the full barrier between the rq->curr update and + * resctrl_sched_in() during context switch. */ - barrier(); + smp_mb(); /* * By now, the task's closid and rmid are set. If the task is current @@ -2401,6 +2403,14 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, WRITE_ONCE(t->closid, to->closid); WRITE_ONCE(t->rmid, to->mon.rmid); + /* + * Order the closid/rmid stores above before the loads + * in task_curr(). This pairs with the full barrier + * between the rq->curr update and resctrl_sched_in() + * during context switch. + */ + smp_mb(); + /* * If the task is on a CPU, set the CPU in the mask. * The detection is inaccurate as tasks might move or From 2a81160d29d65b5876ab3f824fda99ae0219f05e Mon Sep 17 00:00:00 2001 From: Peter Newman Date: Tue, 20 Dec 2022 17:41:31 +0100 Subject: [PATCH 0624/1593] x86/resctrl: Fix event counts regression in reused RMIDs When creating a new monitoring group, the RMID allocated for it may have been used by a group which was previously removed. In this case, the hardware counters will have non-zero values which should be deducted from what is reported in the new group's counts. resctrl_arch_reset_rmid() initializes the prev_msr value for counters to 0, causing the initial count to be charged to the new group. Resurrect __rmid_read() and use it to initialize prev_msr correctly. Unlike before, __rmid_read() checks for error bits in the MSR read so that callers don't need to. Fixes: 1d81d15db39c ("x86/resctrl: Move mbm_overflow_count() into resctrl_arch_rmid_read()") Signed-off-by: Peter Newman Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221220164132.443083-1-peternewman@google.com --- arch/x86/kernel/cpu/resctrl/monitor.c | 49 ++++++++++++++++++--------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index efe0c30d3a12d..77538abeb72af 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -146,6 +146,30 @@ static inline struct rmid_entry *__rmid_entry(u32 rmid) return entry; } +static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) +{ + u64 msr_val; + + /* + * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured + * with a valid event code for supported resource type and the bits + * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID, + * IA32_QM_CTR.data (bits 61:0) reports the monitored data. + * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) + * are error bits. + */ + wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); + rdmsrl(MSR_IA32_QM_CTR, msr_val); + + if (msr_val & RMID_VAL_ERROR) + return -EIO; + if (msr_val & RMID_VAL_UNAVAIL) + return -EINVAL; + + *val = msr_val; + return 0; +} + static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom, u32 rmid, enum resctrl_event_id eventid) @@ -172,8 +196,12 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, struct arch_mbm_state *am; am = get_arch_mbm_state(hw_dom, rmid, eventid); - if (am) + if (am) { memset(am, 0, sizeof(*am)); + + /* Record any initial, non-zero count value. */ + __rmid_read(rmid, eventid, &am->prev_msr); + } } static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) @@ -191,25 +219,14 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); struct arch_mbm_state *am; u64 msr_val, chunks; + int ret; if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) return -EINVAL; - /* - * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured - * with a valid event code for supported resource type and the bits - * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID, - * IA32_QM_CTR.data (bits 61:0) reports the monitored data. - * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) - * are error bits. - */ - wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); - rdmsrl(MSR_IA32_QM_CTR, msr_val); - - if (msr_val & RMID_VAL_ERROR) - return -EIO; - if (msr_val & RMID_VAL_UNAVAIL) - return -EINVAL; + ret = __rmid_read(rmid, eventid, &msr_val); + if (ret) + return ret; am = get_arch_mbm_state(hw_dom, rmid, eventid); if (am) { From 77c7248882385397cd7dffe9e1437f59f32ce2de Mon Sep 17 00:00:00 2001 From: Tamim Khan Date: Fri, 30 Dec 2022 00:58:39 -0500 Subject: [PATCH 0625/1593] ACPI: resource: Skip IRQ override on Asus Expertbook B2402CBA Like the Asus Expertbook B2502CBA and various Asus Vivobook laptops, the Asus Expertbook B2402CBA has an ACPI DSDT table that describes IRQ 1 as ActiveLow while the kernel overrides it to Edge_High. This prevents the keyboard from working. To fix this issue, add this laptop to the skip_override_table so that the kernel does not override IRQ 1. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216864 Tested-by: zelenat Signed-off-by: Tamim Khan Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 16dcd31d124fe..192d1784e409b 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -432,6 +432,13 @@ static const struct dmi_system_id asus_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "S5602ZA"), }, }, + { + .ident = "Asus ExpertBook B2402CBA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "B2402CBA"), + }, + }, { .ident = "Asus ExpertBook B2502", .matches = { From 420a1116aef0e8e12c305508f45ce73e5ae30a09 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 9 Jan 2023 20:18:11 +0100 Subject: [PATCH 0626/1593] ACPI: video: Allow selecting NVidia-WMI-EC or Apple GMUX backlight from the cmdline The patches adding NVidia-WMI-EC and Apple GMUX backlight detection support to acpi_video_get_backlight_type(), forgot to update acpi_video_parse_cmdline() to allow manually selecting these from the commandline. Add support for these to acpi_video_parse_cmdline(). Fixes: fe7aebb40d42 ("ACPI: video: Add Nvidia WMI EC brightness control detection (v3)") Fixes: 21245df307cb ("ACPI: video: Add Apple GMUX brightness control detection") Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 1b78c74344928..8a541efc56756 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -50,6 +50,10 @@ static void acpi_video_parse_cmdline(void) acpi_backlight_cmdline = acpi_backlight_video; if (!strcmp("native", acpi_video_backlight_string)) acpi_backlight_cmdline = acpi_backlight_native; + if (!strcmp("nvidia_wmi_ec", acpi_video_backlight_string)) + acpi_backlight_cmdline = acpi_backlight_nvidia_wmi_ec; + if (!strcmp("apple_gmux", acpi_video_backlight_string)) + acpi_backlight_cmdline = acpi_backlight_apple_gmux; if (!strcmp("none", acpi_video_backlight_string)) acpi_backlight_cmdline = acpi_backlight_none; } From 8debed3efe3a731451ad9a91a7a74eeb18a7f7eb Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 9 Jan 2023 04:23:17 +0900 Subject: [PATCH 0627/1593] kbuild: export top-level LDFLAGS_vmlinux only to scripts/Makefile.vmlinux Nathan Chancellor reports that $(NM) emits an error message when GNU Make 4.4 is used to build the ARM zImage. $ make-4.4 ARCH=arm CROSS_COMPILE=arm-linux-gnueabi- O=build defconfig zImage [snip] LD vmlinux NM System.map SORTTAB vmlinux OBJCOPY arch/arm/boot/Image Kernel: arch/arm/boot/Image is ready arm-linux-gnueabi-nm: 'arch/arm/boot/compressed/../../../../vmlinux': No such file /bin/sh: 1: arithmetic expression: expecting primary: " " LDS arch/arm/boot/compressed/vmlinux.lds AS arch/arm/boot/compressed/head.o GZIP arch/arm/boot/compressed/piggy_data AS arch/arm/boot/compressed/piggy.o CC arch/arm/boot/compressed/misc.o This occurs since GNU Make commit 98da874c4303 ("[SV 10593] Export variables to $(shell ...) commands"), and the O= option is needed to reproduce it. The generated zImage is correct despite the error message. As the commit description of 98da874c4303 [1] says, exported variables are passed down to $(shell ) functions, which means exported recursive variables might be expanded earlier than before, in the parse stage. The following test code demonstrates the change for GNU Make 4.4. [Test Makefile] $(shell echo hello > foo) export foo = $(shell cat bar/../foo) $(shell mkdir bar) all: @echo $(foo) [GNU Make 4.3] $ rm -rf bar; make-4.3 hello [GNU Make 4.4] $ rm -rf bar; make-4.4 cat: bar/../foo: No such file or directory hello The 'foo' is a resursively expanded (i.e. lazily expanded) variable. GNU Make 4.3 expands 'foo' just before running the recipe '@echo $(foo)', at this point, the directory 'bar' exists. GNU Make 4.4 expands 'foo' to evaluate $(shell mkdir bar) because it is exported. At this point, the directory 'bar' does not exit yet. The cat command cannot resolve the bar/../foo path, hence the error message. Let's get back to the kernel Makefile. In arch/arm/boot/compressed/Makefile, KBSS_SZ is referenced by LDFLAGS_vmlinux, which is recursive and also exported by the top Makefile. GNU Make 4.3 expands KBSS_SZ just before running the recipes, so no error message. GNU Make 4.4 expands KBSS_SZ in the parse stage, where the directory arm/arm/boot/compressed does not exit yet. When compiled with O=, the output directory is created by $(shell mkdir -p $(obj-dirs)) in scripts/Makefile.build. There are two ways to fix this particular issue: - change "$(obj)/../../../../vmlinux" in KBSS_SZ to "vmlinux" - unexport LDFLAGS_vmlinux This commit takes the latter course because it is what I originally intended. Commit 3ec8a5b33dea ("kbuild: do not export LDFLAGS_vmlinux") unexported LDFLAGS_vmlinux. Commit 5d4aeffbf709 ("kbuild: rebuild .vmlinux.export.o when its prerequisite is updated") accidentally exported it again. We can clean up arch/arm/boot/compressed/Makefile later. [1]: https://git.savannah.gnu.org/cgit/make.git/commit/?id=98da874c43035a490cdca81331724f233a3d0c9a Link: https://lore.kernel.org/all/Y7i8+EjwdnhHtlrr@dev-arch.thelio-3990X/ Fixes: 5d4aeffbf709 ("kbuild: rebuild .vmlinux.export.o when its prerequisite is updated") Reported-by: Nathan Chancellor Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier Tested-by: Nathan Chancellor --- Makefile | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 460716314fb3c..7607e20385a8d 100644 --- a/Makefile +++ b/Makefile @@ -549,7 +549,7 @@ LDFLAGS_MODULE = CFLAGS_KERNEL = RUSTFLAGS_KERNEL = AFLAGS_KERNEL = -export LDFLAGS_vmlinux = +LDFLAGS_vmlinux = # Use USERINCLUDE when you must reference the UAPI directories only. USERINCLUDE := \ @@ -1248,6 +1248,18 @@ vmlinux.o modules.builtin.modinfo modules.builtin: vmlinux_o @: PHONY += vmlinux +# LDFLAGS_vmlinux in the top Makefile defines linker flags for the top vmlinux, +# not for decompressors. LDFLAGS_vmlinux in arch/*/boot/compressed/Makefile is +# unrelated; the decompressors just happen to have the same base name, +# arch/*/boot/compressed/vmlinux. +# Export LDFLAGS_vmlinux only to scripts/Makefile.vmlinux. +# +# _LDFLAGS_vmlinux is a workaround for the 'private export' bug: +# https://savannah.gnu.org/bugs/?61463 +# For Make > 4.4, the following simple code will work: +# vmlinux: private export LDFLAGS_vmlinux := $(LDFLAGS_vmlinux) +vmlinux: private _LDFLAGS_vmlinux := $(LDFLAGS_vmlinux) +vmlinux: export LDFLAGS_vmlinux = $(_LDFLAGS_vmlinux) vmlinux: vmlinux.o $(KBUILD_LDS) modpost $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux From 74d3320f6f7cf72de88a7e8df573821f6db90239 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 10 Jan 2023 14:48:00 +0900 Subject: [PATCH 0628/1593] kbuild: fix 'make modules' error when CONFIG_DEBUG_INFO_BTF_MODULES=y When CONFIG_DEBUG_INFO_BTF_MODULES=y, running 'make modules' in the clean kernel tree will get the following error. $ grep CONFIG_DEBUG_INFO_BTF_MODULES .config CONFIG_DEBUG_INFO_BTF_MODULES=y $ make -s clean $ make modules [snip] AR vmlinux.a ar: ./built-in.a: No such file or directory make: *** [Makefile:1241: vmlinux.a] Error 1 'modules' depends on 'vmlinux', but builtin objects are not built. Define KBUILD_BUILTIN. Fixes: f73edc8951b2 ("kbuild: unify two modpost invocations") Signed-off-by: Masahiro Yamada --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 7607e20385a8d..eb02f3823ce6d 100644 --- a/Makefile +++ b/Makefile @@ -1545,6 +1545,7 @@ endif # *.ko are usually independent of vmlinux, but CONFIG_DEBUG_INFOBTF_MODULES # is an exception. ifdef CONFIG_DEBUG_INFO_BTF_MODULES +KBUILD_BUILTIN := 1 modules: vmlinux endif From f64e4275ef7407d5c3eca20436519bbd1f796e40 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 10 Jan 2023 16:30:28 +0100 Subject: [PATCH 0629/1593] ACPI: Fix selecting wrong ACPI fwnode for the iGPU on some Dell laptops The Dell Latitude E6430 both with and without the optional NVidia dGPU has a bug in its ACPI tables which is causing Linux to assign the wrong ACPI fwnode / companion to the pci_device for the i915 iGPU. Specifically under the PCI root bridge there are these 2 ACPI Device()s : Scope (_SB.PCI0) { Device (GFX0) { Name (_ADR, 0x00020000) // _ADR: Address } ... Device (VID) { Name (_ADR, 0x00020000) // _ADR: Address ... Method (_DOS, 1, NotSerialized) // _DOS: Disable Output Switching { VDP8 = Arg0 VDP1 (One, VDP8) } Method (_DOD, 0, NotSerialized) // _DOD: Display Output Devices { ... } ... } } The non-functional GFX0 ACPI device is a problem, because this gets returned as ACPI companion-device by acpi_find_child_device() for the iGPU. This is a long standing problem and the i915 driver does use the ACPI companion for some things, but works fine without it. However since commit 63f534b8bad9 ("ACPI: PCI: Rework acpi_get_pci_dev()") acpi_get_pci_dev() relies on the physical-node pointer in the acpi_device and that is set on the wrong acpi_device because of the wrong acpi_find_child_device() return. This breaks the ACPI video code, leading to non working backlight control in some cases. Add a type.backlight flag, mark ACPI video bus devices with this and make find_child_checks() return a higher score for children with this flag set, so that it picks the right companion-device. Fixes: 63f534b8bad9 ("ACPI: PCI: Rework acpi_get_pci_dev()") Co-developed-by: Rafael J. Wysocki Signed-off-by: Hans de Goede Cc: 6.1+ # 6.1+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/glue.c | 14 ++++++++++++-- drivers/acpi/scan.c | 7 +++++-- include/acpi/acpi_bus.h | 3 ++- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 204fe94c7e458..a194f30876c59 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -75,7 +75,8 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev) } #define FIND_CHILD_MIN_SCORE 1 -#define FIND_CHILD_MAX_SCORE 2 +#define FIND_CHILD_MID_SCORE 2 +#define FIND_CHILD_MAX_SCORE 3 static int match_any(struct acpi_device *adev, void *not_used) { @@ -96,8 +97,17 @@ static int find_child_checks(struct acpi_device *adev, bool check_children) return -ENODEV; status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta); - if (status == AE_NOT_FOUND) + if (status == AE_NOT_FOUND) { + /* + * Special case: backlight device objects without _STA are + * preferred to other objects with the same _ADR value, because + * it is more likely that they are actually useful. + */ + if (adev->pnp.type.backlight) + return FIND_CHILD_MID_SCORE; + return FIND_CHILD_MIN_SCORE; + } if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED)) return -ENODEV; diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 274344434282e..0c6f06abe3f47 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1370,9 +1370,12 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp, * Some devices don't reliably have _HIDs & _CIDs, so add * synthetic HIDs to make sure drivers can find them. */ - if (acpi_is_video_device(handle)) + if (acpi_is_video_device(handle)) { acpi_add_id(pnp, ACPI_VIDEO_HID); - else if (acpi_bay_match(handle)) + pnp->type.backlight = 1; + break; + } + if (acpi_bay_match(handle)) acpi_add_id(pnp, ACPI_BAY_HID); else if (acpi_dock_match(handle)) acpi_add_id(pnp, ACPI_DOCK_HID); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index cd3b75e08ec3f..e44be31115a67 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -230,7 +230,8 @@ struct acpi_pnp_type { u32 hardware_id:1; u32 bus_address:1; u32 platform_id:1; - u32 reserved:29; + u32 backlight:1; + u32 reserved:28; }; struct acpi_device_pnp { From 4f3085f87b51a551a0647f218d4f324796ecb703 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 10 Jan 2023 23:10:29 +0800 Subject: [PATCH 0630/1593] cpufreq: amd-pstate: fix kernel hang issue while amd-pstate unregistering In the amd_pstate_adjust_perf(), there is one cpufreq_cpu_get() call to increase increments the kobject reference count of policy and make it as busy. Therefore, a corresponding call to cpufreq_cpu_put() is needed to decrement the kobject reference count back, it will resolve the kernel hang issue when unregistering the amd-pstate driver and register the `amd_pstate_epp` driver instance. Fixes: 1d215f0319 ("cpufreq: amd-pstate: Add fast switch function for AMD P-State") Acked-by: Huang Rui Reviewed-by: Mario Limonciello Tested-by: Wyes Karny Signed-off-by: Perry Yuan Cc: 5.17+ # 5.17+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 204e39006dda8..c17bd845f5fcb 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -307,6 +307,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, max_perf = min_perf; amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true); + cpufreq_cpu_put(policy); } static int amd_get_min_freq(struct amd_cpudata *cpudata) From b93fb4405fcb5112c5739c5349afb52ec7f15c07 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 29 Nov 2022 09:57:48 +0800 Subject: [PATCH 0631/1593] ixgbe: fix pci device refcount leak As the comment of pci_get_domain_bus_and_slot() says, it returns a PCI device with refcount incremented, when finish using it, the caller must decrement the reference count by calling pci_dev_put(). In ixgbe_get_first_secondary_devfn() and ixgbe_x550em_a_has_mii(), pci_dev_put() is called to avoid leak. Fixes: 8fa10ef01260 ("ixgbe: register a mdiobus") Signed-off-by: Yang Yingliang Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 24aa97f993ca1..123dca9ce4683 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -855,9 +855,11 @@ static struct pci_dev *ixgbe_get_first_secondary_devfn(unsigned int devfn) rp_pdev = pci_get_domain_bus_and_slot(0, 0, devfn); if (rp_pdev && rp_pdev->subordinate) { bus = rp_pdev->subordinate->number; + pci_dev_put(rp_pdev); return pci_get_domain_bus_and_slot(0, bus, 0); } + pci_dev_put(rp_pdev); return NULL; } @@ -874,6 +876,7 @@ static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw) struct ixgbe_adapter *adapter = hw->back; struct pci_dev *pdev = adapter->pdev; struct pci_dev *func0_pdev; + bool has_mii = false; /* For the C3000 family of SoCs (x550em_a) the internal ixgbe devices * are always downstream of root ports @ 0000:00:16.0 & 0000:00:17.0 @@ -884,15 +887,16 @@ static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw) func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x16, 0)); if (func0_pdev) { if (func0_pdev == pdev) - return true; - else - return false; + has_mii = true; + goto out; } func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x17, 0)); if (func0_pdev == pdev) - return true; + has_mii = true; - return false; +out: + pci_dev_put(func0_pdev); + return has_mii; } /** From 5e91c72e560cc85f7163bbe3d14197268de31383 Mon Sep 17 00:00:00 2001 From: Christopher S Hall Date: Wed, 14 Dec 2022 16:10:38 +0800 Subject: [PATCH 0632/1593] igc: Fix PPS delta between two synchronized end-points This patch fix the pulse per second output delta between two synchronized end-points. Based on Intel Discrete I225 Software User Manual Section 4.2.15 TimeSync Auxiliary Control Register, ST0[Bit 4] and ST1[Bit 7] must be set to ensure that clock output will be toggles based on frequency value defined. This is to ensure that output of the PPS is aligned with the clock. How to test: 1) Running time synchronization on both end points. Ex: ptp4l --step_threshold=1 -m -f gPTP.cfg -i 2) Configure PPS output using below command for both end-points Ex: SDP0 on I225 REV4 SKU variant ./testptp -d /dev/ptp0 -L 0,2 ./testptp -d /dev/ptp0 -p 1000000000 3) Measure the output using analyzer for both end-points Fixes: 87938851b6ef ("igc: enable auxiliary PHC functions for the i225") Signed-off-by: Christopher S Hall Signed-off-by: Muhammad Husaini Zulkifli Acked-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_defines.h | 2 ++ drivers/net/ethernet/intel/igc/igc_ptp.c | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index a7b22639cfcd9..e9747ec5ac0b8 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -475,7 +475,9 @@ #define IGC_TSAUXC_EN_TT0 BIT(0) /* Enable target time 0. */ #define IGC_TSAUXC_EN_TT1 BIT(1) /* Enable target time 1. */ #define IGC_TSAUXC_EN_CLK0 BIT(2) /* Enable Configurable Frequency Clock 0. */ +#define IGC_TSAUXC_ST0 BIT(4) /* Start Clock 0 Toggle on Target Time 0. */ #define IGC_TSAUXC_EN_CLK1 BIT(5) /* Enable Configurable Frequency Clock 1. */ +#define IGC_TSAUXC_ST1 BIT(7) /* Start Clock 1 Toggle on Target Time 1. */ #define IGC_TSAUXC_EN_TS0 BIT(8) /* Enable hardware timestamp 0. */ #define IGC_TSAUXC_AUTT0 BIT(9) /* Auxiliary Timestamp Taken. */ #define IGC_TSAUXC_EN_TS1 BIT(10) /* Enable hardware timestamp 0. */ diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 8dbb9f903ca70..c34734d432e0d 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -322,7 +322,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, ts = ns_to_timespec64(ns); if (rq->perout.index == 1) { if (use_freq) { - tsauxc_mask = IGC_TSAUXC_EN_CLK1; + tsauxc_mask = IGC_TSAUXC_EN_CLK1 | IGC_TSAUXC_ST1; tsim_mask = 0; } else { tsauxc_mask = IGC_TSAUXC_EN_TT1; @@ -333,7 +333,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, freqout = IGC_FREQOUT1; } else { if (use_freq) { - tsauxc_mask = IGC_TSAUXC_EN_CLK0; + tsauxc_mask = IGC_TSAUXC_EN_CLK0 | IGC_TSAUXC_ST0; tsim_mask = 0; } else { tsauxc_mask = IGC_TSAUXC_EN_TT0; @@ -347,10 +347,12 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, tsauxc = rd32(IGC_TSAUXC); tsim = rd32(IGC_TSIM); if (rq->perout.index == 1) { - tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1); + tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1 | + IGC_TSAUXC_ST1); tsim &= ~IGC_TSICR_TT1; } else { - tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0); + tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0 | + IGC_TSAUXC_ST0); tsim &= ~IGC_TSICR_TT0; } if (on) { From 6650c8e906ce58404bfdfceceeba7bd10d397d40 Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Tue, 20 Dec 2022 09:32:46 +0300 Subject: [PATCH 0633/1593] iavf/iavf_main: actually log ->src mask when talking about it This fixes a copy-paste issue where dev_err would log the dst mask even though it is clearly talking about src. Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Fixes: 0075fa0fadd0 ("i40evf: Add support to apply cloud filters") Signed-off-by: Daniil Tatianin Reviewed-by: Michal Swiatkowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index c4e451ef79422..adc02adef83a2 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3850,7 +3850,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, field_flags |= IAVF_CLOUD_FIELD_IIP; } else { dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n", - be32_to_cpu(match.mask->dst)); + be32_to_cpu(match.mask->src)); return -EINVAL; } } From 2f57e4464cddfceda850ae4224779d11b6eb171f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 5 Jan 2023 13:41:11 +0000 Subject: [PATCH 0634/1593] cifs: remove redundant assignment to the variable match The variable match is being assigned a value that is never read, it is being re-assigned a new value later on. The assignment is redundant and can be removed. Cleans up clang scan-build warning: fs/cifs/dfs_cache.c:1302:2: warning: Value stored to 'match' is never read Signed-off-by: Colin Ian King Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/dfs_cache.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 43ad1176dcb9d..e20f8880363fa 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -1299,7 +1299,6 @@ static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, c * Resolve share's hostname and check if server address matches. Otherwise just ignore it * as we could not have upcall to resolve hostname or failed to convert ip address. */ - match = true; extract_unc_hostname(s1, &host, &hostlen); scnprintf(unc, sizeof(unc), "\\\\%.*s", (int)hostlen, host); From 39e8db3c860e2678ce5a7d74193925876507c9eb Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 10 Jan 2023 17:55:20 -0300 Subject: [PATCH 0635/1593] cifs: fix double free on failed kerberos auth If session setup failed with kerberos auth, we ended up freeing cifs_ses::auth_key.response twice in SMB2_auth_kerberos() and sesInfoFree(). Fix this by zeroing out cifs_ses::auth_key.response after freeing it in SMB2_auth_kerberos(). Fixes: a4e430c8c8ba ("cifs: replace kfree() with kfree_sensitive() for sensitive data") Signed-off-by: Paulo Alcantara (SUSE) Acked-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2c484d47c5922..727f16b426be5 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1482,8 +1482,11 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) out_put_spnego_key: key_invalidate(spnego_key); key_put(spnego_key); - if (rc) + if (rc) { kfree_sensitive(ses->auth_key.response); + ses->auth_key.response = NULL; + ses->auth_key.len = 0; + } out: sess_data->result = rc; sess_data->func = NULL; From a1904fa4dfc929e5ea36bd409cf0d800df56fb8b Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 2 Jan 2023 12:52:55 +0100 Subject: [PATCH 0636/1593] firmware: raspberrypi: Fix type assignment We silently cast an unsigned int into a __le32 which makes sparse complain. Moreover, we never actually convert endianness between the CPU's and the expected little-endian value. Fix both at once by calling cpu_to_le32(). Fixes: 40c31955e4e9 ("firmware: raspberrypi: Provide a helper to query a clock max rate") Reported-by: kernel test robot Link: https://lore.kernel.org/r/20221116091712.1309651-3-maxime@cerno.tech Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20230102115255.17802-1-maxime@cerno.tech Signed-off-by: Florian Fainelli --- include/soc/bcm2835/raspberrypi-firmware.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/soc/bcm2835/raspberrypi-firmware.h b/include/soc/bcm2835/raspberrypi-firmware.h index ab955591cb72c..73cac8d0287e8 100644 --- a/include/soc/bcm2835/raspberrypi-firmware.h +++ b/include/soc/bcm2835/raspberrypi-firmware.h @@ -170,7 +170,7 @@ struct rpi_firmware_clk_rate_request { #define RPI_FIRMWARE_CLK_RATE_REQUEST(_id) \ { \ - .id = _id, \ + .id = cpu_to_le32(_id), \ } #if IS_ENABLED(CONFIG_RASPBERRYPI_FIRMWARE) From 980a637d11fe8dfc734f508a422185c2de55e669 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 4 Jan 2023 09:35:09 +0100 Subject: [PATCH 0637/1593] ARM: omap1: fix !ARCH_OMAP1_ANY link failures While compile-testing randconfig builds for the upcoming boardfile removal, I noticed that an earlier patch of mine was completely broken, and the introduction of CONFIG_ARCH_OMAP1_ANY only replaced one set of build failures with another one, now resulting in link failures like ld: drivers/video/fbdev/omap/omapfb_main.o: in function `omapfb_do_probe': drivers/video/fbdev/omap/omapfb_main.c:1703: undefined reference to `omap_set_dma_priority' ld: drivers/dma/ti/omap-dma.o: in function `omap_dma_free_chan_resources': drivers/dma/ti/omap-dma.c:777: undefined reference to `omap_free_dma' drivers/dma/ti/omap-dma.c:1685: undefined reference to `omap_get_plat_info' ld: drivers/usb/gadget/udc/omap_udc.o: in function `next_in_dma': drivers/usb/gadget/udc/omap_udc.c:820: undefined reference to `omap_get_dma_active_status' I tried reworking it, but the resulting patch ended up much bigger than simply avoiding the original problem of unused-function warnings like arch/arm/mach-omap1/mcbsp.c:76:30: error: unused variable 'omap1_mcbsp_ops' [-Werror,-Wunused-variable] As a result, revert the previous fix, and rearrange the code that produces warnings to hide them. For mcbsp, the #ifdef check can simply be removed as the cpu_is_omapxxx() checks already achieve the same result, while in the io.c the easiest solution appears to be to merge the common map bits into each soc specific portion. This gets cleaned in a nicer way after omap7xx support gets dropped, as the remaining SoCs all have the exact same I/O map. Fixes: 615dce5bf736 ("ARM: omap1: fix build with no SoC selected") Cc: stable@vger.kernel.org Acked-by: Aaro Koskinen Signed-off-by: Arnd Bergmann --- arch/arm/mach-omap1/Kconfig | 5 +---- arch/arm/mach-omap1/Makefile | 4 ---- arch/arm/mach-omap1/io.c | 32 +++++++++++++++----------------- arch/arm/mach-omap1/mcbsp.c | 21 --------------------- arch/arm/mach-omap1/pm.h | 7 ------- include/linux/soc/ti/omap1-io.h | 4 ++-- 6 files changed, 18 insertions(+), 55 deletions(-) diff --git a/arch/arm/mach-omap1/Kconfig b/arch/arm/mach-omap1/Kconfig index 538a960257cc7..7ec7ada287e05 100644 --- a/arch/arm/mach-omap1/Kconfig +++ b/arch/arm/mach-omap1/Kconfig @@ -4,6 +4,7 @@ menuconfig ARCH_OMAP1 depends on ARCH_MULTI_V4T || ARCH_MULTI_V5 depends on CPU_LITTLE_ENDIAN depends on ATAGS + select ARCH_OMAP select ARCH_HAS_HOLES_MEMORYMODEL select ARCH_OMAP select CLKSRC_MMIO @@ -45,10 +46,6 @@ config ARCH_OMAP16XX select CPU_ARM926T select OMAP_DM_TIMER -config ARCH_OMAP1_ANY - select ARCH_OMAP - def_bool ARCH_OMAP730 || ARCH_OMAP850 || ARCH_OMAP15XX || ARCH_OMAP16XX - config ARCH_OMAP bool diff --git a/arch/arm/mach-omap1/Makefile b/arch/arm/mach-omap1/Makefile index 506074b86333f..0615cb0ba580b 100644 --- a/arch/arm/mach-omap1/Makefile +++ b/arch/arm/mach-omap1/Makefile @@ -3,8 +3,6 @@ # Makefile for the linux kernel. # -ifdef CONFIG_ARCH_OMAP1_ANY - # Common support obj-y := io.o id.o sram-init.o sram.o time.o irq.o mux.o flash.o \ serial.o devices.o dma.o omap-dma.o fb.o @@ -59,5 +57,3 @@ obj-$(CONFIG_ARCH_OMAP730) += gpio7xx.o obj-$(CONFIG_ARCH_OMAP850) += gpio7xx.o obj-$(CONFIG_ARCH_OMAP15XX) += gpio15xx.o obj-$(CONFIG_ARCH_OMAP16XX) += gpio16xx.o - -endif diff --git a/arch/arm/mach-omap1/io.c b/arch/arm/mach-omap1/io.c index d2db9b8aed3fb..0074b011a05a4 100644 --- a/arch/arm/mach-omap1/io.c +++ b/arch/arm/mach-omap1/io.c @@ -22,17 +22,14 @@ * The machine specific code may provide the extra mapping besides the * default mapping provided here. */ -static struct map_desc omap_io_desc[] __initdata = { +#if defined (CONFIG_ARCH_OMAP730) || defined (CONFIG_ARCH_OMAP850) +static struct map_desc omap7xx_io_desc[] __initdata = { { .virtual = OMAP1_IO_VIRT, .pfn = __phys_to_pfn(OMAP1_IO_PHYS), .length = OMAP1_IO_SIZE, .type = MT_DEVICE - } -}; - -#if defined (CONFIG_ARCH_OMAP730) || defined (CONFIG_ARCH_OMAP850) -static struct map_desc omap7xx_io_desc[] __initdata = { + }, { .virtual = OMAP7XX_DSP_BASE, .pfn = __phys_to_pfn(OMAP7XX_DSP_START), @@ -49,6 +46,12 @@ static struct map_desc omap7xx_io_desc[] __initdata = { #ifdef CONFIG_ARCH_OMAP15XX static struct map_desc omap1510_io_desc[] __initdata = { + { + .virtual = OMAP1_IO_VIRT, + .pfn = __phys_to_pfn(OMAP1_IO_PHYS), + .length = OMAP1_IO_SIZE, + .type = MT_DEVICE + }, { .virtual = OMAP1510_DSP_BASE, .pfn = __phys_to_pfn(OMAP1510_DSP_START), @@ -65,6 +68,12 @@ static struct map_desc omap1510_io_desc[] __initdata = { #if defined(CONFIG_ARCH_OMAP16XX) static struct map_desc omap16xx_io_desc[] __initdata = { + { + .virtual = OMAP1_IO_VIRT, + .pfn = __phys_to_pfn(OMAP1_IO_PHYS), + .length = OMAP1_IO_SIZE, + .type = MT_DEVICE + }, { .virtual = OMAP16XX_DSP_BASE, .pfn = __phys_to_pfn(OMAP16XX_DSP_START), @@ -79,18 +88,9 @@ static struct map_desc omap16xx_io_desc[] __initdata = { }; #endif -/* - * Maps common IO regions for omap1 - */ -static void __init omap1_map_common_io(void) -{ - iotable_init(omap_io_desc, ARRAY_SIZE(omap_io_desc)); -} - #if defined (CONFIG_ARCH_OMAP730) || defined (CONFIG_ARCH_OMAP850) void __init omap7xx_map_io(void) { - omap1_map_common_io(); iotable_init(omap7xx_io_desc, ARRAY_SIZE(omap7xx_io_desc)); } #endif @@ -98,7 +98,6 @@ void __init omap7xx_map_io(void) #ifdef CONFIG_ARCH_OMAP15XX void __init omap15xx_map_io(void) { - omap1_map_common_io(); iotable_init(omap1510_io_desc, ARRAY_SIZE(omap1510_io_desc)); } #endif @@ -106,7 +105,6 @@ void __init omap15xx_map_io(void) #if defined(CONFIG_ARCH_OMAP16XX) void __init omap16xx_map_io(void) { - omap1_map_common_io(); iotable_init(omap16xx_io_desc, ARRAY_SIZE(omap16xx_io_desc)); } #endif diff --git a/arch/arm/mach-omap1/mcbsp.c b/arch/arm/mach-omap1/mcbsp.c index 05c25c432449f..b1632cbe37e6f 100644 --- a/arch/arm/mach-omap1/mcbsp.c +++ b/arch/arm/mach-omap1/mcbsp.c @@ -89,7 +89,6 @@ static struct omap_mcbsp_ops omap1_mcbsp_ops = { #define OMAP1610_MCBSP2_BASE 0xfffb1000 #define OMAP1610_MCBSP3_BASE 0xe1017000 -#if defined(CONFIG_ARCH_OMAP730) || defined(CONFIG_ARCH_OMAP850) struct resource omap7xx_mcbsp_res[][6] = { { { @@ -159,14 +158,7 @@ static struct omap_mcbsp_platform_data omap7xx_mcbsp_pdata[] = { }; #define OMAP7XX_MCBSP_RES_SZ ARRAY_SIZE(omap7xx_mcbsp_res[1]) #define OMAP7XX_MCBSP_COUNT ARRAY_SIZE(omap7xx_mcbsp_res) -#else -#define omap7xx_mcbsp_res_0 NULL -#define omap7xx_mcbsp_pdata NULL -#define OMAP7XX_MCBSP_RES_SZ 0 -#define OMAP7XX_MCBSP_COUNT 0 -#endif -#ifdef CONFIG_ARCH_OMAP15XX struct resource omap15xx_mcbsp_res[][6] = { { { @@ -266,14 +258,7 @@ static struct omap_mcbsp_platform_data omap15xx_mcbsp_pdata[] = { }; #define OMAP15XX_MCBSP_RES_SZ ARRAY_SIZE(omap15xx_mcbsp_res[1]) #define OMAP15XX_MCBSP_COUNT ARRAY_SIZE(omap15xx_mcbsp_res) -#else -#define omap15xx_mcbsp_res_0 NULL -#define omap15xx_mcbsp_pdata NULL -#define OMAP15XX_MCBSP_RES_SZ 0 -#define OMAP15XX_MCBSP_COUNT 0 -#endif -#ifdef CONFIG_ARCH_OMAP16XX struct resource omap16xx_mcbsp_res[][6] = { { { @@ -373,12 +358,6 @@ static struct omap_mcbsp_platform_data omap16xx_mcbsp_pdata[] = { }; #define OMAP16XX_MCBSP_RES_SZ ARRAY_SIZE(omap16xx_mcbsp_res[1]) #define OMAP16XX_MCBSP_COUNT ARRAY_SIZE(omap16xx_mcbsp_res) -#else -#define omap16xx_mcbsp_res_0 NULL -#define omap16xx_mcbsp_pdata NULL -#define OMAP16XX_MCBSP_RES_SZ 0 -#define OMAP16XX_MCBSP_COUNT 0 -#endif static void omap_mcbsp_register_board_cfg(struct resource *res, int res_count, struct omap_mcbsp_platform_data *config, int size) diff --git a/arch/arm/mach-omap1/pm.h b/arch/arm/mach-omap1/pm.h index d9165709c5323..0d1f092821ff8 100644 --- a/arch/arm/mach-omap1/pm.h +++ b/arch/arm/mach-omap1/pm.h @@ -106,13 +106,6 @@ #define OMAP7XX_IDLECT3 0xfffece24 #define OMAP7XX_IDLE_LOOP_REQUEST 0x0C00 -#if !defined(CONFIG_ARCH_OMAP730) && \ - !defined(CONFIG_ARCH_OMAP850) && \ - !defined(CONFIG_ARCH_OMAP15XX) && \ - !defined(CONFIG_ARCH_OMAP16XX) -#warning "Power management for this processor not implemented yet" -#endif - #ifndef __ASSEMBLER__ #include diff --git a/include/linux/soc/ti/omap1-io.h b/include/linux/soc/ti/omap1-io.h index f7f12728d4a63..9a60f45899d3c 100644 --- a/include/linux/soc/ti/omap1-io.h +++ b/include/linux/soc/ti/omap1-io.h @@ -5,7 +5,7 @@ #ifndef __ASSEMBLER__ #include -#ifdef CONFIG_ARCH_OMAP1_ANY +#ifdef CONFIG_ARCH_OMAP1 /* * NOTE: Please use ioremap + __raw_read/write where possible instead of these */ @@ -15,7 +15,7 @@ extern u32 omap_readl(u32 pa); extern void omap_writeb(u8 v, u32 pa); extern void omap_writew(u16 v, u32 pa); extern void omap_writel(u32 v, u32 pa); -#else +#elif defined(CONFIG_COMPILE_TEST) static inline u8 omap_readb(u32 pa) { return 0; } static inline u16 omap_readw(u32 pa) { return 0; } static inline u32 omap_readl(u32 pa) { return 0; } From 9d46ce57f4d1c626bb48170226ea5e35deb5877c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Dec 2022 22:06:49 +0100 Subject: [PATCH 0638/1593] ARM: omap1: fix building gpio15xx In some randconfig builds, the asm/irq.h header is not included in gpio15xx.c, so add an explicit include to avoid a build fialure: In file included from arch/arm/mach-omap1/gpio15xx.c:15: arch/arm/mach-omap1/irqs.h:99:34: error: 'NR_IRQS_LEGACY' undeclared here (not in a function) 99 | #define IH2_BASE (NR_IRQS_LEGACY + 32) | ^~~~~~~~~~~~~~ arch/arm/mach-omap1/irqs.h:105:38: note: in expansion of macro 'IH2_BASE' 105 | #define INT_MPUIO (5 + IH2_BASE) | ^~~~~~~~ arch/arm/mach-omap1/gpio15xx.c:28:27: note: in expansion of macro 'INT_MPUIO' 28 | .start = INT_MPUIO, | ^~~~~~~~~ Acked-by: Aaro Koskinen Signed-off-by: Arnd Bergmann --- arch/arm/mach-omap1/gpio15xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-omap1/gpio15xx.c b/arch/arm/mach-omap1/gpio15xx.c index c675f11de99db..61fa26efd8653 100644 --- a/arch/arm/mach-omap1/gpio15xx.c +++ b/arch/arm/mach-omap1/gpio15xx.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "irqs.h" From ba029e9991d9be90a28b6a0ceb25e9a6fb348829 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 9 Jan 2023 14:16:42 -0500 Subject: [PATCH 0639/1593] drm/amdkfd: Add sync after creating vram bo There will be data corruption on vram allocated by svm if the initialization is not complete and application is writting on the memory. Adding sync to wait for the initialization completion is to resolve this issue. Signed-off-by: Eric Huang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 814f99888ab10..b94d2c1422ad8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -570,6 +570,15 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, goto reserve_bo_failed; } + if (clear) { + r = amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false); + if (r) { + pr_debug("failed %d to sync bo\n", r); + amdgpu_bo_unreserve(bo); + goto reserve_bo_failed; + } + } + r = dma_resv_reserve_fences(bo->tbo.base.resv, 1); if (r) { pr_debug("failed %d to reserve bo\n", r); From 972fb53d3605eb6cdf0d6ae9a52e910626a91ff7 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Tue, 10 Jan 2023 11:33:44 +0800 Subject: [PATCH 0640/1593] drm/amd/pm/smu13: BACO is supported when it's in BACO state This leverages the logic in smu11. No need to talk to SMU to check BACO enablement as it's in BACO state already. Signed-off-by: Guchun Chen Reviewed-by: Kenneth Feng Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0, 6.1 --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index d1f50d42288d2..b4373b6568ae6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2298,6 +2298,10 @@ bool smu_v13_0_baco_is_support(struct smu_context *smu) !smu_baco->platform_support) return false; + /* return true if ASIC is in BACO state already */ + if (smu_v13_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER) + return true; + if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) && !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT)) return false; From a6941f89d7c6a6ba49316bbd7da2fb2f719119a7 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Thu, 5 Jan 2023 14:01:18 -0500 Subject: [PATCH 0641/1593] drm/amdkfd: Fix NULL pointer error for GC 11.0.1 on mGPU The point bo->kfd_bo is NULL for queue's write pointer BO when creating queue on mGPU. To avoid using the pointer fixes the error. Signed-off-by: Eric Huang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index b15091d8310d9..3b5c53712d319 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2099,7 +2099,7 @@ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_b } amdgpu_amdkfd_remove_eviction_fence( - bo, bo->kfd_bo->process_info->eviction_fence); + bo, bo->vm_bo->vm->process_info->eviction_fence); amdgpu_bo_unreserve(bo); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ecb4c3abc6297..c06ada0844ba1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -200,7 +200,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; if (q->wptr_bo) { - wptr_addr_off = (uint64_t)q->properties.write_ptr - (uint64_t)q->wptr_bo->kfd_bo->va; + wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off; } From 9c7417b5ec440242bb5b64521acd53d4e19130c1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 3 Oct 2022 11:06:57 +0200 Subject: [PATCH 0642/1593] coredump: Move dump_emit_page() to kill unused warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_ELF_CORE is not set: fs/coredump.c:835:12: error: ‘dump_emit_page’ defined but not used [-Werror=unused-function] 835 | static int dump_emit_page(struct coredump_params *cprm, struct page *page) | ^~~~~~~~~~~~~~ Fix this by moving dump_emit_page() inside the existing section protected by #ifdef CONFIG_ELF_CORE. Fixes: 06bbaa6dc53cb720 ("[coredump] don't use __kernel_write() on kmap_local_page()") Signed-off-by: Geert Uytterhoeven Signed-off-by: Al Viro --- fs/coredump.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index de78bde2991be..a25ecec9ca7c4 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -838,6 +838,30 @@ static int __dump_skip(struct coredump_params *cprm, size_t nr) } } +int dump_emit(struct coredump_params *cprm, const void *addr, int nr) +{ + if (cprm->to_skip) { + if (!__dump_skip(cprm, cprm->to_skip)) + return 0; + cprm->to_skip = 0; + } + return __dump_emit(cprm, addr, nr); +} +EXPORT_SYMBOL(dump_emit); + +void dump_skip_to(struct coredump_params *cprm, unsigned long pos) +{ + cprm->to_skip = pos - cprm->pos; +} +EXPORT_SYMBOL(dump_skip_to); + +void dump_skip(struct coredump_params *cprm, size_t nr) +{ + cprm->to_skip += nr; +} +EXPORT_SYMBOL(dump_skip); + +#ifdef CONFIG_ELF_CORE static int dump_emit_page(struct coredump_params *cprm, struct page *page) { struct bio_vec bvec = { @@ -871,30 +895,6 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page) return 1; } -int dump_emit(struct coredump_params *cprm, const void *addr, int nr) -{ - if (cprm->to_skip) { - if (!__dump_skip(cprm, cprm->to_skip)) - return 0; - cprm->to_skip = 0; - } - return __dump_emit(cprm, addr, nr); -} -EXPORT_SYMBOL(dump_emit); - -void dump_skip_to(struct coredump_params *cprm, unsigned long pos) -{ - cprm->to_skip = pos - cprm->pos; -} -EXPORT_SYMBOL(dump_skip_to); - -void dump_skip(struct coredump_params *cprm, size_t nr) -{ - cprm->to_skip += nr; -} -EXPORT_SYMBOL(dump_skip); - -#ifdef CONFIG_ELF_CORE int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len) { From 96398560f26aa07e8f2969d73c8197e6a6d10407 Mon Sep 17 00:00:00 2001 From: Frederick Lawler Date: Mon, 9 Jan 2023 10:39:06 -0600 Subject: [PATCH 0643/1593] net: sched: disallow noqueue for qdisc classes While experimenting with applying noqueue to a classful queue discipline, we discovered a NULL pointer dereference in the __dev_queue_xmit() path that generates a kernel OOPS: # dev=enp0s5 # tc qdisc replace dev $dev root handle 1: htb default 1 # tc class add dev $dev parent 1: classid 1:1 htb rate 10mbit # tc qdisc add dev $dev parent 1:1 handle 10: noqueue # ping -I $dev -w 1 -c 1 1.1.1.1 [ 2.172856] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 2.173217] #PF: supervisor instruction fetch in kernel mode ... [ 2.178451] Call Trace: [ 2.178577] [ 2.178686] htb_enqueue+0x1c8/0x370 [ 2.178880] dev_qdisc_enqueue+0x15/0x90 [ 2.179093] __dev_queue_xmit+0x798/0xd00 [ 2.179305] ? _raw_write_lock_bh+0xe/0x30 [ 2.179522] ? __local_bh_enable_ip+0x32/0x70 [ 2.179759] ? ___neigh_create+0x610/0x840 [ 2.179968] ? eth_header+0x21/0xc0 [ 2.180144] ip_finish_output2+0x15e/0x4f0 [ 2.180348] ? dst_output+0x30/0x30 [ 2.180525] ip_push_pending_frames+0x9d/0xb0 [ 2.180739] raw_sendmsg+0x601/0xcb0 [ 2.180916] ? _raw_spin_trylock+0xe/0x50 [ 2.181112] ? _raw_spin_unlock_irqrestore+0x16/0x30 [ 2.181354] ? get_page_from_freelist+0xcd6/0xdf0 [ 2.181594] ? sock_sendmsg+0x56/0x60 [ 2.181781] sock_sendmsg+0x56/0x60 [ 2.181958] __sys_sendto+0xf7/0x160 [ 2.182139] ? handle_mm_fault+0x6e/0x1d0 [ 2.182366] ? do_user_addr_fault+0x1e1/0x660 [ 2.182627] __x64_sys_sendto+0x1b/0x30 [ 2.182881] do_syscall_64+0x38/0x90 [ 2.183085] entry_SYSCALL_64_after_hwframe+0x63/0xcd ... [ 2.187402] Previously in commit d66d6c3152e8 ("net: sched: register noqueue qdisc"), NULL was set for the noqueue discipline on noqueue init so that __dev_queue_xmit() falls through for the noqueue case. This also sets a bypass of the enqueue NULL check in the register_qdisc() function for the struct noqueue_disc_ops. Classful queue disciplines make it past the NULL check in __dev_queue_xmit() because the discipline is set to htb (in this case), and then in the call to __dev_xmit_skb(), it calls into htb_enqueue() which grabs a leaf node for a class and then calls qdisc_enqueue() by passing in a queue discipline which assumes ->enqueue() is not set to NULL. Fix this by not allowing classes to be assigned to the noqueue discipline. Linux TC Notes states that classes cannot be set to the noqueue discipline. [1] Let's enforce that here. Links: 1. https://linux-tc-notes.sourceforge.net/tc/doc/sch_noqueue.txt Fixes: d66d6c3152e8 ("net: sched: register noqueue qdisc") Cc: stable@vger.kernel.org Signed-off-by: Frederick Lawler Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20230109163906.706000-1-fred@cloudflare.com Signed-off-by: Jakub Kicinski --- net/sched/sch_api.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2317db02c764d..72d2c204d5f34 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1133,6 +1133,11 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, return -ENOENT; } + if (new && new->ops == &noqueue_qdisc_ops) { + NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class"); + return -EINVAL; + } + err = cops->graft(parent, cl, new, &old, extack); if (err) return err; From 9c445d2637c938a800fcc8b5f0b10e60c94460c7 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Tue, 10 Jan 2023 14:45:24 +0100 Subject: [PATCH 0644/1593] Input: i8042 - add Clevo PCX0DX to i8042 quirk table The Clevo PCX0DX/TUXEDO XP1511, need quirks for the keyboard to not be occasionally unresponsive after resume. Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Reviewed-by: Mattijs Korpershoek Link: https://lore.kernel.org/r/20230110134524.553620-1-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 46f8a694291ed..efc61736099b9 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1238,6 +1238,13 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PCX0DX"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "X170SM"), From 22aeb01db7080e18c6aeb4361cc2556c9887099a Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 10 Jan 2023 19:23:21 -0300 Subject: [PATCH 0645/1593] cifs: do not query ifaces on smb1 mounts Users have reported the following error on every 600 seconds (SMB_INTERFACE_POLL_INTERVAL) when mounting SMB1 shares: CIFS: VFS: \\srv\share error -5 on ioctl to get interface list It's supported only by SMB2+, so do not query network interfaces on SMB1 mounts. Fixes: 6e1c1c08cdf3 ("cifs: periodically query network interfaces from server") Reviewed-by: Shyam Prasad N Reviewed-by: Tom Talpey Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/connect.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d371259d6808a..b2a04b4e89a5e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2606,11 +2606,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) INIT_LIST_HEAD(&tcon->pending_opens); tcon->status = TID_GOOD; - /* schedule query interfaces poll */ INIT_DELAYED_WORK(&tcon->query_interfaces, smb2_query_server_interfaces); - queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, - (SMB_INTERFACE_POLL_INTERVAL * HZ)); + if (ses->server->dialect >= SMB30_PROT_ID && + (ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { + /* schedule query interfaces poll */ + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); + } spin_lock(&cifs_tcp_ses_lock); list_add(&tcon->tcon_list, &ses->tcon_list); From 2fe58d977ee05da5bb89ef5dc4f5bf2dc15db46f Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 10 Jan 2023 20:35:46 -0300 Subject: [PATCH 0646/1593] cifs: fix potential memory leaks in session setup Make sure to free cifs_ses::auth_key.response before allocating it as we might end up leaking memory in reconnect or mounting. Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cifsencrypt.c | 1 + fs/cifs/sess.c | 2 ++ fs/cifs/smb2pdu.c | 1 + 3 files changed, 4 insertions(+) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 5db73c0f792a5..cbc18b4a9cb20 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -278,6 +278,7 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) * ( for NTLMSSP_AV_NB_DOMAIN_NAME followed by NTLMSSP_AV_EOL ) + * unicode length of a netbios domain name */ + kfree_sensitive(ses->auth_key.response); ses->auth_key.len = size + 2 * dlen; ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL); if (!ses->auth_key.response) { diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 0b842a07e1579..c47b254f0d1e2 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -815,6 +815,7 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, return -EINVAL; } if (tilen) { + kfree_sensitive(ses->auth_key.response); ses->auth_key.response = kmemdup(bcc_ptr + tioffset, tilen, GFP_KERNEL); if (!ses->auth_key.response) { @@ -1428,6 +1429,7 @@ sess_auth_kerberos(struct sess_data *sess_data) goto out_put_spnego_key; } + kfree_sensitive(ses->auth_key.response); ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, GFP_KERNEL); if (!ses->auth_key.response) { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 727f16b426be5..4b71f4a92f76c 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1453,6 +1453,7 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) /* keep session key if binding */ if (!is_binding) { + kfree_sensitive(ses->auth_key.response); ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, GFP_KERNEL); if (!ses->auth_key.response) { From 3287ebd7fd01e853ca4da8be675322429400e2bd Mon Sep 17 00:00:00 2001 From: Ojaswin Mujoo Date: Thu, 5 Jan 2023 01:54:37 +0530 Subject: [PATCH 0647/1593] powerpc/boot: Fix incorrect version calculation issue in ld_version The ld_version() function computes the wrong version value for certain ld versions such as the following: $ ld --version GNU ld (GNU Binutils; SUSE Linux Enterprise 15) 2.37.20211103-150100.7.37 For input 2.37.20211103, the value computed is 202348030000 which is higher than the value for a later version like 2.39.0, which is 23900000. This issue was highlighted because with the above ld version, the powerpc kernel build started failing with ld error: "unrecognized option --no-warn-rwx-segments". This was caused due to the recent commit 579aee9fc594 ("powerpc: suppress some linker warnings in recent linker versions") which added the --no-warn-rwx-segments linker flag if the ld version is greater than 2.39. Due to the bug in ld_version(), ld version 2.37.20111103 is wrongly calculated to be greater than 2.39 and the unsupported flag is added. To fix it, if version is of the form x.y.z and length(z) == 8, then most probably it is a date [yyyymmdd] commonly used for release snapshots and not an actual new version. Hence, ignore the date part replacing it with 0. Fixes: 579aee9fc594 ("powerpc: suppress some linker warnings in recent linker versions") Signed-off-by: Ojaswin Mujoo [mpe: Tweak change log wording/formatting, add Fixes tag] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230104202437.90039-1-ojaswin@linux.ibm.com --- arch/powerpc/boot/wrapper | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index af04cea82b941..352d7de24018f 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -210,6 +210,10 @@ ld_version() gsub(".*version ", ""); gsub("-.*", ""); split($1,a, "."); + if( length(a[3]) == "8" ) + # a[3] is probably a date of format yyyymmdd used for release snapshots. We + # can assume it to be zero as it does not signify a new version as such. + a[3] = 0; print a[1]*100000000 + a[2]*1000000 + a[3]*10000; exit }' From 76d588dddc459fefa1da96e0a081a397c5c8e216 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 6 Jan 2023 12:21:57 +0530 Subject: [PATCH 0648/1593] powerpc/imc-pmu: Fix use of mutex in IRQs disabled section Current imc-pmu code triggers a WARNING with CONFIG_DEBUG_ATOMIC_SLEEP and CONFIG_PROVE_LOCKING enabled, while running a thread_imc event. Command to trigger the warning: # perf stat -e thread_imc/CPM_CS_FROM_L4_MEM_X_DPTEG/ sleep 5 Performance counter stats for 'sleep 5': 0 thread_imc/CPM_CS_FROM_L4_MEM_X_DPTEG/ 5.002117947 seconds time elapsed 0.000131000 seconds user 0.001063000 seconds sys Below is snippet of the warning in dmesg: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 2869, name: perf-exec preempt_count: 2, expected: 0 4 locks held by perf-exec/2869: #0: c00000004325c540 (&sig->cred_guard_mutex){+.+.}-{3:3}, at: bprm_execve+0x64/0xa90 #1: c00000004325c5d8 (&sig->exec_update_lock){++++}-{3:3}, at: begin_new_exec+0x460/0xef0 #2: c0000003fa99d4e0 (&cpuctx_lock){-...}-{2:2}, at: perf_event_exec+0x290/0x510 #3: c000000017ab8418 (&ctx->lock){....}-{2:2}, at: perf_event_exec+0x29c/0x510 irq event stamp: 4806 hardirqs last enabled at (4805): [] _raw_spin_unlock_irqrestore+0x94/0xd0 hardirqs last disabled at (4806): [] perf_event_exec+0x394/0x510 softirqs last enabled at (0): [] copy_process+0xc34/0x1ff0 softirqs last disabled at (0): [<0000000000000000>] 0x0 CPU: 36 PID: 2869 Comm: perf-exec Not tainted 6.2.0-rc2-00011-g1247637727f2 #61 Hardware name: 8375-42A POWER9 0x4e1202 opal:v7.0-16-g9b85f7d961 PowerNV Call Trace: dump_stack_lvl+0x98/0xe0 (unreliable) __might_resched+0x2f8/0x310 __mutex_lock+0x6c/0x13f0 thread_imc_event_add+0xf4/0x1b0 event_sched_in+0xe0/0x210 merge_sched_in+0x1f0/0x600 visit_groups_merge.isra.92.constprop.166+0x2bc/0x6c0 ctx_flexible_sched_in+0xcc/0x140 ctx_sched_in+0x20c/0x2a0 ctx_resched+0x104/0x1c0 perf_event_exec+0x340/0x510 begin_new_exec+0x730/0xef0 load_elf_binary+0x3f8/0x1e10 ... do not call blocking ops when !TASK_RUNNING; state=2001 set at [<00000000fd63e7cf>] do_nanosleep+0x60/0x1a0 WARNING: CPU: 36 PID: 2869 at kernel/sched/core.c:9912 __might_sleep+0x9c/0xb0 CPU: 36 PID: 2869 Comm: sleep Tainted: G W 6.2.0-rc2-00011-g1247637727f2 #61 Hardware name: 8375-42A POWER9 0x4e1202 opal:v7.0-16-g9b85f7d961 PowerNV NIP: c000000000194a1c LR: c000000000194a18 CTR: c000000000a78670 REGS: c00000004d2134e0 TRAP: 0700 Tainted: G W (6.2.0-rc2-00011-g1247637727f2) MSR: 9000000000021033 CR: 48002824 XER: 00000000 CFAR: c00000000013fb64 IRQMASK: 1 The above warning triggered because the current imc-pmu code uses mutex lock in interrupt disabled sections. The function mutex_lock() internally calls __might_resched(), which will check if IRQs are disabled and in case IRQs are disabled, it will trigger the warning. Fix the issue by changing the mutex lock to spinlock. Fixes: 8f95faaac56c ("powerpc/powernv: Detect and create IMC device") Reported-by: Michael Petlan Reported-by: Peter Zijlstra Signed-off-by: Kajol Jain [mpe: Fix comments, trim oops in change log, add reported-by tags] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230106065157.182648-1-kjain@linux.ibm.com --- arch/powerpc/include/asm/imc-pmu.h | 2 +- arch/powerpc/perf/imc-pmu.c | 136 ++++++++++++++--------------- 2 files changed, 67 insertions(+), 71 deletions(-) diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index 4f897993b7107..699a88584ae16 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -137,7 +137,7 @@ struct imc_pmu { * are inited. */ struct imc_pmu_ref { - struct mutex lock; + spinlock_t lock; unsigned int id; int refc; }; diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index d517aba94d1bc..100e97daf76ba 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -14,6 +14,7 @@ #include #include #include +#include /* Nest IMC data structures and variables */ @@ -21,7 +22,7 @@ * Used to avoid races in counting the nest-pmu units during hotplug * register and unregister */ -static DEFINE_MUTEX(nest_init_lock); +static DEFINE_SPINLOCK(nest_init_lock); static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc); static struct imc_pmu **per_nest_pmu_arr; static cpumask_t nest_imc_cpumask; @@ -50,7 +51,7 @@ static int trace_imc_mem_size; * core and trace-imc */ static struct imc_pmu_ref imc_global_refc = { - .lock = __MUTEX_INITIALIZER(imc_global_refc.lock), + .lock = __SPIN_LOCK_INITIALIZER(imc_global_refc.lock), .id = 0, .refc = 0, }; @@ -400,7 +401,7 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu) get_hard_smp_processor_id(cpu)); /* * If this is the last cpu in this chip then, skip the reference - * count mutex lock and make the reference count on this chip zero. + * count lock and make the reference count on this chip zero. */ ref = get_nest_pmu_ref(cpu); if (!ref) @@ -462,15 +463,15 @@ static void nest_imc_counters_release(struct perf_event *event) /* * See if we need to disable the nest PMU. * If no events are currently in use, then we have to take a - * mutex to ensure that we don't race with another task doing + * lock to ensure that we don't race with another task doing * enable or disable the nest counters. */ ref = get_nest_pmu_ref(event->cpu); if (!ref) return; - /* Take the mutex lock for this node and then decrement the reference count */ - mutex_lock(&ref->lock); + /* Take the lock for this node and then decrement the reference count */ + spin_lock(&ref->lock); if (ref->refc == 0) { /* * The scenario where this is true is, when perf session is @@ -482,7 +483,7 @@ static void nest_imc_counters_release(struct perf_event *event) * an OPAL call to disable the engine in that node. * */ - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return; } ref->refc--; @@ -490,7 +491,7 @@ static void nest_imc_counters_release(struct perf_event *event) rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("nest-imc: Unable to stop the counters for core %d\n", node_id); return; } @@ -498,7 +499,7 @@ static void nest_imc_counters_release(struct perf_event *event) WARN(1, "nest-imc: Invalid event reference count\n"); ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); } static int nest_imc_event_init(struct perf_event *event) @@ -557,26 +558,25 @@ static int nest_imc_event_init(struct perf_event *event) /* * Get the imc_pmu_ref struct for this node. - * Take the mutex lock and then increment the count of nest pmu events - * inited. + * Take the lock and then increment the count of nest pmu events inited. */ ref = get_nest_pmu_ref(event->cpu); if (!ref) return -EINVAL; - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("nest-imc: Unable to start the counters for node %d\n", node_id); return rc; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); event->destroy = nest_imc_counters_release; return 0; @@ -612,9 +612,8 @@ static int core_imc_mem_init(int cpu, int size) return -ENOMEM; mem_info->vbase = page_address(page); - /* Init the mutex */ core_imc_refc[core_id].id = core_id; - mutex_init(&core_imc_refc[core_id].lock); + spin_lock_init(&core_imc_refc[core_id].lock); rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE, __pa((void *)mem_info->vbase), @@ -703,9 +702,8 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu) perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu); } else { /* - * If this is the last cpu in this core then, skip taking refernce - * count mutex lock for this core and directly zero "refc" for - * this core. + * If this is the last cpu in this core then skip taking reference + * count lock for this core and directly zero "refc" for this core. */ opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(cpu)); @@ -720,11 +718,11 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu) * last cpu in this core and core-imc event running * in this cpu. */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == IMC_DOMAIN_CORE) imc_global_refc.refc--; - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); } return 0; } @@ -739,7 +737,7 @@ static int core_imc_pmu_cpumask_init(void) static void reset_global_refc(struct perf_event *event) { - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); imc_global_refc.refc--; /* @@ -751,7 +749,7 @@ static void reset_global_refc(struct perf_event *event) imc_global_refc.refc = 0; imc_global_refc.id = 0; } - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); } static void core_imc_counters_release(struct perf_event *event) @@ -764,17 +762,17 @@ static void core_imc_counters_release(struct perf_event *event) /* * See if we need to disable the IMC PMU. * If no events are currently in use, then we have to take a - * mutex to ensure that we don't race with another task doing + * lock to ensure that we don't race with another task doing * enable or disable the core counters. */ core_id = event->cpu / threads_per_core; - /* Take the mutex lock and decrement the refernce count for this core */ + /* Take the lock and decrement the refernce count for this core */ ref = &core_imc_refc[core_id]; if (!ref) return; - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { /* * The scenario where this is true is, when perf session is @@ -786,7 +784,7 @@ static void core_imc_counters_release(struct perf_event *event) * an OPAL call to disable the engine in that core. * */ - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return; } ref->refc--; @@ -794,7 +792,7 @@ static void core_imc_counters_release(struct perf_event *event) rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("IMC: Unable to stop the counters for core %d\n", core_id); return; } @@ -802,7 +800,7 @@ static void core_imc_counters_release(struct perf_event *event) WARN(1, "core-imc: Invalid event reference count\n"); ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); reset_global_refc(event); } @@ -840,7 +838,6 @@ static int core_imc_event_init(struct perf_event *event) if ((!pcmi->vbase)) return -ENODEV; - /* Get the core_imc mutex for this core */ ref = &core_imc_refc[core_id]; if (!ref) return -EINVAL; @@ -848,22 +845,22 @@ static int core_imc_event_init(struct perf_event *event) /* * Core pmu units are enabled only when it is used. * See if this is triggered for the first time. - * If yes, take the mutex lock and enable the core counters. + * If yes, take the lock and enable the core counters. * If not, just increment the count in core_imc_refc struct. */ - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("core-imc: Unable to start the counters for core %d\n", core_id); return rc; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); /* * Since the system can run either in accumulation or trace-mode @@ -874,7 +871,7 @@ static int core_imc_event_init(struct perf_event *event) * to know whether any other trace/thread imc * events are running. */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) { /* * No other trace/thread imc events are running in @@ -883,10 +880,10 @@ static int core_imc_event_init(struct perf_event *event) imc_global_refc.id = IMC_DOMAIN_CORE; imc_global_refc.refc++; } else { - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return -EBUSY; } - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK); event->destroy = core_imc_counters_release; @@ -958,10 +955,10 @@ static int ppc_thread_imc_cpu_offline(unsigned int cpu) mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); /* Reduce the refc if thread-imc event running on this cpu */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == IMC_DOMAIN_THREAD) imc_global_refc.refc--; - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return 0; } @@ -1001,7 +998,7 @@ static int thread_imc_event_init(struct perf_event *event) if (!target) return -EINVAL; - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); /* * Check if any other trace/core imc events are running in the * system, if not set the global id to thread-imc. @@ -1010,10 +1007,10 @@ static int thread_imc_event_init(struct perf_event *event) imc_global_refc.id = IMC_DOMAIN_THREAD; imc_global_refc.refc++; } else { - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return -EBUSY; } - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); event->pmu->task_ctx_nr = perf_sw_context; event->destroy = reset_global_refc; @@ -1135,25 +1132,25 @@ static int thread_imc_event_add(struct perf_event *event, int flags) /* * imc pmus are enabled only when it is used. * See if this is triggered for the first time. - * If yes, take the mutex lock and enable the counters. + * If yes, take the lock and enable the counters. * If not, just increment the count in ref count struct. */ ref = &core_imc_refc[core_id]; if (!ref) return -EINVAL; - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("thread-imc: Unable to start the counter\ for core %d\n", core_id); return -EINVAL; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return 0; } @@ -1170,12 +1167,12 @@ static void thread_imc_event_del(struct perf_event *event, int flags) return; } - mutex_lock(&ref->lock); + spin_lock(&ref->lock); ref->refc--; if (ref->refc == 0) { if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("thread-imc: Unable to stop the counters\ for core %d\n", core_id); return; @@ -1183,7 +1180,7 @@ static void thread_imc_event_del(struct perf_event *event, int flags) } else if (ref->refc < 0) { ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); /* Set bit 0 of LDBAR to zero, to stop posting updates to memory */ mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); @@ -1224,9 +1221,8 @@ static int trace_imc_mem_alloc(int cpu_id, int size) } } - /* Init the mutex, if not already */ trace_imc_refc[core_id].id = core_id; - mutex_init(&trace_imc_refc[core_id].lock); + spin_lock_init(&trace_imc_refc[core_id].lock); mtspr(SPRN_LDBAR, 0); return 0; @@ -1246,10 +1242,10 @@ static int ppc_trace_imc_cpu_offline(unsigned int cpu) * Reduce the refc if any trace-imc event running * on this cpu. */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == IMC_DOMAIN_TRACE) imc_global_refc.refc--; - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return 0; } @@ -1371,17 +1367,17 @@ static int trace_imc_event_add(struct perf_event *event, int flags) } mtspr(SPRN_LDBAR, ldbar_value); - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("trace-imc: Unable to start the counters for core %d\n", core_id); return -EINVAL; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return 0; } @@ -1414,19 +1410,19 @@ static void trace_imc_event_del(struct perf_event *event, int flags) return; } - mutex_lock(&ref->lock); + spin_lock(&ref->lock); ref->refc--; if (ref->refc == 0) { if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id); return; } } else if (ref->refc < 0) { ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); trace_imc_event_stop(event, flags); } @@ -1448,7 +1444,7 @@ static int trace_imc_event_init(struct perf_event *event) * no other thread is running any core/thread imc * events */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) { /* * No core/thread imc events are running in the @@ -1457,10 +1453,10 @@ static int trace_imc_event_init(struct perf_event *event) imc_global_refc.id = IMC_DOMAIN_TRACE; imc_global_refc.refc++; } else { - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return -EBUSY; } - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); event->hw.idx = -1; @@ -1533,10 +1529,10 @@ static int init_nest_pmu_ref(void) i = 0; for_each_node(nid) { /* - * Mutex lock to avoid races while tracking the number of + * Take the lock to avoid races while tracking the number of * sessions using the chip's nest pmu units. */ - mutex_init(&nest_imc_refc[i].lock); + spin_lock_init(&nest_imc_refc[i].lock); /* * Loop to init the "id" with the node_id. Variable "i" initialized to @@ -1633,7 +1629,7 @@ static void imc_common_mem_free(struct imc_pmu *pmu_ptr) static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) { if (pmu_ptr->domain == IMC_DOMAIN_NEST) { - mutex_lock(&nest_init_lock); + spin_lock(&nest_init_lock); if (nest_pmus == 1) { cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); kfree(nest_imc_refc); @@ -1643,7 +1639,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) if (nest_pmus > 0) nest_pmus--; - mutex_unlock(&nest_init_lock); + spin_unlock(&nest_init_lock); } /* Free core_imc memory */ @@ -1800,11 +1796,11 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id * rest. To handle the cpuhotplug callback unregister, we track * the number of nest pmus in "nest_pmus". */ - mutex_lock(&nest_init_lock); + spin_lock(&nest_init_lock); if (nest_pmus == 0) { ret = init_nest_pmu_ref(); if (ret) { - mutex_unlock(&nest_init_lock); + spin_unlock(&nest_init_lock); kfree(per_nest_pmu_arr); per_nest_pmu_arr = NULL; goto err_free_mem; @@ -1812,7 +1808,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id /* Register for cpu hotplug notification. */ ret = nest_pmu_cpumask_init(); if (ret) { - mutex_unlock(&nest_init_lock); + spin_unlock(&nest_init_lock); kfree(nest_imc_refc); kfree(per_nest_pmu_arr); per_nest_pmu_arr = NULL; @@ -1820,7 +1816,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id } } nest_pmus++; - mutex_unlock(&nest_init_lock); + spin_unlock(&nest_init_lock); break; case IMC_DOMAIN_CORE: ret = core_imc_pmu_cpumask_init(); From 7124c93887cc4e6c5b48920f83115e4a5892e870 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 9 Jan 2023 22:25:29 -0800 Subject: [PATCH 0649/1593] phy: ti: fix Kconfig warning and operator precedence Fix Kconfig depends operator precedence to prevent a Kconfig warning: WARNING: unmet direct dependencies detected for MUX_MMIO Depends on [n]: MULTIPLEXER [=m] && OF [=n] Selected by [m]: - PHY_AM654_SERDES [=m] && (OF [=n] && ARCH_K3 || COMPILE_TEST [=y]) && COMMON_CLK [=y] Fixes: 71e2f5c5c224 ("phy: ti: Add a new SERDES driver for TI's AM654x SoC") Fixes: 091876cc355d ("phy: ti: j721e-wiz: Add support for WIZ module present in TI J721E SoC") Signed-off-by: Randy Dunlap Cc: Vinod Koul Cc: Kishon Vijay Abraham I Cc: linux-phy@lists.infradead.org Cc: Arnd Bergmann Link: https://lore.kernel.org/r/20230110062529.22668-1-rdunlap@infradead.org Signed-off-by: Vinod Koul --- drivers/phy/ti/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/ti/Kconfig b/drivers/phy/ti/Kconfig index 15a3bcf323086..b905902d57508 100644 --- a/drivers/phy/ti/Kconfig +++ b/drivers/phy/ti/Kconfig @@ -23,7 +23,7 @@ config PHY_DM816X_USB config PHY_AM654_SERDES tristate "TI AM654 SERDES support" - depends on OF && ARCH_K3 || COMPILE_TEST + depends on OF && (ARCH_K3 || COMPILE_TEST) depends on COMMON_CLK select GENERIC_PHY select MULTIPLEXER @@ -35,7 +35,7 @@ config PHY_AM654_SERDES config PHY_J721E_WIZ tristate "TI J721E WIZ (SERDES Wrapper) support" - depends on OF && ARCH_K3 || COMPILE_TEST + depends on OF && (ARCH_K3 || COMPILE_TEST) depends on HAS_IOMEM && OF_ADDRESS depends on COMMON_CLK select GENERIC_PHY From b0e380b5d4275299adf43e249f18309331b6f54f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Mon, 9 Jan 2023 16:32:23 +0100 Subject: [PATCH 0650/1593] net: lan966x: check for ptp to be enabled in lan966x_ptp_deinit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If ptp was not enabled due to missing IRQ for instance, lan966x_ptp_deinit() will dereference NULL pointers. Fixes: d096459494a8 ("net: lan966x: Add support for ptp clocks") Signed-off-by: Clément Léger Reviewed-by: Horatiu Vultur Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c index f9ebfaafbebc2..a8348437dd87f 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c @@ -1073,6 +1073,9 @@ void lan966x_ptp_deinit(struct lan966x *lan966x) struct lan966x_port *port; int i; + if (!lan966x->ptp) + return; + for (i = 0; i < lan966x->num_phys_ports; i++) { port = lan966x->ports[i]; if (!port) From 4e2ec2500bfc5cf429ddcfe78b49cb76b36bc46d Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 10 Jan 2023 13:35:26 +0100 Subject: [PATCH 0651/1593] drm/nouveau: Remove file nouveau_fbcon.c Commit 4a16dd9d18a0 ("drm/nouveau/kms: switch to drm fbdev helpers") converted nouveau to generic fbdev emulation. The driver's internal implementation later got accidentally restored during a merge commit. Remove the file from the driver. No functional changes. v2: * point Fixes tag to merge commit (Alex) Signed-off-by: Thomas Zimmermann Reviewed-by: Alex Deucher Fixes: 4e291f2f5853 ("Merge tag 'drm-misc-next-2022-11-10-1' of git://anongit.freedesktop.org/drm/drm-misc into drm-next") Cc: Ben Skeggs Cc: Karol Herbst Cc: Lyude Paul Cc: Thomas Zimmermann Cc: Javier Martinez Canillas Cc: Sam Ravnborg Cc: Jani Nikula Cc: Dave Airlie Cc: dri-devel@lists.freedesktop.org Cc: nouveau@lists.freedesktop.org Link: https://patchwork.freedesktop.org/patch/msgid/20230110123526.28770-1-tzimmermann@suse.de --- drivers/gpu/drm/nouveau/nouveau_fbcon.c | 613 ------------------------ 1 file changed, 613 deletions(-) delete mode 100644 drivers/gpu/drm/nouveau/nouveau_fbcon.c diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c deleted file mode 100644 index e87de7906f780..0000000000000 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ /dev/null @@ -1,613 +0,0 @@ -/* - * Copyright © 2007 David Airlie - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * David Airlie - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "nouveau_drv.h" -#include "nouveau_gem.h" -#include "nouveau_bo.h" -#include "nouveau_fbcon.h" -#include "nouveau_chan.h" -#include "nouveau_vmm.h" - -#include "nouveau_crtc.h" - -MODULE_PARM_DESC(nofbaccel, "Disable fbcon acceleration"); -int nouveau_nofbaccel = 0; -module_param_named(nofbaccel, nouveau_nofbaccel, int, 0400); - -MODULE_PARM_DESC(fbcon_bpp, "fbcon bits-per-pixel (default: auto)"); -static int nouveau_fbcon_bpp; -module_param_named(fbcon_bpp, nouveau_fbcon_bpp, int, 0400); - -static void -nouveau_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect) -{ - struct nouveau_fbdev *fbcon = info->par; - struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev); - struct nvif_device *device = &drm->client.device; - int ret; - - if (info->state != FBINFO_STATE_RUNNING) - return; - - ret = -ENODEV; - if (!in_interrupt() && !(info->flags & FBINFO_HWACCEL_DISABLED) && - mutex_trylock(&drm->client.mutex)) { - if (device->info.family < NV_DEVICE_INFO_V0_TESLA) - ret = nv04_fbcon_fillrect(info, rect); - else - if (device->info.family < NV_DEVICE_INFO_V0_FERMI) - ret = nv50_fbcon_fillrect(info, rect); - else - ret = nvc0_fbcon_fillrect(info, rect); - mutex_unlock(&drm->client.mutex); - } - - if (ret == 0) - return; - - if (ret != -ENODEV) - nouveau_fbcon_gpu_lockup(info); - drm_fb_helper_cfb_fillrect(info, rect); -} - -static void -nouveau_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *image) -{ - struct nouveau_fbdev *fbcon = info->par; - struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev); - struct nvif_device *device = &drm->client.device; - int ret; - - if (info->state != FBINFO_STATE_RUNNING) - return; - - ret = -ENODEV; - if (!in_interrupt() && !(info->flags & FBINFO_HWACCEL_DISABLED) && - mutex_trylock(&drm->client.mutex)) { - if (device->info.family < NV_DEVICE_INFO_V0_TESLA) - ret = nv04_fbcon_copyarea(info, image); - else - if (device->info.family < NV_DEVICE_INFO_V0_FERMI) - ret = nv50_fbcon_copyarea(info, image); - else - ret = nvc0_fbcon_copyarea(info, image); - mutex_unlock(&drm->client.mutex); - } - - if (ret == 0) - return; - - if (ret != -ENODEV) - nouveau_fbcon_gpu_lockup(info); - drm_fb_helper_cfb_copyarea(info, image); -} - -static void -nouveau_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) -{ - struct nouveau_fbdev *fbcon = info->par; - struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev); - struct nvif_device *device = &drm->client.device; - int ret; - - if (info->state != FBINFO_STATE_RUNNING) - return; - - ret = -ENODEV; - if (!in_interrupt() && !(info->flags & FBINFO_HWACCEL_DISABLED) && - mutex_trylock(&drm->client.mutex)) { - if (device->info.family < NV_DEVICE_INFO_V0_TESLA) - ret = nv04_fbcon_imageblit(info, image); - else - if (device->info.family < NV_DEVICE_INFO_V0_FERMI) - ret = nv50_fbcon_imageblit(info, image); - else - ret = nvc0_fbcon_imageblit(info, image); - mutex_unlock(&drm->client.mutex); - } - - if (ret == 0) - return; - - if (ret != -ENODEV) - nouveau_fbcon_gpu_lockup(info); - drm_fb_helper_cfb_imageblit(info, image); -} - -static int -nouveau_fbcon_sync(struct fb_info *info) -{ - struct nouveau_fbdev *fbcon = info->par; - struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev); - struct nouveau_channel *chan = drm->channel; - int ret; - - if (!chan || !chan->accel_done || in_interrupt() || - info->state != FBINFO_STATE_RUNNING || - info->flags & FBINFO_HWACCEL_DISABLED) - return 0; - - if (!mutex_trylock(&drm->client.mutex)) - return 0; - - ret = nouveau_channel_idle(chan); - mutex_unlock(&drm->client.mutex); - if (ret) { - nouveau_fbcon_gpu_lockup(info); - return 0; - } - - chan->accel_done = false; - return 0; -} - -static int -nouveau_fbcon_open(struct fb_info *info, int user) -{ - struct nouveau_fbdev *fbcon = info->par; - struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev); - int ret = pm_runtime_get_sync(drm->dev->dev); - if (ret < 0 && ret != -EACCES) { - pm_runtime_put(drm->dev->dev); - return ret; - } - return 0; -} - -static int -nouveau_fbcon_release(struct fb_info *info, int user) -{ - struct nouveau_fbdev *fbcon = info->par; - struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev); - pm_runtime_put(drm->dev->dev); - return 0; -} - -static const struct fb_ops nouveau_fbcon_ops = { - .owner = THIS_MODULE, - DRM_FB_HELPER_DEFAULT_OPS, - .fb_open = nouveau_fbcon_open, - .fb_release = nouveau_fbcon_release, - .fb_fillrect = nouveau_fbcon_fillrect, - .fb_copyarea = nouveau_fbcon_copyarea, - .fb_imageblit = nouveau_fbcon_imageblit, - .fb_sync = nouveau_fbcon_sync, -}; - -static const struct fb_ops nouveau_fbcon_sw_ops = { - .owner = THIS_MODULE, - DRM_FB_HELPER_DEFAULT_OPS, - .fb_open = nouveau_fbcon_open, - .fb_release = nouveau_fbcon_release, - .fb_fillrect = drm_fb_helper_cfb_fillrect, - .fb_copyarea = drm_fb_helper_cfb_copyarea, - .fb_imageblit = drm_fb_helper_cfb_imageblit, -}; - -void -nouveau_fbcon_accel_save_disable(struct drm_device *dev) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - if (drm->fbcon && drm->fbcon->helper.info) { - drm->fbcon->saved_flags = drm->fbcon->helper.info->flags; - drm->fbcon->helper.info->flags |= FBINFO_HWACCEL_DISABLED; - } -} - -void -nouveau_fbcon_accel_restore(struct drm_device *dev) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - if (drm->fbcon && drm->fbcon->helper.info) - drm->fbcon->helper.info->flags = drm->fbcon->saved_flags; -} - -static void -nouveau_fbcon_accel_fini(struct drm_device *dev) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - struct nouveau_fbdev *fbcon = drm->fbcon; - if (fbcon && drm->channel) { - console_lock(); - if (fbcon->helper.info) - fbcon->helper.info->flags |= FBINFO_HWACCEL_DISABLED; - console_unlock(); - nouveau_channel_idle(drm->channel); - nvif_object_dtor(&fbcon->twod); - nvif_object_dtor(&fbcon->blit); - nvif_object_dtor(&fbcon->gdi); - nvif_object_dtor(&fbcon->patt); - nvif_object_dtor(&fbcon->rop); - nvif_object_dtor(&fbcon->clip); - nvif_object_dtor(&fbcon->surf2d); - } -} - -static void -nouveau_fbcon_accel_init(struct drm_device *dev) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - struct nouveau_fbdev *fbcon = drm->fbcon; - struct fb_info *info = fbcon->helper.info; - int ret; - - if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) - ret = nv04_fbcon_accel_init(info); - else - if (drm->client.device.info.family < NV_DEVICE_INFO_V0_FERMI) - ret = nv50_fbcon_accel_init(info); - else - ret = nvc0_fbcon_accel_init(info); - - if (ret == 0) - info->fbops = &nouveau_fbcon_ops; -} - -static void -nouveau_fbcon_zfill(struct drm_device *dev, struct nouveau_fbdev *fbcon) -{ - struct fb_info *info = fbcon->helper.info; - struct fb_fillrect rect; - - /* Clear the entire fbcon. The drm will program every connector - * with it's preferred mode. If the sizes differ, one display will - * quite likely have garbage around the console. - */ - rect.dx = rect.dy = 0; - rect.width = info->var.xres_virtual; - rect.height = info->var.yres_virtual; - rect.color = 0; - rect.rop = ROP_COPY; - info->fbops->fb_fillrect(info, &rect); -} - -static int -nouveau_fbcon_create(struct drm_fb_helper *helper, - struct drm_fb_helper_surface_size *sizes) -{ - struct nouveau_fbdev *fbcon = - container_of(helper, struct nouveau_fbdev, helper); - struct drm_device *dev = fbcon->helper.dev; - struct nouveau_drm *drm = nouveau_drm(dev); - struct nvif_device *device = &drm->client.device; - struct fb_info *info; - struct drm_framebuffer *fb; - struct nouveau_channel *chan; - struct nouveau_bo *nvbo; - struct drm_mode_fb_cmd2 mode_cmd = {}; - int ret; - - mode_cmd.width = sizes->surface_width; - mode_cmd.height = sizes->surface_height; - - mode_cmd.pitches[0] = mode_cmd.width * (sizes->surface_bpp >> 3); - mode_cmd.pitches[0] = roundup(mode_cmd.pitches[0], 256); - - mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, - sizes->surface_depth); - - ret = nouveau_gem_new(&drm->client, mode_cmd.pitches[0] * - mode_cmd.height, 0, NOUVEAU_GEM_DOMAIN_VRAM, - 0, 0x0000, &nvbo); - if (ret) { - NV_ERROR(drm, "failed to allocate framebuffer\n"); - goto out; - } - - ret = nouveau_framebuffer_new(dev, &mode_cmd, &nvbo->bo.base, &fb); - if (ret) - goto out_unref; - - ret = nouveau_bo_pin(nvbo, NOUVEAU_GEM_DOMAIN_VRAM, false); - if (ret) { - NV_ERROR(drm, "failed to pin fb: %d\n", ret); - goto out_unref; - } - - ret = nouveau_bo_map(nvbo); - if (ret) { - NV_ERROR(drm, "failed to map fb: %d\n", ret); - goto out_unpin; - } - - chan = nouveau_nofbaccel ? NULL : drm->channel; - if (chan && device->info.family >= NV_DEVICE_INFO_V0_TESLA) { - ret = nouveau_vma_new(nvbo, chan->vmm, &fbcon->vma); - if (ret) { - NV_ERROR(drm, "failed to map fb into chan: %d\n", ret); - chan = NULL; - } - } - - info = drm_fb_helper_alloc_info(helper); - if (IS_ERR(info)) { - ret = PTR_ERR(info); - goto out_unlock; - } - - /* setup helper */ - fbcon->helper.fb = fb; - - if (!chan) - info->flags = FBINFO_HWACCEL_DISABLED; - else - info->flags = FBINFO_HWACCEL_COPYAREA | - FBINFO_HWACCEL_FILLRECT | - FBINFO_HWACCEL_IMAGEBLIT; - info->fbops = &nouveau_fbcon_sw_ops; - info->fix.smem_start = nvbo->bo.resource->bus.offset; - info->fix.smem_len = nvbo->bo.base.size; - - info->screen_base = nvbo_kmap_obj_iovirtual(nvbo); - info->screen_size = nvbo->bo.base.size; - - drm_fb_helper_fill_info(info, &fbcon->helper, sizes); - - /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ - - if (chan) - nouveau_fbcon_accel_init(dev); - nouveau_fbcon_zfill(dev, fbcon); - - /* To allow resizeing without swapping buffers */ - NV_INFO(drm, "allocated %dx%d fb: 0x%llx, bo %p\n", - fb->width, fb->height, nvbo->offset, nvbo); - - if (dev_is_pci(dev->dev)) - vga_switcheroo_client_fb_set(to_pci_dev(dev->dev), info); - - return 0; - -out_unlock: - if (chan) - nouveau_vma_del(&fbcon->vma); - nouveau_bo_unmap(nvbo); -out_unpin: - nouveau_bo_unpin(nvbo); -out_unref: - nouveau_bo_ref(NULL, &nvbo); -out: - return ret; -} - -static int -nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon) -{ - struct drm_framebuffer *fb = fbcon->helper.fb; - struct nouveau_bo *nvbo; - - drm_fb_helper_unregister_info(&fbcon->helper); - drm_fb_helper_fini(&fbcon->helper); - - if (fb && fb->obj[0]) { - nvbo = nouveau_gem_object(fb->obj[0]); - nouveau_vma_del(&fbcon->vma); - nouveau_bo_unmap(nvbo); - nouveau_bo_unpin(nvbo); - drm_framebuffer_put(fb); - } - - return 0; -} - -void nouveau_fbcon_gpu_lockup(struct fb_info *info) -{ - struct nouveau_fbdev *fbcon = info->par; - struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev); - - NV_ERROR(drm, "GPU lockup - switching to software fbcon\n"); - info->flags |= FBINFO_HWACCEL_DISABLED; -} - -static const struct drm_fb_helper_funcs nouveau_fbcon_helper_funcs = { - .fb_probe = nouveau_fbcon_create, -}; - -static void -nouveau_fbcon_set_suspend_work(struct work_struct *work) -{ - struct nouveau_drm *drm = container_of(work, typeof(*drm), fbcon_work); - int state = READ_ONCE(drm->fbcon_new_state); - - if (state == FBINFO_STATE_RUNNING) - pm_runtime_get_sync(drm->dev->dev); - - console_lock(); - if (state == FBINFO_STATE_RUNNING) - nouveau_fbcon_accel_restore(drm->dev); - drm_fb_helper_set_suspend(&drm->fbcon->helper, state); - if (state != FBINFO_STATE_RUNNING) - nouveau_fbcon_accel_save_disable(drm->dev); - console_unlock(); - - if (state == FBINFO_STATE_RUNNING) { - nouveau_fbcon_hotplug_resume(drm->fbcon); - pm_runtime_mark_last_busy(drm->dev->dev); - pm_runtime_put_autosuspend(drm->dev->dev); - } -} - -void -nouveau_fbcon_set_suspend(struct drm_device *dev, int state) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - - if (!drm->fbcon) - return; - - drm->fbcon_new_state = state; - /* Since runtime resume can happen as a result of a sysfs operation, - * it's possible we already have the console locked. So handle fbcon - * init/deinit from a seperate work thread - */ - schedule_work(&drm->fbcon_work); -} - -void -nouveau_fbcon_output_poll_changed(struct drm_device *dev) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - struct nouveau_fbdev *fbcon = drm->fbcon; - int ret; - - if (!fbcon) - return; - - mutex_lock(&fbcon->hotplug_lock); - - ret = pm_runtime_get(dev->dev); - if (ret == 1 || ret == -EACCES) { - drm_fb_helper_hotplug_event(&fbcon->helper); - - pm_runtime_mark_last_busy(dev->dev); - pm_runtime_put_autosuspend(dev->dev); - } else if (ret == 0) { - /* If the GPU was already in the process of suspending before - * this event happened, then we can't block here as we'll - * deadlock the runtime pmops since they wait for us to - * finish. So, just defer this event for when we runtime - * resume again. It will be handled by fbcon_work. - */ - NV_DEBUG(drm, "fbcon HPD event deferred until runtime resume\n"); - fbcon->hotplug_waiting = true; - pm_runtime_put_noidle(drm->dev->dev); - } else { - DRM_WARN("fbcon HPD event lost due to RPM failure: %d\n", - ret); - } - - mutex_unlock(&fbcon->hotplug_lock); -} - -void -nouveau_fbcon_hotplug_resume(struct nouveau_fbdev *fbcon) -{ - struct nouveau_drm *drm; - - if (!fbcon) - return; - drm = nouveau_drm(fbcon->helper.dev); - - mutex_lock(&fbcon->hotplug_lock); - if (fbcon->hotplug_waiting) { - fbcon->hotplug_waiting = false; - - NV_DEBUG(drm, "Handling deferred fbcon HPD events\n"); - drm_fb_helper_hotplug_event(&fbcon->helper); - } - mutex_unlock(&fbcon->hotplug_lock); -} - -int -nouveau_fbcon_init(struct drm_device *dev) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - struct nouveau_fbdev *fbcon; - int preferred_bpp = nouveau_fbcon_bpp; - int ret; - - if (!dev->mode_config.num_crtc || - (to_pci_dev(dev->dev)->class >> 8) != PCI_CLASS_DISPLAY_VGA) - return 0; - - fbcon = kzalloc(sizeof(struct nouveau_fbdev), GFP_KERNEL); - if (!fbcon) - return -ENOMEM; - - drm->fbcon = fbcon; - INIT_WORK(&drm->fbcon_work, nouveau_fbcon_set_suspend_work); - mutex_init(&fbcon->hotplug_lock); - - drm_fb_helper_prepare(dev, &fbcon->helper, &nouveau_fbcon_helper_funcs); - - ret = drm_fb_helper_init(dev, &fbcon->helper); - if (ret) - goto free; - - if (preferred_bpp != 8 && preferred_bpp != 16 && preferred_bpp != 32) { - if (drm->client.device.info.ram_size <= 32 * 1024 * 1024) - preferred_bpp = 8; - else - if (drm->client.device.info.ram_size <= 64 * 1024 * 1024) - preferred_bpp = 16; - else - preferred_bpp = 32; - } - - /* disable all the possible outputs/crtcs before entering KMS mode */ - if (!drm_drv_uses_atomic_modeset(dev)) - drm_helper_disable_unused_functions(dev); - - ret = drm_fb_helper_initial_config(&fbcon->helper, preferred_bpp); - if (ret) - goto fini; - - if (fbcon->helper.info) - fbcon->helper.info->pixmap.buf_align = 4; - return 0; - -fini: - drm_fb_helper_fini(&fbcon->helper); -free: - kfree(fbcon); - drm->fbcon = NULL; - return ret; -} - -void -nouveau_fbcon_fini(struct drm_device *dev) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - - if (!drm->fbcon) - return; - - drm_kms_helper_poll_fini(dev); - nouveau_fbcon_accel_fini(dev); - nouveau_fbcon_destroy(dev, drm->fbcon); - kfree(drm->fbcon); - drm->fbcon = NULL; -} From 9554f023385825be4b1e3557398c82e25be83da4 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Thu, 29 Dec 2022 19:50:43 +0800 Subject: [PATCH 0652/1593] arm64: dts: rockchip: add io domain setting to rk3566-box-demo Add the missing pmu_io_domains setting, the gmac can't work well without this. Fixes: 2e0537b16b25 ("arm64: dts: rockchip: Add dts for rockchip rk3566 box demo board") Signed-off-by: Andy Yan Link: https://lore.kernel.org/r/20221229115043.3899733-1-andyshrk@163.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts b/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts index 4c7f9abd594f1..d956496d52219 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts @@ -353,6 +353,17 @@ }; }; +&pmu_io_domains { + pmuio2-supply = <&vcc_3v3>; + vccio1-supply = <&vcc_3v3>; + vccio3-supply = <&vcc_3v3>; + vccio4-supply = <&vcca_1v8>; + vccio5-supply = <&vcc_3v3>; + vccio6-supply = <&vcca_1v8>; + vccio7-supply = <&vcc_3v3>; + status = "okay"; +}; + &pwm0 { status = "okay"; }; From 80422339a75088322b4d3884bd12fa0fe5d11050 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Wed, 28 Dec 2022 21:17:32 +0100 Subject: [PATCH 0653/1593] ARM: dts: rockchip: add power-domains property to dp node on rk3288 The clocks in the Rockchip rk3288 DisplayPort node are included in the power-domain@RK3288_PD_VIO logic, but the power-domains property in the dp node is missing, so fix it. Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/dab85bfb-9f55-86a1-5cd5-7388c43e0ec5@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 487b0e03d4b43..2ca76b69add78 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -1181,6 +1181,7 @@ clock-names = "dp", "pclk"; phys = <&edp_phy>; phy-names = "dp"; + power-domains = <&power RK3288_PD_VIO>; resets = <&cru SRST_EDP>; reset-names = "dp"; rockchip,grf = <&grf>; From 6f515b663d49a14fb63f8c5d0a2a4ae53d44790a Mon Sep 17 00:00:00 2001 From: Arnaud Ferraris Date: Thu, 15 Dec 2022 11:19:47 +0100 Subject: [PATCH 0654/1593] arm64: dts: rockchip: fix input enable pinconf on rk3399 When the input enable pinconf was introduced, a default drive-strength value of 2 was set for the pull up/down configs. However, this parameter is unneeded when configuring the pin as input, and having a single hardcoded value here is actually harmful: GPIOs on the RK3399 have various same drive-strength capabilities depending on the bank and port they belong to. As an example, trying to configure the GPIO4_PD3 pin as an input with pull-up enabled fails with the following output: [ 10.706542] rockchip-pinctrl pinctrl: unsupported driver strength 2 [ 10.713661] rockchip-pinctrl pinctrl: pin_config_set op failed for pin 155 (acceptable drive-strength values for this pin being 3, 6, 9 and 12) Let's drop the drive-strength property from all input pinconfs in order to solve this issue. Fixes: ec48c3e82ca3 ("arm64: dts: rockchip: add an input enable pinconf to rk3399") Signed-off-by: Arnaud Ferraris Reviewed-by: Caleb Connolly Link: https://lore.kernel.org/r/20221215101947.254896-1-arnaud.ferraris@collabora.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 834d16acb546f..1881b4b71f91d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -2241,13 +2241,11 @@ pcfg_input_pull_up: pcfg-input-pull-up { input-enable; bias-pull-up; - drive-strength = <2>; }; pcfg_input_pull_down: pcfg-input-pull-down { input-enable; bias-pull-down; - drive-strength = <2>; }; clock { From 0f9c608d4a1eb852d6769d2fc5906c71c02565ae Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 11 Jan 2023 10:38:22 +0100 Subject: [PATCH 0655/1593] init/Kconfig: fix LOCALVERSION_AUTO help text It was never guaranteed to be exactly eight, but since commit 548b8b5168c9 ("scripts/setlocalversion: make git describe output more reliable"), it has been exactly 12. Signed-off-by: Rasmus Villemoes Signed-off-by: Masahiro Yamada --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 7e5c3ddc341de..ccd3cdcf3df19 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -204,7 +204,7 @@ config LOCALVERSION_AUTO appended after any matching localversion* files, and after the value set in CONFIG_LOCALVERSION. - (The actual string used here is the first eight characters produced + (The actual string used here is the first 12 characters produced by running the command: $ git rev-parse --verify HEAD From 169dd78043f7f4eabdef8d7ba0d4ddffe5cb3930 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Wed, 11 Jan 2023 11:11:56 +0100 Subject: [PATCH 0656/1593] scripts: rpm: make clear that mkspec script contains 4.13 feature A fix was made in the mkspec script that uses a feature, ie. the OR expression, which requires RPM 4.13. However, the script indicates another minimum version. Lower versions may have success by using the --no-deps option as suggested, but feels like bumping the version to 4.13 is reasonable as it put me on the wrong track at first with RPM 4.11 on my Centos7 machine. Fixes: 02a893bc9975 ("kbuild: rpm-pkg: add libelf-devel as alternative for BuildRequires") Signed-off-by: Arend van Spriel Signed-off-by: Masahiro Yamada --- scripts/package/mkspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/mkspec b/scripts/package/mkspec index adab28fa7f892..094e52c979a8c 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -1,7 +1,7 @@ #!/bin/sh # # Output a simple RPM spec file. -# This version assumes a minimum of RPM 4.0.3. +# This version assumes a minimum of RPM 4.13 # # The only gothic bit here is redefining install_post to avoid # stripping the symbols from files in the kernel which we want From cb3e9864cdbe35ff6378966660edbcbac955fe17 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 10 Jan 2023 08:59:06 +0800 Subject: [PATCH 0657/1593] ipv6: raw: Deduct extension header length in rawv6_push_pending_frames The total cork length created by ip6_append_data includes extension headers, so we must exclude them when comparing them against the IPV6_CHECKSUM offset which does not include extension headers. Reported-by: Kyle Zeng Fixes: 357b40a18b04 ("[IPV6]: IPV6_CHECKSUM socket option can corrupt kernel memory") Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/raw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a06a9f847db5c..ada087b50541a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -505,6 +505,7 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct raw6_sock *rp) { + struct ipv6_txoptions *opt; struct sk_buff *skb; int err = 0; int offset; @@ -522,6 +523,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, offset = rp->offset; total_len = inet_sk(sk)->cork.base.length; + opt = inet6_sk(sk)->cork.opt; + total_len -= opt ? opt->opt_flen : 0; + if (offset >= total_len - 1) { err = -EINVAL; ip6_flush_pending_frames(sk); From a720416d94634068951773cb9e9d6f1b73769e5b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 6 Jan 2023 11:07:18 +0100 Subject: [PATCH 0658/1593] spi: spidev: fix a race condition when accessing spidev->spi There's a spinlock in place that is taken in file_operations callbacks whenever we check if spidev->spi is still alive (not null). It's also taken when spidev->spi is set to NULL in remove(). This however doesn't protect the code against driver unbind event while one of the syscalls is still in progress. To that end we need a lock taken continuously as long as we may still access spidev->spi. As both the file ops and the remove callback are never called from interrupt context, we can replace the spinlock with a mutex. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20230106100719.196243-1-brgl@bgdev.pl Signed-off-by: Mark Brown --- drivers/spi/spidev.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 6313e7d0cdf87..42aaaca672654 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -68,7 +68,7 @@ static_assert(N_SPI_MINORS > 0 && N_SPI_MINORS <= 256); struct spidev_data { dev_t devt; - spinlock_t spi_lock; + struct mutex spi_lock; struct spi_device *spi; struct list_head device_entry; @@ -95,9 +95,8 @@ spidev_sync(struct spidev_data *spidev, struct spi_message *message) int status; struct spi_device *spi; - spin_lock_irq(&spidev->spi_lock); + mutex_lock(&spidev->spi_lock); spi = spidev->spi; - spin_unlock_irq(&spidev->spi_lock); if (spi == NULL) status = -ESHUTDOWN; @@ -107,6 +106,7 @@ spidev_sync(struct spidev_data *spidev, struct spi_message *message) if (status == 0) status = message->actual_length; + mutex_unlock(&spidev->spi_lock); return status; } @@ -359,12 +359,12 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) * we issue this ioctl. */ spidev = filp->private_data; - spin_lock_irq(&spidev->spi_lock); + mutex_lock(&spidev->spi_lock); spi = spi_dev_get(spidev->spi); - spin_unlock_irq(&spidev->spi_lock); - - if (spi == NULL) + if (spi == NULL) { + mutex_unlock(&spidev->spi_lock); return -ESHUTDOWN; + } /* use the buffer lock here for triple duty: * - prevent I/O (from us) so calling spi_setup() is safe; @@ -508,6 +508,7 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) mutex_unlock(&spidev->buf_lock); spi_dev_put(spi); + mutex_unlock(&spidev->spi_lock); return retval; } @@ -529,12 +530,12 @@ spidev_compat_ioc_message(struct file *filp, unsigned int cmd, * we issue this ioctl. */ spidev = filp->private_data; - spin_lock_irq(&spidev->spi_lock); + mutex_lock(&spidev->spi_lock); spi = spi_dev_get(spidev->spi); - spin_unlock_irq(&spidev->spi_lock); - - if (spi == NULL) + if (spi == NULL) { + mutex_unlock(&spidev->spi_lock); return -ESHUTDOWN; + } /* SPI_IOC_MESSAGE needs the buffer locked "normally" */ mutex_lock(&spidev->buf_lock); @@ -561,6 +562,7 @@ spidev_compat_ioc_message(struct file *filp, unsigned int cmd, done: mutex_unlock(&spidev->buf_lock); spi_dev_put(spi); + mutex_unlock(&spidev->spi_lock); return retval; } @@ -640,10 +642,10 @@ static int spidev_release(struct inode *inode, struct file *filp) spidev = filp->private_data; filp->private_data = NULL; - spin_lock_irq(&spidev->spi_lock); + mutex_lock(&spidev->spi_lock); /* ... after we unbound from the underlying device? */ dofree = (spidev->spi == NULL); - spin_unlock_irq(&spidev->spi_lock); + mutex_unlock(&spidev->spi_lock); /* last close? */ spidev->users--; @@ -776,7 +778,7 @@ static int spidev_probe(struct spi_device *spi) /* Initialize the driver data */ spidev->spi = spi; - spin_lock_init(&spidev->spi_lock); + mutex_init(&spidev->spi_lock); mutex_init(&spidev->buf_lock); INIT_LIST_HEAD(&spidev->device_entry); @@ -821,9 +823,9 @@ static void spidev_remove(struct spi_device *spi) /* prevent new opens */ mutex_lock(&device_list_lock); /* make sure ops on existing fds can abort cleanly */ - spin_lock_irq(&spidev->spi_lock); + mutex_lock(&spidev->spi_lock); spidev->spi = NULL; - spin_unlock_irq(&spidev->spi_lock); + mutex_unlock(&spidev->spi_lock); list_del(&spidev->device_entry); device_destroy(spidev_class, spidev->devt); From 6b35b173dbc1711f8d272e3f322d2ad697015919 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 6 Jan 2023 11:07:19 +0100 Subject: [PATCH 0659/1593] spi: spidev: remove debug messages that access spidev->spi without locking The two debug messages in spidev_open() dereference spidev->spi without taking the lock and without checking if it's not null. This can lead to a crash. Drop the messages as they're not needed - the user-space will get informed about ENOMEM with the syscall return value. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20230106100719.196243-2-brgl@bgdev.pl Signed-off-by: Mark Brown --- drivers/spi/spidev.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 42aaaca672654..1935ca6134470 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -603,7 +603,6 @@ static int spidev_open(struct inode *inode, struct file *filp) if (!spidev->tx_buffer) { spidev->tx_buffer = kmalloc(bufsiz, GFP_KERNEL); if (!spidev->tx_buffer) { - dev_dbg(&spidev->spi->dev, "open/ENOMEM\n"); status = -ENOMEM; goto err_find_dev; } @@ -612,7 +611,6 @@ static int spidev_open(struct inode *inode, struct file *filp) if (!spidev->rx_buffer) { spidev->rx_buffer = kmalloc(bufsiz, GFP_KERNEL); if (!spidev->rx_buffer) { - dev_dbg(&spidev->spi->dev, "open/ENOMEM\n"); status = -ENOMEM; goto err_alloc_rx_buf; } From 82d3edb50a11bf3c5ef63294d5358ba230181413 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 5 Jan 2023 15:44:20 +0100 Subject: [PATCH 0660/1593] s390/cpum_sf: add READ_ONCE() semantics to compare and swap loops The current cmpxchg_double() loops within the perf hw sampling code do not have READ_ONCE() semantics to read the old value from memory. This allows the compiler to generate code which reads the "old" value several times from memory, which again allows for inconsistencies. For example: /* Reset trailer (using compare-double-and-swap) */ do { te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; te_flags |= SDB_TE_ALERT_REQ_MASK; } while (!cmpxchg_double(&te->flags, &te->overflow, te->flags, te->overflow, te_flags, 0ULL)); The compiler could generate code where te->flags used within the cmpxchg_double() call may be refetched from memory and which is not necessarily identical to the previous read version which was used to generate te_flags. Which in turn means that an incorrect update could happen. Fix this by adding READ_ONCE() semantics to all cmpxchg_double() loops. Given that READ_ONCE() cannot generate code on s390 which atomically reads 16 bytes, use a private compare-and-swap-double implementation to achieve that. Also replace cmpxchg_double() with the private implementation to be able to re-use the old value within the loops. As a side effect this converts the whole code to only use bit fields to read and modify bits within the hws trailer header. Reported-by: Alexander Gordeev Acked-by: Alexander Gordeev Acked-by: Hendrik Brueckner Reviewed-by: Thomas Richter Cc: Link: https://lore.kernel.org/linux-s390/Y71QJBhNTIatvxUT@osiris/T/#ma14e2a5f7aa8ed4b94b6f9576799b3ad9c60f333 Signed-off-by: Heiko Carstens --- arch/s390/include/asm/cpu_mf.h | 31 +++++----- arch/s390/kernel/perf_cpum_sf.c | 101 ++++++++++++++++++++------------ 2 files changed, 77 insertions(+), 55 deletions(-) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index feaba12dbecb8..efa103b52a1a1 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -131,19 +131,21 @@ struct hws_combined_entry { struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ } __packed; -struct hws_trailer_entry { - union { - struct { - unsigned int f:1; /* 0 - Block Full Indicator */ - unsigned int a:1; /* 1 - Alert request control */ - unsigned int t:1; /* 2 - Timestamp format */ - unsigned int :29; /* 3 - 31: Reserved */ - unsigned int bsdes:16; /* 32-47: size of basic SDE */ - unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ - }; - unsigned long long flags; /* 0 - 63: All indicators */ +union hws_trailer_header { + struct { + unsigned int f:1; /* 0 - Block Full Indicator */ + unsigned int a:1; /* 1 - Alert request control */ + unsigned int t:1; /* 2 - Timestamp format */ + unsigned int :29; /* 3 - 31: Reserved */ + unsigned int bsdes:16; /* 32-47: size of basic SDE */ + unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ + unsigned long long overflow; /* 64 - Overflow Count */ }; - unsigned long long overflow; /* 64 - sample Overflow count */ + __uint128_t val; +}; + +struct hws_trailer_entry { + union hws_trailer_header header; /* 0 - 15 Flags + Overflow Count */ unsigned char timestamp[16]; /* 16 - 31 timestamp */ unsigned long long reserved1; /* 32 -Reserved */ unsigned long long reserved2; /* */ @@ -290,14 +292,11 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi, return USEC_PER_SEC * qsi->cpu_speed / rate; } -#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL -#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL - /* Return TOD timestamp contained in an trailer entry */ static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te) { /* TOD in STCKE format */ - if (te->t) + if (te->header.t) return *((unsigned long long *) &te->timestamp[1]); /* TOD in STCK format */ diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 332a499651308..ce886a03545ae 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -163,14 +163,15 @@ static void free_sampling_buffer(struct sf_buffer *sfb) static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) { - unsigned long sdb, *trailer; + struct hws_trailer_entry *te; + unsigned long sdb; /* Allocate and initialize sample-data-block */ sdb = get_zeroed_page(gfp_flags); if (!sdb) return -ENOMEM; - trailer = trailer_entry_ptr(sdb); - *trailer = SDB_TE_ALERT_REQ_MASK; + te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb); + te->header.a = 1; /* Link SDB into the sample-data-block-table */ *sdbt = sdb; @@ -1206,7 +1207,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, "%s: Found unknown" " sampling data entry: te->f %i" " basic.def %#4x (%p)\n", __func__, - te->f, sample->def, sample); + te->header.f, sample->def, sample); /* Sample slot is not yet written or other record. * * This condition can occur if the buffer was reused @@ -1217,7 +1218,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, * that are not full. Stop processing if the first * invalid format was detected. */ - if (!te->f) + if (!te->header.f) break; } @@ -1227,6 +1228,16 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, } } +static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new) +{ + asm volatile( + " cdsg %[old],%[new],%[ptr]\n" + : [old] "+d" (old), [ptr] "+QS" (*ptr) + : [new] "d" (new) + : "memory", "cc"); + return old; +} + /* hw_perf_event_update() - Process sampling buffer * @event: The perf event * @flush_all: Flag to also flush partially filled sample-data-blocks @@ -1243,10 +1254,11 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, */ static void hw_perf_event_update(struct perf_event *event, int flush_all) { + unsigned long long event_overflow, sampl_overflow, num_sdb; + union hws_trailer_header old, prev, new; struct hw_perf_event *hwc = &event->hw; struct hws_trailer_entry *te; unsigned long *sdbt; - unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; int done; /* @@ -1266,25 +1278,25 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); /* Leave loop if no more work to do (block full indicator) */ - if (!te->f) { + if (!te->header.f) { done = 1; if (!flush_all) break; } /* Check the sample overflow count */ - if (te->overflow) + if (te->header.overflow) /* Account sample overflows and, if a particular limit * is reached, extend the sampling buffer. * For details, see sfb_account_overflows(). */ - sampl_overflow += te->overflow; + sampl_overflow += te->header.overflow; /* Timestamps are valid for full sample-data-blocks only */ debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx " "overflow %llu timestamp %#llx\n", - __func__, (unsigned long)sdbt, te->overflow, - (te->f) ? trailer_timestamp(te) : 0ULL); + __func__, (unsigned long)sdbt, te->header.overflow, + (te->header.f) ? trailer_timestamp(te) : 0ULL); /* Collect all samples from a single sample-data-block and * flag if an (perf) event overflow happened. If so, the PMU @@ -1294,12 +1306,16 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) num_sdb++; /* Reset trailer (using compare-double-and-swap) */ + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; - te_flags |= SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - te->flags, te->overflow, - te_flags, 0ULL)); + old.val = prev.val; + new.val = prev.val; + new.f = 0; + new.a = 1; + new.overflow = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); /* Advance to next sample-data-block */ sdbt++; @@ -1384,7 +1400,7 @@ static void aux_output_end(struct perf_output_handle *handle) range_scan = AUX_SDB_NUM_ALERT(aux); for (i = 0, idx = aux->head; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); - if (!(te->flags & SDB_TE_BUFFER_FULL_MASK)) + if (!te->header.f) break; } /* i is num of SDBs which are full */ @@ -1392,7 +1408,7 @@ static void aux_output_end(struct perf_output_handle *handle) /* Remove alert indicators in the buffer */ te = aux_sdb_trailer(aux, aux->alert_mark); - te->flags &= ~SDB_TE_ALERT_REQ_MASK; + te->header.a = 0; debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n", __func__, i, range_scan, aux->head); @@ -1437,9 +1453,9 @@ static int aux_output_begin(struct perf_output_handle *handle, idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); - te->flags &= ~(SDB_TE_BUFFER_FULL_MASK | - SDB_TE_ALERT_REQ_MASK); - te->overflow = 0; + te->header.f = 0; + te->header.a = 0; + te->header.overflow = 0; } /* Save the position of empty SDBs */ aux->empty_mark = aux->head + range - 1; @@ -1448,7 +1464,7 @@ static int aux_output_begin(struct perf_output_handle *handle, /* Set alert indicator */ aux->alert_mark = aux->head + range/2 - 1; te = aux_sdb_trailer(aux, aux->alert_mark); - te->flags = te->flags | SDB_TE_ALERT_REQ_MASK; + te->header.a = 1; /* Reset hardware buffer head */ head = AUX_SDB_INDEX(aux, aux->head); @@ -1475,14 +1491,17 @@ static int aux_output_begin(struct perf_output_handle *handle, static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, unsigned long long *overflow) { - unsigned long long orig_overflow, orig_flags, new_flags; + union hws_trailer_header old, prev, new; struct hws_trailer_entry *te; te = aux_sdb_trailer(aux, alert_index); + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - orig_flags = te->flags; - *overflow = orig_overflow = te->overflow; - if (orig_flags & SDB_TE_BUFFER_FULL_MASK) { + old.val = prev.val; + new.val = prev.val; + *overflow = old.overflow; + if (old.f) { /* * SDB is already set by hardware. * Abort and try to set somewhere @@ -1490,10 +1509,10 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, */ return false; } - new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - orig_flags, orig_overflow, - new_flags, 0ULL)); + new.a = 1; + new.overflow = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); return true; } @@ -1522,8 +1541,9 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, unsigned long long *overflow) { - unsigned long long orig_overflow, orig_flags, new_flags; unsigned long i, range_scan, idx, idx_old; + union hws_trailer_header old, prev, new; + unsigned long long orig_overflow; struct hws_trailer_entry *te; debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld " @@ -1554,17 +1574,20 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, idx_old = idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - orig_flags = te->flags; - orig_overflow = te->overflow; - new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK; + old.val = prev.val; + new.val = prev.val; + orig_overflow = old.overflow; + new.f = 0; + new.overflow = 0; if (idx == aux->alert_mark) - new_flags |= SDB_TE_ALERT_REQ_MASK; + new.a = 1; else - new_flags &= ~SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - orig_flags, orig_overflow, - new_flags, 0ULL)); + new.a = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); *overflow += orig_overflow; } From e3f360db08d55a14112bd27454e616a24296a8b0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 9 Jan 2023 11:51:20 +0100 Subject: [PATCH 0661/1593] s390/percpu: add READ_ONCE() to arch_this_cpu_to_op_simple() Make sure that *ptr__ within arch_this_cpu_to_op_simple() is only dereferenced once by using READ_ONCE(). Otherwise the compiler could generate incorrect code. Cc: Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/include/asm/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index cb5fc06904354..081837b391e35 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -31,7 +31,7 @@ pcp_op_T__ *ptr__; \ preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ - prev__ = *ptr__; \ + prev__ = READ_ONCE(*ptr__); \ do { \ old__ = prev__; \ new__ = old__ op (val); \ From 42400d99e9f0728c17240edb9645637ead40f6b9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 9 Jan 2023 15:54:56 +0100 Subject: [PATCH 0662/1593] KVM: s390: interrupt: use READ_ONCE() before cmpxchg() Use READ_ONCE() before cmpxchg() to prevent that the compiler generates code that fetches the to be compared old value several times from memory. Reviewed-by: Christian Borntraeger Acked-by: Christian Borntraeger Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20230109145456.2895385-1-hca@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/kvm/interrupt.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 1dae78deddf28..ab26aa53ee371 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -83,8 +83,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + union esca_sigp_ctrl new_val = {0}, old_val; + old_val = READ_ONCE(*sigp_ctrl); new_val.scn = src_id; new_val.c = 1; old_val.c = 0; @@ -95,8 +96,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) struct bsca_block *sca = vcpu->kvm->arch.sca; union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + union bsca_sigp_ctrl new_val = {0}, old_val; + old_val = READ_ONCE(*sigp_ctrl); new_val.scn = src_id; new_val.c = 1; old_val.c = 0; @@ -126,16 +128,18 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu) struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union esca_sigp_ctrl old = *sigp_ctrl; + union esca_sigp_ctrl old; + old = READ_ONCE(*sigp_ctrl); expect = old.value; rc = cmpxchg(&sigp_ctrl->value, old.value, 0); } else { struct bsca_block *sca = vcpu->kvm->arch.sca; union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union bsca_sigp_ctrl old = *sigp_ctrl; + union bsca_sigp_ctrl old; + old = READ_ONCE(*sigp_ctrl); expect = old.value; rc = cmpxchg(&sigp_ctrl->value, old.value, 0); } From 58fc14e14d288d728bf48377b81bb77fd17bfe3f Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Thu, 5 Jan 2023 10:37:01 -0300 Subject: [PATCH 0663/1593] drm/i915/gt: Cover rest of SVG unit MCR registers CHICKEN_RASTER_{1,2} got overlooked with the move done in commit a9e69428b1b4 ("drm/i915: Define MCR registers explicitly"). Registers from the SVG unit became multicast as of Xe_HP graphics. BSpec: 66534 Fixes: a9e69428b1b4 ("drm/i915: Define MCR registers explicitly") Signed-off-by: Gustavo Sousa Cc: Matt Roper Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230105133701.19556-1-gustavo.sousa@intel.com (cherry picked from commit 10903b0a0f4d4964b352fa3df12d3d2ef5fb7a3b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 ++-- drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index c3cd926917957..4a14f87e441e5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -406,10 +406,10 @@ #define GEN9_WM_CHICKEN3 _MMIO(0x5588) #define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9) -#define CHICKEN_RASTER_1 _MMIO(0x6204) +#define CHICKEN_RASTER_1 MCR_REG(0x6204) #define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) -#define CHICKEN_RASTER_2 _MMIO(0x6208) +#define CHICKEN_RASTER_2 MCR_REG(0x6208) #define TBIMR_FAST_CLIP REG_BIT(5) #define VFLSKPD MCR_REG(0x62a8) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 2afb4f80a954d..5be2f91801fbf 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -645,7 +645,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); + wa_mcr_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); wa_mcr_add(wal, @@ -775,7 +775,7 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); /* Wa_15010599737:dg2 */ - wa_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); + wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); } static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, From 7fd26a27680aa9032920f798a5a8b38a2c61075f Mon Sep 17 00:00:00 2001 From: Syed Saba Kareem Date: Wed, 11 Jan 2023 15:51:23 +0530 Subject: [PATCH 0664/1593] ASoC: amd: yc: Add DMI support for new acer/emdoor platforms Adding DMI entries to support new acer/emdoor platforms. Suggested-by: shanshengwang Signed-off-by: Syed Saba Kareem Link: https://lore.kernel.org/r/20230111102130.2276391-1-Syed.SabaKareem@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 0d283e41f66dc..00fb976e0b81e 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -234,6 +234,20 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Blade 14 (2022) - RZ09-0427"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "RB"), + DMI_MATCH(DMI_PRODUCT_NAME, "Swift SFA16-41"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IRBIS"), + DMI_MATCH(DMI_PRODUCT_NAME, "15NBC1011"), + } + }, {} }; From a4e03921c1bb118e6718e0a3b0322a2c13ed172b Mon Sep 17 00:00:00 2001 From: Giulio Benetti Date: Tue, 13 Dec 2022 20:24:03 +0100 Subject: [PATCH 0665/1593] ARM: 9280/1: mm: fix warning on phys_addr_t to void pointer assignment zero_page is a void* pointer but memblock_alloc() returns phys_addr_t type so this generates a warning while using clang and with -Wint-error enabled that becomes and error. So let's cast the return of memblock_alloc() to (void *). Cc: # 4.14.x + Fixes: 340a982825f7 ("ARM: 9266/1: mm: fix no-MMU ZERO_PAGE() implementation") Signed-off-by: Giulio Benetti Signed-off-by: Russell King (Oracle) --- arch/arm/mm/nommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index c1494a4dee25b..53f2d8774fdb9 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -161,7 +161,7 @@ void __init paging_init(const struct machine_desc *mdesc) mpu_setup(); /* allocate the zero page. */ - zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + zero_page = (void *)memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!zero_page) panic("%s: Failed to allocate %lu bytes align=0x%lx\n", __func__, PAGE_SIZE, PAGE_SIZE); From 3cb0f23039e0e3395a96e0f52bd69910ced2f720 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 6 Jan 2023 17:18:21 +0100 Subject: [PATCH 0666/1593] ARM: 9284/1: include from proc-macros.S to fix -Wundef warnings Since commit 80b6093b55e3 ("kbuild: add -Wundef to KBUILD_CPPFLAGS for W=1 builds"), building with W=1 detects -Wundef warnings for assembly code. $ make W=1 ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- arch/arm/mm/ [snip] AS arch/arm/mm/cache-v7.o In file included from arch/arm/mm/cache-v7.S:17: arch/arm/mm/proc-macros.S:109:5: warning: "L_PTE_SHARED" is not defined, evaluates to 0 [-Wundef] 109 | #if L_PTE_SHARED != PTE_EXT_SHARED | ^~~~~~~~~~~~ arch/arm/mm/proc-macros.S:109:21: warning: "PTE_EXT_SHARED" is not defined, evaluates to 0 [-Wundef] 109 | #if L_PTE_SHARED != PTE_EXT_SHARED | ^~~~~~~~~~~~~~ arch/arm/mm/proc-macros.S:113:10: warning: "L_PTE_XN" is not defined, evaluates to 0 [-Wundef] 113 | (L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\ | ^~~~~~~~ arch/arm/mm/proc-macros.S:113:19: warning: "L_PTE_USER" is not defined, evaluates to 0 [-Wundef] 113 | (L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\ | ^~~~~~~~~~ arch/arm/mm/proc-macros.S:113:30: warning: "L_PTE_RDONLY" is not defined, evaluates to 0 [-Wundef] 113 | (L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\ | ^~~~~~~~~~~~ arch/arm/mm/proc-macros.S:113:43: warning: "L_PTE_DIRTY" is not defined, evaluates to 0 [-Wundef] 113 | (L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\ | ^~~~~~~~~~~ arch/arm/mm/proc-macros.S:113:55: warning: "L_PTE_YOUNG" is not defined, evaluates to 0 [-Wundef] 113 | (L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\ | ^~~~~~~~~~~ arch/arm/mm/proc-macros.S:114:10: warning: "L_PTE_PRESENT" is not defined, evaluates to 0 [-Wundef] 114 | L_PTE_PRESENT) > L_PTE_SHARED | ^~~~~~~~~~~~~ arch/arm/mm/proc-macros.S:114:27: warning: "L_PTE_SHARED" is not defined, evaluates to 0 [-Wundef] 114 | L_PTE_PRESENT) > L_PTE_SHARED | ^~~~~~~~~~~~ Include from proc-macros.S to fix the warnings. Signed-off-by: Masahiro Yamada Signed-off-by: Russell King (Oracle) --- arch/arm/mm/proc-macros.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index fa6999e24b074..e43f6d716b4b8 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -6,6 +6,7 @@ * VM_EXEC */ #include +#include #include #ifdef CONFIG_CPU_V7M From e752e5454e6417da3f40ec1306a041ea96c56423 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Mon, 9 Jan 2023 22:25:47 +0000 Subject: [PATCH 0667/1593] adreno: Shutdown the GPU properly During kexec on ARM device, we notice that device_shutdown() only calls pm_runtime_force_suspend() while shutting down the GPU. This means the GPU kthread is still running and further, there maybe active submits. This causes all kinds of issues during a kexec reboot: Warning from shutdown path: [ 292.509662] WARNING: CPU: 0 PID: 6304 at [...] adreno_runtime_suspend+0x3c/0x44 [ 292.509863] Hardware name: Google Lazor (rev3 - 8) with LTE (DT) [ 292.509872] pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 292.509881] pc : adreno_runtime_suspend+0x3c/0x44 [ 292.509891] lr : pm_generic_runtime_suspend+0x30/0x44 [ 292.509905] sp : ffffffc014473bf0 [...] [ 292.510043] Call trace: [ 292.510051] adreno_runtime_suspend+0x3c/0x44 [ 292.510061] pm_generic_runtime_suspend+0x30/0x44 [ 292.510071] pm_runtime_force_suspend+0x54/0xc8 [ 292.510081] adreno_shutdown+0x1c/0x28 [ 292.510090] platform_shutdown+0x2c/0x38 [ 292.510104] device_shutdown+0x158/0x210 [ 292.510119] kernel_restart_prepare+0x40/0x4c And here from GPU kthread, an SError OOPs: [ 192.648789] el1h_64_error+0x7c/0x80 [ 192.648812] el1_interrupt+0x20/0x58 [ 192.648833] el1h_64_irq_handler+0x18/0x24 [ 192.648854] el1h_64_irq+0x7c/0x80 [ 192.648873] local_daif_inherit+0x10/0x18 [ 192.648900] el1h_64_sync_handler+0x48/0xb4 [ 192.648921] el1h_64_sync+0x7c/0x80 [ 192.648941] a6xx_gmu_set_oob+0xbc/0x1fc [ 192.648968] a6xx_hw_init+0x44/0xe38 [ 192.648991] msm_gpu_hw_init+0x48/0x80 [ 192.649013] msm_gpu_submit+0x5c/0x1a8 [ 192.649034] msm_job_run+0xb0/0x11c [ 192.649058] drm_sched_main+0x170/0x434 [ 192.649086] kthread+0x134/0x300 [ 192.649114] ret_from_fork+0x10/0x20 Fix by calling adreno_system_suspend() in the device_shutdown() path. [ Applied Rob Clark feedback on fixing adreno_unbind() similarly, also tested as above. ] Cc: Rob Clark Cc: Steven Rostedt Cc: Ricardo Ribalda Cc: Ross Zwisler Signed-off-by: Joel Fernandes (Google) Reviewed-by: Ricardo Ribalda Reviewed-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/517633/ Link: https://lore.kernel.org/r/20230109222547.1368644-1-joel@joelfernandes.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 628806423f7d2..36f062c7582f9 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -551,13 +551,14 @@ static int adreno_bind(struct device *dev, struct device *master, void *data) return 0; } +static int adreno_system_suspend(struct device *dev); static void adreno_unbind(struct device *dev, struct device *master, void *data) { struct msm_drm_private *priv = dev_get_drvdata(master); struct msm_gpu *gpu = dev_to_gpu(dev); - pm_runtime_force_suspend(dev); + WARN_ON_ONCE(adreno_system_suspend(dev)); gpu->funcs->destroy(gpu); priv->gpu_pdev = NULL; @@ -609,7 +610,7 @@ static int adreno_remove(struct platform_device *pdev) static void adreno_shutdown(struct platform_device *pdev) { - pm_runtime_force_suspend(&pdev->dev); + WARN_ON_ONCE(adreno_system_suspend(&pdev->dev)); } static const struct of_device_id dt_match[] = { From a66f1efcf748febea7758c4c3c8b5bc5294949ef Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 10 Jan 2023 13:28:59 -0800 Subject: [PATCH 0668/1593] drm/msm/gpu: Fix potential double-free If userspace was calling the MSM_SET_PARAM ioctl on multiple threads to set the COMM or CMDLINE param, it could trigger a race causing the previous value to be kfree'd multiple times. Fix this by serializing on the gpu lock. Signed-off-by: Rob Clark Fixes: d4726d770068 ("drm/msm: Add a way to override processes comm/cmdline") Patchwork: https://patchwork.freedesktop.org/patch/517778/ Link: https://lore.kernel.org/r/20230110212903.1925878-1-robdclark@gmail.com --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 4 ++++ drivers/gpu/drm/msm/msm_gpu.c | 2 ++ drivers/gpu/drm/msm/msm_gpu.h | 12 ++++++++++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 57586c794b84b..3605f095b2de2 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -352,6 +352,8 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx, /* Ensure string is null terminated: */ str[len] = '\0'; + mutex_lock(&gpu->lock); + if (param == MSM_PARAM_COMM) { paramp = &ctx->comm; } else { @@ -361,6 +363,8 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx, kfree(*paramp); *paramp = str; + mutex_unlock(&gpu->lock); + return 0; } case MSM_PARAM_SYSPROF: diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 021f4e29b613b..4f495eecc34ba 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -335,6 +335,8 @@ static void get_comm_cmdline(struct msm_gem_submit *submit, char **comm, char ** struct msm_file_private *ctx = submit->queue->ctx; struct task_struct *task; + WARN_ON(!mutex_is_locked(&submit->gpu->lock)); + /* Note that kstrdup will return NULL if argument is NULL: */ *comm = kstrdup(ctx->comm, GFP_KERNEL); *cmd = kstrdup(ctx->cmdline, GFP_KERNEL); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 58a72e6b14008..a89bfdc3d7f90 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -366,10 +366,18 @@ struct msm_file_private { */ int sysprof; - /** comm: Overridden task comm, see MSM_PARAM_COMM */ + /** + * comm: Overridden task comm, see MSM_PARAM_COMM + * + * Accessed under msm_gpu::lock + */ char *comm; - /** cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE */ + /** + * cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE + * + * Accessed under msm_gpu::lock + */ char *cmdline; /** From cf129830ee820f7fc90b98df193cd49d49344d09 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 10 Jan 2023 20:56:59 +0200 Subject: [PATCH 0669/1593] perf auxtrace: Fix address filter duplicate symbol selection When a match has been made to the nth duplicate symbol, return success not error. Example: Before: $ cat file.c cat: file.c: No such file or directory $ cat file1.c #include static void func(void) { printf("First func\n"); } void other(void); int main() { func(); other(); return 0; } $ cat file2.c #include static void func(void) { printf("Second func\n"); } void other(void) { func(); } $ gcc -Wall -Wextra -o test file1.c file2.c $ perf record -e intel_pt//u --filter 'filter func @ ./test' -- ./test Multiple symbols with name 'func' #1 0x1149 l func which is near main #2 0x1179 l func which is near other Disambiguate symbol name by inserting #n after the name e.g. func #2 Or select a global symbol by inserting #0 or #g or #G Failed to parse address filter: 'filter func @ ./test' Filter format is: filter|start|stop|tracestop [/ ] [@] Where multiple filters are separated by space or comma. $ perf record -e intel_pt//u --filter 'filter func #2 @ ./test' -- ./test Failed to parse address filter: 'filter func #2 @ ./test' Filter format is: filter|start|stop|tracestop [/ ] [@] Where multiple filters are separated by space or comma. After: $ perf record -e intel_pt//u --filter 'filter func #2 @ ./test' -- ./test First func Second func [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.016 MB perf.data ] $ perf script --itrace=b -Ftime,flags,ip,sym,addr --ns 1231062.526977619: tr strt 0 [unknown] => 558495708179 func 1231062.526977619: tr end call 558495708188 func => 558495708050 _init 1231062.526979286: tr strt 0 [unknown] => 55849570818d func 1231062.526979286: tr end return 55849570818f func => 55849570819d other Fixes: 1b36c03e356936d6 ("perf record: Add support for using symbols in address filters") Reported-by: Dmitrii Dolgov <9erthalion6@gmail.com> Signed-off-by: Adrian Hunter Tested-by: Dmitry Dolgov <9erthalion6@gmail.com> Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230110185659.15979-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 265d20cc126b1..c2e323cd7d496 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -2611,7 +2611,7 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start, *size = sym->start - *start; if (idx > 0) { if (*size) - return 1; + return 0; } else if (dso_sym_match(sym, sym_name, &cnt, idx)) { print_duplicate_syms(dso, sym_name); return -EINVAL; From c273289fac370b6488757236cd62cc2cf04830b7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Jan 2023 12:54:42 +0100 Subject: [PATCH 0670/1593] selftests: netfilter: fix transaction test script timeout handling The kselftest framework uses a default timeout of 45 seconds for all test scripts. Increase the timeout to two minutes for the netfilter tests, this should hopefully be enough, Make sure that, should the script be canceled, the net namespace and the spawned ping instances are removed. Fixes: 25d8bcedbf43 ("selftests: add script to stress-test nft packet path vs. control plane") Reported-by: Mirsad Goran Todorovac Signed-off-by: Florian Westphal Tested-by: Mirsad Goran Todorovac Signed-off-by: Pablo Neira Ayuso --- .../selftests/netfilter/nft_trans_stress.sh | 16 +++++++++------- tools/testing/selftests/netfilter/settings | 1 + 2 files changed, 10 insertions(+), 7 deletions(-) create mode 100644 tools/testing/selftests/netfilter/settings diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh index a7f62ad4f6611..2ffba45a78bf4 100755 --- a/tools/testing/selftests/netfilter/nft_trans_stress.sh +++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh @@ -10,12 +10,20 @@ ksft_skip=4 testns=testns-$(mktemp -u "XXXXXXXX") +tmp="" tables="foo bar baz quux" global_ret=0 eret=0 lret=0 +cleanup() { + ip netns pids "$testns" | xargs kill 2>/dev/null + ip netns del "$testns" + + rm -f "$tmp" +} + check_result() { local r=$1 @@ -43,6 +51,7 @@ if [ $? -ne 0 ];then exit $ksft_skip fi +trap cleanup EXIT tmp=$(mktemp) for table in $tables; do @@ -139,11 +148,4 @@ done check_result $lret "add/delete with nftrace enabled" -pkill -9 ping - -wait - -rm -f "$tmp" -ip netns del "$testns" - exit $global_ret diff --git a/tools/testing/selftests/netfilter/settings b/tools/testing/selftests/netfilter/settings new file mode 100644 index 0000000000000..6091b45d226ba --- /dev/null +++ b/tools/testing/selftests/netfilter/settings @@ -0,0 +1 @@ +timeout=120 From 9ea4b476cea1b7d461d16dda25ca3c7e616e2d15 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Wed, 11 Jan 2023 11:57:39 +0000 Subject: [PATCH 0671/1593] netfilter: ipset: Fix overflow before widen in the bitmap_ip_create() function. When first_ip is 0, last_ip is 0xFFFFFFFF, and netmask is 31, the value of an arithmetic expression 2 << (netmask - mask_bits - 1) is subject to overflow due to a failure casting operands to a larger data type before performing the arithmetic. Note that it's harmless since the value will be checked at the next step. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with SVACE. Fixes: b9fed748185a ("netfilter: ipset: Check and reject crazy /0 input parameters") Signed-off-by: Ilia.Gavrilov Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_bitmap_ip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index a8ce04a4bb72a..e4fa00abde6a2 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -308,8 +308,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_BITMAP_RANGE; pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask); - hosts = 2 << (32 - netmask - 1); - elements = 2 << (netmask - mask_bits - 1); + hosts = 2U << (32 - netmask - 1); + elements = 2UL << (netmask - mask_bits - 1); } if (elements > IPSET_BITMAP_MAX_RANGE + 1) return -IPSET_ERR_BITMAP_RANGE_SIZE; From 696e1a48b1a1b01edad542a1ef293665864a4dd0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 11 Jan 2023 17:07:33 +0100 Subject: [PATCH 0672/1593] netfilter: nft_payload: incorrect arithmetics when fetching VLAN header bits If the offset + length goes over the ethernet + vlan header, then the length is adjusted to copy the bytes that are within the boundaries of the vlan_ethhdr scratchpad area. The remaining bytes beyond ethernet + vlan header are copied directly from the skbuff data area. Fix incorrect arithmetic operator: subtract, not add, the size of the vlan header in case of double-tagged packets to adjust the length accordingly to address CVE-2023-0179. Reported-by: Davide Ornaghi Fixes: f6ae9f120dad ("netfilter: nft_payload: add C-VLAN support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_payload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 17b418a5a593a..3a3c7746e88fe 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -63,7 +63,7 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) return false; if (offset + len > VLAN_ETH_HLEN + vlan_hlen) - ethlen -= offset + len - VLAN_ETH_HLEN + vlan_hlen; + ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen; memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen); From 3a9ae31ac26a58d33008c42f6cd022afc2af2dc0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 9 Jan 2023 06:02:16 -0500 Subject: [PATCH 0673/1593] Documentation: kvm: fix SRCU locking order docs kvm->srcu is taken in KVM_RUN and several other vCPU ioctls, therefore vcpu->mutex is susceptible to the same deadlock that is documented for kvm->slots_lock. The same holds for kvm->lock, since kvm->lock is held outside vcpu->mutex. Fix the documentation and rearrange it to highlight the difference between these locks and kvm->slots_arch_lock, and how kvm->slots_arch_lock can be useful while processing a vmexit. Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/locking.rst | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst index a3ca76f9be751..5ee017740d554 100644 --- a/Documentation/virt/kvm/locking.rst +++ b/Documentation/virt/kvm/locking.rst @@ -24,17 +24,18 @@ The acquisition orders for mutexes are as follows: For SRCU: -- ``synchronize_srcu(&kvm->srcu)`` is called _inside_ - the kvm->slots_lock critical section, therefore kvm->slots_lock - cannot be taken inside a kvm->srcu read-side critical section. - Instead, kvm->slots_arch_lock is released before the call - to ``synchronize_srcu()`` and _can_ be taken inside a - kvm->srcu read-side critical section. - -- kvm->lock is taken inside kvm->srcu, therefore - ``synchronize_srcu(&kvm->srcu)`` cannot be called inside - a kvm->lock critical section. If you cannot delay the - call until after kvm->lock is released, use ``call_srcu``. +- ``synchronize_srcu(&kvm->srcu)`` is called inside critical sections + for kvm->lock, vcpu->mutex and kvm->slots_lock. These locks _cannot_ + be taken inside a kvm->srcu read-side critical section; that is, the + following is broken:: + + srcu_read_lock(&kvm->srcu); + mutex_lock(&kvm->slots_lock); + +- kvm->slots_arch_lock instead is released before the call to + ``synchronize_srcu()``. It _can_ therefore be taken inside a + kvm->srcu read-side critical section, for example while processing + a vmexit. On x86: From 23e60258aeafb04e5dd813f03cb0c8ab7b01462a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 11 Jan 2023 18:06:48 +0000 Subject: [PATCH 0674/1593] KVM: x86/xen: Fix lockdep warning on "recursive" gpc locking In commit 5ec3289b31 ("KVM: x86/xen: Compatibility fixes for shared runstate area") we declared it safe to obtain two gfn_to_pfn_cache locks at the same time: /* * The guest's runstate_info is split across two pages and we * need to hold and validate both GPCs simultaneously. We can * declare a lock ordering GPC1 > GPC2 because nothing else * takes them more than one at a time. */ However, we forgot to tell lockdep. Do so, by setting a subclass on the first lock before taking the second. Fixes: 5ec3289b31 ("KVM: x86/xen: Compatibility fixes for shared runstate area") Suggested-by: Peter Zijlstra Signed-off-by: David Woodhouse Message-Id: <20230111180651.14394-1-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 2e29bdc2949ca..bfa9809721b5b 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -304,8 +304,10 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) * The guest's runstate_info is split across two pages and we * need to hold and validate both GPCs simultaneously. We can * declare a lock ordering GPC1 > GPC2 because nothing else - * takes them more than one at a time. + * takes them more than one at a time. Set a subclass on the + * gpc1 lock to make lockdep shut up about it. */ + lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_); read_lock(&gpc2->lock); if (!kvm_gpc_check(gpc2, user_len2)) { From bbe17c625d6843e9cdf14d81fbece1b0f0c3fb2f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 11 Jan 2023 18:06:49 +0000 Subject: [PATCH 0675/1593] KVM: x86/xen: Fix potential deadlock in kvm_xen_update_runstate_guest() The kvm_xen_update_runstate_guest() function can be called when the vCPU is being scheduled out, from a preempt notifier. It *opportunistically* updates the runstate area in the guest memory, if the gfn_to_pfn_cache which caches the appropriate address is still valid. If there is *contention* when it attempts to obtain gpc->lock, then locking inside the priority inheritance checks may cause a deadlock. Lockdep reports: [13890.148997] Chain exists of: &gpc->lock --> &p->pi_lock --> &rq->__lock [13890.149002] Possible unsafe locking scenario: [13890.149003] CPU0 CPU1 [13890.149004] ---- ---- [13890.149005] lock(&rq->__lock); [13890.149007] lock(&p->pi_lock); [13890.149009] lock(&rq->__lock); [13890.149011] lock(&gpc->lock); [13890.149013] *** DEADLOCK *** In the general case, if there's contention for a read lock on gpc->lock, that's going to be because something else is either invalidating or revalidating the cache. Either way, we've raced with seeing it in an invalid state, in which case we would have aborted the opportunistic update anyway. So in the 'atomic' case when called from the preempt notifier, just switch to using read_trylock() and avoid the PI handling altogether. Signed-off-by: David Woodhouse Message-Id: <20230111180651.14394-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index bfa9809721b5b..651f9c5b873dd 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -271,7 +271,15 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) * Attempt to obtain the GPC lock on *both* (if there are two) * gfn_to_pfn caches that cover the region. */ - read_lock_irqsave(&gpc1->lock, flags); + if (atomic) { + local_irq_save(flags); + if (!read_trylock(&gpc1->lock)) { + local_irq_restore(flags); + return; + } + } else { + read_lock_irqsave(&gpc1->lock, flags); + } while (!kvm_gpc_check(gpc1, user_len1)) { read_unlock_irqrestore(&gpc1->lock, flags); @@ -308,7 +316,14 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) * gpc1 lock to make lockdep shut up about it. */ lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_); - read_lock(&gpc2->lock); + if (atomic) { + if (!read_trylock(&gpc2->lock)) { + read_unlock_irqrestore(&gpc1->lock, flags); + return; + } + } else { + read_lock(&gpc2->lock); + } if (!kvm_gpc_check(gpc2, user_len2)) { read_unlock(&gpc2->lock); From 42a90008f890afc41837dfeec1f0b1e7bcecf94a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 11 Jan 2023 18:06:50 +0000 Subject: [PATCH 0676/1593] KVM: Ensure lockdep knows about kvm->lock vs. vcpu->mutex ordering rule Documentation/virt/kvm/locking.rst tells us that kvm->lock is taken outside vcpu->mutex. But that doesn't actually happen very often; it's only in some esoteric cases like migration with AMD SEV. This means that lockdep usually doesn't notice, and doesn't do its job of keeping us honest. Ensure that lockdep *always* knows about the ordering of these two locks, by briefly taking vcpu->mutex in kvm_vm_ioctl_create_vcpu() while kvm->lock is held. Signed-off-by: David Woodhouse Message-Id: <20230111180651.14394-3-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 13e88297f9996..9c60384b5ae0b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3954,6 +3954,13 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) } mutex_lock(&kvm->lock); + +#ifdef CONFIG_LOCKDEP + /* Ensure that lockdep knows vcpu->mutex is taken *inside* kvm->lock */ + mutex_lock(&vcpu->mutex); + mutex_unlock(&vcpu->mutex); +#endif + if (kvm_get_vcpu_by_id(kvm, id)) { r = -EEXIST; goto unlock_vcpu_destroy; From ed02363fbbed52a3f5ea0d188edd09045a806eb5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 12 Dec 2022 10:19:37 +0800 Subject: [PATCH 0677/1593] btrfs: add extra error messages to cover non-ENOMEM errors from device_add_list() [BUG] When test case btrfs/219 (aka, mount a registered device but with a lower generation) failed, there is not any useful information for the end user to find out what's going wrong. The mount failure just looks like this: # mount -o loop /tmp/219.img2 /mnt/btrfs/ mount: /mnt/btrfs: mount(2) system call failed: File exists. dmesg(1) may have more information after failed mount system call. While the dmesg contains nothing but the loop device change: loop1: detected capacity change from 0 to 524288 [CAUSE] In device_list_add() we have a lot of extra checks to reject invalid cases. That function also contains the regular device scan result like the following prompt: BTRFS: device fsid 6222333e-f9f1-47e6-b306-55ddd4dcaef4 devid 1 transid 8 /dev/loop0 scanned by systemd-udevd (3027) But unfortunately not all errors have their own error messages, thus if we hit something wrong in device_add_list(), there may be no error messages at all. [FIX] Add errors message for all non-ENOMEM errors. For ENOMEM, I'd say we're in a much worse situation, and there should be some OOM messages way before our call sites. CC: stable@vger.kernel.org # 6.0+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index aa25fa335d3ed..bf0decaac7f30 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -768,8 +768,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, BTRFS_SUPER_FLAG_CHANGING_FSID_V2); error = lookup_bdev(path, &path_devt); - if (error) + if (error) { + btrfs_err(NULL, "failed to lookup block device for path %s: %d", + path, error); return ERR_PTR(error); + } if (fsid_change_in_progress) { if (!has_metadata_uuid) @@ -836,6 +839,9 @@ static noinline struct btrfs_device *device_list_add(const char *path, unsigned int nofs_flag; if (fs_devices->opened) { + btrfs_err(NULL, + "device %s belongs to fsid %pU, and the fs is already mounted", + path, fs_devices->fsid); mutex_unlock(&fs_devices->device_list_mutex); return ERR_PTR(-EBUSY); } @@ -905,6 +911,9 @@ static noinline struct btrfs_device *device_list_add(const char *path, * generation are equal. */ mutex_unlock(&fs_devices->device_list_mutex); + btrfs_err(NULL, +"device %s already registered with a higher generation, found %llu expect %llu", + path, found_transid, device->generation); return ERR_PTR(-EEXIST); } From 75181406b4eafacc531ff2ee5fb032bd93317e2b Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 10 Jan 2023 15:14:17 +0800 Subject: [PATCH 0678/1593] btrfs: qgroup: do not warn on record without old_roots populated [BUG] There are some reports from the mailing list that since v6.1 kernel, the WARN_ON() inside btrfs_qgroup_account_extent() gets triggered during rescan: WARNING: CPU: 3 PID: 6424 at fs/btrfs/qgroup.c:2756 btrfs_qgroup_account_extents+0x1ae/0x260 [btrfs] CPU: 3 PID: 6424 Comm: snapperd Tainted: P OE 6.1.2-1-default #1 openSUSE Tumbleweed 05c7a1b1b61d5627475528f71f50444637b5aad7 RIP: 0010:btrfs_qgroup_account_extents+0x1ae/0x260 [btrfs] Call Trace: btrfs_commit_transaction+0x30c/0xb40 [btrfs c39c9c546c241c593f03bd6d5f39ea1b676250f6] ? start_transaction+0xc3/0x5b0 [btrfs c39c9c546c241c593f03bd6d5f39ea1b676250f6] btrfs_qgroup_rescan+0x42/0xc0 [btrfs c39c9c546c241c593f03bd6d5f39ea1b676250f6] btrfs_ioctl+0x1ab9/0x25c0 [btrfs c39c9c546c241c593f03bd6d5f39ea1b676250f6] ? __rseq_handle_notify_resume+0xa9/0x4a0 ? mntput_no_expire+0x4a/0x240 ? __seccomp_filter+0x319/0x4d0 __x64_sys_ioctl+0x90/0xd0 do_syscall_64+0x5b/0x80 ? syscall_exit_to_user_mode+0x17/0x40 ? do_syscall_64+0x67/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fd9b790d9bf [CAUSE] Since commit e15e9f43c7ca ("btrfs: introduce BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING to skip qgroup accounting"), if our qgroup is already in inconsistent state, we will no longer do the time-consuming backref walk. This can leave some qgroup records without a valid old_roots ulist. Normally this is fine, as btrfs_qgroup_account_extents() would also skip those records if we have NO_ACCOUNTING flag set. But there is a small window, if we have NO_ACCOUNTING flag set, and inserted some qgroup_record without a old_roots ulist, but then the user triggered a qgroup rescan. During btrfs_qgroup_rescan(), we firstly clear NO_ACCOUNTING flag, then commit current transaction. And since we have a qgroup_record with old_roots = NULL, we trigger the WARN_ON() during btrfs_qgroup_account_extents(). [FIX] Unfortunately due to the introduction of NO_ACCOUNTING flag, the assumption that every qgroup_record would have its old_roots populated is no longer correct. Fix the false alerts and drop the WARN_ON(). Reported-by: Lukas Straub Reported-by: HanatoK Fixes: e15e9f43c7ca ("btrfs: introduce BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING to skip qgroup accounting") CC: stable@vger.kernel.org # 6.1 Link: https://lore.kernel.org/linux-btrfs/2403c697-ddaf-58ad-3829-0335fc89df09@gmail.com/ Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index d275bf24b250b..00851c86aa8aa 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2765,9 +2765,19 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) /* * Old roots should be searched when inserting qgroup - * extent record + * extent record. + * + * But for INCONSISTENT (NO_ACCOUNTING) -> rescan case, + * we may have some record inserted during + * NO_ACCOUNTING (thus no old_roots populated), but + * later we start rescan, which clears NO_ACCOUNTING, + * leaving some inserted records without old_roots + * populated. + * + * Those cases are rare and should not cause too much + * time spent during commit_transaction(). */ - if (WARN_ON(!record->old_roots)) { + if (!record->old_roots) { /* Search commit root to find old_roots */ ret = btrfs_find_all_roots(&ctx, false); if (ret < 0) From 85e79ec7b78f863178ca488fd8cb5b3de6347756 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 10 Jan 2023 15:04:32 +0900 Subject: [PATCH 0679/1593] btrfs: zoned: enable metadata over-commit for non-ZNS setup The commit 79417d040f4f ("btrfs: zoned: disable metadata overcommit for zoned") disabled the metadata over-commit to track active zones properly. However, it also introduced a heavy overhead by allocating new metadata block groups and/or flushing dirty buffers to release the space reservations. Specifically, a workload (write only without any sync operations) worsen its performance from 343.77 MB/sec (v5.19) to 182.89 MB/sec (v6.0). The performance is still bad on current misc-next which is 187.95 MB/sec. And, with this patch applied, it improves back to 326.70 MB/sec (+73.82%). This patch introduces a new fs_info->flag BTRFS_FS_NO_OVERCOMMIT to indicate it needs to disable the metadata over-commit. The flag is enabled when a device with max active zones limit is loaded into a file-system. Fixes: 79417d040f4f ("btrfs: zoned: disable metadata overcommit for zoned") CC: stable@vger.kernel.org # 6.0+ Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/fs.h | 6 ++++++ fs/btrfs/space-info.c | 3 ++- fs/btrfs/zoned.c | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index a749367e5ae2a..37b86acfcbcf8 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -119,6 +119,12 @@ enum { /* Indicate that we want to commit the transaction. */ BTRFS_FS_NEED_TRANS_COMMIT, + /* + * Indicate metadata over-commit is disabled. This is set when active + * zone tracking is needed. + */ + BTRFS_FS_NO_OVERCOMMIT, + #if BITS_PER_LONG == 32 /* Indicate if we have error/warn message printed on 32bit systems */ BTRFS_FS_32BIT_ERROR, diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index d28ee4e36f3d9..69c09508afb50 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -407,7 +407,8 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, return 0; used = btrfs_space_info_used(space_info, true); - if (btrfs_is_zoned(fs_info) && (space_info->flags & BTRFS_BLOCK_GROUP_METADATA)) + if (test_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags) && + (space_info->flags & BTRFS_BLOCK_GROUP_METADATA)) avail = 0; else avail = calc_available_free_space(fs_info, space_info, flush); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index a759668477bb2..1f503e8e42d48 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -539,6 +539,8 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) } atomic_set(&zone_info->active_zones_left, max_active_zones - nactive); + /* Overcommit does not work well with active zone tacking. */ + set_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags); } /* Validate superblock log */ From 1ecf7bd9c267ab85aff3c4a17fe56bc9754a13be Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 11 Jan 2023 15:04:47 +0100 Subject: [PATCH 0680/1593] s390: update defconfigs Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 7 ++++--- arch/s390/configs/defconfig | 6 ++++-- arch/s390/configs/zfcpdump_defconfig | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index a7b4e1d827580..74b35ec2ad28a 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -190,7 +190,6 @@ CONFIG_NFT_CT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m -CONFIG_NFT_OBJREF=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m @@ -569,6 +568,7 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 +# CONFIG_LEGACY_TIOCSTI is not set CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m CONFIG_HANGCHECK_TIMER=m @@ -660,6 +660,7 @@ CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_CRAMFS=m CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y @@ -705,6 +706,7 @@ CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y CONFIG_SECURITY_LANDLOCK=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y +CONFIG_INTEGRITY_PLATFORM_KEYRING=y CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y @@ -781,6 +783,7 @@ CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m +CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_CORDIC=m CONFIG_CRYPTO_LIB_CURVE25519=m CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m @@ -848,7 +851,6 @@ CONFIG_PREEMPT_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y CONFIG_FTRACE_STARTUP_TEST=y # CONFIG_EVENT_TRACE_STARTUP_TEST is not set @@ -870,7 +872,6 @@ CONFIG_FAIL_MAKE_REQUEST=y CONFIG_FAIL_IO_TIMEOUT=y CONFIG_FAIL_FUTEX=y CONFIG_FAULT_INJECTION_DEBUG_FS=y -CONFIG_FAIL_FUNCTION=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LKDTM=m CONFIG_TEST_MIN_HEAP=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 2bc2d0fe57743..cec71268e3bc2 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -181,7 +181,6 @@ CONFIG_NFT_CT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m -CONFIG_NFT_OBJREF=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m @@ -559,6 +558,7 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 +# CONFIG_LEGACY_TIOCSTI is not set CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m CONFIG_HANGCHECK_TIMER=m @@ -645,6 +645,7 @@ CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_CRAMFS=m CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y @@ -688,6 +689,7 @@ CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y CONFIG_SECURITY_LANDLOCK=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y +CONFIG_INTEGRITY_PLATFORM_KEYRING=y CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y @@ -766,6 +768,7 @@ CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m +CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m CONFIG_CRYPTO_LIB_CURVE25519=m @@ -798,7 +801,6 @@ CONFIG_STACK_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y CONFIG_SAMPLES=y CONFIG_SAMPLE_TRACE_PRINTK=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index ae14ab0b864d5..a9c0c81d1de99 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -13,7 +13,6 @@ CONFIG_TUNE_ZEC12=y # CONFIG_COMPAT is not set CONFIG_NR_CPUS=2 CONFIG_HZ_100=y -# CONFIG_RELOCATABLE is not set # CONFIG_CHSC_SCH is not set # CONFIG_SCM_BUS is not set CONFIG_CRASH_DUMP=y @@ -50,6 +49,7 @@ CONFIG_ZFCP=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set +# CONFIG_LEGACY_TIOCSTI is not set # CONFIG_HVC_IUCV is not set # CONFIG_HW_RANDOM_S390 is not set # CONFIG_HMC_DRV is not set From a33ae832bf3f2ac33e2e44b99f76130d3be848c5 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Tue, 10 Jan 2023 18:47:25 +0900 Subject: [PATCH 0681/1593] docs/conf.py: Use about.html only in sidebar of alabaster theme "about.html" is available only for the alabaster theme [1]. Unconditionally putting it to html_sidebars prevents us from using other themes which respect html_sidebars. Remove about.html from the initialization and insert it at the front for the alabaster theme. Link: [1] https://alabaster.readthedocs.io/en/latest/installation.html#sidebars Fixes: d5389d3145ef ("docs: Switch the default HTML theme to alabaster") Signed-off-by: Akira Yokosawa Cc: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/4b162dbe-2a7f-1710-93e0-754cf8680aae@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/conf.py b/Documentation/conf.py index 44899be7b2cca..d927737e3c10f 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -345,7 +345,11 @@ def get_cline_version(): # Custom sidebar templates, maps document names to template names. # Note that the RTD theme ignores this -html_sidebars = { '**': ["about.html", 'searchbox.html', 'localtoc.html', 'sourcelink.html']} +html_sidebars = { '**': ['searchbox.html', 'localtoc.html', 'sourcelink.html']} + +# about.html is available for alabaster theme. Add it at the front. +if html_theme == 'alabaster': + html_sidebars['**'].insert(0, 'about.html') # Output file base name for HTML help builder. htmlhelp_basename = 'TheLinuxKerneldoc' From a48fe6376919c182597b737984bb905a909727c2 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 10 Jan 2023 12:02:42 -0600 Subject: [PATCH 0682/1593] x86/pci: Simplify is_mmconf_reserved() messages is_mmconf_reserved() takes a "with_e820" parameter that only determines the message logged if it finds the MMCONFIG region is reserved. Pass the message directly, which will simplify a future patch that adds a new way of looking for that reservation. No functional change intended. Link: https://lore.kernel.org/r/20230110180243.1590045-2-helgaas@kernel.org Tested-by: Tony Luck Tested-by: Giovanni Cabiddu Tested-by: Kan Liang Signed-off-by: Bjorn Helgaas Reviewed-by: Dan Williams --- arch/x86/pci/mmconfig-shared.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 758cbfe55daa3..51c951699b2ed 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -446,13 +446,12 @@ typedef bool (*check_reserved_t)(u64 start, u64 end, enum e820_type type); static bool __ref is_mmconf_reserved(check_reserved_t is_reserved, struct pci_mmcfg_region *cfg, - struct device *dev, int with_e820) + struct device *dev, const char *method) { u64 addr = cfg->res.start; u64 size = resource_size(&cfg->res); u64 old_size = size; int num_buses; - char *method = with_e820 ? "E820" : "ACPI motherboard resources"; while (!is_reserved(addr, addr + size, E820_TYPE_RESERVED)) { size >>= 1; @@ -464,10 +463,10 @@ static bool __ref is_mmconf_reserved(check_reserved_t is_reserved, return false; if (dev) - dev_info(dev, "MMCONFIG at %pR reserved in %s\n", + dev_info(dev, "MMCONFIG at %pR reserved as %s\n", &cfg->res, method); else - pr_info(PREFIX "MMCONFIG at %pR reserved in %s\n", + pr_info(PREFIX "MMCONFIG at %pR reserved as %s\n", &cfg->res, method); if (old_size != size) { @@ -500,7 +499,8 @@ static bool __ref pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int early) { if (!early && !acpi_disabled) { - if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, 0)) + if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, + "ACPI motherboard resource")) return true; if (dev) @@ -527,7 +527,8 @@ pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int e /* Don't try to do this check unless configuration type 1 is available. how about type 2 ?*/ if (raw_pci_ops) - return is_mmconf_reserved(e820__mapped_all, cfg, dev, 1); + return is_mmconf_reserved(e820__mapped_all, cfg, dev, + "E820 entry"); return false; } From 310bc39546a435c83cc27a0eba878afac0d74714 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 11 Jan 2023 18:06:51 +0000 Subject: [PATCH 0683/1593] KVM: x86/xen: Avoid deadlock by adding kvm->arch.xen.xen_lock leaf node lock In commit 14243b387137a ("KVM: x86/xen: Add KVM_IRQ_ROUTING_XEN_EVTCHN and event channel delivery") the clever version of me left some helpful notes for those who would come after him: /* * For the irqfd workqueue, using the main kvm->lock mutex is * fine since this function is invoked from kvm_set_irq() with * no other lock held, no srcu. In future if it will be called * directly from a vCPU thread (e.g. on hypercall for an IPI) * then it may need to switch to using a leaf-node mutex for * serializing the shared_info mapping. */ mutex_lock(&kvm->lock); In commit 2fd6df2f2b47 ("KVM: x86/xen: intercept EVTCHNOP_send from guests") the other version of me ran straight past that comment without reading it, and introduced a potential deadlock by taking vcpu->mutex and kvm->lock in the wrong order. Solve this as originally suggested, by adding a leaf-node lock in the Xen state rather than using kvm->lock for it. Fixes: 2fd6df2f2b47 ("KVM: x86/xen: intercept EVTCHNOP_send from guests") Signed-off-by: David Woodhouse Message-Id: <20230111180651.14394-4-dwmw2@infradead.org> [Rebase, add docs. - Paolo] Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/locking.rst | 2 +- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/xen.c | 67 +++++++++++++----------------- 3 files changed, 32 insertions(+), 38 deletions(-) diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst index 5ee017740d554..a0146793d1972 100644 --- a/Documentation/virt/kvm/locking.rst +++ b/Documentation/virt/kvm/locking.rst @@ -39,7 +39,7 @@ For SRCU: On x86: -- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock +- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock and kvm->arch.xen.xen_lock - kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock and kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f35f1ff4427bb..6aaae18f18544 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1111,6 +1111,7 @@ struct msr_bitmap_range { /* Xen emulation context */ struct kvm_xen { + struct mutex xen_lock; u32 xen_version; bool long_mode; bool runstate_update_flag; diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 651f9c5b873dd..8fd41f5deae39 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -607,26 +607,26 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) { r = -EINVAL; } else { - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.long_mode = !!data->u.long_mode; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); r = 0; } break; case KVM_XEN_ATTR_TYPE_SHARED_INFO: - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); break; case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR: if (data->u.vector && data->u.vector < 0x10) r = -EINVAL; else { - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.upcall_vector = data->u.vector; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); r = 0; } break; @@ -636,9 +636,9 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) break; case KVM_XEN_ATTR_TYPE_XEN_VERSION: - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.xen_version = data->u.xen_version; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); r = 0; break; @@ -647,9 +647,9 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) r = -EOPNOTSUPP; break; } - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); r = 0; break; @@ -664,7 +664,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) { int r = -ENOENT; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); switch (data->type) { case KVM_XEN_ATTR_TYPE_LONG_MODE: @@ -703,7 +703,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) break; } - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); return r; } @@ -711,7 +711,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) { int idx, r = -ENOENT; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.xen.xen_lock); idx = srcu_read_lock(&vcpu->kvm->srcu); switch (data->type) { @@ -939,7 +939,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) } srcu_read_unlock(&vcpu->kvm->srcu, idx); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.xen.xen_lock); return r; } @@ -947,7 +947,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) { int r = -ENOENT; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.xen.xen_lock); switch (data->type) { case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO: @@ -1030,7 +1030,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) break; } - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.xen.xen_lock); return r; } @@ -1123,7 +1123,7 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) xhc->blob_size_32 || xhc->blob_size_64)) return -EINVAL; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); if (xhc->msr && !kvm->arch.xen_hvm_config.msr) static_branch_inc(&kvm_xen_enabled.key); @@ -1132,7 +1132,7 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc)); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); return 0; } @@ -1675,15 +1675,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm) mm_borrowed = true; } - /* - * For the irqfd workqueue, using the main kvm->lock mutex is - * fine since this function is invoked from kvm_set_irq() with - * no other lock held, no srcu. In future if it will be called - * directly from a vCPU thread (e.g. on hypercall for an IPI) - * then it may need to switch to using a leaf-node mutex for - * serializing the shared_info mapping. - */ - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); /* * It is theoretically possible for the page to be unmapped @@ -1712,7 +1704,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm) srcu_read_unlock(&kvm->srcu, idx); } while(!rc); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); if (mm_borrowed) kthread_unuse_mm(kvm->mm); @@ -1828,7 +1820,7 @@ static int kvm_xen_eventfd_update(struct kvm *kvm, int ret; /* Protect writes to evtchnfd as well as the idr lookup. */ - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port); ret = -ENOENT; @@ -1859,7 +1851,7 @@ static int kvm_xen_eventfd_update(struct kvm *kvm, } ret = 0; out_unlock: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); return ret; } @@ -1922,10 +1914,10 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm, evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority; } - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); ret = idr_alloc(&kvm->arch.xen.evtchn_ports, evtchnfd, port, port + 1, GFP_KERNEL); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); if (ret >= 0) return 0; @@ -1943,9 +1935,9 @@ static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port) { struct evtchnfd *evtchnfd; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); evtchnfd = idr_remove(&kvm->arch.xen.evtchn_ports, port); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); if (!evtchnfd) return -ENOENT; @@ -1963,7 +1955,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm) int i; int n = 0; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); /* * Because synchronize_srcu() cannot be called inside the @@ -1975,7 +1967,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm) all_evtchnfds = kmalloc_array(n, sizeof(struct evtchnfd *), GFP_KERNEL); if (!all_evtchnfds) { - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); return -ENOMEM; } @@ -1984,7 +1976,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm) all_evtchnfds[n++] = evtchnfd; idr_remove(&kvm->arch.xen.evtchn_ports, evtchnfd->send_port); } - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); synchronize_srcu(&kvm->srcu); @@ -2086,6 +2078,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) void kvm_xen_init_vm(struct kvm *kvm) { + mutex_init(&kvm->arch.xen.xen_lock); idr_init(&kvm->arch.xen.evtchn_ports); kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm, NULL, KVM_HOST_USES_PFN); } From f12cd06109f47c2fb4b23a45ab55404c47ef7fae Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 28 Dec 2022 17:36:03 +0800 Subject: [PATCH 0684/1593] powerpc/64s/hash: Make stress_hpt_timer_fn() static stress_hpt_timer_fn() is only used in hash_utils.c, make it static. Fixes: 6b34a099faa1 ("powerpc/64s/hash: add stress_hpt kernel boot option to increase hash faults") Signed-off-by: Yang Yingliang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221228093603.3166599-1-yangyingliang@huawei.com --- arch/powerpc/mm/book3s64/hash_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 80a148c57de81..44a35ed4f6860 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1012,7 +1012,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, void hpt_clear_stress(void); static struct timer_list stress_hpt_timer; -void stress_hpt_timer_fn(struct timer_list *timer) +static void stress_hpt_timer_fn(struct timer_list *timer) { int next_cpu; From 0e678153f5be7e6c8d28835f5a678618da4b7a9c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 22 Dec 2022 21:55:10 +0100 Subject: [PATCH 0685/1593] mm/hugetlb: fix PTE marker handling in hugetlb_change_protection() Patch series "mm/hugetlb: uffd-wp fixes for hugetlb_change_protection()". Playing with virtio-mem and background snapshots (using uffd-wp) on hugetlb in QEMU, I managed to trigger a VM_BUG_ON(). Looking into the details, hugetlb_change_protection() seems to not handle uffd-wp correctly in all cases. Patch #1 fixes my test case. I don't have reproducers for patch #2, as it requires running into migration entries. I did not yet check in detail yet if !hugetlb code requires similar care. This patch (of 2): There are two problematic cases when stumbling over a PTE marker in hugetlb_change_protection(): (1) We protect an uffd-wp PTE marker a second time using uffd-wp: we will end up in the "!huge_pte_none(pte)" case and mess up the PTE marker. (2) We unprotect a uffd-wp PTE marker: we will similarly end up in the "!huge_pte_none(pte)" case even though we cleared the PTE, because the "pte" variable is stale. We'll mess up the PTE marker. For example, if we later stumble over such a "wrongly modified" PTE marker, we'll treat it like a present PTE that maps some garbage page. This can, for example, be triggered by mapping a memfd backed by huge pages, registering uffd-wp, uffd-wp'ing an unmapped page and (a) uffd-wp'ing it a second time; or (b) uffd-unprotecting it; or (c) unregistering uffd-wp. Then, ff we trigger fallocate(FALLOC_FL_PUNCH_HOLE) on that file range, we will run into a VM_BUG_ON: [ 195.039560] page:00000000ba1f2987 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x0 [ 195.039565] flags: 0x7ffffc0001000(reserved|node=0|zone=0|lastcpupid=0x1fffff) [ 195.039568] raw: 0007ffffc0001000 ffffe742c0000008 ffffe742c0000008 0000000000000000 [ 195.039569] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 [ 195.039569] page dumped because: VM_BUG_ON_PAGE(compound && !PageHead(page)) [ 195.039573] ------------[ cut here ]------------ [ 195.039574] kernel BUG at mm/rmap.c:1346! [ 195.039579] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 195.039581] CPU: 7 PID: 4777 Comm: qemu-system-x86 Not tainted 6.0.12-200.fc36.x86_64 #1 [ 195.039583] Hardware name: LENOVO 20WNS1F81N/20WNS1F81N, BIOS N35ET50W (1.50 ) 09/15/2022 [ 195.039584] RIP: 0010:page_remove_rmap+0x45b/0x550 [ 195.039588] Code: [...] [ 195.039589] RSP: 0018:ffffbc03c3633ba8 EFLAGS: 00010292 [ 195.039591] RAX: 0000000000000040 RBX: ffffe742c0000000 RCX: 0000000000000000 [ 195.039592] RDX: 0000000000000002 RSI: ffffffff8e7aac1a RDI: 00000000ffffffff [ 195.039592] RBP: 0000000000000001 R08: 0000000000000000 R09: ffffbc03c3633a08 [ 195.039593] R10: 0000000000000003 R11: ffffffff8f146328 R12: ffff9b04c42754b0 [ 195.039594] R13: ffffffff8fcc6328 R14: ffffbc03c3633c80 R15: ffff9b0484ab9100 [ 195.039595] FS: 00007fc7aaf68640(0000) GS:ffff9b0bbf7c0000(0000) knlGS:0000000000000000 [ 195.039596] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 195.039597] CR2: 000055d402c49110 CR3: 0000000159392003 CR4: 0000000000772ee0 [ 195.039598] PKRU: 55555554 [ 195.039599] Call Trace: [ 195.039600] [ 195.039602] __unmap_hugepage_range+0x33b/0x7d0 [ 195.039605] unmap_hugepage_range+0x55/0x70 [ 195.039608] hugetlb_vmdelete_list+0x77/0xa0 [ 195.039611] hugetlbfs_fallocate+0x410/0x550 [ 195.039612] ? _raw_spin_unlock_irqrestore+0x23/0x40 [ 195.039616] vfs_fallocate+0x12e/0x360 [ 195.039618] __x64_sys_fallocate+0x40/0x70 [ 195.039620] do_syscall_64+0x58/0x80 [ 195.039623] ? syscall_exit_to_user_mode+0x17/0x40 [ 195.039624] ? do_syscall_64+0x67/0x80 [ 195.039626] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 195.039628] RIP: 0033:0x7fc7b590651f [ 195.039653] Code: [...] [ 195.039654] RSP: 002b:00007fc7aaf66e70 EFLAGS: 00000293 ORIG_RAX: 000000000000011d [ 195.039655] RAX: ffffffffffffffda RBX: 0000558ef4b7f370 RCX: 00007fc7b590651f [ 195.039656] RDX: 0000000018000000 RSI: 0000000000000003 RDI: 000000000000000c [ 195.039657] RBP: 0000000008000000 R08: 0000000000000000 R09: 0000000000000073 [ 195.039658] R10: 0000000008000000 R11: 0000000000000293 R12: 0000000018000000 [ 195.039658] R13: 00007fb8bbe00000 R14: 000000000000000c R15: 0000000000001000 [ 195.039661] Fix it by not going into the "!huge_pte_none(pte)" case if we stumble over an exclusive marker. spin_unlock() + continue would get the job done. However, instead, make it clearer that there are no fall-through statements: we process each case (hwpoison, migration, marker, !none, none) and then unlock the page table to continue with the next PTE. Let's avoid "continue" statements and use a single spin_unlock() at the end. Link: https://lkml.kernel.org/r/20221222205511.675832-1-david@redhat.com Link: https://lkml.kernel.org/r/20221222205511.675832-2-david@redhat.com Fixes: 60dfaad65aa9 ("mm/hugetlb: allow uffd wr-protect none ptes") Signed-off-by: David Hildenbrand Reviewed-by: Peter Xu Reviewed-by: Mike Kravetz Cc: Miaohe Lin Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index db895230ee7e4..3f0686b54701f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6658,10 +6658,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, } pte = huge_ptep_get(ptep); if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) { - spin_unlock(ptl); - continue; - } - if (unlikely(is_hugetlb_entry_migration(pte))) { + /* Nothing to do. */ + } else if (unlikely(is_hugetlb_entry_migration(pte))) { swp_entry_t entry = pte_to_swp_entry(pte); struct page *page = pfn_swap_entry_to_page(entry); @@ -6682,18 +6680,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, set_huge_pte_at(mm, address, ptep, newpte); pages++; } - spin_unlock(ptl); - continue; - } - if (unlikely(pte_marker_uffd_wp(pte))) { - /* - * This is changing a non-present pte into a none pte, - * no need for huge_ptep_modify_prot_start/commit(). - */ + } else if (unlikely(is_pte_marker(pte))) { + /* No other markers apply for now. */ + WARN_ON_ONCE(!pte_marker_uffd_wp(pte)); if (uffd_wp_resolve) + /* Safe to modify directly (non-present->none). */ huge_pte_clear(mm, address, ptep, psize); - } - if (!huge_pte_none(pte)) { + } else if (!huge_pte_none(pte)) { pte_t old_pte; unsigned int shift = huge_page_shift(hstate_vma(vma)); From 44f86392bdd165da7e43d3c772aeb1e128ffd6c8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 22 Dec 2022 21:55:11 +0100 Subject: [PATCH 0686/1593] mm/hugetlb: fix uffd-wp handling for migration entries in hugetlb_change_protection() We have to update the uffd-wp SWP PTE bit independent of the type of migration entry. Currently, if we're unlucky and we want to install/clear the uffd-wp bit just while we're migrating a read-only mapped hugetlb page, we would miss to set/clear the uffd-wp bit. Further, if we're processing a readable-exclusive migration entry and neither want to set or clear the uffd-wp bit, we could currently end up losing the uffd-wp bit. Note that the same would hold for writable migrating entries, however, having a writable migration entry with the uffd-wp bit set would already mean that something went wrong. Note that the change from !is_readable_migration_entry -> writable_migration_entry is harmless and actually cleaner, as raised by Miaohe Lin and discussed in [1]. [1] https://lkml.kernel.org/r/90dd6a93-4500-e0de-2bf0-bf522c311b0c@huawei.com Link: https://lkml.kernel.org/r/20221222205511.675832-3-david@redhat.com Fixes: 60dfaad65aa9 ("mm/hugetlb: allow uffd wr-protect none ptes") Signed-off-by: David Hildenbrand Reviewed-by: Mike Kravetz Cc: Miaohe Lin Cc: Muchun Song Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3f0686b54701f..bd7d39227344e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6662,10 +6662,9 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, } else if (unlikely(is_hugetlb_entry_migration(pte))) { swp_entry_t entry = pte_to_swp_entry(pte); struct page *page = pfn_swap_entry_to_page(entry); + pte_t newpte = pte; - if (!is_readable_migration_entry(entry)) { - pte_t newpte; - + if (is_writable_migration_entry(entry)) { if (PageAnon(page)) entry = make_readable_exclusive_migration_entry( swp_offset(entry)); @@ -6673,13 +6672,15 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, entry = make_readable_migration_entry( swp_offset(entry)); newpte = swp_entry_to_pte(entry); - if (uffd_wp) - newpte = pte_swp_mkuffd_wp(newpte); - else if (uffd_wp_resolve) - newpte = pte_swp_clear_uffd_wp(newpte); - set_huge_pte_at(mm, address, ptep, newpte); pages++; } + + if (uffd_wp) + newpte = pte_swp_mkuffd_wp(newpte); + else if (uffd_wp_resolve) + newpte = pte_swp_clear_uffd_wp(newpte); + if (!pte_same(pte, newpte)) + set_huge_pte_at(mm, address, ptep, newpte); } else if (unlikely(is_pte_marker(pte))) { /* No other markers apply for now. */ WARN_ON_ONCE(!pte_marker_uffd_wp(pte)); From ab0c3f1251b4670978fde0bd54161795a139b060 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 22 Dec 2022 12:41:50 -0800 Subject: [PATCH 0687/1593] mm/khugepaged: fix collapse_pte_mapped_thp() to allow anon_vma uprobe_write_opcode() uses collapse_pte_mapped_thp() to restore huge pmd, when removing a breakpoint from hugepage text: vma->anon_vma is always set in that case, so undo the prohibition. And MADV_COLLAPSE ought to be able to collapse some page tables in a vma which happens to have anon_vma set from CoWing elsewhere. Is anon_vma lock required? Almost not: if any page other than expected subpage of the non-anon huge page is found in the page table, collapse is aborted without making any change. However, it is possible that an anon page was CoWed from this extent in another mm or vma, in which case a concurrent lookup might look here: so keep it away while clearing pmd (but perhaps we shall go back to using pmd_lock() there in future). Note that collapse_pte_mapped_thp() is exceptional in freeing a page table without having cleared its ptes: I'm uneasy about that, and had thought pte_clear()ing appropriate; but exclusive i_mmap lock does fix the problem, and we would have to move the mmu_notification if clearing those ptes. What this fixes is not a dangerous instability. But I suggest Cc stable because uprobes "healing" has regressed in that way, so this should follow 8d3c106e19e8 into those stable releases where it was backported (and may want adjustment there - I'll supply backports as needed). Link: https://lkml.kernel.org/r/b740c9fb-edba-92ba-59fb-7a5592e5dfc@google.com Fixes: 8d3c106e19e8 ("mm/khugepaged: take the right locks for page table retraction") Signed-off-by: Hugh Dickins Acked-by: David Hildenbrand Cc: Jann Horn Cc: Yang Shi Cc: Zach O'Keefe Cc: Song Liu Cc: [5.4+] Signed-off-by: Andrew Morton --- mm/khugepaged.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 5cb401aa2b9d8..9a0135b39b197 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1460,14 +1460,6 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, if (!hugepage_vma_check(vma, vma->vm_flags, false, false, false)) return SCAN_VMA_CHECK; - /* - * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings - * that got written to. Without this, we'd have to also lock the - * anon_vma if one exists. - */ - if (vma->anon_vma) - return SCAN_VMA_CHECK; - /* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */ if (userfaultfd_wp(vma)) return SCAN_PTE_UFFD_WP; @@ -1567,8 +1559,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, } /* step 4: remove pte entries */ + /* we make no change to anon, but protect concurrent anon page lookup */ + if (vma->anon_vma) + anon_vma_lock_write(vma->anon_vma); + collapse_and_free_pmd(mm, vma, haddr, pmd); + if (vma->anon_vma) + anon_vma_unlock_write(vma->anon_vma); i_mmap_unlock_write(vma->vm_file->f_mapping); maybe_install_pmd: From 51d3d5eb74ff53b92dcff48b30ae2ed8edd85a32 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 9 Dec 2022 09:09:12 +0100 Subject: [PATCH 0688/1593] mm/userfaultfd: enable writenotify while userfaultfd-wp is enabled for a VMA Currently, we don't enable writenotify when enabling userfaultfd-wp on a shared writable mapping (for now only shmem and hugetlb). The consequence is that vma->vm_page_prot will still include write permissions, to be set as default for all PTEs that get remapped (e.g., mprotect(), NUMA hinting, page migration, ...). So far, vma->vm_page_prot is assumed to be a safe default, meaning that we only add permissions (e.g., mkwrite) but not remove permissions (e.g., wrprotect). For example, when enabling softdirty tracking, we enable writenotify. With uffd-wp on shared mappings, that changed. More details on vma->vm_page_prot semantics were summarized in [1]. This is problematic for uffd-wp: we'd have to manually check for a uffd-wp PTEs/PMDs and manually write-protect PTEs/PMDs, which is error prone. Prone to such issues is any code that uses vma->vm_page_prot to set PTE permissions: primarily pte_modify() and mk_pte(). Instead, let's enable writenotify such that PTEs/PMDs/... will be mapped write-protected as default and we will only allow selected PTEs that are definitely safe to be mapped without write-protection (see can_change_pte_writable()) to be writable. In the future, we might want to enable write-bit recovery -- e.g., can_change_pte_writable() -- at more locations, for example, also when removing uffd-wp protection. This fixes two known cases: (a) remove_migration_pte() mapping uffd-wp'ed PTEs writable, resulting in uffd-wp not triggering on write access. (b) do_numa_page() / do_huge_pmd_numa_page() mapping uffd-wp'ed PTEs/PMDs writable, resulting in uffd-wp not triggering on write access. Note that do_numa_page() / do_huge_pmd_numa_page() can be reached even without NUMA hinting (which currently doesn't seem to be applicable to shmem), for example, by using uffd-wp with a PROT_WRITE shmem VMA. On such a VMA, userfaultfd-wp is currently non-functional. Note that when enabling userfaultfd-wp, there is no need to walk page tables to enforce the new default protection for the PTEs: we know that they cannot be uffd-wp'ed yet, because that can only happen after enabling uffd-wp for the VMA in general. Also note that this makes mprotect() on ranges with uffd-wp'ed PTEs not accidentally set the write bit -- which would result in uffd-wp not triggering on later write access. This commit makes uffd-wp on shmem behave just like uffd-wp on anonymous memory in that regard, even though, mixing mprotect with uffd-wp is controversial. [1] https://lkml.kernel.org/r/92173bad-caa3-6b43-9d1e-9a471fdbc184@redhat.com Link: https://lkml.kernel.org/r/20221209080912.7968-1-david@redhat.com Fixes: b1f9e876862d ("mm/uffd: enable write protection for shmem & hugetlbfs") Signed-off-by: David Hildenbrand Reported-by: Ives van Hoorne Debugged-by: Peter Xu Acked-by: Peter Xu Cc: Hugh Dickins Cc: Alistair Popple Cc: Mike Rapoport Cc: Nadav Amit Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton --- fs/userfaultfd.c | 28 ++++++++++++++++++++++------ mm/mmap.c | 4 ++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 98ac37e34e3d4..cc694846617a5 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -108,6 +108,21 @@ static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx) return ctx->features & UFFD_FEATURE_INITIALIZED; } +static void userfaultfd_set_vm_flags(struct vm_area_struct *vma, + vm_flags_t flags) +{ + const bool uffd_wp_changed = (vma->vm_flags ^ flags) & VM_UFFD_WP; + + vma->vm_flags = flags; + /* + * For shared mappings, we want to enable writenotify while + * userfaultfd-wp is enabled (see vma_wants_writenotify()). We'll simply + * recalculate vma->vm_page_prot whenever userfaultfd-wp changes. + */ + if ((vma->vm_flags & VM_SHARED) && uffd_wp_changed) + vma_set_page_prot(vma); +} + static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode, int wake_flags, void *key) { @@ -618,7 +633,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, for_each_vma(vmi, vma) { if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; - vma->vm_flags &= ~__VM_UFFD_FLAGS; + userfaultfd_set_vm_flags(vma, + vma->vm_flags & ~__VM_UFFD_FLAGS); } } mmap_write_unlock(mm); @@ -652,7 +668,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) octx = vma->vm_userfaultfd_ctx.ctx; if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; - vma->vm_flags &= ~__VM_UFFD_FLAGS; + userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS); return 0; } @@ -733,7 +749,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma, } else { /* Drop uffd context if remap feature not enabled */ vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; - vma->vm_flags &= ~__VM_UFFD_FLAGS; + userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS); } } @@ -895,7 +911,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) prev = vma; } - vma->vm_flags = new_flags; + userfaultfd_set_vm_flags(vma, new_flags); vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; } mmap_write_unlock(mm); @@ -1463,7 +1479,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, * the next vma was merged into the current one and * the current one has not been updated yet. */ - vma->vm_flags = new_flags; + userfaultfd_set_vm_flags(vma, new_flags); vma->vm_userfaultfd_ctx.ctx = ctx; if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma)) @@ -1651,7 +1667,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, * the next vma was merged into the current one and * the current one has not been updated yet. */ - vma->vm_flags = new_flags; + userfaultfd_set_vm_flags(vma, new_flags); vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; skip: diff --git a/mm/mmap.c b/mm/mmap.c index 87d929316d572..420476fcfbfca 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1524,6 +1524,10 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot) if (vma_soft_dirty_enabled(vma) && !is_vm_hugetlb_page(vma)) return 1; + /* Do we need write faults for uffd-wp tracking? */ + if (userfaultfd_wp(vma)) + return 1; + /* Specialty mapping? */ if (vm_flags & VM_PFNMAP) return 0; From 52dc031088f00e323140ece4004e70c33153c6dd Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Sat, 24 Dec 2022 00:20:34 -0800 Subject: [PATCH 0689/1593] mm/MADV_COLLAPSE: don't expand collapse when vm_end is past requested end MADV_COLLAPSE acts on one hugepage-aligned/sized region at a time, until it has collapsed all eligible memory contained within the bounds supplied by the user. At the top of each hugepage iteration we (re)lock mmap_lock and (re)validate the VMA for eligibility and update variables that might have changed while mmap_lock was dropped. One thing that might occur is that the VMA could be resized, and as such, we refetch vma->vm_end to make sure we don't collapse past the end of the VMA's new end. However, it's possible that when refetching vma->vm_end that we expand the region acted on by MADV_COLLAPSE if vma->vm_end is greater than size+len supplied by the user. The consequence here is that we may attempt to collapse more memory than requested, possibly yielding either "too much success" or "false failure" user-visible results. An example of the former is if we MADV_COLLAPSE the first 4MiB of a 2TiB mmap()'d file, the incorrect refetch would cause the operation to block for much longer than anticipated as we attempt to collapse the entire TiB region. An example of the latter is that applying MADV_COLLPSE to a 4MiB file mapped to the start of a 6MiB VMA will successfully collapse the first 4MiB, then incorrectly attempt to collapse the last hugepage-aligned/sized region -- fail (since readahead/page cache lookup will fail) -- and report a failure to the user. I don't believe there is a kernel stability concern here as we always (re)validate the VMA / region accordingly. Also as Hugh mentions, the user-visible effects are: we try to collapse more memory than requested by the user, and/or failing an operation that should have otherwise succeeded. An example is trying to collapse a 4MiB file contained within a 12MiB VMA. Don't expand the acted-on region when refetching vma->vm_end. Link: https://lkml.kernel.org/r/20221224082035.3197140-1-zokeefe@google.com Fixes: 4d24de9425f7 ("mm: MADV_COLLAPSE: refetch vm_end after reacquiring mmap_lock") Signed-off-by: Zach O'Keefe Reported-by: Hugh Dickins Cc: Yang Shi Cc: Signed-off-by: Andrew Morton --- mm/khugepaged.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 9a0135b39b197..79be131333223 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2647,7 +2647,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, goto out_nolock; } - hend = vma->vm_end & HPAGE_PMD_MASK; + hend = min(hend, vma->vm_end & HPAGE_PMD_MASK); } mmap_assert_locked(mm); memset(cc->node_load, 0, sizeof(cc->node_load)); From 3de0c269adc6c2fac0bb1fb11965f0de699dc32b Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Sat, 24 Dec 2022 00:20:35 -0800 Subject: [PATCH 0690/1593] mm/shmem: restore SHMEM_HUGE_DENY precedence over MADV_COLLAPSE SHMEM_HUGE_DENY is for emergency use by the admin, to disable allocation of shmem huge pages if, for example, a dangerous bug is found in their usage: see "deny" in Documentation/mm/transhuge.rst. An app using madvise(,,MADV_COLLAPSE) should not be allowed to override it: restore its precedence over shmem_huge_force. Restore SHMEM_HUGE_DENY precedence over MADV_COLLAPSE. Link: https://lkml.kernel.org/r/20221224082035.3197140-2-zokeefe@google.com Fixes: 7c6c6cc4d3a2 ("mm/shmem: add flag to enforce shmem THP in hugepage_vma_check()") Signed-off-by: Zach O'Keefe Suggested-by: Hugh Dickins Acked-by: David Hildenbrand Cc: Yang Shi Cc: Signed-off-by: Andrew Morton --- mm/shmem.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index c301487be5fb4..0005ab2c29af7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -478,12 +478,10 @@ bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, if (vma && ((vma->vm_flags & VM_NOHUGEPAGE) || test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))) return false; - if (shmem_huge_force) - return true; - if (shmem_huge == SHMEM_HUGE_FORCE) - return true; if (shmem_huge == SHMEM_HUGE_DENY) return false; + if (shmem_huge_force || shmem_huge == SHMEM_HUGE_FORCE) + return true; switch (SHMEM_SB(inode->i_sb)->huge) { case SHMEM_HUGE_ALWAYS: From a1193de562f54c7c9f60ca9f2db96e50a7608de1 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 4 Jan 2023 16:02:40 -0800 Subject: [PATCH 0691/1593] mm: fix vma->anon_name memory leak for anonymous shmem VMAs free_anon_vma_name() is missing a check for anonymous shmem VMA which leads to a memory leak due to refcount not being dropped. Fix this by calling anon_vma_name_put() unconditionally. It will free vma->anon_name whenever it's non-NULL. Link: https://lkml.kernel.org/r/20230105000241.1450843-1-surenb@google.com Fixes: d09e8ca6cb93 ("mm: anonymous shared memory naming") Signed-off-by: Suren Baghdasaryan Suggested-by: David Hildenbrand Reviewed-by: David Hildenbrand Reported-by: syzbot+91edf9178386a07d06a7@syzkaller.appspotmail.com Cc: Hugh Dickins Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index e8ed225d8f7ca..ff3f3f23f6498 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -413,8 +413,7 @@ static inline void free_anon_vma_name(struct vm_area_struct *vma) * Not using anon_vma_name because it generates a warning if mmap_lock * is not held, which might be the case here. */ - if (!vma->vm_file) - anon_vma_name_put(vma->anon_name); + anon_vma_name_put(vma->anon_name); } static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, From b30c14cd61025eeea2f2e8569606cd167ba9ad2d Mon Sep 17 00:00:00 2001 From: James Houghton Date: Wed, 4 Jan 2023 23:19:10 +0000 Subject: [PATCH 0692/1593] hugetlb: unshare some PMDs when splitting VMAs PMD sharing can only be done in PUD_SIZE-aligned pieces of VMAs; however, it is possible that HugeTLB VMAs are split without unsharing the PMDs first. Without this fix, it is possible to hit the uffd-wp-related WARN_ON_ONCE in hugetlb_change_protection [1]. The key there is that hugetlb_unshare_all_pmds will not attempt to unshare PMDs in non-PUD_SIZE-aligned sections of the VMA. It might seem ideal to unshare in hugetlb_vm_op_open, but we need to unshare in both the new and old VMAs, so unsharing in hugetlb_vm_op_split seems natural. [1]: https://lore.kernel.org/linux-mm/CADrL8HVeOkj0QH5VZZbRzybNE8CG-tEGFshnA+bG9nMgcWtBSg@mail.gmail.com/ Link: https://lkml.kernel.org/r/20230104231910.1464197-1-jthoughton@google.com Fixes: 6dfeaff93be1 ("hugetlb/userfaultfd: unshare all pmds for hugetlbfs when register wp") Signed-off-by: James Houghton Reviewed-by: Mike Kravetz Acked-by: Peter Xu Cc: Axel Rasmussen Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index bd7d39227344e..2ce912c915ebc 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -94,6 +94,8 @@ static int hugetlb_acct_memory(struct hstate *h, long delta); static void hugetlb_vma_lock_free(struct vm_area_struct *vma); static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma); static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma); +static void hugetlb_unshare_pmds(struct vm_area_struct *vma, + unsigned long start, unsigned long end); static inline bool subpool_is_free(struct hugepage_subpool *spool) { @@ -4834,6 +4836,25 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr) { if (addr & ~(huge_page_mask(hstate_vma(vma)))) return -EINVAL; + + /* + * PMD sharing is only possible for PUD_SIZE-aligned address ranges + * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this + * split, unshare PMDs in the PUD_SIZE interval surrounding addr now. + */ + if (addr & ~PUD_MASK) { + /* + * hugetlb_vm_op_split is called right before we attempt to + * split the VMA. We will need to unshare PMDs in the old and + * new VMAs, so let's unshare before we split. + */ + unsigned long floor = addr & PUD_MASK; + unsigned long ceil = floor + PUD_SIZE; + + if (floor >= vma->vm_start && ceil <= vma->vm_end) + hugetlb_unshare_pmds(vma, floor, ceil); + } + return 0; } @@ -7322,26 +7343,21 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int re } } -/* - * This function will unconditionally remove all the shared pmd pgtable entries - * within the specific vma for a hugetlbfs memory range. - */ -void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) +static void hugetlb_unshare_pmds(struct vm_area_struct *vma, + unsigned long start, + unsigned long end) { struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); struct mm_struct *mm = vma->vm_mm; struct mmu_notifier_range range; - unsigned long address, start, end; + unsigned long address; spinlock_t *ptl; pte_t *ptep; if (!(vma->vm_flags & VM_MAYSHARE)) return; - start = ALIGN(vma->vm_start, PUD_SIZE); - end = ALIGN_DOWN(vma->vm_end, PUD_SIZE); - if (start >= end) return; @@ -7373,6 +7389,16 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) mmu_notifier_invalidate_range_end(&range); } +/* + * This function will unconditionally remove all the shared pmd pgtable entries + * within the specific vma for a hugetlbfs memory range. + */ +void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) +{ + hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE), + ALIGN_DOWN(vma->vm_end, PUD_SIZE)); +} + #ifdef CONFIG_CMA static bool cma_reserve_called __initdata; From fed15f1345dc8a7fc8baa81e8b55c3ba010d7f4b Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 4 Jan 2023 17:52:05 -0500 Subject: [PATCH 0693/1593] mm/hugetlb: pre-allocate pgtable pages for uffd wr-protects Userfaultfd-wp uses pte markers to mark wr-protected pages for both shmem and hugetlb. Shmem has pre-allocation ready for markers, but hugetlb path was overlooked. Doing so by calling huge_pte_alloc() if the initial pgtable walk fails to find the huge ptep. It's possible that huge_pte_alloc() can fail with high memory pressure, in that case stop the loop immediately and fail silently. This is not the most ideal solution but it matches with what we do with shmem meanwhile it avoids the splat in dmesg. Link: https://lkml.kernel.org/r/20230104225207.1066932-2-peterx@redhat.com Fixes: 60dfaad65aa9 ("mm/hugetlb: allow uffd wr-protect none ptes") Signed-off-by: Peter Xu Reported-by: James Houghton Reviewed-by: Mike Kravetz Acked-by: David Hildenbrand Acked-by: James Houghton Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: Muchun Song Cc: Nadav Amit Cc: [5.19+] Signed-off-by: Andrew Morton --- mm/hugetlb.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2ce912c915ebc..f96794be71d50 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6660,8 +6660,17 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, spinlock_t *ptl; ptep = huge_pte_offset(mm, address, psize); if (!ptep) { - address |= last_addr_mask; - continue; + if (!uffd_wp) { + address |= last_addr_mask; + continue; + } + /* + * Userfaultfd wr-protect requires pgtable + * pre-allocations to install pte markers. + */ + ptep = huge_pte_alloc(mm, vma, address, psize); + if (!ptep) + break; } ptl = huge_pte_lock(h, mm, ptep); if (huge_pmd_unshare(mm, vma, address, ptep)) { From 1beb8ae302a01fb487787f5a4fb97cf5338a86c1 Mon Sep 17 00:00:00 2001 From: Nhat Pham Date: Fri, 6 Jan 2023 14:00:16 -0800 Subject: [PATCH 0694/1593] Docs/admin-guide/mm/zswap: remove zsmalloc's lack of writeback warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Writeback has been implemented for zsmalloc, so this warning no longer holds. Link: https://lkml.kernel.org/r/20230106220016.172303-1-nphamcs@gmail.com Fixes: 9997bc017549a ("zsmalloc: implement writeback mechanism for zsmalloc") Suggested-by: Thomas Weißschuh Signed-off-by: Nhat Pham Reviewed-by: Sergey Senozhatsky Cc: Johannes Weiner Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- Documentation/admin-guide/mm/zswap.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Documentation/admin-guide/mm/zswap.rst b/Documentation/admin-guide/mm/zswap.rst index f67de481c7f60..6dd74a18268ba 100644 --- a/Documentation/admin-guide/mm/zswap.rst +++ b/Documentation/admin-guide/mm/zswap.rst @@ -70,9 +70,7 @@ e.g. ``zswap.zpool=zbud``. It can also be changed at runtime using the sysfs The zbud type zpool allocates exactly 1 page to store 2 compressed pages, which means the compression ratio will always be 2:1 or worse (because of half-full zbud pages). The zsmalloc type zpool has a more complex compressed page -storage method, and it can achieve greater storage densities. However, -zsmalloc does not implement compressed page eviction, so once zswap fills it -cannot evict the oldest page, it can only reject new pages. +storage method, and it can achieve greater storage densities. When a swap page is passed from frontswap to zswap, zswap maintains a mapping of the swap entry, a combination of the swap type and swap offset, to the zpool From 7633355e5c7f29c049a9048e461427d1d8ed3051 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 5 Jan 2023 14:53:56 +0900 Subject: [PATCH 0695/1593] nilfs2: fix general protection fault in nilfs_btree_insert() If nilfs2 reads a corrupted disk image and tries to reads a b-tree node block by calling __nilfs_btree_get_block() against an invalid virtual block address, it returns -ENOENT because conversion of the virtual block address to a disk block address fails. However, this return value is the same as the internal code that b-tree lookup routines return to indicate that the block being searched does not exist, so functions that operate on that b-tree may misbehave. When nilfs_btree_insert() receives this spurious 'not found' code from nilfs_btree_do_lookup(), it misunderstands that the 'not found' check was successful and continues the insert operation using incomplete lookup path data, causing the following crash: general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f] ... RIP: 0010:nilfs_btree_get_nonroot_node fs/nilfs2/btree.c:418 [inline] RIP: 0010:nilfs_btree_prepare_insert fs/nilfs2/btree.c:1077 [inline] RIP: 0010:nilfs_btree_insert+0x6d3/0x1c10 fs/nilfs2/btree.c:1238 Code: bc 24 80 00 00 00 4c 89 f8 48 c1 e8 03 42 80 3c 28 00 74 08 4c 89 ff e8 4b 02 92 fe 4d 8b 3f 49 83 c7 28 4c 89 f8 48 c1 e8 03 <42> 80 3c 28 00 74 08 4c 89 ff e8 2e 02 92 fe 4d 8b 3f 49 83 c7 02 ... Call Trace: nilfs_bmap_do_insert fs/nilfs2/bmap.c:121 [inline] nilfs_bmap_insert+0x20d/0x360 fs/nilfs2/bmap.c:147 nilfs_get_block+0x414/0x8d0 fs/nilfs2/inode.c:101 __block_write_begin_int+0x54c/0x1a80 fs/buffer.c:1991 __block_write_begin fs/buffer.c:2041 [inline] block_write_begin+0x93/0x1e0 fs/buffer.c:2102 nilfs_write_begin+0x9c/0x110 fs/nilfs2/inode.c:261 generic_perform_write+0x2e4/0x5e0 mm/filemap.c:3772 __generic_file_write_iter+0x176/0x400 mm/filemap.c:3900 generic_file_write_iter+0xab/0x310 mm/filemap.c:3932 call_write_iter include/linux/fs.h:2186 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x7dc/0xc50 fs/read_write.c:584 ksys_write+0x177/0x2a0 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd ... This patch fixes the root cause of this problem by replacing the error code that __nilfs_btree_get_block() returns on block address conversion failure from -ENOENT to another internal code -EINVAL which means that the b-tree metadata is corrupted. By returning -EINVAL, it propagates without glitches, and for all relevant b-tree operations, functions in the upper bmap layer output an error message indicating corrupted b-tree metadata via nilfs_bmap_convert_error(), and code -EIO will be eventually returned as it should be. Link: https://lkml.kernel.org/r/000000000000bd89e205f0e38355@google.com Link: https://lkml.kernel.org/r/20230105055356.8811-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: syzbot+ede796cecd5296353515@syzkaller.appspotmail.com Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/btree.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index b9d15c3df3cc1..40ce92a332fe7 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -480,9 +480,18 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh, &submit_ptr); if (ret) { - if (ret != -EEXIST) - return ret; - goto out_check; + if (likely(ret == -EEXIST)) + goto out_check; + if (ret == -ENOENT) { + /* + * Block address translation failed due to invalid + * value of 'ptr'. In this case, return internal code + * -EINVAL (broken bmap) to notify bmap layer of fatal + * metadata corruption. + */ + ret = -EINVAL; + } + return ret; } if (ra) { From 0de4a7f5ba5744d1de5ce0b076bb73a86530c60c Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 4 Jan 2023 02:09:33 +0100 Subject: [PATCH 0696/1593] kasan: mark kasan_kunit_executing as static Mark kasan_kunit_executing as static, as it is only used within mm/kasan/report.c. Link: https://lkml.kernel.org/r/f64778a4683b16a73bba72576f73bf4a2b45a82f.1672794398.git.andreyknvl@google.com Fixes: c8c7016f50c8 ("kasan: fail non-kasan KUnit tests on KASAN reports") Reported-by: kernel test robot Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton --- mm/kasan/report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 1d02757e90a32..22598b20c7b75 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -119,7 +119,7 @@ EXPORT_SYMBOL_GPL(kasan_restore_multi_shot); * Whether the KASAN KUnit test suite is currently being executed. * Updated in kasan_test.c. */ -bool kasan_kunit_executing; +static bool kasan_kunit_executing; void kasan_kunit_test_suite_start(void) { From d09dce1fff8a9da10144e878ef4bbf90c65559e5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 2 Jan 2023 13:16:14 -0800 Subject: [PATCH 0697/1593] lib/win_minmax: use /* notation for regular comments Don't use kernel-doc "/**" notation for non-kernel-doc comments. Prevents a kernel-doc warning: lib/win_minmax.c:31: warning: expecting prototype for lib/minmax.c(). Prototype was for minmax_subwin_update() instead Link: https://lkml.kernel.org/r/20230102211614.26343-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Neal Cardwell Cc: Eric Dumazet Cc: David S. Miller Signed-off-by: Andrew Morton --- lib/win_minmax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/win_minmax.c b/lib/win_minmax.c index 6bdc1cd15f761..ec10506834b62 100644 --- a/lib/win_minmax.c +++ b/lib/win_minmax.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/** +/* * lib/minmax.c: windowed min/max tracker * * Kathleen Nichols' algorithm for tracking the minimum (or maximum) From 0411d6ee50e3b74a793848e4f41f45860163f5cc Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 6 Jan 2023 20:33:31 +0000 Subject: [PATCH 0698/1593] include/linux/mm: fix release_pages_arg kernel doc comment Commit 449c796768c9 ("mm: teach release_pages() to take an array of encoded page pointers too") added the kernel doc comment for release_pages() on top of 'union release_pages_arg', so making 'make htmldocs' complains as below: ./include/linux/mm.h:1268: warning: cannot understand function prototype: 'typedef union ' The kernel doc comment for the function is already on top of the function's definition in mm/swap.c, and the new comment is actually not for the function but indeed release_pages_arg. Fixing the comment to reflect the intent would be one option. But, kernel doc cannot parse the union as below due to the attribute. ./include/linux/mm.h:1272: error: Cannot parse struct or union! Modify the comment to reflect the intent but do not mark it as a kernel doc comment. Link: https://lkml.kernel.org/r/20230106203331.127532-1-sj@kernel.org Fixes: 449c796768c9 ("mm: teach release_pages() to take an array of encoded page pointers too") Signed-off-by: SeongJae Park Acked-by: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index f3f196e4d66d6..8f857163ac89c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1270,10 +1270,10 @@ static inline void folio_put_refs(struct folio *folio, int refs) __folio_put(folio); } -/** - * release_pages - release an array of pages or folios +/* + * union release_pages_arg - an array of pages or folios * - * This just releases a simple array of multiple pages, and + * release_pages() releases a simple array of multiple pages, and * accepts various different forms of said page array: either * a regular old boring array of pages, an array of folios, or * an array of encoded page pointers. From 8651a137e62ebfde3df95cbb1ca055d013ec5b9e Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sat, 7 Jan 2023 00:00:05 +0000 Subject: [PATCH 0699/1593] mm: update mmap_sem comments to refer to mmap_lock The rename from mm->mmap_sem to mm->mmap_lock was performed in commit da1c55f1b272 ("mmap locking API: rename mmap_sem to mmap_lock") and commit c1e8d7c6a7a6 ("map locking API: convert mmap_sem comments"), however some incorrect comments remain. This patch simply corrects those comments which are obviously incorrect within mm itself. Link: https://lkml.kernel.org/r/33fba04389ab63fc4980e7ba5442f521df6dc657.1673048927.git.lstoakes@gmail.com Signed-off-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Muchun Song Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 2 +- include/linux/page_ref.h | 2 +- mm/hugetlb.c | 4 ++-- mm/madvise.c | 2 +- mm/mmap.c | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3b8475007734d..9757067c30537 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -581,7 +581,7 @@ struct vm_area_struct { /* * For private and shared anonymous mappings, a pointer to a null * terminated string containing the name given to the vma, or NULL if - * unnamed. Serialized by mmap_sem. Use anon_vma_name to access. + * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. */ struct anon_vma_name *anon_name; #endif diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 2e677e6ad09fc..d7c2d33baa7f8 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -301,7 +301,7 @@ static inline bool folio_ref_try_add_rcu(struct folio *folio, int count) * * You can also use this function if you're holding a lock that prevents * pages being frozen & removed; eg the i_pages lock for the page cache - * or the mmap_sem or page table lock for page tables. In this case, + * or the mmap_lock or page table lock for page tables. In this case, * it will always succeed, and you could have used a plain folio_get(), * but it's sometimes more convenient to have a common function called * from both locked and RCU-protected contexts. diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f96794be71d50..7fcdb98c9e689 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1183,7 +1183,7 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma) /* * Reset and decrement one ref on hugepage private reservation. - * Called with mm->mmap_sem writer semaphore held. + * Called with mm->mmap_lock writer semaphore held. * This function should be only used by move_vma() and operate on * same sized vma. It should never come here with last ref on the * reservation. @@ -5152,7 +5152,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr, /* * We don't have to worry about the ordering of src and dst ptlocks - * because exclusive mmap_sem (or the i_mmap_lock) prevents deadlock. + * because exclusive mmap_lock (or the i_mmap_lock) prevents deadlock. */ if (src_ptl != dst_ptl) spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); diff --git a/mm/madvise.c b/mm/madvise.c index a56a6d17e201e..b6ea204d4e23b 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -130,7 +130,7 @@ static int replace_anon_vma_name(struct vm_area_struct *vma, #endif /* CONFIG_ANON_VMA_NAME */ /* * Update the vm_flags on region of a vma, splitting it or merging it as - * necessary. Must be called with mmap_sem held for writing; + * necessary. Must be called with mmap_lock held for writing; * Caller should ensure anon_name stability by raising its refcount even when * anon_name belongs to a valid vma because this function might free that vma. */ diff --git a/mm/mmap.c b/mm/mmap.c index 420476fcfbfca..425a9349e6108 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2294,7 +2294,7 @@ static inline int munmap_sidetree(struct vm_area_struct *vma, * @start: The aligned start address to munmap. * @end: The aligned end address to munmap. * @uf: The userfaultfd list_head - * @downgrade: Set to true to attempt a write downgrade of the mmap_sem + * @downgrade: Set to true to attempt a write downgrade of the mmap_lock * * If @downgrade is true, check return code for potential release of the lock. */ @@ -2469,7 +2469,7 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma, * @len: The length of the range to munmap * @uf: The userfaultfd list_head * @downgrade: set to true if the user wants to attempt to write_downgrade the - * mmap_sem + * mmap_lock * * This function takes a @mas that is either pointing to the previous VMA or set * to MA_START and sets it up to remove the mapping(s). The @len will be From 5316a017d093f644675a56523bcf5787ba8f4fef Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 6 Jan 2023 22:30:14 +0300 Subject: [PATCH 0700/1593] proc: fix PIE proc-empty-vm, proc-pid-vm tests vsyscall detection code uses direct call to the beginning of the vsyscall page: asm ("call %P0" :: "i" (0xffffffffff600000)) It generates "call rel32" instruction but it is not relocated if binary is PIE, so binary segfaults into random userspace address and vsyscall page status is detected incorrectly. Do more direct: asm ("call *%rax") which doesn't do need any relocaltions. Mark g_vsyscall as volatile for a good measure, I didn't find instruction setting it to 0. Now the code is obviously correct: xor eax, eax mov rdi, rbp mov rsi, rbp mov DWORD PTR [rip+0x2d15], eax # g_vsyscall = 0 mov rax, 0xffffffffff600000 call rax mov DWORD PTR [rip+0x2d02], 1 # g_vsyscall = 1 mov eax, DWORD PTR ds:0xffffffffff600000 mov DWORD PTR [rip+0x2cf1], 2 # g_vsyscall = 2 mov edi, [rip+0x2ceb] # exit(g_vsyscall) call exit Note: fixed proc-empty-vm test oopses 5.19.0-28-generic kernel but this is separate story. Link: https://lkml.kernel.org/r/Y7h2xvzKLg36DSq8@p183 Fixes: 5bc73bb3451b9 ("proc: test how it holds up with mapping'less process") Signed-off-by: Alexey Dobriyan Reported-by: Mirsad Goran Todorovac Tested-by: Mirsad Goran Todorovac Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/proc/proc-empty-vm.c | 12 +++++++----- tools/testing/selftests/proc/proc-pid-vm.c | 9 +++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c index d95b1cb43d9d0..7588428b8fcd7 100644 --- a/tools/testing/selftests/proc/proc-empty-vm.c +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -25,6 +25,7 @@ #undef NDEBUG #include #include +#include #include #include #include @@ -41,7 +42,7 @@ * 1: vsyscall VMA is --xp vsyscall=xonly * 2: vsyscall VMA is r-xp vsyscall=emulate */ -static int g_vsyscall; +static volatile int g_vsyscall; static const char *g_proc_pid_maps_vsyscall; static const char *g_proc_pid_smaps_vsyscall; @@ -147,11 +148,12 @@ static void vsyscall(void) g_vsyscall = 0; /* gettimeofday(NULL, NULL); */ + uint64_t rax = 0xffffffffff600000; asm volatile ( - "call %P0" - : - : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL) - : "rax", "rcx", "r11" + "call *%[rax]" + : [rax] "+a" (rax) + : "D" (NULL), "S" (NULL) + : "rcx", "r11" ); g_vsyscall = 1; diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index 69551bfa215c4..cacbd2a4aec91 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -257,11 +257,12 @@ static void vsyscall(void) g_vsyscall = 0; /* gettimeofday(NULL, NULL); */ + uint64_t rax = 0xffffffffff600000; asm volatile ( - "call %P0" - : - : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL) - : "rax", "rcx", "r11" + "call *%[rax]" + : [rax] "+a" (rax) + : "D" (NULL), "S" (NULL) + : "rcx", "r11" ); g_vsyscall = 1; From bf61acbed8a8b38949a247333ee253899acdc35d Mon Sep 17 00:00:00 2001 From: Robert Foss Date: Fri, 6 Jan 2023 16:21:51 +0100 Subject: [PATCH 0701/1593] MAINTAINERS: update Robert Foss' email address Update the email address for Robert's maintainer entries and fill in .mailmap accordingly. Link: https://lkml.kernel.org/r/20230106152151.115648-1-robert.foss@linaro.org Signed-off-by: Robert Foss Cc: Arnd Bergmann Cc: Baolin Wang Cc: Colin Ian King Cc: Kalle Valo Cc: Kirill Tkhai Cc: Qais Yousef Cc: Vasily Averin Signed-off-by: Andrew Morton --- .mailmap | 1 + MAINTAINERS | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index ccba4cf0d8938..7a882bc7f376f 100644 --- a/.mailmap +++ b/.mailmap @@ -371,6 +371,7 @@ Rémi Denis-Courmont Ricardo Ribalda Ricardo Ribalda Ricardo Ribalda Delgado Ricardo Ribalda +Robert Foss Roman Gushchin Roman Gushchin Roman Gushchin diff --git a/MAINTAINERS b/MAINTAINERS index a36df9ed283d3..320dc090d57ce 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6948,7 +6948,7 @@ F: drivers/gpu/drm/atmel-hlcdc/ DRM DRIVERS FOR BRIDGE CHIPS M: Andrzej Hajda M: Neil Armstrong -M: Robert Foss +M: Robert Foss R: Laurent Pinchart R: Jonas Karlman R: Jernej Skrabec @@ -17237,7 +17237,7 @@ F: Documentation/devicetree/bindings/net/qcom,bam-dmux.yaml F: drivers/net/wwan/qcom_bam_dmux.c QUALCOMM CAMERA SUBSYSTEM DRIVER -M: Robert Foss +M: Robert Foss M: Todor Tomov L: linux-media@vger.kernel.org S: Maintained @@ -17317,7 +17317,7 @@ F: drivers/dma/qcom/hidma* QUALCOMM I2C CCI DRIVER M: Loic Poulain -M: Robert Foss +M: Robert Foss L: linux-i2c@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Maintained From 7f31cced5724e6d414fe750aa1cd7e7b578ec22f Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Mon, 9 Jan 2023 20:55:21 +0000 Subject: [PATCH 0702/1593] nommu: fix memory leak in do_mmap() error path The preallocation of the maple tree nodes may leak if the error path to "error_just_free" is taken. Fix this by moving the freeing of the maple tree nodes to a shared location for all error paths. Link: https://lkml.kernel.org/r/20230109205507.955577-1-Liam.Howlett@oracle.com Fixes: 8220543df148 ("nommu: remove uses of VMA linked list") Signed-off-by: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yu Zhao Cc: Signed-off-by: Andrew Morton --- mm/nommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/nommu.c b/mm/nommu.c index 214c70e1d0594..c8252f01d5dbb 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1240,6 +1240,7 @@ unsigned long do_mmap(struct file *file, error_just_free: up_write(&nommu_region_sem); error: + mas_destroy(&mas); if (region->vm_file) fput(region->vm_file); kmem_cache_free(vm_region_jar, region); @@ -1250,7 +1251,6 @@ unsigned long do_mmap(struct file *file, sharing_violation: up_write(&nommu_region_sem); - mas_destroy(&mas); pr_warn("Attempt to share mismatched mappings\n"); ret = -EINVAL; goto error; From 80be727ec87225797771a39f3e6801baf291faaf Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Mon, 9 Jan 2023 20:57:21 +0000 Subject: [PATCH 0703/1593] nommu: fix do_munmap() error path When removing a VMA from the tree fails due to no memory, do not free the VMA since a reference still exists. Link: https://lkml.kernel.org/r/20230109205708.956103-1-Liam.Howlett@oracle.com Fixes: 8220543df148 ("nommu: remove uses of VMA linked list") Signed-off-by: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yu Zhao Cc: Signed-off-by: Andrew Morton --- mm/nommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/nommu.c b/mm/nommu.c index c8252f01d5dbb..844af5be76400 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1509,7 +1509,8 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list erase_whole_vma: if (delete_vma_from_mm(vma)) ret = -ENOMEM; - delete_vma(mm, vma); + else + delete_vma(mm, vma); return ret; } From fd9edbdbdcde6b489ce59f326755ef16a2ffadd7 Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Mon, 9 Jan 2023 20:58:20 +0000 Subject: [PATCH 0704/1593] nommu: fix split_vma() map_count error During the maple tree conversion of nommu, an error in counting the VMAs was introduced by counting the existing VMA again. The counting used to be decremented by one and incremented by two, but now it only increments by two. Fix the counting error by moving the increment outside the setup_vma_to_mm() function to the callers. Link: https://lkml.kernel.org/r/20230109205809.956325-1-Liam.Howlett@oracle.com Fixes: 8220543df148 ("nommu: remove uses of VMA linked list") Signed-off-by: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yu Zhao Cc: Signed-off-by: Andrew Morton --- mm/nommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/nommu.c b/mm/nommu.c index 844af5be76400..5b83938ecb67c 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -559,7 +559,6 @@ void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas) static void setup_vma_to_mm(struct vm_area_struct *vma, struct mm_struct *mm) { - mm->map_count++; vma->vm_mm = mm; /* add the VMA to the mapping */ @@ -587,6 +586,7 @@ static void mas_add_vma_to_mm(struct ma_state *mas, struct mm_struct *mm, BUG_ON(!vma->vm_region); setup_vma_to_mm(vma, mm); + mm->map_count++; /* add the VMA to the tree */ vma_mas_store(vma, mas); @@ -1347,6 +1347,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, if (vma->vm_file) return -ENOMEM; + mm = vma->vm_mm; if (mm->map_count >= sysctl_max_map_count) return -ENOMEM; @@ -1398,6 +1399,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, mas_set_range(&mas, vma->vm_start, vma->vm_end - 1); mas_store(&mas, vma); vma_mas_store(new, &mas); + mm->map_count++; return 0; err_mas_preallocate: From 19fa92fb72f8bc542f1673862058f3b078114004 Mon Sep 17 00:00:00 2001 From: Lizzy Fleckenstein Date: Mon, 9 Jan 2023 21:18:37 +0100 Subject: [PATCH 0705/1593] init/Kconfig: fix typo (usafe -> unsafe) Fix the help text for the PRINTK_SAFE_LOG_BUF_SHIFT setting. Link: https://lkml.kernel.org/r/20230109201837.23873-1-eliasfleckenstein@web.de Signed-off-by: Lizzy Fleckenstein Reviewed-by: Nick Desaulniers Cc: Masahiro Yamada Signed-off-by: Andrew Morton --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 7e5c3ddc341de..57c8d224ea4cf 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -776,7 +776,7 @@ config PRINTK_SAFE_LOG_BUF_SHIFT depends on PRINTK help Select the size of an alternate printk per-CPU buffer where messages - printed from usafe contexts are temporary stored. One example would + printed from unsafe contexts are temporary stored. One example would be NMI messages, another one - printk recursion. The messages are copied to the main log buffer in a safe context to avoid a deadlock. The value defines the size as a power of 2. From e6cf91b7b47ff82b624bdfe2fdcde32bb52e71dd Mon Sep 17 00:00:00 2001 From: Xingyuan Mo Date: Thu, 12 Jan 2023 00:24:53 +0800 Subject: [PATCH 0706/1593] NFSD: fix use-after-free in nfsd4_ssc_setup_dul() If signal_pending() returns true, schedule_timeout() will not be executed, causing the waiting task to remain in the wait queue. Fixed by adding a call to finish_wait(), which ensures that the waiting task will always be removed from the wait queue. Fixes: f4e44b393389 ("NFSD: delay unmount source's export after inter-server copy completed.") Signed-off-by: Xingyuan Mo Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9b81d012666e2..f189ba7995f5a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1318,6 +1318,7 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, /* allow 20secs for mount/unmount for now - revisit */ if (signal_pending(current) || (schedule_timeout(20*HZ) == 0)) { + finish_wait(&nn->nfsd_ssc_waitq, &wait); kfree(work); return nfserr_eagain; } From f385f7d244134246f984975ed34cd75f77de479f Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 11 Jan 2023 12:17:09 -0800 Subject: [PATCH 0707/1593] NFSD: register/unregister of nfsd-client shrinker at nfsd startup/shutdown time Currently the nfsd-client shrinker is registered and unregistered at the time the nfsd module is loaded and unloaded. The problem with this is the shrinker is being registered before all of the relevant fields in nfsd_net are initialized when nfsd is started. This can lead to an oops when memory is low and the shrinker is called while nfsd is not running. This patch moves the register/unregister of nfsd-client shrinker from module load/unload time to nfsd startup/shutdown time. Fixes: 44df6f439a17 ("NFSD: add delegation reaper to react to low memory condition") Reported-by: Mike Galbraith Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 22 +++++++++++----------- fs/nfsd/nfsctl.c | 7 +------ fs/nfsd/nfsd.h | 6 ++---- 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 313f666d53578..db41358bd0d30 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4421,7 +4421,7 @@ nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) return SHRINK_STOP; } -int +void nfsd4_init_leases_net(struct nfsd_net *nn) { struct sysinfo si; @@ -4443,16 +4443,6 @@ nfsd4_init_leases_net(struct nfsd_net *nn) nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); atomic_set(&nn->nfsd_courtesy_clients, 0); - nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; - nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; - nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"); -} - -void -nfsd4_leases_net_shutdown(struct nfsd_net *nn) -{ - unregister_shrinker(&nn->nfsd_client_shrinker); } static void init_nfs4_replay(struct nfs4_replay *rp) @@ -8069,8 +8059,17 @@ static int nfs4_state_create_net(struct net *net) INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); + nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; + nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; + nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; + + if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client")) + goto err_shrinker; return 0; +err_shrinker: + put_net(net); + kfree(nn->sessionid_hashtbl); err_sessionid: kfree(nn->unconf_id_hashtbl); err_unconf_id: @@ -8163,6 +8162,7 @@ nfs4_state_shutdown_net(struct net *net) struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + unregister_shrinker(&nn->nfsd_client_shrinker); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index d1e581a60480c..c2577ee7ffb22 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1457,9 +1457,7 @@ static __net_init int nfsd_init_net(struct net *net) goto out_idmap_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; - retval = nfsd4_init_leases_net(nn); - if (retval) - goto out_drc_error; + nfsd4_init_leases_net(nn); retval = nfsd_reply_cache_init(nn); if (retval) goto out_cache_error; @@ -1469,8 +1467,6 @@ static __net_init int nfsd_init_net(struct net *net) return 0; out_cache_error: - nfsd4_leases_net_shutdown(nn); -out_drc_error: nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); @@ -1486,7 +1482,6 @@ static __net_exit void nfsd_exit_net(struct net *net) nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); - nfsd4_leases_net_shutdown(nn); } static struct pernet_operations nfsd_net_ops = { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 93b42ef9ed91b..fa0144a742678 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -504,8 +504,7 @@ extern void unregister_cld_notifier(void); extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); #endif -extern int nfsd4_init_leases_net(struct nfsd_net *nn); -extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn); +extern void nfsd4_init_leases_net(struct nfsd_net *nn); #else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) @@ -513,8 +512,7 @@ static inline int nfsd4_is_junction(struct dentry *dentry) return 0; } -static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; }; -static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {}; +static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { }; #define register_cld_notifier() 0 #define unregister_cld_notifier() do { } while(0) From ae9f29fdfd827ad06c1ae8155c042245a9d00757 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Tue, 10 Jan 2023 19:53:59 +0800 Subject: [PATCH 0708/1593] net: hns3: fix wrong use of rss size during VF rss config Currently, it used old rss size to get current tc mode. As a result, the rss size is updated, but the tc mode is still configured based on the old rss size. So this patch fixes it by using the new rss size in both process. Fixes: 93969dc14fcd ("net: hns3: refactor VF rss init APIs with new common rss init APIs") Signed-off-by: Jie Wang Signed-off-by: Hao Lan Reviewed-by: Alexander Duyck Link: https://lore.kernel.org/r/20230110115359.10163-1-lanhao@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 081bd2c3f2891..e84e5be8e59ed 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -3130,7 +3130,7 @@ static int hclgevf_set_channels(struct hnae3_handle *handle, u32 new_tqps_num, hclgevf_update_rss_size(handle, new_tqps_num); - hclge_comm_get_rss_tc_info(cur_rss_size, hdev->hw_tc_map, + hclge_comm_get_rss_tc_info(kinfo->rss_size, hdev->hw_tc_map, tc_offset, tc_valid, tc_size); ret = hclge_comm_set_rss_tc_mode(&hdev->hw.hw, tc_offset, tc_valid, tc_size); From 97f5e03a4a27d27ee4fed0cdb1658c81cf2784db Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Jan 2023 20:25:47 -0800 Subject: [PATCH 0709/1593] bnxt: make sure we return pages to the pool Before the commit under Fixes the page would have been released from the pool before the napi_alloc_skb() call, so normal page freeing was fine (released page == no longer in the pool). After the change we just mark the page for recycling so it's still in the pool if the skb alloc fails, we need to recycle. Same commit added the same bug in the new bnxt_rx_multi_page_skb(). Fixes: 1dc4c557bfed ("bnxt: adding bnxt_xdp_build_skb to build skb from multibuffer xdp_buff") Reviewed-by: Andy Gospodarek Link: https://lore.kernel.org/r/20230111042547.987749-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 16ce7a90610c5..240a7e8a76528 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -993,7 +993,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, DMA_ATTR_WEAK_ORDERING); skb = build_skb(page_address(page), PAGE_SIZE); if (!skb) { - __free_page(page); + page_pool_recycle_direct(rxr->page_pool, page); return NULL; } skb_mark_for_recycle(skb); @@ -1031,7 +1031,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, skb = napi_alloc_skb(&rxr->bnapi->napi, payload); if (!skb) { - __free_page(page); + page_pool_recycle_direct(rxr->page_pool, page); return NULL; } From 037b48057e8b485a8d72f808122796aeadbbee32 Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Wed, 4 Jan 2023 12:03:19 +0800 Subject: [PATCH 0710/1593] scsi: hisi_sas: Use abort task set to reset SAS disks when discovered Currently clear task set is used to abort all commands remaining in the disk when the SAS disk is discovered, and if the disk is discovered by two initiators, other I_T nexuses are also affected. So use abort task set instead and take effect only on the specified I_T nexus. Signed-off-by: Xingui Yang Signed-off-by: Xiang Chen Link: https://lore.kernel.org/r/1672805000-141102-2-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index e9c2d306ed873..5e06b7add125b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -704,7 +704,7 @@ static int hisi_sas_init_device(struct domain_device *device) int_to_scsilun(0, &lun); while (retry-- > 0) { - rc = sas_clear_task_set(device, lun.scsi_lun); + rc = sas_abort_task_set(device, lun.scsi_lun); if (rc == TMF_RESP_FUNC_COMPLETE) { hisi_sas_release_task(hisi_hba, device); break; From f58c89700630da6554b24fd3df293a24874c10c1 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Wed, 4 Jan 2023 12:03:20 +0800 Subject: [PATCH 0711/1593] scsi: hisi_sas: Set a port invalid only if there are no devices attached when refreshing port id Currently the driver sets the port invalid if one phy in the port is not enabled, which may cause issues in expander situation. In directly attached situation, if phy up doesn't occur in time when refreshing port id, the port is incorrectly set to invalid which will also cause disk lost. Therefore set a port invalid only if there are no devices attached to the port. Signed-off-by: Yihang Li Signed-off-by: Xiang Chen Link: https://lore.kernel.org/r/1672805000-141102-3-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 5e06b7add125b..8c038ccf1c095 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1316,7 +1316,7 @@ static void hisi_sas_refresh_port_id(struct hisi_hba *hisi_hba) device->linkrate = phy->sas_phy.linkrate; hisi_hba->hw->setup_itct(hisi_hba, sas_dev); - } else + } else if (!port->port_attached) port->id = 0xff; } } From ae9dcb91c6069e20b3b9505d79cbc89fd6e086f5 Mon Sep 17 00:00:00 2001 From: Noor Azura Ahmad Tarmizi Date: Wed, 11 Jan 2023 13:02:00 +0800 Subject: [PATCH 0712/1593] net: stmmac: add aux timestamps fifo clearance wait Add timeout polling wait for auxiliary timestamps snapshot FIFO clear bit (ATSFC) to clear. This is to ensure no residue fifo value is being read erroneously. Fixes: f4da56529da6 ("net: stmmac: Add support for external trigger timestamping") Cc: # 5.10.x Signed-off-by: Noor Azura Ahmad Tarmizi Link: https://lore.kernel.org/r/20230111050200.2130-1-noor.azura.ahmad.tarmizi@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index fc06ddeac0d53..b4388ca8d2116 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -210,7 +210,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp, } writel(acr_value, ptpaddr + PTP_ACR); mutex_unlock(&priv->aux_ts_lock); - ret = 0; + /* wait for auxts fifo clear to finish */ + ret = readl_poll_timeout(ptpaddr + PTP_ACR, acr_value, + !(acr_value & PTP_ACR_ATSFC), + 10, 10000); break; default: From 26ce6ec364f18d2915923bc05784084e54a5c4cc Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 9 Jan 2023 16:09:22 +0100 Subject: [PATCH 0713/1593] x86/mm: fix poking_init() for Xen PV guests Commit 3f4c8211d982 ("x86/mm: Use mm_alloc() in poking_init()") broke the kernel for running as Xen PV guest. It seems as if the new address space is never activated before being used, resulting in Xen rejecting to accept the new CR3 value (the PGD isn't pinned). Fix that by adding the now missing call of paravirt_arch_dup_mmap() to poking_init(). That call was previously done by dup_mm()->dup_mmap() and it is a NOP for all cases but for Xen PV, where it is just doing the pinning of the PGD. Fixes: 3f4c8211d982 ("x86/mm: Use mm_alloc() in poking_init()") Signed-off-by: Juergen Gross Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230109150922.10578-1-jgross@suse.com --- arch/x86/mm/init.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d3987359d4414..cb258f58fdc87 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -26,6 +26,7 @@ #include #include #include +#include /* * We need to define the tracepoints somewhere, and tlb.c @@ -804,6 +805,9 @@ void __init poking_init(void) poking_mm = mm_alloc(); BUG_ON(!poking_mm); + /* Xen PV guests need the PGD to be pinned. */ + paravirt_arch_dup_mmap(NULL, poking_mm); + /* * Randomize the poking address, but make sure that the following page * will be mapped at the same PMD. We need 2 pages, so find space for 3, From 9bfa2544dbd1133f0b0af4e967de3bb9c1e3a497 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 8 Dec 2022 13:52:41 +0200 Subject: [PATCH 0714/1593] ARM: dts: at91: sam9x60: fix the ddr clock for sam9x60 The 2nd DDR clock for sam9x60 DDR controller is peripheral clock with id 49. Fixes: 1e5f532c2737 ("ARM: dts: at91: sam9x60: add device tree for soc and board") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20221208115241.36312-1-claudiu.beznea@microchip.com --- arch/arm/boot/dts/sam9x60.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sam9x60.dtsi b/arch/arm/boot/dts/sam9x60.dtsi index 8f5477e307dd4..37a5d96aaf642 100644 --- a/arch/arm/boot/dts/sam9x60.dtsi +++ b/arch/arm/boot/dts/sam9x60.dtsi @@ -564,7 +564,7 @@ mpddrc: mpddrc@ffffe800 { compatible = "microchip,sam9x60-ddramc", "atmel,sama5d3-ddramc"; reg = <0xffffe800 0x200>; - clocks = <&pmc PMC_TYPE_SYSTEM 2>, <&pmc PMC_TYPE_CORE PMC_MCK>; + clocks = <&pmc PMC_TYPE_SYSTEM 2>, <&pmc PMC_TYPE_PERIPHERAL 49>; clock-names = "ddrck", "mpddr"; }; From d784fc8be6814b31854f7b529919ca4506ff8066 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 12 Jan 2023 13:23:56 +0200 Subject: [PATCH 0715/1593] ASoC: amd: acp-es8336: Drop reference count of ACPI device after use Theoretically the device might gone if its reference count drops to 0. This might be the case when we try to find the first physical node of the ACPI device. We need to keep reference to it until we get a result of the above mentioned call. Refactor the code to drop the reference count at the correct place. While at it, move to acpi_dev_put() as symmetrical call to the acpi_dev_get_first_match_dev(). Fixes: 02527c3f2300 ("ASoC: amd: add Machine driver for Jadeite platform") Signed-off-by: Andy Shevchenko Acked-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20230112112356.67643-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/amd/acp-es8336.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/amd/acp-es8336.c b/sound/soc/amd/acp-es8336.c index 2fe8df86053ae..89499542c803f 100644 --- a/sound/soc/amd/acp-es8336.c +++ b/sound/soc/amd/acp-es8336.c @@ -198,9 +198,11 @@ static int st_es8336_late_probe(struct snd_soc_card *card) int ret; adev = acpi_dev_get_first_match_dev("ESSX8336", NULL, -1); - if (adev) - put_device(&adev->dev); + if (!adev) + return -ENODEV; + codec_dev = acpi_get_first_physical_node(adev); + acpi_dev_put(adev); if (!codec_dev) dev_err(card->dev, "can not find codec dev\n"); From 040b35c19bf2bdbb8ba5f8742b4e199ace3cbdc3 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 11 Jan 2023 12:50:15 -0500 Subject: [PATCH 0716/1593] drm/ttm: Fix a regression causing kernel oops'es MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The branch is explicitly taken if ttm == NULL which means that to avoid a null pointer reference the ttm object can not be used inside. Switch back to dst_mem to avoid kernel oops'es. This fixes kernel oops'es with any buffer objects which don't have ttm_tt, e.g. with vram based screen objects on vmwgfx. Signed-off-by: Zack Rusin Fixes: e3c92eb4a84f ("drm/ttm: rework on ttm_resource to use size_t type") Cc: Somalapuram Amaranath Cc: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20230111175015.1134923-1-zack@kde.org Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index ba3aa0a0fc43c..da5493f789dfe 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -173,7 +173,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, clear = src_iter->ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm)); if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) - ttm_move_memcpy(clear, ttm->num_pages, dst_iter, src_iter); + ttm_move_memcpy(clear, PFN_UP(dst_mem->size), dst_iter, src_iter); if (!src_iter->ops->maps_tt) ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, src_mem); From eb6c59b735aa6cca77cdbb59cc69d69a0d63d986 Mon Sep 17 00:00:00 2001 From: Anastasia Belova Date: Tue, 10 Jan 2023 12:14:50 +0300 Subject: [PATCH 0717/1593] xfrm: compat: change expression for switch in xfrm_xlate64 Compare XFRM_MSG_NEWSPDINFO (value from netlink configuration messages enum) with nlh_src->nlmsg_type instead of nlh_src->nlmsg_type - XFRM_MSG_BASE. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 4e9505064f58 ("net/xfrm/compat: Copy xfrm_spdattr_type_t atributes") Signed-off-by: Anastasia Belova Acked-by: Dmitry Safonov <0x7f454c46@gmail.com> Tested-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index a0f62fa02e06e..12405aa5bce84 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -302,7 +302,7 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src) nla_for_each_attr(nla, attrs, len, remaining) { int err; - switch (type) { + switch (nlh_src->nlmsg_type) { case XFRM_MSG_NEWSPDINFO: err = xfrm_nla_cpy(dst, nla, nla_len(nla)); break; From 5640e81607152d7f2d2558227c0f6cb78b8f39cf Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Thu, 12 Jan 2023 04:00:27 -0800 Subject: [PATCH 0718/1593] drm: Optimize drm buddy top-down allocation method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are observing performance drop in many usecases which include games, 3D benchmark applications,etc.. To solve this problem, We are strictly not allowing top down flag enabled allocations to steal the memory space from cpu visible region. The idea is, we are sorting each order list entries in ascending order and compare the last entry of each order list in the freelist and return the max block. This patch improves the 3D benchmark scores and solves fragmentation issues. All drm buddy selftests are verfied. drm_buddy: pass:6 fail:0 skip:0 total:6 Signed-off-by: Arunpravin Paneer Selvam Acked-by: Christian König Acked-by: Alex Deucher Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20230112120027.3072-1-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König CC: Cc: stable@vger.kernel.org # 5.18+ --- drivers/gpu/drm/drm_buddy.c | 81 ++++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 11bb593994718..3d1f50f481cfd 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -38,6 +38,25 @@ static void drm_block_free(struct drm_buddy *mm, kmem_cache_free(slab_blocks, block); } +static void list_insert_sorted(struct drm_buddy *mm, + struct drm_buddy_block *block) +{ + struct drm_buddy_block *node; + struct list_head *head; + + head = &mm->free_list[drm_buddy_block_order(block)]; + if (list_empty(head)) { + list_add(&block->link, head); + return; + } + + list_for_each_entry(node, head, link) + if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node)) + break; + + __list_add(&block->link, node->link.prev, &node->link); +} + static void mark_allocated(struct drm_buddy_block *block) { block->header &= ~DRM_BUDDY_HEADER_STATE; @@ -52,8 +71,7 @@ static void mark_free(struct drm_buddy *mm, block->header &= ~DRM_BUDDY_HEADER_STATE; block->header |= DRM_BUDDY_FREE; - list_add(&block->link, - &mm->free_list[drm_buddy_block_order(block)]); + list_insert_sorted(mm, block); } static void mark_split(struct drm_buddy_block *block) @@ -387,20 +405,26 @@ alloc_range_bias(struct drm_buddy *mm, } static struct drm_buddy_block * -get_maxblock(struct list_head *head) +get_maxblock(struct drm_buddy *mm, unsigned int order) { struct drm_buddy_block *max_block = NULL, *node; + unsigned int i; - max_block = list_first_entry_or_null(head, - struct drm_buddy_block, - link); - if (!max_block) - return NULL; + for (i = order; i <= mm->max_order; ++i) { + if (!list_empty(&mm->free_list[i])) { + node = list_last_entry(&mm->free_list[i], + struct drm_buddy_block, + link); + if (!max_block) { + max_block = node; + continue; + } - list_for_each_entry(node, head, link) { - if (drm_buddy_block_offset(node) > - drm_buddy_block_offset(max_block)) - max_block = node; + if (drm_buddy_block_offset(node) > + drm_buddy_block_offset(max_block)) { + max_block = node; + } + } } return max_block; @@ -412,20 +436,23 @@ alloc_from_freelist(struct drm_buddy *mm, unsigned long flags) { struct drm_buddy_block *block = NULL; - unsigned int i; + unsigned int tmp; int err; - for (i = order; i <= mm->max_order; ++i) { - if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) { - block = get_maxblock(&mm->free_list[i]); - if (block) - break; - } else { - block = list_first_entry_or_null(&mm->free_list[i], - struct drm_buddy_block, - link); - if (block) - break; + if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) { + block = get_maxblock(mm, order); + if (block) + /* Store the obtained block order */ + tmp = drm_buddy_block_order(block); + } else { + for (tmp = order; tmp <= mm->max_order; ++tmp) { + if (!list_empty(&mm->free_list[tmp])) { + block = list_last_entry(&mm->free_list[tmp], + struct drm_buddy_block, + link); + if (block) + break; + } } } @@ -434,18 +461,18 @@ alloc_from_freelist(struct drm_buddy *mm, BUG_ON(!drm_buddy_block_is_free(block)); - while (i != order) { + while (tmp != order) { err = split_block(mm, block); if (unlikely(err)) goto err_undo; block = block->right; - i--; + tmp--; } return block; err_undo: - if (i != order) + if (tmp != order) __drm_buddy_free(mm, block); return ERR_PTR(err); } From 07a2975c65f2be2e22591d795a9c39b00f95fd11 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 12 Jan 2023 10:12:43 +0100 Subject: [PATCH 0719/1593] drm/vc4: bo: Fix drmm_mutex_init memory hog Commit 374146cad469 ("drm/vc4: Switch to drmm_mutex_init") converted, among other functions, vc4_create_object() to use drmm_mutex_init(). However, that function is used to allocate a BO, and therefore the mutex needs to be freed much sooner than when the DRM device is removed from the system. For each buffer allocation we thus end up allocating a small structure as part of the DRM-managed mechanism that is never freed, eventually leading us to no longer having any free memory anymore. Let's switch back to mutex_init/mutex_destroy to deal with it properly. Fixes: 374146cad469 ("drm/vc4: Switch to drmm_mutex_init") Reviewed-by: Daniel Vetter Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20230112091243.490799-1-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_bo.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 43d9b3a6a352c..29ce2fa72cc4c 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -179,6 +179,7 @@ static void vc4_bo_destroy(struct vc4_bo *bo) bo->validated_shader = NULL; } + mutex_destroy(&bo->madv_lock); drm_gem_dma_free(&bo->base); } @@ -406,9 +407,7 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) bo->madv = VC4_MADV_WILLNEED; refcount_set(&bo->usecnt, 0); - ret = drmm_mutex_init(dev, &bo->madv_lock); - if (ret) - return ERR_PTR(ret); + mutex_init(&bo->madv_lock); mutex_lock(&vc4->bo_lock); bo->label = VC4_BO_TYPE_KERNEL; From be53771c87f4e322a9835d3faa9cd73a4ecdec5b Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 11 Jan 2023 13:32:28 +0000 Subject: [PATCH 0720/1593] r8152: add vendor/device ID pair for Microsoft Devkit The Microsoft Devkit 2023 is a an ARM64 based machine featuring a Realtek 8153 USB3.0-to-GBit Ethernet adapter. As in their other machines, Microsoft uses a custom USB device ID. Add the respective ID values to the driver. This makes Ethernet work on the MS Devkit device. The chip has been visually confirmed to be a RTL8153. Signed-off-by: Andre Przywara Link: https://lore.kernel.org/r/20230111133228.190801-1-andre.przywara@arm.com Signed-off-by: Paolo Abeni --- drivers/net/usb/r8152.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index a481a1d831e2f..23da1d9dafd1f 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9836,6 +9836,7 @@ static const struct usb_device_id rtl8152_table[] = { REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab), REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6), REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927), + REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0c5e), REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101), REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f), REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3054), From 7c24fa225081f31bc6da6a355c1ba801889ab29a Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Wed, 11 Jan 2023 16:06:51 -0800 Subject: [PATCH 0721/1593] NFSD: replace delayed_work with work_struct for nfsd_client_shrinker Since nfsd4_state_shrinker_count always calls mod_delayed_work with 0 delay, we can replace delayed_work with work_struct to save some space and overhead. Also add the call to cancel_work after unregister the shrinker in nfs4_state_shutdown_net. Signed-off-by: Dai Ngo Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/netns.h | 2 +- fs/nfsd/nfs4state.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 8c854ba3285bb..51a4b7885cae2 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -195,7 +195,7 @@ struct nfsd_net { atomic_t nfsd_courtesy_clients; struct shrinker nfsd_client_shrinker; - struct delayed_work nfsd_shrinker_work; + struct work_struct nfsd_shrinker_work; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index db41358bd0d30..f194d029c3d64 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4411,7 +4411,7 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) if (!count) count = atomic_long_read(&num_delegations); if (count) - mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0); + queue_work(laundry_wq, &nn->nfsd_shrinker_work); return (unsigned long)count; } @@ -6225,8 +6225,7 @@ deleg_reaper(struct nfsd_net *nn) static void nfsd4_state_shrinker_worker(struct work_struct *work) { - struct delayed_work *dwork = to_delayed_work(work); - struct nfsd_net *nn = container_of(dwork, struct nfsd_net, + struct nfsd_net *nn = container_of(work, struct nfsd_net, nfsd_shrinker_work); courtesy_client_reaper(nn); @@ -8056,7 +8055,7 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); - INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); + INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; @@ -8163,6 +8162,7 @@ nfs4_state_shutdown_net(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); unregister_shrinker(&nn->nfsd_client_shrinker); + cancel_work(&nn->nfsd_shrinker_work); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); From 6d3d970b2735b967650d319be27268fedc5598d1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 10 Jan 2023 14:56:34 +0000 Subject: [PATCH 0722/1593] btrfs: fix missing error handling when logging directory items When logging a directory, at log_dir_items(), if we get an error when attempting to search the subvolume tree for a dir index item, we end up returning 0 (success) from log_dir_items() because 'err' is left with a value of 0. This can lead to a few problems, specially in the case the variable 'last_offset' has a value of (u64)-1 (and it's initialized to that when it was declared): 1) By returning from log_dir_items() with success (0) and a value of (u64)-1 for '*last_offset_ret', we end up not logging any other dir index keys that follow the missing, just deleted, index key. The (u64)-1 value makes log_directory_changes() not call log_dir_items() again; 2) Before returning with success (0), log_dir_items(), will log a dir index range item covering a range from the last old dentry index (stored in the variable 'last_old_dentry_offset') to the value of 'last_offset'. If 'last_offset' has a value of (u64)-1, then it means if the log is persisted and replayed after a power failure, it will cause deletion of all the directory entries that have an index number between last_old_dentry_offset + 1 and (u64)-1; 3) We can end up returning from log_dir_items() with ctx->last_dir_item_offset having a lower value than inode->last_dir_index_offset, because the former is set to the current key we are processing at process_dir_items_leaf(), and at the end of log_directory_changes() we set inode->last_dir_index_offset to the current value of ctx->last_dir_item_offset. So if for example a deletion of a lower dir index key happened, we set ctx->last_dir_item_offset to that index value, then if we return from log_dir_items() because btrfs_search_slot() returned an error, we end up returning without any error from log_dir_items() and then log_directory_changes() sets inode->last_dir_index_offset to a lower value than it had before. This can result in unpredictable and unexpected behaviour when we need to log again the directory in the same transaction, and can result in ending up with a log tree leaf that has duplicated keys, as we do batch insertions of dir index keys into a log tree. Fix this by setting 'err' to the value of 'ret' in case btrfs_search_slot() or btrfs_previous_item() returned an error. That will result in falling back to a full transaction commit. Reported-by: David Arendt Link: https://lore.kernel.org/linux-btrfs/ae169fc6-f504-28f0-a098-6fa6a4dfb612@leemhuis.info/ Fixes: e02119d5a7b4 ("Btrfs: Add a write ahead tree log to optimize synchronous operations") CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index fb52aa0600930..3ef0266e95274 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3826,7 +3826,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, path->slots[0]); if (tmp.type == BTRFS_DIR_INDEX_KEY) last_old_dentry_offset = tmp.offset; + } else if (ret < 0) { + err = ret; } + goto done; } @@ -3846,7 +3849,11 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, */ if (tmp.type == BTRFS_DIR_INDEX_KEY) last_old_dentry_offset = tmp.offset; + } else if (ret < 0) { + err = ret; + goto done; } + btrfs_release_path(path); /* @@ -3859,6 +3866,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, */ search: ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); + if (ret < 0) + err = ret; if (ret != 0) goto done; From 8bb6898da6271d82d8e76d8088d66b971a7dcfa6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 10 Jan 2023 14:56:35 +0000 Subject: [PATCH 0723/1593] btrfs: fix directory logging due to race with concurrent index key deletion Sometimes we log a directory without holding its VFS lock, so while we logging it, dir index entries may be added or removed. This typically happens when logging a dentry from a parent directory that points to a new directory, through log_new_dir_dentries(), or when while logging some other inode we also need to log its parent directories (through btrfs_log_all_parents()). This means that while we are at log_dir_items(), we may not find a dir index key we found before, because it was deleted in the meanwhile, so a call to btrfs_search_slot() may return 1 (key not found). In that case we return from log_dir_items() with a success value (the variable 'err' has a value of 0). This can lead to a few problems, specially in the case where the variable 'last_offset' has a value of (u64)-1 (and it's initialized to that when it was declared): 1) By returning from log_dir_items() with success (0) and a value of (u64)-1 for '*last_offset_ret', we end up not logging any other dir index keys that follow the missing, just deleted, index key. The (u64)-1 value makes log_directory_changes() not call log_dir_items() again; 2) Before returning with success (0), log_dir_items(), will log a dir index range item covering a range from the last old dentry index (stored in the variable 'last_old_dentry_offset') to the value of 'last_offset'. If 'last_offset' has a value of (u64)-1, then it means if the log is persisted and replayed after a power failure, it will cause deletion of all the directory entries that have an index number between last_old_dentry_offset + 1 and (u64)-1; 3) We can end up returning from log_dir_items() with ctx->last_dir_item_offset having a lower value than inode->last_dir_index_offset, because the former is set to the current key we are processing at process_dir_items_leaf(), and at the end of log_directory_changes() we set inode->last_dir_index_offset to the current value of ctx->last_dir_item_offset. So if for example a deletion of a lower dir index key happened, we set ctx->last_dir_item_offset to that index value, then if we return from log_dir_items() because btrfs_search_slot() returned 1, we end up returning from log_dir_items() with success (0) and then log_directory_changes() sets inode->last_dir_index_offset to a lower value than it had before. This can result in unpredictable and unexpected behaviour when we need to log again the directory in the same transaction, and can result in ending up with a log tree leaf that has duplicated keys, as we do batch insertions of dir index keys into a log tree. So fix this by making log_dir_items() move on to the next dir index key if it does not find the one it was looking for. Reported-by: David Arendt Link: https://lore.kernel.org/linux-btrfs/ae169fc6-f504-28f0-a098-6fa6a4dfb612@leemhuis.info/ CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3ef0266e95274..c09daab3f19e8 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3857,17 +3857,26 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, btrfs_release_path(path); /* - * Find the first key from this transaction again. See the note for - * log_new_dir_dentries, if we're logging a directory recursively we - * won't be holding its i_mutex, which means we can modify the directory - * while we're logging it. If we remove an entry between our first - * search and this search we'll not find the key again and can just - * bail. + * Find the first key from this transaction again or the one we were at + * in the loop below in case we had to reschedule. We may be logging the + * directory without holding its VFS lock, which happen when logging new + * dentries (through log_new_dir_dentries()) or in some cases when we + * need to log the parent directory of an inode. This means a dir index + * key might be deleted from the inode's root, and therefore we may not + * find it anymore. If we can't find it, just move to the next key. We + * can not bail out and ignore, because if we do that we will simply + * not log dir index keys that come after the one that was just deleted + * and we can end up logging a dir index range that ends at (u64)-1 + * (@last_offset is initialized to that), resulting in removing dir + * entries we should not remove at log replay time. */ search: ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); + if (ret > 0) + ret = btrfs_next_item(root, path); if (ret < 0) err = ret; + /* If ret is 1, there are no more keys in the inode's root. */ if (ret != 0) goto done; From 94cd63ae679973edeb5ea95ec25a54467c3e54c8 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 10 Jan 2023 14:56:36 +0000 Subject: [PATCH 0724/1593] btrfs: add missing setup of log for full commit at add_conflicting_inode() When logging conflicting inodes, if we reach the maximum limit of inodes, we return BTRFS_LOG_FORCE_COMMIT to force a transaction commit. However we don't mark the log for full commit (with btrfs_set_log_full_commit()), which means that once we leave the log transaction and before we commit the transaction, some other task may sync the log, which is incomplete as we have not logged all conflicting inodes, leading to some inconsistent in case that log ends up being replayed. So also call btrfs_set_log_full_commit() at add_conflicting_inode(). Fixes: e09d94c9e448 ("btrfs: log conflicting inodes without holding log mutex of the initial inode") CC: stable@vger.kernel.org # 6.1 Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c09daab3f19e8..afad44a0becfe 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5598,8 +5598,10 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, * LOG_INODE_EXISTS mode) and slow down other fsyncs or transaction * commits. */ - if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) + if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) { + btrfs_set_log_full_commit(trans); return BTRFS_LOG_FORCE_COMMIT; + } inode = btrfs_iget(root->fs_info->sb, ino, root); /* From 16199ad9eb6db60a6b10794a09fc1ac6d09312ff Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 10 Jan 2023 14:56:37 +0000 Subject: [PATCH 0725/1593] btrfs: do not abort transaction on failure to write log tree when syncing log When syncing the log, if we fail to write log tree extent buffers, we mark the log for a full commit and abort the transaction. However we don't need to abort the transaction, all we really need to do is to make sure no one can commit a superblock pointing to new log tree roots. Just because we got a failure writing extent buffers for a log tree, it does not mean we will also fail to do a transaction commit. One particular case is if due to a bug somewhere, when writing log tree extent buffers, the tree checker detects some corruption and the writeout fails because of that. Aborting the transaction can be very disruptive for a user, specially if the issue happened on a root filesystem. One example is the scenario in the Link tag below, where an isolated corruption on log tree leaves was causing transaction aborts when syncing the log. Link: https://lore.kernel.org/linux-btrfs/ae169fc6-f504-28f0-a098-6fa6a4dfb612@leemhuis.info/ CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 9 ++++++++- fs/btrfs/tree-log.c | 2 -- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8aeaada1fcaec..3aa04224315eb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -367,7 +367,14 @@ static int csum_one_extent_buffer(struct extent_buffer *eb) btrfs_print_tree(eb, 0); btrfs_err(fs_info, "block=%llu write time tree block corruption detected", eb->start); - WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); + /* + * Be noisy if this is an extent buffer from a log tree. We don't abort + * a transaction in case there's a bad log tree extent buffer, we just + * fallback to a transaction commit. Still we want to know when there is + * a bad log tree extent buffer, as that may signal a bug somewhere. + */ + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) || + btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID); return ret; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index afad44a0becfe..1f70d4ebffae4 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2980,7 +2980,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = 0; if (ret) { blk_finish_plug(&plug); - btrfs_abort_transaction(trans, ret); btrfs_set_log_full_commit(trans); mutex_unlock(&root->log_mutex); goto out; @@ -3112,7 +3111,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, goto out_wake_log_root; } else if (ret) { btrfs_set_log_full_commit(trans); - btrfs_abort_transaction(trans, ret); mutex_unlock(&log_root_tree->log_mutex); goto out_wake_log_root; } From 09e44868f1e03c7825ca4283256abedc95e249a3 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 10 Jan 2023 14:56:38 +0000 Subject: [PATCH 0726/1593] btrfs: do not abort transaction on failure to update log root When syncing a log, if we fail to update a log root in the log root tree, we are aborting the transaction if the failure was not -ENOSPC. This is excessive because there is a chance that a transaction commit can succeed, and therefore avoid to turn the filesystem into RO mode. All we need to be careful about is to mark the log for a full commit, which we already do, to make sure no one commits a super block pointing to an outdated log root tree. So don't abort the transaction if we fail to update a log root in the log root tree, and log an error if the failure is not -ENOSPC, so that it does not go completely unnoticed. CC: stable@vger.kernel.org # 6.0+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1f70d4ebffae4..d43261545264e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3044,15 +3044,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, blk_finish_plug(&plug); btrfs_set_log_full_commit(trans); - - if (ret != -ENOSPC) { - btrfs_abort_transaction(trans, ret); - mutex_unlock(&log_root_tree->log_mutex); - goto out; - } + if (ret != -ENOSPC) + btrfs_err(fs_info, + "failed to update log for root %llu ret %d", + root->root_key.objectid, ret); btrfs_wait_tree_log_extents(log, mark); mutex_unlock(&log_root_tree->log_mutex); - ret = BTRFS_LOG_FORCE_COMMIT; goto out; } From a152d05ae4a71d802d50cf9177dba34e8bb09f68 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Wed, 11 Jan 2023 12:37:58 +0100 Subject: [PATCH 0727/1593] cifs: Fix uninitialized memory read for smb311 posix symlink create If smb311 posix is enabled, we send the intended mode for file creation in the posix create context. Instead of using what's there on the stack, create the mfsymlink file with 0644. Fixes: ce558b0e17f8a ("smb3: Add posix create context for smb3.11 posix mounts") Cc: stable@vger.kernel.org Signed-off-by: Volker Lendecke Reviewed-by: Tom Talpey Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/link.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/link.c b/fs/cifs/link.c index bd374feeccaa1..a5a097a699837 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -428,6 +428,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.disposition = FILE_CREATE; oparms.fid = &fid; oparms.reconnect = false; + oparms.mode = 0644; rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL, NULL); From 6b1c0bd6fdefbf3c3d75680c2708f5423ef72e46 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 12 Jan 2023 13:28:48 +0200 Subject: [PATCH 0728/1593] ASoC: Intel: bytcht_es8316: Drop reference count of ACPI device after use Theoretically the device might gone if its reference count drops to 0. This might be the case when we try to find the first physical node of the ACPI device. We need to keep reference to it until we get a result of the above mentioned call. Refactor the code to drop the reference count at the correct place. While at it, move to acpi_dev_put() as symmetrical call to the acpi_dev_get_first_match_dev(). Fixes: 3c22a73fb873 ("ASoC: Intel: bytcht_es8316: fix HID handling") Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230112112852.67714-2-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcht_es8316.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index 09d1f0f6d6865..df157b01df8bb 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -497,21 +497,28 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) if (adev) { snprintf(codec_name, sizeof(codec_name), "i2c-%s", acpi_dev_name(adev)); - put_device(&adev->dev); byt_cht_es8316_dais[dai_index].codecs->name = codec_name; } else { dev_err(dev, "Error cannot find '%s' dev\n", mach->id); return -ENXIO; } + codec_dev = acpi_get_first_physical_node(adev); + acpi_dev_put(adev); + if (!codec_dev) + return -EPROBE_DEFER; + priv->codec_dev = get_device(codec_dev); + /* override platform name, if required */ byt_cht_es8316_card.dev = dev; platform_name = mach->mach_params.platform; ret = snd_soc_fixup_dai_links_platform_name(&byt_cht_es8316_card, platform_name); - if (ret) + if (ret) { + put_device(codec_dev); return ret; + } /* Check for BYTCR or other platform and setup quirks */ dmi_id = dmi_first_match(byt_cht_es8316_quirk_table); @@ -539,13 +546,10 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) /* get the clock */ priv->mclk = devm_clk_get(dev, "pmc_plt_clk_3"); - if (IS_ERR(priv->mclk)) + if (IS_ERR(priv->mclk)) { + put_device(codec_dev); return dev_err_probe(dev, PTR_ERR(priv->mclk), "clk_get pmc_plt_clk_3 failed\n"); - - codec_dev = acpi_get_first_physical_node(adev); - if (!codec_dev) - return -EPROBE_DEFER; - priv->codec_dev = get_device(codec_dev); + } if (quirk & BYT_CHT_ES8316_JD_INVERTED) props[cnt++] = PROPERTY_ENTRY_BOOL("everest,jack-detect-inverted"); From 721858823d7cdc8f2a897579b040e935989f6f02 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 12 Jan 2023 13:28:49 +0200 Subject: [PATCH 0729/1593] ASoC: Intel: bytcr_rt5651: Drop reference count of ACPI device after use Theoretically the device might gone if its reference count drops to 0. This might be the case when we try to find the first physical node of the ACPI device. We need to keep reference to it until we get a result of the above mentioned call. Refactor the code to drop the reference count at the correct place. While at it, move to acpi_dev_put() as symmetrical call to the acpi_dev_get_first_match_dev(). Fixes: 02c0a3b3047f ("ASoC: Intel: bytcr_rt5651: add MCLK, quirks and cleanups") Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230112112852.67714-3-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5651.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index 81ac6eeda2e6e..8fca9b82d4d04 100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -922,7 +922,6 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) if (adev) { snprintf(byt_rt5651_codec_name, sizeof(byt_rt5651_codec_name), "i2c-%s", acpi_dev_name(adev)); - put_device(&adev->dev); byt_rt5651_dais[dai_index].codecs->name = byt_rt5651_codec_name; } else { dev_err(dev, "Error cannot find '%s' dev\n", mach->id); @@ -930,6 +929,7 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) } codec_dev = acpi_get_first_physical_node(adev); + acpi_dev_put(adev); if (!codec_dev) return -EPROBE_DEFER; priv->codec_dev = get_device(codec_dev); From cbf87bcf46e399e9a5288430d940efbad3551c68 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 12 Jan 2023 13:28:50 +0200 Subject: [PATCH 0730/1593] ASoC: Intel: bytcr_rt5640: Drop reference count of ACPI device after use Theoretically the device might gone if its reference count drops to 0. This might be the case when we try to find the first physical node of the ACPI device. We need to keep reference to it until we get a result of the above mentioned call. Refactor the code to drop the reference count at the correct place. While at it, move to acpi_dev_put() as symmetrical call to the acpi_dev_get_first_match_dev(). Fixes: a232b96dcece ("ASoC: Intel: bytcr_rt5640: use HID translation util") Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230112112852.67714-4-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 4699ca79f3ea6..79e0039c79a38 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -1636,13 +1636,18 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) if (adev) { snprintf(byt_rt5640_codec_name, sizeof(byt_rt5640_codec_name), "i2c-%s", acpi_dev_name(adev)); - put_device(&adev->dev); byt_rt5640_dais[dai_index].codecs->name = byt_rt5640_codec_name; } else { dev_err(dev, "Error cannot find '%s' dev\n", mach->id); return -ENXIO; } + codec_dev = acpi_get_first_physical_node(adev); + acpi_dev_put(adev); + if (!codec_dev) + return -EPROBE_DEFER; + priv->codec_dev = get_device(codec_dev); + /* * swap SSP0 if bytcr is detected * (will be overridden if DMI quirk is detected) @@ -1717,11 +1722,6 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) byt_rt5640_quirk = quirk_override; } - codec_dev = acpi_get_first_physical_node(adev); - if (!codec_dev) - return -EPROBE_DEFER; - priv->codec_dev = get_device(codec_dev); - if (byt_rt5640_quirk & BYT_RT5640_JD_HP_ELITEP_1000G2) { acpi_dev_add_driver_gpios(ACPI_COMPANION(priv->codec_dev), byt_rt5640_hp_elitepad_1000g2_gpios); From c8aa49abdeda2ab587aadb083e670f6aa0236f93 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 12 Jan 2023 13:28:51 +0200 Subject: [PATCH 0731/1593] ASoC: Intel: bytcr_wm5102: Drop reference count of ACPI device after use Theoretically the device might gone if its reference count drops to 0. This might be the case when we try to find the first physical node of the ACPI device. We need to keep reference to it until we get a result of the above mentioned call. Refactor the code to drop the reference count at the correct place. While at it, move to acpi_dev_put() as symmetrical call to the acpi_dev_get_first_match_dev(). Fixes: 9a87fc1e0619 ("ASoC: Intel: bytcr_wm5102: Add machine driver for BYT/WM5102") Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230112112852.67714-5-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_wm5102.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/bytcr_wm5102.c b/sound/soc/intel/boards/bytcr_wm5102.c index 1669eb3bd80f3..c0706537f6736 100644 --- a/sound/soc/intel/boards/bytcr_wm5102.c +++ b/sound/soc/intel/boards/bytcr_wm5102.c @@ -411,9 +411,9 @@ static int snd_byt_wm5102_mc_probe(struct platform_device *pdev) return -ENOENT; } snprintf(codec_name, sizeof(codec_name), "spi-%s", acpi_dev_name(adev)); - put_device(&adev->dev); codec_dev = bus_find_device_by_name(&spi_bus_type, NULL, codec_name); + acpi_dev_put(adev); if (!codec_dev) return -EPROBE_DEFER; From 64e57b2195725c1ae2246a8a2ce224abb60620ac Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 12 Jan 2023 13:28:52 +0200 Subject: [PATCH 0732/1593] ASoC: Intel: sof_es8336: Drop reference count of ACPI device after use Theoretically the device might gone if its reference count drops to 0. This might be the case when we try to find the first physical node of the ACPI device. We need to keep reference to it until we get a result of the above mentioned call. Refactor the code to drop the reference count at the correct place. While at it, move to acpi_dev_put() as symmetrical call to the acpi_dev_get_first_match_dev(). Fixes: a164137ce91a ("ASoC: Intel: add machine driver for SOF+ES8336") Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230112112852.67714-6-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_es8336.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sound/soc/intel/boards/sof_es8336.c b/sound/soc/intel/boards/sof_es8336.c index 773e5d1d87d46..894b6610b9e27 100644 --- a/sound/soc/intel/boards/sof_es8336.c +++ b/sound/soc/intel/boards/sof_es8336.c @@ -681,7 +681,6 @@ static int sof_es8336_probe(struct platform_device *pdev) if (adev) { snprintf(codec_name, sizeof(codec_name), "i2c-%s", acpi_dev_name(adev)); - put_device(&adev->dev); dai_links[0].codecs->name = codec_name; /* also fixup codec dai name if relevant */ @@ -692,16 +691,19 @@ static int sof_es8336_probe(struct platform_device *pdev) return -ENXIO; } - ret = snd_soc_fixup_dai_links_platform_name(&sof_es8336_card, - mach->mach_params.platform); - if (ret) - return ret; - codec_dev = acpi_get_first_physical_node(adev); + acpi_dev_put(adev); if (!codec_dev) return -EPROBE_DEFER; priv->codec_dev = get_device(codec_dev); + ret = snd_soc_fixup_dai_links_platform_name(&sof_es8336_card, + mach->mach_params.platform); + if (ret) { + put_device(codec_dev); + return ret; + } + if (quirk & SOF_ES8336_JD_INVERTED) props[cnt++] = PROPERTY_ENTRY_BOOL("everest,jack-detect-inverted"); From bb78654b0b46316dac687fd4b7dc7cce636f46cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duke=20Xin=28=E8=BE=9B=E5=AE=89=E6=96=87=29?= Date: Tue, 27 Dec 2022 01:28:25 -0800 Subject: [PATCH 0733/1593] USB: serial: option: add Quectel EM05-G (CS) modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EM05-G (CS) modem has 2 USB configurations that are configurable via the AT command AT+QCFG="usbnet",[ 0 | 2 ] which make the modem enumerate with the following interfaces, respectively: "RMNET" : AT + DIAG + NMEA + Modem + QMI "MBIM" : MBIM + AT + DIAG + NMEA + Modem The detailed description of the USB configuration for each mode as follows: RMNET Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 21 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=030C Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms MBIM Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=030C Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Duke Xin(辛安文) Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index dee79c7d82d5c..7eeca6c7b270d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -255,6 +255,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EP06 0x0306 #define QUECTEL_PRODUCT_EM05G 0x030a #define QUECTEL_PRODUCT_EM060K 0x030b +#define QUECTEL_PRODUCT_EM05G_CS 0x030c #define QUECTEL_PRODUCT_EM05G_SG 0x0311 #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 @@ -1161,6 +1162,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff), .driver_info = RSVD(6) | ZLP }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_CS, 0xff), + .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_SG, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) }, From 6c331f32e32ac71eb3e8b93fceda2802d7ecb889 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duke=20Xin=28=E8=BE=9B=E5=AE=89=E6=96=87=29?= Date: Tue, 27 Dec 2022 01:44:30 -0800 Subject: [PATCH 0734/1593] USB: serial: option: add Quectel EM05-G (GR) modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EM05-G (GR) modem has 2 USB configurations that are configurable via the AT command AT+QCFG="usbnet",[ 0 | 2 ] which make the modem enumerate with the following interfaces, respectively: "RMNET" : AT + DIAG + NMEA + Modem + QMI "MBIM" : MBIM + AT + DIAG + NMEA + Modem The detailed description of the USB configuration for each mode as follows: RMNET Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 21 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0313 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms MBIM Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0313 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Duke Xin(辛安文) Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 7eeca6c7b270d..07ffff889ed1e 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -257,6 +257,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM060K 0x030b #define QUECTEL_PRODUCT_EM05G_CS 0x030c #define QUECTEL_PRODUCT_EM05G_SG 0x0311 +#define QUECTEL_PRODUCT_EM05G_GR 0x0313 #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_RM520N 0x0801 @@ -1164,6 +1165,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_CS, 0xff), .driver_info = RSVD(6) | ZLP }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_GR, 0xff), + .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_SG, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) }, From 36c2b9d6710427f802494ba070621cb415198293 Mon Sep 17 00:00:00 2001 From: Michael Klein Date: Tue, 20 Dec 2022 13:11:03 +0100 Subject: [PATCH 0735/1593] platform/x86: touchscreen_dmi: Add info for the CSL Panther Tab HD Add touchscreen info for the CSL Panther Tab HD. Signed-off-by: Michael Klein Link: https://lore.kernel.org/r/20221220121103.uiwn5l7fii2iggct@LLGMVZLB-0037 Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/touchscreen_dmi.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index baae3120efd05..f00995390fdfe 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -264,6 +264,23 @@ static const struct ts_dmi_data connect_tablet9_data = { .properties = connect_tablet9_props, }; +static const struct property_entry csl_panther_tab_hd_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 1), + PROPERTY_ENTRY_U32("touchscreen-min-y", 20), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1980), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1526), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-csl-panther-tab-hd.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + { } +}; + +static const struct ts_dmi_data csl_panther_tab_hd_data = { + .acpi_name = "MSSL1680:00", + .properties = csl_panther_tab_hd_props, +}; + static const struct property_entry cube_iwork8_air_props[] = { PROPERTY_ENTRY_U32("touchscreen-min-x", 1), PROPERTY_ENTRY_U32("touchscreen-min-y", 3), @@ -1124,6 +1141,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Tablet 9"), }, }, + { + /* CSL Panther Tab HD */ + .driver_data = (void *)&csl_panther_tab_hd_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "CSL Computer GmbH & Co. KG"), + DMI_MATCH(DMI_PRODUCT_NAME, "CSL Panther Tab HD"), + }, + }, { /* CUBE iwork8 Air */ .driver_data = (void *)&cube_iwork8_air_data, From 6e5aedb9324aab1c14a23fae3d8eeb64a679c20e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 10 Jan 2023 10:44:37 -0700 Subject: [PATCH 0736/1593] io_uring/poll: attempt request issue after racy poll wakeup If we have multiple requests waiting on the same target poll waitqueue, then it's quite possible to get a request triggered and get disappointed in not being able to make any progress with it. If we race in doing so, we'll potentially leave the poll request on the internal tables, but removed from the waitqueue. That means that any subsequent trigger of the poll waitqueue will not kick that request into action, causing an application to potentially wait for completion of a request that will never happen. Fix this by adding a new poll return state, IOU_POLL_REISSUE. Rather than have complicated logic for how to re-arm a given type of request, just punt it for a reissue. While in there, move the 'ret' variable to the only section where it gets used. This avoids confusion the scope of it. Cc: stable@vger.kernel.org Fixes: eb0089d629ba ("io_uring: single shot poll removal optimisation") Signed-off-by: Jens Axboe --- io_uring/poll.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/io_uring/poll.c b/io_uring/poll.c index cf6a70bd54e09..32e5fc8365e6a 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -223,21 +223,22 @@ enum { IOU_POLL_DONE = 0, IOU_POLL_NO_ACTION = 1, IOU_POLL_REMOVE_POLL_USE_RES = 2, + IOU_POLL_REISSUE = 3, }; /* * All poll tw should go through this. Checks for poll events, manages * references, does rewait, etc. * - * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action require, - * which is either spurious wakeup or multishot CQE is served. - * IOU_POLL_DONE when it's done with the request, then the mask is stored in req->cqe.res. - * IOU_POLL_REMOVE_POLL_USE_RES indicates to remove multishot poll and that the result - * is stored in req->cqe. + * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action + * require, which is either spurious wakeup or multishot CQE is served. + * IOU_POLL_DONE when it's done with the request, then the mask is stored in + * req->cqe.res. IOU_POLL_REMOVE_POLL_USE_RES indicates to remove multishot + * poll and that the result is stored in req->cqe. */ static int io_poll_check_events(struct io_kiocb *req, bool *locked) { - int v, ret; + int v; /* req->task == current here, checking PF_EXITING is safe */ if (unlikely(req->task->flags & PF_EXITING)) @@ -276,10 +277,15 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) if (!req->cqe.res) { struct poll_table_struct pt = { ._key = req->apoll_events }; req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events; + /* + * We got woken with a mask, but someone else got to + * it first. The above vfs_poll() doesn't add us back + * to the waitqueue, so if we get nothing back, we + * should be safe and attempt a reissue. + */ + if (unlikely(!req->cqe.res)) + return IOU_POLL_REISSUE; } - - if ((unlikely(!req->cqe.res))) - continue; if (req->apoll_events & EPOLLONESHOT) return IOU_POLL_DONE; @@ -294,7 +300,7 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) return IOU_POLL_REMOVE_POLL_USE_RES; } } else { - ret = io_poll_issue(req, locked); + int ret = io_poll_issue(req, locked); if (ret == IOU_STOP_MULTISHOT) return IOU_POLL_REMOVE_POLL_USE_RES; if (ret < 0) @@ -330,6 +336,9 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) poll = io_kiocb_to_cmd(req, struct io_poll); req->cqe.res = mangle_poll(req->cqe.res & poll->events); + } else if (ret == IOU_POLL_REISSUE) { + io_req_task_submit(req, locked); + return; } else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) { req->cqe.res = ret; req_set_fail(req); @@ -342,7 +351,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) if (ret == IOU_POLL_REMOVE_POLL_USE_RES) io_req_task_complete(req, locked); - else if (ret == IOU_POLL_DONE) + else if (ret == IOU_POLL_DONE || ret == IOU_POLL_REISSUE) io_req_task_submit(req, locked); else io_req_defer_failed(req, ret); From ae0fa0a3126a86c801c3220fcd8eefe03aa39f3e Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 2 Dec 2022 23:33:19 +0100 Subject: [PATCH 0737/1593] platform/surface: aggregator: Ignore command messages not intended for us It is possible that we (the host/kernel driver) receive command messages that are not intended for us. Ignore those for now. The whole story is a bit more complicated: It is possible to enable debug output on SAM, which is sent via SSH command messages. By default this output is sent to a debug connector, with its own target ID (TID=0x03). It is possible to override the target of the debug output and set it to the host/kernel driver. This, however, does not change the original target ID of the message. Meaning, we receive messages with TID=0x03 (debug) but expect to only receive messages with TID=0x00 (host). The problem is that the different target ID also comes with a different scope of request IDs. In particular, these do not follow the standard event rules (i.e. do not fall into a set of small reserved values). Therefore, current message handling interprets them as responses to pending requests and tries to match them up via the request ID. However, these debug output messages are not in fact responses, and therefore this will at best fail to find the request and at worst pass on the wrong data as response for a request. Therefore ignore any command messages not intended for us (host) for now. We can implement support for the debug messages once we have a better understanding of them. Note that this may also provide a bit more stability and avoid some driver confusion in case any other targets want to talk to us in the future, since we don't yet know what to do with those as well. A warning for the dropped messages should suffice for now and also give us a chance of discovering new targets if they come along without any potential for bugs/instabilities. Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem") Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20221202223327.690880-2-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- .../surface/aggregator/ssh_request_layer.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/platform/surface/aggregator/ssh_request_layer.c b/drivers/platform/surface/aggregator/ssh_request_layer.c index f5565570f16c7..69132976d297e 100644 --- a/drivers/platform/surface/aggregator/ssh_request_layer.c +++ b/drivers/platform/surface/aggregator/ssh_request_layer.c @@ -916,6 +916,20 @@ static void ssh_rtl_rx_command(struct ssh_ptl *p, const struct ssam_span *data) if (sshp_parse_command(dev, data, &command, &command_data)) return; + /* + * Check if the message was intended for us. If not, drop it. + * + * Note: We will need to change this to handle debug messages. On newer + * generation devices, these seem to be sent to tid_out=0x03. We as + * host can still receive them as they can be forwarded via an override + * option on SAM, but doing so does not change tid_out=0x00. + */ + if (command->tid_out != 0x00) { + rtl_warn(rtl, "rtl: dropping message not intended for us (tid = %#04x)\n", + command->tid_out); + return; + } + if (ssh_rqid_is_event(get_unaligned_le16(&command->rqid))) ssh_rtl_rx_event(rtl, command, &command_data); else From c965daac370f08a9b71d573a71d13cda76f2a884 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Tue, 20 Dec 2022 18:56:07 +0100 Subject: [PATCH 0738/1593] platform/surface: aggregator: Add missing call to ssam_request_sync_free() Although rare, ssam_request_sync_init() can fail. In that case, the request should be freed via ssam_request_sync_free(). Currently it is leaked instead. Fix this. Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem") Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20221220175608.1436273-1-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/surface/aggregator/controller.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c index 43e7651991371..c6537a1b3a2ec 100644 --- a/drivers/platform/surface/aggregator/controller.c +++ b/drivers/platform/surface/aggregator/controller.c @@ -1700,8 +1700,10 @@ int ssam_request_sync(struct ssam_controller *ctrl, return status; status = ssam_request_sync_init(rqst, spec->flags); - if (status) + if (status) { + ssam_request_sync_free(rqst); return status; + } ssam_request_sync_set_resp(rqst, rsp); From 17eee264ef386ef30a69dd70e36f29893b85c170 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Fri, 25 Nov 2022 10:12:22 +0800 Subject: [PATCH 0739/1593] phy: usb: sunplus: Fix potential null-ptr-deref in sp_usb_phy_probe() sp_usb_phy_probe() will call platform_get_resource_byname() that may fail and return NULL. devm_ioremap() will use usbphy->moon4_res_mem->start as input, which may causes null-ptr-deref. Check the ret value of platform_get_resource_byname() to avoid the null-ptr-deref. Fixes: 99d9ccd97385 ("phy: usb: Add USB2.0 phy driver for Sunplus SP7021") Signed-off-by: Shang XiaoJing Link: https://lore.kernel.org/r/20221125021222.25687-1-shangxiaojing@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/sunplus/phy-sunplus-usb2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/phy/sunplus/phy-sunplus-usb2.c b/drivers/phy/sunplus/phy-sunplus-usb2.c index e827b79f6d493..56de41091d639 100644 --- a/drivers/phy/sunplus/phy-sunplus-usb2.c +++ b/drivers/phy/sunplus/phy-sunplus-usb2.c @@ -254,6 +254,9 @@ static int sp_usb_phy_probe(struct platform_device *pdev) return PTR_ERR(usbphy->phy_regs); usbphy->moon4_res_mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "moon4"); + if (!usbphy->moon4_res_mem) + return -EINVAL; + usbphy->moon4_regs = devm_ioremap(&pdev->dev, usbphy->moon4_res_mem->start, resource_size(usbphy->moon4_res_mem)); if (!usbphy->moon4_regs) From c78a4e191839edc1e8c3e51565cf2e71d40e8883 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 21 Dec 2022 03:49:59 +0000 Subject: [PATCH 0740/1593] platform/x86: asus-nb-wmi: Add alternate mapping for KEY_CAMERA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This keycode is emitted on a Asus VivoBook E410MAB with firmware E410MAB.304. The physical key has a strikken-through camera printed on it. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20221216-asus-key-v1-1-45da124119a3@weissschuh.net Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/asus-nb-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index c685a705b73dd..8ee5d108e9e05 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -544,6 +544,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0x7D, { KEY_BLUETOOTH } }, /* Bluetooth Enable */ { KE_KEY, 0x7E, { KEY_BLUETOOTH } }, /* Bluetooth Disable */ { KE_KEY, 0x82, { KEY_CAMERA } }, + { KE_KEY, 0x85, { KEY_CAMERA } }, { KE_KEY, 0x86, { KEY_PROG1 } }, /* MyASUS Key */ { KE_KEY, 0x88, { KEY_RFKILL } }, /* Radio Toggle Key */ { KE_KEY, 0x8A, { KEY_PROG1 } }, /* Color enhancement mode */ From db9494895b405bf318dc7e563dee6daa51b3b6ed Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 12 Jan 2023 19:18:41 +0100 Subject: [PATCH 0741/1593] platform/x86: asus-nb-wmi: Add alternate mapping for KEY_SCREENLOCK The 0x33 keycode is emitted by Fn + F6 on a ASUS FX705GE laptop. Reported-by: Nemcev Aleksey Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230112181841.84652-1-hdegoede@redhat.com --- drivers/platform/x86/asus-nb-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 8ee5d108e9e05..b34bddda0a9b3 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -511,6 +511,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0x30, { KEY_VOLUMEUP } }, { KE_KEY, 0x31, { KEY_VOLUMEDOWN } }, { KE_KEY, 0x32, { KEY_MUTE } }, + { KE_KEY, 0x33, { KEY_SCREENLOCK } }, { KE_KEY, 0x35, { KEY_SCREENLOCK } }, { KE_KEY, 0x38, { KEY_PROG3 } }, /* Armoury Crate */ { KE_KEY, 0x40, { KEY_PREVIOUSSONG } }, From ec4a1d9301100c011312357ea25627b98eb293d5 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 26 Dec 2022 15:42:16 +0900 Subject: [PATCH 0742/1593] phy: renesas: r8a779f0-eth-serdes: Fix register setting Fix register setting which is typo in r8a779f0_eth_serdes_chan_setting(). Fixes: 742859441d44 ("phy: renesas: Add Renesas Ethernet SERDES driver for R-Car S4-8") Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20221226064216.3895421-1-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Vinod Koul --- drivers/phy/renesas/r8a779f0-ether-serdes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/renesas/r8a779f0-ether-serdes.c b/drivers/phy/renesas/r8a779f0-ether-serdes.c index ec6594e6dc275..e7588a940d699 100644 --- a/drivers/phy/renesas/r8a779f0-ether-serdes.c +++ b/drivers/phy/renesas/r8a779f0-ether-serdes.c @@ -126,7 +126,7 @@ r8a779f0_eth_serdes_chan_setting(struct r8a779f0_eth_serdes_channel *channel) r8a779f0_eth_serdes_write32(channel->addr, 0x0160, 0x180, 0x0007); r8a779f0_eth_serdes_write32(channel->addr, 0x01ac, 0x180, 0x0000); r8a779f0_eth_serdes_write32(channel->addr, 0x00c4, 0x180, 0x0310); - r8a779f0_eth_serdes_write32(channel->addr, 0x00c8, 0x380, 0x0101); + r8a779f0_eth_serdes_write32(channel->addr, 0x00c8, 0x180, 0x0101); ret = r8a779f0_eth_serdes_reg_wait(channel, 0x00c8, 0x0180, BIT(0), 0); if (ret) return ret; From c874b6de4cdfa2822a07b479887cd5f87fb5d078 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 21 Dec 2022 17:59:49 +0000 Subject: [PATCH 0743/1593] platform/x86: asus-wmi: Add quirk wmi_ignore_fan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some laptops have a fan device listed in their ACPI tables but do not actually contain a fan. Introduce a quirk that can be used to override the fan detection logic. This was observed with a ASUS VivoBook E410MA running firmware E410MAB.304. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20221221-asus-fan-v1-1-e07f3949725b@weissschuh.net Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/asus-wmi.c | 4 +++- drivers/platform/x86/asus-wmi.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 6f81b2844dcbe..4a289ca7e76c9 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -2243,7 +2243,9 @@ static int asus_wmi_fan_init(struct asus_wmi *asus) asus->fan_type = FAN_TYPE_NONE; asus->agfn_pwm = -1; - if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_CPU_FAN_CTRL)) + if (asus->driver->quirks->wmi_ignore_fan) + asus->fan_type = FAN_TYPE_NONE; + else if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_CPU_FAN_CTRL)) asus->fan_type = FAN_TYPE_SPEC83; else if (asus_wmi_has_agfn_fan(asus)) asus->fan_type = FAN_TYPE_AGFN; diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index 65316998b898a..a478ebfd34dfa 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -38,6 +38,7 @@ struct quirk_entry { bool store_backlight_power; bool wmi_backlight_set_devstate; bool wmi_force_als_set; + bool wmi_ignore_fan; enum asus_wmi_tablet_switch_mode tablet_switch_mode; int wapf; /* From 82cc5c6c624c63f7b57214e325e2ea685d924e89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 21 Dec 2022 17:59:50 +0000 Subject: [PATCH 0744/1593] platform/x86: asus-wmi: Ignore fan on E410MA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ASUS VivoBook has a fan device described in its ACPI tables but does not actually contain any physical fan. Use the quirk to inhibit fan handling. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20221221-asus-fan-v1-2-e07f3949725b@weissschuh.net Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/asus-nb-wmi.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index b34bddda0a9b3..cb15acdf14a30 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -121,6 +121,10 @@ static struct quirk_entry quirk_asus_tablet_mode = { .tablet_switch_mode = asus_wmi_lid_flip_rog_devid, }; +static struct quirk_entry quirk_asus_ignore_fan = { + .wmi_ignore_fan = true, +}; + static int dmi_matched(const struct dmi_system_id *dmi) { pr_info("Identified laptop model '%s'\n", dmi->ident); @@ -473,6 +477,15 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_tablet_mode, }, + { + .callback = dmi_matched, + .ident = "ASUS VivoBook E410MA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "E410MA"), + }, + .driver_data = &quirk_asus_ignore_fan, + }, {}, }; From 01fd7e7851ba2275662f771ee17d1f80e7bbfa52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 21 Dec 2022 17:59:51 +0000 Subject: [PATCH 0745/1593] platform/x86: asus-wmi: Don't load fan curves without fan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we do not have a fan it does not make sense to load curves for it. This removes the following warnings from the kernel log: asus_wmi: fan_curve_get_factory_default (0x00110024) failed: -19 asus_wmi: fan_curve_get_factory_default (0x00110025) failed: -19 Fixes: a2bdf10ce96e ("platform/x86: asus-wmi: Increase FAN_CURVE_BUF_LEN to 32") Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20221221-asus-fan-v1-3-e07f3949725b@weissschuh.net Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/asus-wmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 4a289ca7e76c9..104188d70988c 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -2438,6 +2438,9 @@ static int fan_curve_check_present(struct asus_wmi *asus, bool *available, *available = false; + if (asus->fan_type == FAN_TYPE_NONE) + return 0; + err = fan_curve_get_factory_default(asus, fan_dev); if (err) { return 0; From 1af7fef0d9d3fa075bf4e850f705df1fe97d33ce Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 21 Dec 2022 23:07:23 +0100 Subject: [PATCH 0746/1593] platform/x86: dell-privacy: Fix SW_CAMERA_LENS_COVER reporting Use KE_VSW instead of KE_SW for the SW_CAMERA_LENS_COVER key_entry and get the value of the switch from the status field when handling SW_CAMERA_LENS_COVER events, instead of always reporting 0. Also correctly set the initial SW_CAMERA_LENS_COVER value. Fixes: 8af9fa37b8a3 ("platform/x86: dell-privacy: Add support for Dell hardware privacy") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221221220724.119594-1-hdegoede@redhat.com --- drivers/platform/x86/dell/dell-wmi-privacy.c | 22 ++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/dell/dell-wmi-privacy.c b/drivers/platform/x86/dell/dell-wmi-privacy.c index c82b3d6867c5b..915d5deeb971c 100644 --- a/drivers/platform/x86/dell/dell-wmi-privacy.c +++ b/drivers/platform/x86/dell/dell-wmi-privacy.c @@ -61,7 +61,7 @@ static const struct key_entry dell_wmi_keymap_type_0012[] = { /* privacy mic mute */ { KE_KEY, 0x0001, { KEY_MICMUTE } }, /* privacy camera mute */ - { KE_SW, 0x0002, { SW_CAMERA_LENS_COVER } }, + { KE_VSW, 0x0002, { SW_CAMERA_LENS_COVER } }, { KE_END, 0}, }; @@ -115,11 +115,15 @@ bool dell_privacy_process_event(int type, int code, int status) switch (code) { case DELL_PRIVACY_AUDIO_EVENT: /* Mic mute */ - case DELL_PRIVACY_CAMERA_EVENT: /* Camera mute */ priv->last_status = status; sparse_keymap_report_entry(priv->input_dev, key, 1, true); ret = true; break; + case DELL_PRIVACY_CAMERA_EVENT: /* Camera mute */ + priv->last_status = status; + sparse_keymap_report_entry(priv->input_dev, key, !(status & CAMERA_STATUS), false); + ret = true; + break; default: dev_dbg(&priv->wdev->dev, "unknown event type 0x%04x 0x%04x\n", type, code); } @@ -304,6 +308,11 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) dev_set_drvdata(&wdev->dev, priv); priv->wdev = wdev; + + ret = get_current_status(priv->wdev); + if (ret) + return ret; + /* create evdev passing interface */ priv->input_dev = devm_input_allocate_device(&wdev->dev); if (!priv->input_dev) @@ -331,11 +340,12 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) priv->input_dev->name = "Dell Privacy Driver"; priv->input_dev->id.bustype = BUS_HOST; - ret = input_register_device(priv->input_dev); - if (ret) - return ret; + /* Report initial camera-cover status */ + if (priv->features_present & BIT(DELL_PRIVACY_TYPE_CAMERA)) + input_report_switch(priv->input_dev, SW_CAMERA_LENS_COVER, + !(priv->last_status & CAMERA_STATUS)); - ret = get_current_status(priv->wdev); + ret = input_register_device(priv->input_dev); if (ret) return ret; From 6dc485f9940df8105ea729cbeb7a7d18d409dde5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 21 Dec 2022 23:07:24 +0100 Subject: [PATCH 0747/1593] platform/x86: dell-privacy: Only register SW_CAMERA_LENS_COVER if present Unlike keys where userspace only reacts to keypresses, userspace may act on switches in both (0 and 1) of their positions. For example if a SW_TABLET_MODE switch is registered then GNOME will not automatically show the onscreen keyboard when a text field gets focus on touchscreen devices when SW_TABLET_MODE reports 0 and when SW_TABLET_MODE reports 1 libinput will block (filter out) builtin keyboard and touchpad events. So to avoid unwanted side-effects EV_SW type inputs should only be registered if they are actually present, only register SW_CAMERA_LENS_COVER if it is actually there. Fixes: 8af9fa37b8a3 ("platform/x86: dell-privacy: Add support for Dell hardware privacy") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221221220724.119594-2-hdegoede@redhat.com --- drivers/platform/x86/dell/dell-wmi-privacy.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/dell/dell-wmi-privacy.c b/drivers/platform/x86/dell/dell-wmi-privacy.c index 915d5deeb971c..c517bd45dd32e 100644 --- a/drivers/platform/x86/dell/dell-wmi-privacy.c +++ b/drivers/platform/x86/dell/dell-wmi-privacy.c @@ -296,7 +296,7 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) { struct privacy_wmi_data *priv; struct key_entry *keymap; - int ret, i; + int ret, i, j; ret = wmi_has_guid(DELL_PRIVACY_GUID); if (!ret) @@ -327,9 +327,20 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) /* remap the keymap code with Dell privacy key type 0x12 as prefix * KEY_MICMUTE scancode will be reported as 0x120001 */ - for (i = 0; i < ARRAY_SIZE(dell_wmi_keymap_type_0012); i++) { - keymap[i] = dell_wmi_keymap_type_0012[i]; - keymap[i].code |= (0x0012 << 16); + for (i = 0, j = 0; i < ARRAY_SIZE(dell_wmi_keymap_type_0012); i++) { + /* + * Unlike keys where only presses matter, userspace may act + * on switches in both of their positions. Only register + * SW_CAMERA_LENS_COVER if it is actually there. + */ + if (dell_wmi_keymap_type_0012[i].type == KE_VSW && + dell_wmi_keymap_type_0012[i].sw.code == SW_CAMERA_LENS_COVER && + !(priv->features_present & BIT(DELL_PRIVACY_TYPE_CAMERA))) + continue; + + keymap[j] = dell_wmi_keymap_type_0012[i]; + keymap[j].code |= (0x0012 << 16); + j++; } ret = sparse_keymap_setup(priv->input_dev, keymap, NULL); kfree(keymap); From b0907cadabcae6f1248f37a32a6e777f9ff6d4aa Mon Sep 17 00:00:00 2001 From: Adrian Huang Date: Tue, 10 Jan 2023 09:45:12 +0800 Subject: [PATCH 0748/1593] md: fix incorrect declaration about claim_rdev in md_import_device Commit fb541ca4c365 ("md: remove lock_bdev / unlock_bdev") removes wrappers for blkdev_get/blkdev_put. However, the uninitialized local static variable of pointer type 'claim_rdev' in md_import_device() is NULL, which leads to the following warning call trace: WARNING: CPU: 22 PID: 1037 at block/bdev.c:577 bd_prepare_to_claim+0x131/0x150 CPU: 22 PID: 1037 Comm: mdadm Not tainted 6.2.0-rc3+ #69 .. RIP: 0010:bd_prepare_to_claim+0x131/0x150 .. Call Trace: ? _raw_spin_unlock+0x15/0x30 ? iput+0x6a/0x220 blkdev_get_by_dev.part.0+0x4b/0x300 md_import_device+0x126/0x1d0 new_dev_store+0x184/0x240 md_attr_store+0x80/0xf0 kernfs_fop_write_iter+0x128/0x1c0 vfs_write+0x2be/0x3c0 ksys_write+0x5f/0xe0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc It turns out the md device cannot be used: md: could not open device unknown-block(259,0). md: md127 stopped. Fix the issue by declaring the local static variable of struct type and passing the pointer of the variable to blkdev_get_by_dev(). Fixes: fb541ca4c365 ("md: remove lock_bdev / unlock_bdev") Cc: Christoph Hellwig Signed-off-by: Adrian Huang Reviewed-by: Christoph Hellwig Signed-off-by: Song Liu --- drivers/md/md.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 8af639296b3c8..02b0240e7c715 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3644,7 +3644,7 @@ EXPORT_SYMBOL_GPL(md_rdev_init); */ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor) { - static struct md_rdev *claim_rdev; /* just for claiming the bdev */ + static struct md_rdev claim_rdev; /* just for claiming the bdev */ struct md_rdev *rdev; sector_t size; int err; @@ -3662,7 +3662,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe rdev->bdev = blkdev_get_by_dev(newdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, - super_format == -2 ? claim_rdev : rdev); + super_format == -2 ? &claim_rdev : rdev); if (IS_ERR(rdev->bdev)) { pr_warn("md: could not open device unknown-block(%u,%u).\n", MAJOR(newdev), MINOR(newdev)); From ed058eab22d64c00663563e8e1e112989c65c59f Mon Sep 17 00:00:00 2001 From: Henning Schild Date: Thu, 22 Dec 2022 11:37:19 +0100 Subject: [PATCH 0749/1593] platform/x86: simatic-ipc: correct name of a model What we called IPC427G should be renamed to BX-39A to be more in line with the actual product name. Signed-off-by: Henning Schild Link: https://lore.kernel.org/r/20221222103720.8546-2-henning.schild@siemens.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/simatic-ipc.c | 2 +- include/linux/platform_data/x86/simatic-ipc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/simatic-ipc.c b/drivers/platform/x86/simatic-ipc.c index ca76076fc706a..2ab1f8da32b01 100644 --- a/drivers/platform/x86/simatic-ipc.c +++ b/drivers/platform/x86/simatic-ipc.c @@ -46,7 +46,7 @@ static struct { {SIMATIC_IPC_IPC427D, SIMATIC_IPC_DEVICE_427E, SIMATIC_IPC_DEVICE_NONE}, {SIMATIC_IPC_IPC427E, SIMATIC_IPC_DEVICE_427E, SIMATIC_IPC_DEVICE_427E}, {SIMATIC_IPC_IPC477E, SIMATIC_IPC_DEVICE_NONE, SIMATIC_IPC_DEVICE_427E}, - {SIMATIC_IPC_IPC427G, SIMATIC_IPC_DEVICE_227G, SIMATIC_IPC_DEVICE_227G}, + {SIMATIC_IPC_IPCBX_39A, SIMATIC_IPC_DEVICE_227G, SIMATIC_IPC_DEVICE_227G}, }; static int register_platform_devices(u32 station_id) diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h index 632320ec8f082..a4a6cba412cbb 100644 --- a/include/linux/platform_data/x86/simatic-ipc.h +++ b/include/linux/platform_data/x86/simatic-ipc.h @@ -32,7 +32,7 @@ enum simatic_ipc_station_ids { SIMATIC_IPC_IPC477E = 0x00000A02, SIMATIC_IPC_IPC127E = 0x00000D01, SIMATIC_IPC_IPC227G = 0x00000F01, - SIMATIC_IPC_IPC427G = 0x00001001, + SIMATIC_IPC_IPCBX_39A = 0x00001001, }; static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len) From d348b1d761e358a4ba03fb34aa7e3dbd278db236 Mon Sep 17 00:00:00 2001 From: Henning Schild Date: Thu, 22 Dec 2022 11:37:20 +0100 Subject: [PATCH 0750/1593] platform/x86: simatic-ipc: add another model Add IPC PX-39A support. Signed-off-by: Henning Schild Link: https://lore.kernel.org/r/20221222103720.8546-3-henning.schild@siemens.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/simatic-ipc.c | 1 + include/linux/platform_data/x86/simatic-ipc.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/platform/x86/simatic-ipc.c b/drivers/platform/x86/simatic-ipc.c index 2ab1f8da32b01..b3622419cd1a4 100644 --- a/drivers/platform/x86/simatic-ipc.c +++ b/drivers/platform/x86/simatic-ipc.c @@ -47,6 +47,7 @@ static struct { {SIMATIC_IPC_IPC427E, SIMATIC_IPC_DEVICE_427E, SIMATIC_IPC_DEVICE_427E}, {SIMATIC_IPC_IPC477E, SIMATIC_IPC_DEVICE_NONE, SIMATIC_IPC_DEVICE_427E}, {SIMATIC_IPC_IPCBX_39A, SIMATIC_IPC_DEVICE_227G, SIMATIC_IPC_DEVICE_227G}, + {SIMATIC_IPC_IPCPX_39A, SIMATIC_IPC_DEVICE_NONE, SIMATIC_IPC_DEVICE_227G}, }; static int register_platform_devices(u32 station_id) diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h index a4a6cba412cbb..a48bb52409777 100644 --- a/include/linux/platform_data/x86/simatic-ipc.h +++ b/include/linux/platform_data/x86/simatic-ipc.h @@ -33,6 +33,7 @@ enum simatic_ipc_station_ids { SIMATIC_IPC_IPC127E = 0x00000D01, SIMATIC_IPC_IPC227G = 0x00000F01, SIMATIC_IPC_IPCBX_39A = 0x00001001, + SIMATIC_IPC_IPCPX_39A = 0x00001002, }; static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len) From 6aa5207838ccf79879f212034435a5606f890eea Mon Sep 17 00:00:00 2001 From: Gayatri Kammela Date: Wed, 28 Dec 2022 15:05:53 -0800 Subject: [PATCH 0751/1593] platform/x86: intel/pmc/core: Add Meteor Lake mobile support Add Meteor Lake mobile support to pmc core driver. Meteor Lake mobile parts reuse all the Meteor Lake PCH IPs. Cc: David E Box Signed-off-by: Gayatri Kammela Link: https://lore.kernel.org/r/20221228230553.2497183-1-gayatri.kammela@linux.intel.com Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/pmc/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index f1d802f6ec3f9..3a15d32d7644c 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -1029,6 +1029,7 @@ static const struct x86_cpu_id intel_pmc_core_ids[] = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, adl_core_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, adl_core_init), X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, mtl_core_init), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, mtl_core_init), {} }; From ccb32e2be14271a60e9ba89c6d5660cc9998773c Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 29 Dec 2022 11:25:33 +0400 Subject: [PATCH 0752/1593] platform/x86/amd: Fix refcount leak in amd_pmc_probe pci_get_domain_bus_and_slot() takes reference, the caller should release the reference by calling pci_dev_put() after use. Call pci_dev_put() in the error path to fix this. Fixes: 3d7d407dfb05 ("platform/x86: amd-pmc: Add support for AMD Spill to DRAM STB feature") Signed-off-by: Miaoqian Lin Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20221229072534.1381432-1-linmq006@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c index 439d282aafd19..8d924986381be 100644 --- a/drivers/platform/x86/amd/pmc.c +++ b/drivers/platform/x86/amd/pmc.c @@ -932,7 +932,7 @@ static int amd_pmc_probe(struct platform_device *pdev) if (enable_stb && (dev->cpu_id == AMD_CPU_ID_YC || dev->cpu_id == AMD_CPU_ID_CB)) { err = amd_pmc_s2d_init(dev); if (err) - return err; + goto err_pci_dev_put; } platform_set_drvdata(pdev, dev); From cf5ac2d45f6e4d11ad78e7b10ae9a4121ba5e995 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 11 Jan 2023 21:14:26 +0100 Subject: [PATCH 0753/1593] platform/x86: int3472/discrete: Ensure the clk/power enable pins are in output mode acpi_get_and_request_gpiod() does not take a gpio_lookup_flags argument specifying that the pins direction should be initialized to a specific value. This means that in some cases the pins might be left in input mode, causing the gpiod_set() calls made to enable the clk / regulator to not work. One example of this problem is the clk-enable GPIO for the ov01a1s sensor on a Dell Latitude 9420 being left in input mode causing the clk to never get enabled. Explicitly set the direction of the pins to output to fix this. Fixes: 5de691bffe57 ("platform/x86: Add intel_skl_int3472 driver") Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Reviewed-by: Daniel Scally Reviewed-by: Sakari Ailus Link: https://lore.kernel.org/r/20230111201426.947853-1-hdegoede@redhat.com --- drivers/platform/x86/intel/int3472/clk_and_regulator.c | 3 +++ drivers/platform/x86/intel/int3472/discrete.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/drivers/platform/x86/intel/int3472/clk_and_regulator.c b/drivers/platform/x86/intel/int3472/clk_and_regulator.c index b2342b3d78c72..74dc2cff799ee 100644 --- a/drivers/platform/x86/intel/int3472/clk_and_regulator.c +++ b/drivers/platform/x86/intel/int3472/clk_and_regulator.c @@ -181,6 +181,9 @@ int skl_int3472_register_regulator(struct int3472_discrete_device *int3472, return PTR_ERR(int3472->regulator.gpio); } + /* Ensure the pin is in output mode and non-active state */ + gpiod_direction_output(int3472->regulator.gpio, 0); + cfg.dev = &int3472->adev->dev; cfg.init_data = &init_data; cfg.ena_gpiod = int3472->regulator.gpio; diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c index 974a132db6516..c42c3faa2c32d 100644 --- a/drivers/platform/x86/intel/int3472/discrete.c +++ b/drivers/platform/x86/intel/int3472/discrete.c @@ -168,6 +168,8 @@ static int skl_int3472_map_gpio_to_clk(struct int3472_discrete_device *int3472, return (PTR_ERR(gpio)); int3472->clock.ena_gpio = gpio; + /* Ensure the pin is in output mode and non-active state */ + gpiod_direction_output(int3472->clock.ena_gpio, 0); break; case INT3472_GPIO_TYPE_PRIVACY_LED: gpio = acpi_get_and_request_gpiod(path, pin, "int3472,privacy-led"); @@ -175,6 +177,8 @@ static int skl_int3472_map_gpio_to_clk(struct int3472_discrete_device *int3472, return (PTR_ERR(gpio)); int3472->clock.led_gpio = gpio; + /* Ensure the pin is in output mode and non-active state */ + gpiod_direction_output(int3472->clock.led_gpio, 0); break; default: dev_err(int3472->dev, "Invalid GPIO type 0x%02x for clock\n", type); From 9fdaca2c1e157dc0a3c0faecf3a6a68e7d8d0c7b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 11 Jan 2023 22:33:07 +0000 Subject: [PATCH 0754/1593] kselftest: Fix error message for unconfigured LLVM builds We are missing a ) when we attempt to complain about not having enough configuration for clang, resulting in the rather inscrutable error: ../lib.mk:23: *** unterminated call to function 'error': missing ')'. Stop. Add the required ) so we print the message we were trying to print. Signed-off-by: Mark Brown Signed-off-by: Shuah Khan --- tools/testing/selftests/lib.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 291144c284fbc..f7900e75d2306 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -20,7 +20,7 @@ CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH)) ifeq ($(CROSS_COMPILE),) ifeq ($(CLANG_TARGET_FLAGS),) -$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk +$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk) else CLANG_FLAGS += --target=$(CLANG_TARGET_FLAGS) endif # CLANG_TARGET_FLAGS From 9f907439dc80e4a2fcfb949927b36c036468dbb3 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Wed, 11 Jan 2023 17:29:01 +0800 Subject: [PATCH 0755/1593] bpf: hash map, avoid deadlock with suitable hash mask The deadlock still may occur while accessed in NMI and non-NMI context. Because in NMI, we still may access the same bucket but with different map_locked index. For example, on the same CPU, .max_entries = 2, we update the hash map, with key = 4, while running bpf prog in NMI nmi_handle(), to update hash map with key = 20, so it will have the same bucket index but have different map_locked index. To fix this issue, using min mask to hash again. Fixes: 20b6cc34ea74 ("bpf: Avoid hashtab deadlock with map_locked") Signed-off-by: Tonghao Zhang Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Andrii Nakryiko Cc: Martin KaFai Lau Cc: Song Liu Cc: Yonghong Song Cc: John Fastabend Cc: KP Singh Cc: Stanislav Fomichev Cc: Hao Luo Cc: Jiri Olsa Cc: Hou Tao Acked-by: Yonghong Song Acked-by: Hou Tao Link: https://lore.kernel.org/r/20230111092903.92389-1-tong@infragraf.org Signed-off-by: Martin KaFai Lau --- kernel/bpf/hashtab.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 5aa2b5525f793..66bded1443773 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -152,7 +152,7 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab, { unsigned long flags; - hash = hash & HASHTAB_MAP_LOCK_MASK; + hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1); preempt_disable(); if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) { @@ -171,7 +171,7 @@ static inline void htab_unlock_bucket(const struct bpf_htab *htab, struct bucket *b, u32 hash, unsigned long flags) { - hash = hash & HASHTAB_MAP_LOCK_MASK; + hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1); raw_spin_unlock_irqrestore(&b->raw_lock, flags); __this_cpu_dec(*(htab->map_locked[hash])); preempt_enable(); From 579923d84b04abb6cd4cd1fd9974096a2dd1832b Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 11 Jan 2023 22:58:51 +0530 Subject: [PATCH 0756/1593] amd-xgbe: TX Flow Ctrl Registers are h/w ver dependent There is difference in the TX Flow Control registers (TFCR) between the revisions of the hardware. The older revisions of hardware used to have single register per queue. Whereas, the newer revision of hardware (from ver 30H onwards) have one register per priority. Update the driver to use the TFCR based on the reported version of the hardware. Fixes: c5aa9e3b8156 ("amd-xgbe: Initial AMD 10GbE platform driver") Co-developed-by: Ajith Nayak Signed-off-by: Ajith Nayak Signed-off-by: Raju Rangoju Acked-by: Shyam Sundar S K Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 3936543a74d8f..4030d619e84f5 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -524,19 +524,28 @@ static void xgbe_disable_vxlan(struct xgbe_prv_data *pdata) netif_dbg(pdata, drv, pdata->netdev, "VXLAN acceleration disabled\n"); } +static unsigned int xgbe_get_fc_queue_count(struct xgbe_prv_data *pdata) +{ + unsigned int max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES; + + /* From MAC ver 30H the TFCR is per priority, instead of per queue */ + if (XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER) >= 0x30) + return max_q_count; + else + return min_t(unsigned int, pdata->tx_q_count, max_q_count); +} + static int xgbe_disable_tx_flow_control(struct xgbe_prv_data *pdata) { - unsigned int max_q_count, q_count; unsigned int reg, reg_val; - unsigned int i; + unsigned int i, q_count; /* Clear MTL flow control */ for (i = 0; i < pdata->rx_q_count; i++) XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, 0); /* Clear MAC flow control */ - max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES; - q_count = min_t(unsigned int, pdata->tx_q_count, max_q_count); + q_count = xgbe_get_fc_queue_count(pdata); reg = MAC_Q0TFCR; for (i = 0; i < q_count; i++) { reg_val = XGMAC_IOREAD(pdata, reg); @@ -553,9 +562,8 @@ static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata) { struct ieee_pfc *pfc = pdata->pfc; struct ieee_ets *ets = pdata->ets; - unsigned int max_q_count, q_count; unsigned int reg, reg_val; - unsigned int i; + unsigned int i, q_count; /* Set MTL flow control */ for (i = 0; i < pdata->rx_q_count; i++) { @@ -579,8 +587,7 @@ static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata) } /* Set MAC flow control */ - max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES; - q_count = min_t(unsigned int, pdata->tx_q_count, max_q_count); + q_count = xgbe_get_fc_queue_count(pdata); reg = MAC_Q0TFCR; for (i = 0; i < q_count; i++) { reg_val = XGMAC_IOREAD(pdata, reg); From 926446ae24c03311a480fb96eb78f0ce7ea6d091 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 11 Jan 2023 22:58:52 +0530 Subject: [PATCH 0757/1593] amd-xgbe: Delay AN timeout during KR training AN restart triggered during KR training not only aborts the KR training process but also move the HW to unstable state. Driver has to wait upto 500ms or until the KR training is completed before restarting AN cycle. Fixes: 7c12aa08779c ("amd-xgbe: Move the PHY support into amd-xgbe") Co-developed-by: Sudheesh Mavila Signed-off-by: Sudheesh Mavila Signed-off-by: Raju Rangoju Acked-by: Shyam Sundar S K Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 24 +++++++++++++++++++++++ drivers/net/ethernet/amd/xgbe/xgbe.h | 2 ++ 2 files changed, 26 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 0c5c1b1556830..43fdd111235a6 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -496,6 +496,7 @@ static enum xgbe_an xgbe_an73_tx_training(struct xgbe_prv_data *pdata, reg |= XGBE_KR_TRAINING_ENABLE; reg |= XGBE_KR_TRAINING_START; XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); + pdata->kr_start_time = jiffies; netif_dbg(pdata, link, pdata->netdev, "KR training initiated\n"); @@ -632,6 +633,8 @@ static enum xgbe_an xgbe_an73_incompat_link(struct xgbe_prv_data *pdata) xgbe_switch_mode(pdata); + pdata->an_result = XGBE_AN_READY; + xgbe_an_restart(pdata); return XGBE_AN_INCOMPAT_LINK; @@ -1275,9 +1278,30 @@ static bool xgbe_phy_aneg_done(struct xgbe_prv_data *pdata) static void xgbe_check_link_timeout(struct xgbe_prv_data *pdata) { unsigned long link_timeout; + unsigned long kr_time; + int wait; link_timeout = pdata->link_check + (XGBE_LINK_TIMEOUT * HZ); if (time_after(jiffies, link_timeout)) { + if ((xgbe_cur_mode(pdata) == XGBE_MODE_KR) && + pdata->phy.autoneg == AUTONEG_ENABLE) { + /* AN restart should not happen while KR training is in progress. + * The while loop ensures no AN restart during KR training, + * waits up to 500ms and AN restart is triggered only if KR + * training is failed. + */ + wait = XGBE_KR_TRAINING_WAIT_ITER; + while (wait--) { + kr_time = pdata->kr_start_time + + msecs_to_jiffies(XGBE_AN_MS_TIMEOUT); + if (time_after(jiffies, kr_time)) + break; + /* AN restart is not required, if AN result is COMPLETE */ + if (pdata->an_result == XGBE_AN_COMPLETE) + return; + usleep_range(10000, 11000); + } + } netif_dbg(pdata, link, pdata->netdev, "AN link timeout\n"); xgbe_phy_config_aneg(pdata); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 71f24cb479355..7a41367c437dd 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -290,6 +290,7 @@ /* Auto-negotiation */ #define XGBE_AN_MS_TIMEOUT 500 #define XGBE_LINK_TIMEOUT 5 +#define XGBE_KR_TRAINING_WAIT_ITER 50 #define XGBE_SGMII_AN_LINK_STATUS BIT(1) #define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3)) @@ -1280,6 +1281,7 @@ struct xgbe_prv_data { unsigned int parallel_detect; unsigned int fec_ability; unsigned long an_start; + unsigned long kr_start_time; enum xgbe_an_mode an_mode; /* I2C support */ From 01644a1f98ff45a4044395ce2bbfd534747e0676 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 11 Jan 2023 18:07:15 +0000 Subject: [PATCH 0758/1593] rxrpc: Fix wrong error return in rxrpc_connect_call() Fix rxrpc_connect_call() to return -ENOMEM rather than 0 if it fails to look up a peer. This generated a smatch warning: net/rxrpc/call_object.c:303 rxrpc_connect_call() warn: missing error code 'ret' I think this also fixes a syzbot-found bug: rxrpc: Assertion failed - 1(0x1) == 11(0xb) is false ------------[ cut here ]------------ kernel BUG at net/rxrpc/call_object.c:645! where the call being put is in the wrong state - as would be the case if we failed to clear up correctly after the error in rxrpc_connect_call(). Fixes: 9d35d880e0e4 ("rxrpc: Move client call connection to the I/O thread") Reported-by: kernel test robot Reported-by: Dan Carpenter Reported-and-tested-by: syzbot+4bb6356bb29d6299360e@syzkaller.appspotmail.com Signed-off-by: David Howells Link: https://lore.kernel.org/r/202301111153.9eZRYLf1-lkp@intel.com/ Reviewed-by: Alexander Duyck Link: https://lore.kernel.org/r/2438405.1673460435@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- net/rxrpc/call_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 3ded5a24627c5..f3c9f0201c156 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -294,7 +294,7 @@ static void rxrpc_put_call_slot(struct rxrpc_call *call) static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp) { struct rxrpc_local *local = call->local; - int ret = 0; + int ret = -ENOMEM; _enter("{%d,%lx},", call->debug_id, call->user_call_ID); From ea22f4319c3409d847d70eaaf149cc480a73220d Mon Sep 17 00:00:00 2001 From: Sudheer Mogilappagari Date: Wed, 11 Jan 2023 15:56:07 -0800 Subject: [PATCH 0759/1593] ethtool: add netlink attr in rss get reply only if value is not null Current code for RSS_GET ethtool command includes netlink attributes in reply message to user space even if they are null. Added checks to include netlink attribute in reply message only if a value is received from driver. Drivers might return null for RSS indirection table or hash key. Instead of including attributes with empty value in the reply message, add netlink attribute only if there is content. Fixes: 7112a04664bf ("ethtool: add netlink based get rss support") Signed-off-by: Sudheer Mogilappagari Reviewed-by: Michal Kubecek Link: https://lore.kernel.org/r/20230111235607.85509-1-sudheer.mogilappagari@intel.com Signed-off-by: Jakub Kicinski --- net/ethtool/rss.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index ebe6145aed3f1..be260ab34e580 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -122,10 +122,13 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, { const struct rss_reply_data *data = RSS_REPDATA(reply_base); - if (nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc) || - nla_put(skb, ETHTOOL_A_RSS_INDIR, - sizeof(u32) * data->indir_size, data->indir_table) || - nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey)) + if ((data->hfunc && + nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) || + (data->indir_size && + nla_put(skb, ETHTOOL_A_RSS_INDIR, + sizeof(u32) * data->indir_size, data->indir_table)) || + (data->hkey_size && + nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey))) return -EMSGSIZE; return 0; From aedee9e8d9224de35081e73e63333a402907c53c Mon Sep 17 00:00:00 2001 From: Peter Foley Date: Thu, 12 Jan 2023 23:23:59 -0500 Subject: [PATCH 0760/1593] kconfig: Update all declared targets Currently qconf-cfg.sh is the only script that touches the "-bin" target, even though all of the conf_cfg rules declare that they do. Make the recipe unconditionally touch all declared targets to avoid incompatibilities with upcoming versions of GNU make: https://lists.gnu.org/archive/html/info-gnu/2022-10/msg00008.html e.g. scripts/kconfig/Makefile:215: warning: pattern recipe did not update peer target 'scripts/kconfig/nconf-bin'. scripts/kconfig/Makefile:215: warning: pattern recipe did not update peer target 'scripts/kconfig/mconf-bin'. scripts/kconfig/Makefile:215: warning: pattern recipe did not update peer target 'scripts/kconfig/gconf-bin'. Signed-off-by: Peter Foley Signed-off-by: Masahiro Yamada --- scripts/kconfig/.gitignore | 2 +- scripts/kconfig/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/.gitignore b/scripts/kconfig/.gitignore index c8a3f9cd52f02..0b2ff775b2e33 100644 --- a/scripts/kconfig/.gitignore +++ b/scripts/kconfig/.gitignore @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only /conf /[gmnq]conf +/[gmnq]conf-bin /[gmnq]conf-cflags /[gmnq]conf-libs -/qconf-bin /qconf-moc.cc diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index 0b1d15efaeb0c..af1c96198f491 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -209,7 +209,7 @@ $(obj)/gconf: | $(obj)/gconf-libs $(obj)/gconf.o: | $(obj)/gconf-cflags # check if necessary packages are available, and configure build flags -cmd_conf_cfg = $< $(addprefix $(obj)/$*conf-, cflags libs bin) +cmd_conf_cfg = $< $(addprefix $(obj)/$*conf-, cflags libs bin); touch $(obj)/$*conf-bin $(obj)/%conf-cflags $(obj)/%conf-libs $(obj)/%conf-bin: $(src)/%conf-cfg.sh $(call cmd,conf_cfg) From e26fd28db82899be71b4b949527373d0a6be1e65 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Thu, 12 Jan 2023 12:27:07 +0000 Subject: [PATCH 0761/1593] sched/uclamp: Fix a uninitialized variable warnings Addresses the following warnings: > config: riscv-randconfig-m031-20221111 > compiler: riscv64-linux-gcc (GCC) 12.1.0 > > smatch warnings: > kernel/sched/fair.c:7263 find_energy_efficient_cpu() error: uninitialized symbol 'util_min'. > kernel/sched/fair.c:7263 find_energy_efficient_cpu() error: uninitialized symbol 'util_max'. Fixes: 244226035a1f ("sched/uclamp: Fix fits_capacity() check in feec()") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Qais Yousef (Google) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20230112122708.330667-2-qyousef@layalina.io --- kernel/sched/fair.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c36aa54ae071a..be43731b147da 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7229,10 +7229,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) eenv_task_busy_time(&eenv, p, prev_cpu); for (; pd; pd = pd->next) { + unsigned long util_min = p_util_min, util_max = p_util_max; unsigned long cpu_cap, cpu_thermal_cap, util; unsigned long cur_delta, max_spare_cap = 0; unsigned long rq_util_min, rq_util_max; - unsigned long util_min, util_max; unsigned long prev_spare_cap = 0; int max_spare_cap_cpu = -1; unsigned long base_energy; @@ -7251,6 +7251,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) eenv.pd_cap = 0; for_each_cpu(cpu, cpus) { + struct rq *rq = cpu_rq(cpu); + eenv.pd_cap += cpu_thermal_cap; if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) @@ -7269,24 +7271,19 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) * much capacity we can get out of the CPU; this is * aligned with sched_cpu_util(). */ - if (uclamp_is_used()) { - if (uclamp_rq_is_idle(cpu_rq(cpu))) { - util_min = p_util_min; - util_max = p_util_max; - } else { - /* - * Open code uclamp_rq_util_with() except for - * the clamp() part. Ie: apply max aggregation - * only. util_fits_cpu() logic requires to - * operate on non clamped util but must use the - * max-aggregated uclamp_{min, max}. - */ - rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN); - rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX); - - util_min = max(rq_util_min, p_util_min); - util_max = max(rq_util_max, p_util_max); - } + if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) { + /* + * Open code uclamp_rq_util_with() except for + * the clamp() part. Ie: apply max aggregation + * only. util_fits_cpu() logic requires to + * operate on non clamped util but must use the + * max-aggregated uclamp_{min, max}. + */ + rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN); + rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX); + + util_min = max(rq_util_min, p_util_min); + util_max = max(rq_util_max, p_util_max); } if (!util_fits_cpu(util, util_min, util_max, cpu)) continue; From da07d2f9c153e457e845d4dcfdd13568d71d18a4 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Thu, 12 Jan 2023 12:27:08 +0000 Subject: [PATCH 0762/1593] sched/fair: Fixes for capacity inversion detection Traversing the Perf Domains requires rcu_read_lock() to be held and is conditional on sched_energy_enabled(). Ensure right protections applied. Also skip capacity inversion detection for our own pd; which was an error. Fixes: 44c7b80bffc3 ("sched/fair: Detect capacity inversion") Reported-by: Dietmar Eggemann Signed-off-by: Qais Yousef (Google) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20230112122708.330667-3-qyousef@layalina.io --- kernel/sched/fair.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index be43731b147da..0f87369914274 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8868,16 +8868,23 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu) * * Thermal pressure will impact all cpus in this perf domain * equally. */ - if (static_branch_unlikely(&sched_asym_cpucapacity)) { + if (sched_energy_enabled()) { unsigned long inv_cap = capacity_orig - thermal_load_avg(rq); - struct perf_domain *pd = rcu_dereference(rq->rd->pd); + struct perf_domain *pd; + + rcu_read_lock(); + pd = rcu_dereference(rq->rd->pd); rq->cpu_capacity_inverted = 0; for (; pd; pd = pd->next) { struct cpumask *pd_span = perf_domain_span(pd); unsigned long pd_cap_orig, pd_cap; + /* We can't be inverted against our own pd */ + if (cpumask_test_cpu(cpu_of(rq), pd_span)) + continue; + cpu = cpumask_any(pd_span); pd_cap_orig = arch_scale_cpu_capacity(cpu); @@ -8902,6 +8909,8 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu) break; } } + + rcu_read_unlock(); } trace_sched_cpu_capacity_tp(rq); From fde5f74ccfc771941b018b5415fa9664426e10ad Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Thu, 12 Jan 2023 17:12:28 -0500 Subject: [PATCH 0763/1593] platform/x86: thinkpad_acpi: Fix profile mode display in AMT mode Recently AMT mode was enabled (somewhat unexpectedly) on the Lenovo Z13 platform. The FW is advertising it is available and the driver tries to use it - unfortunately it reports the profile mode incorrectly. Note, there is also some extra work needed to enable the dynamic aspect of AMT support that I will be following up with; but more testing is needed first. This patch just fixes things so the profiles are reported correctly. Link: https://gitlab.freedesktop.org/hadess/power-profiles-daemon/-/issues/115 Fixes: 46dcbc61b739 ("platform/x86: thinkpad-acpi: Add support for automatic mode transitions") Reviewed-by: Mario Limonciello Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20230112221228.490946-1-mpearson-lenovo@squebb.ca Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 1195293b22fdd..a95946800ae9d 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10311,9 +10311,11 @@ static DEFINE_MUTEX(dytc_mutex); static int dytc_capabilities; static bool dytc_mmc_get_available; -static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *profile) +static int convert_dytc_to_profile(int funcmode, int dytcmode, + enum platform_profile_option *profile) { - if (dytc_capabilities & BIT(DYTC_FC_MMC)) { + switch (funcmode) { + case DYTC_FUNCTION_MMC: switch (dytcmode) { case DYTC_MODE_MMC_LOWPOWER: *profile = PLATFORM_PROFILE_LOW_POWER; @@ -10329,8 +10331,7 @@ static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *p return -EINVAL; } return 0; - } - if (dytc_capabilities & BIT(DYTC_FC_PSC)) { + case DYTC_FUNCTION_PSC: switch (dytcmode) { case DYTC_MODE_PSC_LOWPOWER: *profile = PLATFORM_PROFILE_LOW_POWER; @@ -10344,6 +10345,14 @@ static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *p default: /* Unknown mode */ return -EINVAL; } + return 0; + case DYTC_FUNCTION_AMT: + /* For now return balanced. It's the closest we have to 'auto' */ + *profile = PLATFORM_PROFILE_BALANCED; + return 0; + default: + /* Unknown function */ + return -EOPNOTSUPP; } return 0; } @@ -10492,6 +10501,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof, err = dytc_command(DYTC_SET_COMMAND(DYTC_FUNCTION_PSC, perfmode, 1), &output); if (err) goto unlock; + /* system supports AMT, activate it when on balanced */ if (dytc_capabilities & BIT(DYTC_FC_AMT)) dytc_control_amt(profile == PLATFORM_PROFILE_BALANCED); @@ -10507,7 +10517,7 @@ static void dytc_profile_refresh(void) { enum platform_profile_option profile; int output, err = 0; - int perfmode; + int perfmode, funcmode; mutex_lock(&dytc_mutex); if (dytc_capabilities & BIT(DYTC_FC_MMC)) { @@ -10522,8 +10532,9 @@ static void dytc_profile_refresh(void) if (err) return; + funcmode = (output >> DYTC_GET_FUNCTION_BIT) & 0xF; perfmode = (output >> DYTC_GET_MODE_BIT) & 0xF; - convert_dytc_to_profile(perfmode, &profile); + convert_dytc_to_profile(funcmode, perfmode, &profile); if (profile != dytc_current_profile) { dytc_current_profile = profile; platform_profile_notify(); From f89d783d68dcc6b2ce4fe3bda972ae0f84df0dca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Fri, 13 Jan 2023 20:03:08 +0100 Subject: [PATCH 0764/1593] ASoC: Intel: avs: Implement PCI shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On shutdown reference to i915 driver needs to be released to not spam logs with unnecessary warnings. While at it do some additional cleanup to make sure DSP is powered down and interrupts from device are disabled. Fixes: 1affc44ea5dd ("ASoC: Intel: avs: PCI driver implementation") Reported-by: Kornel Dulęba Signed-off-by: Amadeusz Sławiński Reviewed-by: Cezary Rojewski Link: https://lore.kernel.org/r/20230113190310.1451693-2-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/core.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sound/soc/intel/avs/core.c b/sound/soc/intel/avs/core.c index 2ca24273c4910..6375018507288 100644 --- a/sound/soc/intel/avs/core.c +++ b/sound/soc/intel/avs/core.c @@ -481,6 +481,29 @@ static int avs_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) return ret; } +static void avs_pci_shutdown(struct pci_dev *pci) +{ + struct hdac_bus *bus = pci_get_drvdata(pci); + struct avs_dev *adev = hdac_to_avs(bus); + + cancel_work_sync(&adev->probe_work); + avs_ipc_block(adev->ipc); + + snd_hdac_stop_streams(bus); + avs_dsp_op(adev, int_control, false); + snd_hdac_ext_bus_ppcap_int_enable(bus, false); + snd_hdac_ext_bus_link_power_down_all(bus); + + snd_hdac_bus_stop_chip(bus); + snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, false); + + if (avs_platattr_test(adev, CLDMA)) + pci_free_irq(pci, 0, &code_loader); + pci_free_irq(pci, 0, adev); + pci_free_irq(pci, 0, bus); + pci_free_irq_vectors(pci); +} + static void avs_pci_remove(struct pci_dev *pci) { struct hdac_device *hdev, *save; @@ -739,6 +762,7 @@ static struct pci_driver avs_pci_driver = { .id_table = avs_ids, .probe = avs_pci_probe, .remove = avs_pci_remove, + .shutdown = avs_pci_shutdown, .driver = { .pm = &avs_dev_pm, }, From ac9c5e92dd15b9927e7355ccf79df76a58b44344 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 15 Dec 2022 16:51:55 +0000 Subject: [PATCH 0765/1593] iommu/arm-smmu: Report IOMMU_CAP_CACHE_COHERENCY even betterer Although it's vanishingly unlikely that anyone would integrate an SMMU within a coherent interconnect without also making the pagetable walk interface coherent, the same effect happens if a coherent SMMU fails to advertise CTTW correctly. This turns out to be the case on some popular NXP SoCs, where VFIO started failing the IOMMU_CAP_CACHE_COHERENCY test, even though IOMMU_CACHE *was* previously achieving the desired effect anyway thanks to the underlying integration. While those SoCs stand to gain some more general benefits from a firmware update to override CTTW correctly in DT/ACPI, it's also easy to work around this in Linux as well, to avoid imposing too much on affected users - since the upstream client devices *are* correctly marked as coherent, we can trivially infer their coherent paths through the SMMU as well. Reported-by: Vladimir Oltean Fixes: df198b37e72c ("iommu/arm-smmu: Report IOMMU_CAP_CACHE_COHERENCY better") Signed-off-by: Robin Murphy Tested-by: Vladimir Oltean Link: https://lore.kernel.org/r/d6dc41952961e5c7b21acac08a8bf1eb0f69e124.1671123115.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 719fbca1fe52a..443bc517277ba 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1316,8 +1316,14 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: - /* Assume that a coherent TCU implies coherent TBUs */ - return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK; + /* + * It's overwhelmingly the case in practice that when the pagetable + * walk interface is connected to a coherent interconnect, all the + * translation interfaces are too. Furthermore if the device is + * natively coherent, then its translation interface must also be. + */ + return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK || + device_get_dma_attr(dev) == DEV_DMA_COHERENT; case IOMMU_CAP_NOEXEC: return true; default: From ce31e6ca68bd7639bd3e5ef97be215031842bbab Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Dec 2022 16:12:50 +0200 Subject: [PATCH 0766/1593] iommu/arm-smmu: Don't unregister on shutdown Michael Walle says he noticed the following stack trace while performing a shutdown with "reboot -f". He suggests he got "lucky" and just hit the correct spot for the reboot while there was a packet transmission in flight. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000098 CPU: 0 PID: 23 Comm: kworker/0:1 Not tainted 6.1.0-rc5-00088-gf3600ff8e322 #1930 Hardware name: Kontron KBox A-230-LS (DT) pc : iommu_get_dma_domain+0x14/0x20 lr : iommu_dma_map_page+0x9c/0x254 Call trace: iommu_get_dma_domain+0x14/0x20 dma_map_page_attrs+0x1ec/0x250 enetc_start_xmit+0x14c/0x10b0 enetc_xmit+0x60/0xdc dev_hard_start_xmit+0xb8/0x210 sch_direct_xmit+0x11c/0x420 __dev_queue_xmit+0x354/0xb20 ip6_finish_output2+0x280/0x5b0 __ip6_finish_output+0x15c/0x270 ip6_output+0x78/0x15c NF_HOOK.constprop.0+0x50/0xd0 mld_sendpack+0x1bc/0x320 mld_ifc_work+0x1d8/0x4dc process_one_work+0x1e8/0x460 worker_thread+0x178/0x534 kthread+0xe0/0xe4 ret_from_fork+0x10/0x20 Code: d503201f f9416800 d503233f d50323bf (f9404c00) ---[ end trace 0000000000000000 ]--- Kernel panic - not syncing: Oops: Fatal exception in interrupt This appears to be reproducible when the board has a fixed IP address, is ping flooded from another host, and "reboot -f" is used. The following is one more manifestation of the issue: $ reboot -f kvm: exiting hardware virtualization cfg80211: failed to load regulatory.db arm-smmu 5000000.iommu: disabling translation sdhci-esdhc 2140000.mmc: Removing from iommu group 11 sdhci-esdhc 2150000.mmc: Removing from iommu group 12 fsl-edma 22c0000.dma-controller: Removing from iommu group 17 dwc3 3100000.usb: Removing from iommu group 9 dwc3 3110000.usb: Removing from iommu group 10 ahci-qoriq 3200000.sata: Removing from iommu group 2 fsl-qdma 8380000.dma-controller: Removing from iommu group 20 platform f080000.display: Removing from iommu group 0 etnaviv-gpu f0c0000.gpu: Removing from iommu group 1 etnaviv etnaviv: Removing from iommu group 1 caam_jr 8010000.jr: Removing from iommu group 13 caam_jr 8020000.jr: Removing from iommu group 14 caam_jr 8030000.jr: Removing from iommu group 15 caam_jr 8040000.jr: Removing from iommu group 16 fsl_enetc 0000:00:00.0: Removing from iommu group 4 arm-smmu 5000000.iommu: Blocked unknown Stream ID 0x429; boot with "arm-smmu.disable_bypass=0" to allow, but this may have security implications arm-smmu 5000000.iommu: GFSR 0x80000002, GFSYNR0 0x00000002, GFSYNR1 0x00000429, GFSYNR2 0x00000000 fsl_enetc 0000:00:00.1: Removing from iommu group 5 arm-smmu 5000000.iommu: Blocked unknown Stream ID 0x429; boot with "arm-smmu.disable_bypass=0" to allow, but this may have security implications arm-smmu 5000000.iommu: GFSR 0x80000002, GFSYNR0 0x00000002, GFSYNR1 0x00000429, GFSYNR2 0x00000000 arm-smmu 5000000.iommu: Blocked unknown Stream ID 0x429; boot with "arm-smmu.disable_bypass=0" to allow, but this may have security implications arm-smmu 5000000.iommu: GFSR 0x80000002, GFSYNR0 0x00000000, GFSYNR1 0x00000429, GFSYNR2 0x00000000 fsl_enetc 0000:00:00.2: Removing from iommu group 6 fsl_enetc_mdio 0000:00:00.3: Removing from iommu group 8 mscc_felix 0000:00:00.5: Removing from iommu group 3 fsl_enetc 0000:00:00.6: Removing from iommu group 7 pcieport 0001:00:00.0: Removing from iommu group 18 arm-smmu 5000000.iommu: Blocked unknown Stream ID 0x429; boot with "arm-smmu.disable_bypass=0" to allow, but this may have security implications arm-smmu 5000000.iommu: GFSR 0x00000002, GFSYNR0 0x00000000, GFSYNR1 0x00000429, GFSYNR2 0x00000000 pcieport 0002:00:00.0: Removing from iommu group 19 Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a8 pc : iommu_get_dma_domain+0x14/0x20 lr : iommu_dma_unmap_page+0x38/0xe0 Call trace: iommu_get_dma_domain+0x14/0x20 dma_unmap_page_attrs+0x38/0x1d0 enetc_unmap_tx_buff.isra.0+0x6c/0x80 enetc_poll+0x170/0x910 __napi_poll+0x40/0x1e0 net_rx_action+0x164/0x37c __do_softirq+0x128/0x368 run_ksoftirqd+0x68/0x90 smpboot_thread_fn+0x14c/0x190 Code: d503201f f9416800 d503233f d50323bf (f9405400) ---[ end trace 0000000000000000 ]--- Kernel panic - not syncing: Oops: Fatal exception in interrupt ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]--- The problem seems to be that iommu_group_remove_device() is allowed to run with no coordination whatsoever with the shutdown procedure of the enetc PCI device. In fact, it almost seems as if it implies that the pci_driver :: shutdown() method is mandatory if DMA is used with an IOMMU, otherwise this is inevitable. That was never the case; shutdown methods are optional in device drivers. This is the call stack that leads to iommu_group_remove_device() during reboot: kernel_restart -> device_shutdown -> platform_shutdown -> arm_smmu_device_shutdown -> arm_smmu_device_remove -> iommu_device_unregister -> bus_for_each_dev -> remove_iommu_group -> iommu_release_device -> iommu_group_remove_device I don't know much about the arm_smmu driver, but arm_smmu_device_shutdown() invoking arm_smmu_device_remove() looks suspicious, since it causes the IOMMU device to unregister and that's where everything starts to unravel. It forces all other devices which depend on IOMMU groups to also point their ->shutdown() to ->remove(), which will make reboot slower overall. There are 2 moments relevant to this behavior. First was commit b06c076ea962 ("Revert "iommu/arm-smmu: Make arm-smmu explicitly non-modular"") when arm_smmu_device_shutdown() was made to run the exact same thing as arm_smmu_device_remove(). Prior to that, there was no iommu_device_unregister() call in arm_smmu_device_shutdown(). However, that was benign until commit 57365a04c921 ("iommu: Move bus setup to IOMMU device registration"), which made iommu_device_unregister() call remove_iommu_group(). Restore the old shutdown behavior by making remove() call shutdown(), but shutdown() does not call the remove() specific bits. Fixes: 57365a04c921 ("iommu: Move bus setup to IOMMU device registration") Reported-by: Michael Walle Tested-by: Michael Walle # on kontron-sl28 Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20221215141251.3688780-1-vladimir.oltean@nxp.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 443bc517277ba..2ff7a72cf3772 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -2191,19 +2191,16 @@ static int arm_smmu_device_probe(struct platform_device *pdev) return 0; } -static int arm_smmu_device_remove(struct platform_device *pdev) +static void arm_smmu_device_shutdown(struct platform_device *pdev) { struct arm_smmu_device *smmu = platform_get_drvdata(pdev); if (!smmu) - return -ENODEV; + return; if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) dev_notice(&pdev->dev, "disabling translation\n"); - iommu_device_unregister(&smmu->iommu); - iommu_device_sysfs_remove(&smmu->iommu); - arm_smmu_rpm_get(smmu); /* Turn the thing off */ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD); @@ -2215,12 +2212,21 @@ static int arm_smmu_device_remove(struct platform_device *pdev) clk_bulk_disable(smmu->num_clks, smmu->clks); clk_bulk_unprepare(smmu->num_clks, smmu->clks); - return 0; } -static void arm_smmu_device_shutdown(struct platform_device *pdev) +static int arm_smmu_device_remove(struct platform_device *pdev) { - arm_smmu_device_remove(pdev); + struct arm_smmu_device *smmu = platform_get_drvdata(pdev); + + if (!smmu) + return -ENODEV; + + iommu_device_unregister(&smmu->iommu); + iommu_device_sysfs_remove(&smmu->iommu); + + arm_smmu_device_shutdown(pdev); + + return 0; } static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) From 32ea2c57dc216b6ad8125fa680d31daa5d421c95 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Dec 2022 16:12:51 +0200 Subject: [PATCH 0767/1593] iommu/arm-smmu-v3: Don't unregister on shutdown Similar to SMMUv2, this driver calls iommu_device_unregister() from the shutdown path, which removes the IOMMU groups with no coordination whatsoever with their users - shutdown methods are optional in device drivers. This can lead to NULL pointer dereferences in those drivers' DMA API calls, or worse. Instead of calling the full arm_smmu_device_remove() from arm_smmu_device_shutdown(), let's pick only the relevant function call - arm_smmu_device_disable() - more or less the reverse of arm_smmu_device_reset() - and call just that from the shutdown path. Fixes: 57365a04c921 ("iommu: Move bus setup to IOMMU device registration") Suggested-by: Robin Murphy Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20221215141251.3688780-2-vladimir.oltean@nxp.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index ab160198edd6b..f2425b0f0cd62 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3858,7 +3858,9 @@ static int arm_smmu_device_remove(struct platform_device *pdev) static void arm_smmu_device_shutdown(struct platform_device *pdev) { - arm_smmu_device_remove(pdev); + struct arm_smmu_device *smmu = platform_get_drvdata(pdev); + + arm_smmu_device_disable(smmu); } static const struct of_device_id arm_smmu_of_match[] = { From a6a9a5da68084d5eac6ef85ad09df7fc9c971de4 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 30 Dec 2022 12:31:00 +0400 Subject: [PATCH 0768/1593] iommu: Fix refcount leak in iommu_device_claim_dma_owner iommu_group_get() returns the group with the reference incremented. Move iommu_group_get() after owner check to fix the refcount leak. Fixes: 89395ccedbc1 ("iommu: Add device-centric DMA ownership interfaces") Signed-off-by: Miaoqian Lin Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20221230083100.1489569-1-linmq006@gmail.com [ joro: Remove *group = NULL initialization ] Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index de91dd88705bd..5f6a85aea501e 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3185,14 +3185,16 @@ EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); */ int iommu_device_claim_dma_owner(struct device *dev, void *owner) { - struct iommu_group *group = iommu_group_get(dev); + struct iommu_group *group; int ret = 0; - if (!group) - return -ENODEV; if (WARN_ON(!owner)) return -EINVAL; + group = iommu_group_get(dev); + if (!group) + return -ENODEV; + mutex_lock(&group->mutex); if (group->owner_cnt) { if (group->owner != owner) { From dcdb3ba7e2a8caae7bfefd603bc22fd0ce9a389c Mon Sep 17 00:00:00 2001 From: Yunfei Wang Date: Wed, 11 Jan 2023 14:38:00 +0800 Subject: [PATCH 0769/1593] iommu/iova: Fix alloc iova overflows issue In __alloc_and_insert_iova_range, there is an issue that retry_pfn overflows. The value of iovad->anchor.pfn_hi is ~0UL, then when iovad->cached_node is iovad->anchor, curr_iova->pfn_hi + 1 will overflow. As a result, if the retry logic is executed, low_pfn is updated to 0, and then new_pfn < low_pfn returns false to make the allocation successful. This issue occurs in the following two situations: 1. The first iova size exceeds the domain size. When initializing iova domain, iovad->cached_node is assigned as iovad->anchor. For example, the iova domain size is 10M, start_pfn is 0x1_F000_0000, and the iova size allocated for the first time is 11M. The following is the log information, new->pfn_lo is smaller than iovad->cached_node. Example log as follows: [ 223.798112][T1705487] sh: [name:iova&]__alloc_and_insert_iova_range start_pfn:0x1f0000,retry_pfn:0x0,size:0xb00,limit_pfn:0x1f0a00 [ 223.799590][T1705487] sh: [name:iova&]__alloc_and_insert_iova_range success start_pfn:0x1f0000,new->pfn_lo:0x1efe00,new->pfn_hi:0x1f08ff 2. The node with the largest iova->pfn_lo value in the iova domain is deleted, iovad->cached_node will be updated to iovad->anchor, and then the alloc iova size exceeds the maximum iova size that can be allocated in the domain. After judging that retry_pfn is less than limit_pfn, call retry_pfn+1 to fix the overflow issue. Signed-off-by: jianjiao zeng Signed-off-by: Yunfei Wang Cc: # 5.15.* Fixes: 4e89dce72521 ("iommu/iova: Retry from last rb tree node if iova search fails") Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20230111063801.25107-1-yf.wang@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index a44ad92fc5eb7..fe452ce466429 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -197,7 +197,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, curr = __get_cached_rbnode(iovad, limit_pfn); curr_iova = to_iova(curr); - retry_pfn = curr_iova->pfn_hi + 1; + retry_pfn = curr_iova->pfn_hi; retry: do { @@ -211,7 +211,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, if (high_pfn < size || new_pfn < low_pfn) { if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) { high_pfn = limit_pfn; - low_pfn = retry_pfn; + low_pfn = retry_pfn + 1; curr = iova_find_limit(iovad, limit_pfn); curr_iova = to_iova(curr); goto retry; From 142e821f68cf5da79ce722cb9c1323afae30e185 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 19 Dec 2022 19:06:22 +0100 Subject: [PATCH 0770/1593] iommu/mediatek-v1: Fix an error handling path in mtk_iommu_v1_probe() A clk, prepared and enabled in mtk_iommu_v1_hw_init(), is not released in the error handling path of mtk_iommu_v1_probe(). Add the corresponding clk_disable_unprepare(), as already done in the remove function. Fixes: b17336c55d89 ("iommu/mediatek: add support for mtk iommu generation one HW") Signed-off-by: Christophe JAILLET Reviewed-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/593e7b7d97c6e064b29716b091a9d4fd122241fb.1671473163.git.christophe.jaillet@wanadoo.fr Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu_v1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 69682ee068d2b..ca581ff1c7696 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -683,7 +683,7 @@ static int mtk_iommu_v1_probe(struct platform_device *pdev) ret = iommu_device_sysfs_add(&data->iommu, &pdev->dev, NULL, dev_name(&pdev->dev)); if (ret) - return ret; + goto out_clk_unprepare; ret = iommu_device_register(&data->iommu, &mtk_iommu_v1_ops, dev); if (ret) @@ -698,6 +698,8 @@ static int mtk_iommu_v1_probe(struct platform_device *pdev) iommu_device_unregister(&data->iommu); out_sysfs_remove: iommu_device_sysfs_remove(&data->iommu); +out_clk_unprepare: + clk_disable_unprepare(data->bclk); return ret; } From 1ed8a46256771de283772d482403691807214cf7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 13 Jan 2023 23:08:04 +0900 Subject: [PATCH 0771/1593] tomoyo: Remove "select SRCU" Now that the SRCU Kconfig option is unconditionally selected, there is no longer any point in selecting it. Therefore, remove the "select SRCU" Kconfig statements. Signed-off-by: Paul E. McKenney Signed-off-by: Tetsuo Handa --- security/tomoyo/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/security/tomoyo/Kconfig b/security/tomoyo/Kconfig index 772d2ab58fd14..ee0c1f48025bd 100644 --- a/security/tomoyo/Kconfig +++ b/security/tomoyo/Kconfig @@ -6,7 +6,6 @@ config SECURITY_TOMOYO select SECURITYFS select SECURITY_PATH select SECURITY_NETWORK - select SRCU default n help This selects TOMOYO Linux, pathname-based access control. From fa17087e244263627a01d6a9b76b8fdaf410de34 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 13 Jan 2023 23:11:38 +0900 Subject: [PATCH 0772/1593] tomoyo: Update website link SourceForge.JP was renamed to OSDN in May 2015. Signed-off-by: Tetsuo Handa --- security/tomoyo/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/tomoyo/Kconfig b/security/tomoyo/Kconfig index ee0c1f48025bd..fad75be5f381d 100644 --- a/security/tomoyo/Kconfig +++ b/security/tomoyo/Kconfig @@ -10,7 +10,7 @@ config SECURITY_TOMOYO help This selects TOMOYO Linux, pathname-based access control. Required userspace tools and further information may be - found at . + found at . If you are unsure how to answer this question, answer N. config SECURITY_TOMOYO_MAX_ACCEPT_ENTRY From 544d163d659d45a206d8929370d5a2984e546cb7 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 12 Jan 2023 13:08:56 +0000 Subject: [PATCH 0773/1593] io_uring: lock overflowing for IOPOLL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit syzbot reports an issue with overflow filling for IOPOLL: WARNING: CPU: 0 PID: 28 at io_uring/io_uring.c:734 io_cqring_event_overflow+0x1c0/0x230 io_uring/io_uring.c:734 CPU: 0 PID: 28 Comm: kworker/u4:1 Not tainted 6.2.0-rc3-syzkaller-16369-g358a161a6a9e #0 Workqueue: events_unbound io_ring_exit_work Call trace:  io_cqring_event_overflow+0x1c0/0x230 io_uring/io_uring.c:734  io_req_cqe_overflow+0x5c/0x70 io_uring/io_uring.c:773  io_fill_cqe_req io_uring/io_uring.h:168 [inline]  io_do_iopoll+0x474/0x62c io_uring/rw.c:1065  io_iopoll_try_reap_events+0x6c/0x108 io_uring/io_uring.c:1513  io_uring_try_cancel_requests+0x13c/0x258 io_uring/io_uring.c:3056  io_ring_exit_work+0xec/0x390 io_uring/io_uring.c:2869  process_one_work+0x2d8/0x504 kernel/workqueue.c:2289  worker_thread+0x340/0x610 kernel/workqueue.c:2436  kthread+0x12c/0x158 kernel/kthread.c:376  ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:863 There is no real problem for normal IOPOLL as flush is also called with uring_lock taken, but it's getting more complicated for IOPOLL|SQPOLL, for which __io_cqring_overflow_flush() happens from the CQ waiting path. Reported-and-tested-by: syzbot+6805087452d72929404e@syzkaller.appspotmail.com Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- io_uring/rw.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index 8227af2e1c0f5..9c3ddd46a1adc 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -1062,7 +1062,11 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) continue; req->cqe.flags = io_put_kbuf(req, 0); - io_fill_cqe_req(req->ctx, req); + if (unlikely(!__io_fill_cqe_req(ctx, req))) { + spin_lock(&ctx->completion_lock); + io_req_cqe_overflow(req); + spin_unlock(&ctx->completion_lock); + } } if (unlikely(!nr_events)) From d3f450533bbcb6dd4d7d59cadc9b61b7321e4ac1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 9 Jan 2023 10:44:31 +0100 Subject: [PATCH 0774/1593] efi: tpm: Avoid READ_ONCE() for accessing the event log Nathan reports that recent kernels built with LTO will crash when doing EFI boot using Fedora's GRUB and SHIM. The culprit turns out to be a misaligned load from the TPM event log, which is annotated with READ_ONCE(), and under LTO, this gets translated into a LDAR instruction which does not tolerate misaligned accesses. Interestingly, this does not happen when booting the same kernel straight from the UEFI shell, and so the fact that the event log may appear misaligned in memory may be caused by a bug in GRUB or SHIM. However, using READ_ONCE() to access firmware tables is slightly unusual in any case, and here, we only need to ensure that 'event' is not dereferenced again after it gets unmapped, but this is already taken care of by the implicit barrier() semantics of the early_memunmap() call. Cc: Cc: Peter Jones Cc: Jarkko Sakkinen Cc: Matthew Garrett Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: https://github.com/ClangBuiltLinux/linux/issues/1782 Signed-off-by: Ard Biesheuvel --- include/linux/tpm_eventlog.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 20c0ff54b7a0d..7d68a5cc58816 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -198,8 +198,8 @@ static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *ev * The loop below will unmap these fields if the log is larger than * one page, so save them here for reference: */ - count = READ_ONCE(event->count); - event_type = READ_ONCE(event->event_type); + count = event->count; + event_type = event->event_type; /* Verify that it's the log header */ if (event_header->pcr_idx != 0 || From e4f4db47794c9f474b184ee1418f42e6a07412b6 Mon Sep 17 00:00:00 2001 From: Luis Gerhorst Date: Mon, 9 Jan 2023 16:05:46 +0100 Subject: [PATCH 0775/1593] bpf: Fix pointer-leak due to insufficient speculative store bypass mitigation To mitigate Spectre v4, 2039f26f3aca ("bpf: Fix leakage due to insufficient speculative store bypass mitigation") inserts lfence instructions after 1) initializing a stack slot and 2) spilling a pointer to the stack. However, this does not cover cases where a stack slot is first initialized with a pointer (subject to sanitization) but then overwritten with a scalar (not subject to sanitization because the slot was already initialized). In this case, the second write may be subject to speculative store bypass (SSB) creating a speculative pointer-as-scalar type confusion. This allows the program to subsequently leak the numerical pointer value using, for example, a branch-based cache side channel. To fix this, also sanitize scalars if they write a stack slot that previously contained a pointer. Assuming that pointer-spills are only generated by LLVM on register-pressure, the performance impact on most real-world BPF programs should be small. The following unprivileged BPF bytecode drafts a minimal exploit and the mitigation: [...] // r6 = 0 or 1 (skalar, unknown user input) // r7 = accessible ptr for side channel // r10 = frame pointer (fp), to be leaked // r9 = r10 # fp alias to encourage ssb *(u64 *)(r9 - 8) = r10 // fp[-8] = ptr, to be leaked // lfence added here because of pointer spill to stack. // // Ommitted: Dummy bpf_ringbuf_output() here to train alias predictor // for no r9-r10 dependency. // *(u64 *)(r10 - 8) = r6 // fp[-8] = scalar, overwrites ptr // 2039f26f3aca: no lfence added because stack slot was not STACK_INVALID, // store may be subject to SSB // // fix: also add an lfence when the slot contained a ptr // r8 = *(u64 *)(r9 - 8) // r8 = architecturally a scalar, speculatively a ptr // // leak ptr using branch-based cache side channel: r8 &= 1 // choose bit to leak if r8 == 0 goto SLOW // no mispredict // architecturally dead code if input r6 is 0, // only executes speculatively iff ptr bit is 1 r8 = *(u64 *)(r7 + 0) # encode bit in cache (0: slow, 1: fast) SLOW: [...] After running this, the program can time the access to *(r7 + 0) to determine whether the chosen pointer bit was 0 or 1. Repeat this 64 times to recover the whole address on amd64. In summary, sanitization can only be skipped if one scalar is overwritten with another scalar. Scalar-confusion due to speculative store bypass can not lead to invalid accesses because the pointer bounds deducted during verification are enforced using branchless logic. See 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer arithmetic") for details. Do not make the mitigation depend on !env->allow_{uninit_stack,ptr_leaks} because speculative leaks are likely unexpected if these were enabled. For example, leaking the address to a protected log file may be acceptable while disabling the mitigation might unintentionally leak the address into the cached-state of a map that is accessible to unprivileged processes. Fixes: 2039f26f3aca ("bpf: Fix leakage due to insufficient speculative store bypass mitigation") Signed-off-by: Luis Gerhorst Signed-off-by: Daniel Borkmann Acked-by: Henriette Hofmeier Link: https://lore.kernel.org/bpf/edc95bad-aada-9cfc-ffe2-fa9bb206583c@cs.fau.de Link: https://lore.kernel.org/bpf/20230109150544.41465-1-gerhorst@cs.fau.de --- kernel/bpf/verifier.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c4c0985daac02..dbef0b0967ae1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3295,7 +3295,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, bool sanitize = reg && is_spillable_regtype(reg->type); for (i = 0; i < size; i++) { - if (state->stack[spi].slot_type[i] == STACK_INVALID) { + u8 type = state->stack[spi].slot_type[i]; + + if (type != STACK_MISC && type != STACK_ZERO) { sanitize = true; break; } From 83a7f8e4899fb4cd77c787a3373f3e82b49a080f Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 13 Jan 2023 16:46:37 +0100 Subject: [PATCH 0776/1593] drm/vc4: bo: Fix unused variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 07a2975c65f2 ("drm/vc4: bo: Fix drmm_mutex_init memory hog") removed the only use of the ret variable, but didn't remove the variable itself leading to a unused variable warning. Remove that variable. Reported-by: Stephen Rothwell Fixes: 07a2975c65f2 ("drm/vc4: bo: Fix drmm_mutex_init memory hog") Reviewed-by: Maíra Canal Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20230113154637.1704116-1-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_bo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 29ce2fa72cc4c..c5947ed8cc812 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -395,7 +395,6 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_bo *bo; - int ret; if (WARN_ON_ONCE(vc4->is_vc5)) return ERR_PTR(-ENODEV); From 5daba914da0e48950e9407ea4d75fa57029c9adc Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Mon, 5 Dec 2022 19:58:23 +0800 Subject: [PATCH 0777/1593] phy: rockchip-inno-usb2: Fix missing clk_disable_unprepare() in rockchip_usb2phy_power_on() The clk_disable_unprepare() should be called in the error handling of rockchip_usb2phy_power_on(). Fixes: 0e08d2a727e6 ("phy: rockchip-inno-usb2: add a new driver for Rockchip usb2phy") Signed-off-by: Shang XiaoJing Link: https://lore.kernel.org/r/20221205115823.16957-1-shangxiaojing@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/rockchip/phy-rockchip-inno-usb2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c index e6ededc515239..a0bc10aa79618 100644 --- a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c +++ b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c @@ -485,8 +485,10 @@ static int rockchip_usb2phy_power_on(struct phy *phy) return ret; ret = property_enable(base, &rport->port_cfg->phy_sus, false); - if (ret) + if (ret) { + clk_disable_unprepare(rphy->clk480m); return ret; + } /* waiting for the utmi_clk to become stable */ usleep_range(1500, 2000); From d60c471a8670635b65b3d67b6e7aaa263d191cc3 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Wed, 14 Dec 2022 23:37:32 +0100 Subject: [PATCH 0778/1593] phy: Revert "phy: qualcomm: usb28nm: Add MDM9607 init sequence" This reverts commit 557a28811c7e0286d3816842032db5eb7bb5f156. This commit introduced an init sequence from downstream DT [1] in the driver. As mentioned by the comment above the HSPHY_INIT_CFG macro for this sequence: /* * The macro is used to define an initialization sequence. Each tuple * is meant to program 'value' into phy register at 'offset' with 'delay' * in us followed. */ Instead of corresponding to offsets into the phy register, the sequence read by the downstream driver [2] is passed into ulpi_write [3] which crafts the address-value pair into a new value and writes it into the same register at USB_ULPI_VIEWPORT [4]. In other words, this init sequence is programmed into the hardware in a totally different way than downstream and is unlikely to achieve the desired result, if the hsphy is working at all. An alternative method needs to be found to write these init values at the desired location. Fortunately mdm9607 did not land upstream yet [5] and should have its compatible revised to use the generic one, instead of a compatible that writes wrong data to the wrong registers. [1]: https://android.googlesource.com/kernel/msm/+/android-7.1.0_r0.2/arch/arm/boot/dts/qcom/mdm9607.dtsi#585 [2]: https://android.googlesource.com/kernel/msm/+/android-7.1.0_r0.2/drivers/usb/phy/phy-msm-usb.c#4183 [3]: https://android.googlesource.com/kernel/msm/+/android-7.1.0_r0.2/drivers/usb/phy/phy-msm-usb.c#468 [4]: https://android.googlesource.com/kernel/msm/+/android-7.1.0_r0.2/drivers/usb/phy/phy-msm-usb.c#418 [5]: https://lore.kernel.org/linux-arm-msm/20210805222812.40731-1-konrad.dybcio@somainline.org/ Reported-by: Michael Srba Signed-off-by: Marijn Suijten Reviewed-by: Stephan Gerhold Reviewed-by: Bryan O'Donoghue Link: https://lore.kernel.org/r/20221214223733.648167-1-marijn.suijten@somainline.org Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/qcom,usb-hs-28nm.yaml | 1 - drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c | 13 ------------- 2 files changed, 14 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/qcom,usb-hs-28nm.yaml b/Documentation/devicetree/bindings/phy/qcom,usb-hs-28nm.yaml index abcc4373f39e1..ca6a0836b53c4 100644 --- a/Documentation/devicetree/bindings/phy/qcom,usb-hs-28nm.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,usb-hs-28nm.yaml @@ -16,7 +16,6 @@ properties: compatible: enum: - qcom,usb-hs-28nm-femtophy - - qcom,usb-hs-28nm-mdm9607 reg: maxItems: 1 diff --git a/drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c b/drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c index 8807e59a1162c..a52a9bf13b758 100644 --- a/drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c +++ b/drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c @@ -401,26 +401,13 @@ static const struct hsphy_init_seq init_seq_femtophy[] = { HSPHY_INIT_CFG(0x90, 0x60, 0), }; -static const struct hsphy_init_seq init_seq_mdm9607[] = { - HSPHY_INIT_CFG(0x80, 0x44, 0), - HSPHY_INIT_CFG(0x81, 0x38, 0), - HSPHY_INIT_CFG(0x82, 0x24, 0), - HSPHY_INIT_CFG(0x83, 0x13, 0), -}; - static const struct hsphy_data hsphy_data_femtophy = { .init_seq = init_seq_femtophy, .init_seq_num = ARRAY_SIZE(init_seq_femtophy), }; -static const struct hsphy_data hsphy_data_mdm9607 = { - .init_seq = init_seq_mdm9607, - .init_seq_num = ARRAY_SIZE(init_seq_mdm9607), -}; - static const struct of_device_id qcom_snps_hsphy_match[] = { { .compatible = "qcom,usb-hs-28nm-femtophy", .data = &hsphy_data_femtophy, }, - { .compatible = "qcom,usb-hs-28nm-mdm9607", .data = &hsphy_data_mdm9607, }, { }, }; MODULE_DEVICE_TABLE(of, qcom_snps_hsphy_match); From fd3a8cff4d4a4acb0af49dd947c822717c053cf7 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 10 Jan 2023 12:02:43 -0600 Subject: [PATCH 0779/1593] x86/pci: Treat EfiMemoryMappedIO as reservation of ECAM space Normally we reject ECAM space unless it is reported as reserved in the E820 table or via a PNP0C02 _CRS method (PCI Firmware, r3.3, sec 4.1.2). 07eab0901ede ("efi/x86: Remove EfiMemoryMappedIO from E820 map"), removes E820 entries that correspond to EfiMemoryMappedIO regions because some other firmware uses EfiMemoryMappedIO for PCI host bridge windows, and the E820 entries prevent Linux from allocating BAR space for hot-added devices. Some firmware doesn't report ECAM space via PNP0C02 _CRS methods, but does mention it as an EfiMemoryMappedIO region via EFI GetMemoryMap(), which is normally converted to an E820 entry by a bootloader or EFI stub. After 07eab0901ede, that E820 entry is removed, so we reject this ECAM space, which makes PCI extended config space (offsets 0x100-0xfff) inaccessible. The lack of extended config space breaks anything that relies on it, including perf, VSEC telemetry, EDAC, QAT, SR-IOV, etc. Allow use of ECAM for extended config space when the region is covered by an EfiMemoryMappedIO region, even if it's not included in E820 or PNP0C02 _CRS. Link: https://lore.kernel.org/r/ac2693d8-8ba3-72e0-5b66-b3ae008d539d@linux.intel.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=216891 Fixes: 07eab0901ede ("efi/x86: Remove EfiMemoryMappedIO from E820 map") Link: https://lore.kernel.org/r/20230110180243.1590045-3-helgaas@kernel.org Reported-by: Kan Liang Reported-by: Tony Luck Reported-by: Giovanni Cabiddu Reported-by: Yunying Sun Reported-by: Baowen Zheng Reported-by: Zhenzhong Duan Reported-by: Yang Lixiao Tested-by: Tony Luck Tested-by: Giovanni Cabiddu Tested-by: Kan Liang Tested-by: Yunying Sun Signed-off-by: Bjorn Helgaas Reviewed-by: Dan Williams Reviewed-by: Rafael J. Wysocki --- arch/x86/pci/mmconfig-shared.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 51c951699b2ed..4b3efaa82ab7c 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -12,6 +12,7 @@ */ #include +#include #include #include #include @@ -442,6 +443,32 @@ static bool is_acpi_reserved(u64 start, u64 end, enum e820_type not_used) return mcfg_res.flags; } +static bool is_efi_mmio(u64 start, u64 end, enum e820_type not_used) +{ +#ifdef CONFIG_EFI + efi_memory_desc_t *md; + u64 size, mmio_start, mmio_end; + + for_each_efi_memory_desc(md) { + if (md->type == EFI_MEMORY_MAPPED_IO) { + size = md->num_pages << EFI_PAGE_SHIFT; + mmio_start = md->phys_addr; + mmio_end = mmio_start + size; + + /* + * N.B. Caller supplies (start, start + size), + * so to match, mmio_end is the first address + * *past* the EFI_MEMORY_MAPPED_IO area. + */ + if (mmio_start <= start && end <= mmio_end) + return true; + } + } +#endif + + return false; +} + typedef bool (*check_reserved_t)(u64 start, u64 end, enum e820_type type); static bool __ref is_mmconf_reserved(check_reserved_t is_reserved, @@ -513,6 +540,10 @@ pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int e "MMCONFIG at %pR not reserved in " "ACPI motherboard resources\n", &cfg->res); + + if (is_mmconf_reserved(is_efi_mmio, cfg, dev, + "EfiMemoryMappedIO")) + return true; } /* From b574baa64cf84e7793fe79f4491ae36c16e65a0b Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Mon, 19 Dec 2022 15:12:21 +0800 Subject: [PATCH 0780/1593] phy: freescale: imx8m-pcie: Add one missing error return There should be one error return when fail to fetch the perst reset. Add the missing error return. Fixes: dce9edff16ee ("phy: freescale: imx8m-pcie: Add i.MX8MP PCIe PHY support") Signed-off-by: Richard Zhu Reviewed-by: Marek Vasut Link: https://lore.kernel.org/r/1671433941-2037-1-git-send-email-hongxing.zhu@nxp.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c index 7585e8080b77d..afc63552ecaf7 100644 --- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c @@ -255,7 +255,7 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev) imx8_phy->perst = devm_reset_control_get_exclusive(dev, "perst"); if (IS_ERR(imx8_phy->perst)) - dev_err_probe(dev, PTR_ERR(imx8_phy->perst), + return dev_err_probe(dev, PTR_ERR(imx8_phy->perst), "Failed to get PCIE PHY PERST control\n"); } From f5fe24ef17b5fbe6db49534163e77499fb10ae8c Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Fri, 13 Jan 2023 19:44:47 +0100 Subject: [PATCH 0781/1593] lockref: stop doing cpu_relax in the cmpxchg loop On the x86-64 architecture even a failing cmpxchg grants exclusive access to the cacheline, making it preferable to retry the failed op immediately instead of stalling with the pause instruction. To illustrate the impact, below are benchmark results obtained by running various will-it-scale tests on top of the 6.2-rc3 kernel and Cascade Lake (2 sockets * 24 cores * 2 threads) CPU. All results in ops/s. Note there is some variance in re-runs, but the code is consistently faster when contention is present. open3 ("Same file open/close"): proc stock no-pause 1 805603 814942 (+%1) 2 1054980 1054781 (-0%) 8 1544802 1822858 (+18%) 24 1191064 2199665 (+84%) 48 851582 1469860 (+72%) 96 609481 1427170 (+134%) fstat2 ("Same file fstat"): proc stock no-pause 1 3013872 3047636 (+1%) 2 4284687 4400421 (+2%) 8 3257721 5530156 (+69%) 24 2239819 5466127 (+144%) 48 1701072 5256609 (+209%) 96 1269157 6649326 (+423%) Additionally, a kernel with a private patch to help access() scalability: access2 ("Same file access"): proc stock patched patched +nopause 24 2378041 2005501 5370335 (-15% / +125%) That is, fixing the problems in access itself *reduces* scalability after the cacheline ping-pong only happens in lockref with the pause instruction. Note that fstat and access benchmarks are not currently integrated into will-it-scale, but interested parties can find them in pull requests to said project. Code at hand has a rather tortured history. First modification showed up in commit d472d9d98b46 ("lockref: Relax in cmpxchg loop"), written with Itanium in mind. Later it got patched up to use an arch-dependent macro to stop doing it on s390 where it caused a significant regression. Said macro had undergone revisions and was ultimately eliminated later, going back to cpu_relax. While I intended to only remove cpu_relax for x86-64, I got the following comment from Linus: I would actually prefer just removing it entirely and see if somebody else hollers. You have the numbers to prove it hurts on real hardware, and I don't think we have any numbers to the contrary. So I think it's better to trust the numbers and remove it as a failure, than say "let's just remove it on x86-64 and leave everybody else with the potentially broken code" Additionally, Will Deacon (maintainer of the arm64 port, one of the architectures previously benchmarked): So, from the arm64 side of the fence, I'm perfectly happy just removing the cpu_relax() calls from lockref. As such, come back full circle in history and whack it altogether. Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/all/CAGudoHHx0Nqg6DE70zAVA75eV-HXfWyhVMWZ-aSeOofkA_=WdA@mail.gmail.com/ Acked-by: Tony Luck # ia64 Acked-by: Nicholas Piggin # powerpc Acked-by: Will Deacon # arm64 Acked-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- lib/lockref.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/lockref.c b/lib/lockref.c index 45e93ece8ba0d..2afe4c5d89191 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -23,7 +23,6 @@ } \ if (!--retry) \ break; \ - cpu_relax(); \ } \ } while (0) From 22eebaa631c40f3dac169ba781e0de471b83bf45 Mon Sep 17 00:00:00 2001 From: Peter Foley Date: Thu, 12 Jan 2023 23:37:06 -0500 Subject: [PATCH 0782/1593] ata: pata_cs5535: Don't build on UML This driver uses MSR functions that aren't implemented under UML. Avoid building it to prevent tripping up allyesconfig. e.g. /usr/lib/gcc/x86_64-pc-linux-gnu/12/../../../../x86_64-pc-linux-gnu/bin/ld: pata_cs5535.c:(.text+0x3a3): undefined reference to `__tracepoint_read_msr' /usr/lib/gcc/x86_64-pc-linux-gnu/12/../../../../x86_64-pc-linux-gnu/bin/ld: pata_cs5535.c:(.text+0x3d2): undefined reference to `__tracepoint_write_msr' /usr/lib/gcc/x86_64-pc-linux-gnu/12/../../../../x86_64-pc-linux-gnu/bin/ld: pata_cs5535.c:(.text+0x457): undefined reference to `__tracepoint_write_msr' /usr/lib/gcc/x86_64-pc-linux-gnu/12/../../../../x86_64-pc-linux-gnu/bin/ld: pata_cs5535.c:(.text+0x481): undefined reference to `do_trace_write_msr' /usr/lib/gcc/x86_64-pc-linux-gnu/12/../../../../x86_64-pc-linux-gnu/bin/ld: pata_cs5535.c:(.text+0x4d5): undefined reference to `do_trace_write_msr' /usr/lib/gcc/x86_64-pc-linux-gnu/12/../../../../x86_64-pc-linux-gnu/bin/ld: pata_cs5535.c:(.text+0x4f5): undefined reference to `do_trace_read_msr' /usr/lib/gcc/x86_64-pc-linux-gnu/12/../../../../x86_64-pc-linux-gnu/bin/ld: pata_cs5535.c:(.text+0x51c): undefined reference to `do_trace_write_msr' Signed-off-by: Peter Foley Reviewed-by: Randy Dunlap Signed-off-by: Damien Le Moal --- drivers/ata/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index eceaec33af65b..9695c4404e26a 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -640,6 +640,7 @@ config PATA_CS5530 config PATA_CS5535 tristate "CS5535 PATA support (Experimental)" depends on PCI && (X86_32 || (X86_64 && COMPILE_TEST)) + depends on !UML help This option enables support for the NatSemi/AMD CS5535 companion chip used with the Geode processor family. From da35048f2600633a7f9ba5fa7d6e3b1d0195938b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 12 Jan 2023 20:54:26 +1000 Subject: [PATCH 0783/1593] kallsyms: Fix scheduling with interrupts disabled in self-test kallsyms_on_each* may schedule so must not be called with interrupts disabled. The iteration function could disable interrupts, but this also changes lookup_symbol() to match the change to the other timing code. Reported-by: Erhard F. Link: https://lore.kernel.org/all/bug-216902-206035@https.bugzilla.kernel.org%2F/ Reported-by: kernel test robot Link: https://lore.kernel.org/oe-lkp/202212251728.8d0872ff-oliver.sang@intel.com Fixes: 30f3bb09778d ("kallsyms: Add self-test facility") Tested-by: "Erhard F." Signed-off-by: Nicholas Piggin Signed-off-by: Luis Chamberlain --- kernel/kallsyms_selftest.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c index f35d9cc1aab15..bfbc12da33267 100644 --- a/kernel/kallsyms_selftest.c +++ b/kernel/kallsyms_selftest.c @@ -157,14 +157,11 @@ static void test_kallsyms_compression_ratio(void) static int lookup_name(void *data, const char *name, struct module *mod, unsigned long addr) { u64 t0, t1, t; - unsigned long flags; struct test_stat *stat = (struct test_stat *)data; - local_irq_save(flags); - t0 = sched_clock(); + t0 = ktime_get_ns(); (void)kallsyms_lookup_name(name); - t1 = sched_clock(); - local_irq_restore(flags); + t1 = ktime_get_ns(); t = t1 - t0; if (t < stat->min) @@ -234,18 +231,15 @@ static int find_symbol(void *data, const char *name, struct module *mod, unsigne static void test_perf_kallsyms_on_each_symbol(void) { u64 t0, t1; - unsigned long flags; struct test_stat stat; memset(&stat, 0, sizeof(stat)); stat.max = INT_MAX; stat.name = stub_name; stat.perf = 1; - local_irq_save(flags); - t0 = sched_clock(); + t0 = ktime_get_ns(); kallsyms_on_each_symbol(find_symbol, &stat); - t1 = sched_clock(); - local_irq_restore(flags); + t1 = ktime_get_ns(); pr_info("kallsyms_on_each_symbol() traverse all: %lld ns\n", t1 - t0); } @@ -270,17 +264,14 @@ static int match_symbol(void *data, unsigned long addr) static void test_perf_kallsyms_on_each_match_symbol(void) { u64 t0, t1; - unsigned long flags; struct test_stat stat; memset(&stat, 0, sizeof(stat)); stat.max = INT_MAX; stat.name = stub_name; - local_irq_save(flags); - t0 = sched_clock(); + t0 = ktime_get_ns(); kallsyms_on_each_match_symbol(match_symbol, stat.name, &stat); - t1 = sched_clock(); - local_irq_restore(flags); + t1 = ktime_get_ns(); pr_info("kallsyms_on_each_match_symbol() traverse all: %lld ns\n", t1 - t0); } From 3b293487b8752cc42c1cbf8a0447bc6076c075fa Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 12 Jan 2023 15:03:16 -0800 Subject: [PATCH 0784/1593] firmware: coreboot: Check size of table entry and use flex-array The memcpy() of the data following a coreboot_table_entry couldn't be evaluated by the compiler under CONFIG_FORTIFY_SOURCE. To make it easier to reason about, add an explicit flexible array member to struct coreboot_device so the entire entry can be copied at once. Additionally, validate the sizes before copying. Avoids this run-time false positive warning: memcpy: detected field-spanning write (size 168) of single field "&device->entry" at drivers/firmware/google/coreboot_table.c:103 (size 8) Reported-by: Paul Menzel Link: https://lore.kernel.org/all/03ae2704-8c30-f9f0-215b-7cdf4ad35a9a@molgen.mpg.de/ Cc: Jack Rosenthal Cc: Guenter Roeck Cc: Julius Werner Cc: Brian Norris Cc: Stephen Boyd Cc: Greg Kroah-Hartman Signed-off-by: Kees Cook Reviewed-by: Julius Werner Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20230107031406.gonna.761-kees@kernel.org Reviewed-by: Stephen Boyd Reviewed-by: Jack Rosenthal Link: https://lore.kernel.org/r/20230112230312.give.446-kees@kernel.org --- drivers/firmware/google/coreboot_table.c | 9 +++++++-- drivers/firmware/google/coreboot_table.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c index 2652c396c4236..33ae94745aef9 100644 --- a/drivers/firmware/google/coreboot_table.c +++ b/drivers/firmware/google/coreboot_table.c @@ -93,14 +93,19 @@ static int coreboot_table_populate(struct device *dev, void *ptr) for (i = 0; i < header->table_entries; i++) { entry = ptr_entry; - device = kzalloc(sizeof(struct device) + entry->size, GFP_KERNEL); + if (entry->size < sizeof(*entry)) { + dev_warn(dev, "coreboot table entry too small!\n"); + return -EINVAL; + } + + device = kzalloc(sizeof(device->dev) + entry->size, GFP_KERNEL); if (!device) return -ENOMEM; device->dev.parent = dev; device->dev.bus = &coreboot_bus_type; device->dev.release = coreboot_device_release; - memcpy(&device->entry, ptr_entry, entry->size); + memcpy(device->raw, ptr_entry, entry->size); switch (device->entry.tag) { case LB_TAG_CBMEM_ENTRY: diff --git a/drivers/firmware/google/coreboot_table.h b/drivers/firmware/google/coreboot_table.h index 37f4d335a606d..d814dca33a084 100644 --- a/drivers/firmware/google/coreboot_table.h +++ b/drivers/firmware/google/coreboot_table.h @@ -79,6 +79,7 @@ struct coreboot_device { struct lb_cbmem_ref cbmem_ref; struct lb_cbmem_entry cbmem_entry; struct lb_framebuffer framebuffer; + DECLARE_FLEX_ARRAY(u8, raw); }; }; From 42633ed852deadc14d44660ad71e2f6640239120 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 12 Jan 2023 22:49:48 +0000 Subject: [PATCH 0785/1593] kbuild: Fix CFI hash randomization with KASAN Clang emits a asan.module_ctor constructor to each object file when KASAN is enabled, and these functions are indirectly called in do_ctors. With CONFIG_CFI_CLANG, the compiler also emits a CFI type hash before each address-taken global function so they can pass indirect call checks. However, in commit 0c3e806ec0f9 ("x86/cfi: Add boot time hash randomization"), x86 implemented boot time hash randomization, which relies on the .cfi_sites section generated by objtool. As objtool is run against vmlinux.o instead of individual object files with X86_KERNEL_IBT (enabled by default), CFI types in object files that are not part of vmlinux.o end up not being included in .cfi_sites, and thus won't get randomized and trip CFI when called. Only .vmlinux.export.o and init/version-timestamp.o are linked into vmlinux separately from vmlinux.o. As these files don't contain any functions, disable KASAN for both of them to avoid breaking hash randomization. Link: https://github.com/ClangBuiltLinux/linux/issues/1742 Fixes: 0c3e806ec0f9 ("x86/cfi: Add boot time hash randomization") Signed-off-by: Sami Tolvanen Acked-by: Peter Zijlstra (Intel) Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20230112224948.1479453-2-samitolvanen@google.com --- init/Makefile | 1 + scripts/Makefile.vmlinux | 1 + 2 files changed, 2 insertions(+) diff --git a/init/Makefile b/init/Makefile index 8316c23bead26..26de459006c4e 100644 --- a/init/Makefile +++ b/init/Makefile @@ -59,3 +59,4 @@ include/generated/utsversion.h: FORCE $(obj)/version-timestamp.o: include/generated/utsversion.h CFLAGS_version-timestamp.o := -include include/generated/utsversion.h +KASAN_SANITIZE_version-timestamp.o := n diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 49946cb968440..10176dec97eac 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -18,6 +18,7 @@ quiet_cmd_cc_o_c = CC $@ $(call if_changed_dep,cc_o_c) ifdef CONFIG_MODULES +KASAN_SANITIZE_.vmlinux.export.o := n targets += .vmlinux.export.o vmlinux: .vmlinux.export.o endif From 84ed64b1a7a7fcd507598dee7708c1f225123711 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Tue, 10 Jan 2023 13:53:10 +0100 Subject: [PATCH 0786/1593] scsi: target: core: Fix warning on RT kernels Calling spin_lock_irqsave() does not disable the interrupts on realtime kernels, remove the warning and replace assert_spin_locked() with lockdep_assert_held(). Signed-off-by: Maurizio Lombardi Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230110125310.55884-1-mlombard@redhat.com Signed-off-by: Martin K. Petersen --- drivers/target/target_core_tmr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index bac111456fa1d..2b95b4550a637 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -73,8 +73,8 @@ static bool __target_check_io_state(struct se_cmd *se_cmd, { struct se_session *sess = se_cmd->se_sess; - assert_spin_locked(&sess->sess_cmd_lock); - WARN_ON_ONCE(!irqs_disabled()); + lockdep_assert_held(&sess->sess_cmd_lock); + /* * If command already reached CMD_T_COMPLETE state within * target_complete_cmd() or CMD_T_FABRIC_STOP due to shutdown, From 4bb4db7f3187c6e3de6b229ffc87cdb30a2d22b6 Mon Sep 17 00:00:00 2001 From: Jisoo Jang Date: Wed, 11 Jan 2023 22:19:14 +0900 Subject: [PATCH 0787/1593] net: nfc: Fix use-after-free in local_cleanup() Fix a use-after-free that occurs in kfree_skb() called from local_cleanup(). This could happen when killing nfc daemon (e.g. neard) after detaching an nfc device. When detaching an nfc device, local_cleanup() called from nfc_llcp_unregister_device() frees local->rx_pending and decreases local->ref by kref_put() in nfc_llcp_local_put(). In the terminating process, nfc daemon releases all sockets and it leads to decreasing local->ref. After the last release of local->ref, local_cleanup() called from local_release() frees local->rx_pending again, which leads to the bug. Setting local->rx_pending to NULL in local_cleanup() could prevent use-after-free when local_cleanup() is called twice. Found by a modified version of syzkaller. BUG: KASAN: use-after-free in kfree_skb() Call Trace: dump_stack_lvl (lib/dump_stack.c:106) print_address_description.constprop.0.cold (mm/kasan/report.c:306) kasan_check_range (mm/kasan/generic.c:189) kfree_skb (net/core/skbuff.c:955) local_cleanup (net/nfc/llcp_core.c:159) nfc_llcp_local_put.part.0 (net/nfc/llcp_core.c:172) nfc_llcp_local_put (net/nfc/llcp_core.c:181) llcp_sock_destruct (net/nfc/llcp_sock.c:959) __sk_destruct (net/core/sock.c:2133) sk_destruct (net/core/sock.c:2181) __sk_free (net/core/sock.c:2192) sk_free (net/core/sock.c:2203) llcp_sock_release (net/nfc/llcp_sock.c:646) __sock_release (net/socket.c:650) sock_close (net/socket.c:1365) __fput (fs/file_table.c:306) task_work_run (kernel/task_work.c:179) ptrace_notify (kernel/signal.c:2354) syscall_exit_to_user_mode_prepare (kernel/entry/common.c:278) syscall_exit_to_user_mode (kernel/entry/common.c:296) do_syscall_64 (arch/x86/entry/common.c:86) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:106) Allocated by task 4719: kasan_save_stack (mm/kasan/common.c:45) __kasan_slab_alloc (mm/kasan/common.c:325) slab_post_alloc_hook (mm/slab.h:766) kmem_cache_alloc_node (mm/slub.c:3497) __alloc_skb (net/core/skbuff.c:552) pn533_recv_response (drivers/nfc/pn533/usb.c:65) __usb_hcd_giveback_urb (drivers/usb/core/hcd.c:1671) usb_giveback_urb_bh (drivers/usb/core/hcd.c:1704) tasklet_action_common.isra.0 (kernel/softirq.c:797) __do_softirq (kernel/softirq.c:571) Freed by task 1901: kasan_save_stack (mm/kasan/common.c:45) kasan_set_track (mm/kasan/common.c:52) kasan_save_free_info (mm/kasan/genericdd.c:518) __kasan_slab_free (mm/kasan/common.c:236) kmem_cache_free (mm/slub.c:3809) kfree_skbmem (net/core/skbuff.c:874) kfree_skb (net/core/skbuff.c:931) local_cleanup (net/nfc/llcp_core.c:159) nfc_llcp_unregister_device (net/nfc/llcp_core.c:1617) nfc_unregister_device (net/nfc/core.c:1179) pn53x_unregister_nfc (drivers/nfc/pn533/pn533.c:2846) pn533_usb_disconnect (drivers/nfc/pn533/usb.c:579) usb_unbind_interface (drivers/usb/core/driver.c:458) device_release_driver_internal (drivers/base/dd.c:1279) bus_remove_device (drivers/base/bus.c:529) device_del (drivers/base/core.c:3665) usb_disable_device (drivers/usb/core/message.c:1420) usb_disconnect (drivers/usb/core.c:2261) hub_event (drivers/usb/core/hub.c:5833) process_one_work (arch/x86/include/asm/jump_label.h:27 include/linux/jump_label.h:212 include/trace/events/workqueue.h:108 kernel/workqueue.c:2281) worker_thread (include/linux/list.h:282 kernel/workqueue.c:2423) kthread (kernel/kthread.c:319) ret_from_fork (arch/x86/entry/entry_64.S:301) Fixes: 3536da06db0b ("NFC: llcp: Clean local timers and works when removing a device") Signed-off-by: Jisoo Jang Link: https://lore.kernel.org/r/20230111131914.3338838-1-jisoo.jang@yonsei.ac.kr Signed-off-by: Jakub Kicinski --- net/nfc/llcp_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 3364caabef8b1..a27e1842b2a09 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -157,6 +157,7 @@ static void local_cleanup(struct nfc_llcp_local *local) cancel_work_sync(&local->rx_work); cancel_work_sync(&local->timeout_work); kfree_skb(local->rx_pending); + local->rx_pending = NULL; del_timer_sync(&local->sdreq_timer); cancel_work_sync(&local->sdreq_timeout_work); nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs); From 488e0bf7f34af3d42d1d5e56f7a5a7beaff188a3 Mon Sep 17 00:00:00 2001 From: Esina Ekaterina Date: Thu, 12 Jan 2023 10:47:03 +0300 Subject: [PATCH 0788/1593] net: wan: Add checks for NULL for utdm in undo_uhdlc_init and unmap_si_regs If uhdlc_priv_tsa != 1 then utdm is not initialized. And if ret != NULL then goto undo_uhdlc_init, where utdm is dereferenced. Same if dev == NULL. Found by Astra Linux on behalf of Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 8d68100ab4ad ("soc/fsl/qe: fix err handling of ucc_of_parse_tdm") Signed-off-by: Esina Ekaterina Link: https://lore.kernel.org/r/20230112074703.13558-1-eesina@astralinux.ru Signed-off-by: Jakub Kicinski --- drivers/net/wan/fsl_ucc_hdlc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 22edea6ca4b81..1c53b55469270 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -1243,9 +1243,11 @@ static int ucc_hdlc_probe(struct platform_device *pdev) free_dev: free_netdev(dev); undo_uhdlc_init: - iounmap(utdm->siram); + if (utdm) + iounmap(utdm->siram); unmap_si_regs: - iounmap(utdm->si_regs); + if (utdm) + iounmap(utdm->si_regs); free_utdm: if (uhdlc_priv->tsa) kfree(utdm); From 3c463721a73bdb57a913e0d3124677a3758886fc Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 12 Jan 2023 12:54:40 +0200 Subject: [PATCH 0789/1593] net: enetc: avoid deadlock in enetc_tx_onestep_tstamp() This lockdep splat says it better than I could: ================================ WARNING: inconsistent lock state 6.2.0-rc2-07010-ga9b9500ffaac-dirty #967 Not tainted -------------------------------- inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. kworker/1:3/179 [HC0[0]:SC0[0]:HE1:SE1] takes: ffff3ec4036ce098 (_xmit_ETHER#2){+.?.}-{3:3}, at: netif_freeze_queues+0x5c/0xc0 {IN-SOFTIRQ-W} state was registered at: _raw_spin_lock+0x5c/0xc0 sch_direct_xmit+0x148/0x37c __dev_queue_xmit+0x528/0x111c ip6_finish_output2+0x5ec/0xb7c ip6_finish_output+0x240/0x3f0 ip6_output+0x78/0x360 ndisc_send_skb+0x33c/0x85c ndisc_send_rs+0x54/0x12c addrconf_rs_timer+0x154/0x260 call_timer_fn+0xb8/0x3a0 __run_timers.part.0+0x214/0x26c run_timer_softirq+0x3c/0x74 __do_softirq+0x14c/0x5d8 ____do_softirq+0x10/0x20 call_on_irq_stack+0x2c/0x5c do_softirq_own_stack+0x1c/0x30 __irq_exit_rcu+0x168/0x1a0 irq_exit_rcu+0x10/0x40 el1_interrupt+0x38/0x64 irq event stamp: 7825 hardirqs last enabled at (7825): [] exit_to_kernel_mode+0x34/0x130 hardirqs last disabled at (7823): [] __do_softirq+0x550/0x5d8 softirqs last enabled at (7824): [] __do_softirq+0x46c/0x5d8 softirqs last disabled at (7811): [] ____do_softirq+0x10/0x20 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(_xmit_ETHER#2); lock(_xmit_ETHER#2); *** DEADLOCK *** 3 locks held by kworker/1:3/179: #0: ffff3ec400004748 ((wq_completion)events){+.+.}-{0:0}, at: process_one_work+0x1f4/0x6c0 #1: ffff80000a0bbdc8 ((work_completion)(&priv->tx_onestep_tstamp)){+.+.}-{0:0}, at: process_one_work+0x1f4/0x6c0 #2: ffff3ec4036cd438 (&dev->tx_global_lock){+.+.}-{3:3}, at: netif_tx_lock+0x1c/0x34 Workqueue: events enetc_tx_onestep_tstamp Call trace: print_usage_bug.part.0+0x208/0x22c mark_lock+0x7f0/0x8b0 __lock_acquire+0x7c4/0x1ce0 lock_acquire.part.0+0xe0/0x220 lock_acquire+0x68/0x84 _raw_spin_lock+0x5c/0xc0 netif_freeze_queues+0x5c/0xc0 netif_tx_lock+0x24/0x34 enetc_tx_onestep_tstamp+0x20/0x100 process_one_work+0x28c/0x6c0 worker_thread+0x74/0x450 kthread+0x118/0x11c but I'll say it anyway: the enetc_tx_onestep_tstamp() work item runs in process context, therefore with softirqs enabled (i.o.w., it can be interrupted by a softirq). If we hold the netif_tx_lock() when there is an interrupt, and the NET_TX softirq then gets scheduled, this will take the netif_tx_lock() a second time and deadlock the kernel. To solve this, use netif_tx_lock_bh(), which blocks softirqs from running. Fixes: 7294380c5211 ("enetc: support PTP Sync packet one-step timestamping") Signed-off-by: Vladimir Oltean Reviewed-by: Alexander Duyck Link: https://lore.kernel.org/r/20230112105440.1786799-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 3a79ead5219ae..e96449eedfb54 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2290,14 +2290,14 @@ static void enetc_tx_onestep_tstamp(struct work_struct *work) priv = container_of(work, struct enetc_ndev_priv, tx_onestep_tstamp); - netif_tx_lock(priv->ndev); + netif_tx_lock_bh(priv->ndev); clear_bit_unlock(ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS, &priv->flags); skb = skb_dequeue(&priv->tx_skbs); if (skb) enetc_start_xmit(skb, priv->ndev); - netif_tx_unlock(priv->ndev); + netif_tx_unlock_bh(priv->ndev); } static void enetc_tx_onestep_tstamp_init(struct enetc_ndev_priv *priv) From 925f3deb45df73173a33e1e81db77575f4ffde39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Thu, 12 Jan 2023 17:13:11 +0100 Subject: [PATCH 0790/1593] net: lan966x: add missing fwnode_handle_put() for ports node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the "ethernet-ports" node is retrieved using device_get_named_child_node(), it should be release after using it. Add missing fwnode_handle_put() and move the code that retrieved the node from device-tree to avoid complicated handling in case of error. Fixes: db8bcaad5393 ("net: lan966x: add the basic lan966x driver") Signed-off-by: Clément Léger Reviewed-by: Alexander Duyck Link: https://lore.kernel.org/r/20230112161311.495124-1-clement.leger@bootlin.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/microchip/lan966x/lan966x_main.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index cadde20505ba0..580c91d24a528 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -1043,11 +1043,6 @@ static int lan966x_probe(struct platform_device *pdev) lan966x->base_mac[5] &= 0xf0; } - ports = device_get_named_child_node(&pdev->dev, "ethernet-ports"); - if (!ports) - return dev_err_probe(&pdev->dev, -ENODEV, - "no ethernet-ports child found\n"); - err = lan966x_create_targets(pdev, lan966x); if (err) return dev_err_probe(&pdev->dev, err, @@ -1125,6 +1120,11 @@ static int lan966x_probe(struct platform_device *pdev) } } + ports = device_get_named_child_node(&pdev->dev, "ethernet-ports"); + if (!ports) + return dev_err_probe(&pdev->dev, -ENODEV, + "no ethernet-ports child found\n"); + /* init switch */ lan966x_init(lan966x); lan966x_stats_init(lan966x); @@ -1162,6 +1162,8 @@ static int lan966x_probe(struct platform_device *pdev) goto cleanup_ports; } + fwnode_handle_put(ports); + lan966x_mdb_init(lan966x); err = lan966x_fdb_init(lan966x); if (err) @@ -1191,6 +1193,7 @@ static int lan966x_probe(struct platform_device *pdev) lan966x_fdb_deinit(lan966x); cleanup_ports: + fwnode_handle_put(ports); fwnode_handle_put(portnp); lan966x_cleanup_ports(lan966x); From 6bc1fe7dd748ba5e76e7917d110837cafe7b931c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 12 Jan 2023 18:42:51 +0100 Subject: [PATCH 0791/1593] mptcp: explicitly specify sock family at subflow creation time Let the caller specify the to-be-created subflow family. For a given MPTCP socket created with the AF_INET6 family, the current userspace PM can already ask the kernel to create subflows in v4 and v6. If "plain" IPv4 addresses are passed to the kernel, they are automatically mapped in v6 addresses "by accident". This can be problematic because the userspace will need to pass different addresses, now the v4-mapped-v6 addresses to destroy this new subflow. On the other hand, if the MPTCP socket has been created with the AF_INET family, the command to create a subflow in v6 will be accepted but the result will not be the one as expected as new subflow will be created in IPv4 using part of the v6 addresses passed to the kernel: not creating the expected subflow then. No functional change intended for the in-kernel PM where an explicit enforcement is currently in place. This arbitrary enforcement will be leveraged by other patches in a future version. Fixes: 702c2f646d42 ("mptcp: netlink: allow userspace-driven subflow establishment") Cc: stable@vger.kernel.org Co-developed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 3 ++- net/mptcp/subflow.c | 9 +++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b7ad030dfe891..8cd6cc67c2c5c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -98,7 +98,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) struct socket *ssock; int err; - err = mptcp_subflow_create_socket(sk, &ssock); + err = mptcp_subflow_create_socket(sk, sk->sk_family, &ssock); if (err) return err; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a0d1658ce59ee..a9e0355744b6a 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -641,7 +641,8 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a, /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, const struct mptcp_addr_info *remote); -int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock); +int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, + struct socket **new_sock); void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct sockaddr_storage *addr, unsigned short family); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index bd387d4b5a38f..ec54413fb31f7 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1547,7 +1547,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, if (!mptcp_is_fully_established(sk)) goto err_out; - err = mptcp_subflow_create_socket(sk, &sf); + err = mptcp_subflow_create_socket(sk, loc->family, &sf); if (err) goto err_out; @@ -1660,7 +1660,9 @@ static void mptcp_subflow_ops_undo_override(struct sock *ssk) #endif ssk->sk_prot = &tcp_prot; } -int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) + +int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, + struct socket **new_sock) { struct mptcp_subflow_context *subflow; struct net *net = sock_net(sk); @@ -1673,8 +1675,7 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) if (unlikely(!sk->sk_socket)) return -EINVAL; - err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP, - &sf); + err = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, &sf); if (err) return err; From fb00ee4f3343acb2b9222ca9b73b47dd1e1a8efc Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 12 Jan 2023 18:42:52 +0100 Subject: [PATCH 0792/1593] mptcp: netlink: respect v4/v6-only sockets If an MPTCP socket has been created with AF_INET6 and the IPV6_V6ONLY option has been set, the userspace PM would allow creating subflows using IPv4 addresses, e.g. mapped in v6. The kernel side of userspace PM will also accept creating subflows with local and remote addresses having different families. Depending on the subflow socket's family, different behaviours are expected: - If AF_INET is forced with a v6 address, the kernel will take the last byte of the IP and try to connect to that: a new subflow is created but to a non expected address. - If AF_INET6 is forced with a v4 address, the kernel will try to connect to a v4 address (v4-mapped-v6). A -EBADF error from the connect() part is then expected. It is then required to check the given families can be accepted. This is done by using a new helper for addresses family matching, taking care of IPv4 vs IPv4-mapped-IPv6 addresses. This helper will be re-used later by the in-kernel path-manager to use mixed IPv4 and IPv6 addresses. While at it, a clear error message is now reported if there are some conflicts with the families that have been passed by the userspace. Fixes: 702c2f646d42 ("mptcp: netlink: allow userspace-driven subflow establishment") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 25 +++++++++++++++++++++++++ net/mptcp/pm_userspace.c | 7 +++++++ net/mptcp/protocol.h | 3 +++ 3 files changed, 35 insertions(+) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 45e2a48397b95..70f0ced3ca86e 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -420,6 +420,31 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) } } +/* if sk is ipv4 or ipv6_only allows only same-family local and remote addresses, + * otherwise allow any matching local/remote pair + */ +bool mptcp_pm_addr_families_match(const struct sock *sk, + const struct mptcp_addr_info *loc, + const struct mptcp_addr_info *rem) +{ + bool mptcp_is_v4 = sk->sk_family == AF_INET; + +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + bool loc_is_v4 = loc->family == AF_INET || ipv6_addr_v4mapped(&loc->addr6); + bool rem_is_v4 = rem->family == AF_INET || ipv6_addr_v4mapped(&rem->addr6); + + if (mptcp_is_v4) + return loc_is_v4 && rem_is_v4; + + if (ipv6_only_sock(sk)) + return !loc_is_v4 && !rem_is_v4; + + return loc_is_v4 == rem_is_v4; +#else + return mptcp_is_v4 && loc->family == AF_INET && rem->family == AF_INET; +#endif +} + void mptcp_pm_data_reset(struct mptcp_sock *msk) { u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk)); diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 65dcc55a8ad89..ea6ad9da74930 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -294,6 +294,13 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) } sk = (struct sock *)msk; + + if (!mptcp_pm_addr_families_match(sk, &addr_l, &addr_r)) { + GENL_SET_ERR_MSG(info, "families mismatch"); + err = -EINVAL; + goto create_err; + } + lock_sock(sk); err = __mptcp_subflow_connect(sk, &addr_l, &addr_r); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a9e0355744b6a..601469249da80 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -777,6 +777,9 @@ int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, bool require_family, struct mptcp_pm_addr_entry *entry); +bool mptcp_pm_addr_families_match(const struct sock *sk, + const struct mptcp_addr_info *loc, + const struct mptcp_addr_info *rem); void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); From 4656d72c1efa495a58ad6d8b073a60907073e4e6 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 12 Jan 2023 18:42:53 +0100 Subject: [PATCH 0793/1593] selftests: mptcp: userspace: validate v4-v6 subflows mix MPTCP protocol supports having subflows in both IPv4 and IPv6. In Linux, it is possible to have that if the MPTCP socket has been created with AF_INET6 family without the IPV6_V6ONLY option. Here, a new IPv4 subflow is being added to the initial IPv6 connection, then being removed using Netlink commands. Cc: stable@vger.kernel.org # v5.19+ Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- .../selftests/net/mptcp/userspace_pm.sh | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index a29deb9fa024c..ab2d581f28a1d 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -752,6 +752,52 @@ test_subflows() "$server4_token" > /dev/null 2>&1 } +test_subflows_v4_v6_mix() +{ + # Attempt to add a listener at 10.0.2.1: + ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ + $app6_port > /dev/null 2>&1 & + local listener_pid=$! + + # ADD_ADDR4 from server to client machine reusing the subflow port on + # the established v6 connection + :>"$client_evts" + ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\ + $server_addr_id dev ns1eth2 > /dev/null 2>&1 + stdbuf -o0 -e0 printf "ADD_ADDR4 id:%d 10.0.2.1 (ns1) => ns2, reuse port\t\t" $server_addr_id + sleep 0.5 + verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\ + "$server_addr_id" "$app6_port" + + # CREATE_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ + $app6_port token "$client6_token" > /dev/null 2>&1 + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\ + "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\ + "$server_addr_id" "ns2" "ns1" + + # Delete the listener from the server ns, if one was created + kill_wait $listener_pid + + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + + # DESTROY_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ + $app6_port token "$client6_token" > /dev/null 2>&1 + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client6_token" \ + "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\ + "$server_addr_id" "ns2" "ns1" + + # RM_ADDR from server to client machine + ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ + "$server6_token" > /dev/null 2>&1 + sleep 0.5 +} + test_prio() { local count @@ -861,6 +907,7 @@ make_connection "v6" test_announce test_remove test_subflows +test_subflows_v4_v6_mix test_prio test_listener From a22b7388d658ecfcd226600c8c34ce4481e88655 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Thu, 12 Jan 2023 16:55:29 -0800 Subject: [PATCH 0794/1593] sch_htb: Avoid grafting on htb_destroy_class_offload when destroying htb Peek at old qdisc and graft only when deleting a leaf class in the htb, rather than when deleting the htb itself. Do not peek at the qdisc of the netdev queue when destroying the htb. The caller may already have grafted a new qdisc that is not part of the htb structure being destroyed. This fix resolves two use cases. 1. Using tc to destroy the htb. - Netdev was being prematurely activated before the htb was fully destroyed. 2. Using tc to replace the htb with another qdisc (which also leads to the htb being destroyed). - Premature netdev activation like previous case. Newly grafted qdisc was also getting accidentally overwritten when destroying the htb. Fixes: d03b195b5aa0 ("sch_htb: Hierarchical QoS hardware offload") Signed-off-by: Rahul Rameshbabu Reviewed-by: Saeed Mahameed Reviewed-by: Maxim Mikityanskiy Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20230113005528.302625-1-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- net/sched/sch_htb.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 2238edece1a46..f46643850df84 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1549,7 +1549,7 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, struct tc_htb_qopt_offload offload_opt; struct netdev_queue *dev_queue; struct Qdisc *q = cl->leaf.q; - struct Qdisc *old = NULL; + struct Qdisc *old; int err; if (cl->level) @@ -1557,14 +1557,17 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, WARN_ON(!q); dev_queue = htb_offload_get_queue(cl); - old = htb_graft_helper(dev_queue, NULL); - if (destroying) - /* Before HTB is destroyed, the kernel grafts noop_qdisc to - * all queues. + /* When destroying, caller qdisc_graft grafts the new qdisc and invokes + * qdisc_put for the qdisc being destroyed. htb_destroy_class_offload + * does not need to graft or qdisc_put the qdisc being destroyed. + */ + if (!destroying) { + old = htb_graft_helper(dev_queue, NULL); + /* Last qdisc grafted should be the same as cl->leaf.q when + * calling htb_delete. */ - WARN_ON(!(old->flags & TCQ_F_BUILTIN)); - else WARN_ON(old != q); + } if (cl->parent) { _bstats_update(&cl->parent->bstats_bias, @@ -1581,10 +1584,12 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, }; err = htb_offload(qdisc_dev(sch), &offload_opt); - if (!err || destroying) - qdisc_put(old); - else - htb_graft_helper(dev_queue, old); + if (!destroying) { + if (!err) + qdisc_put(old); + else + htb_graft_helper(dev_queue, old); + } if (last_child) return err; From f7b23d1c35d8b8de1425bdfccaefd01f3b7c9d1c Mon Sep 17 00:00:00 2001 From: Dmitry Perchanov Date: Wed, 11 Jan 2023 14:22:10 +0200 Subject: [PATCH 0795/1593] iio: hid: fix the retval in accel_3d_capture_sample Return value should be zero for success. This was forgotten for timestamp feature. Verified on RealSense cameras. Fixes: a96cd0f901ee ("iio: accel: hid-sensor-accel-3d: Add timestamp") Signed-off-by: Dmitry Perchanov Link: https://lore.kernel.org/r/a6dc426498221c81fa71045b41adf782ebd42136.camel@intel.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/hid-sensor-accel-3d.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c index a2def6f9380a3..5eac7ea199931 100644 --- a/drivers/iio/accel/hid-sensor-accel-3d.c +++ b/drivers/iio/accel/hid-sensor-accel-3d.c @@ -280,6 +280,7 @@ static int accel_3d_capture_sample(struct hid_sensor_hub_device *hsdev, hid_sensor_convert_timestamp( &accel_state->common_attributes, *(int64_t *)raw_data); + ret = 0; break; default: break; From eb50cd5bfdac61627a5026566cf3b90ced7b141c Mon Sep 17 00:00:00 2001 From: Dmitry Perchanov Date: Wed, 11 Jan 2023 14:24:25 +0200 Subject: [PATCH 0796/1593] iio: hid: fix the retval in gyro_3d_capture_sample Return value should be zero for success. This was forgotten for timestamp feature. Verified on RealSense cameras. Fixes: 4648cbd8fb92 ("iio: hid-sensor-gyro-3d: Add timestamp channel") Signed-off-by: Dmitry Perchanov Link: https://lore.kernel.org/r/7c1809dc74eb2f58a20595f4d02e76934f8e9219.camel@intel.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/hid-sensor-gyro-3d.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c index 8f0ad022c7f1b..698c50da1f109 100644 --- a/drivers/iio/gyro/hid-sensor-gyro-3d.c +++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c @@ -231,6 +231,7 @@ static int gyro_3d_capture_sample(struct hid_sensor_hub_device *hsdev, gyro_state->timestamp = hid_sensor_convert_timestamp(&gyro_state->common_attributes, *(s64 *)raw_data); + ret = 0; break; default: break; From a323e6b5737bb6e3d3946369b97099abb7dde695 Mon Sep 17 00:00:00 2001 From: Jensen Huang Date: Fri, 13 Jan 2023 14:44:57 +0800 Subject: [PATCH 0797/1593] arm64: dts: rockchip: add missing #interrupt-cells to rk356x pcie2x1 This fixes the following issue: pcieport 0000:00:00.0: of_irq_parse_pci: failed with rc=-22 Signed-off-by: Jensen Huang Link: https://lore.kernel.org/r/20230113064457.7105-1-jensenhuang@friendlyarm.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk356x.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi index 5706c3e24f0a0..c27f1c7f072da 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi @@ -966,6 +966,7 @@ clock-names = "aclk_mst", "aclk_slv", "aclk_dbi", "pclk", "aux"; device_type = "pci"; + #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 7>; interrupt-map = <0 0 0 1 &pcie_intc 0>, <0 0 0 2 &pcie_intc 1>, From 5dc4c995db9eb45f6373a956eb1f69460e69e6d4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 Jan 2023 09:22:43 -0600 Subject: [PATCH 0798/1593] Linux 6.2-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 460716314fb3c..e09fe100efb21 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 2 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From a608da3bd730d718f2d3ebec1c26f9865f8f17ce Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 6 Jan 2023 17:43:06 +0900 Subject: [PATCH 0799/1593] zonefs: Detect append writes at invalid locations Using REQ_OP_ZONE_APPEND operations for synchronous writes to sequential files succeeds regardless of the zone write pointer position, as long as the target zone is not full. This means that if an external (buggy) application writes to the zone of a sequential file underneath the file system, subsequent file write() operation will succeed but the file size will not be correct and the file will contain invalid data written by another application. Modify zonefs_file_dio_append() to check the written sector of an append write (returned in bio->bi_iter.bi_sector) and return -EIO if there is a mismatch with the file zone wp offset field. This change triggers a call to zonefs_io_error() and a zone check. Modify zonefs_io_error_cb() to not expose the unexpected data after the current inode size when the errors=remount-ro mode is used. Other error modes are correctly handled already. Fixes: 02ef12a663c7 ("zonefs: use REQ_OP_ZONE_APPEND for sync DIO") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn --- fs/zonefs/super.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 2c53fbb8d918e..a9c5c3f720adf 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -442,6 +442,10 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, data_size = zonefs_check_zone_condition(inode, zone, false, false); } + } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO && + data_size > isize) { + /* Do not expose garbage data */ + data_size = isize; } /* @@ -805,6 +809,24 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) ret = submit_bio_wait(bio); + /* + * If the file zone was written underneath the file system, the zone + * write pointer may not be where we expect it to be, but the zone + * append write can still succeed. So check manually that we wrote where + * we intended to, that is, at zi->i_wpoffset. + */ + if (!ret) { + sector_t wpsector = + zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT); + + if (bio->bi_iter.bi_sector != wpsector) { + zonefs_warn(inode->i_sb, + "Corrupted write pointer %llu for zone at %llu\n", + wpsector, zi->i_zsector); + ret = -EIO; + } + } + zonefs_file_write_dio_end_io(iocb, size, ret, 0); trace_zonefs_file_dio_append(inode, size, ret); From 6618d69aa129a8fc613e64775d5019524c6f231b Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Tue, 13 Dec 2022 19:03:55 +0100 Subject: [PATCH 0800/1593] rust: print: avoid evaluating arguments in `pr_*` macros in `unsafe` blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the moment it is possible to perform unsafe operations in the arguments of `pr_*` macros since they are evaluated inside an `unsafe` block: let x = &10u32 as *const u32; pr_info!("{}", *x); In other words, this is a soundness issue. Fix it so that it requires an explicit `unsafe` block. Reported-by: Wedson Almeida Filho Reported-by: Domen Puncer Kugler Link: https://github.com/Rust-for-Linux/linux/issues/479 Signed-off-by: Miguel Ojeda Reviewed-by: Boqun Feng Reviewed-by: Gary Guo Reviewed-by: Björn Roy Baron Reviewed-by: Vincenzo Palazzo --- rust/kernel/print.rs | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/rust/kernel/print.rs b/rust/kernel/print.rs index 29bf9c2e8aee6..30103325696d8 100644 --- a/rust/kernel/print.rs +++ b/rust/kernel/print.rs @@ -142,17 +142,24 @@ pub fn call_printk_cont(args: fmt::Arguments<'_>) { macro_rules! print_macro ( // The non-continuation cases (most of them, e.g. `INFO`). ($format_string:path, false, $($arg:tt)+) => ( - // SAFETY: This hidden macro should only be called by the documented - // printing macros which ensure the format string is one of the fixed - // ones. All `__LOG_PREFIX`s are null-terminated as they are generated - // by the `module!` proc macro or fixed values defined in a kernel - // crate. - unsafe { - $crate::print::call_printk( - &$format_string, - crate::__LOG_PREFIX, - format_args!($($arg)+), - ); + // To remain sound, `arg`s must be expanded outside the `unsafe` block. + // Typically one would use a `let` binding for that; however, `format_args!` + // takes borrows on the arguments, but does not extend the scope of temporaries. + // Therefore, a `match` expression is used to keep them around, since + // the scrutinee is kept until the end of the `match`. + match format_args!($($arg)+) { + // SAFETY: This hidden macro should only be called by the documented + // printing macros which ensure the format string is one of the fixed + // ones. All `__LOG_PREFIX`s are null-terminated as they are generated + // by the `module!` proc macro or fixed values defined in a kernel + // crate. + args => unsafe { + $crate::print::call_printk( + &$format_string, + crate::__LOG_PREFIX, + args, + ); + } } ); From 216f764716f34fe68cedc7296ae2043a7727e640 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 3 Jan 2023 16:47:55 +0800 Subject: [PATCH 0801/1593] block, bfq: switch 'bfqg->ref' to use atomic refcount apis The updating of 'bfqg->ref' should be protected by 'bfqd->lock', however, during code review, we found that bfq_pd_free() update 'bfqg->ref' without holding the lock, which is problematic: 1) bfq_pd_free() triggered by removing cgroup is called asynchronously; 2) bfqq will grab bfqg reference, and exit bfqq will drop the reference, which can concurrent with 1). Unfortunately, 'bfqd->lock' can't be held here because 'bfqd' might already be freed in bfq_pd_free(). Fix the problem by using atomic refcount apis. Signed-off-by: Yu Kuai Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230103084755.1256479-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 8 +++----- block/bfq-iosched.h | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 1b2829e99dad0..7d9b15f0dbd57 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -316,14 +316,12 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq) static void bfqg_get(struct bfq_group *bfqg) { - bfqg->ref++; + refcount_inc(&bfqg->ref); } static void bfqg_put(struct bfq_group *bfqg) { - bfqg->ref--; - - if (bfqg->ref == 0) + if (refcount_dec_and_test(&bfqg->ref)) kfree(bfqg); } @@ -530,7 +528,7 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q, } /* see comments in bfq_bic_update_cgroup for why refcounting */ - bfqg_get(bfqg); + refcount_set(&bfqg->ref, 1); return &bfqg->pd; } diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 41aa151ccc223..466e4865ace63 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -928,7 +928,7 @@ struct bfq_group { char blkg_path[128]; /* reference counter (see comments in bfq_bic_update_cgroup) */ - int ref; + refcount_t ref; /* Is bfq_group still online? */ bool online; From b72d13977689f0c717444010e108c4f20658dfee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duke=20Xin=28=E8=BE=9B=E5=AE=89=E6=96=87=29?= Date: Tue, 27 Dec 2022 01:51:27 -0800 Subject: [PATCH 0802/1593] USB: serial: option: add Quectel EM05-G (RS) modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EM05-G (RS) modem has 2 USB configurations that are configurable via the AT command AT+QCFG="usbnet",[ 0 | 2 ] which make the modem enumerate with the following interfaces, respectively: "RMNET" : AT + DIAG + NMEA + Modem + QMI "MBIM" : MBIM + AT + DIAG + NMEA + Modem The detailed description of the USB configuration for each mode as follows: RMNET Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 21 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0314 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms MBIM Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0314 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Duke Xin(辛安文) Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 07ffff889ed1e..58df076b69444 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -258,6 +258,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM05G_CS 0x030c #define QUECTEL_PRODUCT_EM05G_SG 0x0311 #define QUECTEL_PRODUCT_EM05G_GR 0x0313 +#define QUECTEL_PRODUCT_EM05G_RS 0x0314 #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_RM520N 0x0801 @@ -1167,6 +1168,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_GR, 0xff), .driver_info = RSVD(6) | ZLP }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_RS, 0xff), + .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_SG, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) }, From d9bbb15881046bd76f8710c76e26a740eee997ef Mon Sep 17 00:00:00 2001 From: Ali Mirghasemi Date: Wed, 28 Dec 2022 15:08:47 +0330 Subject: [PATCH 0803/1593] USB: serial: option: add Quectel EC200U modem Add support for EC200U modem 0x0901: EC200U - AT + AP + CP + NMEA + DIAG + MOS usb-device output: T: Bus=01 Lev=02 Prnt=02 Port=02 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0901 Rev= 3.18 S: Manufacturer=Android S: Product=Android C:* #Ifs= 9 Cfg#= 1 Atr=e0 MxPwr=400mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=06 Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 512 Ivl=4096ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 7 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=8a(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=03(Int.) MxPS= 512 Ivl=4096ms I:* If#= 8 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=08(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Ali Mirghasemi Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 58df076b69444..f00eaff4c9130 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -262,6 +262,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_RM520N 0x0801 +#define QUECTEL_PRODUCT_EC200U 0x0901 #define QUECTEL_PRODUCT_EC200S_CN 0x6002 #define QUECTEL_PRODUCT_EC200T 0x6026 #define QUECTEL_PRODUCT_RM500K 0x7001 @@ -1189,6 +1190,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200U, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) }, From 3f9e76e31704a325170e5aec2243c8d084d74854 Mon Sep 17 00:00:00 2001 From: Michael Adler Date: Tue, 3 Jan 2023 14:48:50 +0100 Subject: [PATCH 0804/1593] USB: serial: cp210x: add SCALANCE LPE-9000 device id Add the USB serial console device ID for Siemens SCALANCE LPE-9000 which have a USB port for their serial console. Signed-off-by: Michael Adler Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 67372acc23529..832ad592b7ef3 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -60,6 +60,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */ { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ + { USB_DEVICE(0x0908, 0x0070) }, /* Siemens SCALANCE LPE-9000 USB Serial Console */ { USB_DEVICE(0x0908, 0x01FF) }, /* Siemens RUGGEDCOM USB Serial Console */ { USB_DEVICE(0x0988, 0x0578) }, /* Teraoka AD2000 */ { USB_DEVICE(0x0B00, 0x3070) }, /* Ingenico 3070 */ From 1541dd0097c0f8f470e76eddf5120fc55a7e3101 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duke=20Xin=28=E8=BE=9B=E5=AE=89=E6=96=87=29?= Date: Sun, 15 Jan 2023 18:07:27 -0800 Subject: [PATCH 0805/1593] USB: serial: option: add Quectel EM05CN (SG) modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EM05CN (SG) modem has 2 USB configurations that are configurable via the AT command AT+QCFG="usbnet",[ 0 | 2 ] which make the modem enumerate with the following interfaces, respectively: "MBIM" : AT + MBIM + DIAG + NMEA + MODEM "RMNET" : AT + DIAG + NMEA + Modem + QMI The detailed description of the USB configuration for each mode as follows: MBIM Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0310 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-CN C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 1 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 2 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms RMNET Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0310 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-CN C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Duke Xin(辛安文) Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index f00eaff4c9130..2b1d5472f5788 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -256,6 +256,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM05G 0x030a #define QUECTEL_PRODUCT_EM060K 0x030b #define QUECTEL_PRODUCT_EM05G_CS 0x030c +#define QUECTEL_PRODUCT_EM05CN_SG 0x0310 #define QUECTEL_PRODUCT_EM05G_SG 0x0311 #define QUECTEL_PRODUCT_EM05G_GR 0x0313 #define QUECTEL_PRODUCT_EM05G_RS 0x0314 @@ -1163,6 +1164,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05CN_SG, 0xff), + .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_CS, 0xff), From 71dfd381a7c051f16a61f82fbd38a4cca563bdca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duke=20Xin=28=E8=BE=9B=E5=AE=89=E6=96=87=29?= Date: Sun, 15 Jan 2023 18:33:28 -0800 Subject: [PATCH 0806/1593] USB: serial: option: add Quectel EM05CN modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EM05CN modem has 2 USB configurations that are configurable via the AT command AT+QCFG="usbnet",[ 0 | 2 ] which make the modem enumerate with the following interfaces, respectively: "MBIM" : AT + MBIM + DIAG + NMEA + MODEM "RMNET" : AT + DIAG + NMEA + Modem + QMI The detailed description of the USB configuration for each mode as follows: MBIM Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0312 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-CN C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 1 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 2 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms RMNET Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0312 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-CN C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Duke Xin(辛安文) Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 2b1d5472f5788..ee5ac4ef7e162 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -258,6 +258,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM05G_CS 0x030c #define QUECTEL_PRODUCT_EM05CN_SG 0x0310 #define QUECTEL_PRODUCT_EM05G_SG 0x0311 +#define QUECTEL_PRODUCT_EM05CN 0x0312 #define QUECTEL_PRODUCT_EM05G_GR 0x0313 #define QUECTEL_PRODUCT_EM05G_RS 0x0314 #define QUECTEL_PRODUCT_EM12 0x0512 @@ -1164,6 +1165,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05CN, 0xff), + .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05CN_SG, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff), From 67a90bccad3d20829bea35cb38ad744d9b6e30f4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 12 Jan 2023 22:36:39 -0800 Subject: [PATCH 0807/1593] fbdev: fbmon: fix function name in kernel-doc Fix a kernel-doc warning by correcting the function name in the kernel-doc comment: drivers/video/fbdev/core/fbmon.c:1073: warning: expecting prototype for fb_get_hblank_by_freq(). Prototype was for fb_get_hblank_by_hfreq() instead Signed-off-by: Randy Dunlap Cc: Daniel Vetter Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Helge Deller --- drivers/video/fbdev/core/fbmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fbmon.c b/drivers/video/fbdev/core/fbmon.c index b0e690f41025a..79e5bfbdd34c2 100644 --- a/drivers/video/fbdev/core/fbmon.c +++ b/drivers/video/fbdev/core/fbmon.c @@ -1050,7 +1050,7 @@ static u32 fb_get_vblank(u32 hfreq) } /** - * fb_get_hblank_by_freq - get horizontal blank time given hfreq + * fb_get_hblank_by_hfreq - get horizontal blank time given hfreq * @hfreq: horizontal freq * @xres: horizontal resolution in pixels * From ee9fffdc3887a46eab3209bc46b228b543c75fa6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 14 Jan 2023 09:54:04 +0100 Subject: [PATCH 0808/1593] fbdev: omapfb: Use kstrtobool() instead of strtobool() strtobool() is the same as kstrtobool(). However, the latter is more used within the kernel. In order to remove strtobool() and slightly simplify kstrtox.h, switch to the other function name. While at it, include the corresponding header file () Signed-off-by: Christophe JAILLET Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/dss/display-sysfs.c | 7 ++++--- drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c | 7 ++++--- drivers/video/fbdev/omap2/omapfb/dss/overlay-sysfs.c | 3 ++- drivers/video/fbdev/omap2/omapfb/omapfb-sysfs.c | 3 ++- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/dss/display-sysfs.c b/drivers/video/fbdev/omap2/omapfb/dss/display-sysfs.c index bc5a44c2a144a..ae937854403b1 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/display-sysfs.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/display-sysfs.c @@ -10,6 +10,7 @@ #define DSS_SUBSYS_NAME "DISPLAY" #include +#include #include #include #include @@ -36,7 +37,7 @@ static ssize_t display_enabled_store(struct omap_dss_device *dssdev, int r; bool enable; - r = strtobool(buf, &enable); + r = kstrtobool(buf, &enable); if (r) return r; @@ -73,7 +74,7 @@ static ssize_t display_tear_store(struct omap_dss_device *dssdev, if (!dssdev->driver->enable_te || !dssdev->driver->get_te) return -ENOENT; - r = strtobool(buf, &te); + r = kstrtobool(buf, &te); if (r) return r; @@ -183,7 +184,7 @@ static ssize_t display_mirror_store(struct omap_dss_device *dssdev, if (!dssdev->driver->set_mirror || !dssdev->driver->get_mirror) return -ENOENT; - r = strtobool(buf, &mirror); + r = kstrtobool(buf, &mirror); if (r) return r; diff --git a/drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c b/drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c index ba21c4a2633dd..1b644be5fe2ed 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c @@ -10,6 +10,7 @@ #define DSS_SUBSYS_NAME "MANAGER" #include +#include #include #include #include @@ -246,7 +247,7 @@ static ssize_t manager_trans_key_enabled_store(struct omap_overlay_manager *mgr, bool enable; int r; - r = strtobool(buf, &enable); + r = kstrtobool(buf, &enable); if (r) return r; @@ -290,7 +291,7 @@ static ssize_t manager_alpha_blending_enabled_store( if(!dss_has_feature(FEAT_ALPHA_FIXED_ZORDER)) return -ENODEV; - r = strtobool(buf, &enable); + r = kstrtobool(buf, &enable); if (r) return r; @@ -329,7 +330,7 @@ static ssize_t manager_cpr_enable_store(struct omap_overlay_manager *mgr, if (!dss_has_feature(FEAT_CPR)) return -ENODEV; - r = strtobool(buf, &enable); + r = kstrtobool(buf, &enable); if (r) return r; diff --git a/drivers/video/fbdev/omap2/omapfb/dss/overlay-sysfs.c b/drivers/video/fbdev/omap2/omapfb/dss/overlay-sysfs.c index 601c0beb6de96..1da4fb1c77b4a 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/overlay-sysfs.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/overlay-sysfs.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include