From 787e1853ae8a951233ceab1b01c862d9c668358b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 8 Jan 2018 07:47:36 +0100 Subject: [PATCH 001/269] iio: adc: aspeed: Fix error handling path The labels and branching order of the error path of 'aspeed_adc_probe()' are broken. Re-order the labels and goto statements. Signed-off-by: Christophe JAILLET Signed-off-by: Jonathan Cameron --- drivers/iio/adc/aspeed_adc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c index 327a49ba19916..9515ca165dfdb 100644 --- a/drivers/iio/adc/aspeed_adc.c +++ b/drivers/iio/adc/aspeed_adc.c @@ -243,7 +243,7 @@ static int aspeed_adc_probe(struct platform_device *pdev) ASPEED_ADC_INIT_POLLING_TIME, ASPEED_ADC_INIT_TIMEOUT); if (ret) - goto scaler_error; + goto poll_timeout_error; } /* Start all channels in normal mode. */ @@ -274,9 +274,10 @@ static int aspeed_adc_probe(struct platform_device *pdev) writel(ASPEED_OPERATION_MODE_POWER_DOWN, data->base + ASPEED_REG_ENGINE_CONTROL); clk_disable_unprepare(data->clk_scaler->clk); -reset_error: - reset_control_assert(data->rst); clk_enable_error: +poll_timeout_error: + reset_control_assert(data->rst); +reset_error: clk_hw_unregister_divider(data->clk_scaler); scaler_error: clk_hw_unregister_divider(data->clk_prescaler); From e31b617d0a63c6558485aaa730fd162faa95a766 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 22 Jan 2018 11:53:12 +0200 Subject: [PATCH 002/269] staging: iio: adc: ad7192: fix external frequency setting The external clock frequency was set only when selecting the internal clock, which is fixed at 4.9152 Mhz. This is incorrect, since it should be set when any of the external clock or crystal settings is selected. Added range validation for the external (crystal/clock) frequency setting. Valid values are between 2.4576 and 5.12 Mhz. Signed-off-by: Alexandru Ardelean Cc: Signed-off-by: Jonathan Cameron --- drivers/staging/iio/adc/ad7192.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/staging/iio/adc/ad7192.c b/drivers/staging/iio/adc/ad7192.c index f01595593ce2d..425e8b82533bb 100644 --- a/drivers/staging/iio/adc/ad7192.c +++ b/drivers/staging/iio/adc/ad7192.c @@ -141,6 +141,8 @@ #define AD7192_GPOCON_P1DAT BIT(1) /* P1 state */ #define AD7192_GPOCON_P0DAT BIT(0) /* P0 state */ +#define AD7192_EXT_FREQ_MHZ_MIN 2457600 +#define AD7192_EXT_FREQ_MHZ_MAX 5120000 #define AD7192_INT_FREQ_MHZ 4915200 /* NOTE: @@ -218,6 +220,12 @@ static int ad7192_calibrate_all(struct ad7192_state *st) ARRAY_SIZE(ad7192_calib_arr)); } +static inline bool ad7192_valid_external_frequency(u32 freq) +{ + return (freq >= AD7192_EXT_FREQ_MHZ_MIN && + freq <= AD7192_EXT_FREQ_MHZ_MAX); +} + static int ad7192_setup(struct ad7192_state *st, const struct ad7192_platform_data *pdata) { @@ -243,17 +251,20 @@ static int ad7192_setup(struct ad7192_state *st, id); switch (pdata->clock_source_sel) { - case AD7192_CLK_EXT_MCLK1_2: - case AD7192_CLK_EXT_MCLK2: - st->mclk = AD7192_INT_FREQ_MHZ; - break; case AD7192_CLK_INT: case AD7192_CLK_INT_CO: - if (pdata->ext_clk_hz) - st->mclk = pdata->ext_clk_hz; - else - st->mclk = AD7192_INT_FREQ_MHZ; + st->mclk = AD7192_INT_FREQ_MHZ; break; + case AD7192_CLK_EXT_MCLK1_2: + case AD7192_CLK_EXT_MCLK2: + if (ad7192_valid_external_frequency(pdata->ext_clk_hz)) { + st->mclk = pdata->ext_clk_hz; + break; + } + dev_err(&st->sd.spi->dev, "Invalid frequency setting %u\n", + pdata->ext_clk_hz); + ret = -EINVAL; + goto out; default: ret = -EINVAL; goto out; From a3b5655ebdb501a98a45c0d3265dca9f2fe0218a Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Tue, 23 Jan 2018 17:04:56 +0100 Subject: [PATCH 003/269] iio: adc: stm32: fix stm32h7_adc_enable error handling Error handling in stm32h7_adc_enable routine doesn't unwind enable sequence correctly. ADEN can only be cleared by hardware (e.g. by writing one to ADDIS). It's also better to clear ADRDY just after it's been set by hardware. Fixes: 95e339b6e85d ("iio: adc: stm32: add support for STM32H7") Signed-off-by: Fabrice Gasnier Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 7f5def465340e..9a2583caedaae 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -722,8 +722,6 @@ static int stm32h7_adc_enable(struct stm32_adc *adc) int ret; u32 val; - /* Clear ADRDY by writing one, then enable ADC */ - stm32_adc_set_bits(adc, STM32H7_ADC_ISR, STM32H7_ADRDY); stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADEN); /* Poll for ADRDY to be set (after adc startup time) */ @@ -731,8 +729,11 @@ static int stm32h7_adc_enable(struct stm32_adc *adc) val & STM32H7_ADRDY, 100, STM32_ADC_TIMEOUT_US); if (ret) { - stm32_adc_clr_bits(adc, STM32H7_ADC_CR, STM32H7_ADEN); + stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADDIS); dev_err(&indio_dev->dev, "Failed to enable ADC\n"); + } else { + /* Clear ADRDY by writing one */ + stm32_adc_set_bits(adc, STM32H7_ADC_ISR, STM32H7_ADRDY); } return ret; From 7d2b8e6aaf9ee87910c2337e1c59bb5d3e3ba8c5 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 25 Jan 2018 14:30:45 +0200 Subject: [PATCH 004/269] staging: iio: ad5933: switch buffer mode to software Since commit 152a6a884ae1 ("staging:iio:accel:sca3000 move to hybrid hard / soft buffer design.") the buffer mechanism has changed and the INDIO_BUFFER_HARDWARE flag has been unused. Since commit 2d6ca60f3284 ("iio: Add a DMAengine framework based buffer") the INDIO_BUFFER_HARDWARE flag has been re-purposed for DMA buffers. This driver has lagged behind these changes, and in order for buffers to work, the INDIO_BUFFER_SOFTWARE needs to be used. Signed-off-by: Alexandru Ardelean Fixes: 2d6ca60f3284 ("iio: Add a DMAengine framework based buffer") Cc: Signed-off-by: Jonathan Cameron --- drivers/staging/iio/impedance-analyzer/ad5933.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index 2b28fb9c0048e..3bcf494663617 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -648,8 +648,6 @@ static int ad5933_register_ring_funcs_and_init(struct iio_dev *indio_dev) /* Ring buffer functions - here trigger setup related */ indio_dev->setup_ops = &ad5933_ring_setup_ops; - indio_dev->modes |= INDIO_BUFFER_HARDWARE; - return 0; } @@ -762,7 +760,7 @@ static int ad5933_probe(struct i2c_client *client, indio_dev->dev.parent = &client->dev; indio_dev->info = &ad5933_info; indio_dev->name = id->name; - indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->modes = (INDIO_BUFFER_SOFTWARE | INDIO_DIRECT_MODE); indio_dev->channels = ad5933_channels; indio_dev->num_channels = ARRAY_SIZE(ad5933_channels); From 4a8842de8db4953fdda7866626b78b12fb8adb97 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Fri, 19 Jan 2018 16:22:05 +0100 Subject: [PATCH 005/269] scsi: mpt3sas: fix an out of bound write cpu_msix_table is allocated to store online cpus, but pci_irq_get_affinity may return cpu_possible_mask which is then used to access cpu_msix_table. That causes bad user experience. Fix limits access to only online cpus, I've also added an additional test to protect from an unlikely change in cpu_online_mask. [mkp: checkpatch] Fixes: 1d55abc0e98a ("scsi: mpt3sas: switch to pci_alloc_irq_vectors") Signed-off-by: Tomas Henzl Acked-by: Suganath Prabu Subramani Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 13d6e4ec3022c..59a87ca328d36 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2410,8 +2410,11 @@ _base_assign_reply_queues(struct MPT3SAS_ADAPTER *ioc) continue; } - for_each_cpu(cpu, mask) + for_each_cpu_and(cpu, mask, cpu_online_mask) { + if (cpu >= ioc->cpu_msix_table_sz) + break; ioc->cpu_msix_table[cpu] = reply_q->msix_index; + } } return; } From 2ce87cc5b269510de9ca1185ca8a6e10ec78c069 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 23 Jan 2018 11:05:21 -0800 Subject: [PATCH 006/269] scsi: qla2xxx: Fix memory corruption during hba reset test This patch fixes memory corrpution while performing HBA Reset test. Following stack trace is seen: [ 466.397219] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 [ 466.433669] IP: [] qlt_free_session_done+0x260/0x5f0 [qla2xxx] [ 466.467731] PGD 0 [ 466.476718] Oops: 0000 [#1] SMP Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 12ee6e02d146d..afcb5567998a5 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3625,6 +3625,8 @@ qla2x00_remove_one(struct pci_dev *pdev) } qla2x00_wait_for_hba_ready(base_vha); + qla2x00_wait_for_sess_deletion(base_vha); + /* * if UNLOAD flag is already set, then continue unload, * where it was set first. From c39813652700f3df552b6557530f1e5f782dbe2f Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Tue, 23 Jan 2018 20:11:32 -0600 Subject: [PATCH 007/269] scsi: ibmvfc: fix misdefined reserved field in ibmvfc_fcp_rsp_info The fcp_rsp_info structure as defined in the FC spec has an initial 3 bytes reserved field. The ibmvfc driver mistakenly defined this field as 4 bytes resulting in the rsp_code field being defined in what should be the start of the second reserved field and thus always being reported as zero by the driver. Ideally, we should wire ibmvfc up with libfc for the sake of code deduplication, and ease of maintaining standardized structures in a single place. However, for now simply fixup the definition in ibmvfc for backporting to distros on older kernels. Wiring up with libfc will be done in a followup patch. Cc: Reported-by: Hannes Reinecke Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index 9a0696f68f371..b81a53c4a9a8b 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -367,7 +367,7 @@ enum ibmvfc_fcp_rsp_info_codes { }; struct ibmvfc_fcp_rsp_info { - __be16 reserved; + u8 reserved[3]; u8 rsp_code; u8 reserved2[4]; }__attribute__((packed, aligned (2))); From 84af7e8b895088d89f246d6b0f82717fafdebf61 Mon Sep 17 00:00:00 2001 From: Sujit Reddy Thumma Date: Wed, 24 Jan 2018 09:52:35 +0530 Subject: [PATCH 008/269] scsi: ufs: Enable quirk to ignore sending WRITE_SAME command WRITE_SAME command is not supported by UFS. Enable a quirk for the upper level drivers to not send WRITE SAME command. [mkp: botched patch, applied by hand] Signed-off-by: Sujit Reddy Thumma Signed-off-by: Subhash Jadavani Signed-off-by: Asutosh Das Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 011c3369082c6..8196976182c92 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4352,6 +4352,8 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev) /* REPORT SUPPORTED OPERATION CODES is not supported */ sdev->no_report_opcodes = 1; + /* WRITE_SAME command is not supported */ + sdev->no_write_same = 1; ufshcd_set_queue_depth(sdev); From 52797a1d4b39716ddd300a3c463ffaf1330600a0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 24 Jan 2018 14:58:01 +0000 Subject: [PATCH 009/269] scsi: csiostor: remove redundant assignment to pointer 'ln' The pointer ln is assigned a value that is never read, it is re-assigned a new value in the list_for_each loop hence the initialization is redundant and can be removed. Cleans up clang warning: drivers/scsi/csiostor/csio_lnode.c:117:21: warning: Value stored to 'ln' during its initialization is never read Signed-off-by: Colin Ian King Acked-by: Varun Prakash Signed-off-by: Martin K. Petersen --- drivers/scsi/csiostor/csio_lnode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/csiostor/csio_lnode.c b/drivers/scsi/csiostor/csio_lnode.c index be5ee2d378155..7dbbbb81a1e7d 100644 --- a/drivers/scsi/csiostor/csio_lnode.c +++ b/drivers/scsi/csiostor/csio_lnode.c @@ -114,7 +114,7 @@ static enum csio_ln_ev fwevt_to_lnevt[] = { static struct csio_lnode * csio_ln_lookup_by_portid(struct csio_hw *hw, uint8_t portid) { - struct csio_lnode *ln = hw->rln; + struct csio_lnode *ln; struct list_head *tmp; /* Match siblings lnode with portid */ From ecf7ff49945f5741fa1da112f994939f942031d3 Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Wed, 24 Jan 2018 08:07:06 -0800 Subject: [PATCH 010/269] scsi: bnx2fc: Fix check in SCSI completion handler for timed out request When a request times out we set the io_req flag BNX2FC_FLAG_IO_COMPL so that if a subsequent completion comes in on that task ID we will ignore it. The issue is that in the check for this flag there is a missing return so we will continue to process a request which may have already been returned to the ownership of the SCSI layer. This can cause unpredictable results. Solution is to add in the missing return. [mkp: typo plus title shortening] Signed-off-by: Chad Dupuis Reviewed-by: Laurence Oberman Tested-by: Laurence Oberman Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc_io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index 8e2f767147cb4..5a645b8b9af17 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -1889,6 +1889,7 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req, /* we will not receive ABTS response for this IO */ BNX2FC_IO_DBG(io_req, "Timer context finished processing " "this scsi cmd\n"); + return; } /* Cancel the timeout_work, as we received IO completion */ From e6f791d95313c85f3dd4a26141e28e50ae9aa0ae Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 25 Jan 2018 17:13:40 +0300 Subject: [PATCH 011/269] scsi: sym53c8xx_2: iterator underflow in sym_getsync() We wanted to exit the loop with "div" set to zero, but instead, if we don't hit the break then "div" is -1 when we finish the loop. It leads to an array underflow a few lines later. Signed-off-by: Dan Carpenter Reviewed-by: Johannes Thumshirn Acked-by: Matthew Wilcox Signed-off-by: Martin K. Petersen --- drivers/scsi/sym53c8xx_2/sym_hipd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c index ca360daa6a253..378af306fda17 100644 --- a/drivers/scsi/sym53c8xx_2/sym_hipd.c +++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c @@ -536,7 +536,7 @@ sym_getsync(struct sym_hcb *np, u_char dt, u_char sfac, u_char *divp, u_char *fa * Look for the greatest clock divisor that allows an * input speed faster than the period. */ - while (div-- > 0) + while (--div > 0) if (kpc >= (div_10M[div] << 2)) break; /* From a7043e9529f3c367cc4d82997e00be034cbe57ca Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 25 Jan 2018 17:27:27 +0300 Subject: [PATCH 012/269] scsi: mptfusion: Add bounds check in mptctl_hp_targetinfo() My static checker complains about an out of bounds read: drivers/message/fusion/mptctl.c:2786 mptctl_hp_targetinfo() error: buffer overflow 'hd->sel_timeout' 255 <= u32max. It's true that we probably should have a bounds check here. Signed-off-by: Dan Carpenter Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/message/fusion/mptctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c index 8d12017b9893a..4470630dd5455 100644 --- a/drivers/message/fusion/mptctl.c +++ b/drivers/message/fusion/mptctl.c @@ -2687,6 +2687,8 @@ mptctl_hp_targetinfo(unsigned long arg) __FILE__, __LINE__, iocnum); return -ENODEV; } + if (karg.hdr.id >= MPT_MAX_FC_DEVICES) + return -EINVAL; dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_hp_targetinfo called.\n", ioc->name)); From c02189e12ce3bf3808cb880569d3b10249f50bd9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 25 Jan 2018 08:24:29 -0800 Subject: [PATCH 013/269] scsi: qla2xxx: Avoid triggering undefined behavior in qla2x00_mbx_completion() A left shift must shift less than the bit width of the left argument. Avoid triggering undefined behavior if ha->mbx_count == 32. This patch avoids that UBSAN reports the following complaint: UBSAN: Undefined behaviour in drivers/scsi/qla2xxx/qla_isr.c:275:14 shift exponent 32 is too large for 32-bit type 'int' Call Trace: dump_stack+0x4e/0x6c ubsan_epilogue+0xd/0x3b __ubsan_handle_shift_out_of_bounds+0x112/0x14c qla2x00_mbx_completion+0x1c5/0x25d [qla2xxx] qla2300_intr_handler+0x1ea/0x3bb [qla2xxx] qla2x00_mailbox_command+0x77b/0x139a [qla2xxx] qla2x00_mbx_reg_test+0x83/0x114 [qla2xxx] qla2x00_chip_diag+0x354/0x45f [qla2xxx] qla2x00_initialize_adapter+0x2c2/0xa4e [qla2xxx] qla2x00_probe_one+0x1681/0x392e [qla2xxx] pci_device_probe+0x10b/0x1f1 driver_probe_device+0x21f/0x3a4 __driver_attach+0xa9/0xe1 bus_for_each_dev+0x6e/0xb5 driver_attach+0x22/0x3c bus_add_driver+0x1d1/0x2ae driver_register+0x78/0x130 __pci_register_driver+0x75/0xa8 qla2x00_module_init+0x21b/0x267 [qla2xxx] do_one_initcall+0x5a/0x1e2 do_init_module+0x9d/0x285 load_module+0x20db/0x38e3 SYSC_finit_module+0xa8/0xbc SyS_finit_module+0x9/0xb do_syscall_64+0x77/0x271 entry_SYSCALL64_slow_path+0x25/0x25 Reported-by: Meelis Roos Signed-off-by: Bart Van Assche Cc: Himanshu Madhani Reviewed-by: Laurence Oberman Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_isr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 14109d86c3f6a..89f93ebd819d7 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -272,7 +272,8 @@ qla2x00_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0) struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; /* Read all mbox registers? */ - mboxes = (1 << ha->mbx_count) - 1; + WARN_ON_ONCE(ha->mbx_count > 32); + mboxes = (1ULL << ha->mbx_count) - 1; if (!ha->mcp) ql_dbg(ql_dbg_async, vha, 0x5001, "MBX pointer ERROR.\n"); else @@ -2880,7 +2881,8 @@ qla24xx_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0) struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; /* Read all mbox registers? */ - mboxes = (1 << ha->mbx_count) - 1; + WARN_ON_ONCE(ha->mbx_count > 32); + mboxes = (1ULL << ha->mbx_count) - 1; if (!ha->mcp) ql_dbg(ql_dbg_async, vha, 0x504e, "MBX pointer ERROR.\n"); else From 7c0dde2b3d99fe3c54edada408d10dcd6ee0c1f7 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Sun, 28 Jan 2018 07:23:54 +0000 Subject: [PATCH 014/269] scsi: aic7xxx: remove aiclib.c aiclib.c is unused (and contains no code) since commit 1ff927306e08 ("[SCSI] aic7xxx: remove aiclib.c") 13 years later, finish the cleaning by removing it from tree. [mkp: tweaked patch description] Signed-off-by: Corentin Labbe Signed-off-by: Martin K. Petersen --- drivers/scsi/aic7xxx/aiclib.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 drivers/scsi/aic7xxx/aiclib.c diff --git a/drivers/scsi/aic7xxx/aiclib.c b/drivers/scsi/aic7xxx/aiclib.c deleted file mode 100644 index 828ae3d9a510f..0000000000000 --- a/drivers/scsi/aic7xxx/aiclib.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Implementation of Utility functions for all SCSI device types. - * - * Copyright (c) 1997, 1998, 1999 Justin T. Gibbs. - * Copyright (c) 1997, 1998 Kenneth D. Merry. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification, immediately at the beginning of the file. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/cam/scsi/scsi_all.c,v 1.38 2002/09/23 04:56:35 mjacob Exp $ - * $Id$ - */ - -#include "aiclib.h" - From 2e8233ab17411920bee87c0dd71790f11904f3b8 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 29 Jan 2018 12:30:16 +0000 Subject: [PATCH 015/269] scsi: Remove Makefile entry for oktagon files Remove line using non-existent files which were removed in commit 642978beb483 ("[SCSI] remove m68k NCR53C9x based drivers") [mkp: tweaked patch description] Signed-off-by: Corentin Labbe Acked-by: Geert Uytterhoeven Signed-off-by: Martin K. Petersen --- drivers/scsi/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index fcfd28d2884c5..de1b3fce936d5 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -185,7 +185,6 @@ ncr53c8xx-flags-$(CONFIG_SCSI_ZALON) \ CFLAGS_ncr53c8xx.o := $(ncr53c8xx-flags-y) $(ncr53c8xx-flags-m) zalon7xx-objs := zalon.o ncr53c8xx.o NCR_Q720_mod-objs := NCR_Q720.o ncr53c8xx.o -oktagon_esp_mod-objs := oktagon_esp.o oktagon_io.o # Files generated that shall be removed upon make clean clean-files := 53c700_d.h 53c700_u.h From f5572475e999a1e9cd44f8704023a815f611d377 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 23 Jan 2018 15:50:03 -0800 Subject: [PATCH 016/269] scsi: scsi_dh: Document alua_rtpg_queue() arguments Since commit 3a025e1d1c2e ("Add optional check for bad kernel-doc comments") building with W=1 causes warnings to appear for issues in kernel-doc headers. This patch avoids that the following warnings are reported when building with W=1: drivers/scsi/device_handler/scsi_dh_alua.c:867: warning: No description found for parameter 'pg' drivers/scsi/device_handler/scsi_dh_alua.c:867: warning: No description found for parameter 'sdev' drivers/scsi/device_handler/scsi_dh_alua.c:867: warning: No description found for parameter 'qdata' drivers/scsi/device_handler/scsi_dh_alua.c:867: warning: No description found for parameter 'force' Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 022e421c21851..4b44325d1a828 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -876,6 +876,11 @@ static void alua_rtpg_work(struct work_struct *work) /** * alua_rtpg_queue() - cause RTPG to be submitted asynchronously + * @pg: ALUA port group associated with @sdev. + * @sdev: SCSI device for which to submit an RTPG. + * @qdata: Information about the callback to invoke after the RTPG. + * @force: Whether or not to submit an RTPG if a work item that will submit an + * RTPG already has been scheduled. * * Returns true if and only if alua_rtpg_work() will be called asynchronously. * That function is responsible for calling @qdata->fn(). From c028c6309a9f9b385ba8c0c984eb2b6c3f368650 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 30 Jan 2018 13:17:38 +0100 Subject: [PATCH 017/269] cfg80211: use only 1Mbps for basic rates in mesh Mesh used to use the mandatory rates as basic rates, but we got the calculation of mandatory rates wrong until some time ago. Fix this this broke interoperability with older versions since now more basic rates are required, and thus the MBSS isn't the same and the network stops working. Fix this by simply using only 1Mbps as the basic rate in 2.4GHz. Since the changed mandatory rates only affected 2.4GHz, this is all we need to make it work again. Reported-and-tested-by: Matthias Schiffer Fixes: 1bd773c077de ("wireless: set correct mandatory rate flags") Signed-off-by: Johannes Berg --- net/wireless/mesh.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 51aa55618ef75..b12da6ef3c122 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -170,9 +170,28 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, enum nl80211_bss_scan_width scan_width; struct ieee80211_supported_band *sband = rdev->wiphy.bands[setup->chandef.chan->band]; - scan_width = cfg80211_chandef_to_scan_width(&setup->chandef); - setup->basic_rates = ieee80211_mandatory_rates(sband, - scan_width); + + if (setup->chandef.chan->band == NL80211_BAND_2GHZ) { + int i; + + /* + * Older versions selected the mandatory rates for + * 2.4 GHz as well, but were broken in that only + * 1 Mbps was regarded as a mandatory rate. Keep + * using just 1 Mbps as the default basic rate for + * mesh to be interoperable with older versions. + */ + for (i = 0; i < sband->n_bitrates; i++) { + if (sband->bitrates[i].bitrate == 10) { + setup->basic_rates = BIT(i); + break; + } + } + } else { + scan_width = cfg80211_chandef_to_scan_width(&setup->chandef); + setup->basic_rates = ieee80211_mandatory_rates(sband, + scan_width); + } } err = cfg80211_chandef_dfs_required(&rdev->wiphy, From c4de37ee2b55deac7d6aeac33e02e3d6be243898 Mon Sep 17 00:00:00 2001 From: Peter Oh Date: Fri, 26 Jan 2018 14:02:37 -0800 Subject: [PATCH 018/269] mac80211: mesh: fix wrong mesh TTL offset calculation mesh TTL offset in Mesh Channel Switch Parameters element depends on not only Secondary Channel Offset element, but also affected by HT Control field and Wide Bandwidth Channel Switch element. So use element structure to manipulate mesh channel swich param IE after removing its constant attribution to correct the miscalculation. Signed-off-by: Peter Oh Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mesh.c | 17 ++++++----------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 885d00b419119..61db1fb156ed4 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1467,7 +1467,7 @@ struct ieee802_11_elems { const struct ieee80211_timeout_interval_ie *timeout_int; const u8 *opmode_notif; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; - const struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie; + struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie; const struct ieee80211_bss_max_idle_period_ie *max_idle_period_ie; /* length of them, respectively */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 5e27364e10acf..23555536bad56 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1253,13 +1253,12 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata, } static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len) + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee802_11_elems *elems) { struct ieee80211_mgmt *mgmt_fwd; struct sk_buff *skb; struct ieee80211_local *local = sdata->local; - u8 *pos = mgmt->u.action.u.chan_switch.variable; - size_t offset_ttl; skb = dev_alloc_skb(local->tx_headroom + len); if (!skb) @@ -1267,13 +1266,9 @@ static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata, skb_reserve(skb, local->tx_headroom); mgmt_fwd = skb_put(skb, len); - /* offset_ttl is based on whether the secondary channel - * offset is available or not. Subtract 1 from the mesh TTL - * and disable the initiator flag before forwarding. - */ - offset_ttl = (len < 42) ? 7 : 10; - *(pos + offset_ttl) -= 1; - *(pos + offset_ttl + 1) &= ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR; + elems->mesh_chansw_params_ie->mesh_ttl--; + elems->mesh_chansw_params_ie->mesh_flags &= + ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR; memcpy(mgmt_fwd, mgmt, len); eth_broadcast_addr(mgmt_fwd->da); @@ -1321,7 +1316,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, /* forward or re-broadcast the CSA frame */ if (fwd_csa) { - if (mesh_fwd_csa_frame(sdata, mgmt, len) < 0) + if (mesh_fwd_csa_frame(sdata, mgmt, len, &elems) < 0) mcsa_dbg(sdata, "Failed to forward the CSA frame"); } } From 745fd50f3b044db6a3922e1718306555613164b0 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 31 Jan 2018 12:04:50 +0100 Subject: [PATCH 019/269] drm/cirrus: Load lut in crtc_commit In the past the ast driver relied upon the fbdev emulation helpers to call ->load_lut at boot-up. But since commit b8e2b0199cc377617dc238f5106352c06dcd3fa2 Author: Peter Rosin Date: Tue Jul 4 12:36:57 2017 +0200 drm/fb-helper: factor out pseudo-palette that's cleaned up and drivers are expected to boot into a consistent lut state. This patch fixes that. Fixes: b8e2b0199cc3 ("drm/fb-helper: factor out pseudo-palette") Cc: Peter Rosin Cc: Daniel Vetter Cc: # v4.14+ References: https://bugzilla.kernel.org/show_bug.cgi?id=198123 Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20180131110450.22153-1-daniel.vetter@ffwll.ch Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/cirrus/cirrus_mode.c | 40 ++++++++++++++++------------ 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c index cd23b1b282594..c91b9b054e3f7 100644 --- a/drivers/gpu/drm/cirrus/cirrus_mode.c +++ b/drivers/gpu/drm/cirrus/cirrus_mode.c @@ -294,22 +294,7 @@ static void cirrus_crtc_prepare(struct drm_crtc *crtc) { } -/* - * This is called after a mode is programmed. It should reverse anything done - * by the prepare function - */ -static void cirrus_crtc_commit(struct drm_crtc *crtc) -{ -} - -/* - * The core can pass us a set of gamma values to program. We actually only - * use this for 8-bit mode so can't perform smooth fades on deeper modes, - * but it's a requirement that we provide the function - */ -static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, - u16 *blue, uint32_t size, - struct drm_modeset_acquire_ctx *ctx) +static void cirrus_crtc_load_lut(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct cirrus_device *cdev = dev->dev_private; @@ -317,7 +302,7 @@ static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, int i; if (!crtc->enabled) - return 0; + return; r = crtc->gamma_store; g = r + crtc->gamma_size; @@ -330,6 +315,27 @@ static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, WREG8(PALETTE_DATA, *g++ >> 8); WREG8(PALETTE_DATA, *b++ >> 8); } +} + +/* + * This is called after a mode is programmed. It should reverse anything done + * by the prepare function + */ +static void cirrus_crtc_commit(struct drm_crtc *crtc) +{ + cirrus_crtc_load_lut(crtc); +} + +/* + * The core can pass us a set of gamma values to program. We actually only + * use this for 8-bit mode so can't perform smooth fades on deeper modes, + * but it's a requirement that we provide the function + */ +static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, + u16 *blue, uint32_t size, + struct drm_modeset_acquire_ctx *ctx) +{ + cirrus_crtc_load_lut(crtc); return 0; } From 54f809cfbd6b4a43959039f5d33596ed3297ce16 Mon Sep 17 00:00:00 2001 From: "Leo (Sunpeng) Li" Date: Wed, 17 Jan 2018 12:51:08 +0100 Subject: [PATCH 020/269] drm/atomic: Fix memleak on ERESTARTSYS during non-blocking commits During a non-blocking commit, it is possible to return before the commit_tail work is queued (-ERESTARTSYS, for example). Since a reference on the crtc commit object is obtained for the pending vblank event when preparing the commit, the above situation will leave us with an extra reference. Therefore, if the commit_tail worker has not consumed the event at the end of a commit, release it's reference. Changes since v1: - Also check for state->event->base.completion being set, to handle the case where stall_checks() fails in setup_crtc_commit(). Changes since v2: - Add a flag to drm_crtc_commit, to prevent dereferencing a freed event. i915 may unreference the state in a worker. Fixes: 24835e442f28 ("drm: reference count event->completion") Cc: # v4.11+ Signed-off-by: Leo (Sunpeng) Li Acked-by: Harry Wentland #v1 Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20180117115108.29608-1-maarten.lankhorst@linux.intel.com Reviewed-by: Sean Paul --- drivers/gpu/drm/drm_atomic_helper.c | 15 +++++++++++++++ include/drm/drm_atomic.h | 9 +++++++++ 2 files changed, 24 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index b16f1d69a0bbf..e8c249361d7e4 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1778,6 +1778,8 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state, new_crtc_state->event->base.completion = &commit->flip_done; new_crtc_state->event->base.completion_release = release_crtc_commit; drm_crtc_commit_get(commit); + + commit->abort_completion = true; } for_each_oldnew_connector_in_state(state, conn, old_conn_state, new_conn_state, i) { @@ -3327,8 +3329,21 @@ EXPORT_SYMBOL(drm_atomic_helper_crtc_duplicate_state); void __drm_atomic_helper_crtc_destroy_state(struct drm_crtc_state *state) { if (state->commit) { + /* + * In the event that a non-blocking commit returns + * -ERESTARTSYS before the commit_tail work is queued, we will + * have an extra reference to the commit object. Release it, if + * the event has not been consumed by the worker. + * + * state->event may be freed, so we can't directly look at + * state->event->base.completion. + */ + if (state->event && state->commit->abort_completion) + drm_crtc_commit_put(state->commit); + kfree(state->commit->event); state->commit->event = NULL; + drm_crtc_commit_put(state->commit); } diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index 5afd6e364fb67..c63b0b48e884c 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -134,6 +134,15 @@ struct drm_crtc_commit { * &drm_pending_vblank_event pointer to clean up private events. */ struct drm_pending_vblank_event *event; + + /** + * @abort_completion: + * + * A flag that's set after drm_atomic_helper_setup_commit takes a second + * reference for the completion of $drm_crtc_state.event. It's used by + * the free code to remove the second reference if commit fails. + */ + bool abort_completion; }; struct __drm_planes_state { From 511051d509ec54642dd6d30fdf2caa33c23619cc Mon Sep 17 00:00:00 2001 From: Andreas Klinger Date: Thu, 1 Feb 2018 21:49:24 +0100 Subject: [PATCH 021/269] iio: srf08: fix link error "devm_iio_triggered_buffer_setup" undefined Functions for triggered buffer support are needed by this module. If they are not defined accidentally by another driver, there's an error thrown out while linking. Add a select of IIO_BUFFER and IIO_TRIGGERED_BUFFER in the Kconfig file. Signed-off-by: Andreas Klinger Fixes: a83195937151 ("iio: srf08: add triggered buffer support") Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/proximity/Kconfig b/drivers/iio/proximity/Kconfig index fcb1c4ba5e414..f726f9427602f 100644 --- a/drivers/iio/proximity/Kconfig +++ b/drivers/iio/proximity/Kconfig @@ -68,6 +68,8 @@ config SX9500 config SRF08 tristate "Devantech SRF02/SRF08/SRF10 ultrasonic ranger sensor" + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER depends on I2C help Say Y here to build a driver for Devantech SRF02/SRF08/SRF10 From 50dbd09c56db0555813aa2824dc4fe8f1fc06aaa Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 23 Jan 2018 16:33:46 -0800 Subject: [PATCH 022/269] scsi: qla2xxx: Fix a locking imbalance in qlt_24xx_handle_els() Ensure that upon return the tgt->ha->tgt.sess_lock spin lock is unlocked no matter which code path is taken through this function. This was detected by sparse. Fixes: 82abdcaf3ede ("scsi: qla2xxx: Allow target mode to accept PRLI in dual mode") Signed-off-by: Bart Van Assche Cc: Himanshu Madhani Cc: Quinn Tran Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_target.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index fc89af8fe2569..896b2d8bd8035 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -4871,8 +4871,6 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, sess); qlt_send_term_imm_notif(vha, iocb, 1); res = 0; - spin_lock_irqsave(&tgt->ha->tgt.sess_lock, - flags); break; } From fd2c19b2a28bb574b3914466a68ef830212d5cf7 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 31 Jan 2018 09:16:56 +0100 Subject: [PATCH 023/269] netfilter: x_tables: remove size check Back in 2002 vmalloc used to BUG on too large sizes. We are much better behaved these days and vmalloc simply returns NULL for those. Remove the check as it simply not needed and the comment is even misleading. Link: http://lkml.kernel.org/r/20180131081916.GO21609@dhcp22.suse.cz Suggested-by: Andrew Morton Signed-off-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Florian Westphal Cc: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 2f685ee1f9c87..97e06a04c0d4e 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1004,10 +1004,6 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) if (sz < sizeof(*info)) return NULL; - /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ - if ((size >> PAGE_SHIFT) + 2 > totalram_pages) - return NULL; - /* __GFP_NORETRY is not fully supported by kvmalloc but it should * work reasonably well if sz is too large and bail out rather * than shoot all processes down before realizing there is nothing From b3e456fce9f51d6276e576d00271e2813c1b8b67 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 7 Feb 2018 21:59:17 -0800 Subject: [PATCH 024/269] netfilter: ipt_CLUSTERIP: fix a race condition of proc file creation There is a race condition between clusterip_config_entry_put() and clusterip_config_init(), after we release the spinlock in clusterip_config_entry_put(), a new proc file with a same IP could be created immediately since it is already removed from the configs list, therefore it triggers this warning: ------------[ cut here ]------------ proc_dir_entry 'ipt_CLUSTERIP/172.20.0.170' already registered WARNING: CPU: 1 PID: 4152 at fs/proc/generic.c:330 proc_register+0x2a4/0x370 fs/proc/generic.c:329 Kernel panic - not syncing: panic_on_warn set ... As a quick fix, just move the proc_remove() inside the spinlock. Reported-by: Fixes: 6c5d5cfbe3c5 ("netfilter: ipt_CLUSTERIP: check duplicate config when initializing") Tested-by: Paolo Abeni Cc: Xin Long Cc: Pablo Neira Ayuso Signed-off-by: Cong Wang Reviewed-by: Xin Long Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 3a84a60f6b39d..1ff72b87a0661 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -107,12 +107,6 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c) local_bh_disable(); if (refcount_dec_and_lock(&c->entries, &cn->lock)) { - list_del_rcu(&c->list); - spin_unlock(&cn->lock); - local_bh_enable(); - - unregister_netdevice_notifier(&c->notifier); - /* In case anyone still accesses the file, the open/close * functions are also incrementing the refcount on their own, * so it's safe to remove the entry even if it's in use. */ @@ -120,6 +114,12 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c) if (cn->procdir) proc_remove(c->pde); #endif + list_del_rcu(&c->list); + spin_unlock(&cn->lock); + local_bh_enable(); + + unregister_netdevice_notifier(&c->notifier); + return; } local_bh_enable(); From 3a129cc2151425e5aeb69aeb25fbc994ec738137 Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Sun, 4 Feb 2018 18:45:21 +0100 Subject: [PATCH 025/269] vsprintf: avoid misleading "(null)" for %px Like %pK already does, print "00000000" instead. This confused people -- the convention is that "(null)" means you tried to dereference a null pointer as opposed to printing the address. Link: http://lkml.kernel.org/r/20180204174521.21383-1-kilobyte@angband.pl To: Sergey Senozhatsky To: Steven Rostedt To: linux-kernel@vger.kernel.org Cc: Andrew Morton Cc: Joe Perches Cc: Kees Cook Cc: "Roberts, William C" Cc: Linus Torvalds Cc: David Laight Cc: Randy Dunlap Cc: Geert Uytterhoeven Signed-off-by: Adam Borowski Signed-off-by: Petr Mladek --- lib/vsprintf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 77ee6ced11b17..d7a708f82559c 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1849,7 +1849,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, { const int default_width = 2 * sizeof(void *); - if (!ptr && *fmt != 'K') { + if (!ptr && *fmt != 'K' && *fmt != 'x') { /* * Print (null) with the same width as a pointer so it makes * tabular output look nice. From bff52352e0ccc2481f2b6b0d612ff8ff56c50f3a Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Mon, 18 Dec 2017 16:14:36 +0100 Subject: [PATCH 026/269] usb: dwc3: of-simple: fix oops by unbalanced clk disable call dwc3_of_simple_dev_pm_ops has never been used since commit a0d8c4cfdf31 ("usb: dwc3: of-simple: set dev_pm_ops"), but this commit has brought and oops when unbind the device due this sequence: dwc3_of_simple_remove -> clk_disable ... -> pm_runtime_put_sync -> dwc3_of_simple_runtime_suspend -> clk_disable (again) This double call to clk_core_disable causes a kernel oops like this: WARNING: CPU: 1 PID: 4022 at drivers/clk/clk.c:656 clk_core_disable+0x78/0x80 CPU: 1 PID: 4022 Comm: bash Not tainted 4.15.0-rc4+ #44 Hardware name: Google Kevin (DT) pstate: 80000085 (Nzcv daIf -PAN -UAO) pc : clk_core_disable+0x78/0x80 lr : clk_core_disable_lock+0x20/0x38 sp : ffff00000bbf3a90 ... Call trace: clk_core_disable+0x78/0x80 clk_disable+0x1c/0x30 dwc3_of_simple_runtime_suspend+0x30/0x50 pm_generic_runtime_suspend+0x28/0x40 This patch fixes the unbalanced clk disable call by setting the num_clocks variable to zero once the clocks were disabled. Fixes: a0d8c4cfdf31 ("usb: dwc3: of-simple: set dev_pm_ops") Signed-off-by: Enric Balletbo i Serra Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-of-simple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c index 7ae0eefc7cc7d..e54c3622eb28a 100644 --- a/drivers/usb/dwc3/dwc3-of-simple.c +++ b/drivers/usb/dwc3/dwc3-of-simple.c @@ -143,6 +143,7 @@ static int dwc3_of_simple_remove(struct platform_device *pdev) clk_disable_unprepare(simple->clks[i]); clk_put(simple->clks[i]); } + simple->num_clocks = 0; reset_control_assert(simple->resets); reset_control_put(simple->resets); From e3190868e5f52fb26544f16463593d54ce46ce61 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 12 Jan 2018 20:00:56 +0900 Subject: [PATCH 027/269] usb: gadget: udc: renesas_usb3: fix oops in renesas_usb3_remove() This patch fixes an issue that the renesas_usb3_remove() causes NULL pointer dereference because the usb3_to_dev() macro will use the gadget instance and it will be deleted before. Fixes: cf06df3fae28 ("usb: gadget: udc: renesas_usb3: move pm_runtime_{en,dis}able()") Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/renesas_usb3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 6e87af2483679..409cde4e6a516 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -2410,7 +2410,7 @@ static int renesas_usb3_remove(struct platform_device *pdev) __renesas_usb3_ep_free_request(usb3->ep0_req); if (usb3->phy) phy_put(usb3->phy); - pm_runtime_disable(usb3_to_dev(usb3)); + pm_runtime_disable(&pdev->dev); return 0; } From 6180026341e852a250e1f97ebdcf71684a3c81b9 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Fri, 12 Jan 2018 18:18:05 -0800 Subject: [PATCH 028/269] usb: dwc3: gadget: Set maxpacket size for ep0 IN There are 2 control endpoint structures for DWC3. However, the driver only updates the OUT direction control endpoint structure during ConnectDone event. DWC3 driver needs to update the endpoint max packet size for control IN endpoint as well. If the max packet size is not properly set, then the driver will incorrectly calculate the data transfer size and fail to send ZLP for HS/FS 3-stage control read transfer. The fix is simply to update the max packet size for the ep0 IN direction during ConnectDone event. Cc: stable@vger.kernel.org Fixes: 72246da40f37 ("usb: Introduce DesignWare USB3 DRD Driver") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 616ef49ccb49e..2bda4eb1e9ac1 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2745,6 +2745,8 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc) break; } + dwc->eps[1]->endpoint.maxpacket = dwc->gadget.ep0->maxpacket; + /* Enable USB2 LPM Capability */ if ((dwc->revision > DWC3_REVISION_194A) && From f035d139ffece7b6a7b8bfb17bd0ba715ee57a04 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Fri, 12 Jan 2018 18:18:27 -0800 Subject: [PATCH 029/269] usb: dwc3: ep0: Reset TRB counter for ep0 IN DWC3 tracks TRB counter for each ep0 direction separately. In control read transfer completion handler, the driver needs to reset the TRB enqueue counter for ep0 IN direction. Currently the driver only resets the TRB counter for control OUT endpoint. Check for the data direction and properly reset the TRB counter from correct control endpoint. Cc: stable@vger.kernel.org Fixes: c2da2ff00606 ("usb: dwc3: ep0: don't use ep0in for transfers") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/ep0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 9c2e4a17918e4..18be31d5743a1 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -854,7 +854,12 @@ static void dwc3_ep0_complete_data(struct dwc3 *dwc, trb++; trb->ctrl &= ~DWC3_TRB_CTRL_HWO; trace_dwc3_complete_trb(ep0, trb); - ep0->trb_enqueue = 0; + + if (r->direction) + dwc->eps[1]->trb_enqueue = 0; + else + dwc->eps[0]->trb_enqueue = 0; + dwc->ep0_bounced = false; } From 8813a59ed892305b5ac1b5b901740b1ad4b5fefa Mon Sep 17 00:00:00 2001 From: John Keeping Date: Fri, 12 Jan 2018 18:43:32 +0000 Subject: [PATCH 030/269] usb: gadget: f_uac2: fix bFirstInterface in composite gadget If there are multiple functions associated with a configuration, then the UAC2 interfaces may not start at zero. Set the correct first interface number in the association descriptor so that the audio interfaces are enumerated correctly in this case. Reviewed-by: Krzysztof Opasiak Signed-off-by: John Keeping Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_uac2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 11fe788b43087..d2dc1f00180b7 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -524,6 +524,8 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); return ret; } + iad_desc.bFirstInterface = ret; + std_ac_if_desc.bInterfaceNumber = ret; uac2->ac_intf = ret; uac2->ac_alt = 0; From 00b42170c86f90ac9dea83a7dfcd3f0c38098fe2 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 17 Jan 2018 13:22:49 -0800 Subject: [PATCH 031/269] usb: dwc3: Undo PHY init if soft reset fails In this function, we init the USB2 and USB3 PHYs, but if soft reset times out, we don't unwind this. Noticed by inspection. Signed-off-by: Brian Norris Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index ade2ab00d37ab..bc2467f0e6a7b 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -234,6 +234,9 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc) udelay(1); } while (--retries); + phy_exit(dwc->usb3_generic_phy); + phy_exit(dwc->usb2_generic_phy); + return -ETIMEDOUT; } From c4a5153e87fdf6805f63ff57556260e2554155a5 Mon Sep 17 00:00:00 2001 From: Manu Gautam Date: Thu, 18 Jan 2018 16:54:30 +0530 Subject: [PATCH 032/269] usb: dwc3: core: Power-off core/PHYs on system_suspend in host mode Commit 689bf72c6e0d ("usb: dwc3: Don't reinitialize core during host bus-suspend/resume") updated suspend/resume routines to not power_off and reinit PHYs/core for host mode. It broke platforms that rely on DWC3 core to power_off PHYs to enter low power state on system suspend. Perform dwc3_core_exit/init only during host mode system_suspend/ resume to addresses power regression from above mentioned patch and also allow USB session to stay connected across runtime_suspend/resume in host mode. While at it also replace existing checks for HOST only dr_mode with current_dr_role to have similar core driver behavior for both Host-only and DRD+Host configurations. Fixes: 689bf72c6e0d ("usb: dwc3: Don't reinitialize core during host bus-suspend/resume") Reviewed-by: Roger Quadros Signed-off-by: Manu Gautam Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index bc2467f0e6a7b..59511f2cd3ac4 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -100,6 +100,8 @@ static void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode) reg &= ~(DWC3_GCTL_PRTCAPDIR(DWC3_GCTL_PRTCAP_OTG)); reg |= DWC3_GCTL_PRTCAPDIR(mode); dwc3_writel(dwc->regs, DWC3_GCTL, reg); + + dwc->current_dr_role = mode; } static void __dwc3_set_mode(struct work_struct *work) @@ -133,8 +135,6 @@ static void __dwc3_set_mode(struct work_struct *work) dwc3_set_prtcap(dwc, dwc->desired_dr_role); - dwc->current_dr_role = dwc->desired_dr_role; - spin_unlock_irqrestore(&dwc->lock, flags); switch (dwc->desired_dr_role) { @@ -219,7 +219,7 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc) * XHCI driver will reset the host block. If dwc3 was configured for * host-only mode, then we can return early. */ - if (dwc->dr_mode == USB_DR_MODE_HOST) + if (dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST) return 0; reg = dwc3_readl(dwc->regs, DWC3_DCTL); @@ -919,7 +919,6 @@ static int dwc3_core_init_mode(struct dwc3 *dwc) switch (dwc->dr_mode) { case USB_DR_MODE_PERIPHERAL: - dwc->current_dr_role = DWC3_GCTL_PRTCAP_DEVICE; dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); if (dwc->usb2_phy) @@ -935,7 +934,6 @@ static int dwc3_core_init_mode(struct dwc3 *dwc) } break; case USB_DR_MODE_HOST: - dwc->current_dr_role = DWC3_GCTL_PRTCAP_HOST; dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST); if (dwc->usb2_phy) @@ -1287,7 +1285,7 @@ static int dwc3_remove(struct platform_device *pdev) } #ifdef CONFIG_PM -static int dwc3_suspend_common(struct dwc3 *dwc) +static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) { unsigned long flags; @@ -1299,6 +1297,10 @@ static int dwc3_suspend_common(struct dwc3 *dwc) dwc3_core_exit(dwc); break; case DWC3_GCTL_PRTCAP_HOST: + /* do nothing during host runtime_suspend */ + if (!PMSG_IS_AUTO(msg)) + dwc3_core_exit(dwc); + break; default: /* do nothing */ break; @@ -1307,7 +1309,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc) return 0; } -static int dwc3_resume_common(struct dwc3 *dwc) +static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) { unsigned long flags; int ret; @@ -1323,6 +1325,13 @@ static int dwc3_resume_common(struct dwc3 *dwc) spin_unlock_irqrestore(&dwc->lock, flags); break; case DWC3_GCTL_PRTCAP_HOST: + /* nothing to do on host runtime_resume */ + if (!PMSG_IS_AUTO(msg)) { + ret = dwc3_core_init(dwc); + if (ret) + return ret; + } + break; default: /* do nothing */ break; @@ -1334,12 +1343,11 @@ static int dwc3_resume_common(struct dwc3 *dwc) static int dwc3_runtime_checks(struct dwc3 *dwc) { switch (dwc->current_dr_role) { - case USB_DR_MODE_PERIPHERAL: - case USB_DR_MODE_OTG: + case DWC3_GCTL_PRTCAP_DEVICE: if (dwc->connected) return -EBUSY; break; - case USB_DR_MODE_HOST: + case DWC3_GCTL_PRTCAP_HOST: default: /* do nothing */ break; @@ -1356,7 +1364,7 @@ static int dwc3_runtime_suspend(struct device *dev) if (dwc3_runtime_checks(dwc)) return -EBUSY; - ret = dwc3_suspend_common(dwc); + ret = dwc3_suspend_common(dwc, PMSG_AUTO_SUSPEND); if (ret) return ret; @@ -1372,7 +1380,7 @@ static int dwc3_runtime_resume(struct device *dev) device_init_wakeup(dev, false); - ret = dwc3_resume_common(dwc); + ret = dwc3_resume_common(dwc, PMSG_AUTO_RESUME); if (ret) return ret; @@ -1419,7 +1427,7 @@ static int dwc3_suspend(struct device *dev) struct dwc3 *dwc = dev_get_drvdata(dev); int ret; - ret = dwc3_suspend_common(dwc); + ret = dwc3_suspend_common(dwc, PMSG_SUSPEND); if (ret) return ret; @@ -1435,7 +1443,7 @@ static int dwc3_resume(struct device *dev) pinctrl_pm_select_default_state(dev); - ret = dwc3_resume_common(dwc); + ret = dwc3_resume_common(dwc, PMSG_RESUME); if (ret) return ret; From 499350865387f8b8c40a9e9453a9a7eb3cec5dc4 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 18 Jan 2018 00:22:45 -0200 Subject: [PATCH 033/269] usb: phy: mxs: Fix NULL pointer dereference on i.MX23/28 Commit e93650994a95 ("usb: phy: mxs: add usb charger type detection") causes the following kernel hang on i.MX28: [ 2.207973] usbcore: registered new interface driver usb-storage [ 2.235659] Unable to handle kernel NULL pointer dereference at virtual address 00000188 [ 2.244195] pgd = (ptrval) [ 2.246994] [00000188] *pgd=00000000 [ 2.250676] Internal error: Oops: 5 [#1] ARM [ 2.254979] Modules linked in: [ 2.258089] CPU: 0 PID: 1 Comm: swapper Not tainted 4.15.0-rc8-next-20180117-00002-g75d5f21 #7 [ 2.266724] Hardware name: Freescale MXS (Device Tree) [ 2.271921] PC is at regmap_read+0x0/0x5c [ 2.275977] LR is at mxs_phy_charger_detect+0x34/0x1dc mxs_phy_charger_detect() makes accesses to the anatop registers via regmap, however i.MX23/28 do not have such registers, which causes a NULL pointer dereference. Fix the issue by doing a NULL check on the 'regmap' pointer. Fixes: e93650994a95 ("usb: phy: mxs: add usb charger type detection") Cc: # v4.15 Reviewed-by: Li Jun Acked-by: Peter Chen Signed-off-by: Fabio Estevam Signed-off-by: Felipe Balbi --- drivers/usb/phy/phy-mxs-usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/phy/phy-mxs-usb.c b/drivers/usb/phy/phy-mxs-usb.c index da031c45395ab..fbec863350f67 100644 --- a/drivers/usb/phy/phy-mxs-usb.c +++ b/drivers/usb/phy/phy-mxs-usb.c @@ -602,6 +602,9 @@ static enum usb_charger_type mxs_phy_charger_detect(struct usb_phy *phy) void __iomem *base = phy->io_priv; enum usb_charger_type chgr_type = UNKNOWN_TYPE; + if (!regmap) + return UNKNOWN_TYPE; + if (mxs_charger_data_contact_detect(mxs_phy)) return chgr_type; From 6cf439e0d37463e42784271179c8a308fd7493c6 Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Wed, 24 Jan 2018 00:11:53 -0800 Subject: [PATCH 034/269] usb: gadget: f_fs: Process all descriptors during bind During _ffs_func_bind(), the received descriptors are evaluated to prepare for binding with the gadget in order to allocate endpoints and optionally set up OS descriptors. However, the high- and super-speed descriptors are only parsed based on whether the gadget_is_dualspeed() and gadget_is_superspeed() calls are true, respectively. This is a problem in case a userspace program always provides all of the {full,high,super,OS} descriptors when configuring a function. Then, for example if a gadget device is not capable of SuperSpeed, the call to ffs_do_descs() for the SS descriptors is skipped, resulting in an incorrect offset calculation for the vla_ptr when moving on to the OS descriptors that follow. This causes ffs_do_os_descs() to fail as it is now looking at the SS descriptors' offset within the raw_descs buffer instead. _ffs_func_bind() should evaluate the descriptors unconditionally, so remove the checks for gadget speed. Fixes: f0175ab51993 ("usb: gadget: f_fs: OS descriptors support") Cc: stable@vger.kernel.org Co-Developed-by: Mayank Rana Signed-off-by: Mayank Rana Signed-off-by: Jack Pham Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 8f2cf3baa19c1..49fc589fbf58c 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2979,10 +2979,8 @@ static int _ffs_func_bind(struct usb_configuration *c, struct ffs_data *ffs = func->ffs; const int full = !!func->ffs->fs_descs_count; - const int high = gadget_is_dualspeed(func->gadget) && - func->ffs->hs_descs_count; - const int super = gadget_is_superspeed(func->gadget) && - func->ffs->ss_descs_count; + const int high = !!func->ffs->hs_descs_count; + const int super = !!func->ffs->ss_descs_count; int fs_len, hs_len, ss_len, ret, i; struct ffs_ep *eps_ptr; From 675272d092e4a5570bace92593776f7348daf4c5 Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Wed, 24 Jan 2018 23:58:20 -0800 Subject: [PATCH 035/269] usb: gadget: f_fs: Use config_ep_by_speed() In commit 2bfa0719ac2a ("usb: gadget: function: f_fs: pass companion descriptor along") there is a pointer arithmetic bug where the comp_desc is obtained as follows: comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds + USB_DT_ENDPOINT_SIZE); Since ds is a pointer to usb_endpoint_descriptor, adding 7 to it ends up going out of bounds (7 * sizeof(struct usb_endpoint_descriptor), which is actually 7*9 bytes) past the SS descriptor. As a result the maxburst value will be read incorrectly, and the UDC driver will also get a garbage comp_desc (assuming it uses it). Since Felipe wrote, "Eventually, f_fs.c should be converted to use config_ep_by_speed() like all other functions, though", let's finally do it. This allows the other usb_ep fields to be properly populated, such as maxpacket and mult. It also eliminates the awkward speed-based descriptor lookup since config_ep_by_speed() does that already using the ones found in struct usb_function. Fixes: 2bfa0719ac2a ("usb: gadget: function: f_fs: pass companion descriptor along") Cc: stable@vger.kernel.org Signed-off-by: Jack Pham Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 38 ++++++------------------------ 1 file changed, 7 insertions(+), 31 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 49fc589fbf58c..c2592d883f67c 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1855,44 +1855,20 @@ static int ffs_func_eps_enable(struct ffs_function *func) spin_lock_irqsave(&func->ffs->eps_lock, flags); while(count--) { - struct usb_endpoint_descriptor *ds; - struct usb_ss_ep_comp_descriptor *comp_desc = NULL; - int needs_comp_desc = false; - int desc_idx; - - if (ffs->gadget->speed == USB_SPEED_SUPER) { - desc_idx = 2; - needs_comp_desc = true; - } else if (ffs->gadget->speed == USB_SPEED_HIGH) - desc_idx = 1; - else - desc_idx = 0; - - /* fall-back to lower speed if desc missing for current speed */ - do { - ds = ep->descs[desc_idx]; - } while (!ds && --desc_idx >= 0); - - if (!ds) { - ret = -EINVAL; - break; - } - ep->ep->driver_data = ep; - ep->ep->desc = ds; - if (needs_comp_desc) { - comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds + - USB_DT_ENDPOINT_SIZE); - ep->ep->maxburst = comp_desc->bMaxBurst + 1; - ep->ep->comp_desc = comp_desc; + ret = config_ep_by_speed(func->gadget, &func->function, ep->ep); + if (ret) { + pr_err("%s: config_ep_by_speed(%s) returned %d\n", + __func__, ep->ep->name, ret); + break; } ret = usb_ep_enable(ep->ep); if (likely(!ret)) { epfile->ep = ep; - epfile->in = usb_endpoint_dir_in(ds); - epfile->isoc = usb_endpoint_xfer_isoc(ds); + epfile->in = usb_endpoint_dir_in(ep->ep->desc); + epfile->isoc = usb_endpoint_xfer_isoc(ep->ep->desc); } else { break; } From c49f63055e252810e5d6c83a4943b18db16b3cd8 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 22 Jan 2018 15:01:42 +0200 Subject: [PATCH 036/269] usb: dwc3: omap: don't miss events during suspend/resume The USB cable state can change during suspend/resume so be sure to check and update the extcon state. Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-omap.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index a4719e853b85a..ed8b865176758 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -582,9 +582,25 @@ static int dwc3_omap_resume(struct device *dev) return 0; } +static void dwc3_omap_complete(struct device *dev) +{ + struct dwc3_omap *omap = dev_get_drvdata(dev); + + if (extcon_get_state(omap->edev, EXTCON_USB)) + dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_VALID); + else + dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_OFF); + + if (extcon_get_state(omap->edev, EXTCON_USB_HOST)) + dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_GROUND); + else + dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_FLOAT); +} + static const struct dev_pm_ops dwc3_omap_dev_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(dwc3_omap_suspend, dwc3_omap_resume) + .complete = dwc3_omap_complete, }; #define DEV_PM_OPS (&dwc3_omap_dev_pm_ops) From e74bd4d358e5455233f1dcc3975425905b270b91 Mon Sep 17 00:00:00 2001 From: Manu Gautam Date: Thu, 21 Dec 2017 09:54:25 +0530 Subject: [PATCH 037/269] usb: gadget: core: Fix use-after-free of usb_request Driver is tracing usb_request after freeing it. Fix it by changing the order. Signed-off-by: Manu Gautam Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 859d5b11ba4c4..1f8b19d9cf97b 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -180,8 +180,8 @@ EXPORT_SYMBOL_GPL(usb_ep_alloc_request); void usb_ep_free_request(struct usb_ep *ep, struct usb_request *req) { - ep->ops->free_request(ep, req); trace_usb_ep_free_request(ep, req, 0); + ep->ops->free_request(ep, req); } EXPORT_SYMBOL_GPL(usb_ep_free_request); From b16ea8b9492e99e03b1269fe93ebdbf8e4eabf8a Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Fri, 2 Feb 2018 13:21:35 -0800 Subject: [PATCH 038/269] usb: dwc3: Fix GDBGFIFOSPACE_TYPE values The FIFO/Queue type values are incorrect. Correct them according to DWC_usb3 programming guide section 1.2.27 (or DWC_usb31 section 1.2.25). Additionally, this patch includes ProtocolStatusQ and AuxEventQ types. Fixes: cf6d867d3b57 ("usb: dwc3: core: add fifo space helper") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 03c7aaaac9268..185b9603fd98b 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -158,13 +158,15 @@ #define DWC3_GDBGFIFOSPACE_TYPE(n) (((n) << 5) & 0x1e0) #define DWC3_GDBGFIFOSPACE_SPACE_AVAILABLE(n) (((n) >> 16) & 0xffff) -#define DWC3_TXFIFOQ 1 -#define DWC3_RXFIFOQ 3 -#define DWC3_TXREQQ 5 -#define DWC3_RXREQQ 7 -#define DWC3_RXINFOQ 9 -#define DWC3_DESCFETCHQ 13 -#define DWC3_EVENTQ 15 +#define DWC3_TXFIFOQ 0 +#define DWC3_RXFIFOQ 1 +#define DWC3_TXREQQ 2 +#define DWC3_RXREQQ 3 +#define DWC3_RXINFOQ 4 +#define DWC3_PSTATQ 5 +#define DWC3_DESCFETCHQ 6 +#define DWC3_EVENTQ 7 +#define DWC3_AUXEVENTQ 8 /* Global RX Threshold Configuration Register */ #define DWC3_GRXTHRCFG_MAXRXBURSTSIZE(n) (((n) & 0x1f) << 19) From 20bf410ecf9e9c045f4b0548d516dd3de8691074 Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Mon, 5 Feb 2018 02:21:23 +0100 Subject: [PATCH 039/269] usb: gadget: udc: Remove USB_GADGET_DUALSPEED select USB_GADGET_DUALSPEED was removed by commit 85b8614d7223 ("usb: gadget: get rid of USB_GADGET_{DUAL,SUPER}SPEED"), but the USB_SNP_UDC_PLAT symbol still selects it. Remove the USB_GADGET_DUALSPEED select from USB_SNP_UDC_PLAT. Discovered with the https://github.com/ulfalizer/Kconfiglib/blob/master/examples/list_undefined.py script. Signed-off-by: Ulf Magnusson Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig index 1e9567091d860..0875d38476ee9 100644 --- a/drivers/usb/gadget/udc/Kconfig +++ b/drivers/usb/gadget/udc/Kconfig @@ -274,7 +274,6 @@ config USB_SNP_UDC_PLAT tristate "Synopsys USB 2.0 Device controller" depends on USB_GADGET && OF && HAS_DMA depends on EXTCON || EXTCON=n - select USB_GADGET_DUALSPEED select USB_SNP_CORE default ARCH_BCM_IPROC help From 17aa31f13cad25daa19d3f923323f552e87bc874 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 5 Feb 2018 17:12:35 +0900 Subject: [PATCH 040/269] usb: renesas_usbhs: missed the "running" flag in usb_dmac with rx path This fixes an issue that a gadget driver (usb_f_fs) is possible to stop rx transactions after the usb-dmac is used because the following functions missed to set/check the "running" flag. - usbhsf_dma_prepare_pop_with_usb_dmac() - usbhsf_dma_pop_done_with_usb_dmac() So, if next transaction uses pio, the usbhsf_prepare_pop() can not start the transaction because the "running" flag is 0. Fixes: 8355b2b3082d ("usb: renesas_usbhs: fix the behavior of some usbhs_pkt_handle") Cc: # v3.19+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/fifo.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 5925d111bd474..39fa2fc1b8b76 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -982,6 +982,10 @@ static int usbhsf_dma_prepare_pop_with_usb_dmac(struct usbhs_pkt *pkt, if ((uintptr_t)pkt->buf & (USBHS_USB_DMAC_XFER_SIZE - 1)) goto usbhsf_pio_prepare_pop; + /* return at this time if the pipe is running */ + if (usbhs_pipe_is_running(pipe)) + return 0; + usbhs_pipe_config_change_bfre(pipe, 1); ret = usbhsf_fifo_select(pipe, fifo, 0); @@ -1172,6 +1176,7 @@ static int usbhsf_dma_pop_done_with_usb_dmac(struct usbhs_pkt *pkt, usbhsf_fifo_clear(pipe, fifo); pkt->actual = usbhs_dma_calc_received_size(pkt, chan, rcv_len); + usbhs_pipe_running(pipe, 0); usbhsf_dma_stop(pipe, fifo); usbhsf_dma_unmap(pkt); usbhsf_fifo_unselect(pipe, pipe->fifo); From 20c63f4089cceab803438c383631963e34c4d8e5 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 12 Feb 2018 00:14:42 +0100 Subject: [PATCH 041/269] usb: gadget: fsl_udc_core: fix ep valid checks Clang reports the following warning: drivers/usb/gadget/udc/fsl_udc_core.c:1312:10: warning: address of array 'ep->name' will always evaluate to 'true' [-Wpointer-bool-conversion] if (ep->name) ~~ ~~~~^~~~ It seems that the authors intention was to check if the ep has been configured through struct_ep_setup. Check whether struct usb_ep name pointer has been set instead. Signed-off-by: Stefan Agner Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/fsl_udc_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c index e5b4ee96c4bf6..56b517a38865a 100644 --- a/drivers/usb/gadget/udc/fsl_udc_core.c +++ b/drivers/usb/gadget/udc/fsl_udc_core.c @@ -1305,7 +1305,7 @@ static void udc_reset_ep_queue(struct fsl_udc *udc, u8 pipe) { struct fsl_ep *ep = get_ep_by_pipe(udc, pipe); - if (ep->name) + if (ep->ep.name) nuke(ep, -ESHUTDOWN); } @@ -1693,7 +1693,7 @@ static void dtd_complete_irq(struct fsl_udc *udc) curr_ep = get_ep_by_pipe(udc, i); /* If the ep is configured */ - if (curr_ep->name == NULL) { + if (!curr_ep->ep.name) { WARNING("Invalid EP?"); continue; } From 201ec568c57a43dbc73c7ac00e73c3c2d39559fc Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Tue, 16 Jan 2018 16:03:32 +0400 Subject: [PATCH 042/269] usb: dwc2: Add safety check in setting of descriptor chain pointers In some cases device sending ZLP IN on non EP0 which reassigning EP0 OUT descriptor pointer to that EP. Dedicated for EP0 OUT descriptor multiple time re-used by other EP while that descriptor already in use by EP0 OUT for SETUP transaction. As result when SETUP packet received BNA interrupt asserting. In dwc2_hsotg_program_zlp() function dwc2_gadget_set_ep0_desc_chain() must be called only for EP0. Acked-by: John Youn Signed-off-by: Minas Harutyunyan Signed-off-by: Grigor Tovmasyan Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/gadget.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index e4c3ce0de5de1..57c7400057fa6 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -1917,7 +1917,9 @@ static void dwc2_hsotg_program_zlp(struct dwc2_hsotg *hsotg, /* Not specific buffer needed for ep0 ZLP */ dma_addr_t dma = hs_ep->desc_list_dma; - dwc2_gadget_set_ep0_desc_chain(hsotg, hs_ep); + if (!index) + dwc2_gadget_set_ep0_desc_chain(hsotg, hs_ep); + dwc2_gadget_config_nonisoc_xfer_ddma(hs_ep, dma, 0); } else { dwc2_writel(DXEPTSIZ_MC(1) | DXEPTSIZ_PKTCNT(1) | From 9e95a66cce7250c358d496e1c3b62e29ce79ef40 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Tue, 16 Jan 2018 16:03:58 +0400 Subject: [PATCH 043/269] usb: dwc2: Add safety check for STSPHSERCVD intr STSPHSERCVD (status phase received) interrupt should be handled when EP0 is in DWC2_EP0_DATA_OUT state. Sometimes STSPHSERCVD interrupt asserted , when EP0 is not in DATA_OUT state. Spurios interrupt. Acked-by: John Youn Signed-off-by: Minas Harutyunyan Signed-off-by: Grigor Tovmasyan Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/gadget.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 57c7400057fa6..d6222e9464639 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -2976,9 +2976,13 @@ static void dwc2_hsotg_epint(struct dwc2_hsotg *hsotg, unsigned int idx, if (ints & DXEPINT_STSPHSERCVD) { dev_dbg(hsotg->dev, "%s: StsPhseRcvd\n", __func__); - /* Move to STATUS IN for DDMA */ - if (using_desc_dma(hsotg)) - dwc2_hsotg_ep0_zlp(hsotg, true); + /* Safety check EP0 state when STSPHSERCVD asserted */ + if (hsotg->ep0_state == DWC2_EP0_DATA_OUT) { + /* Move to STATUS IN for DDMA */ + if (using_desc_dma(hsotg)) + dwc2_hsotg_ep0_zlp(hsotg, true); + } + } if (ints & DXEPINT_BACK2BACKSETUP) From 755d739534f998d92e348fba8ffb0478416576e7 Mon Sep 17 00:00:00 2001 From: Vardan Mikayelyan Date: Tue, 16 Jan 2018 16:04:24 +0400 Subject: [PATCH 044/269] usb: dwc2: Fix dwc2_hsotg_core_init_disconnected() We should call dwc2_hsotg_enqueue_setup() after properly setting lx_state. Because it may cause error-out from dwc2_hsotg_enqueue_setup() due to wrong value in lx_state. Issue can be reproduced by loading driver while connected A-Connector (start in A-HOST mode) then disconnect A-Connector to switch to B-DEVICE. Acked-by: John Youn Signed-off-by: Vardan Mikayelyan Signed-off-by: Grigor Tovmasyan Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/gadget.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index d6222e9464639..5bcad1d869b50 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -3381,12 +3381,6 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg, dwc2_writel(dwc2_hsotg_ep0_mps(hsotg->eps_out[0]->ep.maxpacket) | DXEPCTL_USBACTEP, hsotg->regs + DIEPCTL0); - dwc2_hsotg_enqueue_setup(hsotg); - - dev_dbg(hsotg->dev, "EP0: DIEPCTL0=0x%08x, DOEPCTL0=0x%08x\n", - dwc2_readl(hsotg->regs + DIEPCTL0), - dwc2_readl(hsotg->regs + DOEPCTL0)); - /* clear global NAKs */ val = DCTL_CGOUTNAK | DCTL_CGNPINNAK; if (!is_usb_reset) @@ -3397,6 +3391,12 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg, mdelay(3); hsotg->lx_state = DWC2_L0; + + dwc2_hsotg_enqueue_setup(hsotg); + + dev_dbg(hsotg->dev, "EP0: DIEPCTL0=0x%08x, DOEPCTL0=0x%08x\n", + dwc2_readl(hsotg->regs + DIEPCTL0), + dwc2_readl(hsotg->regs + DOEPCTL0)); } static void dwc2_hsotg_core_disconnect(struct dwc2_hsotg *hsotg) From 5b4e64beb6ab40f5d8b44500fe0fc201c25a0f16 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 12 Feb 2018 20:46:28 +0100 Subject: [PATCH 045/269] extcon: axp288: Constify the axp288_pwr_up_down_info array Make the axp288_pwr_up_down_info array const char * const, this leads to the following section size changes: .text 0x674 -> 0x664 .data 0x148 -> 0x0f0 .rodata 0x0b4 -> 0x114 Signed-off-by: Hans de Goede Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-axp288.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c index 0a44d43802fe1..c8f7b6435679f 100644 --- a/drivers/extcon/extcon-axp288.c +++ b/drivers/extcon/extcon-axp288.c @@ -106,7 +106,7 @@ struct axp288_extcon_info { }; /* Power up/down reason string array */ -static char *axp288_pwr_up_down_info[] = { +static const char * const axp288_pwr_up_down_info[] = { "Last wake caused by user pressing the power button", "Last wake caused by a charger insertion", "Last wake caused by a battery insertion", @@ -124,7 +124,7 @@ static char *axp288_pwr_up_down_info[] = { */ static void axp288_extcon_log_rsi(struct axp288_extcon_info *info) { - char **rsi; + const char * const *rsi; unsigned int val, i, clear_mask = 0; int ret; From d82e233cee26ceacb9feb937a21bfb61b1826860 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 12 Feb 2018 20:46:29 +0100 Subject: [PATCH 046/269] Revert "extcon: axp288: Redo charger type detection a couple of seconds after probe()" Redoing the charger type detection to give the usb-role-switch code time to properly set the role-switch is no good for mainline, since the usb-role-switch code is not yet in mainline (my bad, sorry). Also once we've that code there are better ways to fix this which are not prone to racing as doing a retry after 2 seconds is. This reverts commit 50082c17bb1455acacd376ae30dff92f2e1addbd. Signed-off-by: Hans de Goede Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-axp288.c | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c index c8f7b6435679f..3ec4c715e2405 100644 --- a/drivers/extcon/extcon-axp288.c +++ b/drivers/extcon/extcon-axp288.c @@ -1,7 +1,6 @@ /* * extcon-axp288.c - X-Power AXP288 PMIC extcon cable detection driver * - * Copyright (C) 2016-2017 Hans de Goede * Copyright (C) 2015 Intel Corporation * Author: Ramakrishna Pallala * @@ -98,11 +97,9 @@ struct axp288_extcon_info { struct device *dev; struct regmap *regmap; struct regmap_irq_chip_data *regmap_irqc; - struct delayed_work det_work; int irq[EXTCON_IRQ_END]; struct extcon_dev *edev; unsigned int previous_cable; - bool first_detect_done; }; /* Power up/down reason string array */ @@ -140,25 +137,6 @@ static void axp288_extcon_log_rsi(struct axp288_extcon_info *info) regmap_write(info->regmap, AXP288_PS_BOOT_REASON_REG, clear_mask); } -static void axp288_chrg_detect_complete(struct axp288_extcon_info *info) -{ - /* - * We depend on other drivers to do things like mux the data lines, - * enable/disable vbus based on the id-pin, etc. Sometimes the BIOS has - * not set these things up correctly resulting in the initial charger - * cable type detection giving a wrong result and we end up not charging - * or charging at only 0.5A. - * - * So we schedule a second cable type detection after 2 seconds to - * give the other drivers time to load and do their thing. - */ - if (!info->first_detect_done) { - queue_delayed_work(system_wq, &info->det_work, - msecs_to_jiffies(2000)); - info->first_detect_done = true; - } -} - static int axp288_handle_chrg_det_event(struct axp288_extcon_info *info) { int ret, stat, cfg, pwr_stat; @@ -223,8 +201,6 @@ static int axp288_handle_chrg_det_event(struct axp288_extcon_info *info) info->previous_cable = cable; } - axp288_chrg_detect_complete(info); - return 0; dev_det_ret: @@ -246,11 +222,8 @@ static irqreturn_t axp288_extcon_isr(int irq, void *data) return IRQ_HANDLED; } -static void axp288_extcon_det_work(struct work_struct *work) +static void axp288_extcon_enable(struct axp288_extcon_info *info) { - struct axp288_extcon_info *info = - container_of(work, struct axp288_extcon_info, det_work.work); - regmap_update_bits(info->regmap, AXP288_BC_GLOBAL_REG, BC_GLOBAL_RUN, 0); /* Enable the charger detection logic */ @@ -272,7 +245,6 @@ static int axp288_extcon_probe(struct platform_device *pdev) info->regmap = axp20x->regmap; info->regmap_irqc = axp20x->regmap_irqc; info->previous_cable = EXTCON_NONE; - INIT_DELAYED_WORK(&info->det_work, axp288_extcon_det_work); platform_set_drvdata(pdev, info); @@ -318,7 +290,7 @@ static int axp288_extcon_probe(struct platform_device *pdev) } /* Start charger cable type detection */ - queue_delayed_work(system_wq, &info->det_work, 0); + axp288_extcon_enable(info); return 0; } From 0434352d3d2e950cf5e743f6062abd87de22f960 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 13 Feb 2018 20:25:50 +0100 Subject: [PATCH 047/269] extcon: int3496: process id-pin first so that we start with the right status Some other drivers may be waiting for our extcon to show-up, exiting their probe methods with -EPROBE_DEFER until we show up. These drivers will typically get the cable state directly after getting the extcon, this commit changes the int3496 code to wait for the initial processing of the id-pin to complete before exiting probe() with 0, which will cause devices waiting on the defered probe to get reprobed. This fixes a race where the initial work might still be running while other drivers were already calling extcon_get_state(). Fixes: 2f556bdb9f2e ("extcon: int3496: Add Intel INT3496 ACPI ... driver") Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-intel-int3496.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c index c8691b5a9cb00..191e99f06a9a5 100644 --- a/drivers/extcon/extcon-intel-int3496.c +++ b/drivers/extcon/extcon-intel-int3496.c @@ -153,8 +153,9 @@ static int int3496_probe(struct platform_device *pdev) return ret; } - /* queue initial processing of id-pin */ + /* process id-pin so that we start with the right status */ queue_delayed_work(system_wq, &data->work, 0); + flush_delayed_work(&data->work); platform_set_drvdata(pdev, data); From cabe92a55e3a12005a4ac4d3954c9a174b0efe2a Mon Sep 17 00:00:00 2001 From: "Michael Kelley (EOSG)" Date: Wed, 24 Jan 2018 22:49:57 +0000 Subject: [PATCH 048/269] scsi: storvsc: Increase cmd_per_lun for higher speed devices Increase cmd_per_lun to allow more I/Os in progress per device, particularly for NVMe's. The Hyper-V host side can handle the higher count with no issues. Signed-off-by: Michael Kelley Reviewed-by: K. Y. Srinivasan Acked-by: K. Y. Srinivasan Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index e07907d91d040..8eadb30115aaa 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1656,7 +1656,7 @@ static struct scsi_host_template scsi_driver = { .eh_timed_out = storvsc_eh_timed_out, .slave_alloc = storvsc_device_alloc, .slave_configure = storvsc_device_configure, - .cmd_per_lun = 255, + .cmd_per_lun = 2048, .this_id = -1, .use_clustering = ENABLE_CLUSTERING, /* Make sure we dont get a sg segment crosses a page boundary */ From eaf75d1815dad230dac2f1e8f1dc0349b2d50071 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 1 Feb 2018 10:33:17 -0800 Subject: [PATCH 049/269] scsi: qla2xxx: Fix double free bug after firmware timeout This patch is based on Max's original patch. When the qla2xxx firmware is unavailable, eventually qla2x00_sp_timeout() is reached, which calls the timeout function and frees the srb_t instance. The timeout function always resolves to qla2x00_async_iocb_timeout(), which invokes another callback function called "done". All of these qla2x00_*_sp_done() callbacks also free the srb_t instance; after returning to qla2x00_sp_timeout(), it is freed again. The fix is to remove the "sp->free(sp)" call from qla2x00_sp_timeout() and add it to those code paths in qla2x00_async_iocb_timeout() which do not already free the object. This is how it looks like with KASAN: BUG: KASAN: use-after-free in qla2x00_sp_timeout+0x228/0x250 Read of size 8 at addr ffff88278147a590 by task swapper/2/0 Allocated by task 1502: save_stack+0x33/0xa0 kasan_kmalloc+0xa0/0xd0 kmem_cache_alloc+0xb8/0x1c0 mempool_alloc+0xd6/0x260 qla24xx_async_gnl+0x3c5/0x1100 Freed by task 0: save_stack+0x33/0xa0 kasan_slab_free+0x72/0xc0 kmem_cache_free+0x75/0x200 qla24xx_async_gnl_sp_done+0x556/0x9e0 qla2x00_async_iocb_timeout+0x1c7/0x420 qla2x00_sp_timeout+0x16d/0x250 call_timer_fn+0x36/0x200 The buggy address belongs to the object at ffff88278147a440 which belongs to the cache qla2xxx_srbs of size 344 The buggy address is located 336 bytes inside of 344-byte region [ffff88278147a440, ffff88278147a598) Reported-by: Max Kellermann Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Cc: Max Kellermann Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_init.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index aececf664654d..2dea1129d3967 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -59,8 +59,6 @@ qla2x00_sp_timeout(struct timer_list *t) req->outstanding_cmds[sp->handle] = NULL; iocb = &sp->u.iocb_cmd; iocb->timeout(sp); - if (sp->type != SRB_ELS_DCMD) - sp->free(sp); spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); } @@ -102,7 +100,6 @@ qla2x00_async_iocb_timeout(void *data) srb_t *sp = data; fc_port_t *fcport = sp->fcport; struct srb_iocb *lio = &sp->u.iocb_cmd; - struct event_arg ea; if (fcport) { ql_dbg(ql_dbg_disc, fcport->vha, 0x2071, @@ -117,25 +114,13 @@ qla2x00_async_iocb_timeout(void *data) switch (sp->type) { case SRB_LOGIN_CMD: - if (!fcport) - break; /* Retry as needed. */ lio->u.logio.data[0] = MBS_COMMAND_ERROR; lio->u.logio.data[1] = lio->u.logio.flags & SRB_LOGIN_RETRIED ? QLA_LOGIO_LOGIN_RETRIED : 0; - memset(&ea, 0, sizeof(ea)); - ea.event = FCME_PLOGI_DONE; - ea.fcport = sp->fcport; - ea.data[0] = lio->u.logio.data[0]; - ea.data[1] = lio->u.logio.data[1]; - ea.sp = sp; - qla24xx_handle_plogi_done_event(fcport->vha, &ea); + sp->done(sp, QLA_FUNCTION_TIMEOUT); break; case SRB_LOGOUT_CMD: - if (!fcport) - break; - qlt_logo_completion_handler(fcport, QLA_FUNCTION_TIMEOUT); - break; case SRB_CT_PTHRU_CMD: case SRB_MB_IOCB: case SRB_NACK_PLOGI: @@ -235,12 +220,10 @@ static void qla2x00_async_logout_sp_done(void *ptr, int res) { srb_t *sp = ptr; - struct srb_iocb *lio = &sp->u.iocb_cmd; sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); - if (!test_bit(UNLOADING, &sp->vha->dpc_flags)) - qla2x00_post_async_logout_done_work(sp->vha, sp->fcport, - lio->u.logio.data); + sp->fcport->login_gen++; + qlt_logo_completion_handler(sp->fcport, res); sp->free(sp); } From f3767225021a48fc419d963559793e585da88b3d Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Thu, 1 Feb 2018 10:33:18 -0800 Subject: [PATCH 050/269] scsi: qla2xxx: Fix incorrect handle for abort IOCB This patch fixes incorrect handle used for abort IOCB. Fixes: b027a5ace443 ("scsi: qla2xxx: Fix queue ID for async abort with Multiqueue") Signed-off-by: Darren Trapp Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_iocb.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 1b62e943ec49c..8d00d559bd265 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -3275,12 +3275,11 @@ qla24xx_abort_iocb(srb_t *sp, struct abort_entry_24xx *abt_iocb) memset(abt_iocb, 0, sizeof(struct abort_entry_24xx)); abt_iocb->entry_type = ABORT_IOCB_TYPE; abt_iocb->entry_count = 1; - abt_iocb->handle = - cpu_to_le32(MAKE_HANDLE(aio->u.abt.req_que_no, - aio->u.abt.cmd_hndl)); + abt_iocb->handle = cpu_to_le32(MAKE_HANDLE(req->id, sp->handle)); abt_iocb->nport_handle = cpu_to_le16(sp->fcport->loop_id); abt_iocb->handle_to_abort = - cpu_to_le32(MAKE_HANDLE(req->id, aio->u.abt.cmd_hndl)); + cpu_to_le32(MAKE_HANDLE(aio->u.abt.req_que_no, + aio->u.abt.cmd_hndl)); abt_iocb->port_id[0] = sp->fcport->d_id.b.al_pa; abt_iocb->port_id[1] = sp->fcport->d_id.b.area; abt_iocb->port_id[2] = sp->fcport->d_id.b.domain; From 1683ce57f568c7c92d53e9234624a53554a29cd5 Mon Sep 17 00:00:00 2001 From: Andrew Vasquez Date: Wed, 7 Feb 2018 08:12:35 -0800 Subject: [PATCH 051/269] scsi: qedi: Fix truncation of CHAP name and secret The data in NVRAM is not guaranteed to be NUL terminated. Since snprintf expects byte-stream to accommodate null byte, the CHAP secret is truncated. Use sprintf instead of snprintf to fix the truncation of CHAP name and secret. Signed-off-by: Andrew Vasquez Signed-off-by: Nilesh Javali Reviewed-by: Bart Van Assche Acked-by: Chris Leech Acked-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 58596d17f7d98..7c05be680b94c 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1830,8 +1830,8 @@ static ssize_t qedi_show_boot_ini_info(void *data, int type, char *buf) switch (type) { case ISCSI_BOOT_INI_INITIATOR_NAME: - rc = snprintf(str, NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, "%s\n", - initiator->initiator_name.byte); + rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, + initiator->initiator_name.byte); break; default: rc = 0; @@ -1898,8 +1898,8 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type, switch (type) { case ISCSI_BOOT_TGT_NAME: - rc = snprintf(str, NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, "%s\n", - block->target[idx].target_name.byte); + rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, + block->target[idx].target_name.byte); break; case ISCSI_BOOT_TGT_IP_ADDR: if (ipv6_en) @@ -1920,20 +1920,20 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type, block->target[idx].lun.value[0]); break; case ISCSI_BOOT_TGT_CHAP_NAME: - rc = snprintf(str, NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, "%s\n", - chap_name); + rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + chap_name); break; case ISCSI_BOOT_TGT_CHAP_SECRET: - rc = snprintf(str, NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, "%s\n", - chap_secret); + rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + chap_secret); break; case ISCSI_BOOT_TGT_REV_CHAP_NAME: - rc = snprintf(str, NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, "%s\n", - mchap_name); + rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + mchap_name); break; case ISCSI_BOOT_TGT_REV_CHAP_SECRET: - rc = snprintf(str, NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, "%s\n", - mchap_secret); + rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + mchap_secret); break; case ISCSI_BOOT_TGT_FLAGS: rc = snprintf(str, 3, "%hhd\n", SYSFS_FLAG_FW_SEL_BOOT); From 2c08fe64e4f3b8528f6880b2bd7a66cce6fbcec3 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Wed, 7 Feb 2018 08:12:36 -0800 Subject: [PATCH 052/269] scsi: qedi: Cleanup local str variable Signed-off-by: Nilesh Javali Reviewed-by: Bart Van Assche Acked-by: Chris Leech Acked-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 43 ++++++++++++++++------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 7c05be680b94c..8b637d1fe5a41 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1723,7 +1723,6 @@ static ssize_t qedi_show_boot_eth_info(void *data, int type, char *buf) { struct qedi_ctx *qedi = data; struct nvm_iscsi_initiator *initiator; - char *str = buf; int rc = 1; u32 ipv6_en, dhcp_en, ip_len; struct nvm_iscsi_block *block; @@ -1757,32 +1756,32 @@ static ssize_t qedi_show_boot_eth_info(void *data, int type, char *buf) switch (type) { case ISCSI_BOOT_ETH_IP_ADDR: - rc = snprintf(str, ip_len, fmt, ip); + rc = snprintf(buf, ip_len, fmt, ip); break; case ISCSI_BOOT_ETH_SUBNET_MASK: - rc = snprintf(str, ip_len, fmt, sub); + rc = snprintf(buf, ip_len, fmt, sub); break; case ISCSI_BOOT_ETH_GATEWAY: - rc = snprintf(str, ip_len, fmt, gw); + rc = snprintf(buf, ip_len, fmt, gw); break; case ISCSI_BOOT_ETH_FLAGS: - rc = snprintf(str, 3, "%hhd\n", + rc = snprintf(buf, 3, "%hhd\n", SYSFS_FLAG_FW_SEL_BOOT); break; case ISCSI_BOOT_ETH_INDEX: - rc = snprintf(str, 3, "0\n"); + rc = snprintf(buf, 3, "0\n"); break; case ISCSI_BOOT_ETH_MAC: - rc = sysfs_format_mac(str, qedi->mac, ETH_ALEN); + rc = sysfs_format_mac(buf, qedi->mac, ETH_ALEN); break; case ISCSI_BOOT_ETH_VLAN: - rc = snprintf(str, 12, "%d\n", + rc = snprintf(buf, 12, "%d\n", GET_FIELD2(initiator->generic_cont0, NVM_ISCSI_CFG_INITIATOR_VLAN)); break; case ISCSI_BOOT_ETH_ORIGIN: if (dhcp_en) - rc = snprintf(str, 3, "3\n"); + rc = snprintf(buf, 3, "3\n"); break; default: rc = 0; @@ -1818,7 +1817,6 @@ static ssize_t qedi_show_boot_ini_info(void *data, int type, char *buf) { struct qedi_ctx *qedi = data; struct nvm_iscsi_initiator *initiator; - char *str = buf; int rc; struct nvm_iscsi_block *block; @@ -1830,7 +1828,7 @@ static ssize_t qedi_show_boot_ini_info(void *data, int type, char *buf) switch (type) { case ISCSI_BOOT_INI_INITIATOR_NAME: - rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, initiator->initiator_name.byte); break; default: @@ -1859,7 +1857,6 @@ static ssize_t qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type, char *buf, enum qedi_nvm_tgts idx) { - char *str = buf; int rc = 1; u32 ctrl_flags, ipv6_en, chap_en, mchap_en, ip_len; struct nvm_iscsi_block *block; @@ -1898,48 +1895,48 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type, switch (type) { case ISCSI_BOOT_TGT_NAME: - rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, block->target[idx].target_name.byte); break; case ISCSI_BOOT_TGT_IP_ADDR: if (ipv6_en) - rc = snprintf(str, ip_len, "%pI6\n", + rc = snprintf(buf, ip_len, "%pI6\n", block->target[idx].ipv6_addr.byte); else - rc = snprintf(str, ip_len, "%pI4\n", + rc = snprintf(buf, ip_len, "%pI4\n", block->target[idx].ipv4_addr.byte); break; case ISCSI_BOOT_TGT_PORT: - rc = snprintf(str, 12, "%d\n", + rc = snprintf(buf, 12, "%d\n", GET_FIELD2(block->target[idx].generic_cont0, NVM_ISCSI_CFG_TARGET_TCP_PORT)); break; case ISCSI_BOOT_TGT_LUN: - rc = snprintf(str, 22, "%.*d\n", + rc = snprintf(buf, 22, "%.*d\n", block->target[idx].lun.value[1], block->target[idx].lun.value[0]); break; case ISCSI_BOOT_TGT_CHAP_NAME: - rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, chap_name); break; case ISCSI_BOOT_TGT_CHAP_SECRET: - rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, chap_secret); break; case ISCSI_BOOT_TGT_REV_CHAP_NAME: - rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, mchap_name); break; case ISCSI_BOOT_TGT_REV_CHAP_SECRET: - rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, mchap_secret); break; case ISCSI_BOOT_TGT_FLAGS: - rc = snprintf(str, 3, "%hhd\n", SYSFS_FLAG_FW_SEL_BOOT); + rc = snprintf(buf, 3, "%hhd\n", SYSFS_FLAG_FW_SEL_BOOT); break; case ISCSI_BOOT_TGT_NIC_ASSOC: - rc = snprintf(str, 3, "0\n"); + rc = snprintf(buf, 3, "0\n"); break; default: rc = 0; From 00c20cdc79259c6c5bf978b21af96c2d3edb646d Mon Sep 17 00:00:00 2001 From: Meelis Roos Date: Fri, 9 Feb 2018 08:57:44 +0200 Subject: [PATCH 053/269] scsi: aacraid: fix shutdown crash when init fails When aacraid init fails with "AAC0: adapter self-test failed.", shutdown leads to UBSAN warning and then oops: [154316.118423] ================================================================================ [154316.118508] UBSAN: Undefined behaviour in drivers/scsi/scsi_lib.c:2328:27 [154316.118566] member access within null pointer of type 'struct Scsi_Host' [154316.118631] CPU: 2 PID: 14530 Comm: reboot Tainted: G W 4.15.0-dirty #89 [154316.118701] Hardware name: Hewlett Packard HP NetServer/HP System Board, BIOS 4.06.46 PW 06/25/2003 [154316.118774] Call Trace: [154316.118848] dump_stack+0x48/0x65 [154316.118916] ubsan_epilogue+0xe/0x40 [154316.118976] __ubsan_handle_type_mismatch+0xfb/0x180 [154316.119043] scsi_block_requests+0x20/0x30 [154316.119135] aac_shutdown+0x18/0x40 [aacraid] [154316.119196] pci_device_shutdown+0x33/0x50 [154316.119269] device_shutdown+0x18a/0x390 [...] [154316.123435] BUG: unable to handle kernel NULL pointer dereference at 000000f4 [154316.123515] IP: scsi_block_requests+0xa/0x30 This is because aac_shutdown() does struct Scsi_Host *shost = pci_get_drvdata(dev); scsi_block_requests(shost); and that assumes shost has been assigned with pci_set_drvdata(). However, pci_set_drvdata(pdev, shost) is done in aac_probe_one() far after bailing out with error from calling the init function ((*aac_drivers[index].init)(aac)), and when the init function fails, no error is returned from aac_probe_one() so PCI layer assumes there is driver attached, and tries to shut it down later. Fix it by returning error from aac_probe_one() when card-specific init function fails. This fixes reboot on my HP NetRAID-4M with dead battery. Signed-off-by: Meelis Roos Reviewed-by: Dave Carroll Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/linit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index ad6ec573cc878..b730e8edb8b32 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1690,8 +1690,10 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) * Map in the registers from the adapter. */ aac->base_size = AAC_MIN_FOOTPRINT_SIZE; - if ((*aac_drivers[index].init)(aac)) + if ((*aac_drivers[index].init)(aac)) { + error = -ENODEV; goto out_unmap; + } if (aac->sync_mode) { if (aac_sync_mode) From 1bc5ad3a6acdcf56f83272f2de1cd2389ea9e9e2 Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Sun, 11 Feb 2018 22:48:41 -0800 Subject: [PATCH 054/269] scsi: qla4xxx: skip error recovery in case of register disconnect. A system crashes when continuously removing/re-adding the storage controller. Signed-off-by: Manish Rangankar Reviewed-by: Ewan D. Milne Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/qla4xxx/ql4_def.h | 2 ++ drivers/scsi/qla4xxx/ql4_os.c | 46 ++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h index fc233717355fe..817f312023a99 100644 --- a/drivers/scsi/qla4xxx/ql4_def.h +++ b/drivers/scsi/qla4xxx/ql4_def.h @@ -168,6 +168,8 @@ #define DEV_DB_NON_PERSISTENT 0 #define DEV_DB_PERSISTENT 1 +#define QL4_ISP_REG_DISCONNECT 0xffffffffU + #define COPY_ISID(dst_isid, src_isid) { \ int i, j; \ for (i = 0, j = ISID_SIZE - 1; i < ISID_SIZE;) \ diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 82e889bbe0ed8..fc2c97d9a0d60 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -262,6 +262,24 @@ static struct iscsi_transport qla4xxx_iscsi_transport = { static struct scsi_transport_template *qla4xxx_scsi_transport; +static int qla4xxx_isp_check_reg(struct scsi_qla_host *ha) +{ + u32 reg_val = 0; + int rval = QLA_SUCCESS; + + if (is_qla8022(ha)) + reg_val = readl(&ha->qla4_82xx_reg->host_status); + else if (is_qla8032(ha) || is_qla8042(ha)) + reg_val = qla4_8xxx_rd_direct(ha, QLA8XXX_PEG_ALIVE_COUNTER); + else + reg_val = readw(&ha->reg->ctrl_status); + + if (reg_val == QL4_ISP_REG_DISCONNECT) + rval = QLA_ERROR; + + return rval; +} + static int qla4xxx_send_ping(struct Scsi_Host *shost, uint32_t iface_num, uint32_t iface_type, uint32_t payload_size, uint32_t pid, struct sockaddr *dst_addr) @@ -9186,10 +9204,17 @@ static int qla4xxx_eh_abort(struct scsi_cmnd *cmd) struct srb *srb = NULL; int ret = SUCCESS; int wait = 0; + int rval; ql4_printk(KERN_INFO, ha, "scsi%ld:%d:%llu: Abort command issued cmd=%p, cdb=0x%x\n", ha->host_no, id, lun, cmd, cmd->cmnd[0]); + rval = qla4xxx_isp_check_reg(ha); + if (rval != QLA_SUCCESS) { + ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n"); + return FAILED; + } + spin_lock_irqsave(&ha->hardware_lock, flags); srb = (struct srb *) CMD_SP(cmd); if (!srb) { @@ -9241,6 +9266,7 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd) struct scsi_qla_host *ha = to_qla_host(cmd->device->host); struct ddb_entry *ddb_entry = cmd->device->hostdata; int ret = FAILED, stat; + int rval; if (!ddb_entry) return ret; @@ -9260,6 +9286,12 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd) cmd, jiffies, cmd->request->timeout / HZ, ha->dpc_flags, cmd->result, cmd->allowed)); + rval = qla4xxx_isp_check_reg(ha); + if (rval != QLA_SUCCESS) { + ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n"); + return FAILED; + } + /* FIXME: wait for hba to go online */ stat = qla4xxx_reset_lun(ha, ddb_entry, cmd->device->lun); if (stat != QLA_SUCCESS) { @@ -9303,6 +9335,7 @@ static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd) struct scsi_qla_host *ha = to_qla_host(cmd->device->host); struct ddb_entry *ddb_entry = cmd->device->hostdata; int stat, ret; + int rval; if (!ddb_entry) return FAILED; @@ -9320,6 +9353,12 @@ static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd) ha->host_no, cmd, jiffies, cmd->request->timeout / HZ, ha->dpc_flags, cmd->result, cmd->allowed)); + rval = qla4xxx_isp_check_reg(ha); + if (rval != QLA_SUCCESS) { + ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n"); + return FAILED; + } + stat = qla4xxx_reset_target(ha, ddb_entry); if (stat != QLA_SUCCESS) { starget_printk(KERN_INFO, scsi_target(cmd->device), @@ -9374,9 +9413,16 @@ static int qla4xxx_eh_host_reset(struct scsi_cmnd *cmd) { int return_status = FAILED; struct scsi_qla_host *ha; + int rval; ha = to_qla_host(cmd->device->host); + rval = qla4xxx_isp_check_reg(ha); + if (rval != QLA_SUCCESS) { + ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n"); + return FAILED; + } + if ((is_qla8032(ha) || is_qla8042(ha)) && ql4xdontresethba) qla4_83xx_set_idc_dontreset(ha); From 9a3efb6b661f71d5675369ace9257833f0e78ef3 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 13 Feb 2018 19:00:21 -0800 Subject: [PATCH 055/269] bpf: fix memory leak in lpm_trie map_free callback function There is a memory leak happening in lpm_trie map_free callback function trie_free. The trie structure itself does not get freed. Also, trie_free function did not do synchronize_rcu before freeing various data structures. This is incorrect as some rcu_read_lock region(s) for lookup, update, delete or get_next_key may not complete yet. The fix is to add synchronize_rcu in the beginning of trie_free. The useless spin_lock is removed from this function as well. Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation") Reported-by: Mathieu Malaterre Reported-by: Alexei Starovoitov Tested-by: Mathieu Malaterre Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- kernel/bpf/lpm_trie.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 7b469d10d0e93..a75e02c961b5b 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -555,7 +555,10 @@ static void trie_free(struct bpf_map *map) struct lpm_trie_node __rcu **slot; struct lpm_trie_node *node; - raw_spin_lock(&trie->lock); + /* Wait for outstanding programs to complete + * update/lookup/delete/get_next_key and free the trie. + */ + synchronize_rcu(); /* Always start at the root and walk down to a node that has no * children. Then free that node, nullify its reference in the parent @@ -569,7 +572,7 @@ static void trie_free(struct bpf_map *map) node = rcu_dereference_protected(*slot, lockdep_is_held(&trie->lock)); if (!node) - goto unlock; + goto out; if (rcu_access_pointer(node->child[0])) { slot = &node->child[0]; @@ -587,8 +590,8 @@ static void trie_free(struct bpf_map *map) } } -unlock: - raw_spin_unlock(&trie->lock); +out: + kfree(trie); } static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) From 952fad8e323975c4e826b659087d2648777594a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2018 15:33:52 -0800 Subject: [PATCH 056/269] bpf: fix sock_map_alloc() error path In case user program provides silly parameters, we want a map_alloc() handler to return an error, not a NULL pointer, otherwise we crash later in find_and_alloc_map() Fixes: 1aa12bdf1bfb ("bpf: sockmap, add sock close() hook to remove socks") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- kernel/bpf/sockmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 48c33417d13c0..a927e89dad6e9 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -521,8 +521,8 @@ static struct smap_psock *smap_init_psock(struct sock *sock, static struct bpf_map *sock_map_alloc(union bpf_attr *attr) { struct bpf_stab *stab; - int err = -EINVAL; u64 cost; + int err; if (!capable(CAP_NET_ADMIN)) return ERR_PTR(-EPERM); @@ -547,6 +547,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) /* make sure page count doesn't overflow */ cost = (u64) stab->map.max_entries * sizeof(struct sock *); + err = -EINVAL; if (cost >= U32_MAX - PAGE_SIZE) goto free_stab; From 7fc17e909edfb9bf421ee04e981d3d474175c7c7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 14 Feb 2018 22:17:34 +0800 Subject: [PATCH 057/269] bpf: cpumap: use GFP_KERNEL instead of GFP_ATOMIC in __cpu_map_entry_alloc() There're several implications after commit 0bf7800f1799 ("ptr_ring: try vmalloc() when kmalloc() fails") with the using of vmalloc() since can't allow GFP_ATOMIC but mandate GFP_KERNEL. This will lead a WARN since cpumap try to call with GFP_ATOMIC. Fortunately, entry allocation of cpumap can only be done through syscall path which means GFP_ATOMIC is not necessary, so fixing this by replacing GFP_ATOMIC with GFP_KERNEL. Reported-by: syzbot+1a240cdb1f4cc88819df@syzkaller.appspotmail.com Fixes: 0bf7800f1799 ("ptr_ring: try vmalloc() when kmalloc() fails") Cc: Michal Hocko Cc: Daniel Borkmann Cc: Matthew Wilcox Cc: Jesper Dangaard Brouer Cc: akpm@linux-foundation.org Cc: dhowells@redhat.com Cc: hannes@cmpxchg.org Signed-off-by: Jason Wang Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- kernel/bpf/cpumap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index fbfdada6caeef..a4bb0b34375a6 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -334,7 +334,7 @@ static int cpu_map_kthread_run(void *data) static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, int map_id) { - gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN; + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; struct bpf_cpu_map_entry *rcpu; int numa, err; From 9c481b908b011398b1491752271cd1e2c9ad5758 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 14 Feb 2018 15:31:00 +0100 Subject: [PATCH 058/269] bpf: fix bpf_prog_array_copy_to_user warning from perf event prog query syzkaller tried to perform a prog query in perf_event_query_prog_array() where struct perf_event_query_bpf had an ids_len of 1,073,741,353 and thus causing a warning due to failed kcalloc() allocation out of the bpf_prog_array_copy_to_user() helper. Given we cannot attach more than 64 programs to a perf event, there's no point in allowing huge ids_len. Therefore, allow a buffer that would fix the maximum number of ids and also add a __GFP_NOWARN to the temporary ids buffer. Fixes: f371b304f12e ("bpf/tracing: allow user space to query prog array on the same tp") Fixes: 0911287ce32b ("bpf: fix bpf_prog_array_copy_to_user() issues") Reported-by: syzbot+cab5816b0edbabf598b3@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 2 +- kernel/trace/bpf_trace.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 29ca9208dcfad..d315b393abdd0 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1590,7 +1590,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, * so always copy 'cnt' prog_ids to the user. * In a rare race the user will see zero prog_ids */ - ids = kcalloc(cnt, sizeof(u32), GFP_USER); + ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN); if (!ids) return -ENOMEM; rcu_read_lock(); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index fc2838ac8b787..c0a9e310d7150 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -872,6 +872,8 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) return -EINVAL; if (copy_from_user(&query, uquery, sizeof(query))) return -EFAULT; + if (query.ids_len > BPF_TRACE_MAX_PROGS) + return -E2BIG; mutex_lock(&bpf_event_mutex); ret = bpf_prog_array_copy_info(event->tp_event->prog_array, From 01ea306f2ac2baff98d472da719193e738759d93 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 8 Feb 2018 12:19:00 +0100 Subject: [PATCH 059/269] netfilter: drop outermost socket lock in getsockopt() The Syzbot reported a possible deadlock in the netfilter area caused by rtnl lock, xt lock and socket lock being acquired with a different order on different code paths, leading to the following backtrace: Reviewed-by: Xin Long ====================================================== WARNING: possible circular locking dependency detected 4.15.0+ #301 Not tainted ------------------------------------------------------ syzkaller233489/4179 is trying to acquire lock: (rtnl_mutex){+.+.}, at: [<0000000048e996fd>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 but task is already holding lock: (&xt[i].mutex){+.+.}, at: [<00000000328553a2>] xt_find_table_lock+0x3e/0x3e0 net/netfilter/x_tables.c:1041 which lock already depends on the new lock. === Since commit 3f34cfae1230 ("netfilter: on sockopt() acquire sock lock only in the required scope"), we already acquire the socket lock in the innermost scope, where needed. In such commit I forgot to remove the outer-most socket lock from the getsockopt() path, this commit addresses the issues dropping it now. v1 -> v2: fix bad subj, added relavant 'fixes' tag Fixes: 22265a5c3c10 ("netfilter: xt_TEE: resolve oif using netdevice notifiers") Fixes: 202f59afd441 ("netfilter: ipt_CLUSTERIP: do not hold dev") Fixes: 3f34cfae1230 ("netfilter: on sockopt() acquire sock lock only in the required scope") Reported-by: syzbot+ddde1c7b7ff7442d7f2d@syzkaller.appspotmail.com Suggested-by: Florian Westphal Signed-off-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso --- net/ipv4/ip_sockglue.c | 7 +------ net/ipv6/ipv6_sockglue.c | 10 ++-------- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 008be04ac1cc5..9c41a0cef1a51 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1567,10 +1567,7 @@ int ip_getsockopt(struct sock *sk, int level, if (get_user(len, optlen)) return -EFAULT; - lock_sock(sk); - err = nf_getsockopt(sk, PF_INET, optname, optval, - &len); - release_sock(sk); + err = nf_getsockopt(sk, PF_INET, optname, optval, &len); if (err >= 0) err = put_user(len, optlen); return err; @@ -1602,9 +1599,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - lock_sock(sk); err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len); - release_sock(sk); if (err >= 0) err = put_user(len, optlen); return err; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d78d41fc4b1a4..24535169663dc 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1367,10 +1367,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - lock_sock(sk); - err = nf_getsockopt(sk, PF_INET6, optname, optval, - &len); - release_sock(sk); + err = nf_getsockopt(sk, PF_INET6, optname, optval, &len); if (err >= 0) err = put_user(len, optlen); } @@ -1409,10 +1406,7 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - lock_sock(sk); - err = compat_nf_getsockopt(sk, PF_INET6, - optname, optval, &len); - release_sock(sk); + err = compat_nf_getsockopt(sk, PF_INET6, optname, optval, &len); if (err >= 0) err = put_user(len, optlen); } From 57ebd808a97d7c5b1e1afb937c2db22beba3c1f8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Feb 2018 13:46:25 +0100 Subject: [PATCH 060/269] netfilter: add back stackpointer size checks The rationale for removing the check is only correct for rulesets generated by ip(6)tables. In iptables, a jump can only occur to a user-defined chain, i.e. because we size the stack based on number of user-defined chains we cannot exceed stack size. However, the underlying binary format has no such restriction, and the validation step only ensures that the jump target is a valid rule start point. IOW, its possible to build a rule blob that has no user-defined chains but does contain a jump. If this happens, no jump stack gets allocated and crash occurs because no jumpstack was allocated. Fixes: 7814b6ec6d0d6 ("netfilter: xtables: don't save/restore jumpstack offset") Reported-by: syzbot+e783f671527912cd9403@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 4 ++++ net/ipv4/netfilter/ip_tables.c | 7 ++++++- net/ipv6/netfilter/ip6_tables.c | 4 ++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 4ffe302f9b820..e3e420f3ba7b2 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -252,6 +252,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, } if (table_base + v != arpt_next_entry(e)) { + if (unlikely(stackidx >= private->stacksize)) { + verdict = NF_DROP; + break; + } jumpstack[stackidx++] = e; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 9a71f31495070..e38395a8dcf28 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -330,8 +330,13 @@ ipt_do_table(struct sk_buff *skb, continue; } if (table_base + v != ipt_next_entry(e) && - !(e->ip.flags & IPT_F_GOTO)) + !(e->ip.flags & IPT_F_GOTO)) { + if (unlikely(stackidx >= private->stacksize)) { + verdict = NF_DROP; + break; + } jumpstack[stackidx++] = e; + } e = get_entry(table_base, v); continue; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index af4c917e08369..62358b93bbac5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -352,6 +352,10 @@ ip6t_do_table(struct sk_buff *skb, } if (table_base + v != ip6t_next_entry(e) && !(e->ipv6.flags & IP6T_F_GOTO)) { + if (unlikely(stackidx >= private->stacksize)) { + verdict = NF_DROP; + break; + } jumpstack[stackidx++] = e; } From db93a3632b0f8773a3899e04a3a3e0aa7a26eb46 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 8 Feb 2018 13:53:52 -0800 Subject: [PATCH 061/269] netfilter: ipt_CLUSTERIP: fix a refcount bug in clusterip_config_find_get() In clusterip_config_find_get() we hold RCU read lock so it could run concurrently with clusterip_config_entry_put(), as a result, the refcnt could go back to 1 from 0, which leads to a double list_del()... Just replace refcount_inc() with refcount_inc_not_zero(), as for c->refcount. Fixes: d73f33b16883 ("netfilter: CLUSTERIP: RCU conversion") Cc: Eric Dumazet Cc: Pablo Neira Ayuso Cc: Florian Westphal Signed-off-by: Cong Wang Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1ff72b87a0661..4b02ab39ebc54 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -154,8 +154,12 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry) #endif if (unlikely(!refcount_inc_not_zero(&c->refcount))) c = NULL; - else if (entry) - refcount_inc(&c->entries); + else if (entry) { + if (unlikely(!refcount_inc_not_zero(&c->entries))) { + clusterip_config_put(c); + c = NULL; + } + } } rcu_read_unlock_bh(); From 0cc9501f94592125b2012452c57054b8215bcf33 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:51:59 +0100 Subject: [PATCH 062/269] netfilter: x_tables: remove pr_info where possible remove several pr_info messages that cannot be triggered with iptables, the check is only to ensure input is sane. iptables(8) already prints error messages in these cases. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_ECN.c | 10 ++++------ net/netfilter/xt_CHECKSUM.c | 5 ++--- net/netfilter/xt_DSCP.c | 4 +--- net/netfilter/xt_HL.c | 13 +++---------- net/netfilter/xt_HMARK.c | 10 ++++------ net/netfilter/xt_LED.c | 4 +--- net/netfilter/xt_dscp.c | 4 +--- 7 files changed, 16 insertions(+), 34 deletions(-) diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 270765236f5e8..39ff167e6d865 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -98,14 +98,12 @@ static int ecn_tg_check(const struct xt_tgchk_param *par) const struct ipt_ECN_info *einfo = par->targinfo; const struct ipt_entry *e = par->entryinfo; - if (einfo->operation & IPT_ECN_OP_MASK) { - pr_info("unsupported ECN operation %x\n", einfo->operation); + if (einfo->operation & IPT_ECN_OP_MASK) return -EINVAL; - } - if (einfo->ip_ect & ~IPT_ECN_IP_MASK) { - pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect); + + if (einfo->ip_ect & ~IPT_ECN_IP_MASK) return -EINVAL; - } + if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { pr_info("cannot use TCP operations on a non-tcp rule\n"); diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c index 0f642ef8cd266..ea3c5701fb0f8 100644 --- a/net/netfilter/xt_CHECKSUM.c +++ b/net/netfilter/xt_CHECKSUM.c @@ -39,10 +39,9 @@ static int checksum_tg_check(const struct xt_tgchk_param *par) pr_info("unsupported CHECKSUM operation %x\n", einfo->operation); return -EINVAL; } - if (!einfo->operation) { - pr_info("no CHECKSUM operation enabled\n"); + if (!einfo->operation) return -EINVAL; - } + return 0; } diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 3f83d38c4e5bb..098ed851b7a78 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -66,10 +66,8 @@ static int dscp_tg_check(const struct xt_tgchk_param *par) { const struct xt_DSCP_info *info = par->targinfo; - if (info->dscp > XT_DSCP_MAX) { - pr_info("dscp %x out of range\n", info->dscp); + if (info->dscp > XT_DSCP_MAX) return -EDOM; - } return 0; } diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c index 1535e87ed9bd4..4653b071bed41 100644 --- a/net/netfilter/xt_HL.c +++ b/net/netfilter/xt_HL.c @@ -105,10 +105,8 @@ static int ttl_tg_check(const struct xt_tgchk_param *par) { const struct ipt_TTL_info *info = par->targinfo; - if (info->mode > IPT_TTL_MAXMODE) { - pr_info("TTL: invalid or unknown mode %u\n", info->mode); + if (info->mode > IPT_TTL_MAXMODE) return -EINVAL; - } if (info->mode != IPT_TTL_SET && info->ttl == 0) return -EINVAL; return 0; @@ -118,15 +116,10 @@ static int hl_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_HL_info *info = par->targinfo; - if (info->mode > IP6T_HL_MAXMODE) { - pr_info("invalid or unknown mode %u\n", info->mode); + if (info->mode > IP6T_HL_MAXMODE) return -EINVAL; - } - if (info->mode != IP6T_HL_SET && info->hop_limit == 0) { - pr_info("increment/decrement does not " - "make sense with value 0\n"); + if (info->mode != IP6T_HL_SET && info->hop_limit == 0) return -EINVAL; - } return 0; } diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c index 60e6dbe124605..dd08cc1f86c7d 100644 --- a/net/netfilter/xt_HMARK.c +++ b/net/netfilter/xt_HMARK.c @@ -313,10 +313,9 @@ static int hmark_tg_check(const struct xt_tgchk_param *par) { const struct xt_hmark_info *info = par->targinfo; - if (!info->hmodulus) { - pr_info("xt_HMARK: hash modulus can't be zero\n"); + if (!info->hmodulus) return -EINVAL; - } + if (info->proto_mask && (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) { pr_info("xt_HMARK: proto mask must be zero with L3 mode\n"); @@ -324,10 +323,9 @@ static int hmark_tg_check(const struct xt_tgchk_param *par) } if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) && (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) | - XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) { - pr_info("xt_HMARK: spi-mask and port-mask can't be combined\n"); + XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) return -EINVAL; - } + if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) && (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) | XT_HMARK_FLAG(XT_HMARK_DPORT)))) { diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 1dcad893df781..ece311c11fdcf 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -111,10 +111,8 @@ static int led_tg_check(const struct xt_tgchk_param *par) struct xt_led_info_internal *ledinternal; int err; - if (ledinfo->id[0] == '\0') { - pr_info("No 'id' parameter given.\n"); + if (ledinfo->id[0] == '\0') return -EINVAL; - } mutex_lock(&xt_led_mutex); diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 236ac8008909d..a4c2b862f820a 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -46,10 +46,8 @@ static int dscp_mt_check(const struct xt_mtchk_param *par) { const struct xt_dscp_info *info = par->matchinfo; - if (info->dscp > XT_DSCP_MAX) { - pr_info("dscp %x out of range\n", info->dscp); + if (info->dscp > XT_DSCP_MAX) return -EDOM; - } return 0; } From 1b6cd67191e16a66f69c9881d878204c3143f03f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:00 +0100 Subject: [PATCH 063/269] netfilter: x_tables: use pr ratelimiting in xt core most messages are converted to info, since they occur in response to wrong usage. Size mismatch however is a real error (xtables ABI bug) that should not occur. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 70 +++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 97e06a04c0d4e..fa1655aff8d3f 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -434,36 +434,35 @@ int xt_check_match(struct xt_mtchk_param *par, * ebt_among is exempt from centralized matchsize checking * because it uses a dynamic-size data set. */ - pr_err("%s_tables: %s.%u match: invalid size " - "%u (kernel) != (user) %u\n", - xt_prefix[par->family], par->match->name, - par->match->revision, - XT_ALIGN(par->match->matchsize), size); + pr_err_ratelimited("%s_tables: %s.%u match: invalid size %u (kernel) != (user) %u\n", + xt_prefix[par->family], par->match->name, + par->match->revision, + XT_ALIGN(par->match->matchsize), size); return -EINVAL; } if (par->match->table != NULL && strcmp(par->match->table, par->table) != 0) { - pr_err("%s_tables: %s match: only valid in %s table, not %s\n", - xt_prefix[par->family], par->match->name, - par->match->table, par->table); + pr_info_ratelimited("%s_tables: %s match: only valid in %s table, not %s\n", + xt_prefix[par->family], par->match->name, + par->match->table, par->table); return -EINVAL; } if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) { char used[64], allow[64]; - pr_err("%s_tables: %s match: used from hooks %s, but only " - "valid from %s\n", - xt_prefix[par->family], par->match->name, - textify_hooks(used, sizeof(used), par->hook_mask, - par->family), - textify_hooks(allow, sizeof(allow), par->match->hooks, - par->family)); + pr_info_ratelimited("%s_tables: %s match: used from hooks %s, but only valid from %s\n", + xt_prefix[par->family], par->match->name, + textify_hooks(used, sizeof(used), + par->hook_mask, par->family), + textify_hooks(allow, sizeof(allow), + par->match->hooks, + par->family)); return -EINVAL; } if (par->match->proto && (par->match->proto != proto || inv_proto)) { - pr_err("%s_tables: %s match: only valid for protocol %u\n", - xt_prefix[par->family], par->match->name, - par->match->proto); + pr_info_ratelimited("%s_tables: %s match: only valid for protocol %u\n", + xt_prefix[par->family], par->match->name, + par->match->proto); return -EINVAL; } if (par->match->checkentry != NULL) { @@ -814,36 +813,35 @@ int xt_check_target(struct xt_tgchk_param *par, int ret; if (XT_ALIGN(par->target->targetsize) != size) { - pr_err("%s_tables: %s.%u target: invalid size " - "%u (kernel) != (user) %u\n", - xt_prefix[par->family], par->target->name, - par->target->revision, - XT_ALIGN(par->target->targetsize), size); + pr_err_ratelimited("%s_tables: %s.%u target: invalid size %u (kernel) != (user) %u\n", + xt_prefix[par->family], par->target->name, + par->target->revision, + XT_ALIGN(par->target->targetsize), size); return -EINVAL; } if (par->target->table != NULL && strcmp(par->target->table, par->table) != 0) { - pr_err("%s_tables: %s target: only valid in %s table, not %s\n", - xt_prefix[par->family], par->target->name, - par->target->table, par->table); + pr_info_ratelimited("%s_tables: %s target: only valid in %s table, not %s\n", + xt_prefix[par->family], par->target->name, + par->target->table, par->table); return -EINVAL; } if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) { char used[64], allow[64]; - pr_err("%s_tables: %s target: used from hooks %s, but only " - "usable from %s\n", - xt_prefix[par->family], par->target->name, - textify_hooks(used, sizeof(used), par->hook_mask, - par->family), - textify_hooks(allow, sizeof(allow), par->target->hooks, - par->family)); + pr_info_ratelimited("%s_tables: %s target: used from hooks %s, but only usable from %s\n", + xt_prefix[par->family], par->target->name, + textify_hooks(used, sizeof(used), + par->hook_mask, par->family), + textify_hooks(allow, sizeof(allow), + par->target->hooks, + par->family)); return -EINVAL; } if (par->target->proto && (par->target->proto != proto || inv_proto)) { - pr_err("%s_tables: %s target: only valid for protocol %u\n", - xt_prefix[par->family], par->target->name, - par->target->proto); + pr_info_ratelimited("%s_tables: %s target: only valid for protocol %u\n", + xt_prefix[par->family], par->target->name, + par->target->proto); return -EINVAL; } if (par->target->checkentry != NULL) { From 11f7aee2326f37f9d3abba27bb61d92ec09fbfde Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:01 +0100 Subject: [PATCH 064/269] netfilter: xt_CT: use pr ratelimiting checkpatch complains about line > 80 but this would require splitting "literal" over two lines which is worse. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_CT.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 5a152e2acfd58..8790190c6feb3 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -82,15 +82,14 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name, proto = xt_ct_find_proto(par); if (!proto) { - pr_info("You must specify a L4 protocol, and not use " - "inversions on it.\n"); + pr_info_ratelimited("You must specify a L4 protocol and not use inversions on it\n"); return -ENOENT; } helper = nf_conntrack_helper_try_module_get(helper_name, par->family, proto); if (helper == NULL) { - pr_info("No such helper \"%s\"\n", helper_name); + pr_info_ratelimited("No such helper \"%s\"\n", helper_name); return -ENOENT; } @@ -124,6 +123,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, const struct nf_conntrack_l4proto *l4proto; struct ctnl_timeout *timeout; struct nf_conn_timeout *timeout_ext; + const char *errmsg = NULL; int ret = 0; u8 proto; @@ -131,29 +131,29 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook); if (timeout_find_get == NULL) { ret = -ENOENT; - pr_info("Timeout policy base is empty\n"); + errmsg = "Timeout policy base is empty"; goto out; } proto = xt_ct_find_proto(par); if (!proto) { ret = -EINVAL; - pr_info("You must specify a L4 protocol, and not use " - "inversions on it.\n"); + errmsg = "You must specify a L4 protocol and not use inversions on it"; goto out; } timeout = timeout_find_get(par->net, timeout_name); if (timeout == NULL) { ret = -ENOENT; - pr_info("No such timeout policy \"%s\"\n", timeout_name); + pr_info_ratelimited("No such timeout policy \"%s\"\n", + timeout_name); goto out; } if (timeout->l3num != par->family) { ret = -EINVAL; - pr_info("Timeout policy `%s' can only be used by L3 protocol " - "number %d\n", timeout_name, timeout->l3num); + pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n", + timeout_name, 3, timeout->l3num); goto err_put_timeout; } /* Make sure the timeout policy matches any existing protocol tracker, @@ -162,9 +162,8 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, l4proto = __nf_ct_l4proto_find(par->family, proto); if (timeout->l4proto->l4proto != l4proto->l4proto) { ret = -EINVAL; - pr_info("Timeout policy `%s' can only be used by L4 protocol " - "number %d\n", - timeout_name, timeout->l4proto->l4proto); + pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n", + timeout_name, 4, timeout->l4proto->l4proto); goto err_put_timeout; } timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC); @@ -180,6 +179,8 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, __xt_ct_tg_timeout_put(timeout); out: rcu_read_unlock(); + if (errmsg) + pr_info_ratelimited("%s\n", errmsg); return ret; #else return -EOPNOTSUPP; From e016c5e43db51875c2b541b59bd217494d213174 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:02 +0100 Subject: [PATCH 065/269] netfilter: xt_NFQUEUE: use pr ratelimiting switch this to info, since these aren't really errors. We only use printk because we cannot report meaningful errors in the xtables framework. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_NFQUEUE.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index a360b99a958af..a9aca80a32aeb 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -8,6 +8,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include @@ -67,13 +69,13 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par) init_hashrandom(&jhash_initval); if (info->queues_total == 0) { - pr_err("NFQUEUE: number of total queues is 0\n"); + pr_info_ratelimited("number of total queues is 0\n"); return -EINVAL; } maxid = info->queues_total - 1 + info->queuenum; if (maxid > 0xffff) { - pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n", - info->queues_total, maxid); + pr_info_ratelimited("number of queues (%u) out of range (got %u)\n", + info->queues_total, maxid); return -ERANGE; } if (par->target->revision == 2 && info->flags > 1) From c82b31c5f5608f7f069c584ac169f5691a92d3f5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:03 +0100 Subject: [PATCH 066/269] netfilter: xt_set: use pr ratelimiting also convert this to info for consistency. These errors are informational message to user, given iptables doesn't have netlink extack equivalent. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_set.c | 50 +++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 16b6b11ee83f0..6f4c5217d8358 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -92,12 +92,12 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find set identified by id %u to match\n", - info->match_set.index); + pr_info_ratelimited("Cannot find set identified by id %u to match\n", + info->match_set.index); return -ENOENT; } if (info->match_set.u.flags[IPSET_DIM_MAX - 1] != 0) { - pr_warn("Protocol error: set match dimension is over the limit!\n"); + pr_info_ratelimited("set match dimension is over the limit!\n"); ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -143,12 +143,12 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par) index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find set identified by id %u to match\n", - info->match_set.index); + pr_info_ratelimited("Cannot find set identified by id %u to match\n", + info->match_set.index); return -ENOENT; } if (info->match_set.dim > IPSET_DIM_MAX) { - pr_warn("Protocol error: set match dimension is over the limit!\n"); + pr_info_ratelimited("set match dimension is over the limit!\n"); ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -241,8 +241,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) if (info->add_set.index != IPSET_INVALID_ID) { index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find add_set index %u as target\n", - info->add_set.index); + pr_info_ratelimited("Cannot find add_set index %u as target\n", + info->add_set.index); return -ENOENT; } } @@ -250,8 +250,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) if (info->del_set.index != IPSET_INVALID_ID) { index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find del_set index %u as target\n", - info->del_set.index); + pr_info_ratelimited("Cannot find del_set index %u as target\n", + info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; @@ -259,7 +259,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) } if (info->add_set.u.flags[IPSET_DIM_MAX - 1] != 0 || info->del_set.u.flags[IPSET_DIM_MAX - 1] != 0) { - pr_warn("Protocol error: SET target dimension is over the limit!\n"); + pr_info_ratelimited("SET target dimension over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) @@ -316,8 +316,8 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) if (info->add_set.index != IPSET_INVALID_ID) { index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find add_set index %u as target\n", - info->add_set.index); + pr_info_ratelimited("Cannot find add_set index %u as target\n", + info->add_set.index); return -ENOENT; } } @@ -325,8 +325,8 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) if (info->del_set.index != IPSET_INVALID_ID) { index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find del_set index %u as target\n", - info->del_set.index); + pr_info_ratelimited("Cannot find del_set index %u as target\n", + info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; @@ -334,7 +334,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) } if (info->add_set.dim > IPSET_DIM_MAX || info->del_set.dim > IPSET_DIM_MAX) { - pr_warn("Protocol error: SET target dimension is over the limit!\n"); + pr_info_ratelimited("SET target dimension over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) @@ -444,8 +444,8 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find add_set index %u as target\n", - info->add_set.index); + pr_info_ratelimited("Cannot find add_set index %u as target\n", + info->add_set.index); return -ENOENT; } } @@ -454,8 +454,8 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find del_set index %u as target\n", - info->del_set.index); + pr_info_ratelimited("Cannot find del_set index %u as target\n", + info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); @@ -465,7 +465,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) if (info->map_set.index != IPSET_INVALID_ID) { if (strncmp(par->table, "mangle", 7)) { - pr_warn("--map-set only usable from mangle table\n"); + pr_info_ratelimited("--map-set only usable from mangle table\n"); return -EINVAL; } if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) | @@ -473,14 +473,14 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) !(par->hook_mask & (1 << NF_INET_FORWARD | 1 << NF_INET_LOCAL_OUT | 1 << NF_INET_POST_ROUTING))) { - pr_warn("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); + pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); return -EINVAL; } index = ip_set_nfnl_get_byindex(par->net, info->map_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find map_set index %u as target\n", - info->map_set.index); + pr_info_ratelimited("Cannot find map_set index %u as target\n", + info->map_set.index); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); @@ -494,7 +494,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) if (info->add_set.dim > IPSET_DIM_MAX || info->del_set.dim > IPSET_DIM_MAX || info->map_set.dim > IPSET_DIM_MAX) { - pr_warn("Protocol error: SET target dimension is over the limit!\n"); + pr_info_ratelimited("SET target dimension over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) From 7ecbf1033521194e544477377ff7e837d25f1ef3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:04 +0100 Subject: [PATCH 067/269] netfilter: bridge: use pr ratelimiting ebt_among still uses pr_err -- these errors indicate ebtables tool bug, not a usage error. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_among.c | 10 +++++----- net/bridge/netfilter/ebt_limit.c | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 279527f8b1fe7..ce7152a12bd86 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -187,17 +187,17 @@ static int ebt_among_mt_check(const struct xt_mtchk_param *par) expected_length += ebt_mac_wormhash_size(wh_src); if (em->match_size != EBT_ALIGN(expected_length)) { - pr_info("wrong size: %d against expected %d, rounded to %zd\n", - em->match_size, expected_length, - EBT_ALIGN(expected_length)); + pr_err_ratelimited("wrong size: %d against expected %d, rounded to %zd\n", + em->match_size, expected_length, + EBT_ALIGN(expected_length)); return -EINVAL; } if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) { - pr_info("dst integrity fail: %x\n", -err); + pr_err_ratelimited("dst integrity fail: %x\n", -err); return -EINVAL; } if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) { - pr_info("src integrity fail: %x\n", -err); + pr_err_ratelimited("src integrity fail: %x\n", -err); return -EINVAL; } return 0; diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index 61a9f1be1263a..165b9d678cf1d 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -72,8 +72,8 @@ static int ebt_limit_mt_check(const struct xt_mtchk_param *par) /* Check for overflow. */ if (info->burst == 0 || user2credits(info->avg * info->burst) < user2credits(info->avg)) { - pr_info("overflow, try lower: %u/%u\n", - info->avg, info->burst); + pr_info_ratelimited("overflow, try lower: %u/%u\n", + info->avg, info->burst); return -EINVAL; } From cc48baefdfff83e3774811f69eb181b8850bd8af Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:05 +0100 Subject: [PATCH 068/269] netfilter: x_tables: rate-limit table mismatch warnings Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_rpfilter.c | 4 ++-- net/ipv6/netfilter/ip6t_rpfilter.c | 4 ++-- net/netfilter/xt_CONNSECMARK.c | 4 ++-- net/netfilter/xt_SECMARK.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 37fb9552e8589..5d107dd9098e6 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -111,8 +111,8 @@ static int rpfilter_check(const struct xt_mtchk_param *par) if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "raw") != 0) { - pr_info("match only valid in the \'raw\' " - "or \'mangle\' tables, not \'%s\'.\n", par->table); + pr_info_ratelimited("only valid in \'raw\' or \'mangle\' table, not \'%s\'\n", + par->table); return -EINVAL; } diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index b12e61b7b16ce..ddf3111f9810d 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -109,8 +109,8 @@ static int rpfilter_check(const struct xt_mtchk_param *par) if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "raw") != 0) { - pr_info("match only valid in the \'raw\' " - "or \'mangle\' tables, not \'%s\'.\n", par->table); + pr_info_ratelimited("only valid in \'raw\' or \'mangle\' table, not \'%s\'\n", + par->table); return -EINVAL; } diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index da56c06a443c0..6f30cd399e426 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -91,8 +91,8 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par) if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "security") != 0) { - pr_info("target only valid in the \'mangle\' " - "or \'security\' tables, not \'%s\'.\n", par->table); + pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n", + par->table); return -EINVAL; } diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 9faf5e050b796..5c5cd782fab5c 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -86,8 +86,8 @@ static int secmark_tg_check(const struct xt_tgchk_param *par) if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "security") != 0) { - pr_info("target only valid in the \'mangle\' " - "or \'security\' tables, not \'%s\'.\n", par->table); + pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n", + par->table); return -EINVAL; } From c08e5e1ee6d65917af2bb12c2c568d637a682c44 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:06 +0100 Subject: [PATCH 069/269] netfilter: x_tables: use pr ratelimiting in matches/targets all of these print simple error message - use single pr_ratelimit call. checkpatch complains about lines > 80 but this would require splitting several "literals" over multiple lines which is worse. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_HMARK.c | 17 +++++++++++------ net/netfilter/xt_addrtype.c | 33 ++++++++++++++++----------------- net/netfilter/xt_policy.c | 23 +++++++++++++---------- 3 files changed, 40 insertions(+), 33 deletions(-) diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c index dd08cc1f86c7d..9c75f419cd804 100644 --- a/net/netfilter/xt_HMARK.c +++ b/net/netfilter/xt_HMARK.c @@ -9,6 +9,8 @@ * the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -312,15 +314,15 @@ hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par) static int hmark_tg_check(const struct xt_tgchk_param *par) { const struct xt_hmark_info *info = par->targinfo; + const char *errmsg = "proto mask must be zero with L3 mode"; if (!info->hmodulus) return -EINVAL; if (info->proto_mask && - (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) { - pr_info("xt_HMARK: proto mask must be zero with L3 mode\n"); - return -EINVAL; - } + (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) + goto err; + if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) && (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) | XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) @@ -329,10 +331,13 @@ static int hmark_tg_check(const struct xt_tgchk_param *par) if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) && (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) | XT_HMARK_FLAG(XT_HMARK_DPORT)))) { - pr_info("xt_HMARK: spi-set and port-set can't be combined\n"); - return -EINVAL; + errmsg = "spi-set and port-set can't be combined"; + goto err; } return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + return -EINVAL; } static struct xt_target hmark_tg_reg[] __read_mostly = { diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 911a7c0da5040..89e281b3bfc24 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -164,48 +164,47 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) { + const char *errmsg = "both incoming and outgoing interface limitation cannot be selected"; struct xt_addrtype_info_v1 *info = par->matchinfo; if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN && - info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) { - pr_info("both incoming and outgoing " - "interface limitation cannot be selected\n"); - return -EINVAL; - } + info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) + goto err; if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) { - pr_info("output interface limitation " - "not valid in PREROUTING and INPUT\n"); - return -EINVAL; + errmsg = "output interface limitation not valid in PREROUTING and INPUT"; + goto err; } if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) { - pr_info("input interface limitation " - "not valid in POSTROUTING and OUTPUT\n"); - return -EINVAL; + errmsg = "input interface limitation not valid in POSTROUTING and OUTPUT"; + goto err; } #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) if (par->family == NFPROTO_IPV6) { if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) { - pr_err("ipv6 BLACKHOLE matching not supported\n"); - return -EINVAL; + errmsg = "ipv6 BLACKHOLE matching not supported"; + goto err; } if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) { - pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n"); - return -EINVAL; + errmsg = "ipv6 PROHIBIT (THROW, NAT ..) matching not supported"; + goto err; } if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) { - pr_err("ipv6 does not support BROADCAST matching\n"); - return -EINVAL; + errmsg = "ipv6 does not support BROADCAST matching"; + goto err; } } #endif return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + return -EINVAL; } static struct xt_match addrtype_mt_reg[] __read_mostly = { diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 5639fb03bdd92..13f8ccf946d62 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -132,26 +132,29 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par) static int policy_mt_check(const struct xt_mtchk_param *par) { const struct xt_policy_info *info = par->matchinfo; + const char *errmsg = "neither incoming nor outgoing policy selected"; + + if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) + goto err; - if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) { - pr_info("neither incoming nor outgoing policy selected\n"); - return -EINVAL; - } if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) { - pr_info("output policy not valid in PREROUTING and INPUT\n"); - return -EINVAL; + errmsg = "output policy not valid in PREROUTING and INPUT"; + goto err; } if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) { - pr_info("input policy not valid in POSTROUTING and OUTPUT\n"); - return -EINVAL; + errmsg = "input policy not valid in POSTROUTING and OUTPUT"; + goto err; } if (info->len > XT_POLICY_MAX_ELEM) { - pr_info("too many policy elements\n"); - return -EINVAL; + errmsg = "too many policy elements"; + goto err; } return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + return -EINVAL; } static struct xt_match policy_mt_reg[] __read_mostly = { From b26066447bb8599b393b2dd2bbeb68767e09ba07 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:07 +0100 Subject: [PATCH 070/269] netfilter: x_tables: use pr ratelimiting in all remaining spots Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_ECN.c | 2 +- net/ipv4/netfilter/ipt_REJECT.c | 4 ++-- net/ipv4/netfilter/ipt_rpfilter.c | 2 +- net/ipv6/netfilter/ip6t_REJECT.c | 4 ++-- net/ipv6/netfilter/ip6t_rpfilter.c | 2 +- net/ipv6/netfilter/ip6t_srh.c | 6 ++++-- net/netfilter/xt_AUDIT.c | 4 ++-- net/netfilter/xt_CHECKSUM.c | 3 ++- net/netfilter/xt_CONNSECMARK.c | 6 +++--- net/netfilter/xt_LED.c | 2 +- net/netfilter/xt_SECMARK.c | 14 ++++++++------ net/netfilter/xt_TCPMSS.c | 10 ++++------ net/netfilter/xt_TPROXY.c | 6 ++---- net/netfilter/xt_bpf.c | 4 +++- net/netfilter/xt_cgroup.c | 8 +++++--- net/netfilter/xt_cluster.c | 8 +++----- net/netfilter/xt_connbytes.c | 4 ++-- net/netfilter/xt_connlabel.c | 7 ++++--- net/netfilter/xt_connmark.c | 8 ++++---- net/netfilter/xt_conntrack.c | 4 ++-- net/netfilter/xt_ecn.c | 4 ++-- net/netfilter/xt_hashlimit.c | 24 +++++++++++++----------- net/netfilter/xt_helper.c | 4 ++-- net/netfilter/xt_ipcomp.c | 2 +- net/netfilter/xt_ipvs.c | 3 ++- net/netfilter/xt_l2tp.c | 22 +++++++++++++--------- net/netfilter/xt_limit.c | 4 ++-- net/netfilter/xt_nat.c | 5 +++-- net/netfilter/xt_nfacct.c | 6 ++++-- net/netfilter/xt_physdev.c | 4 +--- net/netfilter/xt_recent.c | 14 ++++++-------- net/netfilter/xt_socket.c | 10 ++++++---- net/netfilter/xt_state.c | 4 ++-- net/netfilter/xt_time.c | 6 +++--- 34 files changed, 116 insertions(+), 104 deletions(-) diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 39ff167e6d865..aaaf9a81fbc97 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -106,7 +106,7 @@ static int ecn_tg_check(const struct xt_tgchk_param *par) if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { - pr_info("cannot use TCP operations on a non-tcp rule\n"); + pr_info_ratelimited("cannot use operation on non-tcp rule\n"); return -EINVAL; } return 0; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 8bd0d7b266320..e8bed3390e58e 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -74,13 +74,13 @@ static int reject_tg_check(const struct xt_tgchk_param *par) const struct ipt_entry *e = par->entryinfo; if (rejinfo->with == IPT_ICMP_ECHOREPLY) { - pr_info("ECHOREPLY no longer supported.\n"); + pr_info_ratelimited("ECHOREPLY no longer supported.\n"); return -EINVAL; } else if (rejinfo->with == IPT_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO)) { - pr_info("TCP_RESET invalid for non-tcp\n"); + pr_info_ratelimited("TCP_RESET invalid for non-tcp\n"); return -EINVAL; } } diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 5d107dd9098e6..fd01f13c896a1 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -105,7 +105,7 @@ static int rpfilter_check(const struct xt_mtchk_param *par) const struct xt_rpfilter_info *info = par->matchinfo; unsigned int options = ~XT_RPFILTER_OPTION_MASK; if (info->flags & options) { - pr_info("unknown options encountered"); + pr_info_ratelimited("unknown options\n"); return -EINVAL; } diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index fa51a205918db..38dea8ff680fe 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -85,14 +85,14 @@ static int reject_tg6_check(const struct xt_tgchk_param *par) const struct ip6t_entry *e = par->entryinfo; if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) { - pr_info("ECHOREPLY is not supported.\n"); + pr_info_ratelimited("ECHOREPLY is not supported\n"); return -EINVAL; } else if (rejinfo->with == IP6T_TCP_RESET) { /* Must specify that it's a TCP packet */ if (!(e->ipv6.flags & IP6T_F_PROTO) || e->ipv6.proto != IPPROTO_TCP || (e->ipv6.invflags & XT_INV_PROTO)) { - pr_info("TCP_RESET illegal for non-tcp\n"); + pr_info_ratelimited("TCP_RESET illegal for non-tcp\n"); return -EINVAL; } } diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index ddf3111f9810d..94deb69bbbdaa 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -103,7 +103,7 @@ static int rpfilter_check(const struct xt_mtchk_param *par) unsigned int options = ~XT_RPFILTER_OPTION_MASK; if (info->flags & options) { - pr_info("unknown options encountered"); + pr_info_ratelimited("unknown options\n"); return -EINVAL; } diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c index 9642164107ce1..33719d5560c8a 100644 --- a/net/ipv6/netfilter/ip6t_srh.c +++ b/net/ipv6/netfilter/ip6t_srh.c @@ -122,12 +122,14 @@ static int srh_mt6_check(const struct xt_mtchk_param *par) const struct ip6t_srh *srhinfo = par->matchinfo; if (srhinfo->mt_flags & ~IP6T_SRH_MASK) { - pr_err("unknown srh match flags %X\n", srhinfo->mt_flags); + pr_info_ratelimited("unknown srh match flags %X\n", + srhinfo->mt_flags); return -EINVAL; } if (srhinfo->mt_invflags & ~IP6T_SRH_INV_MASK) { - pr_err("unknown srh invflags %X\n", srhinfo->mt_invflags); + pr_info_ratelimited("unknown srh invflags %X\n", + srhinfo->mt_invflags); return -EINVAL; } diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index c502419d63061..f368ee6741db5 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -120,8 +120,8 @@ static int audit_tg_check(const struct xt_tgchk_param *par) const struct xt_audit_info *info = par->targinfo; if (info->type > XT_AUDIT_TYPE_MAX) { - pr_info("Audit type out of range (valid range: 0..%hhu)\n", - XT_AUDIT_TYPE_MAX); + pr_info_ratelimited("Audit type out of range (valid range: 0..%hhu)\n", + XT_AUDIT_TYPE_MAX); return -ERANGE; } diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c index ea3c5701fb0f8..9f4151ec3e06e 100644 --- a/net/netfilter/xt_CHECKSUM.c +++ b/net/netfilter/xt_CHECKSUM.c @@ -36,7 +36,8 @@ static int checksum_tg_check(const struct xt_tgchk_param *par) const struct xt_CHECKSUM_info *einfo = par->targinfo; if (einfo->operation & ~XT_CHECKSUM_OP_FILL) { - pr_info("unsupported CHECKSUM operation %x\n", einfo->operation); + pr_info_ratelimited("unsupported CHECKSUM operation %x\n", + einfo->operation); return -EINVAL; } if (!einfo->operation) diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 6f30cd399e426..f3f1caac949ba 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -102,14 +102,14 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par) break; default: - pr_info("invalid mode: %hu\n", info->mode); + pr_info_ratelimited("invalid mode: %hu\n", info->mode); return -EINVAL; } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index ece311c11fdcf..4472424e7ead9 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -136,7 +136,7 @@ static int led_tg_check(const struct xt_tgchk_param *par) err = led_trigger_register(&ledinternal->netfilter_led_trigger); if (err) { - pr_err("Trigger name is already in use.\n"); + pr_info_ratelimited("Trigger name is already in use.\n"); goto exit_alloc; } diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 5c5cd782fab5c..4ad5fe27e08bc 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -60,18 +60,20 @@ static int checkentry_lsm(struct xt_secmark_target_info *info) &info->secid); if (err) { if (err == -EINVAL) - pr_info("invalid security context \'%s\'\n", info->secctx); + pr_info_ratelimited("invalid security context \'%s\'\n", + info->secctx); return err; } if (!info->secid) { - pr_info("unable to map security context \'%s\'\n", info->secctx); + pr_info_ratelimited("unable to map security context \'%s\'\n", + info->secctx); return -ENOENT; } err = security_secmark_relabel_packet(info->secid); if (err) { - pr_info("unable to obtain relabeling permission\n"); + pr_info_ratelimited("unable to obtain relabeling permission\n"); return err; } @@ -92,8 +94,8 @@ static int secmark_tg_check(const struct xt_tgchk_param *par) } if (mode && mode != info->mode) { - pr_info("mode already set to %hu cannot mix with " - "rules for mode %hu\n", mode, info->mode); + pr_info_ratelimited("mode already set to %hu cannot mix with rules for mode %hu\n", + mode, info->mode); return -EINVAL; } @@ -101,7 +103,7 @@ static int secmark_tg_check(const struct xt_tgchk_param *par) case SECMARK_MODE_SEL: break; default: - pr_info("invalid mode: %hu\n", info->mode); + pr_info_ratelimited("invalid mode: %hu\n", info->mode); return -EINVAL; } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 99bb8e410f229..98efb202f8b4a 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -273,8 +273,7 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par) (par->hook_mask & ~((1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_POST_ROUTING))) != 0) { - pr_info("path-MTU clamping only supported in " - "FORWARD, OUTPUT and POSTROUTING hooks\n"); + pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); return -EINVAL; } if (par->nft_compat) @@ -283,7 +282,7 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par) xt_ematch_foreach(ematch, e) if (find_syn_match(ematch)) return 0; - pr_info("Only works on TCP SYN packets\n"); + pr_info_ratelimited("Only works on TCP SYN packets\n"); return -EINVAL; } @@ -298,8 +297,7 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par) (par->hook_mask & ~((1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_POST_ROUTING))) != 0) { - pr_info("path-MTU clamping only supported in " - "FORWARD, OUTPUT and POSTROUTING hooks\n"); + pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); return -EINVAL; } if (par->nft_compat) @@ -308,7 +306,7 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par) xt_ematch_foreach(ematch, e) if (find_syn_match(ematch)) return 0; - pr_info("Only works on TCP SYN packets\n"); + pr_info_ratelimited("Only works on TCP SYN packets\n"); return -EINVAL; } #endif diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 17d7705e3bd41..8c89323c06afe 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -540,8 +540,7 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par) !(i->invflags & IP6T_INV_PROTO)) return 0; - pr_info("Can be used only in combination with " - "either -p tcp or -p udp\n"); + pr_info_ratelimited("Can be used only with -p tcp or -p udp\n"); return -EINVAL; } #endif @@ -559,8 +558,7 @@ static int tproxy_tg4_check(const struct xt_tgchk_param *par) && !(i->invflags & IPT_INV_PROTO)) return 0; - pr_info("Can be used only in combination with " - "either -p tcp or -p udp\n"); + pr_info_ratelimited("Can be used only with -p tcp or -p udp\n"); return -EINVAL; } diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 06b090d8e9014..a2cf8a6236d63 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -7,6 +7,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -34,7 +36,7 @@ static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len, program.filter = insns; if (bpf_prog_create(ret, &program)) { - pr_info("bpf: check failed: parse error\n"); + pr_info_ratelimited("check failed: parse error\n"); return -EINVAL; } diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 891f4e7e8ea7f..7df2dece57d30 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -12,6 +12,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -48,7 +50,7 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) } if (info->has_path && info->has_classid) { - pr_info("xt_cgroup: both path and classid specified\n"); + pr_info_ratelimited("path and classid specified\n"); return -EINVAL; } @@ -56,8 +58,8 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) if (info->has_path) { cgrp = cgroup_get_from_path(info->path); if (IS_ERR(cgrp)) { - pr_info("xt_cgroup: invalid path, errno=%ld\n", - PTR_ERR(cgrp)); + pr_info_ratelimited("invalid path, errno=%ld\n", + PTR_ERR(cgrp)); return -EINVAL; } info->priv = cgrp; diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 57ef175dfbfaa..0068688995c82 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -135,14 +135,12 @@ static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) struct xt_cluster_match_info *info = par->matchinfo; if (info->total_nodes > XT_CLUSTER_NODES_MAX) { - pr_info("you have exceeded the maximum " - "number of cluster nodes (%u > %u)\n", - info->total_nodes, XT_CLUSTER_NODES_MAX); + pr_info_ratelimited("you have exceeded the maximum number of cluster nodes (%u > %u)\n", + info->total_nodes, XT_CLUSTER_NODES_MAX); return -EINVAL; } if (info->node_mask >= (1ULL << info->total_nodes)) { - pr_info("this node mask cannot be " - "higher than the total number of nodes\n"); + pr_info_ratelimited("node mask cannot exceed total number of nodes\n"); return -EDOM; } return 0; diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index cad0b7b5eb356..93cb018c3055f 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -112,8 +112,8 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); /* * This filter cannot function correctly unless connection tracking diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index 23372879e6e30..4fa4efd243532 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -57,14 +57,15 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par) int ret; if (info->options & ~options) { - pr_err("Unknown options in mask %x\n", info->options); + pr_info_ratelimited("Unknown options in mask %x\n", + info->options); return -EINVAL; } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index ec377cc6a369c..809639ce6f5a4 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -79,8 +79,8 @@ static int connmark_tg_check(const struct xt_tgchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } @@ -109,8 +109,8 @@ static int connmark_mt_check(const struct xt_mtchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 39cf1d019240e..df80fe7d391c0 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -272,8 +272,8 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c index 3c831a8efebc6..c7ad4afa5fb8c 100644 --- a/net/netfilter/xt_ecn.c +++ b/net/netfilter/xt_ecn.c @@ -97,7 +97,7 @@ static int ecn_mt_check4(const struct xt_mtchk_param *par) if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) { - pr_info("cannot match TCP bits in rule for non-tcp packets\n"); + pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n"); return -EINVAL; } @@ -139,7 +139,7 @@ static int ecn_mt_check6(const struct xt_mtchk_param *par) if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && (ip->proto != IPPROTO_TCP || ip->invflags & IP6T_INV_PROTO)) { - pr_info("cannot match TCP bits in rule for non-tcp packets\n"); + pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n"); return -EINVAL; } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index ca6847403ca21..aa96027f44188 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -523,7 +523,8 @@ static u64 user2rate(u64 user) if (user != 0) { return div64_u64(XT_HASHLIMIT_SCALE_v2, user); } else { - pr_warn("invalid rate from userspace: %llu\n", user); + pr_info_ratelimited("invalid rate from userspace: %llu\n", + user); return 0; } } @@ -865,33 +866,34 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, } if (cfg->mode & ~XT_HASHLIMIT_ALL) { - pr_info("Unknown mode mask %X, kernel too old?\n", - cfg->mode); + pr_info_ratelimited("Unknown mode mask %X, kernel too old?\n", + cfg->mode); return -EINVAL; } /* Check for overflow. */ if (revision >= 3 && cfg->mode & XT_HASHLIMIT_RATE_MATCH) { if (cfg->avg == 0 || cfg->avg > U32_MAX) { - pr_info("hashlimit invalid rate\n"); + pr_info_ratelimited("invalid rate\n"); return -ERANGE; } if (cfg->interval == 0) { - pr_info("hashlimit invalid interval\n"); + pr_info_ratelimited("invalid interval\n"); return -EINVAL; } } else if (cfg->mode & XT_HASHLIMIT_BYTES) { if (user2credits_byte(cfg->avg) == 0) { - pr_info("overflow, rate too high: %llu\n", cfg->avg); + pr_info_ratelimited("overflow, rate too high: %llu\n", + cfg->avg); return -EINVAL; } } else if (cfg->burst == 0 || - user2credits(cfg->avg * cfg->burst, revision) < - user2credits(cfg->avg, revision)) { - pr_info("overflow, try lower: %llu/%llu\n", - cfg->avg, cfg->burst); - return -ERANGE; + user2credits(cfg->avg * cfg->burst, revision) < + user2credits(cfg->avg, revision)) { + pr_info_ratelimited("overflow, try lower: %llu/%llu\n", + cfg->avg, cfg->burst); + return -ERANGE; } mutex_lock(&hashlimit_mutex); diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 38a78151c0e99..fd077aeaaed95 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -61,8 +61,8 @@ static int helper_mt_check(const struct xt_mtchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } info->name[sizeof(info->name) - 1] = '\0'; diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c index 7ca64a50db04d..57f1df5757011 100644 --- a/net/netfilter/xt_ipcomp.c +++ b/net/netfilter/xt_ipcomp.c @@ -72,7 +72,7 @@ static int comp_mt_check(const struct xt_mtchk_param *par) /* Must specify no unknown invflags */ if (compinfo->invflags & ~XT_IPCOMP_INV_MASK) { - pr_err("unknown flags %X\n", compinfo->invflags); + pr_info_ratelimited("unknown flags %X\n", compinfo->invflags); return -EINVAL; } return 0; diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 42540d26c2b8e..1d950a6100af1 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -158,7 +158,8 @@ static int ipvs_mt_check(const struct xt_mtchk_param *par) && par->family != NFPROTO_IPV6 #endif ) { - pr_info("protocol family %u not supported\n", par->family); + pr_info_ratelimited("protocol family %u not supported\n", + par->family); return -EINVAL; } diff --git a/net/netfilter/xt_l2tp.c b/net/netfilter/xt_l2tp.c index 8aee572771f2b..c43482bf48e68 100644 --- a/net/netfilter/xt_l2tp.c +++ b/net/netfilter/xt_l2tp.c @@ -216,7 +216,7 @@ static int l2tp_mt_check(const struct xt_mtchk_param *par) /* Check for invalid flags */ if (info->flags & ~(XT_L2TP_TID | XT_L2TP_SID | XT_L2TP_VERSION | XT_L2TP_TYPE)) { - pr_info("unknown flags: %x\n", info->flags); + pr_info_ratelimited("unknown flags: %x\n", info->flags); return -EINVAL; } @@ -225,7 +225,8 @@ static int l2tp_mt_check(const struct xt_mtchk_param *par) (!(info->flags & XT_L2TP_SID)) && ((!(info->flags & XT_L2TP_TYPE)) || (info->type != XT_L2TP_TYPE_CONTROL))) { - pr_info("invalid flags combination: %x\n", info->flags); + pr_info_ratelimited("invalid flags combination: %x\n", + info->flags); return -EINVAL; } @@ -234,19 +235,22 @@ static int l2tp_mt_check(const struct xt_mtchk_param *par) */ if (info->flags & XT_L2TP_VERSION) { if ((info->version < 2) || (info->version > 3)) { - pr_info("wrong L2TP version: %u\n", info->version); + pr_info_ratelimited("wrong L2TP version: %u\n", + info->version); return -EINVAL; } if (info->version == 2) { if ((info->flags & XT_L2TP_TID) && (info->tid > 0xffff)) { - pr_info("v2 tid > 0xffff: %u\n", info->tid); + pr_info_ratelimited("v2 tid > 0xffff: %u\n", + info->tid); return -EINVAL; } if ((info->flags & XT_L2TP_SID) && (info->sid > 0xffff)) { - pr_info("v2 sid > 0xffff: %u\n", info->sid); + pr_info_ratelimited("v2 sid > 0xffff: %u\n", + info->sid); return -EINVAL; } } @@ -268,13 +272,13 @@ static int l2tp_mt_check4(const struct xt_mtchk_param *par) if ((ip->proto != IPPROTO_UDP) && (ip->proto != IPPROTO_L2TP)) { - pr_info("missing protocol rule (udp|l2tpip)\n"); + pr_info_ratelimited("missing protocol rule (udp|l2tpip)\n"); return -EINVAL; } if ((ip->proto == IPPROTO_L2TP) && (info->version == 2)) { - pr_info("v2 doesn't support IP mode\n"); + pr_info_ratelimited("v2 doesn't support IP mode\n"); return -EINVAL; } @@ -295,13 +299,13 @@ static int l2tp_mt_check6(const struct xt_mtchk_param *par) if ((ip->proto != IPPROTO_UDP) && (ip->proto != IPPROTO_L2TP)) { - pr_info("missing protocol rule (udp|l2tpip)\n"); + pr_info_ratelimited("missing protocol rule (udp|l2tpip)\n"); return -EINVAL; } if ((ip->proto == IPPROTO_L2TP) && (info->version == 2)) { - pr_info("v2 doesn't support IP mode\n"); + pr_info_ratelimited("v2 doesn't support IP mode\n"); return -EINVAL; } diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 61403b77361cb..55d18cd676356 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -106,8 +106,8 @@ static int limit_mt_check(const struct xt_mtchk_param *par) /* Check for overflow. */ if (r->burst == 0 || user2credits(r->avg * r->burst) < user2credits(r->avg)) { - pr_info("Overflow, try lower: %u/%u\n", - r->avg, r->burst); + pr_info_ratelimited("Overflow, try lower: %u/%u\n", + r->avg, r->burst); return -ERANGE; } diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c index 0fd14d1eb09d1..bdb689cdc829d 100644 --- a/net/netfilter/xt_nat.c +++ b/net/netfilter/xt_nat.c @@ -8,6 +8,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -19,8 +21,7 @@ static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par) const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; if (mr->rangesize != 1) { - pr_info("%s: multiple ranges no longer supported\n", - par->target->name); + pr_info_ratelimited("multiple ranges no longer supported\n"); return -EINVAL; } return nf_ct_netns_get(par->net, par->family); diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index 6f92d25590a85..c8674deed4eb4 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -6,6 +6,8 @@ * it under the terms of the GNU General Public License version 2 (or any * later at your option) as published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include @@ -39,8 +41,8 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par) nfacct = nfnl_acct_find_get(par->net, info->name); if (nfacct == NULL) { - pr_info("xt_nfacct: accounting object with name `%s' " - "does not exists\n", info->name); + pr_info_ratelimited("accounting object `%s' does not exists\n", + info->name); return -ENOENT; } info->nfacct = nfacct; diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index bb33598e4530d..9d6d67b953ac8 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -107,9 +107,7 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) info->invert & XT_PHYSDEV_OP_BRIDGED) && par->hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) { - pr_info("using --physdev-out and --physdev-is-out are only " - "supported in the FORWARD and POSTROUTING chains with " - "bridged traffic.\n"); + pr_info_ratelimited("--physdev-out and --physdev-is-out only supported in the FORWARD and POSTROUTING chains with bridged traffic\n"); if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) return -EINVAL; } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 245fa350a7a85..6d232d18faff7 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -342,8 +342,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par, net_get_random_once(&hash_rnd, sizeof(hash_rnd)); if (info->check_set & ~XT_RECENT_VALID_FLAGS) { - pr_info("Unsupported user space flags (%08x)\n", - info->check_set); + pr_info_ratelimited("Unsupported userspace flags (%08x)\n", + info->check_set); return -EINVAL; } if (hweight8(info->check_set & @@ -357,8 +357,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par, if ((info->check_set & XT_RECENT_REAP) && !info->seconds) return -EINVAL; if (info->hit_count >= XT_RECENT_MAX_NSTAMPS) { - pr_info("hitcount (%u) is larger than allowed maximum (%u)\n", - info->hit_count, XT_RECENT_MAX_NSTAMPS - 1); + pr_info_ratelimited("hitcount (%u) is larger than allowed maximum (%u)\n", + info->hit_count, XT_RECENT_MAX_NSTAMPS - 1); return -EINVAL; } if (info->name[0] == '\0' || @@ -587,7 +587,7 @@ recent_mt_proc_write(struct file *file, const char __user *input, add = true; break; default: - pr_info("Need \"+ip\", \"-ip\" or \"/\"\n"); + pr_info_ratelimited("Need \"+ip\", \"-ip\" or \"/\"\n"); return -EINVAL; } @@ -601,10 +601,8 @@ recent_mt_proc_write(struct file *file, const char __user *input, succ = in4_pton(c, size, (void *)&addr, '\n', NULL); } - if (!succ) { - pr_info("illegal address written to procfs\n"); + if (!succ) return -EINVAL; - } spin_lock_bh(&recent_lock); e = recent_entry_lookup(t, &addr, family, 0); diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 575d2153e3b81..2ac7f674d19b1 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -171,7 +171,8 @@ static int socket_mt_v1_check(const struct xt_mtchk_param *par) return err; if (info->flags & ~XT_SOCKET_FLAGS_V1) { - pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1); + pr_info_ratelimited("unknown flags 0x%x\n", + info->flags & ~XT_SOCKET_FLAGS_V1); return -EINVAL; } return 0; @@ -187,7 +188,8 @@ static int socket_mt_v2_check(const struct xt_mtchk_param *par) return err; if (info->flags & ~XT_SOCKET_FLAGS_V2) { - pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2); + pr_info_ratelimited("unknown flags 0x%x\n", + info->flags & ~XT_SOCKET_FLAGS_V2); return -EINVAL; } return 0; @@ -203,8 +205,8 @@ static int socket_mt_v3_check(const struct xt_mtchk_param *par) if (err) return err; if (info->flags & ~XT_SOCKET_FLAGS_V3) { - pr_info("unknown flags 0x%x\n", - info->flags & ~XT_SOCKET_FLAGS_V3); + pr_info_ratelimited("unknown flags 0x%x\n", + info->flags & ~XT_SOCKET_FLAGS_V3); return -EINVAL; } return 0; diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 5fbd79194d21e..0b41c0befe3cf 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -44,8 +44,8 @@ static int state_mt_check(const struct xt_mtchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 1b01eec1fbda5..0160f505e337c 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -235,13 +235,13 @@ static int time_mt_check(const struct xt_mtchk_param *par) if (info->daytime_start > XT_TIME_MAX_DAYTIME || info->daytime_stop > XT_TIME_MAX_DAYTIME) { - pr_info("invalid argument - start or " - "stop time greater than 23:59:59\n"); + pr_info_ratelimited("invalid argument - start or stop time greater than 23:59:59\n"); return -EDOM; } if (info->flags & ~XT_TIME_ALL_FLAGS) { - pr_info("unknown flags 0x%x\n", info->flags & ~XT_TIME_ALL_FLAGS); + pr_info_ratelimited("unknown flags 0x%x\n", + info->flags & ~XT_TIME_ALL_FLAGS); return -EINVAL; } From d682026dd3c548a408415cd75882e5d081147f5b Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Mon, 12 Feb 2018 21:45:42 +0800 Subject: [PATCH 071/269] .gitignore: ignore ASN.1 auto generated files when build kernel with default configure, files: generatenet/ipv4/netfilter/nf_nat_snmp_basic-asn1.c net/ipv4/netfilter/nf_nat_snmp_basic-asn1.h will be automatically generated by ASN.1 compiler, so No need to track them in git, it's better to ignore them. Signed-off-by: Zhu Lingshan Signed-off-by: Pablo Neira Ayuso --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 705e09913dc23..1be78fd8163bd 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,7 @@ all.config # Kdevelop4 *.kdev4 + +#Automatically generated by ASN.1 compiler +net/ipv4/netfilter/nf_nat_snmp_basic-asn1.c +net/ipv4/netfilter/nf_nat_snmp_basic-asn1.h From 10414014bc085aac9f787a5890b33b5605fbcfc4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 12 Feb 2018 18:49:39 +0100 Subject: [PATCH 072/269] netfilter: x_tables: fix missing timer initialization in xt_LED syzbot reported that xt_LED may try to use the ledinternal->timer without previously initializing it: ------------[ cut here ]------------ kernel BUG at kernel/time/timer.c:958! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 1826 Comm: kworker/1:2 Not tainted 4.15.0+ #306 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:__mod_timer kernel/time/timer.c:958 [inline] RIP: 0010:mod_timer+0x7d6/0x13c0 kernel/time/timer.c:1102 RSP: 0018:ffff8801d24fe9f8 EFLAGS: 00010293 RAX: ffff8801d25246c0 RBX: ffff8801aec6cb50 RCX: ffffffff816052c6 RDX: 0000000000000000 RSI: 00000000fffbd14b RDI: ffff8801aec6cb68 RBP: ffff8801d24fec98 R08: 0000000000000000 R09: 1ffff1003a49fd6c R10: ffff8801d24feb28 R11: 0000000000000005 R12: dffffc0000000000 R13: ffff8801d24fec70 R14: 00000000fffbd14b R15: ffff8801af608f90 FS: 0000000000000000(0000) GS:ffff8801db500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000206d6fd0 CR3: 0000000006a22001 CR4: 00000000001606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: led_tg+0x1db/0x2e0 net/netfilter/xt_LED.c:75 ip6t_do_table+0xc2a/0x1a30 net/ipv6/netfilter/ip6_tables.c:365 ip6table_raw_hook+0x65/0x80 net/ipv6/netfilter/ip6table_raw.c:42 nf_hook_entry_hookfn include/linux/netfilter.h:120 [inline] nf_hook_slow+0xba/0x1a0 net/netfilter/core.c:483 nf_hook.constprop.27+0x3f6/0x830 include/linux/netfilter.h:243 NF_HOOK include/linux/netfilter.h:286 [inline] ndisc_send_skb+0xa51/0x1370 net/ipv6/ndisc.c:491 ndisc_send_ns+0x38a/0x870 net/ipv6/ndisc.c:633 addrconf_dad_work+0xb9e/0x1320 net/ipv6/addrconf.c:4008 process_one_work+0xbbf/0x1af0 kernel/workqueue.c:2113 worker_thread+0x223/0x1990 kernel/workqueue.c:2247 kthread+0x33c/0x400 kernel/kthread.c:238 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:429 Code: 85 2a 0b 00 00 4d 8b 3c 24 4d 85 ff 75 9f 4c 8b bd 60 fd ff ff e8 bb 57 10 00 65 ff 0d 94 9a a1 7e e9 d9 fc ff ff e8 aa 57 10 00 <0f> 0b e8 a3 57 10 00 e9 14 fb ff ff e8 99 57 10 00 4c 89 bd 70 RIP: __mod_timer kernel/time/timer.c:958 [inline] RSP: ffff8801d24fe9f8 RIP: mod_timer+0x7d6/0x13c0 kernel/time/timer.c:1102 RSP: ffff8801d24fe9f8 ---[ end trace f661ab06f5dd8b3d ]--- The ledinternal struct can be shared between several different xt_LED targets, but the related timer is currently initialized only if the first target requires it. Fix it by unconditionally initializing the timer struct. v1 -> v2: call del_timer_sync() unconditionally, too. Fixes: 268cb38e1802 ("netfilter: x_tables: add LED trigger target") Reported-by: syzbot+10c98dc5725c6c8fc7fb@syzkaller.appspotmail.com Signed-off-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_LED.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 4472424e7ead9..19846445504dc 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -140,9 +140,10 @@ static int led_tg_check(const struct xt_tgchk_param *par) goto exit_alloc; } - /* See if we need to set up a timer */ - if (ledinfo->delay > 0) - timer_setup(&ledinternal->timer, led_timeout_callback, 0); + /* Since the letinternal timer can be shared between multiple targets, + * always set it up, even if the current target does not need it + */ + timer_setup(&ledinternal->timer, led_timeout_callback, 0); list_add_tail(&ledinternal->list, &xt_led_triggers); @@ -179,8 +180,7 @@ static void led_tg_destroy(const struct xt_tgdtor_param *par) list_del(&ledinternal->list); - if (ledinfo->delay > 0) - del_timer_sync(&ledinternal->timer); + del_timer_sync(&ledinternal->timer); led_trigger_unregister(&ledinternal->netfilter_led_trigger); From db57ccf0f2f4624b4c4758379f8165277504fbd7 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 14 Feb 2018 17:21:19 +0100 Subject: [PATCH 073/269] netfilter: nat: cope with negative port range syzbot reported a division by 0 bug in the netfilter nat code: divide error: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 4168 Comm: syzkaller034710 Not tainted 4.16.0-rc1+ #309 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:nf_nat_l4proto_unique_tuple+0x291/0x530 net/netfilter/nf_nat_proto_common.c:88 RSP: 0018:ffff8801b2466778 EFLAGS: 00010246 RAX: 000000000000f153 RBX: ffff8801b2466dd8 RCX: ffff8801b2466c7c RDX: 0000000000000000 RSI: ffff8801b2466c58 RDI: ffff8801db5293ac RBP: ffff8801b24667d8 R08: ffff8801b8ba6dc0 R09: ffffffff88af5900 R10: ffff8801b24666f0 R11: 0000000000000000 R12: 000000002990f153 R13: 0000000000000001 R14: 0000000000000000 R15: ffff8801b2466c7c FS: 00000000017e3880(0000) GS:ffff8801db500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000208fdfe4 CR3: 00000001b5340002 CR4: 00000000001606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: dccp_unique_tuple+0x40/0x50 net/netfilter/nf_nat_proto_dccp.c:30 get_unique_tuple+0xc28/0x1c10 net/netfilter/nf_nat_core.c:362 nf_nat_setup_info+0x1c2/0xe00 net/netfilter/nf_nat_core.c:406 nf_nat_redirect_ipv6+0x306/0x730 net/netfilter/nf_nat_redirect.c:124 redirect_tg6+0x7f/0xb0 net/netfilter/xt_REDIRECT.c:34 ip6t_do_table+0xc2a/0x1a30 net/ipv6/netfilter/ip6_tables.c:365 ip6table_nat_do_chain+0x65/0x80 net/ipv6/netfilter/ip6table_nat.c:41 nf_nat_ipv6_fn+0x594/0xa80 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c:302 nf_nat_ipv6_local_fn+0x33/0x5d0 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c:407 ip6table_nat_local_fn+0x2c/0x40 net/ipv6/netfilter/ip6table_nat.c:69 nf_hook_entry_hookfn include/linux/netfilter.h:120 [inline] nf_hook_slow+0xba/0x1a0 net/netfilter/core.c:483 nf_hook include/linux/netfilter.h:243 [inline] NF_HOOK include/linux/netfilter.h:286 [inline] ip6_xmit+0x10ec/0x2260 net/ipv6/ip6_output.c:277 inet6_csk_xmit+0x2fc/0x580 net/ipv6/inet6_connection_sock.c:139 dccp_transmit_skb+0x9ac/0x10f0 net/dccp/output.c:142 dccp_connect+0x369/0x670 net/dccp/output.c:564 dccp_v6_connect+0xe17/0x1bf0 net/dccp/ipv6.c:946 __inet_stream_connect+0x2d4/0xf00 net/ipv4/af_inet.c:620 inet_stream_connect+0x58/0xa0 net/ipv4/af_inet.c:684 SYSC_connect+0x213/0x4a0 net/socket.c:1639 SyS_connect+0x24/0x30 net/socket.c:1620 do_syscall_64+0x282/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x26/0x9b RIP: 0033:0x441c69 RSP: 002b:00007ffe50cc0be8 EFLAGS: 00000217 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: ffffffffffffffff RCX: 0000000000441c69 RDX: 000000000000001c RSI: 00000000208fdfe4 RDI: 0000000000000003 RBP: 00000000006cc018 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000538 R11: 0000000000000217 R12: 0000000000403590 R13: 0000000000403620 R14: 0000000000000000 R15: 0000000000000000 Code: 48 89 f0 83 e0 07 83 c0 01 38 d0 7c 08 84 d2 0f 85 46 02 00 00 48 8b 45 c8 44 0f b7 20 e8 88 97 04 fd 31 d2 41 0f b7 c4 4c 89 f9 <41> f7 f6 48 c1 e9 03 48 b8 00 00 00 00 00 fc ff df 0f b6 0c 01 RIP: nf_nat_l4proto_unique_tuple+0x291/0x530 net/netfilter/nf_nat_proto_common.c:88 RSP: ffff8801b2466778 The problem is that currently we don't have any check on the configured port range. A port range == -1 triggers the bug, while other negative values may require a very long time to complete the following loop. This commit addresses the issue swapping the two ends on negative ranges. The check is performed in nf_nat_l4proto_unique_tuple() since the nft nat loads the port values from nft registers at runtime. v1 -> v2: use the correct 'Fixes' tag v2 -> v3: update commit message, drop unneeded READ_ONCE() Fixes: 5b1158e909ec ("[NETFILTER]: Add NAT support for nf_conntrack") Reported-by: syzbot+8012e198bd037f4871e5@syzkaller.appspotmail.com Signed-off-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_proto_common.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c index fbce552a796e1..7d7466dbf6633 100644 --- a/net/netfilter/nf_nat_proto_common.c +++ b/net/netfilter/nf_nat_proto_common.c @@ -41,7 +41,7 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, const struct nf_conn *ct, u16 *rover) { - unsigned int range_size, min, i; + unsigned int range_size, min, max, i; __be16 *portptr; u_int16_t off; @@ -71,7 +71,10 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, } } else { min = ntohs(range->min_proto.all); - range_size = ntohs(range->max_proto.all) - min + 1; + max = ntohs(range->max_proto.all); + if (unlikely(max < min)) + swap(max, min); + range_size = max - min + 1; } if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) { From 52c84d36b7e2f8197a9a6174d6f901a7c7afb850 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 14 Feb 2018 22:42:54 -0800 Subject: [PATCH 074/269] tools: bpftool: preserve JSON for batch mode when dumping insns to file Print a "null" JSON object to standard output when bpftool is used to print program instructions to a file, so as to avoid breaking JSON output on batch mode. This null object was added for most commands in a previous commit, but this specific case had been omitted. Fixes: 004b45c0e51a ("tools: bpftool: provide JSON output for all possible commands") Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/prog.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index e8e2baaf93c26..e549e329be821 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -774,6 +774,9 @@ static int do_dump(int argc, char **argv) n < 0 ? strerror(errno) : "short write"); goto err_free; } + + if (json_output) + jsonw_null(json_wtr); } else { if (member_len == &info.jited_prog_len) { const char *name = NULL; From 9be6d411b0c473d31f756993b8b41bb16b0679c1 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 14 Feb 2018 22:42:55 -0800 Subject: [PATCH 075/269] tools: bpftool: preserve JSON output on errors on batch file parsing Before this patch, perror() function is used in some cases when bpftool fails to parse its input file in batch mode. This function does not integrate well with the rest of the output when JSON is used, so we replace it by something that is compliant. Most calls to perror() had already been replaced in a previous patch, this one is a leftover. Fixes: d319c8e101c5 ("tools: bpftool: preserve JSON output on errors on batch file parsing") Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 3a0396d87c424..185acfa229b59 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -244,7 +244,7 @@ static int do_batch(int argc, char **argv) } if (errno && errno != ENOENT) { - perror("reading batch file failed"); + p_err("reading batch file failed: %s", strerror(errno)); err = -1; } else { p_info("processed %d lines", lines); From c927b080c67e3e97193c81fc1d27f4251bf4e036 Mon Sep 17 00:00:00 2001 From: Kamil Konieczny Date: Wed, 7 Feb 2018 16:52:09 +0100 Subject: [PATCH 076/269] crypto: s5p-sss - Fix kernel Oops in AES-ECB mode In AES-ECB mode crypt is done with key only, so any use of IV can cause kernel Oops. Use IV only in AES-CBC and AES-CTR. Signed-off-by: Kamil Konieczny Reported-by: Anand Moon Reviewed-by: Krzysztof Kozlowski Tested-by: Anand Moon Cc: stable@vger.kernel.org # can be applied after commit 8f9702aad138 Signed-off-by: Herbert Xu --- drivers/crypto/s5p-sss.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index 188f44b7eb27e..5d64c08b7f47e 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -1922,15 +1922,21 @@ static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode) uint32_t aes_control; unsigned long flags; int err; + u8 *iv; aes_control = SSS_AES_KEY_CHANGE_MODE; if (mode & FLAGS_AES_DECRYPT) aes_control |= SSS_AES_MODE_DECRYPT; - if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CBC) + if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CBC) { aes_control |= SSS_AES_CHAIN_MODE_CBC; - else if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CTR) + iv = req->info; + } else if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CTR) { aes_control |= SSS_AES_CHAIN_MODE_CTR; + iv = req->info; + } else { + iv = NULL; /* AES_ECB */ + } if (dev->ctx->keylen == AES_KEYSIZE_192) aes_control |= SSS_AES_KEY_SIZE_192; @@ -1961,7 +1967,7 @@ static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode) goto outdata_error; SSS_AES_WRITE(dev, AES_CONTROL, aes_control); - s5p_set_aes(dev, dev->ctx->aes_key, req->info, dev->ctx->keylen); + s5p_set_aes(dev, dev->ctx->aes_key, iv, dev->ctx->keylen); s5p_set_dma_indata(dev, dev->sg_src); s5p_set_dma_outdata(dev, dev->sg_dst); From 8874ae5f15f3feef3b4a415b9aed51edcf449aa1 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 23 Jan 2018 09:35:14 +0000 Subject: [PATCH 077/269] USB: gadget: udc: Add missing platform_device_put() on error in bdc_pci_probe() Add the missing platform_device_put() before return from bdc_pci_probe() in the platform_device_add_resources() error handling case. Fixes: efed421a94e6 ("usb: gadget: Add UDC driver for Broadcom USB3.0 device controller IP BDC") Signed-off-by: Wei Yongjun Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/bdc/bdc_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/udc/bdc/bdc_pci.c b/drivers/usb/gadget/udc/bdc/bdc_pci.c index 1e940f054cb8c..6dbc489513cdb 100644 --- a/drivers/usb/gadget/udc/bdc/bdc_pci.c +++ b/drivers/usb/gadget/udc/bdc/bdc_pci.c @@ -77,6 +77,7 @@ static int bdc_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) if (ret) { dev_err(&pci->dev, "couldn't add resources to bdc device\n"); + platform_device_put(bdc); return ret; } From 98112041bcca164676367e261c8c1073ef70cb51 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 12 Feb 2018 15:30:08 +0200 Subject: [PATCH 078/269] usb: dwc3: core: Fix ULPI PHYs and prevent phy_get/ulpi_init during suspend/resume In order for ULPI PHYs to work, dwc3_phy_setup() and dwc3_ulpi_init() must be doene before dwc3_core_get_phy(). commit 541768b08a40 ("usb: dwc3: core: Call dwc3_core_get_phy() before initializing phys") broke this. The other issue is that dwc3_core_get_phy() and dwc3_ulpi_init() should be called only once during the life cycle of the driver. However, as dwc3_core_init() is called during system suspend/resume it will result in multiple calls to dwc3_core_get_phy() and dwc3_ulpi_init() which is wrong. Fix this by moving dwc3_ulpi_init() out of dwc3_phy_setup() into dwc3_core_ulpi_init(). Use a flag 'ulpi_ready' to ensure that dwc3_core_ulpi_init() is called only once from dwc3_core_init(). Use another flag 'phys_ready' to call dwc3_core_get_phy() only once from dwc3_core_init(). Fixes: 541768b08a40 ("usb: dwc3: core: Call dwc3_core_get_phy() before initializing phys") Fixes: f54edb539c11 ("usb: dwc3: core: initialize ULPI before trying to get the PHY") Cc: linux-stable # >= v4.13 Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.c | 47 +++++++++++++++++++++++++++++++---------- drivers/usb/dwc3/core.h | 5 +++++ 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 59511f2cd3ac4..f1d838a4acd61 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -486,6 +486,22 @@ static void dwc3_cache_hwparams(struct dwc3 *dwc) parms->hwparams8 = dwc3_readl(dwc->regs, DWC3_GHWPARAMS8); } +static int dwc3_core_ulpi_init(struct dwc3 *dwc) +{ + int intf; + int ret = 0; + + intf = DWC3_GHWPARAMS3_HSPHY_IFC(dwc->hwparams.hwparams3); + + if (intf == DWC3_GHWPARAMS3_HSPHY_IFC_ULPI || + (intf == DWC3_GHWPARAMS3_HSPHY_IFC_UTMI_ULPI && + dwc->hsphy_interface && + !strncmp(dwc->hsphy_interface, "ulpi", 4))) + ret = dwc3_ulpi_init(dwc); + + return ret; +} + /** * dwc3_phy_setup - Configure USB PHY Interface of DWC3 Core * @dwc: Pointer to our controller context structure @@ -497,7 +513,6 @@ static void dwc3_cache_hwparams(struct dwc3 *dwc) static int dwc3_phy_setup(struct dwc3 *dwc) { u32 reg; - int ret; reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)); @@ -568,9 +583,6 @@ static int dwc3_phy_setup(struct dwc3 *dwc) } /* FALLTHROUGH */ case DWC3_GHWPARAMS3_HSPHY_IFC_ULPI: - ret = dwc3_ulpi_init(dwc); - if (ret) - return ret; /* FALLTHROUGH */ default: break; @@ -727,6 +739,7 @@ static void dwc3_core_setup_global_control(struct dwc3 *dwc) } static int dwc3_core_get_phy(struct dwc3 *dwc); +static int dwc3_core_ulpi_init(struct dwc3 *dwc); /** * dwc3_core_init - Low-level initialization of DWC3 Core @@ -758,17 +771,27 @@ static int dwc3_core_init(struct dwc3 *dwc) dwc->maximum_speed = USB_SPEED_HIGH; } - ret = dwc3_core_get_phy(dwc); + ret = dwc3_phy_setup(dwc); if (ret) goto err0; - ret = dwc3_core_soft_reset(dwc); - if (ret) - goto err0; + if (!dwc->ulpi_ready) { + ret = dwc3_core_ulpi_init(dwc); + if (ret) + goto err0; + dwc->ulpi_ready = true; + } - ret = dwc3_phy_setup(dwc); + if (!dwc->phys_ready) { + ret = dwc3_core_get_phy(dwc); + if (ret) + goto err0a; + dwc->phys_ready = true; + } + + ret = dwc3_core_soft_reset(dwc); if (ret) - goto err0; + goto err0a; dwc3_core_setup_global_control(dwc); dwc3_core_num_eps(dwc); @@ -841,6 +864,9 @@ static int dwc3_core_init(struct dwc3 *dwc) phy_exit(dwc->usb2_generic_phy); phy_exit(dwc->usb3_generic_phy); +err0a: + dwc3_ulpi_exit(dwc); + err0: return ret; } @@ -1235,7 +1261,6 @@ static int dwc3_probe(struct platform_device *pdev) err3: dwc3_free_event_buffers(dwc); - dwc3_ulpi_exit(dwc); err2: pm_runtime_allow(&pdev->dev); diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 185b9603fd98b..860d2bc184d1c 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -797,7 +797,9 @@ struct dwc3_scratchpad_array { * @usb3_phy: pointer to USB3 PHY * @usb2_generic_phy: pointer to USB2 PHY * @usb3_generic_phy: pointer to USB3 PHY + * @phys_ready: flag to indicate that PHYs are ready * @ulpi: pointer to ulpi interface + * @ulpi_ready: flag to indicate that ULPI is initialized * @u2sel: parameter from Set SEL request. * @u2pel: parameter from Set SEL request. * @u1sel: parameter from Set SEL request. @@ -895,7 +897,10 @@ struct dwc3 { struct phy *usb2_generic_phy; struct phy *usb3_generic_phy; + bool phys_ready; + struct ulpi *ulpi; + bool ulpi_ready; void __iomem *regs; size_t regs_size; From bde0716d1f076e4c913c7946bcc858f71243c7a0 Mon Sep 17 00:00:00 2001 From: Joe Lee Date: Mon, 12 Feb 2018 14:24:46 +0200 Subject: [PATCH 079/269] xhci: workaround for AMD Promontory disabled ports wakeup For AMD Promontory xHCI host, although you can disable USB ports in BIOS settings, those ports will be enabled anyway after you remove a device on that port and re-plug it in again. It's a known limitation of the chip. As a workaround we can clear the PORT_WAKE_BITS. [commit and code comment rephrasing -Mathias] Signed-off-by: Joe Lee Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/pci-quirks.c | 109 ++++++++++++++++++++++++++++++++++ drivers/usb/host/pci-quirks.h | 5 ++ drivers/usb/host/xhci-hub.c | 7 +++ drivers/usb/host/xhci-pci.c | 11 ++++ drivers/usb/host/xhci.h | 2 +- 5 files changed, 133 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 1615367170251..67ad4bb6919a2 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -66,6 +66,23 @@ #define AX_INDXC 0x30 #define AX_DATAC 0x34 +#define PT_ADDR_INDX 0xE8 +#define PT_READ_INDX 0xE4 +#define PT_SIG_1_ADDR 0xA520 +#define PT_SIG_2_ADDR 0xA521 +#define PT_SIG_3_ADDR 0xA522 +#define PT_SIG_4_ADDR 0xA523 +#define PT_SIG_1_DATA 0x78 +#define PT_SIG_2_DATA 0x56 +#define PT_SIG_3_DATA 0x34 +#define PT_SIG_4_DATA 0x12 +#define PT4_P1_REG 0xB521 +#define PT4_P2_REG 0xB522 +#define PT2_P1_REG 0xD520 +#define PT2_P2_REG 0xD521 +#define PT1_P1_REG 0xD522 +#define PT1_P2_REG 0xD523 + #define NB_PCIE_INDX_ADDR 0xe0 #define NB_PCIE_INDX_DATA 0xe4 #define PCIE_P_CNTL 0x10040 @@ -512,6 +529,98 @@ void usb_amd_dev_put(void) } EXPORT_SYMBOL_GPL(usb_amd_dev_put); +/* + * Check if port is disabled in BIOS on AMD Promontory host. + * BIOS Disabled ports may wake on connect/disconnect and need + * driver workaround to keep them disabled. + * Returns true if port is marked disabled. + */ +bool usb_amd_pt_check_port(struct device *device, int port) +{ + unsigned char value, port_shift; + struct pci_dev *pdev; + u16 reg; + + pdev = to_pci_dev(device); + pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_1_ADDR); + + pci_read_config_byte(pdev, PT_READ_INDX, &value); + if (value != PT_SIG_1_DATA) + return false; + + pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_2_ADDR); + + pci_read_config_byte(pdev, PT_READ_INDX, &value); + if (value != PT_SIG_2_DATA) + return false; + + pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_3_ADDR); + + pci_read_config_byte(pdev, PT_READ_INDX, &value); + if (value != PT_SIG_3_DATA) + return false; + + pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_4_ADDR); + + pci_read_config_byte(pdev, PT_READ_INDX, &value); + if (value != PT_SIG_4_DATA) + return false; + + /* Check disabled port setting, if bit is set port is enabled */ + switch (pdev->device) { + case 0x43b9: + case 0x43ba: + /* + * device is AMD_PROMONTORYA_4(0x43b9) or PROMONTORYA_3(0x43ba) + * PT4_P1_REG bits[7..1] represents USB2.0 ports 6 to 0 + * PT4_P2_REG bits[6..0] represents ports 13 to 7 + */ + if (port > 6) { + reg = PT4_P2_REG; + port_shift = port - 7; + } else { + reg = PT4_P1_REG; + port_shift = port + 1; + } + break; + case 0x43bb: + /* + * device is AMD_PROMONTORYA_2(0x43bb) + * PT2_P1_REG bits[7..5] represents USB2.0 ports 2 to 0 + * PT2_P2_REG bits[5..0] represents ports 9 to 3 + */ + if (port > 2) { + reg = PT2_P2_REG; + port_shift = port - 3; + } else { + reg = PT2_P1_REG; + port_shift = port + 5; + } + break; + case 0x43bc: + /* + * device is AMD_PROMONTORYA_1(0x43bc) + * PT1_P1_REG[7..4] represents USB2.0 ports 3 to 0 + * PT1_P2_REG[5..0] represents ports 9 to 4 + */ + if (port > 3) { + reg = PT1_P2_REG; + port_shift = port - 4; + } else { + reg = PT1_P1_REG; + port_shift = port + 4; + } + break; + default: + return false; + } + pci_write_config_word(pdev, PT_ADDR_INDX, reg); + pci_read_config_byte(pdev, PT_READ_INDX, &value); + + return !(value & BIT(port_shift)); +} +EXPORT_SYMBOL_GPL(usb_amd_pt_check_port); + /* * Make sure the controller is completely inactive, unable to * generate interrupts or do DMA. diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h index b68dcb5dd0fdb..4ca0d9b7e463c 100644 --- a/drivers/usb/host/pci-quirks.h +++ b/drivers/usb/host/pci-quirks.h @@ -17,6 +17,7 @@ void usb_enable_intel_xhci_ports(struct pci_dev *xhci_pdev); void usb_disable_xhci_ports(struct pci_dev *xhci_pdev); void sb800_prefetch(struct device *dev, int on); bool usb_xhci_needs_pci_reset(struct pci_dev *pdev); +bool usb_amd_pt_check_port(struct device *device, int port); #else struct pci_dev; static inline void usb_amd_quirk_pll_disable(void) {} @@ -25,6 +26,10 @@ static inline void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev) {} static inline void usb_amd_dev_put(void) {} static inline void usb_disable_xhci_ports(struct pci_dev *xhci_pdev) {} static inline void sb800_prefetch(struct device *dev, int on) {} +static inline bool usb_amd_pt_check_port(struct device *device, int port) +{ + return false; +} #endif /* CONFIG_USB_PCI */ #endif /* __LINUX_USB_PCI_QUIRKS_H */ diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 46d5e08f05f15..1df0c362c4362 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1522,6 +1522,13 @@ int xhci_bus_suspend(struct usb_hcd *hcd) t2 |= PORT_WKOC_E | PORT_WKCONN_E; t2 &= ~PORT_WKDISC_E; } + + if ((xhci->quirks & XHCI_U2_DISABLE_WAKE) && + (hcd->speed < HCD_USB3)) { + if (usb_amd_pt_check_port(hcd->self.controller, + port_index)) + t2 &= ~PORT_WAKE_BITS; + } } else t2 &= ~PORT_WAKE_BITS; diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 6c79037876db0..5262fa571a5da 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -42,6 +42,10 @@ #define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8 #define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0 +#define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 +#define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba +#define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb +#define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 static const char hcd_name[] = "xhci_hcd"; @@ -125,6 +129,13 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_AMD) xhci->quirks |= XHCI_TRUST_TX_LENGTH; + if ((pdev->vendor == PCI_VENDOR_ID_AMD) && + ((pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4) || + (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_3) || + (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_2) || + (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_1))) + xhci->quirks |= XHCI_U2_DISABLE_WAKE; + if (pdev->vendor == PCI_VENDOR_ID_INTEL) { xhci->quirks |= XHCI_LPM_SUPPORT; xhci->quirks |= XHCI_INTEL_HOST; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 96099a245c69e..e4d7d3d06a759 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1822,7 +1822,7 @@ struct xhci_hcd { /* For controller with a broken Port Disable implementation */ #define XHCI_BROKEN_PORT_PED (1 << 25) #define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26) -/* Reserved. It was XHCI_U2_DISABLE_WAKE */ +#define XHCI_U2_DISABLE_WAKE (1 << 27) #define XHCI_ASMEDIA_MODIFY_FLOWCONTROL (1 << 28) #define XHCI_HW_LPM_DISABLE (1 << 29) From 1208d8a84fdcae6b395c57911cdf907450d30e70 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 12 Feb 2018 14:24:47 +0200 Subject: [PATCH 080/269] xhci: Don't print a warning when setting link state for disabled ports When disabling a USB3 port the hub driver will set the port link state to U3 to prevent "ejected" or "safely removed" devices that are still physically connected from immediately re-enumerating. If the device was really unplugged, then error messages were printed as the hub tries to set the U3 link state for a port that is no longer enabled. xhci-hcd ee000000.usb: Cannot set link state. usb usb8-port1: cannot disable (err = -32) Don't print error message in xhci-hub if hub tries to set port link state for a disabled port. Return -ENODEV instead which also silences hub driver. Signed-off-by: Mathias Nyman Tested-by: Yoshihiro Shimoda Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 1df0c362c4362..72ebbc908e19f 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1224,17 +1224,17 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, temp = readl(port_array[wIndex]); break; } - - /* Software should not attempt to set - * port link state above '3' (U3) and the port - * must be enabled. - */ - if ((temp & PORT_PE) == 0 || - (link_state > USB_SS_PORT_LS_U3)) { - xhci_warn(xhci, "Cannot set link state.\n"); + /* Port must be enabled */ + if (!(temp & PORT_PE)) { + retval = -ENODEV; + break; + } + /* Can't set port link state above '3' (U3) */ + if (link_state > USB_SS_PORT_LS_U3) { + xhci_warn(xhci, "Cannot set port %d link state %d\n", + wIndex, link_state); goto error; } - if (link_state == USB_SS_PORT_LS_U3) { slot_id = xhci_find_slot_id_by_port(hcd, xhci, wIndex + 1); From fa2dfd0ec22e0069c84dfae162972cbbc7c75488 Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Mon, 12 Feb 2018 14:24:48 +0200 Subject: [PATCH 081/269] xhci: Fix NULL pointer in xhci debugfs Commit dde634057da7 ("xhci: Fix use-after-free in xhci debugfs") causes a null pointer dereference while fixing xhci-debugfs usage of ring pointers that were freed during hibernate. The fix passed addresses to ring pointers instead, but forgot to do this change for the xhci_ring_trb_show function. The address of the ring pointer passed to xhci-debugfs was of a temporary ring pointer "new_ring" instead of the actual ring "ring" pointer. The temporary new_ring pointer will be set to NULL later causing the NULL pointer dereference. This issue was seen when reading xhci related files in debugfs: cat /sys/kernel/debug/usb/xhci/*/devices/*/ep*/trbs [ 184.604861] BUG: unable to handle kernel NULL pointer dereference at (null) [ 184.613776] IP: xhci_ring_trb_show+0x3a/0x890 [ 184.618733] PGD 264193067 P4D 264193067 PUD 263238067 PMD 0 [ 184.625184] Oops: 0000 [#1] SMP [ 184.726410] RIP: 0010:xhci_ring_trb_show+0x3a/0x890 [ 184.731944] RSP: 0018:ffffba8243c0fd90 EFLAGS: 00010246 [ 184.737880] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00000000000295d6 [ 184.746020] RDX: 00000000000295d5 RSI: 0000000000000001 RDI: ffff971a6418d400 [ 184.754121] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 [ 184.762222] R10: ffff971a64c98a80 R11: ffff971a62a00e40 R12: ffff971a62a85500 [ 184.770325] R13: 0000000000020000 R14: ffff971a6418d400 R15: ffff971a6418d400 [ 184.778448] FS: 00007fe725a79700(0000) GS:ffff971a6ec00000(0000) knlGS:0000000000000000 [ 184.787644] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 184.794168] CR2: 0000000000000000 CR3: 000000025f365005 CR4: 00000000003606f0 [ 184.802318] Call Trace: [ 184.805094] ? seq_read+0x281/0x3b0 [ 184.809068] seq_read+0xeb/0x3b0 [ 184.812735] full_proxy_read+0x4d/0x70 [ 184.817007] __vfs_read+0x23/0x120 [ 184.820870] vfs_read+0x91/0x130 [ 184.824538] SyS_read+0x42/0x90 [ 184.828106] entry_SYSCALL_64_fastpath+0x1a/0x7d Fixes: dde634057da7 ("xhci: Fix use-after-free in xhci debugfs") Cc: # v4.15 Signed-off-by: Zhengjun Xing Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c index e26e685d8a578..5851052d4668a 100644 --- a/drivers/usb/host/xhci-debugfs.c +++ b/drivers/usb/host/xhci-debugfs.c @@ -211,7 +211,7 @@ static void xhci_ring_dump_segment(struct seq_file *s, static int xhci_ring_trb_show(struct seq_file *s, void *unused) { int i; - struct xhci_ring *ring = s->private; + struct xhci_ring *ring = *(struct xhci_ring **)s->private; struct xhci_segment *seg = ring->first_seg; for (i = 0; i < ring->num_segs; i++) { @@ -387,7 +387,7 @@ void xhci_debugfs_create_endpoint(struct xhci_hcd *xhci, snprintf(epriv->name, sizeof(epriv->name), "ep%02d", ep_index); epriv->root = xhci_debugfs_create_ring_dir(xhci, - &dev->eps[ep_index].new_ring, + &dev->eps[ep_index].ring, epriv->name, spriv->root); spriv->eps[ep_index] = epriv; From d91676717261578f429d3577dbe9154b26e8abf7 Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Mon, 12 Feb 2018 14:24:49 +0200 Subject: [PATCH 082/269] xhci: Fix xhci debugfs devices node disappearance after hibernation During system resume from hibernation, xhci host is reset, all the nodes in devices folder are removed in xhci_mem_cleanup function. Later nodes in /sys/kernel/debug/usb/xhci/* are created again in function xhci_run, but the nodes already exist, so the nodes still keep the old ones, finally device nodes in xhci debugfs folder /sys/kernel/debug/usb/xhci/*/devices/* are disappeared. This fix removed xhci debugfs nodes before the nodes are re-created, so all the nodes in xhci debugfs can be re-created successfully. Fixes: 02b6fdc2a153 ("usb: xhci: Add debugfs interface for xHCI driver") Cc: # v4.15 Signed-off-by: Zhengjun Xing Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 1eeb3396300f2..b01bd643f9051 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1014,6 +1014,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) xhci_dbg(xhci, "cleaning up memory\n"); xhci_mem_cleanup(xhci); + xhci_debugfs_exit(xhci); xhci_dbg(xhci, "xhci_stop completed - status = %x\n", readl(&xhci->op_regs->status)); From 8c5a93ebf7ac56d47f879b3c7c2f8c83b40c2cdb Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Mon, 12 Feb 2018 14:24:50 +0200 Subject: [PATCH 083/269] xhci: xhci debugfs device nodes weren't removed after device plugged out There is a bug after plugged out USB device, the device and its ep00 nodes are still kept, we need to remove the nodes in xhci_free_dev when USB device is plugged out. Fixes: 052f71e25a7e ("xhci: Fix xhci debugfs NULL pointer dereference in resume from hibernate") Cc: # v4.15 Signed-off-by: Zhengjun Xing Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index b01bd643f9051..4adb6da0bd38f 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3545,12 +3545,10 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) virt_dev->eps[i].ep_state &= ~EP_STOP_CMD_PENDING; del_timer_sync(&virt_dev->eps[i].stop_cmd_timer); } - + xhci_debugfs_remove_slot(xhci, udev->slot_id); ret = xhci_disable_slot(xhci, udev->slot_id); - if (ret) { - xhci_debugfs_remove_slot(xhci, udev->slot_id); + if (ret) xhci_free_virt_device(xhci, udev->slot_id); - } } int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) From 11cd764dc9a030991880ad4d51db93918afa5822 Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Mon, 12 Feb 2018 14:24:51 +0200 Subject: [PATCH 084/269] xhci: fix xhci debugfs errors in xhci_stop In function xhci_stop, xhci_debugfs_exit called before xhci_mem_cleanup. xhci_debugfs_exit removed the xhci debugfs root nodes, xhci_mem_cleanup called function xhci_free_virt_devices_depth_first which in turn called function xhci_debugfs_remove_slot. Function xhci_debugfs_remove_slot removed the nodes for devices, the nodes folders are sub folder of xhci debugfs. It is unreasonable to remove xhci debugfs root folder before xhci debugfs sub folder. Function xhci_mem_cleanup should be called before function xhci_debugfs_exit. Fixes: 02b6fdc2a153 ("usb: xhci: Add debugfs interface for xHCI driver") Cc: # v4.15 Signed-off-by: Zhengjun Xing Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 4adb6da0bd38f..25d4b748a56f3 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -646,8 +646,6 @@ static void xhci_stop(struct usb_hcd *hcd) return; } - xhci_debugfs_exit(xhci); - xhci_dbc_exit(xhci); spin_lock_irq(&xhci->lock); @@ -680,6 +678,7 @@ static void xhci_stop(struct usb_hcd *hcd) xhci_dbg_trace(xhci, trace_xhci_dbg_init, "cleaning up memory"); xhci_mem_cleanup(xhci); + xhci_debugfs_exit(xhci); xhci_dbg_trace(xhci, trace_xhci_dbg_init, "xhci_stop completed - status = %x", readl(&xhci->op_regs->status)); From 71a0483d56e784b1e11f38f10d7e22d265dbe244 Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Thu, 1 Feb 2018 10:32:32 +0100 Subject: [PATCH 085/269] USB: serial: option: Add support for Quectel EP06 The Quectel EP06 is a Cat. 6 LTE modem, and the interface mapping is as follows: 0: Diag 1: NMEA 2: AT 3: Modem Interface 4 is QMI and interface 5 is ADB, so they are blacklisted. This patch should also be considered for -stable. The QMI-patch for this modem is already in the -stable-queue. v1->v2: * Updated commit prefix (thanks Johan Hovold) * Updated commit message slightly. Signed-off-by: Kristian Evensen Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 5db8ed517e0e1..2d8d9150da0cc 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -241,6 +241,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EC21 0x0121 #define QUECTEL_PRODUCT_EC25 0x0125 #define QUECTEL_PRODUCT_BG96 0x0296 +#define QUECTEL_PRODUCT_EP06 0x0306 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -689,6 +690,10 @@ static const struct option_blacklist_info yuga_clm920_nc5_blacklist = { .reserved = BIT(1) | BIT(4), }; +static const struct option_blacklist_info quectel_ep06_blacklist = { + .reserved = BIT(4) | BIT(5), +}; + static const struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) }, @@ -1203,6 +1208,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06), + .driver_info = (kernel_ulong_t)&quectel_ep06_blacklist }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), From b2685bdacdaab065c172b97b55ab46c6be77a037 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Fri, 2 Feb 2018 13:51:39 +0800 Subject: [PATCH 086/269] ohci-hcd: Fix race condition caused by ohci_urb_enqueue() and io_watchdog_func() Running io_watchdog_func() while ohci_urb_enqueue() is running can cause a race condition where ohci->prev_frame_no is corrupted and the watchdog can mis-detect following error: ohci-platform 664a0800.usb: frame counter not updating; disabled ohci-platform 664a0800.usb: HC died; cleaning up Specifically, following scenario causes a race condition: 1. ohci_urb_enqueue() calls spin_lock_irqsave(&ohci->lock, flags) and enters the critical section 2. ohci_urb_enqueue() calls timer_pending(&ohci->io_watchdog) and it returns false 3. ohci_urb_enqueue() sets ohci->prev_frame_no to a frame number read by ohci_frame_no(ohci) 4. ohci_urb_enqueue() schedules io_watchdog_func() with mod_timer() 5. ohci_urb_enqueue() calls spin_unlock_irqrestore(&ohci->lock, flags) and exits the critical section 6. Later, ohci_urb_enqueue() is called 7. ohci_urb_enqueue() calls spin_lock_irqsave(&ohci->lock, flags) and enters the critical section 8. The timer scheduled on step 4 expires and io_watchdog_func() runs 9. io_watchdog_func() calls spin_lock_irqsave(&ohci->lock, flags) and waits on it because ohci_urb_enqueue() is already in the critical section on step 7 10. ohci_urb_enqueue() calls timer_pending(&ohci->io_watchdog) and it returns false 11. ohci_urb_enqueue() sets ohci->prev_frame_no to new frame number read by ohci_frame_no(ohci) because the frame number proceeded between step 3 and 6 12. ohci_urb_enqueue() schedules io_watchdog_func() with mod_timer() 13. ohci_urb_enqueue() calls spin_unlock_irqrestore(&ohci->lock, flags) and exits the critical section, then wake up io_watchdog_func() which is waiting on step 9 14. io_watchdog_func() enters the critical section 15. io_watchdog_func() calls ohci_frame_no(ohci) and set frame_no variable to the frame number 16. io_watchdog_func() compares frame_no and ohci->prev_frame_no On step 16, because this calling of io_watchdog_func() is scheduled on step 4, the frame number set in ohci->prev_frame_no is expected to the number set on step 3. However, ohci->prev_frame_no is overwritten on step 11. Because step 16 is executed soon after step 11, the frame number might not proceed, so ohci->prev_frame_no must equals to frame_no. To address above scenario, this patch introduces a special sentinel value IO_WATCHDOG_OFF and set this value to ohci->prev_frame_no when the watchdog is not pending or running. When ohci_urb_enqueue() schedules the watchdog (step 4 and 12 above), it compares ohci->prev_frame_no to IO_WATCHDOG_OFF so that ohci->prev_frame_no is not overwritten while io_watchdog_func() is running. Signed-off-by: Shigeru Yoshida Signed-off-by: Haiqing Bai Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-hcd.c | 10 +++++++--- drivers/usb/host/ohci-hub.c | 4 +++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index ee96763493332..84f88fa411cde 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -74,6 +74,7 @@ static const char hcd_name [] = "ohci_hcd"; #define STATECHANGE_DELAY msecs_to_jiffies(300) #define IO_WATCHDOG_DELAY msecs_to_jiffies(275) +#define IO_WATCHDOG_OFF 0xffffff00 #include "ohci.h" #include "pci-quirks.h" @@ -231,7 +232,7 @@ static int ohci_urb_enqueue ( } /* Start up the I/O watchdog timer, if it's not running */ - if (!timer_pending(&ohci->io_watchdog) && + if (ohci->prev_frame_no == IO_WATCHDOG_OFF && list_empty(&ohci->eds_in_use) && !(ohci->flags & OHCI_QUIRK_QEMU)) { ohci->prev_frame_no = ohci_frame_no(ohci); @@ -501,6 +502,7 @@ static int ohci_init (struct ohci_hcd *ohci) return 0; timer_setup(&ohci->io_watchdog, io_watchdog_func, 0); + ohci->prev_frame_no = IO_WATCHDOG_OFF; ohci->hcca = dma_alloc_coherent (hcd->self.controller, sizeof(*ohci->hcca), &ohci->hcca_dma, GFP_KERNEL); @@ -730,7 +732,7 @@ static void io_watchdog_func(struct timer_list *t) u32 head; struct ed *ed; struct td *td, *td_start, *td_next; - unsigned frame_no; + unsigned frame_no, prev_frame_no = IO_WATCHDOG_OFF; unsigned long flags; spin_lock_irqsave(&ohci->lock, flags); @@ -835,7 +837,7 @@ static void io_watchdog_func(struct timer_list *t) } } if (!list_empty(&ohci->eds_in_use)) { - ohci->prev_frame_no = frame_no; + prev_frame_no = frame_no; ohci->prev_wdh_cnt = ohci->wdh_cnt; ohci->prev_donehead = ohci_readl(ohci, &ohci->regs->donehead); @@ -845,6 +847,7 @@ static void io_watchdog_func(struct timer_list *t) } done: + ohci->prev_frame_no = prev_frame_no; spin_unlock_irqrestore(&ohci->lock, flags); } @@ -973,6 +976,7 @@ static void ohci_stop (struct usb_hcd *hcd) if (quirk_nec(ohci)) flush_work(&ohci->nec_work); del_timer_sync(&ohci->io_watchdog); + ohci->prev_frame_no = IO_WATCHDOG_OFF; ohci_writel (ohci, OHCI_INTR_MIE, &ohci->regs->intrdisable); ohci_usb_reset(ohci); diff --git a/drivers/usb/host/ohci-hub.c b/drivers/usb/host/ohci-hub.c index fb7aaa3b9d067..634f3c7bf7748 100644 --- a/drivers/usb/host/ohci-hub.c +++ b/drivers/usb/host/ohci-hub.c @@ -311,8 +311,10 @@ static int ohci_bus_suspend (struct usb_hcd *hcd) rc = ohci_rh_suspend (ohci, 0); spin_unlock_irq (&ohci->lock); - if (rc == 0) + if (rc == 0) { del_timer_sync(&ohci->io_watchdog); + ohci->prev_frame_no = IO_WATCHDOG_OFF; + } return rc; } From 009f41aed4b3e11e6dc1e3c07377a10c20f1a5ed Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 26 Jan 2018 11:56:50 -0700 Subject: [PATCH 087/269] usbip: keep usbip_device sockfd state in sync with tcp_socket Keep usbip_device sockfd state in sync with tcp_socket. When tcp_socket is reset to null, reset sockfd to -1 to keep it in sync. Signed-off-by: Shuah Khan Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_dev.c | 3 +++ drivers/usb/usbip/vhci_hcd.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index 49e552472c3f3..dd8ef36ab10ec 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -73,6 +73,7 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a goto err; sdev->ud.tcp_socket = socket; + sdev->ud.sockfd = sockfd; spin_unlock_irq(&sdev->ud.lock); @@ -172,6 +173,7 @@ static void stub_shutdown_connection(struct usbip_device *ud) if (ud->tcp_socket) { sockfd_put(ud->tcp_socket); ud->tcp_socket = NULL; + ud->sockfd = -1; } /* 3. free used data */ @@ -266,6 +268,7 @@ static struct stub_device *stub_device_alloc(struct usb_device *udev) sdev->ud.status = SDEV_ST_AVAILABLE; spin_lock_init(&sdev->ud.lock); sdev->ud.tcp_socket = NULL; + sdev->ud.sockfd = -1; INIT_LIST_HEAD(&sdev->priv_init); INIT_LIST_HEAD(&sdev->priv_tx); diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index c3e1008aa491e..20e3d46095838 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -984,6 +984,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud) if (vdev->ud.tcp_socket) { sockfd_put(vdev->ud.tcp_socket); vdev->ud.tcp_socket = NULL; + vdev->ud.sockfd = -1; } pr_info("release socket\n"); @@ -1030,6 +1031,7 @@ static void vhci_device_reset(struct usbip_device *ud) if (ud->tcp_socket) { sockfd_put(ud->tcp_socket); ud->tcp_socket = NULL; + ud->sockfd = -1; } ud->status = VDEV_ST_NULL; From 02a10f061a3f8bca1b37332672f50a107198adbe Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Thu, 1 Feb 2018 12:26:43 +0800 Subject: [PATCH 088/269] usb: host: ehci: use correct device pointer for dma ops commit a8c06e407ef9 ("usb: separate out sysdev pointer from usb_bus") converted to use hcd->self.sysdev for DMA operations instead of hcd->self.controller, but forgot to do it for hcd test mode. Replace the correct one in this commit. Fixes: a8c06e407ef9 ("usb: separate out sysdev pointer from usb_bus") Signed-off-by: Peter Chen Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index facafdf8fb95d..d7641cbdee43d 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -774,12 +774,12 @@ static struct urb *request_single_step_set_feature_urb( atomic_inc(&urb->use_count); atomic_inc(&urb->dev->urbnum); urb->setup_dma = dma_map_single( - hcd->self.controller, + hcd->self.sysdev, urb->setup_packet, sizeof(struct usb_ctrlrequest), DMA_TO_DEVICE); urb->transfer_dma = dma_map_single( - hcd->self.controller, + hcd->self.sysdev, urb->transfer_buffer, urb->transfer_buffer_length, DMA_FROM_DEVICE); From d6efa938ac366fe8cb92d6157f74d43cc35f1c67 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 5 Feb 2018 17:12:35 +0900 Subject: [PATCH 089/269] usb: renesas_usbhs: missed the "running" flag in usb_dmac with rx path This fixes an issue that a gadget driver (usb_f_fs) is possible to stop rx transactions after the usb-dmac is used because the following functions missed to set/check the "running" flag. - usbhsf_dma_prepare_pop_with_usb_dmac() - usbhsf_dma_pop_done_with_usb_dmac() So, if next transaction uses pio, the usbhsf_prepare_pop() can not start the transaction because the "running" flag is 0. Fixes: 8355b2b3082d ("usb: renesas_usbhs: fix the behavior of some usbhs_pkt_handle") Cc: # v3.19+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/fifo.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 5925d111bd474..39fa2fc1b8b76 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -982,6 +982,10 @@ static int usbhsf_dma_prepare_pop_with_usb_dmac(struct usbhs_pkt *pkt, if ((uintptr_t)pkt->buf & (USBHS_USB_DMAC_XFER_SIZE - 1)) goto usbhsf_pio_prepare_pop; + /* return at this time if the pipe is running */ + if (usbhs_pipe_is_running(pipe)) + return 0; + usbhs_pipe_config_change_bfre(pipe, 1); ret = usbhsf_fifo_select(pipe, fifo, 0); @@ -1172,6 +1176,7 @@ static int usbhsf_dma_pop_done_with_usb_dmac(struct usbhs_pkt *pkt, usbhsf_fifo_clear(pipe, fifo); pkt->actual = usbhs_dma_calc_received_size(pkt, chan, rcv_len); + usbhs_pipe_running(pipe, 0); usbhsf_dma_stop(pipe, fifo); usbhsf_dma_unmap(pkt); usbhsf_fifo_unselect(pipe, pipe->fifo); From 52ad2bd8918158266fc88a05f95429b56b6a33c5 Mon Sep 17 00:00:00 2001 From: Karsten Koop Date: Fri, 9 Feb 2018 09:12:06 +0000 Subject: [PATCH 090/269] usb: ldusb: add PIDs for new CASSY devices supported by this driver This patch adds support for new CASSY devices to the ldusb driver. The PIDs are also added to the ignore list in hid-quirks. Signed-off-by: Karsten Koop Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/hid-quirks.c | 3 +++ drivers/usb/misc/ldusb.c | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 43ddcdfbd0da4..9454ac134ce22 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -645,6 +645,9 @@ #define USB_DEVICE_ID_LD_MICROCASSYTIME 0x1033 #define USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE 0x1035 #define USB_DEVICE_ID_LD_MICROCASSYPH 0x1038 +#define USB_DEVICE_ID_LD_POWERANALYSERCASSY 0x1040 +#define USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY 0x1042 +#define USB_DEVICE_ID_LD_MACHINETESTCASSY 0x1043 #define USB_DEVICE_ID_LD_JWM 0x1080 #define USB_DEVICE_ID_LD_DMMP 0x1081 #define USB_DEVICE_ID_LD_UMIP 0x1090 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 5f6035a5ce367..e92b77fa574a9 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -809,6 +809,9 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTIME) }, { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE) }, { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYPH) }, + { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERANALYSERCASSY) }, + { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY) }, + { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETESTCASSY) }, { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_JWM) }, { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_DMMP) }, { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIP) }, diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index 63b9e85dc0e93..236a60f53099e 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -42,6 +42,9 @@ #define USB_DEVICE_ID_LD_MICROCASSYTIME 0x1033 /* USB Product ID of Micro-CASSY Time (reserved) */ #define USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE 0x1035 /* USB Product ID of Micro-CASSY Temperature */ #define USB_DEVICE_ID_LD_MICROCASSYPH 0x1038 /* USB Product ID of Micro-CASSY pH */ +#define USB_DEVICE_ID_LD_POWERANALYSERCASSY 0x1040 /* USB Product ID of Power Analyser CASSY */ +#define USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY 0x1042 /* USB Product ID of Converter Controller CASSY */ +#define USB_DEVICE_ID_LD_MACHINETESTCASSY 0x1043 /* USB Product ID of Machine Test CASSY */ #define USB_DEVICE_ID_LD_JWM 0x1080 /* USB Product ID of Joule and Wattmeter */ #define USB_DEVICE_ID_LD_DMMP 0x1081 /* USB Product ID of Digital Multimeter P (reserved) */ #define USB_DEVICE_ID_LD_UMIP 0x1090 /* USB Product ID of UMI P */ @@ -84,6 +87,9 @@ static const struct usb_device_id ld_usb_table[] = { { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTIME) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYPH) }, + { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERANALYSERCASSY) }, + { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY) }, + { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETESTCASSY) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_JWM) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_DMMP) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIP) }, From 91b119359c1c3033a6621909d3c5dbbdf201d6b4 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Mon, 5 Feb 2018 11:50:56 +0800 Subject: [PATCH 091/269] usb: host: ehci: always enable interrupt for qtd completion at test mode At former code, the SETUP stage does not enable interrupt for qtd completion, it relies on IAA watchdog to complete interrupt, then the transcation would be considered timeout if the flag need_io_watchdog is cleared by platform code. In this commit, we always add enable interrupt for qtd completion, then the qtd completion can be notified by hardware interrupt. Signed-off-by: Peter Chen Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-q.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index 88158324dcae2..3276304056952 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -1188,10 +1188,10 @@ static int submit_single_step_set_feature( * 15 secs after the setup */ if (is_setup) { - /* SETUP pid */ + /* SETUP pid, and interrupt after SETUP completion */ qtd_fill(ehci, qtd, urb->setup_dma, sizeof(struct usb_ctrlrequest), - token | (2 /* "setup" */ << 8), 8); + QTD_IOC | token | (2 /* "setup" */ << 8), 8); submit_async(ehci, urb, &qtd_list, GFP_ATOMIC); return 0; /*Return now; we shall come back after 15 seconds*/ @@ -1228,12 +1228,8 @@ static int submit_single_step_set_feature( qtd_prev->hw_next = QTD_NEXT(ehci, qtd->qtd_dma); list_add_tail(&qtd->qtd_list, head); - /* dont fill any data in such packets */ - qtd_fill(ehci, qtd, 0, 0, token, 0); - - /* by default, enable interrupt on urb completion */ - if (likely(!(urb->transfer_flags & URB_NO_INTERRUPT))) - qtd->hw_token |= cpu_to_hc32(ehci, QTD_IOC); + /* Interrupt after STATUS completion */ + qtd_fill(ehci, qtd, 0, 0, token | QTD_IOC, 0); submit_async(ehci, urb, &qtd_list, GFP_KERNEL); From 46408ea558df13b110e0866b99624384a33bdeba Mon Sep 17 00:00:00 2001 From: AMAN DEEP Date: Thu, 8 Feb 2018 11:55:01 +0800 Subject: [PATCH 092/269] usb: ohci: Proper handling of ed_rm_list to handle race condition between usb_kill_urb() and finish_unlinks() There is a race condition between finish_unlinks->finish_urb() function and usb_kill_urb() in ohci controller case. The finish_urb calls spin_unlock(&ohci->lock) before usb_hcd_giveback_urb() function call, then if during this time, usb_kill_urb is called for another endpoint, then new ed will be added to ed_rm_list at beginning for unlink, and ed_rm_list will point to newly added. When finish_urb() is completed in finish_unlinks() and ed->td_list becomes empty as in below code (in finish_unlinks() function): if (list_empty(&ed->td_list)) { *last = ed->ed_next; ed->ed_next = NULL; } else if (ohci->rh_state == OHCI_RH_RUNNING) { *last = ed->ed_next; ed->ed_next = NULL; ed_schedule(ohci, ed); } The *last = ed->ed_next will make ed_rm_list to point to ed->ed_next and previously added ed by usb_kill_urb will be left unreferenced by ed_rm_list. This causes usb_kill_urb() hang forever waiting for finish_unlink to remove added ed from ed_rm_list. The main reason for hang in this race condtion is addition and removal of ed from ed_rm_list in the beginning during usb_kill_urb and later last* is modified in finish_unlinks(). As suggested by Alan Stern, the solution for proper handling of ohci->ed_rm_list is to remove ed from the ed_rm_list before finishing any URBs. Then at the end, we can add ed back to the list if necessary. This properly handle the updated ohci->ed_rm_list in usb_kill_urb(). Fixes: 977dcfdc6031 ("USB: OHCI: don't lose track of EDs when a controller dies") Acked-by: Alan Stern CC: Signed-off-by: Aman Deep Signed-off-by: Jeffy Chen Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-q.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c index b2ec8c399363b..4ccb85a67bb3c 100644 --- a/drivers/usb/host/ohci-q.c +++ b/drivers/usb/host/ohci-q.c @@ -1019,6 +1019,8 @@ static void finish_unlinks(struct ohci_hcd *ohci) * have modified this list. normally it's just prepending * entries (which we'd ignore), but paranoia won't hurt. */ + *last = ed->ed_next; + ed->ed_next = NULL; modified = 0; /* unlink urbs as requested, but rescan the list after @@ -1077,21 +1079,22 @@ static void finish_unlinks(struct ohci_hcd *ohci) goto rescan_this; /* - * If no TDs are queued, take ED off the ed_rm_list. + * If no TDs are queued, ED is now idle. * Otherwise, if the HC is running, reschedule. - * If not, leave it on the list for further dequeues. + * If the HC isn't running, add ED back to the + * start of the list for later processing. */ if (list_empty(&ed->td_list)) { - *last = ed->ed_next; - ed->ed_next = NULL; ed->state = ED_IDLE; list_del(&ed->in_use_list); } else if (ohci->rh_state == OHCI_RH_RUNNING) { - *last = ed->ed_next; - ed->ed_next = NULL; ed_schedule(ohci, ed); } else { - last = &ed->ed_next; + ed->ed_next = ohci->ed_rm_list; + ohci->ed_rm_list = ed; + /* Don't loop on the same ED */ + if (last == &ohci->ed_rm_list) + last = &ed->ed_next; } if (modified) From 7a1646d922577b5b48c0d222e03831141664bb59 Mon Sep 17 00:00:00 2001 From: Jack Stocker Date: Thu, 15 Feb 2018 18:24:10 +0000 Subject: [PATCH 093/269] Add delay-init quirk for Corsair K70 RGB keyboards Following on from this patch: https://lkml.org/lkml/2017/11/3/516, Corsair K70 RGB keyboards also require the DELAY_INIT quirk to start correctly at boot. Device ids found here: usb 3-3: New USB device found, idVendor=1b1c, idProduct=1b13 usb 3-3: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 3-3: Product: Corsair K70 RGB Gaming Keyboard Signed-off-by: Jack Stocker Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 4024926c1d68c..f4a548471f0fa 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -226,6 +226,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1a0a, 0x0200), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Corsair K70 RGB */ + { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Corsair Strafe RGB */ { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT }, From 14fa91e0fef8e4d6feb8b1fa2a807828e0abe815 Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Tue, 13 Feb 2018 12:18:27 +0200 Subject: [PATCH 094/269] IB/ipoib: Do not warn if IPoIB debugfs doesn't exist netdev_wait_allrefs() could rebroadcast NETDEV_UNREGISTER event multiple times until all refs are gone, which will result in calling ipoib_delete_debug_files multiple times and printing a warning. Remove the WARN_ONCE since checks of NULL pointers before calling debugfs_remove are not needed. Fixes: 771a52584096 ("IB/IPoIB: ibX: failed to create mcg debug file") Signed-off-by: Alaa Hleihel Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_fs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 11f74cbe6660b..ea302b0546016 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -281,8 +281,6 @@ void ipoib_delete_debug_files(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n"); - WARN_ONCE(!priv->path_dentry, "null path debug file\n"); debugfs_remove(priv->mcg_dentry); debugfs_remove(priv->path_dentry); priv->mcg_dentry = priv->path_dentry = NULL; From 415bb699d793f7ad9c67c04a766d1e655fa6b203 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 13 Feb 2018 12:18:28 +0200 Subject: [PATCH 095/269] RDMA/restrack: Remove unimplemented XRCD object Resource tracking of XRCD objects is not implemented in current version of restrack and hence can be removed. Fixes: 02d8883f520e ("RDMA/restrack: Add general infrastructure to track RDMA resources") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/restrack.c | 5 ----- include/rdma/restrack.h | 4 ---- 2 files changed, 9 deletions(-) diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 857637bf46da2..d8dc709a37156 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -63,7 +63,6 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res) { enum rdma_restrack_type type = res->type; struct ib_device *dev; - struct ib_xrcd *xrcd; struct ib_pd *pd; struct ib_cq *cq; struct ib_qp *qp; @@ -81,10 +80,6 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res) qp = container_of(res, struct ib_qp, res); dev = qp->device; break; - case RDMA_RESTRACK_XRCD: - xrcd = container_of(res, struct ib_xrcd, res); - dev = xrcd->device; - break; default: WARN_ONCE(true, "Wrong resource tracking type %u\n", type); return NULL; diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index c2d81167c8585..2cdf8dcf4bdcb 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -28,10 +28,6 @@ enum rdma_restrack_type { * @RDMA_RESTRACK_QP: Queue pair (QP) */ RDMA_RESTRACK_QP, - /** - * @RDMA_RESTRACK_XRCD: XRC domain (XRCD) - */ - RDMA_RESTRACK_XRCD, /** * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations */ From 89d9e8d3f14d807bbd7725f8f6f5eeb7f6f5c42f Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 13 Feb 2018 12:18:29 +0200 Subject: [PATCH 096/269] IB/uverbs: Always use the attribute size provided by the user This fixes several bugs around the copy_to/from user path: - copy_to used the user provided size of the attribute and could copy data beyond the end of the kernel buffer into userspace. - copy_from didn't know the size of the kernel buffer and could have left kernel memory unexpectedly un-initialized. - copy_from did not use the user length to determine if the attribute data is inlined or not. Signed-off-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types.c | 5 ++-- include/rdma/uverbs_ioctl.h | 35 +++++++++++++++++----- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index cab0ac3556eb0..c6502c7b7c466 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -323,7 +323,8 @@ static int uverbs_create_cq_handler(struct ib_device *ib_dev, cq->res.type = RDMA_RESTRACK_CQ; rdma_restrack_add(&cq->res); - ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe); + ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe, + sizeof(cq->cqe)); if (ret) goto err_cq; @@ -375,7 +376,7 @@ static int uverbs_destroy_cq_handler(struct ib_device *ib_dev, resp.comp_events_reported = obj->comp_events_reported; resp.async_events_reported = obj->async_events_reported; - return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp); + return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp, sizeof(resp)); } static DECLARE_UVERBS_METHOD( diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 6da44079aa589..32cb14703914b 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -351,29 +351,50 @@ static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr } static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, - size_t idx, const void *from) + size_t idx, const void *from, size_t size) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); u16 flags; + size_t min_size; if (IS_ERR(attr)) return PTR_ERR(attr); + min_size = min_t(size_t, attr->ptr_attr.len, size); + if (copy_to_user(attr->ptr_attr.ptr, from, min_size)) + return -EFAULT; + flags = attr->ptr_attr.flags | UVERBS_ATTR_F_VALID_OUTPUT; - return (!copy_to_user(attr->ptr_attr.ptr, from, attr->ptr_attr.len) && - !put_user(flags, &attr->uattr->flags)) ? 0 : -EFAULT; + if (put_user(flags, &attr->uattr->flags)) + return -EFAULT; + + return 0; } -static inline int _uverbs_copy_from(void *to, size_t to_size, +static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr) +{ + return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data); +} + +static inline int _uverbs_copy_from(void *to, const struct uverbs_attr_bundle *attrs_bundle, - size_t idx) + size_t idx, + size_t size) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); if (IS_ERR(attr)) return PTR_ERR(attr); - if (to_size <= sizeof(((struct ib_uverbs_attr *)0)->data)) + /* + * Validation ensures attr->ptr_attr.len >= size. If the caller is + * using UVERBS_ATTR_SPEC_F_MIN_SZ then it must call copy_from with + * the right size. + */ + if (unlikely(size < attr->ptr_attr.len)) + return -EINVAL; + + if (uverbs_attr_ptr_is_inline(attr)) memcpy(to, &attr->ptr_attr.data, attr->ptr_attr.len); else if (copy_from_user(to, attr->ptr_attr.ptr, attr->ptr_attr.len)) return -EFAULT; @@ -382,7 +403,7 @@ static inline int _uverbs_copy_from(void *to, size_t to_size, } #define uverbs_copy_from(to, attrs_bundle, idx) \ - _uverbs_copy_from(to, sizeof(*(to)), attrs_bundle, idx) + _uverbs_copy_from(to, attrs_bundle, idx, sizeof(*to)) /* ================================================= * Definitions -> Specs infrastructure From 6c976c30ad1c205bd6e34182c5ba9a1267d752ca Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 13 Feb 2018 12:18:30 +0200 Subject: [PATCH 097/269] IB/uverbs: Use inline data transfer for UHW_IN The rule for the API is pointers less than 8 bytes are inlined into the .data field of the attribute. Fix the creation of the driver udata struct to follow this rule and point to the .data itself when the size is less than 8 bytes. Otherwise if the UHW struct is less than 8 bytes the driver will get EFAULT during copy_from_user. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index c6502c7b7c466..7b0e4d778d794 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -234,8 +234,11 @@ static void create_udata(struct uverbs_attr_bundle *ctx, uverbs_attr_get(ctx, UVERBS_UHW_OUT); if (!IS_ERR(uhw_in)) { - udata->inbuf = uhw_in->ptr_attr.ptr; udata->inlen = uhw_in->ptr_attr.len; + if (uverbs_attr_ptr_is_inline(uhw_in)) + udata->inbuf = &uhw_in->uattr->data; + else + udata->inbuf = uhw_in->ptr_attr.ptr; } else { udata->inbuf = NULL; udata->inlen = 0; From 2f36028ce98ef8e9c04809cc20b9dc498cc1a508 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 13 Feb 2018 12:18:31 +0200 Subject: [PATCH 098/269] IB/uverbs: Use u64_to_user_ptr() not a union The union approach will get the endianness wrong sometimes if the kernel's pointer size is 32 bits resulting in EFAULTs when trying to copy to/from user. Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types.c | 4 ++-- include/rdma/uverbs_ioctl.h | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 7b0e4d778d794..df1360e6774f4 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -238,14 +238,14 @@ static void create_udata(struct uverbs_attr_bundle *ctx, if (uverbs_attr_ptr_is_inline(uhw_in)) udata->inbuf = &uhw_in->uattr->data; else - udata->inbuf = uhw_in->ptr_attr.ptr; + udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data); } else { udata->inbuf = NULL; udata->inlen = 0; } if (!IS_ERR(uhw_out)) { - udata->outbuf = uhw_out->ptr_attr.ptr; + udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data); udata->outlen = uhw_out->ptr_attr.len; } else { udata->outbuf = NULL; diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 32cb14703914b..38287d9d23a1f 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -276,10 +276,7 @@ struct uverbs_object_tree_def { */ struct uverbs_ptr_attr { - union { - u64 data; - void __user *ptr; - }; + u64 data; u16 len; /* Combination of bits from enum UVERBS_ATTR_F_XXXX */ u16 flags; @@ -361,7 +358,7 @@ static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, return PTR_ERR(attr); min_size = min_t(size_t, attr->ptr_attr.len, size); - if (copy_to_user(attr->ptr_attr.ptr, from, min_size)) + if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size)) return -EFAULT; flags = attr->ptr_attr.flags | UVERBS_ATTR_F_VALID_OUTPUT; @@ -396,7 +393,8 @@ static inline int _uverbs_copy_from(void *to, if (uverbs_attr_ptr_is_inline(attr)) memcpy(to, &attr->ptr_attr.data, attr->ptr_attr.len); - else if (copy_from_user(to, attr->ptr_attr.ptr, attr->ptr_attr.len)) + else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data), + attr->ptr_attr.len)) return -EFAULT; return 0; From 3d89459e2ef92cc0e5a50dde868780ccda9786c1 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 13 Feb 2018 12:18:32 +0200 Subject: [PATCH 099/269] IB/uverbs: Fix method merging in uverbs_ioctl_merge Fix a bug in uverbs_ioctl_merge that looked at the object's iterator number instead of the method's iterator number when merging methods. While we're at it, make the uverbs_ioctl_merge code a bit more clear and faster. Fixes: 118620d3686b ('IB/core: Add uverbs merge trees functionality') Signed-off-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_ioctl_merge.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c index 062485f9300dc..62e1eb1d2a28a 100644 --- a/drivers/infiniband/core/uverbs_ioctl_merge.c +++ b/drivers/infiniband/core/uverbs_ioctl_merge.c @@ -114,6 +114,7 @@ static size_t get_elements_above_id(const void **iters, short min = SHRT_MAX; const void *elem; int i, j, last_stored = -1; + unsigned int equal_min = 0; for_each_element(elem, i, j, elements, num_elements, num_offset, data_offset) { @@ -136,6 +137,10 @@ static size_t get_elements_above_id(const void **iters, */ iters[last_stored == i ? num_iters - 1 : num_iters++] = elem; last_stored = i; + if (min == GET_ID(id)) + equal_min++; + else + equal_min = 1; min = GET_ID(id); } @@ -146,15 +151,10 @@ static size_t get_elements_above_id(const void **iters, * Therefore, we need to clean the beginning of the array to make sure * all ids of final elements are equal to min. */ - for (i = num_iters - 1; i >= 0 && - GET_ID(*(u16 *)(iters[i] + id_offset)) == min; i--) - ; - - num_iters -= i + 1; - memmove(iters, iters + i + 1, sizeof(*iters) * num_iters); + memmove(iters, iters + num_iters - equal_min, sizeof(*iters) * equal_min); *min_id = min; - return num_iters; + return equal_min; } #define find_max_element_entry_id(num_elements, elements, num_objects_fld, \ @@ -322,7 +322,7 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me hash = kzalloc(sizeof(*hash) + ALIGN(sizeof(*hash->attrs) * (attr_max_bucket + 1), sizeof(long)) + - BITS_TO_LONGS(attr_max_bucket) * sizeof(long), + BITS_TO_LONGS(attr_max_bucket + 1) * sizeof(long), GFP_KERNEL); if (!hash) { res = -ENOMEM; @@ -509,7 +509,7 @@ static struct uverbs_object_spec *build_object_with_methods(const struct uverbs_ * first handler which != NULL. This also defines the * set of flags used for this handler. */ - for (i = num_object_defs - 1; + for (i = num_method_defs - 1; i >= 0 && !method_defs[i]->handler; i--) ; hash->methods[min_id++] = method; From 5d2beb576d32ef2cd047db8914e3602e99a12763 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 13 Feb 2018 12:18:33 +0200 Subject: [PATCH 100/269] IB/uverbs: Use __aligned_u64 for uapi headers This has no impact on the structure layout since these structs already have their u64s already properly aligned, but it does document that we have this requirement for 32 bit compatibility. Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/rdma_user_ioctl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/rdma/rdma_user_ioctl.h b/include/uapi/rdma/rdma_user_ioctl.h index 03557b5f9aa6b..46de0885e8001 100644 --- a/include/uapi/rdma/rdma_user_ioctl.h +++ b/include/uapi/rdma/rdma_user_ioctl.h @@ -65,7 +65,7 @@ struct ib_uverbs_attr { __u16 len; /* only for pointers */ __u16 flags; /* combination of UVERBS_ATTR_F_XXXX */ __u16 reserved; - __u64 data; /* ptr to command, inline data or idr/fd */ + __aligned_u64 data; /* ptr to command, inline data or idr/fd */ }; struct ib_uverbs_ioctl_hdr { @@ -73,7 +73,7 @@ struct ib_uverbs_ioctl_hdr { __u16 object_id; __u16 method_id; __u16 num_attrs; - __u64 reserved; + __aligned_u64 reserved; struct ib_uverbs_attr attrs[0]; }; From 9dfb2ff400f6c0a52f63014b5331b64ee7bd5c19 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 13 Feb 2018 12:18:34 +0200 Subject: [PATCH 101/269] IB/uverbs: Add ioctl support for 32bit processes 32 bit processes running on a 64 bit kernel call compat_ioctl so that implementations can revise any structure layout issues. Point compat_ioctl at our normal ioctl because: - All our structures are designed to be the same on 32 and 64 bit, ie we use __aligned_u64 when required and are careful to manage padding. - Any pointers are stored in u64's and userspace is expected to prepare them properly. Signed-off-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 395a3b091229f..cd72555ad457a 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -942,6 +942,7 @@ static const struct file_operations uverbs_fops = { .llseek = no_llseek, #if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS) .unlocked_ioctl = ib_uverbs_ioctl, + .compat_ioctl = ib_uverbs_ioctl, #endif }; @@ -954,6 +955,7 @@ static const struct file_operations uverbs_mmap_fops = { .llseek = no_llseek, #if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS) .unlocked_ioctl = ib_uverbs_ioctl, + .compat_ioctl = ib_uverbs_ioctl, #endif }; From 4d39a959bc1f3d164b5a54147fdeb19f84b1ed58 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 13 Feb 2018 12:18:35 +0200 Subject: [PATCH 102/269] IB/uverbs: Fix possible oops with duplicate ioctl attributes If the same attribute is listed twice by the user in the ioctl attribute list then error unwind can cause the kernel to deref garbage. This happens when an object with WRITE access is sent twice. The second parse properly fails but corrupts the state required for the error unwind it triggers. Fixing this by making duplicates in the attribute list invalid. This is not something we need to support. The ioctl interface is currently recommended to be disabled in kConfig. Signed-off-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index d96dc1d17be18..339b851450446 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -59,6 +59,9 @@ static int uverbs_process_attr(struct ib_device *ibdev, return 0; } + if (test_bit(attr_id, attr_bundle_h->valid_bitmap)) + return -EINVAL; + spec = &attr_spec_bucket->attrs[attr_id]; e = &elements[attr_id]; e->uattr = uattr_ptr; From d9dc7a3500a58de9bf3861d1a96ffeab42624b4f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 13 Feb 2018 12:18:36 +0200 Subject: [PATCH 103/269] IB/uverbs: Hold the uobj write lock after allocate This clarifies the design intention that time between allocate and commit has the uobj exclusive to the caller. We already guarantee this by delaying publishing the uobj pointer via idr_insert, fd_install, list_add, etc. Additionally holding the usecnt lock during this period provides extra clarity and more protection against future mistakes. Fixes: 3832125624b7 ("IB/core: Add support for idr types") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 85b5ee4defa4b..3fe6035abde68 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -141,7 +141,12 @@ static struct ib_uobject *alloc_uobj(struct ib_ucontext *context, */ uobj->context = context; uobj->type = type; - atomic_set(&uobj->usecnt, 0); + /* + * Allocated objects start out as write locked to deny any other + * syscalls from accessing them until they are committed. See + * rdma_alloc_commit_uobject + */ + atomic_set(&uobj->usecnt, -1); kref_init(&uobj->ref); return uobj; @@ -527,6 +532,10 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj) return ret; } + /* matches atomic_set(-1) in alloc_uobj */ + lockdep_check(uobj, true); + atomic_set(&uobj->usecnt, 0); + uobj->type->type_class->alloc_commit(uobj); up_read(&uobj->context->cleanup_rwsem); From 6623e3e3cd78020016d3fa42555763178e94ab64 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 13 Feb 2018 12:18:37 +0200 Subject: [PATCH 104/269] RDMA/uverbs: Protect from races between lookup and destroy of uobjects The race is between lookup_get_idr_uobject and uverbs_idr_remove_uobj -> uverbs_uobject_put. We deliberately do not call sychronize_rcu after the idr_remove in uverbs_idr_remove_uobj for performance reasons, instead we call kfree_rcu() during uverbs_uobject_put. However, this means we can obtain pointers to uobj's that have already been released and must protect against krefing them using kref_get_unless_zero. ================================================================== BUG: KASAN: use-after-free in copy_ah_attr_from_uverbs.isra.2+0x860/0xa00 Read of size 4 at addr ffff88005fda1ac8 by task syz-executor2/441 CPU: 1 PID: 441 Comm: syz-executor2 Not tainted 4.15.0-rc2+ #56 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 Call Trace: dump_stack+0x8d/0xd4 print_address_description+0x73/0x290 kasan_report+0x25c/0x370 ? copy_ah_attr_from_uverbs.isra.2+0x860/0xa00 copy_ah_attr_from_uverbs.isra.2+0x860/0xa00 ? uverbs_try_lock_object+0x68/0xc0 ? modify_qp.isra.7+0xdc4/0x10e0 modify_qp.isra.7+0xdc4/0x10e0 ib_uverbs_modify_qp+0xfe/0x170 ? ib_uverbs_query_qp+0x970/0x970 ? __lock_acquire+0xa11/0x1da0 ib_uverbs_write+0x55a/0xad0 ? ib_uverbs_query_qp+0x970/0x970 ? ib_uverbs_query_qp+0x970/0x970 ? ib_uverbs_open+0x760/0x760 ? futex_wake+0x147/0x410 ? sched_clock_cpu+0x18/0x180 ? check_prev_add+0x1680/0x1680 ? do_futex+0x3b6/0xa30 ? sched_clock_cpu+0x18/0x180 __vfs_write+0xf7/0x5c0 ? ib_uverbs_open+0x760/0x760 ? kernel_read+0x110/0x110 ? lock_acquire+0x370/0x370 ? __fget+0x264/0x3b0 vfs_write+0x18a/0x460 SyS_write+0xc7/0x1a0 ? SyS_read+0x1a0/0x1a0 ? trace_hardirqs_on_thunk+0x1a/0x1c entry_SYSCALL_64_fastpath+0x18/0x85 RIP: 0033:0x448e29 RSP: 002b:00007f443fee0c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007f443fee16bc RCX: 0000000000448e29 RDX: 0000000000000078 RSI: 00000000209f8000 RDI: 0000000000000012 RBP: 000000000070bea0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 0000000000008e98 R14: 00000000006ebf38 R15: 0000000000000000 Allocated by task 1: kmem_cache_alloc_trace+0x16c/0x2f0 mlx5_alloc_cmd_msg+0x12e/0x670 cmd_exec+0x419/0x1810 mlx5_cmd_exec+0x40/0x70 mlx5_core_mad_ifc+0x187/0x220 mlx5_MAD_IFC+0xd7/0x1b0 mlx5_query_mad_ifc_gids+0x1f3/0x650 mlx5_ib_query_gid+0xa4/0xc0 ib_query_gid+0x152/0x1a0 ib_query_port+0x21e/0x290 mlx5_port_immutable+0x30f/0x490 ib_register_device+0x5dd/0x1130 mlx5_ib_add+0x3e7/0x700 mlx5_add_device+0x124/0x510 mlx5_register_interface+0x11f/0x1c0 mlx5_ib_init+0x56/0x61 do_one_initcall+0xa3/0x250 kernel_init_freeable+0x309/0x3b8 kernel_init+0x14/0x180 ret_from_fork+0x24/0x30 Freed by task 1: kfree+0xeb/0x2f0 mlx5_free_cmd_msg+0xcd/0x140 cmd_exec+0xeba/0x1810 mlx5_cmd_exec+0x40/0x70 mlx5_core_mad_ifc+0x187/0x220 mlx5_MAD_IFC+0xd7/0x1b0 mlx5_query_mad_ifc_gids+0x1f3/0x650 mlx5_ib_query_gid+0xa4/0xc0 ib_query_gid+0x152/0x1a0 ib_query_port+0x21e/0x290 mlx5_port_immutable+0x30f/0x490 ib_register_device+0x5dd/0x1130 mlx5_ib_add+0x3e7/0x700 mlx5_add_device+0x124/0x510 mlx5_register_interface+0x11f/0x1c0 mlx5_ib_init+0x56/0x61 do_one_initcall+0xa3/0x250 kernel_init_freeable+0x309/0x3b8 kernel_init+0x14/0x180 ret_from_fork+0x24/0x30 The buggy address belongs to the object at ffff88005fda1ab0 which belongs to the cache kmalloc-32 of size 32 The buggy address is located 24 bytes inside of 32-byte region [ffff88005fda1ab0, ffff88005fda1ad0) The buggy address belongs to the page: page:00000000d5655c19 count:1 mapcount:0 mapping: (null) index:0xffff88005fda1fc0 flags: 0x4000000000000100(slab) raw: 4000000000000100 0000000000000000 ffff88005fda1fc0 0000000180550008 raw: ffffea00017f6780 0000000400000004 ffff88006c803980 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88005fda1980: fc fc fb fb fb fb fc fc fb fb fb fb fc fc fb fb ffff88005fda1a00: fb fb fc fc fb fb fb fb fc fc 00 00 00 00 fc fc ffff88005fda1a80: fb fb fb fb fc fc fb fb fb fb fc fc fb fb fb fb ffff88005fda1b00: fc fc 00 00 00 00 fc fc fb fb fb fb fc fc fb fb ffff88005fda1b80: fb fb fc fc fb fb fb fb fc fc fb fb fb fb fc fc ==================================================================@ Cc: syzkaller Cc: # 4.11 Fixes: 3832125624b7 ("IB/core: Add support for idr types") Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 3fe6035abde68..f1f805a0d31a0 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -201,7 +201,15 @@ static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *t goto free; } - uverbs_uobject_get(uobj); + /* + * The idr_find is guaranteed to return a pointer to something that + * isn't freed yet, or NULL, as the free after idr_remove goes through + * kfree_rcu(). However the object may still have been released and + * kfree() could be called at any time. + */ + if (!kref_get_unless_zero(&uobj->ref)) + uobj = ERR_PTR(-ENOENT); + free: rcu_read_unlock(); return uobj; From 104f268d439b3c21c83708e52946a4d8d37f3d0f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 13 Feb 2018 12:18:38 +0200 Subject: [PATCH 105/269] IB/uverbs: Improve lockdep_check This is really being used as an assert that the expected usecnt is being held and implicitly that the usecnt is valid. Rename it to assert_uverbs_usecnt and tighten the checks to only accept valid values of usecnt (eg 0 and < -1 are invalid). The tigher checkes make the assertion cover more cases and is more likely to find bugs via syzkaller/etc. Fixes: 3832125624b7 ("IB/core: Add support for idr types") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index f1f805a0d31a0..cfd257e34e029 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -412,13 +412,13 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, return ret; } -static void lockdep_check(struct ib_uobject *uobj, bool exclusive) +static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive) { #ifdef CONFIG_LOCKDEP if (exclusive) - WARN_ON(atomic_read(&uobj->usecnt) > 0); + WARN_ON(atomic_read(&uobj->usecnt) != -1); else - WARN_ON(atomic_read(&uobj->usecnt) == -1); + WARN_ON(atomic_read(&uobj->usecnt) <= 0); #endif } @@ -457,7 +457,7 @@ int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj) WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); return 0; } - lockdep_check(uobj, true); + assert_uverbs_usecnt(uobj, true); ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY); up_read(&ucontext->cleanup_rwsem); @@ -487,7 +487,7 @@ int rdma_explicit_destroy(struct ib_uobject *uobject) WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); return 0; } - lockdep_check(uobject, true); + assert_uverbs_usecnt(uobject, true); ret = uobject->type->type_class->remove_commit(uobject, RDMA_REMOVE_DESTROY); if (ret) @@ -541,7 +541,7 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj) } /* matches atomic_set(-1) in alloc_uobj */ - lockdep_check(uobj, true); + assert_uverbs_usecnt(uobj, true); atomic_set(&uobj->usecnt, 0); uobj->type->type_class->alloc_commit(uobj); @@ -578,7 +578,7 @@ static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive) void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive) { - lockdep_check(uobj, exclusive); + assert_uverbs_usecnt(uobj, exclusive); uobj->type->type_class->lookup_put(uobj, exclusive); /* * In order to unlock an object, either decrease its usecnt for From ec6f8401c48a86809237e86878a6fac6b281118f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 13 Feb 2018 12:18:40 +0200 Subject: [PATCH 106/269] IB/uverbs: Fix unbalanced unlock on error path for rdma_explicit_destroy If remove_commit fails then the lock is left locked while the uobj still exists. Eventually the kernel will deadlock. lockdep detects this and says: test/4221 is leaving the kernel with locks still held! 1 lock held by test/4221: #0: (&ucontext->cleanup_rwsem){.+.+}, at: [<000000001e5c7523>] rdma_explicit_destroy+0x37/0x120 [ib_uverbs] Fixes: 4da70da23e9b ("IB/core: Explicitly destroy an object while keeping uobject") Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index cfd257e34e029..d8eead5d106df 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -491,12 +491,13 @@ int rdma_explicit_destroy(struct ib_uobject *uobject) ret = uobject->type->type_class->remove_commit(uobject, RDMA_REMOVE_DESTROY); if (ret) - return ret; + goto out; uobject->type = &null_obj_type; +out: up_read(&ucontext->cleanup_rwsem); - return 0; + return ret; } static void alloc_commit_idr_uobject(struct ib_uobject *uobj) From 3f802b162dbf4a558ff98986449eddc717826209 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 13 Feb 2018 12:18:41 +0200 Subject: [PATCH 107/269] RDMA/uverbs: Protect from command mask overflow The command number is not bounds checked against the command mask before it is shifted, resulting in an ubsan hit. This does not cause malfunction since the command number is eventually bounds checked, but we can make this ubsan clean by moving the bounds check to before the mask check. ================================================================================ UBSAN: Undefined behaviour in drivers/infiniband/core/uverbs_main.c:647:21 shift exponent 207 is too large for 64-bit type 'long long unsigned int' CPU: 0 PID: 446 Comm: syz-executor3 Not tainted 4.15.0-rc2+ #61 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 Call Trace: dump_stack+0xde/0x164 ? dma_virt_map_sg+0x22c/0x22c ubsan_epilogue+0xe/0x81 __ubsan_handle_shift_out_of_bounds+0x293/0x2f7 ? debug_check_no_locks_freed+0x340/0x340 ? __ubsan_handle_load_invalid_value+0x19b/0x19b ? lock_acquire+0x440/0x440 ? lock_acquire+0x19d/0x440 ? __might_fault+0xf4/0x240 ? ib_uverbs_write+0x68d/0xe20 ib_uverbs_write+0x68d/0xe20 ? __lock_acquire+0xcf7/0x3940 ? uverbs_devnode+0x110/0x110 ? cyc2ns_read_end+0x10/0x10 ? sched_clock_cpu+0x18/0x200 ? sched_clock_cpu+0x18/0x200 __vfs_write+0x10d/0x700 ? uverbs_devnode+0x110/0x110 ? kernel_read+0x170/0x170 ? __fget+0x35b/0x5d0 ? security_file_permission+0x93/0x260 vfs_write+0x1b0/0x550 SyS_write+0xc7/0x1a0 ? SyS_read+0x1a0/0x1a0 ? trace_hardirqs_on_thunk+0x1a/0x1c entry_SYSCALL_64_fastpath+0x18/0x85 RIP: 0033:0x448e29 RSP: 002b:00007f033f567c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007f033f5686bc RCX: 0000000000448e29 RDX: 0000000000000060 RSI: 0000000020001000 RDI: 0000000000000012 RBP: 000000000070bea0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000000056a0 R14: 00000000006e8740 R15: 0000000000000000 ================================================================================ Cc: syzkaller Cc: # 4.5 Fixes: 2dbd5186a39c ("IB/core: IB/core: Allow legacy verbs through extended interfaces") Reported-by: Noa Osherovich Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_main.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index cd72555ad457a..b1ca223aa3808 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -650,12 +650,21 @@ static int verify_command_mask(struct ib_device *ib_dev, __u32 command) return -1; } +static bool verify_command_idx(u32 command, bool extended) +{ + if (extended) + return command < ARRAY_SIZE(uverbs_ex_cmd_table); + + return command < ARRAY_SIZE(uverbs_cmd_table); +} + static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_file *file = filp->private_data; struct ib_device *ib_dev; struct ib_uverbs_cmd_hdr hdr; + bool extended_command; __u32 command; __u32 flags; int srcu_key; @@ -688,6 +697,15 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, } command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; + flags = (hdr.command & + IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; + + extended_command = flags & IB_USER_VERBS_CMD_FLAG_EXTENDED; + if (!verify_command_idx(command, extended_command)) { + ret = -EINVAL; + goto out; + } + if (verify_command_mask(ib_dev, command)) { ret = -EOPNOTSUPP; goto out; @@ -699,12 +717,8 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, goto out; } - flags = (hdr.command & - IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; - if (!flags) { - if (command >= ARRAY_SIZE(uverbs_cmd_table) || - !uverbs_cmd_table[command]) { + if (!uverbs_cmd_table[command]) { ret = -EINVAL; goto out; } @@ -725,8 +739,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, struct ib_udata uhw; size_t written_count = count; - if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) || - !uverbs_ex_cmd_table[command]) { + if (!uverbs_ex_cmd_table[command]) { ret = -ENOSYS; goto out; } From 0cba0efcc7238d47a045a8d7a4079f6a22993546 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 14 Feb 2018 12:35:37 +0200 Subject: [PATCH 108/269] RDMA/restrack: Increment CQ restrack object before committing Once the uobj is committed it is immediately possible another thread could destroy it, which worst case, can result in a use-after-free of the restrack objects. Cc: syzkaller Fixes: 08f294a1524b ("RDMA/core: Add resource tracking for create and destroy CQs") Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 256934d1f64fb..4e55f83250498 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1030,14 +1030,14 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, resp.response_length = offsetof(typeof(resp), response_length) + sizeof(resp.response_length); + cq->res.type = RDMA_RESTRACK_CQ; + rdma_restrack_add(&cq->res); + ret = cb(file, obj, &resp, ucore, context); if (ret) goto err_cb; uobj_alloc_commit(&obj->uobject); - cq->res.type = RDMA_RESTRACK_CQ; - rdma_restrack_add(&cq->res); - return obj; err_cb: From 5c2e1c4f926856717f3fd31932e926dc3fe77ebd Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 14 Feb 2018 12:35:38 +0200 Subject: [PATCH 109/269] RDMA/uverbs: Fix bad unlock balance in ib_uverbs_close_xrcd There is no matching lock for this mutex. Git history suggests this is just a missed remnant from an earlier version of the function before this locking was moved into uverbs_free_xrcd. Originally this lock was protecting the xrcd_table_delete() ===================================== WARNING: bad unlock balance detected! 4.15.0+ #87 Not tainted ------------------------------------- syzkaller223405/269 is trying to release lock (&uverbs_dev->xrcd_tree_mutex) at: [<00000000b8703372>] ib_uverbs_close_xrcd+0x195/0x1f0 but there are no more locks to release! other info that might help us debug this: 1 lock held by syzkaller223405/269: #0: (&uverbs_dev->disassociate_srcu){....}, at: [<000000005af3b960>] ib_uverbs_write+0x265/0xef0 stack backtrace: CPU: 0 PID: 269 Comm: syzkaller223405 Not tainted 4.15.0+ #87 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 Call Trace: dump_stack+0xde/0x164 ? dma_virt_map_sg+0x22c/0x22c ? ib_uverbs_write+0x265/0xef0 ? console_unlock+0x502/0xbd0 ? ib_uverbs_close_xrcd+0x195/0x1f0 print_unlock_imbalance_bug+0x131/0x160 lock_release+0x59d/0x1100 ? ib_uverbs_close_xrcd+0x195/0x1f0 ? lock_acquire+0x440/0x440 ? lock_acquire+0x440/0x440 __mutex_unlock_slowpath+0x88/0x670 ? wait_for_completion+0x4c0/0x4c0 ? rdma_lookup_get_uobject+0x145/0x2f0 ib_uverbs_close_xrcd+0x195/0x1f0 ? ib_uverbs_open_xrcd+0xdd0/0xdd0 ib_uverbs_write+0x7f9/0xef0 ? cyc2ns_read_end+0x10/0x10 ? ib_uverbs_open_xrcd+0xdd0/0xdd0 ? uverbs_devnode+0x110/0x110 ? cyc2ns_read_end+0x10/0x10 ? cyc2ns_read_end+0x10/0x10 ? sched_clock_cpu+0x18/0x200 __vfs_write+0x10d/0x700 ? uverbs_devnode+0x110/0x110 ? kernel_read+0x170/0x170 ? __fget+0x358/0x5d0 ? security_file_permission+0x93/0x260 vfs_write+0x1b0/0x550 SyS_write+0xc7/0x1a0 ? SyS_read+0x1a0/0x1a0 ? trace_hardirqs_on_thunk+0x1a/0x1c entry_SYSCALL_64_fastpath+0x1e/0x8b RIP: 0033:0x4335c9 Cc: syzkaller Cc: # 4.11 Fixes: fd3c7904db6e ("IB/core: Change idr objects to use the new schema") Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 4e55f83250498..1187b757d911c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -603,10 +603,8 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, uobj = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle, file->ucontext); - if (IS_ERR(uobj)) { - mutex_unlock(&file->device->xrcd_tree_mutex); + if (IS_ERR(uobj)) return PTR_ERR(uobj); - } ret = uobj_remove_commit(uobj); return ret ?: in_len; From 1ff5325c3ca1843228a86549318bbd3b414b9207 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 14 Feb 2018 12:35:39 +0200 Subject: [PATCH 110/269] RDMA/uverbs: Fix circular locking dependency Avoid circular locking dependency by calling to uobj_alloc_commit() outside of xrcd_tree_mutex lock. ====================================================== WARNING: possible circular locking dependency detected 4.15.0+ #87 Not tainted ------------------------------------------------------ syzkaller401056/269 is trying to acquire lock: (&uverbs_dev->xrcd_tree_mutex){+.+.}, at: [<000000006c12d2cd>] uverbs_free_xrcd+0xd2/0x360 but task is already holding lock: (&ucontext->uobjects_lock){+.+.}, at: [<00000000da010f09>] uverbs_cleanup_ucontext+0x168/0x730 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&ucontext->uobjects_lock){+.+.}: __mutex_lock+0x111/0x1720 rdma_alloc_commit_uobject+0x22c/0x600 ib_uverbs_open_xrcd+0x61a/0xdd0 ib_uverbs_write+0x7f9/0xef0 __vfs_write+0x10d/0x700 vfs_write+0x1b0/0x550 SyS_write+0xc7/0x1a0 entry_SYSCALL_64_fastpath+0x1e/0x8b -> #0 (&uverbs_dev->xrcd_tree_mutex){+.+.}: lock_acquire+0x19d/0x440 __mutex_lock+0x111/0x1720 uverbs_free_xrcd+0xd2/0x360 remove_commit_idr_uobject+0x6d/0x110 uverbs_cleanup_ucontext+0x2f0/0x730 ib_uverbs_cleanup_ucontext.constprop.3+0x52/0x120 ib_uverbs_close+0xf2/0x570 __fput+0x2cd/0x8d0 task_work_run+0xec/0x1d0 do_exit+0x6a1/0x1520 do_group_exit+0xe8/0x380 SyS_exit_group+0x1e/0x20 entry_SYSCALL_64_fastpath+0x1e/0x8b other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&ucontext->uobjects_lock); lock(&uverbs_dev->xrcd_tree_mutex); lock(&ucontext->uobjects_lock); lock(&uverbs_dev->xrcd_tree_mutex); *** DEADLOCK *** 3 locks held by syzkaller401056/269: #0: (&file->cleanup_mutex){+.+.}, at: [<00000000c9f0c252>] ib_uverbs_close+0xac/0x570 #1: (&ucontext->cleanup_rwsem){++++}, at: [<00000000b6994d49>] uverbs_cleanup_ucontext+0xf6/0x730 #2: (&ucontext->uobjects_lock){+.+.}, at: [<00000000da010f09>] uverbs_cleanup_ucontext+0x168/0x730 stack backtrace: CPU: 0 PID: 269 Comm: syzkaller401056 Not tainted 4.15.0+ #87 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 Call Trace: dump_stack+0xde/0x164 ? dma_virt_map_sg+0x22c/0x22c ? uverbs_cleanup_ucontext+0x168/0x730 ? console_unlock+0x502/0xbd0 print_circular_bug.isra.24+0x35e/0x396 ? print_circular_bug_header+0x12e/0x12e ? find_usage_backwards+0x30/0x30 ? entry_SYSCALL_64_fastpath+0x1e/0x8b validate_chain.isra.28+0x25d1/0x40c0 ? check_usage+0xb70/0xb70 ? graph_lock+0x160/0x160 ? find_usage_backwards+0x30/0x30 ? cyc2ns_read_end+0x10/0x10 ? print_irqtrace_events+0x280/0x280 ? __lock_acquire+0x93d/0x1630 __lock_acquire+0x93d/0x1630 lock_acquire+0x19d/0x440 ? uverbs_free_xrcd+0xd2/0x360 __mutex_lock+0x111/0x1720 ? uverbs_free_xrcd+0xd2/0x360 ? uverbs_free_xrcd+0xd2/0x360 ? __mutex_lock+0x828/0x1720 ? mutex_lock_io_nested+0x1550/0x1550 ? uverbs_cleanup_ucontext+0x168/0x730 ? __lock_acquire+0x9a9/0x1630 ? mutex_lock_io_nested+0x1550/0x1550 ? uverbs_cleanup_ucontext+0xf6/0x730 ? lock_contended+0x11a0/0x11a0 ? uverbs_free_xrcd+0xd2/0x360 uverbs_free_xrcd+0xd2/0x360 remove_commit_idr_uobject+0x6d/0x110 uverbs_cleanup_ucontext+0x2f0/0x730 ? sched_clock_cpu+0x18/0x200 ? uverbs_close_fd+0x1c0/0x1c0 ib_uverbs_cleanup_ucontext.constprop.3+0x52/0x120 ib_uverbs_close+0xf2/0x570 ? ib_uverbs_remove_one+0xb50/0xb50 ? ib_uverbs_remove_one+0xb50/0xb50 __fput+0x2cd/0x8d0 task_work_run+0xec/0x1d0 do_exit+0x6a1/0x1520 ? fsnotify_first_mark+0x220/0x220 ? exit_notify+0x9f0/0x9f0 ? entry_SYSCALL_64_fastpath+0x5/0x8b ? entry_SYSCALL_64_fastpath+0x5/0x8b ? trace_hardirqs_on_thunk+0x1a/0x1c ? time_hardirqs_on+0x27/0x670 ? time_hardirqs_off+0x27/0x490 ? syscall_return_slowpath+0x6c/0x460 ? entry_SYSCALL_64_fastpath+0x5/0x8b do_group_exit+0xe8/0x380 SyS_exit_group+0x1e/0x20 entry_SYSCALL_64_fastpath+0x1e/0x8b RIP: 0033:0x431ce9 Cc: syzkaller Cc: # 4.11 Fixes: fd3c7904db6e ("IB/core: Change idr objects to use the new schema") Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 1187b757d911c..6941faaaf1c38 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -562,9 +562,10 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, if (f.file) fdput(f); + mutex_unlock(&file->device->xrcd_tree_mutex); + uobj_alloc_commit(&obj->uobject); - mutex_unlock(&file->device->xrcd_tree_mutex); return in_len; err_copy: From 5d4c05c3ee36f67ddc107ab5ea0898af01a62cc1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 14 Feb 2018 12:35:40 +0200 Subject: [PATCH 111/269] RDMA/uverbs: Sanitize user entered port numbers prior to access it ================================================================== BUG: KASAN: use-after-free in copy_ah_attr_from_uverbs+0x6f2/0x8c0 Read of size 4 at addr ffff88006476a198 by task syzkaller697701/265 CPU: 0 PID: 265 Comm: syzkaller697701 Not tainted 4.15.0+ #90 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 Call Trace: dump_stack+0xde/0x164 ? dma_virt_map_sg+0x22c/0x22c ? show_regs_print_info+0x17/0x17 ? lock_contended+0x11a0/0x11a0 print_address_description+0x83/0x3e0 kasan_report+0x18c/0x4b0 ? copy_ah_attr_from_uverbs+0x6f2/0x8c0 ? copy_ah_attr_from_uverbs+0x6f2/0x8c0 ? lookup_get_idr_uobject+0x120/0x200 ? copy_ah_attr_from_uverbs+0x6f2/0x8c0 copy_ah_attr_from_uverbs+0x6f2/0x8c0 ? modify_qp+0xd0e/0x1350 modify_qp+0xd0e/0x1350 ib_uverbs_modify_qp+0xf9/0x170 ? ib_uverbs_query_qp+0xa70/0xa70 ib_uverbs_write+0x7f9/0xef0 ? attach_entity_load_avg+0x8b0/0x8b0 ? ib_uverbs_query_qp+0xa70/0xa70 ? uverbs_devnode+0x110/0x110 ? cyc2ns_read_end+0x10/0x10 ? print_irqtrace_events+0x280/0x280 ? sched_clock_cpu+0x18/0x200 ? _raw_spin_unlock_irq+0x29/0x40 ? _raw_spin_unlock_irq+0x29/0x40 ? _raw_spin_unlock_irq+0x29/0x40 ? time_hardirqs_on+0x27/0x670 __vfs_write+0x10d/0x700 ? uverbs_devnode+0x110/0x110 ? kernel_read+0x170/0x170 ? _raw_spin_unlock_irq+0x29/0x40 ? finish_task_switch+0x1bd/0x7a0 ? finish_task_switch+0x194/0x7a0 ? prandom_u32_state+0xe/0x180 ? rcu_read_unlock+0x80/0x80 ? security_file_permission+0x93/0x260 vfs_write+0x1b0/0x550 SyS_write+0xc7/0x1a0 ? SyS_read+0x1a0/0x1a0 ? trace_hardirqs_on_thunk+0x1a/0x1c entry_SYSCALL_64_fastpath+0x1e/0x8b RIP: 0033:0x433c29 RSP: 002b:00007ffcf2be82a8 EFLAGS: 00000217 Allocated by task 62: kasan_kmalloc+0xa0/0xd0 kmem_cache_alloc+0x141/0x480 dup_fd+0x101/0xcc0 copy_process.part.62+0x166f/0x4390 _do_fork+0x1cb/0xe90 kernel_thread+0x34/0x40 call_usermodehelper_exec_work+0x112/0x260 process_one_work+0x929/0x1aa0 worker_thread+0x5c6/0x12a0 kthread+0x346/0x510 ret_from_fork+0x3a/0x50 Freed by task 259: kasan_slab_free+0x71/0xc0 kmem_cache_free+0xf3/0x4c0 put_files_struct+0x225/0x2c0 exit_files+0x88/0xc0 do_exit+0x67c/0x1520 do_group_exit+0xe8/0x380 SyS_exit_group+0x1e/0x20 entry_SYSCALL_64_fastpath+0x1e/0x8b The buggy address belongs to the object at ffff88006476a000 which belongs to the cache files_cache of size 832 The buggy address is located 408 bytes inside of 832-byte region [ffff88006476a000, ffff88006476a340) The buggy address belongs to the page: page:ffffea000191da80 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 4000000000008100 0000000000000000 0000000000000000 0000000100080008 raw: 0000000000000000 0000000100000001 ffff88006bcf7a80 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88006476a080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88006476a100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88006476a180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88006476a200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88006476a280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Cc: syzkaller Cc: # 4.11 Fixes: 44c58487d51a ("IB/core: Define 'ib' and 'roce' rdma_ah_attr types") Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 6941faaaf1c38..cd9fbd7c82b01 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1970,8 +1970,15 @@ static int modify_qp(struct ib_uverbs_file *file, goto release_qp; } + if ((cmd->base.attr_mask & IB_QP_AV) && + !rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) { + ret = -EINVAL; + goto release_qp; + } + if ((cmd->base.attr_mask & IB_QP_ALT_PATH) && - !rdma_is_port_valid(qp->device, cmd->base.alt_port_num)) { + (!rdma_is_port_valid(qp->device, cmd->base.alt_port_num) || + !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num))) { ret = -EINVAL; goto release_qp; } From 1f5a6c47aabc4606f91ad2e6ef71a1ff1924101c Mon Sep 17 00:00:00 2001 From: Adit Ranadive Date: Thu, 15 Feb 2018 12:36:46 -0800 Subject: [PATCH 112/269] RDMA/vmw_pvrdma: Fix usage of user response structures in ABI file This ensures that we return the right structures back to userspace. Otherwise, it looks like the reserved fields in the response structures in userspace might have uninitialized data in them. Fixes: 8b10ba783c9d ("RDMA/vmw_pvrdma: Add shared receive queue support") Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Suggested-by: Jason Gunthorpe Reviewed-by: Bryan Tan Reviewed-by: Aditya Sarwade Reviewed-by: Jorgen Hansen Signed-off-by: Adit Ranadive Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 4 +++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 4 +++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index faa9478c14a6b..f95b97646c252 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -114,6 +114,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_cq *cmd = &req.create_cq; struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp; + struct pvrdma_create_cq_resp cq_resp = {0}; struct pvrdma_create_cq ucmd; BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64); @@ -197,6 +198,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, cq->ibcq.cqe = resp->cqe; cq->cq_handle = resp->cq_handle; + cq_resp.cqn = resp->cq_handle; spin_lock_irqsave(&dev->cq_tbl_lock, flags); dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq; spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); @@ -205,7 +207,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, cq->uar = &(to_vucontext(context)->uar); /* Copy udata back. */ - if (ib_copy_to_udata(udata, &cq->cq_handle, sizeof(__u32))) { + if (ib_copy_to_udata(udata, &cq_resp, sizeof(cq_resp))) { dev_warn(&dev->pdev->dev, "failed to copy back udata\n"); pvrdma_destroy_cq(&cq->ibcq); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 5acebb1ef631a..af235967a9c2e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -113,6 +113,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_srq *cmd = &req.create_srq; struct pvrdma_cmd_create_srq_resp *resp = &rsp.create_srq_resp; + struct pvrdma_create_srq_resp srq_resp = {0}; struct pvrdma_create_srq ucmd; unsigned long flags; int ret; @@ -204,12 +205,13 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, } srq->srq_handle = resp->srqn; + srq_resp.srqn = resp->srqn; spin_lock_irqsave(&dev->srq_tbl_lock, flags); dev->srq_tbl[srq->srq_handle % dev->dsr->caps.max_srq] = srq; spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); /* Copy udata back. */ - if (ib_copy_to_udata(udata, &srq->srq_handle, sizeof(__u32))) { + if (ib_copy_to_udata(udata, &srq_resp, sizeof(srq_resp))) { dev_warn(&dev->pdev->dev, "failed to copy back udata\n"); pvrdma_destroy_srq(&srq->ibsrq); return ERR_PTR(-EINVAL); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 16b96616ef7e6..a51463cd2f374 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -447,6 +447,7 @@ struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_pd *cmd = &req.create_pd; struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp; + struct pvrdma_alloc_pd_resp pd_resp = {0}; int ret; void *ptr; @@ -475,9 +476,10 @@ struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, pd->privileged = !context; pd->pd_handle = resp->pd_handle; pd->pdn = resp->pd_handle; + pd_resp.pdn = resp->pd_handle; if (context) { - if (ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) { + if (ib_copy_to_udata(udata, &pd_resp, sizeof(pd_resp))) { dev_warn(&dev->pdev->dev, "failed to copy back protection domain\n"); pvrdma_dealloc_pd(&pd->ibpd); From 9c2d63b843a5c8a8d0559cc067b5398aa5ec3ffc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 16 Feb 2018 01:10:29 +0100 Subject: [PATCH 113/269] bpf: fix mlock precharge on arraymaps syzkaller recently triggered OOM during percpu map allocation; while there is work in progress by Dennis Zhou to add __GFP_NORETRY semantics for percpu allocator under pressure, there seems also a missing bpf_map_precharge_memlock() check in array map allocation. Given today the actual bpf_map_charge_memlock() happens after the find_and_alloc_map() in syscall path, the bpf_map_precharge_memlock() is there to bail out early before we go and do the map setup work when we find that we hit the limits anyway. Therefore add this for array map as well. Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Fixes: a10423b87a7e ("bpf: introduce BPF_MAP_TYPE_PERCPU_ARRAY map") Reported-by: syzbot+adb03f3f0bb57ce3acda@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Cc: Dennis Zhou Signed-off-by: Alexei Starovoitov --- kernel/bpf/arraymap.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index b1f66480135b3..a364c408f25a5 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -73,11 +73,11 @@ static int array_map_alloc_check(union bpf_attr *attr) static struct bpf_map *array_map_alloc(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; - int numa_node = bpf_map_attr_numa_node(attr); + int ret, numa_node = bpf_map_attr_numa_node(attr); u32 elem_size, index_mask, max_entries; bool unpriv = !capable(CAP_SYS_ADMIN); + u64 cost, array_size, mask64; struct bpf_array *array; - u64 array_size, mask64; elem_size = round_up(attr->value_size, 8); @@ -109,8 +109,19 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array_size += (u64) max_entries * elem_size; /* make sure there is no u32 overflow later in round_up() */ - if (array_size >= U32_MAX - PAGE_SIZE) + cost = array_size; + if (cost >= U32_MAX - PAGE_SIZE) return ERR_PTR(-ENOMEM); + if (percpu) { + cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); + if (cost >= U32_MAX - PAGE_SIZE) + return ERR_PTR(-ENOMEM); + } + cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + + ret = bpf_map_precharge_memlock(cost); + if (ret < 0) + return ERR_PTR(ret); /* allocate all map elements and zero-initialize them */ array = bpf_map_area_alloc(array_size, numa_node); @@ -121,20 +132,13 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) /* copy mandatory map attributes */ bpf_map_init_from_attr(&array->map, attr); + array->map.pages = cost; array->elem_size = elem_size; - if (!percpu) - goto out; - - array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); - - if (array_size >= U32_MAX - PAGE_SIZE || - bpf_array_alloc_percpu(array)) { + if (percpu && bpf_array_alloc_percpu(array)) { bpf_map_area_free(array); return ERR_PTR(-ENOMEM); } -out: - array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; return &array->map; } From b86b8eb6fecb5a4bac1ed0ca925c4082a61ea6e9 Mon Sep 17 00:00:00 2001 From: Dominik Bozek Date: Thu, 15 Feb 2018 21:27:48 -0800 Subject: [PATCH 114/269] usb: cdc_acm: prevent race at write to acm while system resumes ACM driver may accept data to transmit while system is not fully resumed. In this case ACM driver buffers data and prepare URBs on usb anchor list. There is a little chance that two tasks put a char and initiate acm_tty_flush_chars(). In such a case, driver will put one URB twice on usb anchor list. This patch also reset length of data before resue of a buffer. This not only prevent sending rubbish, but also lower risc of race. Without this patch we hit following kernel panic in one of our stabilty/stress tests. [ 46.884442] *list_add double add*: new=ffff9b2ab7289330, prev=ffff9b2ab7289330, next=ffff9b2ab81e28e0. [ 46.884476] Modules linked in: hci_uart btbcm bluetooth rfkill_gpio igb_avb(O) cfg80211 snd_soc_sst_bxt_tdf8532 snd_soc_skl snd_soc_skl_ipc snd_soc_sst_ipc snd_soc_sst_dsp snd_soc_sst_acpi snd_soc_sst_match snd_hda_ext_core snd_hda_core trusty_timer trusty_wall trusty_log trusty_virtio trusty_ipc trusty_mem trusty_irq trusty virtio_ring virtio intel_ipu4_mmu_bxtB0 lib2600_mod_bxtB0 intel_ipu4_isys_mod_bxtB0 lib2600psys_mod_bxtB0 intel_ipu4_psys_mod_bxtB0 intel_ipu4_mod_bxtB0 intel_ipu4_wrapper_bxtB0 intel_ipu4_acpi videobuf2_dma_contig as3638 dw9714 lm3643 crlmodule smiapp smiapp_pll [ 46.884480] CPU: 1 PID: 33 Comm: kworker/u8:1 Tainted: G U W O 4.9.56-quilt-2e5dc0ac-g618ed69ced6e-dirty #4 [ 46.884489] Workqueue: events_unbound flush_to_ldisc [ 46.884494] ffffb98ac012bb08 ffffffffad3e82e5 ffffb98ac012bb58 0000000000000000 [ 46.884497] ffffb98ac012bb48 ffffffffad0a23d1 00000024ad6374dd ffff9b2ab7289330 [ 46.884500] ffff9b2ab81e28e0 ffff9b2ab7289330 0000000000000002 0000000000000000 [ 46.884501] Call Trace: [ 46.884507] [] dump_stack+0x67/0x92 [ 46.884511] [] __warn+0xd1/0xf0 [ 46.884513] [] warn_slowpath_fmt+0x5f/0x80 [ 46.884516] [] __list_add+0xb3/0xc0 [ 46.884521] [] *usb_anchor_urb*+0x4c/0xa0 [ 46.884524] [] *acm_tty_flush_chars*+0x8f/0xb0 [ 46.884527] [] *acm_tty_put_char*+0x41/0x100 [ 46.884530] [] tty_put_char+0x24/0x40 [ 46.884533] [] do_output_char+0xa5/0x200 [ 46.884535] [] __process_echoes+0x148/0x290 [ 46.884538] [] n_tty_receive_buf_common+0x57c/0xb00 [ 46.884541] [] n_tty_receive_buf2+0x14/0x20 [ 46.884543] [] tty_ldisc_receive_buf+0x22/0x50 [ 46.884545] [] flush_to_ldisc+0xc5/0xe0 [ 46.884549] [] process_one_work+0x148/0x440 [ 46.884551] [] worker_thread+0x69/0x4a0 [ 46.884554] [] ? max_active_store+0x80/0x80 [ 46.884556] [] kthread+0x110/0x130 [ 46.884559] [] ? kthread_park+0x60/0x60 [ 46.884563] [] ret_from_fork+0x27/0x40 [ 46.884566] ---[ end trace 3bd599058b8a9eb3 ]--- Signed-off-by: Dominik Bozek Signed-off-by: Kuppuswamy Sathyanarayanan Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 06b3b54a0e680..7b366a6c0b493 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -174,6 +174,7 @@ static int acm_wb_alloc(struct acm *acm) wb = &acm->wb[wbn]; if (!wb->use) { wb->use = 1; + wb->len = 0; return wbn; } wbn = (wbn + 1) % ACM_NW; @@ -805,16 +806,18 @@ static int acm_tty_write(struct tty_struct *tty, static void acm_tty_flush_chars(struct tty_struct *tty) { struct acm *acm = tty->driver_data; - struct acm_wb *cur = acm->putbuffer; + struct acm_wb *cur; int err; unsigned long flags; + spin_lock_irqsave(&acm->write_lock, flags); + + cur = acm->putbuffer; if (!cur) /* nothing to do */ - return; + goto out; acm->putbuffer = NULL; err = usb_autopm_get_interface_async(acm->control); - spin_lock_irqsave(&acm->write_lock, flags); if (err < 0) { cur->use = 0; acm->putbuffer = cur; From f88982679f54f75daa5b8eff3da72508f1e7422f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 30 Jan 2018 23:11:24 -0800 Subject: [PATCH 115/269] binder: check for binder_thread allocation failure in binder_poll() If the kzalloc() in binder_get_thread() fails, binder_poll() dereferences the resulting NULL pointer. Fix it by returning POLLERR if the memory allocation failed. This bug was found by syzkaller using fault injection. Reported-by: syzbot Fixes: 457b9a6f09f0 ("Staging: android: add binder driver") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 15e3d3c2260dd..ad5e662e3e149 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -4391,6 +4391,8 @@ static __poll_t binder_poll(struct file *filp, bool wait_for_proc_work; thread = binder_get_thread(proc); + if (!thread) + return POLLERR; binder_inner_proc_lock(thread->proc); thread->looper |= BINDER_LOOPER_STATE_POLL; From e46a3b3ba7509cb7fda0e07bc7c63a2cd90f579b Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 7 Feb 2018 12:38:47 -0800 Subject: [PATCH 116/269] ANDROID: binder: remove WARN() for redundant txn error binder_send_failed_reply() is called when a synchronous transaction fails. It reports an error to the thread that is waiting for the completion. Given that the transaction is synchronous, there should never be more than 1 error response to that thread -- this was being asserted with a WARN(). However, when exercising the driver with syzbot tests, cases were observed where multiple "synchronous" requests were sent without waiting for responses, so it is possible that multiple errors would be reported to the thread. This testing was conducted with panic_on_warn set which forced the crash. This is easily reproduced by sending back-to-back "synchronous" transactions without checking for any response (eg, set read_size to 0): bwr.write_buffer = (uintptr_t)&bc1; bwr.write_size = sizeof(bc1); bwr.read_buffer = (uintptr_t)&br; bwr.read_size = 0; ioctl(fd, BINDER_WRITE_READ, &bwr); sleep(1); bwr2.write_buffer = (uintptr_t)&bc2; bwr2.write_size = sizeof(bc2); bwr2.read_buffer = (uintptr_t)&br; bwr2.read_size = 0; ioctl(fd, BINDER_WRITE_READ, &bwr2); sleep(1); The first transaction is sent to the servicemanager and the reply fails because no VMA is set up by this client. After binder_send_failed_reply() is called, the BINDER_WORK_RETURN_ERROR is sitting on the thread's todo list since the read_size was 0 and the client is not waiting for a response. The 2nd transaction is sent and the BINDER_WORK_RETURN_ERROR has not been consumed, so the thread's reply_error.cmd is still set (normally cleared when the BINDER_WORK_RETURN_ERROR is handled). Therefore when the servicemanager attempts to reply to the 2nd failed transaction, the error is already set and it triggers this warning. This is a user error since it is not waiting for the synchronous transaction to complete. If it ever does check, it will see an error. Changed the WARN() to a pr_warn(). Signed-off-by: Todd Kjos Reported-by: syzbot Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index ad5e662e3e149..31322e9a235d3 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1991,8 +1991,14 @@ static void binder_send_failed_reply(struct binder_transaction *t, &target_thread->reply_error.work); wake_up_interruptible(&target_thread->wait); } else { - WARN(1, "Unexpected reply error: %u\n", - target_thread->reply_error.cmd); + /* + * Cannot get here for normal operation, but + * we can if multiple synchronous transactions + * are sent without blocking for responses. + * Just ignore the 2nd error in this case. + */ + pr_warn("Unexpected reply error: %u\n", + target_thread->reply_error.cmd); } binder_inner_proc_unlock(target_thread->proc); binder_thread_dec_tmpref(target_thread); From 8ca86f1639ec5890d400fff9211aca22d0a392eb Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 7 Feb 2018 13:57:37 -0800 Subject: [PATCH 117/269] binder: replace "%p" with "%pK" The format specifier "%p" can leak kernel addresses. Use "%pK" instead. There were 4 remaining cases in binder.c. Signed-off-by: Todd Kjos Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 31322e9a235d3..a85f9033b57e2 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2199,7 +2199,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, int debug_id = buffer->debug_id; binder_debug(BINDER_DEBUG_TRANSACTION, - "%d buffer release %d, size %zd-%zd, failed at %p\n", + "%d buffer release %d, size %zd-%zd, failed at %pK\n", proc->pid, buffer->debug_id, buffer->data_size, buffer->offsets_size, failed_at); @@ -3711,7 +3711,7 @@ static int binder_thread_write(struct binder_proc *proc, } } binder_debug(BINDER_DEBUG_DEAD_BINDER, - "%d:%d BC_DEAD_BINDER_DONE %016llx found %p\n", + "%d:%d BC_DEAD_BINDER_DONE %016llx found %pK\n", proc->pid, thread->pid, (u64)cookie, death); if (death == NULL) { @@ -5042,7 +5042,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m, spin_lock(&t->lock); to_proc = t->to_proc; seq_printf(m, - "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %ld r%d", + "%s %d: %pK from %d:%d to %d:%d code %x flags %x pri %ld r%d", prefix, t->debug_id, t, t->from ? t->from->proc->pid : 0, t->from ? t->from->pid : 0, @@ -5066,7 +5066,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m, } if (buffer->target_node) seq_printf(m, " node %d", buffer->target_node->debug_id); - seq_printf(m, " size %zd:%zd data %p\n", + seq_printf(m, " size %zd:%zd data %pK\n", buffer->data_size, buffer->offsets_size, buffer->data); } From 5eeb2ca02a2f6084fc57ae5c244a38baab07033a Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 16 Feb 2018 09:47:15 +0100 Subject: [PATCH 118/269] ANDROID: binder: synchronize_rcu() when using POLLFREE. To prevent races with ep_remove_waitqueue() removing the waitqueue at the same time. Reported-by: syzbot+a2a3c4909716e271487e@syzkaller.appspotmail.com Signed-off-by: Martijn Coenen Cc: stable # 4.14+ Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index a85f9033b57e2..764b63a5aadef 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -4382,6 +4382,15 @@ static int binder_thread_release(struct binder_proc *proc, binder_inner_proc_unlock(thread->proc); + /* + * This is needed to avoid races between wake_up_poll() above and + * and ep_remove_waitqueue() called for other reasons (eg the epoll file + * descriptor being closed); ep_remove_waitqueue() holds an RCU read + * lock, so we can be sure it's done after calling synchronize_rcu(). + */ + if (thread->looper & BINDER_LOOPER_STATE_POLL) + synchronize_rcu(); + if (send_reply) binder_send_failed_reply(send_reply, BR_DEAD_REPLY); binder_release_work(proc, &thread->todo); From 7ae079aca59f560d2a44b65d45dffdefed6bd17a Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 14 Feb 2018 14:03:29 +0200 Subject: [PATCH 119/269] mei: set device client to the disconnected state upon suspend. This fixes regression introduced by commit 8d52af6795c0 ("mei: speed up the power down flow") In mei_cldev_disable during device power down flow, such as suspend or system power off, it jumps over disconnecting function to speed up the power down process, however, because the client is unlinked from the file_list (mei_cl_unlink) mei_cl_set_disconnected is not called from mei_cl_all_disconnect leaving resource leaking. The most visible is reference counter on underlying HW module is not decreased preventing to remove modules after suspend/resume cycles. Signed-off-by: Tomas Winkler Fixes: 8d52af6795c0 ("mei: speed up the power down flow") Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 6 ------ drivers/misc/mei/client.c | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 3e5eabdae8d96..772d02922529e 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -548,12 +548,6 @@ int mei_cldev_disable(struct mei_cl_device *cldev) goto out; } - if (bus->dev_state == MEI_DEV_POWER_DOWN) { - dev_dbg(bus->dev, "Device is powering down, don't bother with disconnection\n"); - err = 0; - goto out; - } - err = mei_cl_disconnect(cl); if (err < 0) dev_err(bus->dev, "Could not disconnect from the ME client\n"); diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index be64969d986ab..7e60c1817c311 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -945,6 +945,12 @@ int mei_cl_disconnect(struct mei_cl *cl) return 0; } + if (dev->dev_state == MEI_DEV_POWER_DOWN) { + cl_dbg(dev, cl, "Device is powering down, don't bother with disconnection\n"); + mei_cl_set_disconnected(cl); + return 0; + } + rets = pm_runtime_get(dev->dev); if (rets < 0 && rets != -EINPROGRESS) { pm_runtime_put_noidle(dev->dev); From ac66b8347bbad5913df098e5281fa6e2c7fc796e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 14 Feb 2018 18:45:59 +0000 Subject: [PATCH 120/269] gpu: ipu-v3: make const arrays int_reg static, shrinks object size Don't populate the const read-only arrays int_reg on the stack but instead make them static. Makes the object code smaller by over 80 bytes: Before: text data bss dec hex filename 28024 8936 192 37152 9120 drivers/gpu/ipu-v3/ipu-common.o After: text data bss dec hex filename 27794 9080 192 37066 90ca drivers/gpu/ipu-v3/ipu-common.o (gcc version 7.2.0 x86_64) Signed-off-by: Colin Ian King Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 658fa2d3e40c2..48685cddbad1b 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -1089,7 +1089,7 @@ static void ipu_irq_handler(struct irq_desc *desc) { struct ipu_soc *ipu = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); - const int int_reg[] = { 0, 1, 2, 3, 10, 11, 12, 13, 14}; + static const int int_reg[] = { 0, 1, 2, 3, 10, 11, 12, 13, 14}; chained_irq_enter(chip, desc); @@ -1102,7 +1102,7 @@ static void ipu_err_irq_handler(struct irq_desc *desc) { struct ipu_soc *ipu = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); - const int int_reg[] = { 4, 5, 8, 9}; + static const int int_reg[] = { 4, 5, 8, 9}; chained_irq_enter(chip, desc); From 6d36b7fec60e6f74a15ce4781d30b2aecce85dfc Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 22 Jan 2018 16:06:16 +0100 Subject: [PATCH 121/269] gpu: ipu-cpmem: add 8-bit grayscale support to ipu_cpmem_set_image Add the missing offset calculation for grayscale images. Since the IPU only supports capturing greyscale in raw passthrough mode, it is the same as 8-bit bayer formats. Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-cpmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c index bb9c087e6c0d2..ef32377b91c08 100644 --- a/drivers/gpu/ipu-v3/ipu-cpmem.c +++ b/drivers/gpu/ipu-v3/ipu-cpmem.c @@ -788,6 +788,7 @@ int ipu_cpmem_set_image(struct ipuv3_channel *ch, struct ipu_image *image) case V4L2_PIX_FMT_SGBRG8: case V4L2_PIX_FMT_SGRBG8: case V4L2_PIX_FMT_SRGGB8: + case V4L2_PIX_FMT_GREY: offset = image->rect.left + image->rect.top * pix->bytesperline; break; case V4L2_PIX_FMT_SBGGR16: From de526f401284e1638d4c97cb5a4c292ac3f37655 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 Feb 2018 08:11:48 -0800 Subject: [PATCH 122/269] netfilter: xt_hashlimit: fix lock imbalance syszkaller found that rcu was not held in hashlimit_mt_common() We only need to enable BH at this point. Fixes: bea74641e378 ("netfilter: xt_hashlimit: add rate match mode") Signed-off-by: Eric Dumazet Reported-by: syzkaller Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index aa96027f44188..66f5aca62a087 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -775,7 +775,7 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par, if (!dh->rateinfo.prev_window && (dh->rateinfo.current_rate <= dh->rateinfo.burst)) { spin_unlock(&dh->lock); - rcu_read_unlock_bh(); + local_bh_enable(); return !(cfg->mode & XT_HASHLIMIT_INVERT); } else { goto overlimit; From 2188558621ed475cef55fa94ce535499452f0091 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 14 Feb 2018 14:38:43 +0200 Subject: [PATCH 123/269] RDMA/verbs: Check existence of function prior to accessing it Update all the flows to ensure that function pointer exists prior to accessing it. This is much safer than checking the uverbs_ex_mask variable, especially since we know that test isn't working properly and will be removed in -next. This prevents a user triggereable oops. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 3 +++ drivers/infiniband/core/uverbs_cmd.c | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index c4560d84dfaeb..c91f9a80b8313 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -309,6 +309,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, { struct ib_qp *qp; + if (!dev->create_qp) + return ERR_PTR(-EOPNOTSUPP); + qp = dev->create_qp(pd, attr, udata); if (IS_ERR(qp)) return qp; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index cd9fbd7c82b01..dbcfb313cee9f 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -978,6 +978,9 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, struct ib_uverbs_ex_create_cq_resp resp; struct ib_cq_init_attr attr = {}; + if (!ib_dev->create_cq) + return ERR_PTR(-EOPNOTSUPP); + if (cmd->comp_vector >= file->device->num_comp_vectors) return ERR_PTR(-EINVAL); @@ -2947,6 +2950,11 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, wq_init_attr.create_flags = cmd.create_flags; obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); + + if (!pd->device->create_wq) { + err = -EOPNOTSUPP; + goto err_put_cq; + } wq = pd->device->create_wq(pd, &wq_init_attr, uhw); if (IS_ERR(wq)) { err = PTR_ERR(wq); @@ -3090,7 +3098,12 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, wq_attr.flags = cmd.flags; wq_attr.flags_mask = cmd.flags_mask; } + if (!wq->device->modify_wq) { + ret = -EOPNOTSUPP; + goto out; + } ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); +out: uobj_put_obj_read(wq); return ret; } @@ -3187,6 +3200,11 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; init_attr.ind_tbl = wqs; + + if (!ib_dev->create_rwq_ind_table) { + err = -EOPNOTSUPP; + goto err_uobj; + } rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw); if (IS_ERR(rwq_ind_tbl)) { @@ -3776,6 +3794,9 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, struct ib_device_attr attr = {0}; int err; + if (!ib_dev->query_device) + return -EOPNOTSUPP; + if (ucore->inlen < sizeof(cmd)) return -EINVAL; From 02b7b2844c2ffd3b614ec2b9293e8c7f041d60da Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 16 Feb 2018 16:36:04 +0100 Subject: [PATCH 124/269] staging: fsl-mc: fix build testing on x86 Selecting GENERIC_MSI_IRQ_DOMAIN on x86 causes a compile-time error in some configurations: drivers/base/platform-msi.c:37:19: error: field 'arg' has incomplete type On the other architectures, we are fine, but here we should have an additional dependency on X86_LOCAL_APIC so we can get the PCI_MSI_IRQ_DOMAIN symbol. Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/fsl-mc/bus/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/fsl-mc/bus/Kconfig b/drivers/staging/fsl-mc/bus/Kconfig index 1f91000491767..b35ef7ee69014 100644 --- a/drivers/staging/fsl-mc/bus/Kconfig +++ b/drivers/staging/fsl-mc/bus/Kconfig @@ -7,7 +7,7 @@ config FSL_MC_BUS bool "QorIQ DPAA2 fsl-mc bus driver" - depends on OF && (ARCH_LAYERSCAPE || (COMPILE_TEST && (ARM || ARM64 || X86 || PPC))) + depends on OF && (ARCH_LAYERSCAPE || (COMPILE_TEST && (ARM || ARM64 || X86_LOCAL_APIC || PPC))) select GENERIC_MSI_IRQ_DOMAIN help Driver to enable the bus infrastructure for the QorIQ DPAA2 From ce8a3a9e76d0193e2e8d74a06d275b3c324ca652 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 4 Feb 2018 02:06:27 +0000 Subject: [PATCH 125/269] staging: android: ashmem: Fix a race condition in pin ioctls ashmem_pin_unpin() reads asma->file and asma->size before taking the ashmem_mutex, so it can race with other operations that modify them. Build-tested only. Cc: stable@vger.kernel.org Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index bbdc53b686dd1..6dbba5aff1911 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -702,30 +702,32 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd, size_t pgstart, pgend; int ret = -EINVAL; + mutex_lock(&ashmem_mutex); + if (unlikely(!asma->file)) - return -EINVAL; + goto out_unlock; - if (unlikely(copy_from_user(&pin, p, sizeof(pin)))) - return -EFAULT; + if (unlikely(copy_from_user(&pin, p, sizeof(pin)))) { + ret = -EFAULT; + goto out_unlock; + } /* per custom, you can pass zero for len to mean "everything onward" */ if (!pin.len) pin.len = PAGE_ALIGN(asma->size) - pin.offset; if (unlikely((pin.offset | pin.len) & ~PAGE_MASK)) - return -EINVAL; + goto out_unlock; if (unlikely(((__u32)-1) - pin.offset < pin.len)) - return -EINVAL; + goto out_unlock; if (unlikely(PAGE_ALIGN(asma->size) < pin.offset + pin.len)) - return -EINVAL; + goto out_unlock; pgstart = pin.offset / PAGE_SIZE; pgend = pgstart + (pin.len / PAGE_SIZE) - 1; - mutex_lock(&ashmem_mutex); - switch (cmd) { case ASHMEM_PIN: ret = ashmem_pin(asma, pgstart, pgend); @@ -738,6 +740,7 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd, break; } +out_unlock: mutex_unlock(&ashmem_mutex); return ret; From 6d79bd5bb6c79a9dba4842040c9adf39e7806330 Mon Sep 17 00:00:00 2001 From: Liam Mark Date: Fri, 26 Jan 2018 09:48:18 -0800 Subject: [PATCH 126/269] staging: android: ion: Zero CMA allocated memory Since commit 204f672255c2 ("staging: android: ion: Use CMA APIs directly") the CMA API is now used directly and therefore the allocated memory is no longer automatically zeroed. Explicitly zero CMA allocated memory to ensure that no data is exposed to userspace. Fixes: 204f672255c2 ("staging: android: ion: Use CMA APIs directly") Signed-off-by: Liam Mark Acked-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion_cma_heap.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c index 94e06925c712b..49718c96bf9ee 100644 --- a/drivers/staging/android/ion/ion_cma_heap.c +++ b/drivers/staging/android/ion/ion_cma_heap.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "ion.h" @@ -42,6 +43,22 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer, if (!pages) return -ENOMEM; + if (PageHighMem(pages)) { + unsigned long nr_clear_pages = nr_pages; + struct page *page = pages; + + while (nr_clear_pages > 0) { + void *vaddr = kmap_atomic(page); + + memset(vaddr, 0, PAGE_SIZE); + kunmap_atomic(vaddr); + page++; + nr_clear_pages--; + } + } else { + memset(page_address(pages), 0, size); + } + table = kmalloc(sizeof(*table), GFP_KERNEL); if (!table) goto err; From 2f08ee363fe097bc6dc01aac53e1798b16c00986 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 14 Feb 2018 18:43:36 -0800 Subject: [PATCH 127/269] RDMA/restrack: don't use uaccess_kernel() uaccess_kernel() isn't sufficient to determine if an rdma resource is user-mode or not. For example, resources allocated in the add_one() function of an ib_client get falsely labeled as user mode, when they are kernel mode allocations. EG: mad qps. The result is that these qps are skipped over during a nldev query because of an erroneous namespace mismatch. So now we determine if the resource is user-mode by looking at the object struct's uobject or similar pointer to know if it was allocated for user mode applications. Fixes: 02d8883f520e ("RDMA/restrack: Add general infrastructure to track RDMA resources") Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 4 +++- drivers/infiniband/core/restrack.c | 18 ++++++++++++++++-- drivers/infiniband/core/uverbs_cmd.c | 4 ++-- drivers/infiniband/core/verbs.c | 3 +-- 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index c91f9a80b8313..25bb178f60742 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -305,7 +305,8 @@ void nldev_exit(void); static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, struct ib_pd *pd, struct ib_qp_init_attr *attr, - struct ib_udata *udata) + struct ib_udata *udata, + struct ib_uobject *uobj) { struct ib_qp *qp; @@ -318,6 +319,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, qp->device = dev; qp->pd = pd; + qp->uobject = uobj; /* * We don't track XRC QPs for now, because they don't have PD * and more importantly they are created internaly by driver, diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index d8dc709a37156..3dbc4e4cca415 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -7,7 +7,6 @@ #include #include #include -#include #include void rdma_restrack_init(struct rdma_restrack_root *res) @@ -88,6 +87,21 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res) return dev; } +static bool res_is_user(struct rdma_restrack_entry *res) +{ + switch (res->type) { + case RDMA_RESTRACK_PD: + return container_of(res, struct ib_pd, res)->uobject; + case RDMA_RESTRACK_CQ: + return container_of(res, struct ib_cq, res)->uobject; + case RDMA_RESTRACK_QP: + return container_of(res, struct ib_qp, res)->uobject; + default: + WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type); + return false; + } +} + void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); @@ -95,7 +109,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) if (!dev) return; - if (!uaccess_kernel()) { + if (res_is_user(res)) { get_task_struct(current); res->task = current; res->kern_name = NULL; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index dbcfb313cee9f..25a0e0e083b33 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1520,7 +1520,8 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = _ib_create_qp(device, pd, &attr, uhw); + qp = _ib_create_qp(device, pd, &attr, uhw, + &obj->uevent.uobject); if (IS_ERR(qp)) { ret = PTR_ERR(qp); @@ -1553,7 +1554,6 @@ static int create_qp(struct ib_uverbs_file *file, if (ind_tbl) atomic_inc(&ind_tbl->usecnt); } - qp->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = qp; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 16ebc6372c31a..93025d2009b89 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -887,7 +887,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, if (qp_init_attr->cap.max_rdma_ctxs) rdma_rw_init_qp(device, qp_init_attr); - qp = _ib_create_qp(device, pd, qp_init_attr, NULL); + qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL); if (IS_ERR(qp)) return qp; @@ -898,7 +898,6 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, } qp->real_qp = qp; - qp->uobject = NULL; qp->qp_type = qp_init_attr->qp_type; qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl; From 27d4ee03078aba88c5e07dcc4917e8d01d046f38 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 11 Feb 2018 10:38:28 +0100 Subject: [PATCH 128/269] workqueue: Allow retrieval of current task's work struct Introduce a helper to retrieve the current task's work struct if it is a workqueue worker. This allows us to fix a long-standing deadlock in several DRM drivers wherein the ->runtime_suspend callback waits for a specific worker to finish and that worker in turn calls a function which waits for runtime suspend to finish. That function is invoked from multiple call sites and waiting for runtime suspend to finish is the correct thing to do except if it's executing in the context of the worker. Cc: Lai Jiangshan Cc: Dave Airlie Cc: Ben Skeggs Cc: Alex Deucher Acked-by: Tejun Heo Reviewed-by: Lyude Paul Signed-off-by: Lukas Wunner Link: https://patchwork.freedesktop.org/patch/msgid/2d8f603074131eb87e588d2b803a71765bd3a2fd.1518338788.git.lukas@wunner.de --- include/linux/workqueue.h | 1 + kernel/workqueue.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4a54ef96aff5b..bc0cda180c8b7 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -465,6 +465,7 @@ extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); +extern struct work_struct *current_work(void); extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 43d18cb463083..255c20efdf7bd 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4167,6 +4167,22 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) } EXPORT_SYMBOL_GPL(workqueue_set_max_active); +/** + * current_work - retrieve %current task's work struct + * + * Determine if %current task is a workqueue worker and what it's working on. + * Useful to find out the context that the %current task is running in. + * + * Return: work struct if %current task is a workqueue worker, %NULL otherwise. + */ +struct work_struct *current_work(void) +{ + struct worker *worker = current_wq_worker(); + + return worker ? worker->current_work : NULL; +} +EXPORT_SYMBOL(current_work); + /** * current_is_workqueue_rescuer - is %current workqueue rescuer? * From 25c058ccaf2ebbc3e250ec1e199e161f91fe27d4 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 14 Feb 2018 06:41:25 +0100 Subject: [PATCH 129/269] drm: Allow determining if current task is output poll worker Introduce a helper to determine if the current task is an output poll worker. This allows us to fix a long-standing deadlock in several DRM drivers wherein the ->runtime_suspend callback waits for the output poll worker to finish and the worker in turn calls a ->detect callback which waits for runtime suspend to finish. The ->detect callback is invoked from multiple call sites and waiting for runtime suspend to finish is the correct thing to do except if it's executing in the context of the worker. v2: Expand kerneldoc to specifically mention deadlock between output poll worker and autosuspend worker as use case. (Lyude) Cc: Dave Airlie Cc: Ben Skeggs Cc: Alex Deucher Reviewed-by: Lyude Paul Signed-off-by: Lukas Wunner Link: https://patchwork.freedesktop.org/patch/msgid/3549ce32e7f1467102e70d3e9cbf70c46bfe108e.1518593424.git.lukas@wunner.de --- drivers/gpu/drm/drm_probe_helper.c | 20 ++++++++++++++++++++ include/drm/drm_crtc_helper.h | 1 + 2 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index 6dc2dde5b6720..7a6b2dc08913e 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -654,6 +654,26 @@ static void output_poll_execute(struct work_struct *work) schedule_delayed_work(delayed_work, DRM_OUTPUT_POLL_PERIOD); } +/** + * drm_kms_helper_is_poll_worker - is %current task an output poll worker? + * + * Determine if %current task is an output poll worker. This can be used + * to select distinct code paths for output polling versus other contexts. + * + * One use case is to avoid a deadlock between the output poll worker and + * the autosuspend worker wherein the latter waits for polling to finish + * upon calling drm_kms_helper_poll_disable(), while the former waits for + * runtime suspend to finish upon calling pm_runtime_get_sync() in a + * connector ->detect hook. + */ +bool drm_kms_helper_is_poll_worker(void) +{ + struct work_struct *work = current_work(); + + return work && work->func == output_poll_execute; +} +EXPORT_SYMBOL(drm_kms_helper_is_poll_worker); + /** * drm_kms_helper_poll_disable - disable output polling * @dev: drm_device diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index 76e237bd989be..6914633037a53 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -77,5 +77,6 @@ void drm_kms_helper_hotplug_event(struct drm_device *dev); void drm_kms_helper_poll_disable(struct drm_device *dev); void drm_kms_helper_poll_enable(struct drm_device *dev); +bool drm_kms_helper_is_poll_worker(void); #endif From d61a5c1063515e855bedb1b81e20e50b0ac3541e Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 11 Feb 2018 10:38:28 +0100 Subject: [PATCH 130/269] drm/nouveau: Fix deadlock on runtime suspend nouveau's ->runtime_suspend hook calls drm_kms_helper_poll_disable(), which waits for the output poll worker to finish if it's running. The output poll worker meanwhile calls pm_runtime_get_sync() in nouveau_connector_detect() which waits for the ongoing suspend to finish, causing a deadlock. Fix by not acquiring a runtime PM ref if nouveau_connector_detect() is called in the output poll worker's context. This is safe because the poll worker is only enabled while runtime active and we know that ->runtime_suspend waits for it to finish. Other contexts calling nouveau_connector_detect() do require a runtime PM ref, these comprise: status_store() drm sysfs interface ->fill_modes drm callback drm_fb_helper_probe_connector_modes() drm_mode_getconnector() nouveau_connector_hotplug() nouveau_display_hpd_work() nv17_tv_set_property() Stack trace for posterity: INFO: task kworker/0:1:58 blocked for more than 120 seconds. Workqueue: events output_poll_execute [drm_kms_helper] Call Trace: schedule+0x28/0x80 rpm_resume+0x107/0x6e0 __pm_runtime_resume+0x47/0x70 nouveau_connector_detect+0x7e/0x4a0 [nouveau] nouveau_connector_detect_lvds+0x132/0x180 [nouveau] drm_helper_probe_detect_ctx+0x85/0xd0 [drm_kms_helper] output_poll_execute+0x11e/0x1c0 [drm_kms_helper] process_one_work+0x184/0x380 worker_thread+0x2e/0x390 INFO: task kworker/0:2:252 blocked for more than 120 seconds. Workqueue: pm pm_runtime_work Call Trace: schedule+0x28/0x80 schedule_timeout+0x1e3/0x370 wait_for_completion+0x123/0x190 flush_work+0x142/0x1c0 nouveau_pmops_runtime_suspend+0x7e/0xd0 [nouveau] pci_pm_runtime_suspend+0x5c/0x180 vga_switcheroo_runtime_suspend+0x1e/0xa0 __rpm_callback+0xc1/0x200 rpm_callback+0x1f/0x70 rpm_suspend+0x13c/0x640 pm_runtime_work+0x6e/0x90 process_one_work+0x184/0x380 worker_thread+0x2e/0x390 Bugzilla: https://bugs.archlinux.org/task/53497 Bugzilla: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=870523 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70388#c33 Fixes: 5addcf0a5f0f ("nouveau: add runtime PM support (v0.9)") Cc: stable@vger.kernel.org # v3.12+: 27d4ee03078a: workqueue: Allow retrieval of current task's work struct Cc: stable@vger.kernel.org # v3.12+: 25c058ccaf2e: drm: Allow determining if current task is output poll worker Cc: Ben Skeggs Cc: Dave Airlie Reviewed-by: Lyude Paul Signed-off-by: Lukas Wunner Link: https://patchwork.freedesktop.org/patch/msgid/b7d2cbb609a80f59ccabfdf479b9d5907c603ea1.1518338789.git.lukas@wunner.de --- drivers/gpu/drm/nouveau/nouveau_connector.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 69d6e61a01ecf..6ed9cb053dfa5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -570,9 +570,15 @@ nouveau_connector_detect(struct drm_connector *connector, bool force) nv_connector->edid = NULL; } - ret = pm_runtime_get_sync(connector->dev->dev); - if (ret < 0 && ret != -EACCES) - return conn_status; + /* Outputs are only polled while runtime active, so acquiring a + * runtime PM ref here is unnecessary (and would deadlock upon + * runtime suspend because it waits for polling to finish). + */ + if (!drm_kms_helper_is_poll_worker()) { + ret = pm_runtime_get_sync(connector->dev->dev); + if (ret < 0 && ret != -EACCES) + return conn_status; + } nv_encoder = nouveau_connector_ddc_detect(connector); if (nv_encoder && (i2c = nv_encoder->i2c) != NULL) { @@ -647,8 +653,10 @@ nouveau_connector_detect(struct drm_connector *connector, bool force) out: - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } return conn_status; } From 15734feff2bdac24aa3266c437cffa42851990e3 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 11 Feb 2018 10:38:28 +0100 Subject: [PATCH 131/269] drm/radeon: Fix deadlock on runtime suspend radeon's ->runtime_suspend hook calls drm_kms_helper_poll_disable(), which waits for the output poll worker to finish if it's running. The output poll worker meanwhile calls pm_runtime_get_sync() in radeon's ->detect hooks, which waits for the ongoing suspend to finish, causing a deadlock. Fix by not acquiring a runtime PM ref if the ->detect hooks are called in the output poll worker's context. This is safe because the poll worker is only enabled while runtime active and we know that ->runtime_suspend waits for it to finish. Stack trace for posterity: INFO: task kworker/0:3:31847 blocked for more than 120 seconds Workqueue: events output_poll_execute [drm_kms_helper] Call Trace: schedule+0x3c/0x90 rpm_resume+0x1e2/0x690 __pm_runtime_resume+0x3f/0x60 radeon_lvds_detect+0x39/0xf0 [radeon] output_poll_execute+0xda/0x1e0 [drm_kms_helper] process_one_work+0x14b/0x440 worker_thread+0x48/0x4a0 INFO: task kworker/2:0:10493 blocked for more than 120 seconds. Workqueue: pm pm_runtime_work Call Trace: schedule+0x3c/0x90 schedule_timeout+0x1b3/0x240 wait_for_common+0xc2/0x180 wait_for_completion+0x1d/0x20 flush_work+0xfc/0x1a0 __cancel_work_timer+0xa5/0x1d0 cancel_delayed_work_sync+0x13/0x20 drm_kms_helper_poll_disable+0x1f/0x30 [drm_kms_helper] radeon_pmops_runtime_suspend+0x3d/0xa0 [radeon] pci_pm_runtime_suspend+0x61/0x1a0 vga_switcheroo_runtime_suspend+0x21/0x70 __rpm_callback+0x32/0x70 rpm_callback+0x24/0x80 rpm_suspend+0x12b/0x640 pm_runtime_work+0x6f/0xb0 process_one_work+0x14b/0x440 worker_thread+0x48/0x4a0 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94147 Fixes: 10ebc0bc0934 ("drm/radeon: add runtime PM support (v2)") Cc: stable@vger.kernel.org # v3.13+: 27d4ee03078a: workqueue: Allow retrieval of current task's work struct Cc: stable@vger.kernel.org # v3.13+: 25c058ccaf2e: drm: Allow determining if current task is output poll worker Cc: Ismo Toijala Cc: Alex Deucher Cc: Dave Airlie Reviewed-by: Lyude Paul Signed-off-by: Lukas Wunner Link: https://patchwork.freedesktop.org/patch/msgid/64ea02c44f91dda19bc563902b97bbc699040392.1518338789.git.lukas@wunner.de --- drivers/gpu/drm/radeon/radeon_connectors.c | 74 ++++++++++++++-------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 59dcefb2df3bc..30e129684c7cf 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -900,9 +900,11 @@ radeon_lvds_detect(struct drm_connector *connector, bool force) enum drm_connector_status ret = connector_status_disconnected; int r; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } if (encoder) { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); @@ -925,8 +927,12 @@ radeon_lvds_detect(struct drm_connector *connector, bool force) /* check acpi lid status ??? */ radeon_connector_update_scratch_regs(connector, ret); - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } + return ret; } @@ -1040,9 +1046,11 @@ radeon_vga_detect(struct drm_connector *connector, bool force) enum drm_connector_status ret = connector_status_disconnected; int r; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } encoder = radeon_best_single_encoder(connector); if (!encoder) @@ -1109,8 +1117,10 @@ radeon_vga_detect(struct drm_connector *connector, bool force) radeon_connector_update_scratch_regs(connector, ret); out: - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } return ret; } @@ -1174,9 +1184,11 @@ radeon_tv_detect(struct drm_connector *connector, bool force) if (!radeon_connector->dac_load_detect) return ret; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } encoder = radeon_best_single_encoder(connector); if (!encoder) @@ -1188,8 +1200,12 @@ radeon_tv_detect(struct drm_connector *connector, bool force) if (ret == connector_status_connected) ret = radeon_connector_analog_encoder_conflict_solve(connector, encoder, ret, false); radeon_connector_update_scratch_regs(connector, ret); - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } + return ret; } @@ -1252,9 +1268,11 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) enum drm_connector_status ret = connector_status_disconnected; bool dret = false, broken_edid = false; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } if (radeon_connector->detected_hpd_without_ddc) { force = true; @@ -1437,8 +1455,10 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) } exit: - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } return ret; } @@ -1689,9 +1709,11 @@ radeon_dp_detect(struct drm_connector *connector, bool force) if (radeon_dig_connector->is_mst) return connector_status_disconnected; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } if (!force && radeon_check_hpd_status_unchanged(connector)) { ret = connector->status; @@ -1778,8 +1800,10 @@ radeon_dp_detect(struct drm_connector *connector, bool force) } out: - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } return ret; } From aa0aad57909eb321746325951d66af88a83bc956 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 11 Feb 2018 10:38:28 +0100 Subject: [PATCH 132/269] drm/amdgpu: Fix deadlock on runtime suspend amdgpu's ->runtime_suspend hook calls drm_kms_helper_poll_disable(), which waits for the output poll worker to finish if it's running. The output poll worker meanwhile calls pm_runtime_get_sync() in amdgpu's ->detect hooks, which waits for the ongoing suspend to finish, causing a deadlock. Fix by not acquiring a runtime PM ref if the ->detect hooks are called in the output poll worker's context. This is safe because the poll worker is only enabled while runtime active and we know that ->runtime_suspend waits for it to finish. Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)") Cc: stable@vger.kernel.org # v4.2+: 27d4ee03078a: workqueue: Allow retrieval of current task's work struct Cc: stable@vger.kernel.org # v4.2+: 25c058ccaf2e: drm: Allow determining if current task is output poll worker Cc: Alex Deucher Tested-by: Mike Lothian Reviewed-by: Lyude Paul Signed-off-by: Lukas Wunner Link: https://patchwork.freedesktop.org/patch/msgid/4c9bf72aacae1eef062bd134cd112e0770a7f121.1518338789.git.lukas@wunner.de --- .../gpu/drm/amd/amdgpu/amdgpu_connectors.c | 58 ++++++++++++------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index df9cbc78e1689..21e7ae159dffc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -737,9 +737,11 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force) enum drm_connector_status ret = connector_status_disconnected; int r; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } if (encoder) { struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); @@ -758,8 +760,12 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force) /* check acpi lid status ??? */ amdgpu_connector_update_scratch_regs(connector, ret); - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } + return ret; } @@ -869,9 +875,11 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force) enum drm_connector_status ret = connector_status_disconnected; int r; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } encoder = amdgpu_connector_best_single_encoder(connector); if (!encoder) @@ -925,8 +933,10 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force) amdgpu_connector_update_scratch_regs(connector, ret); out: - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } return ret; } @@ -989,9 +999,11 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) enum drm_connector_status ret = connector_status_disconnected; bool dret = false, broken_edid = false; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) { ret = connector->status; @@ -1116,8 +1128,10 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) amdgpu_connector_update_scratch_regs(connector, ret); exit: - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } return ret; } @@ -1360,9 +1374,11 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force) struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector); int r; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) { ret = connector->status; @@ -1430,8 +1446,10 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force) amdgpu_connector_update_scratch_regs(connector, ret); out: - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } return ret; } From f027e0b3a774e10302207e91d304bbf99e3a8b36 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 14 Feb 2018 15:43:00 +0100 Subject: [PATCH 133/269] iio: adis_lib: Initialize trigger before requesting interrupt The adis_probe_trigger() creates a new IIO trigger and requests an interrupt associated with the trigger. The interrupt uses the generic iio_trigger_generic_data_rdy_poll() function as its interrupt handler. Currently the driver initializes some fields of the trigger structure after the interrupt has been requested. But an interrupt can fire as soon as it has been requested. This opens up a race condition. iio_trigger_generic_data_rdy_poll() will access the trigger data structure and dereference the ops field. If the ops field is not yet initialized this will result in a NULL pointer deref. It is not expected that the device generates an interrupt at this point, so typically this issue did not surface unless e.g. due to a hardware misconfiguration (wrong interrupt number, wrong polarity, etc.). But some newer devices from the ADIS family start to generate periodic interrupts in their power-on reset configuration and unfortunately the interrupt can not be masked in the device. This makes the race condition much more visible and the following crash has been observed occasionally when booting a system using the ADIS16460. Unable to handle kernel NULL pointer dereference at virtual address 00000008 pgd = c0004000 [00000008] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.9.0-04126-gf9739f0-dirty #257 Hardware name: Xilinx Zynq Platform task: ef04f640 task.stack: ef050000 PC is at iio_trigger_notify_done+0x30/0x68 LR is at iio_trigger_generic_data_rdy_poll+0x18/0x20 pc : [] lr : [] psr: 60000193 sp : ef051bb8 ip : 00000000 fp : ef106400 r10: c081d80a r9 : ef3bfa00 r8 : 00000087 r7 : ef051bec r6 : 00000000 r5 : ef3bfa00 r4 : ee92ab00 r3 : 00000000 r2 : 00000000 r1 : 00000000 r0 : ee97e400 Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment none Control: 18c5387d Table: 0000404a DAC: 00000051 Process swapper/0 (pid: 1, stack limit = 0xef050210) [] (iio_trigger_notify_done) from [] (__handle_irq_event_percpu+0x88/0x118) [] (__handle_irq_event_percpu) from [] (handle_irq_event_percpu+0x1c/0x58) [] (handle_irq_event_percpu) from [] (handle_irq_event+0x38/0x5c) [] (handle_irq_event) from [] (handle_level_irq+0xa4/0x130) [] (handle_level_irq) from [] (generic_handle_irq+0x24/0x34) [] (generic_handle_irq) from [] (zynq_gpio_irqhandler+0xb8/0x13c) [] (zynq_gpio_irqhandler) from [] (generic_handle_irq+0x24/0x34) [] (generic_handle_irq) from [] (__handle_domain_irq+0x5c/0xb4) [] (__handle_domain_irq) from [] (gic_handle_irq+0x48/0x8c) [] (gic_handle_irq) from [] (__irq_svc+0x6c/0xa8) To fix this make sure that the trigger is fully initialized before requesting the interrupt. Fixes: ccd2b52f4ac6 ("staging:iio: Add common ADIS library") Reported-by: Robin Getz Signed-off-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis_trigger.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iio/imu/adis_trigger.c b/drivers/iio/imu/adis_trigger.c index 0dd5a381be64f..457372f36791b 100644 --- a/drivers/iio/imu/adis_trigger.c +++ b/drivers/iio/imu/adis_trigger.c @@ -46,6 +46,10 @@ int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev) if (adis->trig == NULL) return -ENOMEM; + adis->trig->dev.parent = &adis->spi->dev; + adis->trig->ops = &adis_trigger_ops; + iio_trigger_set_drvdata(adis->trig, adis); + ret = request_irq(adis->spi->irq, &iio_trigger_generic_data_rdy_poll, IRQF_TRIGGER_RISING, @@ -54,9 +58,6 @@ int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev) if (ret) goto error_free_trig; - adis->trig->dev.parent = &adis->spi->dev; - adis->trig->ops = &adis_trigger_ops; - iio_trigger_set_drvdata(adis->trig, adis); ret = iio_trigger_register(adis->trig); indio_dev->trig = iio_trigger_get(adis->trig); From 4cd140bda6494543f1c1b0ccceceaa44b676eef6 Mon Sep 17 00:00:00 2001 From: Stefan Windfeldt-Prytz Date: Thu, 15 Feb 2018 15:02:53 +0100 Subject: [PATCH 134/269] iio: buffer: check if a buffer has been set up when poll is called If no iio buffer has been set up and poll is called return 0. Without this check there will be a null pointer dereference when calling poll on a iio driver without an iio buffer. Cc: stable@vger.kernel.org Signed-off-by: Stefan Windfeldt-Prytz Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index eda2a0f1658fb..c7499c8bd69f1 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -175,7 +175,7 @@ unsigned int iio_buffer_poll(struct file *filp, struct iio_dev *indio_dev = filp->private_data; struct iio_buffer *rb = indio_dev->buffer; - if (!indio_dev->info) + if (!indio_dev->info || rb == NULL) return 0; poll_wait(filp, &rb->pollq, wait); From bee92d06157fc39d5d7836a061c7d41289a55797 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Feb 2018 16:31:23 +0100 Subject: [PATCH 135/269] cfg80211: fix cfg80211_beacon_dup gcc-8 warns about some obviously incorrect code: net/mac80211/cfg.c: In function 'cfg80211_beacon_dup': net/mac80211/cfg.c:2896:3: error: 'memcpy' source argument is the same as destination [-Werror=restrict] From the context, I conclude that we want to copy from beacon into new_beacon, as we do in the rest of the function. Cc: stable@vger.kernel.org Fixes: 73da7d5bab79 ("mac80211: add channel switch command and beacon callbacks") Signed-off-by: Arnd Bergmann Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fb15d3b97cb21..84f757c5d91a6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2863,7 +2863,7 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) } if (beacon->probe_resp_len) { new_beacon->probe_resp_len = beacon->probe_resp_len; - beacon->probe_resp = pos; + new_beacon->probe_resp = pos; memcpy(pos, beacon->probe_resp, beacon->probe_resp_len); pos += beacon->probe_resp_len; } From ce162bfbc0b601841886965baba14877127c7c7c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 Jan 2018 08:40:51 +0100 Subject: [PATCH 136/269] mac80211_hwsim: don't use WQ_MEM_RECLAIM We're obviously not part of a memory reclaim path, so don't set the flag. This also causes a warning in check_flush_dependency() since we end up in a code path that flushes a non-reclaim workqueue, and we shouldn't do that if we were really part of reclaim. Reported-by: syzbot+41cdaf4232c50e658934@syzkaller.appspotmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index f6d4a50f1bdb8..829ac22b72fc4 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3455,7 +3455,7 @@ static int __init init_mac80211_hwsim(void) spin_lock_init(&hwsim_radio_lock); - hwsim_wq = alloc_workqueue("hwsim_wq",WQ_MEM_RECLAIM,0); + hwsim_wq = alloc_workqueue("hwsim_wq", 0, 0); if (!hwsim_wq) return -ENOMEM; From 04c4927359b1f09310bfee92e7187c9022be3e00 Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Mon, 19 Feb 2018 12:09:54 +0530 Subject: [PATCH 137/269] arm64: Fix compilation error while accessing MPIDR_HWID_BITMASK from .S files Since commit e1a50de37860 (arm64: cputype: Silence Sparse warnings), compilation of arm64 architecture is broken with the following error messages: AR arch/arm64/kernel/built-in.o arch/arm64/kernel/head.S: Assembler messages: arch/arm64/kernel/head.S:677: Error: found 'L', expected: ')' arch/arm64/kernel/head.S:677: Error: found 'L', expected: ')' arch/arm64/kernel/head.S:677: Error: found 'L', expected: ')' arch/arm64/kernel/head.S:677: Error: junk at end of line, first unrecognized character is `L' arch/arm64/kernel/head.S:677: Error: unexpected characters following instruction at operand 2 -- `movz x1,:abs_g1_s:0xff00ffffffUL' arch/arm64/kernel/head.S:677: Error: unexpected characters following instruction at operand 2 -- `movk x1,:abs_g0_nc:0xff00ffffffUL' This patch fixes the same by using the UL() macro correctly for assigning the MPIDR_HWID_BITMASK macro value. Fixes: e1a50de37860 ("arm64: cputype: Silence Sparse warnings") Acked-by: Arnd Bergmann Acked-by: Robin Murphy Signed-off-by: Bhupesh Sharma Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index eda8c5f629fc8..350c76a1d15ba 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -20,7 +20,7 @@ #define MPIDR_UP_BITMASK (0x1 << 30) #define MPIDR_MT_BITMASK (0x1 << 24) -#define MPIDR_HWID_BITMASK 0xff00ffffffUL +#define MPIDR_HWID_BITMASK UL(0xff00ffffff) #define MPIDR_LEVEL_BITS_SHIFT 3 #define MPIDR_LEVEL_BITS (1 << MPIDR_LEVEL_BITS_SHIFT) From 651b9920d7a694ffb1f885aef2bbb068a25d9d66 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 10 Feb 2018 13:20:34 +0100 Subject: [PATCH 138/269] mac80211: round IEEE80211_TX_STATUS_HEADROOM up to multiple of 4 This ensures that mac80211 allocated management frames are properly aligned, which makes copying them more efficient. For instance, mt76 uses iowrite32_copy to copy beacon frames to beacon template memory on the chip. Misaligned 32-bit accesses cause CPU exceptions on MIPS and should be avoided. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index eec143cca1c0f..c9077a8329772 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4141,7 +4141,7 @@ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *sta, u8 tid); * The TX headroom reserved by mac80211 for its own tx_status functions. * This is enough for the radiotap header. */ -#define IEEE80211_TX_STATUS_HEADROOM 14 +#define IEEE80211_TX_STATUS_HEADROOM ALIGN(14, 4) /** * ieee80211_sta_set_buffered - inform mac80211 about driver-buffered frames From d78d9ee9d40aca4781d2c5334972544601a4c3a2 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 19 Feb 2018 14:48:35 +0200 Subject: [PATCH 139/269] mac80211: fix a possible leak of station stats If sta_info_alloc fails after allocating the per CPU statistics, they are not properly freed. Fixes: c9c5962b56c1 ("mac80211: enable collecting station statistics per-CPU") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 0c5627f8a104e..8d7e3732bb613 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -433,6 +433,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (sta->sta.txq[0]) kfree(to_txq_info(sta->sta.txq[0])); free: + free_percpu(sta->pcpu_rx_stats); #ifdef CONFIG_MAC80211_MESH kfree(sta->mesh); #endif From 95f3ce6a77893ac828ba841df44421620de4314b Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 19 Feb 2018 14:48:37 +0200 Subject: [PATCH 140/269] mac80211: fix calling sleeping function in atomic context sta_info_alloc can be called from atomic paths (such as RX path) so we need to call pcpu_alloc with the correct gfp. Fixes: c9c5962b56c1 ("mac80211: enable collecting station statistics per-CPU") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 8d7e3732bb613..af0b608ee8ed1 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -314,7 +314,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (ieee80211_hw_check(hw, USES_RSS)) { sta->pcpu_rx_stats = - alloc_percpu(struct ieee80211_sta_rx_stats); + alloc_percpu_gfp(struct ieee80211_sta_rx_stats, gfp); if (!sta->pcpu_rx_stats) goto free; } From 3027a8e799b20fc922496a12f8ad2f9f36a8a696 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Mon, 19 Feb 2018 14:48:38 +0200 Subject: [PATCH 141/269] cfg80211: clear wep keys after disconnection When a low level driver calls cfg80211_disconnected(), wep keys are not cleared. As a result, following connection requests will fail since cfg80211 internal state shows a connection is still in progress. Fix this by clearing the wep keys when disconnecting. Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/wireless/sme.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index fdb3646274a56..701cfd7acc1bc 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1032,6 +1032,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, wdev->current_bss = NULL; wdev->ssid_len = 0; wdev->conn_owner_nlportid = 0; + kzfree(wdev->connect_keys); + wdev->connect_keys = NULL; nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap); From 191da271ac260700db3e5b4bb982a17ca78769d6 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 19 Feb 2018 14:48:42 +0200 Subject: [PATCH 142/269] mac80211: Do not disconnect on invalid operating class Some APs include a non global operating class in their extended channel switch information element. In such a case, as the operating class is not known, mac80211 would decide to disconnect. However the specification states that the operating class needs to be taken from Annex E, but it does not specify from which table it should be taken, so it is valid for an AP to use a non global operating class. To avoid possibly unneeded disconnection, in such a case ignore the operating class and assume that the current band is used, and if the resulting channel and band configuration is invalid disconnect. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/spectmgmt.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index ee0181778a429..0293348357474 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -8,6 +8,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2008, Intel Corporation * Copyright 2008, Johannes Berg + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -27,7 +28,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, u32 sta_flags, u8 *bssid, struct ieee80211_csa_ie *csa_ie) { - enum nl80211_band new_band; + enum nl80211_band new_band = current_band; int new_freq; u8 new_chan_no; struct ieee80211_channel *new_chan; @@ -55,15 +56,13 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, elems->ext_chansw_ie->new_operating_class, &new_band)) { sdata_info(sdata, - "cannot understand ECSA IE operating class %d, disconnecting\n", + "cannot understand ECSA IE operating class, %d, ignoring\n", elems->ext_chansw_ie->new_operating_class); - return -EINVAL; } new_chan_no = elems->ext_chansw_ie->new_ch_num; csa_ie->count = elems->ext_chansw_ie->count; csa_ie->mode = elems->ext_chansw_ie->mode; } else if (elems->ch_switch_ie) { - new_band = current_band; new_chan_no = elems->ch_switch_ie->new_ch_num; csa_ie->count = elems->ch_switch_ie->count; csa_ie->mode = elems->ch_switch_ie->mode; From 3b07029729e347f288c70227cfe3c66b085d6b0b Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 19 Feb 2018 14:48:43 +0200 Subject: [PATCH 143/269] mac80211: Fix sending ADDBA response for an ongoing session In case an ADDBA request is received while there is already an ongoing BA sessions with the same parameters, i.e., update flow, an ADBBA response with decline status was sent twice. Fix it. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index d444752dbf407..d643033909130 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -8,6 +8,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -315,9 +316,6 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, * driver so reject the timeout update. */ status = WLAN_STATUS_REQUEST_DECLINED; - ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, - tid, dialog_token, status, - 1, buf_size, timeout); goto end; } From 9085b34d0e8361595a7d19034c550d5d15044556 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 19 Feb 2018 13:38:00 +0000 Subject: [PATCH 144/269] arm64: uaccess: Formalise types for access_ok() In converting __range_ok() into a static inline, I inadvertently made it more type-safe, but without considering the ordering of the relevant conversions. This leads to quite a lot of Sparse noise about the fact that we use __chk_user_ptr() after addr has already been converted from a user pointer to an unsigned long. Rather than just adding another cast for the sake of shutting Sparse up, it seems reasonable to rework the types to make logical sense (although the resulting codegen for __range_ok() remains identical). The only callers this affects directly are our compat traps where the inferred "user-pointer-ness" of a register value now warrants explicit casting. Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/uaccess.h | 12 ++++++------ arch/arm64/kernel/armv8_deprecated.c | 4 +++- arch/arm64/kernel/sys_compat.c | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 543e11f0f657e..e66b0fca99c2f 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -72,15 +72,15 @@ static inline void set_fs(mm_segment_t fs) * This is equivalent to the following test: * (u65)addr + (u65)size <= (u65)current->addr_limit + 1 */ -static inline unsigned long __range_ok(unsigned long addr, unsigned long size) +static inline unsigned long __range_ok(const void __user *addr, unsigned long size) { - unsigned long limit = current_thread_info()->addr_limit; + unsigned long ret, limit = current_thread_info()->addr_limit; __chk_user_ptr(addr); asm volatile( // A + B <= C + 1 for all A,B,C, in four easy steps: // 1: X = A + B; X' = X % 2^64 - " adds %0, %0, %2\n" + " adds %0, %3, %2\n" // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4 " csel %1, xzr, %1, hi\n" // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X' @@ -92,9 +92,9 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size) // testing X' - C == 0, subject to the previous adjustments. " sbcs xzr, %0, %1\n" " cset %0, ls\n" - : "+r" (addr), "+r" (limit) : "Ir" (size) : "cc"); + : "=&r" (ret), "+r" (limit) : "Ir" (size), "0" (addr) : "cc"); - return addr; + return ret; } /* @@ -104,7 +104,7 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size) */ #define untagged_addr(addr) sign_extend64(addr, 55) -#define access_ok(type, addr, size) __range_ok((unsigned long)(addr), size) +#define access_ok(type, addr, size) __range_ok(addr, size) #define user_addr_max get_fs #define _ASM_EXTABLE(from, to) \ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index c33b5e4010ab7..68450e954d47d 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -370,6 +370,7 @@ static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr) static int swp_handler(struct pt_regs *regs, u32 instr) { u32 destreg, data, type, address = 0; + const void __user *user_ptr; int rn, rt2, res = 0; perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); @@ -401,7 +402,8 @@ static int swp_handler(struct pt_regs *regs, u32 instr) aarch32_insn_extract_reg_num(instr, A32_RT2_OFFSET), data); /* Check access in reasonable access range for both SWP and SWPB */ - if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) { + user_ptr = (const void __user *)(unsigned long)(address & ~3); + if (!access_ok(VERIFY_WRITE, user_ptr, 4)) { pr_debug("SWP{B} emulation: access to 0x%08x not allowed!\n", address); goto fault; diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 8b8bbd3eaa52c..a382b2a1b84e3 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -57,7 +57,7 @@ do_compat_cache_op(unsigned long start, unsigned long end, int flags) if (end < start || flags) return -EINVAL; - if (!access_ok(VERIFY_READ, start, end - start)) + if (!access_ok(VERIFY_READ, (const void __user *)start, end - start)) return -EFAULT; return __do_compat_cache_op(start, end); From c795f3052b60b01e80485fad98c53e5e67d093c9 Mon Sep 17 00:00:00 2001 From: Tobias Jordan Date: Thu, 15 Feb 2018 15:34:55 +0100 Subject: [PATCH 145/269] gpu: ipu-v3: pre: fix device node leak in ipu_pre_lookup_by_phandle Before returning, call of_node_put() for the device node returned by of_parse_phandle(). Fixes: d2a34232580a ("gpu: ipu-v3: add driver for Prefetch Resolve Engine") Signed-off-by: Tobias Jordan Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-pre.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c index f1cec3d70498a..0f70e88475409 100644 --- a/drivers/gpu/ipu-v3/ipu-pre.c +++ b/drivers/gpu/ipu-v3/ipu-pre.c @@ -129,11 +129,14 @@ ipu_pre_lookup_by_phandle(struct device *dev, const char *name, int index) if (pre_node == pre->dev->of_node) { mutex_unlock(&ipu_pre_list_mutex); device_link_add(dev, pre->dev, DL_FLAG_AUTOREMOVE); + of_node_put(pre_node); return pre; } } mutex_unlock(&ipu_pre_list_mutex); + of_node_put(pre_node); + return NULL; } From 3addaba8141bc6a4f649a48f46e552af32922147 Mon Sep 17 00:00:00 2001 From: Tobias Jordan Date: Thu, 15 Feb 2018 15:35:30 +0100 Subject: [PATCH 146/269] gpu: ipu-v3: prg: fix device node leak in ipu_prg_lookup_by_phandle Before returning, call of_node_put() for the device node returned by of_parse_phandle(). Fixes: ea9c260514c1 ("gpu: ipu-v3: add driver for Prefetch Resolve Gasket") Signed-off-by: Tobias Jordan Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-prg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-prg.c b/drivers/gpu/ipu-v3/ipu-prg.c index 067365c733c63..97b99500153d3 100644 --- a/drivers/gpu/ipu-v3/ipu-prg.c +++ b/drivers/gpu/ipu-v3/ipu-prg.c @@ -102,11 +102,14 @@ ipu_prg_lookup_by_phandle(struct device *dev, const char *name, int ipu_id) mutex_unlock(&ipu_prg_list_mutex); device_link_add(dev, prg->dev, DL_FLAG_AUTOREMOVE); prg->id = ipu_id; + of_node_put(prg_node); return prg; } } mutex_unlock(&ipu_prg_list_mutex); + of_node_put(prg_node); + return NULL; } From 58a22fc44539ad7fd4c07c9fcc156cad1e3340ea Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 25 Jan 2018 10:37:52 +0100 Subject: [PATCH 147/269] gpu: ipu-cpmem: add 16-bit grayscale support to ipu_cpmem_set_image Add the missing offset calculation for 16-bit grayscale images. Since the IPU only supports capturing greyscale in raw passthrough mode, it is the same as 16-bit bayer formats. Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-cpmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c index ef32377b91c08..9f2d9ec42add6 100644 --- a/drivers/gpu/ipu-v3/ipu-cpmem.c +++ b/drivers/gpu/ipu-v3/ipu-cpmem.c @@ -795,6 +795,7 @@ int ipu_cpmem_set_image(struct ipuv3_channel *ch, struct ipu_image *image) case V4L2_PIX_FMT_SGBRG16: case V4L2_PIX_FMT_SGRBG16: case V4L2_PIX_FMT_SRGGB16: + case V4L2_PIX_FMT_Y16: offset = image->rect.left * 2 + image->rect.top * pix->bytesperline; break; From 50b0f0aee839b5a9995fe7964a678634f75a0518 Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Tue, 13 Feb 2018 18:35:36 +0100 Subject: [PATCH 148/269] gpu: ipu-csi: add 10/12-bit grayscale support to mbus_code_to_bus_cfg The 10/12-bit config used for bayer formats is used for grayscale as well. Signed-off-by: Jan Luebbe Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-csi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-csi.c b/drivers/gpu/ipu-v3/ipu-csi.c index 24e12b87a0cbe..caa05b0702e16 100644 --- a/drivers/gpu/ipu-v3/ipu-csi.c +++ b/drivers/gpu/ipu-v3/ipu-csi.c @@ -288,6 +288,7 @@ static int mbus_code_to_bus_cfg(struct ipu_csi_bus_config *cfg, u32 mbus_code) case MEDIA_BUS_FMT_SGBRG10_1X10: case MEDIA_BUS_FMT_SGRBG10_1X10: case MEDIA_BUS_FMT_SRGGB10_1X10: + case MEDIA_BUS_FMT_Y10_1X10: cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER; cfg->mipi_dt = MIPI_DT_RAW10; cfg->data_width = IPU_CSI_DATA_WIDTH_10; @@ -296,6 +297,7 @@ static int mbus_code_to_bus_cfg(struct ipu_csi_bus_config *cfg, u32 mbus_code) case MEDIA_BUS_FMT_SGBRG12_1X12: case MEDIA_BUS_FMT_SGRBG12_1X12: case MEDIA_BUS_FMT_SRGGB12_1X12: + case MEDIA_BUS_FMT_Y12_1X12: cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER; cfg->mipi_dt = MIPI_DT_RAW12; cfg->data_width = IPU_CSI_DATA_WIDTH_12; From 06998a756a3865817b87a129a7e5d5bb66dc1ec3 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Sun, 18 Feb 2018 16:53:59 +0800 Subject: [PATCH 149/269] drm/edid: Add 6 bpc quirk for CPT panel in Asus UX303LA Similar to commit e10aec652f31 ("drm/edid: Add 6 bpc quirk for display AEO model 0."), the EDID reports "DFP 1.x compliant TMDS" but it support 6bpc instead of 8 bpc. Hence, use 6 bpc quirk for this panel. Fixes: 196f954e2509 ("drm/i915/dp: Revert "drm/i915/dp: fall back to 18 bpp when sink capability is unknown"") BugLink: https://bugs.launchpad.net/bugs/1749420 Signed-off-by: Kai-Heng Feng Reviewed-by: Mario Kleiner Cc: # v4.8+ Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180218085359.7817-1-kai.heng.feng@canonical.com --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index cb487148359a8..16fb76ba6509d 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -113,6 +113,9 @@ static const struct edid_quirk { /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */ { "AEO", 0, EDID_QUIRK_FORCE_6BPC }, + /* CPT panel of Asus UX303LA reports 8 bpc, but is a 6 bpc panel */ + { "CPT", 0x17df, EDID_QUIRK_FORCE_6BPC }, + /* Belinea 10 15 55 */ { "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 }, { "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 }, From be68a8aaf925aaf35574260bf820bb09d2f9e07f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 19 Feb 2018 14:41:44 +0000 Subject: [PATCH 150/269] arm64: cpufeature: Fix CTR_EL0 field definitions Our field definitions for CTR_EL0 suffer from a number of problems: - The IDC and DIC fields are missing, which causes us to enable CTR trapping on CPUs with either of these returning non-zero values. - The ERG is FTR_LOWER_SAFE, whereas it should be treated like CWG as FTR_HIGHER_SAFE so that applications can use it to avoid false sharing. - [nit] A RES1 field is described as "RAO" This patch updates the CTR_EL0 field definitions to fix these issues. Cc: Cc: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 29b1f873e337f..2985a067fc131 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -199,9 +199,11 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { }; static const struct arm64_ftr_bits ftr_ctr[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 29, 1, 1), /* DIC */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 28, 1, 1), /* IDC */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 20, 4, 0), /* ERG */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ /* * Linux can handle differing I-cache policies. Userspace JITs will From 5ee39a71fd89ab7240c5339d04161c44a8e03269 Mon Sep 17 00:00:00 2001 From: Michael Weiser Date: Thu, 1 Feb 2018 23:13:38 +0100 Subject: [PATCH 151/269] arm64: Disable unhandled signal log messages by default aarch64 unhandled signal kernel messages are very verbose, suggesting them to be more of a debugging aid: sigsegv[33]: unhandled level 2 translation fault (11) at 0x00000000, esr 0x92000046, in sigsegv[400000+71000] CPU: 1 PID: 33 Comm: sigsegv Tainted: G W 4.15.0-rc3+ #3 Hardware name: linux,dummy-virt (DT) pstate: 60000000 (nZCv daif -PAN -UAO) pc : 0x4003f4 lr : 0x4006bc sp : 0000fffffe94a060 x29: 0000fffffe94a070 x28: 0000000000000000 x27: 0000000000000000 x26: 0000000000000000 x25: 0000000000000000 x24: 00000000004001b0 x23: 0000000000486ac8 x22: 00000000004001c8 x21: 0000000000000000 x20: 0000000000400be8 x19: 0000000000400b30 x18: 0000000000484728 x17: 000000000865ffc8 x16: 000000000000270f x15: 00000000000000b0 x14: 0000000000000002 x13: 0000000000000001 x12: 0000000000000000 x11: 0000000000000000 x10: 0008000020008008 x9 : 000000000000000f x8 : ffffffffffffffff x7 : 0004000000000000 x6 : ffffffffffffffff x5 : 0000000000000000 x4 : 0000000000000000 x3 : 00000000004003e4 x2 : 0000fffffe94a1e8 x1 : 000000000000000a x0 : 0000000000000000 Disable them by default, so they can be enabled using /proc/sys/debug/exception-trace. Cc: Signed-off-by: Michael Weiser Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index bbb0fde2780ef..c8639f95e59a3 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -57,7 +57,7 @@ static const char *handler[]= { "Error" }; -int show_unhandled_signals = 1; +int show_unhandled_signals = 0; static void dump_backtrace_entry(unsigned long where) { From 1962682d2b2fbe6cfa995a85c53c069fadda473e Mon Sep 17 00:00:00 2001 From: Michael Weiser Date: Thu, 1 Feb 2018 23:13:36 +0100 Subject: [PATCH 152/269] arm64: Remove unimplemented syscall log message Stop printing a (ratelimited) kernel message for each instance of an unimplemented syscall being called. Userland making an unimplemented syscall is not necessarily misbehaviour and to be expected with a current userland running on an older kernel. Also, the current message looks scary to users but does not actually indicate a real problem nor help them narrow down the cause. Just rely on sys_ni_syscall() to return -ENOSYS. Cc: Acked-by: Will Deacon Signed-off-by: Michael Weiser Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index c8639f95e59a3..eb2d15147e8d3 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -526,14 +526,6 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs) } #endif - if (show_unhandled_signals_ratelimited()) { - pr_info("%s[%d]: syscall %d\n", current->comm, - task_pid_nr(current), regs->syscallno); - dump_instr("", regs); - if (user_mode(regs)) - __show_regs(regs); - } - return sys_ni_syscall(); } From a06f818a70de21b4b3b4186816094208fc7accf9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 19 Feb 2018 16:46:57 +0000 Subject: [PATCH 153/269] arm64: __show_regs: Only resolve kernel symbols when running at EL1 __show_regs pretty prints PC and LR by attempting to map them to kernel function names to improve the utility of crash reports. Unfortunately, this mapping is applied even when the pt_regs corresponds to user mode, resulting in a KASLR oracle. Avoid this issue by only looking up the function symbols when the register state indicates that we're actually running at EL1. Cc: Reported-by: NCSC Security Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index ad8aeb098b31e..c0da6efe54655 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -220,8 +220,15 @@ void __show_regs(struct pt_regs *regs) show_regs_print_info(KERN_DEFAULT); print_pstate(regs); - printk("pc : %pS\n", (void *)regs->pc); - printk("lr : %pS\n", (void *)lr); + + if (!user_mode(regs)) { + printk("pc : %pS\n", (void *)regs->pc); + printk("lr : %pS\n", (void *)lr); + } else { + printk("pc : %016llx\n", regs->pc); + printk("lr : %016llx\n", lr); + } + printk("sp : %016llx\n", sp); i = top_reg; From cfc2c740533368b96e2be5e0a4e8c3cace7d9814 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 16 Feb 2018 19:36:28 -0800 Subject: [PATCH 154/269] netfilter: IDLETIMER: be syzkaller friendly We had one report from syzkaller [1] First issue is that INIT_WORK() should be done before mod_timer() or we risk timer being fired too soon, even with a 1 second timer. Second issue is that we need to reject too big info->timeout to avoid overflows in msecs_to_jiffies(info->timeout * 1000), or risk looping, if result after overflow is 0. [1] WARNING: CPU: 1 PID: 5129 at kernel/workqueue.c:1444 __queue_work+0xdf4/0x1230 kernel/workqueue.c:1444 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 5129 Comm: syzkaller159866 Not tainted 4.16.0-rc1+ #230 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 panic+0x1e4/0x41c kernel/panic.c:183 __warn+0x1dc/0x200 kernel/panic.c:547 report_bug+0x211/0x2d0 lib/bug.c:184 fixup_bug.part.11+0x37/0x80 arch/x86/kernel/traps.c:178 fixup_bug arch/x86/kernel/traps.c:247 [inline] do_error_trap+0x2d7/0x3e0 arch/x86/kernel/traps.c:296 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315 invalid_op+0x22/0x40 arch/x86/entry/entry_64.S:988 RIP: 0010:__queue_work+0xdf4/0x1230 kernel/workqueue.c:1444 RSP: 0018:ffff8801db507538 EFLAGS: 00010006 RAX: ffff8801aeb46080 RBX: ffff8801db530200 RCX: ffffffff81481404 RDX: 0000000000000100 RSI: ffffffff86b42640 RDI: 0000000000000082 RBP: ffff8801db507758 R08: 1ffff1003b6a0de5 R09: 000000000000000c R10: ffff8801db5073f0 R11: 0000000000000020 R12: 1ffff1003b6a0eb6 R13: ffff8801b1067ae0 R14: 00000000000001f8 R15: dffffc0000000000 queue_work_on+0x16a/0x1c0 kernel/workqueue.c:1488 queue_work include/linux/workqueue.h:488 [inline] schedule_work include/linux/workqueue.h:546 [inline] idletimer_tg_expired+0x44/0x60 net/netfilter/xt_IDLETIMER.c:116 call_timer_fn+0x228/0x820 kernel/time/timer.c:1326 expire_timers kernel/time/timer.c:1363 [inline] __run_timers+0x7ee/0xb70 kernel/time/timer.c:1666 run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692 __do_softirq+0x2d7/0xb85 kernel/softirq.c:285 invoke_softirq kernel/softirq.c:365 [inline] irq_exit+0x1cc/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:541 [inline] smp_apic_timer_interrupt+0x16b/0x700 arch/x86/kernel/apic/apic.c:1052 apic_timer_interrupt+0xa9/0xb0 arch/x86/entry/entry_64.S:829 RIP: 0010:arch_local_irq_restore arch/x86/include/asm/paravirt.h:777 [inline] RIP: 0010:__raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:160 [inline] RIP: 0010:_raw_spin_unlock_irqrestore+0x5e/0xba kernel/locking/spinlock.c:184 RSP: 0018:ffff8801c20173c8 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff12 RAX: dffffc0000000000 RBX: 0000000000000282 RCX: 0000000000000006 RDX: 1ffffffff0d592cd RSI: 1ffff10035d68d23 RDI: 0000000000000282 RBP: ffff8801c20173d8 R08: 1ffff10038402e47 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff8820e5c8 R13: ffff8801b1067ad8 R14: ffff8801aea7c268 R15: ffff8801aea7c278 __debug_object_init+0x235/0x1040 lib/debugobjects.c:378 debug_object_init+0x17/0x20 lib/debugobjects.c:391 __init_work+0x2b/0x60 kernel/workqueue.c:506 idletimer_tg_create net/netfilter/xt_IDLETIMER.c:152 [inline] idletimer_tg_checkentry+0x691/0xb00 net/netfilter/xt_IDLETIMER.c:213 xt_check_target+0x22c/0x7d0 net/netfilter/x_tables.c:850 check_target net/ipv6/netfilter/ip6_tables.c:533 [inline] find_check_entry.isra.7+0x935/0xcf0 net/ipv6/netfilter/ip6_tables.c:575 translate_table+0xf52/0x1690 net/ipv6/netfilter/ip6_tables.c:744 do_replace net/ipv6/netfilter/ip6_tables.c:1160 [inline] do_ip6t_set_ctl+0x370/0x5f0 net/ipv6/netfilter/ip6_tables.c:1686 nf_sockopt net/netfilter/nf_sockopt.c:106 [inline] nf_setsockopt+0x67/0xc0 net/netfilter/nf_sockopt.c:115 ipv6_setsockopt+0x10b/0x130 net/ipv6/ipv6_sockglue.c:927 udpv6_setsockopt+0x45/0x80 net/ipv6/udp.c:1422 sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2976 SYSC_setsockopt net/socket.c:1850 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1829 do_syscall_64+0x282/0x940 arch/x86/entry/common.c:287 Fixes: 0902b469bd25 ("netfilter: xtables: idletimer target implementation") Signed-off-by: Eric Dumazet Reported-by: syzkaller Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_IDLETIMER.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 6c2482b709b1e..1ac6600bfafd6 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -146,11 +146,11 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) timer_setup(&info->timer->timer, idletimer_tg_expired, 0); info->timer->refcnt = 1; + INIT_WORK(&info->timer->work, idletimer_tg_work); + mod_timer(&info->timer->timer, msecs_to_jiffies(info->timeout * 1000) + jiffies); - INIT_WORK(&info->timer->work, idletimer_tg_work); - return 0; out_free_attr: @@ -191,7 +191,10 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) pr_debug("timeout value is zero\n"); return -EINVAL; } - + if (info->timeout >= INT_MAX / 1000) { + pr_debug("timeout value is too big\n"); + return -EINVAL; + } if (info->label[0] == '\0' || strnlen(info->label, MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) { From a988681dbbca01c64d86455c0153899870d7a63c Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Sun, 18 Feb 2018 21:11:25 +0100 Subject: [PATCH 155/269] MAINTAINERS: Remove Richard Purdie from LED maintainers Richard has been inactive on the linux-leds list for a long time. After email discussion we agreed on removing him from the LED maintainers, which will better reflect the actual status. Acked-by: Richard Purdie Signed-off-by: Jacek Anaszewski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9a7f76eadae9a..93a12af4f180b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7909,7 +7909,6 @@ S: Maintained F: scripts/leaking_addresses.pl LED SUBSYSTEM -M: Richard Purdie M: Jacek Anaszewski M: Pavel Machek L: linux-leds@vger.kernel.org From a588a8bb7b25a3fb4f7fed00feb7aec541fc2632 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 Jan 2018 18:01:21 +0100 Subject: [PATCH 156/269] drm/exynos: g2d: use monotonic timestamps The exynos DRM driver uses real-time 'struct timeval' values for exporting its timestamps to user space. This has multiple problems: 1. signed seconds overflow in y2038 2. the 'struct timeval' definition is deprecated in the kernel 3. time may jump or go backwards after a 'settimeofday()' syscall 4. other DRM timestamps are in CLOCK_MONOTONIC domain, so they can't be compared 5. exporting microseconds requires a division by 1000, which may be slow on some architectures. The code existed in two places before, but the IPP portion was removed in 8ded59413ccc ("drm/exynos: ipp: Remove Exynos DRM IPP subsystem"), so we no longer need to worry about it. Ideally timestamps should just use 64-bit nanoseconds instead, but of course we can't change that now. Instead, this tries to address the first four points above by using monotonic 'timespec' values. According to Tobias Jakobi, user space doesn't care about the timestamp at the moment, so we can change the format. Even if there is something looking at them, it will work just fine with monotonic times as long as the application only looks at the relative values between two events. Link: https://patchwork.kernel.org/patch/10038593/ Cc: Tobias Jakobi Signed-off-by: Arnd Bergmann Reviewed-by: Tobias Jakobi Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 2b8bf2dd63874..9effe40f5fa5d 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -926,7 +926,7 @@ static void g2d_finish_event(struct g2d_data *g2d, u32 cmdlist_no) struct drm_device *drm_dev = g2d->subdrv.drm_dev; struct g2d_runqueue_node *runqueue_node = g2d->runqueue_node; struct drm_exynos_pending_g2d_event *e; - struct timeval now; + struct timespec64 now; if (list_empty(&runqueue_node->event_list)) return; @@ -934,9 +934,9 @@ static void g2d_finish_event(struct g2d_data *g2d, u32 cmdlist_no) e = list_first_entry(&runqueue_node->event_list, struct drm_exynos_pending_g2d_event, base.link); - do_gettimeofday(&now); + ktime_get_ts64(&now); e->event.tv_sec = now.tv_sec; - e->event.tv_usec = now.tv_usec; + e->event.tv_usec = now.tv_nsec / NSEC_PER_USEC; e->event.cmdlist_no = cmdlist_no; drm_send_event(drm_dev, &e->base); From 1293b6191010672c0c9dacae8f71c6f3e4d70cbe Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 5 Feb 2018 21:09:59 +0100 Subject: [PATCH 157/269] drm/exynos: fix comparison to bitshift when dealing with a mask Due to a typo, the mask was destroyed by a comparison instead of a bit shift. Signed-off-by: Wolfram Sang Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/regs-fimc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/regs-fimc.h b/drivers/gpu/drm/exynos/regs-fimc.h index 30496134a3d07..d7cbe53c4c01f 100644 --- a/drivers/gpu/drm/exynos/regs-fimc.h +++ b/drivers/gpu/drm/exynos/regs-fimc.h @@ -569,7 +569,7 @@ #define EXYNOS_CIIMGEFF_FIN_EMBOSSING (4 << 26) #define EXYNOS_CIIMGEFF_FIN_SILHOUETTE (5 << 26) #define EXYNOS_CIIMGEFF_FIN_MASK (7 << 26) -#define EXYNOS_CIIMGEFF_PAT_CBCR_MASK ((0xff < 13) | (0xff < 0)) +#define EXYNOS_CIIMGEFF_PAT_CBCR_MASK ((0xff << 13) | (0xff << 0)) /* Real input DMA size register */ #define EXYNOS_CIREAL_ISIZE_AUTOLOAD_ENABLE (1 << 31) From 6f0a60298bbbea43ab5e3955913ab19c153076f3 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 8 Feb 2018 18:42:51 +0100 Subject: [PATCH 158/269] drm/exynos: g2d: Delete an error message for a failed memory allocation in two functions Omit an extra message for a memory allocation failure in these functions. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 9effe40f5fa5d..f68ef1b3a28c7 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -286,7 +286,6 @@ static int g2d_init_cmdlist(struct g2d_data *g2d) node = kcalloc(G2D_CMDLIST_NUM, sizeof(*node), GFP_KERNEL); if (!node) { - dev_err(dev, "failed to allocate memory\n"); ret = -ENOMEM; goto err; } @@ -1358,10 +1357,9 @@ int exynos_g2d_exec_ioctl(struct drm_device *drm_dev, void *data, return -EFAULT; runqueue_node = kmem_cache_alloc(g2d->runqueue_slab, GFP_KERNEL); - if (!runqueue_node) { - dev_err(dev, "failed to allocate memory\n"); + if (!runqueue_node) return -ENOMEM; - } + run_cmdlist = &runqueue_node->run_cmdlist; event_list = &runqueue_node->event_list; INIT_LIST_HEAD(run_cmdlist); From b701a1436a5b177dc2240ba7e8f2ff7106bc8d84 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 15 Feb 2018 08:23:15 +0000 Subject: [PATCH 159/269] drm/exynos: remove exynos_drm_rotator.h Since its inclusion in 2012 via commit bea8a429d91a ("drm/exynos: add rotator ipp driver") this header is not used by any source files and is empty. Lets just remove it. Signed-off-by: Corentin Labbe Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_rotator.h | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 drivers/gpu/drm/exynos/exynos_drm_rotator.h diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.h b/drivers/gpu/drm/exynos/exynos_drm_rotator.h deleted file mode 100644 index 71a0b4c0c1e89..0000000000000 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * - * Authors: - * YoungJun Cho - * Eunchul Kim - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef _EXYNOS_DRM_ROTATOR_H_ -#define _EXYNOS_DRM_ROTATOR_H_ - -/* TODO */ - -#endif From c84b66f8aa3f879dbf41353f677d87875f5fc6c9 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Wed, 14 Feb 2018 18:23:56 +0100 Subject: [PATCH 160/269] drm: exynos: Use proper macro definition for HDMI_I2S_PIN_SEL_1 Bit field [2:0] of HDMI_I2S_PIN_SEL_1 corresponds to SDATA_0, not SDATA_2. This patch removes redefinition of HDMI_I2S_SEL_DATA2 constant and adds missing HDMI_I2S_SEL_DATA0. The value of bit field selecting SDATA_1 (pin_sel_3) is also changed, so it is 3 as suggested in the Exynos TRMs. Signed-off-by: Sylwester Nawrocki Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_hdmi.c | 7 +++++-- drivers/gpu/drm/exynos/regs-hdmi.h | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index a4b75a46f9463..abd84cbcf1c2c 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -1068,10 +1068,13 @@ static void hdmi_audio_config(struct hdmi_context *hdata) /* Configuration I2S input ports. Configure I2S_PIN_SEL_0~4 */ hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_0, HDMI_I2S_SEL_SCLK(5) | HDMI_I2S_SEL_LRCK(6)); - hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_1, HDMI_I2S_SEL_SDATA1(1) - | HDMI_I2S_SEL_SDATA2(4)); + + hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_1, HDMI_I2S_SEL_SDATA1(3) + | HDMI_I2S_SEL_SDATA0(4)); + hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_2, HDMI_I2S_SEL_SDATA3(1) | HDMI_I2S_SEL_SDATA2(2)); + hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_3, HDMI_I2S_SEL_DSD(0)); /* I2S_CON_1 & 2 */ diff --git a/drivers/gpu/drm/exynos/regs-hdmi.h b/drivers/gpu/drm/exynos/regs-hdmi.h index 04be0f7e81932..4420c203ac85e 100644 --- a/drivers/gpu/drm/exynos/regs-hdmi.h +++ b/drivers/gpu/drm/exynos/regs-hdmi.h @@ -464,7 +464,7 @@ /* I2S_PIN_SEL_1 */ #define HDMI_I2S_SEL_SDATA1(x) (((x) & 0x7) << 4) -#define HDMI_I2S_SEL_SDATA2(x) ((x) & 0x7) +#define HDMI_I2S_SEL_SDATA0(x) ((x) & 0x7) /* I2S_PIN_SEL_2 */ #define HDMI_I2S_SEL_SDATA3(x) (((x) & 0x7) << 4) From f8f4aa68a8ae98ed79c8fee3488c38a2f5d2de8c Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 18 Feb 2018 11:05:15 +0200 Subject: [PATCH 161/269] mei: me: add cannon point device ids Add CNP LP and CNP H device ids for cannon lake and coffee lake platforms. Cc: 4.14+ Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 3 +++ drivers/misc/mei/pci-me.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 0ccccbaf530d2..bda3bd8f3141f 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -132,6 +132,9 @@ #define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */ #define MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */ +#define MEI_DEV_ID_CNP_LP 0x9DE0 /* Cannon Point LP */ +#define MEI_DEV_ID_CNP_H 0xA360 /* Cannon Point H */ + /* * MEI HW Section */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 4a0ccda4d04b9..f915000e5bf94 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -98,6 +98,9 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_KBP, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH8_CFG)}, + /* required last entry */ {0, } }; From 2a4ac172c2f257d28c47b90c9e381bec31edcc44 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sun, 18 Feb 2018 11:05:16 +0200 Subject: [PATCH 162/269] mei: me: add cannon point device ids for 4th device Add cannon point device ids for 4th (itouch) device. Cc: 4.14+ Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 2 ++ drivers/misc/mei/pci-me.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index bda3bd8f3141f..e4b10b2d1a083 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -133,7 +133,9 @@ #define MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */ #define MEI_DEV_ID_CNP_LP 0x9DE0 /* Cannon Point LP */ +#define MEI_DEV_ID_CNP_LP_4 0x9DE4 /* Cannon Point LP 4 (iTouch) */ #define MEI_DEV_ID_CNP_H 0xA360 /* Cannon Point H */ +#define MEI_DEV_ID_CNP_H_4 0xA364 /* Cannon Point H 4 (iTouch) */ /* * MEI HW Section diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index f915000e5bf94..ea4e152270a3b 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -99,7 +99,9 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP_4, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_4, MEI_ME_PCH8_CFG)}, /* required last entry */ {0, } From b8ff1802815913aad52695898cccbc9f77b7e726 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 19 Feb 2018 11:35:43 +0000 Subject: [PATCH 163/269] drm: Handle unexpected holes in color-eviction During eviction, the driver may free more than one hole in the drm_mm due to the side-effects in evicting the scanned nodes. However, drm_mm_scan_color_evict() expects that the scan result is the first available hole (in the mru freed hole_stack list): kernel BUG at drivers/gpu/drm/drm_mm.c:844! invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: i915 snd_hda_codec_analog snd_hda_codec_generic coretemp snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core lpc_ich snd_pcm e1000e mei_me prime_numbers mei CPU: 1 PID: 1490 Comm: gem_userptr_bli Tainted: G U 4.16.0-rc1-g740f57c54ecf-kasan_6+ #1 Hardware name: Dell Inc. OptiPlex 755 /0PU052, BIOS A08 02/19/2008 RIP: 0010:drm_mm_scan_color_evict+0x2b8/0x3d0 RSP: 0018:ffff880057a573f8 EFLAGS: 00010287 RAX: ffff8800611f5980 RBX: ffff880057a575d0 RCX: dffffc0000000000 RDX: 00000000029d5000 RSI: 1ffff1000af4aec1 RDI: ffff8800611f5a10 RBP: ffff88005ab884d0 R08: ffff880057a57600 R09: 000000000afff000 R10: 1ffff1000b5710b5 R11: 0000000000001000 R12: 1ffff1000af4ae82 R13: ffff8800611f59b0 R14: ffff8800611f5980 R15: ffff880057a57608 FS: 00007f2de0c2e8c0(0000) GS:ffff88006ac40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f2ddde1e000 CR3: 00000000609b2000 CR4: 00000000000006e0 Call Trace: ? drm_mm_scan_remove_block+0x330/0x330 ? drm_mm_scan_remove_block+0x151/0x330 i915_gem_evict_something+0x711/0xbd0 [i915] ? igt_evict_contexts+0x50/0x50 [i915] ? nop_clear_range+0x10/0x10 [i915] ? igt_evict_something+0x90/0x90 [i915] ? i915_gem_gtt_reserve+0x1a1/0x320 [i915] i915_gem_gtt_insert+0x237/0x400 [i915] __i915_vma_do_pin+0xc25/0x1a20 [i915] eb_lookup_vmas+0x1c63/0x3790 [i915] ? i915_gem_check_execbuffer+0x250/0x250 [i915] ? trace_hardirqs_on_caller+0x33f/0x590 ? _raw_spin_unlock_irqrestore+0x39/0x60 ? __pm_runtime_resume+0x7d/0xf0 i915_gem_do_execbuffer+0x86a/0x2ff0 [i915] ? __kmalloc+0x132/0x340 ? i915_gem_execbuffer2_ioctl+0x10f/0x760 [i915] ? drm_ioctl_kernel+0x12e/0x1c0 ? drm_ioctl+0x662/0x980 ? eb_relocate_slow+0xa90/0xa90 [i915] ? i915_gem_execbuffer2_ioctl+0x10f/0x760 [i915] ? __might_fault+0xea/0x1a0 i915_gem_execbuffer2_ioctl+0x3cc/0x760 [i915] ? i915_gem_execbuffer_ioctl+0xba0/0xba0 [i915] ? lock_acquire+0x3c0/0x3c0 ? i915_gem_execbuffer_ioctl+0xba0/0xba0 [i915] drm_ioctl_kernel+0x12e/0x1c0 drm_ioctl+0x662/0x980 ? i915_gem_execbuffer_ioctl+0xba0/0xba0 [i915] ? drm_getstats+0x20/0x20 ? debug_check_no_obj_freed+0x2a6/0x8c0 do_vfs_ioctl+0x170/0xe70 ? ioctl_preallocate+0x170/0x170 ? task_work_run+0xbe/0x160 ? lock_acquire+0x3c0/0x3c0 ? trace_hardirqs_on_caller+0x33f/0x590 ? _raw_spin_unlock_irq+0x2f/0x50 SyS_ioctl+0x36/0x70 ? do_vfs_ioctl+0xe70/0xe70 do_syscall_64+0x18c/0x5d0 entry_SYSCALL_64_after_hwframe+0x26/0x9b RIP: 0033:0x7f2ddf13b587 RSP: 002b:00007fff15c4f9d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f2ddf13b587 RDX: 00007fff15c4fa20 RSI: 0000000040406469 RDI: 0000000000000003 RBP: 00007fff15c4fa20 R08: 0000000000000000 R09: 00007f2ddf3fe120 R10: 0000000000000073 R11: 0000000000000246 R12: 0000000040406469 R13: 0000000000000003 R14: 00007fff15c4fa20 R15: 00000000000000c7 Code: 00 00 00 4a c7 44 22 08 00 00 00 00 42 c7 44 22 10 00 00 00 00 48 81 c4 b8 00 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 0f 0b 0f 0b <0f> 0b 31 c0 eb c0 4c 89 ef e8 9a 09 41 ff e9 1e fe ff ff 4c 89 RIP: drm_mm_scan_color_evict+0x2b8/0x3d0 RSP: ffff880057a573f8 We can trivially relax this assumption by searching the hole_stack for the scan result and warn instead if the driver called us without any result. Fixes: 3fa489dabea9 ("drm: Apply tight eviction scanning to color_adjust") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: # v4.11+ Reviewed-by: Joonas Lahtinen Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180219113543.8010-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/drm_mm.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index c3c79ee6119e0..edab571dbc905 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -836,9 +836,24 @@ struct drm_mm_node *drm_mm_scan_color_evict(struct drm_mm_scan *scan) if (!mm->color_adjust) return NULL; - hole = list_first_entry(&mm->hole_stack, typeof(*hole), hole_stack); - hole_start = __drm_mm_hole_node_start(hole); - hole_end = hole_start + hole->hole_size; + /* + * The hole found during scanning should ideally be the first element + * in the hole_stack list, but due to side-effects in the driver it + * may not be. + */ + list_for_each_entry(hole, &mm->hole_stack, hole_stack) { + hole_start = __drm_mm_hole_node_start(hole); + hole_end = hole_start + hole->hole_size; + + if (hole_start <= scan->hit_start && + hole_end >= scan->hit_end) + break; + } + + /* We should only be called after we found the hole previously */ + DRM_MM_BUG_ON(&hole->hole_stack == &mm->hole_stack); + if (unlikely(&hole->hole_stack == &mm->hole_stack)) + return NULL; DRM_MM_BUG_ON(hole_start > scan->hit_start); DRM_MM_BUG_ON(hole_end < scan->hit_end); From e88230a3744a71a0b5ecfb45e08ddfe1c884e50d Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 15 Feb 2018 11:19:36 +0100 Subject: [PATCH 164/269] drm/meson: fix vsync buffer update The plane buffer address/stride/height was incorrectly updated in the plane_atomic_update operation instead of the vsync irq. This patch delays this operation in the vsync irq along with the other plane delayed setup. This issue was masked using legacy framebuffer and X11 modesetting, but is clearly visible using gbm rendering when buffer is submitted late after vblank, like using software decoding and OpenGL rendering in Kodi. With this patch, tearing and other artifacts disappears completely. Cc: Michal Lazo Fixes: bbbe775ec5b5 ("drm: Add support for Amlogic Meson Graphic Controller") Signed-off-by: Neil Armstrong Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1518689976-23292-1-git-send-email-narmstrong@baylibre.com --- drivers/gpu/drm/meson/meson_crtc.c | 6 ++++++ drivers/gpu/drm/meson/meson_drv.h | 3 +++ drivers/gpu/drm/meson/meson_plane.c | 7 +++---- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c index 5155f0179b617..05520202c9677 100644 --- a/drivers/gpu/drm/meson/meson_crtc.c +++ b/drivers/gpu/drm/meson/meson_crtc.c @@ -36,6 +36,7 @@ #include "meson_venc.h" #include "meson_vpp.h" #include "meson_viu.h" +#include "meson_canvas.h" #include "meson_registers.h" /* CRTC definition */ @@ -192,6 +193,11 @@ void meson_crtc_irq(struct meson_drm *priv) } else meson_vpp_disable_interlace_vscaler_osd1(priv); + meson_canvas_setup(priv, MESON_CANVAS_ID_OSD1, + priv->viu.osd1_addr, priv->viu.osd1_stride, + priv->viu.osd1_height, MESON_CANVAS_WRAP_NONE, + MESON_CANVAS_BLKMODE_LINEAR); + /* Enable OSD1 */ writel_bits_relaxed(VPP_OSD1_POSTBLEND, VPP_OSD1_POSTBLEND, priv->io_base + _REG(VPP_MISC)); diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h index 5e8b392b9d1ff..8450d6ac8c9bc 100644 --- a/drivers/gpu/drm/meson/meson_drv.h +++ b/drivers/gpu/drm/meson/meson_drv.h @@ -43,6 +43,9 @@ struct meson_drm { bool osd1_commit; uint32_t osd1_ctrl_stat; uint32_t osd1_blk0_cfg[5]; + uint32_t osd1_addr; + uint32_t osd1_stride; + uint32_t osd1_height; } viu; struct { diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c index 17e96fa478685..0b6011b8d6321 100644 --- a/drivers/gpu/drm/meson/meson_plane.c +++ b/drivers/gpu/drm/meson/meson_plane.c @@ -164,10 +164,9 @@ static void meson_plane_atomic_update(struct drm_plane *plane, /* Update Canvas with buffer address */ gem = drm_fb_cma_get_gem_obj(fb, 0); - meson_canvas_setup(priv, MESON_CANVAS_ID_OSD1, - gem->paddr, fb->pitches[0], - fb->height, MESON_CANVAS_WRAP_NONE, - MESON_CANVAS_BLKMODE_LINEAR); + priv->viu.osd1_addr = gem->paddr; + priv->viu.osd1_stride = fb->pitches[0]; + priv->viu.osd1_height = fb->height; spin_unlock_irqrestore(&priv->drm->event_lock, flags); } From 2b05f6ae1ee5a3c625478acd10b0966b66a3a017 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Feb 2018 16:41:55 +0000 Subject: [PATCH 165/269] ARM: ux500: remove PMU IRQ bouncer The ux500 PMU IRQ bouncer is getting in the way of some fundametnal changes to the ARM PMU driver, and it's the only special case that exists today. Let's remove it. Reviewed-by: Linus Walleij Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/mach-ux500/cpu-db8500.c | 35 -------------------------------- 1 file changed, 35 deletions(-) diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 57058ac46f497..7e5d7a0837070 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include @@ -112,37 +111,6 @@ static void ux500_restart(enum reboot_mode mode, const char *cmd) prcmu_system_reset(0); } -/* - * The PMU IRQ lines of two cores are wired together into a single interrupt. - * Bounce the interrupt to the other core if it's not ours. - */ -static irqreturn_t db8500_pmu_handler(int irq, void *dev, irq_handler_t handler) -{ - irqreturn_t ret = handler(irq, dev); - int other = !smp_processor_id(); - - if (ret == IRQ_NONE && cpu_online(other)) - irq_set_affinity(irq, cpumask_of(other)); - - /* - * We should be able to get away with the amount of IRQ_NONEs we give, - * while still having the spurious IRQ detection code kick in if the - * interrupt really starts hitting spuriously. - */ - return ret; -} - -static struct arm_pmu_platdata db8500_pmu_platdata = { - .handle_irq = db8500_pmu_handler, - .irq_flags = IRQF_NOBALANCING | IRQF_NO_THREAD, -}; - -static struct of_dev_auxdata u8500_auxdata_lookup[] __initdata = { - /* Requires call-back bindings. */ - OF_DEV_AUXDATA("arm,cortex-a9-pmu", 0, "arm-pmu", &db8500_pmu_platdata), - {}, -}; - static struct of_dev_auxdata u8540_auxdata_lookup[] __initdata = { OF_DEV_AUXDATA("stericsson,db8500-prcmu", 0x80157000, "db8500-prcmu", NULL), {}, @@ -165,9 +133,6 @@ static void __init u8500_init_machine(void) if (of_machine_is_compatible("st-ericsson,u8540")) of_platform_populate(NULL, u8500_local_bus_nodes, u8540_auxdata_lookup, NULL); - else - of_platform_populate(NULL, u8500_local_bus_nodes, - u8500_auxdata_lookup, NULL); } static const char * stericsson_dt_platform_compat[] = { From c0248c96631f38f02d58762fc018e316843acac8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Feb 2018 16:41:56 +0000 Subject: [PATCH 166/269] arm_pmu: kill arm_pmu_platdata Now that we have no platforms passing platform data to the arm_pmu code, we can get rid of the platdata and associated hooks, paving the way for rework of our IRQ handling. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 27 ++++----------------------- include/linux/perf/arm_pmu.h | 17 ----------------- 2 files changed, 4 insertions(+), 40 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 7bc5eee96b310..82b09d1cb42c2 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -320,17 +319,9 @@ validate_group(struct perf_event *event) return 0; } -static struct arm_pmu_platdata *armpmu_get_platdata(struct arm_pmu *armpmu) -{ - struct platform_device *pdev = armpmu->plat_device; - - return pdev ? dev_get_platdata(&pdev->dev) : NULL; -} - static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) { struct arm_pmu *armpmu; - struct arm_pmu_platdata *plat; int ret; u64 start_clock, finish_clock; @@ -342,13 +333,8 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) */ armpmu = *(void **)dev; - plat = armpmu_get_platdata(armpmu); - start_clock = sched_clock(); - if (plat && plat->handle_irq) - ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq); - else - ret = armpmu->handle_irq(irq, armpmu); + ret = armpmu->handle_irq(irq, armpmu); finish_clock = sched_clock(); perf_sample_event_took(finish_clock - start_clock); @@ -578,7 +564,6 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) goto err_out; } } else { - struct arm_pmu_platdata *platdata = armpmu_get_platdata(armpmu); unsigned long irq_flags; err = irq_force_affinity(irq, cpumask_of(cpu)); @@ -589,13 +574,9 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) goto err_out; } - if (platdata && platdata->irq_flags) { - irq_flags = platdata->irq_flags; - } else { - irq_flags = IRQF_PERCPU | - IRQF_NOBALANCING | - IRQF_NO_THREAD; - } + irq_flags = IRQF_PERCPU | + IRQF_NOBALANCING | + IRQF_NO_THREAD; err = request_irq(irq, handler, irq_flags, "arm-pmu", per_cpu_ptr(&hw_events->percpu_pmu, cpu)); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index af0f44effd44a..712764b35c6a1 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -17,23 +17,6 @@ #include #include -/* - * struct arm_pmu_platdata - ARM PMU platform data - * - * @handle_irq: an optional handler which will be called from the - * interrupt and passed the address of the low level handler, - * and can be used to implement any platform specific handling - * before or after calling it. - * - * @irq_flags: if non-zero, these flags will be passed to request_irq - * when requesting interrupts for this PMU device. - */ -struct arm_pmu_platdata { - irqreturn_t (*handle_irq)(int irq, void *dev, - irq_handler_t pmu_handler); - unsigned long irq_flags; -}; - #ifdef CONFIG_ARM_PMU /* From d3d5aac206b4e9e569a22fe1811c909dde17587c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Feb 2018 16:41:57 +0000 Subject: [PATCH 167/269] arm_pmu: fold platform helpers into platform code The armpmu_{request,free}_irqs() helpers are only used by arm_pmu_platform.c, so let's fold them in and make them static. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 21 --------------------- drivers/perf/arm_pmu_platform.c | 21 +++++++++++++++++++++ include/linux/perf/arm_pmu.h | 2 -- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 82b09d1cb42c2..373dfd7d8a1da 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -534,14 +534,6 @@ void armpmu_free_irq(struct arm_pmu *armpmu, int cpu) free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); } -void armpmu_free_irqs(struct arm_pmu *armpmu) -{ - int cpu; - - for_each_cpu(cpu, &armpmu->supported_cpus) - armpmu_free_irq(armpmu, cpu); -} - int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) { int err = 0; @@ -593,19 +585,6 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) return err; } -int armpmu_request_irqs(struct arm_pmu *armpmu) -{ - int cpu, err; - - for_each_cpu(cpu, &armpmu->supported_cpus) { - err = armpmu_request_irq(armpmu, cpu); - if (err) - break; - } - - return err; -} - static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu) { struct pmu_hw_events __percpu *hw_events = pmu->hw_events; diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 46501cc79fd7c..244558cfdbce7 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -164,6 +164,27 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) return 0; } +static int armpmu_request_irqs(struct arm_pmu *armpmu) +{ + int cpu, err; + + for_each_cpu(cpu, &armpmu->supported_cpus) { + err = armpmu_request_irq(armpmu, cpu); + if (err) + break; + } + + return err; +} + +static void armpmu_free_irqs(struct arm_pmu *armpmu) +{ + int cpu; + + for_each_cpu(cpu, &armpmu->supported_cpus) + armpmu_free_irq(armpmu, cpu); +} + int arm_pmu_device_probe(struct platform_device *pdev, const struct of_device_id *of_table, const struct pmu_probe_info *probe_table) diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 712764b35c6a1..899bc7ef08812 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -159,8 +159,6 @@ static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } struct arm_pmu *armpmu_alloc(void); void armpmu_free(struct arm_pmu *pmu); int armpmu_register(struct arm_pmu *pmu); -int armpmu_request_irqs(struct arm_pmu *armpmu); -void armpmu_free_irqs(struct arm_pmu *armpmu); int armpmu_request_irq(struct arm_pmu *armpmu, int cpu); void armpmu_free_irq(struct arm_pmu *armpmu, int cpu); From 0dc1a1851af1d593eee248b94c1277c7c7ccbbce Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Feb 2018 16:41:58 +0000 Subject: [PATCH 168/269] arm_pmu: add armpmu_alloc_atomic() In ACPI systems, we don't know the makeup of CPUs until we hotplug them on, and thus have to allocate the PMU datastructures at hotplug time. Thus, we must use GFP_ATOMIC allocations. Let's add an armpmu_alloc_atomic() that we can use in this case. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 17 ++++++++++++++--- drivers/perf/arm_pmu_acpi.c | 2 +- include/linux/perf/arm_pmu.h | 1 + 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 373dfd7d8a1da..4f73c5e8d6239 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -760,18 +760,18 @@ static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) &cpu_pmu->node); } -struct arm_pmu *armpmu_alloc(void) +static struct arm_pmu *__armpmu_alloc(gfp_t flags) { struct arm_pmu *pmu; int cpu; - pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); + pmu = kzalloc(sizeof(*pmu), flags); if (!pmu) { pr_info("failed to allocate PMU device!\n"); goto out; } - pmu->hw_events = alloc_percpu(struct pmu_hw_events); + pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, flags); if (!pmu->hw_events) { pr_info("failed to allocate per-cpu PMU data.\n"); goto out_free_pmu; @@ -817,6 +817,17 @@ struct arm_pmu *armpmu_alloc(void) return NULL; } +struct arm_pmu *armpmu_alloc(void) +{ + return __armpmu_alloc(GFP_KERNEL); +} + +struct arm_pmu *armpmu_alloc_atomic(void) +{ + return __armpmu_alloc(GFP_ATOMIC); +} + + void armpmu_free(struct arm_pmu *pmu) { free_percpu(pmu->hw_events); diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 705f1a390e312..30c5f2bbce59a 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -127,7 +127,7 @@ static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void) return pmu; } - pmu = armpmu_alloc(); + pmu = armpmu_alloc_atomic(); if (!pmu) { pr_warn("Unable to allocate PMU for CPU%d\n", smp_processor_id()); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 899bc7ef08812..1f8bb83ef42fc 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -157,6 +157,7 @@ static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); +struct arm_pmu *armpmu_alloc_atomic(void); void armpmu_free(struct arm_pmu *pmu); int armpmu_register(struct arm_pmu *pmu); int armpmu_request_irq(struct arm_pmu *armpmu, int cpu); From 43fc9a2febbd96dd39588d67ace456b7bbc73d9f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Feb 2018 16:41:59 +0000 Subject: [PATCH 169/269] arm_pmu: acpi: check for mismatched PPIs The arm_pmu platform code explicitly checks for mismatched PPIs at probe time, while the ACPI code leaves this to the core code. Future refactoring will make this difficult for the core code to check, so let's have the ACPI code check this explicitly. As before, upon a failure we'll continue on without an interrupt. Ho hum. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 17 ++++--------- drivers/perf/arm_pmu_acpi.c | 42 +++++++++++++++++++++++++++++---- drivers/perf/arm_pmu_platform.c | 7 ------ 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 4f73c5e8d6239..ddcabd6a5d52f 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -543,19 +543,7 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) if (!irq) return 0; - if (irq_is_percpu_devid(irq) && cpumask_empty(&armpmu->active_irqs)) { - err = request_percpu_irq(irq, handler, "arm-pmu", - &hw_events->percpu_pmu); - } else if (irq_is_percpu_devid(irq)) { - int other_cpu = cpumask_first(&armpmu->active_irqs); - int other_irq = per_cpu(hw_events->irq, other_cpu); - - if (irq != other_irq) { - pr_warn("mismatched PPIs detected.\n"); - err = -EINVAL; - goto err_out; - } - } else { + if (!irq_is_percpu_devid(irq)) { unsigned long irq_flags; err = irq_force_affinity(irq, cpumask_of(cpu)); @@ -572,6 +560,9 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) err = request_irq(irq, handler, irq_flags, "arm-pmu", per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + } else if (cpumask_empty(&armpmu->active_irqs)) { + err = request_percpu_irq(irq, handler, "arm-pmu", + &hw_events->percpu_pmu); } if (err) diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 30c5f2bbce59a..09a1a36cff579 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include @@ -139,6 +141,35 @@ static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void) return pmu; } +/* + * Check whether the new IRQ is compatible with those already associated with + * the PMU (e.g. we don't have mismatched PPIs). + */ +static bool pmu_irq_matches(struct arm_pmu *pmu, int irq) +{ + struct pmu_hw_events __percpu *hw_events = pmu->hw_events; + int cpu; + + if (!irq) + return true; + + for_each_cpu(cpu, &pmu->supported_cpus) { + int other_irq = per_cpu(hw_events->irq, cpu); + if (!other_irq) + continue; + + if (irq == other_irq) + continue; + if (!irq_is_percpu_devid(irq) && !irq_is_percpu_devid(other_irq)) + continue; + + pr_warn("mismatched PPIs detected\n"); + return false; + } + + return true; +} + /* * This must run before the common arm_pmu hotplug logic, so that we can * associate a CPU and its interrupt before the common code tries to manage the @@ -164,18 +195,21 @@ static int arm_pmu_acpi_cpu_starting(unsigned int cpu) if (!pmu) return -ENOMEM; - cpumask_set_cpu(cpu, &pmu->supported_cpus); - per_cpu(probed_pmus, cpu) = pmu; + if (pmu_irq_matches(pmu, irq)) { + hw_events = pmu->hw_events; + per_cpu(hw_events->irq, cpu) = irq; + } + + cpumask_set_cpu(cpu, &pmu->supported_cpus); + /* * Log and request the IRQ so the core arm_pmu code can manage it. In * some situations (e.g. mismatched PPIs), we may fail to request the * IRQ. However, it may be too late for us to do anything about it. * The common ARM PMU code will log a warning in this case. */ - hw_events = pmu->hw_events; - per_cpu(hw_events->irq, cpu) = irq; armpmu_request_irq(pmu, cpu); /* diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 244558cfdbce7..1dc3c1f574e0a 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -127,13 +127,6 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) pdev->dev.of_node); } - /* - * Some platforms have all PMU IRQs OR'd into a single IRQ, with a - * special platdata function that attempts to demux them. - */ - if (dev_get_platdata(&pdev->dev)) - cpumask_setall(&pmu->supported_cpus); - for (i = 0; i < num_irqs; i++) { int cpu, irq; From 6de3f79112cc26bf24edbb240248d21e1dd85dde Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Feb 2018 16:42:00 +0000 Subject: [PATCH 170/269] arm_pmu: explicitly enable/disable SPIs at hotplug To support ACPI systems, we need to request IRQs before CPUs are hotplugged, and thus we need to request IRQs before we know their associated PMU. This is problematic if a PMU IRQ is pending out of reset, as it may be taken before we know the PMU, and thus the IRQ handler won't be able to handle it, leaving it screaming. To avoid such problems, lets request all IRQs in a disabled state, and explicitly enable/disable them at hotplug time, when we're sure the PMU has been probed. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index ddcabd6a5d52f..72118e6f9122b 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -558,6 +558,7 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) IRQF_NOBALANCING | IRQF_NO_THREAD; + irq_set_status_flags(irq, IRQ_NOAUTOEN); err = request_irq(irq, handler, irq_flags, "arm-pmu", per_cpu_ptr(&hw_events->percpu_pmu, cpu)); } else if (cpumask_empty(&armpmu->active_irqs)) { @@ -600,10 +601,10 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node) irq = armpmu_get_cpu_irq(pmu, cpu); if (irq) { - if (irq_is_percpu_devid(irq)) { + if (irq_is_percpu_devid(irq)) enable_percpu_irq(irq, IRQ_TYPE_NONE); - return 0; - } + else + enable_irq(irq); } return 0; @@ -618,8 +619,12 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) return 0; irq = armpmu_get_cpu_irq(pmu, cpu); - if (irq && irq_is_percpu_devid(irq)) - disable_percpu_irq(irq); + if (irq) { + if (irq_is_percpu_devid(irq)) + disable_percpu_irq(irq); + else + disable_irq(irq); + } return 0; } From 84b4be57ae17f8c0b3c1d8629e10f23910838fd7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 12 Dec 2017 16:56:06 +0000 Subject: [PATCH 171/269] arm_pmu: note IRQs and PMUs per-cpu To support ACPI systems, we need to request IRQs before we know the associated PMU, and thus we need some percpu variable that the IRQ handler can find the PMU from. As we're going to request IRQs without the PMU, we can't rely on the arm_pmu::active_irqs mask, and similarly need to track requested IRQs with a percpu variable. Signed-off-by: Mark Rutland [will: made armpmu_count_irq_users static] Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 69 +++++++++++++++++++++++++++--------- include/linux/perf/arm_pmu.h | 1 - 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 72118e6f9122b..2b2af35db1b6e 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -25,6 +25,9 @@ #include +static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); +static DEFINE_PER_CPU(int, cpu_irq); + static int armpmu_map_cache_event(const unsigned (*cache_map) [PERF_COUNT_HW_CACHE_MAX] @@ -332,6 +335,8 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) * dereference. */ armpmu = *(void **)dev; + if (WARN_ON_ONCE(!armpmu)) + return IRQ_NONE; start_clock = sched_clock(); ret = armpmu->handle_irq(irq, armpmu); @@ -517,29 +522,45 @@ int perf_num_counters(void) } EXPORT_SYMBOL_GPL(perf_num_counters); -void armpmu_free_irq(struct arm_pmu *armpmu, int cpu) +static int armpmu_count_irq_users(const int irq) { - struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; - int irq = per_cpu(hw_events->irq, cpu); + int cpu, count = 0; - if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs)) - return; + for_each_possible_cpu(cpu) { + if (per_cpu(cpu_irq, cpu) == irq) + count++; + } + + return count; +} - if (irq_is_percpu_devid(irq)) { - free_percpu_irq(irq, &hw_events->percpu_pmu); - cpumask_clear(&armpmu->active_irqs); +void armpmu_free_cpu_irq(int irq, int cpu) +{ + if (per_cpu(cpu_irq, cpu) == 0) return; - } + if (WARN_ON(irq != per_cpu(cpu_irq, cpu))) + return; + + if (!irq_is_percpu_devid(irq)) + free_irq(irq, per_cpu_ptr(&cpu_armpmu, cpu)); + else if (armpmu_count_irq_users(irq) == 1) + free_percpu_irq(irq, &cpu_armpmu); - free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + per_cpu(cpu_irq, cpu) = 0; } -int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) +void armpmu_free_irq(struct arm_pmu *armpmu, int cpu) { - int err = 0; struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; - const irq_handler_t handler = armpmu_dispatch_irq; int irq = per_cpu(hw_events->irq, cpu); + + armpmu_free_cpu_irq(irq, cpu); +} + +int armpmu_request_cpu_irq(int irq, int cpu) +{ + int err = 0; + const irq_handler_t handler = armpmu_dispatch_irq; if (!irq) return 0; @@ -560,16 +581,16 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) irq_set_status_flags(irq, IRQ_NOAUTOEN); err = request_irq(irq, handler, irq_flags, "arm-pmu", - per_cpu_ptr(&hw_events->percpu_pmu, cpu)); - } else if (cpumask_empty(&armpmu->active_irqs)) { + per_cpu_ptr(&cpu_armpmu, cpu)); + } else if (armpmu_count_irq_users(irq) == 0) { err = request_percpu_irq(irq, handler, "arm-pmu", - &hw_events->percpu_pmu); + &cpu_armpmu); } if (err) goto err_out; - cpumask_set_cpu(cpu, &armpmu->active_irqs); + per_cpu(cpu_irq, cpu) = irq; return 0; err_out: @@ -577,6 +598,16 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) return err; } +int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) +{ + struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; + int irq = per_cpu(hw_events->irq, cpu); + if (!irq) + return 0; + + return armpmu_request_cpu_irq(irq, cpu); +} + static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu) { struct pmu_hw_events __percpu *hw_events = pmu->hw_events; @@ -599,6 +630,8 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node) if (pmu->reset) pmu->reset(pmu); + per_cpu(cpu_armpmu, cpu) = pmu; + irq = armpmu_get_cpu_irq(pmu, cpu); if (irq) { if (irq_is_percpu_devid(irq)) @@ -626,6 +659,8 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) disable_irq(irq); } + per_cpu(cpu_armpmu, cpu) = NULL; + return 0; } diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 1f8bb83ef42fc..feec9e7e85db8 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -75,7 +75,6 @@ enum armpmu_attr_groups { struct arm_pmu { struct pmu pmu; - cpumask_t active_irqs; cpumask_t supported_cpus; char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); From 167e61438da0664cab87c825a6c0cb83510d578e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 9 Oct 2017 17:09:05 +0100 Subject: [PATCH 172/269] arm_pmu: acpi: request IRQs up-front We can't request IRQs in atomic context, so for ACPI systems we'll have to request them up-front, and later associate them with CPUs. This patch reorganises the arm_pmu code to do so. As we no longer have the arm_pmu structure at probe time, a number of prototypes need to be adjusted, requiring changes to the common arm_pmu code and arm_pmu platform code. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 22 ++-------------------- drivers/perf/arm_pmu_acpi.c | 19 ++++++------------- drivers/perf/arm_pmu_platform.c | 15 ++++++++++++--- include/linux/perf/arm_pmu.h | 5 +++-- 4 files changed, 23 insertions(+), 38 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 2b2af35db1b6e..0c2ed11c06030 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -534,7 +534,7 @@ static int armpmu_count_irq_users(const int irq) return count; } -void armpmu_free_cpu_irq(int irq, int cpu) +void armpmu_free_irq(int irq, int cpu) { if (per_cpu(cpu_irq, cpu) == 0) return; @@ -549,15 +549,7 @@ void armpmu_free_cpu_irq(int irq, int cpu) per_cpu(cpu_irq, cpu) = 0; } -void armpmu_free_irq(struct arm_pmu *armpmu, int cpu) -{ - struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; - int irq = per_cpu(hw_events->irq, cpu); - - armpmu_free_cpu_irq(irq, cpu); -} - -int armpmu_request_cpu_irq(int irq, int cpu) +int armpmu_request_irq(int irq, int cpu) { int err = 0; const irq_handler_t handler = armpmu_dispatch_irq; @@ -598,16 +590,6 @@ int armpmu_request_cpu_irq(int irq, int cpu) return err; } -int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) -{ - struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; - int irq = per_cpu(hw_events->irq, cpu); - if (!irq) - return 0; - - return armpmu_request_cpu_irq(irq, cpu); -} - static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu) { struct pmu_hw_events __percpu *hw_events = pmu->hw_events; diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 09a1a36cff579..0f197516d7089 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -89,7 +89,13 @@ static int arm_pmu_acpi_parse_irqs(void) pr_warn("No ACPI PMU IRQ for CPU%d\n", cpu); } + /* + * Log and request the IRQ so the core arm_pmu code can manage + * it. We'll have to sanity-check IRQs later when we associate + * them with their PMUs. + */ per_cpu(pmu_irqs, cpu) = irq; + armpmu_request_irq(irq, cpu); } return 0; @@ -204,14 +210,6 @@ static int arm_pmu_acpi_cpu_starting(unsigned int cpu) cpumask_set_cpu(cpu, &pmu->supported_cpus); - /* - * Log and request the IRQ so the core arm_pmu code can manage it. In - * some situations (e.g. mismatched PPIs), we may fail to request the - * IRQ. However, it may be too late for us to do anything about it. - * The common ARM PMU code will log a warning in this case. - */ - armpmu_request_irq(pmu, cpu); - /* * Ideally, we'd probe the PMU here when we find the first matching * CPU. We can't do that for several reasons; see the comment in @@ -281,11 +279,6 @@ static int arm_pmu_acpi_init(void) if (acpi_disabled) return 0; - /* - * We can't request IRQs yet, since we don't know the cookie value - * until we know which CPUs share the same logical PMU. We'll handle - * that in arm_pmu_acpi_cpu_starting(). - */ ret = arm_pmu_acpi_parse_irqs(); if (ret) return ret; diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 1dc3c1f574e0a..7729eda5909df 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -159,10 +159,15 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) static int armpmu_request_irqs(struct arm_pmu *armpmu) { + struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; int cpu, err; for_each_cpu(cpu, &armpmu->supported_cpus) { - err = armpmu_request_irq(armpmu, cpu); + int irq = per_cpu(hw_events->irq, cpu); + if (!irq) + continue; + + err = armpmu_request_irq(irq, cpu); if (err) break; } @@ -173,9 +178,13 @@ static int armpmu_request_irqs(struct arm_pmu *armpmu) static void armpmu_free_irqs(struct arm_pmu *armpmu) { int cpu; + struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; - for_each_cpu(cpu, &armpmu->supported_cpus) - armpmu_free_irq(armpmu, cpu); + for_each_cpu(cpu, &armpmu->supported_cpus) { + int irq = per_cpu(hw_events->irq, cpu); + + armpmu_free_irq(irq, cpu); + } } int arm_pmu_device_probe(struct platform_device *pdev, diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index feec9e7e85db8..40036a57d072f 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -159,8 +160,8 @@ struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc_atomic(void); void armpmu_free(struct arm_pmu *pmu); int armpmu_register(struct arm_pmu *pmu); -int armpmu_request_irq(struct arm_pmu *armpmu, int cpu); -void armpmu_free_irq(struct arm_pmu *armpmu, int cpu); +int armpmu_request_irq(int irq, int cpu); +void armpmu_free_irq(int irq, int cpu); #define ARMV8_PMU_PDEV_NAME "armv8-pmu" From 0331365edb1d6ccd6ae68b1038111da85d4c68d1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 14 Feb 2018 17:21:57 +0000 Subject: [PATCH 173/269] arm64: perf: correct PMUVer probing The ID_AA64DFR0_EL1.PMUVer field doesn't follow the usual ID registers scheme. While value 0xf indicates a non-architected PMU is implemented, values 0x1 to 0xe indicate an increasingly featureful architected PMU, as if the field were unsigned. For more details, see ARM DDI 0487C.a, D10.1.4, "Alternative ID scheme used for the Performance Monitors Extension version". Currently, we treat the field as signed, and erroneously bail out for values 0x8 to 0xe. Let's correct that. Signed-off-by: Mark Rutland Reviewed-by: Robin Murphy Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 75b220ba73a32..85a251b6dfa84 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -908,9 +908,9 @@ static void __armv8pmu_probe_pmu(void *info) int pmuver; dfr0 = read_sysreg(id_aa64dfr0_el1); - pmuver = cpuid_feature_extract_signed_field(dfr0, + pmuver = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMUVER_SHIFT); - if (pmuver < 1) + if (pmuver == 0xf || pmuver == 0) return; probe->present = true; From 17539f2f4f0b7fa906b508765c8ada07a1e45f52 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Tue, 20 Feb 2018 07:30:10 -0600 Subject: [PATCH 174/269] usb: musb: fix enumeration after resume On dm3730 there are enumeration problems after resume. Investigation led to the cause that the MUSB_POWER_SOFTCONN bit is not set. If it was set before suspend (because it was enabled via musb_pullup()), it is set in musb_restore_context() so the pullup is enabled. But then musb_start() is called which overwrites MUSB_POWER and therefore disables MUSB_POWER_SOFTCONN, so no pullup is enabled and the device is not enumerated. So let's do a subset of what musb_start() does in the same way as musb_suspend() does it. Platform-specific stuff it still called as there might be some phy-related stuff which needs to be enabled. Also interrupts are enabled, as it was the original idea of calling musb_start() in musb_resume() according to Commit 6fc6f4b87cb3 ("usb: musb: Disable interrupts on suspend, enable them on resume") Signed-off-by: Andreas Kemnade Tested-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 968bf1e8b0fed..eef4ad578b31d 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2708,7 +2708,8 @@ static int musb_resume(struct device *dev) if ((devctl & mask) != (musb->context.devctl & mask)) musb->port1_status = 0; - musb_start(musb); + musb_enable_interrupts(musb); + musb_platform_enable(musb); spin_lock_irqsave(&musb->lock, flags); error = musb_run_resume_work(musb); From 44eb5e12b845cc8a0634f21b70ef07d774eb4b25 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 20 Feb 2018 07:31:35 -0600 Subject: [PATCH 175/269] Revert "usb: musb: host: don't start next rx urb if current one failed" This reverts commit dbac5d07d13e330e6706813c9fde477140fb5d80. commit dbac5d07d13e ("usb: musb: host: don't start next rx urb if current one failed") along with commit b5801212229f ("usb: musb: host: clear rxcsr error bit if set") try to solve the issue described in [1], but the latter alone is sufficient, and the former causes the issue as in [2], so now revert it. [1] https://marc.info/?l=linux-usb&m=146173995117456&w=2 [2] https://marc.info/?l=linux-usb&m=151689238420622&w=2 Cc: stable@vger.kernel.org # v4.7+ Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 394b4ac861617..45ed32c2cba94 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -391,13 +391,7 @@ static void musb_advance_schedule(struct musb *musb, struct urb *urb, } } - /* - * The pipe must be broken if current urb->status is set, so don't - * start next urb. - * TODO: to minimize the risk of regression, only check urb->status - * for RX, until we have a test case to understand the behavior of TX. - */ - if ((!status || !is_in) && qh && qh->is_ready) { + if (qh != NULL && qh->is_ready) { musb_dbg(musb, "... next ep%d %cX urb %p", hw_ep->epnum, is_in ? 'R' : 'T', next_urb(qh)); musb_start_urb(musb, is_in, qh); From 6ae1756faddefd7494353380ee546dd38c2f97eb Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 20 Feb 2018 15:44:37 +0000 Subject: [PATCH 176/269] MIPS: Drop spurious __unused in struct compat_flock MIPS' struct compat_flock doesn't match the 32-bit struct flock, as it has an extra short __unused before pad[4], which combined with alignment increases the size to 40 bytes compared with struct flock's 36 bytes. Since commit 8c6657cb50cb ("Switch flock copyin/copyout primitives to copy_{from,to}_user()"), put_compat_flock() writes the full compat_flock struct to userland, which results in corruption of the userland word after the struct flock when running 32-bit userlands on 64-bit kernels. This was observed to cause a bus error exception when starting Firefox on Debian 8 (Jessie). Reported-by: Peter Mamonov Signed-off-by: James Hogan Tested-by: Peter Mamonov Cc: Ralf Baechle Cc: Al Viro Cc: linux-mips@linux-mips.org Cc: # 4.13+ Patchwork: https://patchwork.linux-mips.org/patch/18646/ --- arch/mips/include/asm/compat.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h index 946681db8dc3a..9a0fa66b81aca 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -86,7 +86,6 @@ struct compat_flock { compat_off_t l_len; s32 l_sysid; compat_pid_t l_pid; - short __unused; s32 pad[4]; }; From 7ff662b76167fd9a68254352287c5de0dc698942 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 15 Feb 2018 21:20:08 -0800 Subject: [PATCH 177/269] RDMA/bnxt_re: Disable atomic capability on bnxt_re adapters More testing needs to be done before enabling this feature. Disabling the feature temporarily Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 6 ++---- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 14 +------------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index ae9e9ff54826b..280354ffa6421 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -174,10 +174,8 @@ int bnxt_re_query_device(struct ib_device *ibdev, ib_attr->max_pd = dev_attr->max_pd; ib_attr->max_qp_rd_atom = dev_attr->max_qp_rd_atom; ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_init_rd_atom; - if (dev_attr->is_atomic) { - ib_attr->atomic_cap = IB_ATOMIC_HCA; - ib_attr->masked_atomic_cap = IB_ATOMIC_HCA; - } + ib_attr->atomic_cap = IB_ATOMIC_NONE; + ib_attr->masked_atomic_cap = IB_ATOMIC_NONE; ib_attr->max_ee_rd_atom = 0; ib_attr->max_res_rd_atom = 0; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index c015c1861351a..03057983341f7 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -52,18 +52,6 @@ const struct bnxt_qplib_gid bnxt_qplib_gid_zero = {{ 0, 0, 0, 0, 0, 0, 0, 0, /* Device */ -static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw) -{ - int rc; - u16 pcie_ctl2; - - rc = pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2, - &pcie_ctl2); - if (rc) - return false; - return !!(pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ); -} - static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw, char *fw_ver) { @@ -165,7 +153,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc); } - attr->is_atomic = bnxt_qplib_is_atomic_cap(rcfw); + attr->is_atomic = 0; bail: bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); return rc; From 6b4521f5174c26020ae0deb3ef7f2c28557cf445 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 15 Feb 2018 21:20:10 -0800 Subject: [PATCH 178/269] RDMA/bnxt_re: Unpin SQ and RQ memory if QP create fails Driver leaves the QP memory pinned if QP create command fails from the FW. Avoids this scenario by adding a proper exit path if the FW command fails. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 280354ffa6421..29e6b17365048 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1183,7 +1183,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd, rc = bnxt_qplib_create_qp(&rdev->qplib_res, &qp->qplib_qp); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to create HW QP"); - goto fail; + goto free_umem; } } @@ -1211,6 +1211,13 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd, return &qp->ib_qp; qp_destroy: bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp); +free_umem: + if (udata) { + if (qp->rumem) + ib_umem_release(qp->rumem); + if (qp->sumem) + ib_umem_release(qp->sumem); + } fail: kfree(qp); return ERR_PTR(rc); From 3b921e3bc4c20af58a663ed238ad57e87493dde2 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 15 Feb 2018 21:20:11 -0800 Subject: [PATCH 179/269] RDMA/bnxt_re: Synchronize destroy_qp with poll_cq Avoid system crash when destroy_qp is invoked while the driver is processing the poll_cq. Synchronize these functions using the cq_lock. Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 39 ++++++++++++++++++++++-- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 2 ++ drivers/infiniband/hw/bnxt_re/qplib_fp.c | 21 ++++--------- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 4 ++- 4 files changed, 47 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 29e6b17365048..643174d949a8c 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -785,20 +785,51 @@ int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) return 0; } +static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp) + __acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock) +{ + unsigned long flags; + + spin_lock_irqsave(&qp->scq->cq_lock, flags); + if (qp->rcq != qp->scq) + spin_lock(&qp->rcq->cq_lock); + else + __acquire(&qp->rcq->cq_lock); + + return flags; +} + +static void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, + unsigned long flags) + __releases(&qp->scq->cq_lock) __releases(&qp->rcq->cq_lock) +{ + if (qp->rcq != qp->scq) + spin_unlock(&qp->rcq->cq_lock); + else + __release(&qp->rcq->cq_lock); + spin_unlock_irqrestore(&qp->scq->cq_lock, flags); +} + /* Queue Pairs */ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) { struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_re_dev *rdev = qp->rdev; int rc; + unsigned int flags; bnxt_qplib_flush_cqn_wq(&qp->qplib_qp); - bnxt_qplib_del_flush_qp(&qp->qplib_qp); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to destroy HW QP"); return rc; } + + flags = bnxt_re_lock_cqs(qp); + bnxt_qplib_clean_qp(&qp->qplib_qp); + bnxt_re_unlock_cqs(qp, flags); + bnxt_qplib_free_qp_res(&rdev->qplib_res, &qp->qplib_qp); + if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) { rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &rdev->sqp_ah->qplib_ah); @@ -808,7 +839,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) return rc; } - bnxt_qplib_del_flush_qp(&qp->qplib_qp); + bnxt_qplib_clean_qp(&qp->qplib_qp); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &rdev->qp1_sqp->qplib_qp); if (rc) { @@ -1067,6 +1098,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd, goto fail; } qp->qplib_qp.scq = &cq->qplib_cq; + qp->scq = cq; } if (qp_init_attr->recv_cq) { @@ -1078,6 +1110,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd, goto fail; } qp->qplib_qp.rcq = &cq->qplib_cq; + qp->rcq = cq; } if (qp_init_attr->srq) { @@ -1608,7 +1641,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, dev_dbg(rdev_to_dev(rdev), "Move QP = %p out of flush list\n", qp); - bnxt_qplib_del_flush_qp(&qp->qplib_qp); + bnxt_qplib_clean_qp(&qp->qplib_qp); } } if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) { diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 423ebe012f957..b88a48d43a9dd 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -89,6 +89,8 @@ struct bnxt_re_qp { /* QP1 */ u32 send_psn; struct ib_ud_header qp1_hdr; + struct bnxt_re_cq *scq; + struct bnxt_re_cq *rcq; }; struct bnxt_re_cq { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 1b0e94697fe34..3ea5b9624f6b7 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -173,7 +173,7 @@ static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp) } } -void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp) +void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp) { unsigned long flags; @@ -1419,7 +1419,6 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct cmdq_destroy_qp req; struct creq_destroy_qp_resp resp; - unsigned long flags; u16 cmd_flags = 0; int rc; @@ -1437,19 +1436,12 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, return rc; } - /* Must walk the associated CQs to nullified the QP ptr */ - spin_lock_irqsave(&qp->scq->hwq.lock, flags); - - __clean_cq(qp->scq, (u64)(unsigned long)qp); - - if (qp->rcq && qp->rcq != qp->scq) { - spin_lock(&qp->rcq->hwq.lock); - __clean_cq(qp->rcq, (u64)(unsigned long)qp); - spin_unlock(&qp->rcq->hwq.lock); - } - - spin_unlock_irqrestore(&qp->scq->hwq.lock, flags); + return 0; +} +void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res, + struct bnxt_qplib_qp *qp) +{ bnxt_qplib_free_qp_hdr_buf(res, qp); bnxt_qplib_free_hwq(res->pdev, &qp->sq.hwq); kfree(qp->sq.swq); @@ -1462,7 +1454,6 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, if (qp->orrq.max_elements) bnxt_qplib_free_hwq(res->pdev, &qp->orrq); - return 0; } void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 211b27a8f9e27..ca0a2ffa35090 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -478,6 +478,9 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp); int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp); int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp); int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp); +void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp); +void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res, + struct bnxt_qplib_qp *qp); void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp, struct bnxt_qplib_sge *sge); void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp, @@ -500,7 +503,6 @@ void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type); void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq); int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq); void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp); -void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp); void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp, unsigned long *flags); void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp, From dcdaba08062b4726500b9456f8664bfda896c664 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 15 Feb 2018 21:20:12 -0800 Subject: [PATCH 180/269] RDMA/bnxt_re: Fix system crash during load/unload During driver unload, the driver proceeds with cleanup without waiting for the scheduled events. So the device pointers get freed up and driver crashes when the events are scheduled later. Flush the bnxt_re_task work queue before starting device removal. Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 508d00a5a1066..7f9298db507b4 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1578,6 +1578,11 @@ static void __exit bnxt_re_mod_exit(void) */ list_for_each_entry_safe_reverse(rdev, next, &to_be_deleted, list) { dev_info(rdev_to_dev(rdev), "Unregistering Device"); + /* + * Flush out any scheduled tasks before destroying the + * resources + */ + flush_workqueue(bnxt_re_wq); bnxt_re_dev_stop(rdev); bnxt_re_ib_unreg(rdev, true); bnxt_re_remove_one(rdev); From 7374fbd9e167ddc4f380d056ca74518be5d45518 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 15 Feb 2018 21:20:13 -0800 Subject: [PATCH 181/269] RDMA/bnxt_re: Avoid system hang during device un-reg BNXT_RE_FLAG_TASK_IN_PROG doesn't handle multiple work requests posted together. Track schedule of multiple workqueue items by maintaining a per device counter and proceed with IB dereg only if this counter is zero. flush_workqueue is no longer required from NETDEV_UNREGISTER path. Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 2 +- drivers/infiniband/hw/bnxt_re/main.c | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index ca32057e886f0..3eb7a8387116d 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -120,7 +120,6 @@ struct bnxt_re_dev { #define BNXT_RE_FLAG_HAVE_L2_REF 3 #define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4 #define BNXT_RE_FLAG_QOS_WORK_REG 5 -#define BNXT_RE_FLAG_TASK_IN_PROG 6 #define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29 struct net_device *netdev; unsigned int version, major, minor; @@ -158,6 +157,7 @@ struct bnxt_re_dev { atomic_t srq_count; atomic_t mr_count; atomic_t mw_count; + atomic_t sched_count; /* Max of 2 lossless traffic class supported per port */ u16 cosq[2]; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 7f9298db507b4..33a448036c2eb 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -656,7 +656,6 @@ static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev) mutex_unlock(&bnxt_re_dev_lock); synchronize_rcu(); - flush_workqueue(bnxt_re_wq); ib_dealloc_device(&rdev->ibdev); /* rdev is gone */ @@ -1441,7 +1440,7 @@ static void bnxt_re_task(struct work_struct *work) break; } smp_mb__before_atomic(); - clear_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags); + atomic_dec(&rdev->sched_count); kfree(re_work); } @@ -1503,7 +1502,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, /* netdev notifier will call NETDEV_UNREGISTER again later since * we are still holding the reference to the netdev */ - if (test_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags)) + if (atomic_read(&rdev->sched_count) > 0) goto exit; bnxt_re_ib_unreg(rdev, false); bnxt_re_remove_one(rdev); @@ -1523,7 +1522,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, re_work->vlan_dev = (real_dev == netdev ? NULL : netdev); INIT_WORK(&re_work->work, bnxt_re_task); - set_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags); + atomic_inc(&rdev->sched_count); queue_work(bnxt_re_wq, &re_work->work); } } From ab0dc41b7324329af1c18580b0fc891922a717cf Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Mon, 5 Feb 2018 02:21:19 +0100 Subject: [PATCH 182/269] riscv: Remove ARCH_WANT_OPTIONAL_GPIOLIB select The ARCH_WANT_OPTIONAL_GPIOLIB symbol was removed in commit 65053e1a7743 ("gpio: delete ARCH_[WANTS_OPTIONAL|REQUIRE]_GPIOLIB"). GPIOLIB should just be selected explicitly if needed. Remove the ARCH_WANT_OPTIONAL_GPIOLIB select from RISCV. See commit 0145071b3314 ("x86: Do away with ARCH_[WANT_OPTIONAL|REQUIRE]_GPIOLIB") and commit da9a1c6767 ("arm64: do away with ARCH_[WANT_OPTIONAL|REQUIRE]_GPIOLIB") as well. Discovered with the https://github.com/ulfalizer/Kconfiglib/blob/master/examples/list_undefined.py script. Reviewed-by: Linus Walleij Signed-off-by: Ulf Magnusson Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index b6722c246d9c8..f9fd6ed042b9f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -20,7 +20,6 @@ config RISCV select GENERIC_STRNLEN_USER select GENERIC_SMP_IDLE_THREAD select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A - select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_DMA_API_DEBUG From 2aaa2dc31bee808703c24ce626e50d1b6d8c7f9c Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Thu, 8 Feb 2018 23:54:46 +0100 Subject: [PATCH 183/269] riscv: kconfig: Remove RISCV_IRQ_INTC select The RISCV_IRQ_INTC configuration symbol is undefined, but RISCV selects it. Quoting Palmer Dabbelt: It looks like this slipped through, the symbol has been renamed RISCV_INTC. No RISCV_INTC configuration symbol has been merged either. Just remove the RISCV_IRQ_INTC select for now. Signed-off-by: Ulf Magnusson Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index f9fd6ed042b9f..97407480982f9 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -33,7 +33,6 @@ config RISCV select HAVE_ARCH_TRACEHOOK select MODULES_USE_ELF_RELA if MODULES select THREAD_INFO_IN_TASK - select RISCV_IRQ_INTC select RISCV_TIMER config MMU From 89a4b4441206962d1bbb62f128604a269b60933d Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Mon, 5 Feb 2018 02:21:18 +0100 Subject: [PATCH 184/269] riscv: Remove ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE symbol was removed in commit 51a021244b9d ("atomic64: no need for CONFIG_ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE"). Remove the ARCH_HAS_ATOMIC64_DEC_IS_POSITIVE select from RISCV. Discovered with the https://github.com/ulfalizer/Kconfiglib/blob/master/examples/list_undefined.py script. Signed-off-by: Ulf Magnusson Reviewed-by: Jonathan Neuschäfer Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 97407480982f9..04807c7f64cc5 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -8,7 +8,6 @@ config RISCV select OF select OF_EARLY_FLATTREE select OF_IRQ - select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_WANT_FRAME_POINTERS select CLONE_BACKWARDS select COMMON_CLK From bcae803a213172c79ab5d077f169e3428e44d2ba Mon Sep 17 00:00:00 2001 From: "zongbox@gmail.com" Date: Mon, 29 Jan 2018 23:51:45 -0800 Subject: [PATCH 185/269] RISC-V: Enable IRQ during exception handling Interrupt is allowed during exception handling. There are warning messages if the kernel enables the configuration 'CONFIG_DEBUG_ATOMIC_SLEEP=y'. BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:23 in_atomic(): 0, irqs_disabled(): 1, pid: 43, name: ash CPU: 0 PID: 43 Comm: ash Tainted: G W 4.15.0-rc8-00089-g89ffdae-dirty #17 Call Trace: [<000000009abb1587>] walk_stackframe+0x0/0x7a [<00000000d4f3d088>] ___might_sleep+0x102/0x11a [<00000000b1fd792a>] down_read+0x18/0x28 [<000000000289ec01>] do_page_fault+0x86/0x2f6 [<00000000012441f6>] _do_fork+0x1b4/0x1e0 [<00000000f46c3e3b>] ret_from_syscall+0xa/0xe Reviewed-by: Christoph Hellwig Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 87fc045be51fa..56fa592cfa349 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -172,6 +172,9 @@ ENTRY(handle_exception) move a1, sp /* pt_regs */ tail do_IRQ 1: + /* Exceptions run with interrupts enabled */ + csrs sstatus, SR_SIE + /* Handle syscalls */ li t0, EXC_SYSCALL beq s4, t0, handle_syscall @@ -198,8 +201,6 @@ handle_syscall: */ addi s2, s2, 0x4 REG_S s2, PT_SEPC(sp) - /* System calls run with interrupts enabled */ - csrs sstatus, SR_SIE /* Trace syscalls, but only if requested by the user. */ REG_L t0, TASK_TI_FLAGS(tp) andi t0, t0, _TIF_SYSCALL_TRACE From 8b08f50152ff85a4780e5c385d2b65889406e842 Mon Sep 17 00:00:00 2001 From: Michael Clark Date: Fri, 16 Feb 2018 09:30:29 +1300 Subject: [PATCH 186/269] Rename sbi_save to parse_dtb to improve code readability The sbi_ prefix would seem to indicate an SBI interface, and save is not very specific. After applying this patch, reading head.S makes more sense. Signed-off-by: Michael Clark Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/head.S | 2 +- arch/riscv/kernel/setup.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 226eeb190f908..6e07ed37bbff7 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -64,7 +64,7 @@ ENTRY(_start) /* Start the kernel */ mv a0, s0 mv a1, s1 - call sbi_save + call parse_dtb tail start_kernel relocate: diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 09f7064e898cc..c11f40c1b2a88 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -144,7 +144,7 @@ asmlinkage void __init setup_vm(void) #endif } -void __init sbi_save(unsigned int hartid, void *dtb) +void __init parse_dtb(unsigned int hartid, void *dtb) { early_init_dt_scan(__va(dtb)); } From abe27a885d9e6575e663a16176dabc58ce9d7188 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 19 Feb 2018 20:12:57 -0600 Subject: [PATCH 187/269] ibmvnic: Check for NULL skb's in NAPI poll routine After introduction of commit d0869c0071e4, there were some instances of RX queue entries from a previous session (before the device was closed and reopened) returned to the NAPI polling routine. Since the corresponding socket buffers were freed, this resulted in a panic on reopen. Include a check for a NULL skb here to avoid this. Fixes: d0869c0071e4 ("ibmvnic: Clean RX pool buffers during device close") Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 996f47568f9e3..1495cb99f9249 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1901,6 +1901,11 @@ static int ibmvnic_poll(struct napi_struct *napi, int budget) dev_kfree_skb_any(rx_buff->skb); remove_buff_from_pool(adapter, rx_buff); continue; + } else if (!rx_buff->skb) { + /* free the entry */ + next->rx_comp.first = 0; + remove_buff_from_pool(adapter, rx_buff); + continue; } length = be32_to_cpu(next->rx_comp.len); From b1a2ce825737b0165cc08e6f98f8c0ea1affdd60 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 20 Feb 2018 01:00:07 +0000 Subject: [PATCH 188/269] tools/libbpf: Avoid possibly using uninitialized variable Fixes a GCC maybe-uninitialized warning introduced by 48cca7e44f9f. "text" is only initialized inside the if statement so only print debug info there. Fixes: 48cca7e44f9f ("libbpf: add support for bpf_call") Signed-off-by: Jeremy Cline Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 97073d649c1a0..5bbbf285af74a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1060,11 +1060,12 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, prog->insns = new_insn; prog->main_prog_cnt = prog->insns_cnt; prog->insns_cnt = new_cnt; + pr_debug("added %zd insn from %s to prog %s\n", + text->insns_cnt, text->section_name, + prog->section_name); } insn = &prog->insns[relo->insn_idx]; insn->imm += prog->main_prog_cnt - relo->insn_idx; - pr_debug("added %zd insn from %s to prog %s\n", - text->insns_cnt, text->section_name, prog->section_name); return 0; } From 8babd44d2079079f9d5a4aca7005aed80236efe0 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 20 Dec 2017 08:48:24 +0200 Subject: [PATCH 189/269] net/mlx5e: Fix TCP checksum in LRO buffers When receiving an LRO packet, the checksum field is set by the hardware to the checksum of the first coalesced packet. Obviously, this checksum is not valid for the merged LRO packet and should be fixed. We can use the CQE checksum which covers the checksum of the entire merged packet TCP payload to help us calculate the checksum incrementally. Tested by sending IPv4/6 traffic with LRO enabled, RX checksum disabled and watching nstat checksum error counters (in addition to the obvious bandwidth drop caused by checksum errors). This bug is usually "hidden" since LRO packets would go through the CHECKSUM_UNNECESSARY flow which does not validate the packet checksum. It's important to note that previous to this patch, LRO packets provided with CHECKSUM_UNNECESSARY are indeed packets with a correct validated checksum (even though the checksum inside the TCP header is incorrect), since the hardware LRO aggregation is terminated upon receiving a packet with bad checksum. Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 49 +++++++++++++------ 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0d4bb0688faa1..e5c3ab46a24a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "en.h" #include "en_tc.h" #include "eswitch.h" @@ -546,20 +547,33 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) return true; } +static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp) +{ + u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); + u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || + (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA); + + tcp->check = 0; + tcp->psh = get_cqe_lro_tcppsh(cqe); + + if (tcp_ack) { + tcp->ack = 1; + tcp->ack_seq = cqe->lro_ack_seq_num; + tcp->window = cqe->lro_tcp_win; + } +} + static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { struct ethhdr *eth = (struct ethhdr *)(skb->data); struct tcphdr *tcp; int network_depth = 0; + __wsum check; __be16 proto; u16 tot_len; void *ip_p; - u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); - u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || - (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA); - proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); tot_len = cqe_bcnt - network_depth; @@ -576,23 +590,30 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, ipv4->check = 0; ipv4->check = ip_fast_csum((unsigned char *)ipv4, ipv4->ihl); + + mlx5e_lro_update_tcp_hdr(cqe, tcp); + check = csum_partial(tcp, tcp->doff * 4, + csum_unfold((__force __sum16)cqe->check_sum)); + /* Almost done, don't forget the pseudo header */ + tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr, + tot_len - sizeof(struct iphdr), + IPPROTO_TCP, check); } else { + u16 payload_len = tot_len - sizeof(struct ipv6hdr); struct ipv6hdr *ipv6 = ip_p; tcp = ip_p + sizeof(struct ipv6hdr); skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; ipv6->hop_limit = cqe->lro_min_ttl; - ipv6->payload_len = cpu_to_be16(tot_len - - sizeof(struct ipv6hdr)); - } - - tcp->psh = get_cqe_lro_tcppsh(cqe); - - if (tcp_ack) { - tcp->ack = 1; - tcp->ack_seq = cqe->lro_ack_seq_num; - tcp->window = cqe->lro_tcp_win; + ipv6->payload_len = cpu_to_be16(payload_len); + + mlx5e_lro_update_tcp_hdr(cqe, tcp); + check = csum_partial(tcp, tcp->doff * 4, + csum_unfold((__force __sum16)cqe->check_sum)); + /* Almost done, don't forget the pseudo header */ + tcp->check = csum_ipv6_magic(&ipv6->saddr, &ipv6->daddr, payload_len, + IPPROTO_TCP, check); } } From ef7a3518f7dd4f4cf5e5b5358c93d1eb78df28fb Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Thu, 7 Dec 2017 17:26:33 +0200 Subject: [PATCH 190/269] net/mlx5e: Fix loopback self test when GRO is off When GRO is off, the transport header pointer in sk_buff is initialized to network's header. To find the udp header, instead of using udp_hdr() which assumes skb_network_header was set, manually calculate the udp header offset. Fixes: 0952da791c97 ("net/mlx5e: Add support for loopback selftest") Signed-off-by: Inbar Karmy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 5a4608281f38d..707976482c098 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -216,7 +216,8 @@ mlx5e_test_loopback_validate(struct sk_buff *skb, if (iph->protocol != IPPROTO_UDP) goto out; - udph = udp_hdr(skb); + /* Don't assume skb_transport_header() was set */ + udph = (struct udphdr *)((u8 *)iph + 4 * iph->ihl); if (udph->dest != htons(9)) goto out; From f600c6088018d1dbc5777d18daa83660f7ea4a64 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 25 Jan 2018 11:18:09 +0200 Subject: [PATCH 191/269] net/mlx5e: Verify inline header size do not exceed SKB linear size Driver tries to copy at least MLX5E_MIN_INLINE bytes into the control segment of the WQE. It assumes that the linear part contains at least MLX5E_MIN_INLINE bytes, which can be wrong. Cited commit verified that driver will not copy more bytes into the inline header part that the actual size of the packet. Re-factor this check to make sure we do not exceed the linear part as well. This fix is aligned with the current driver's assumption that the entire L2 will be present in the linear part of the SKB. Fixes: 6aace17e64f4 ("net/mlx5e: Fix inline header size for small packets") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 569b42a010265..11b4f1089d1ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -176,7 +176,7 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode, default: hlen = mlx5e_skb_l2_header_offset(skb); } - return min_t(u16, hlen, skb->len); + return min_t(u16, hlen, skb_headlen(skb)); } static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data, From 9afe9a5353778994d4396f3d5ff639221bfa5cc9 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 1 Jan 2018 13:19:51 +0000 Subject: [PATCH 192/269] net/mlx5e: Eliminate build warnings on no previous prototype Fix these gcc warnings on drivers/net/ethernet/mellanox/mlx5: [..]/core/lib/clock.c:454:6: warning: no previous prototype for 'mlx5_init_clock' [-Wmissing-prototypes] [..]/core/lib/clock.c:510:6: warning: no previous prototype for 'mlx5_cleanup_clock' [-Wmissing-prototypes] [..]/core/en_main.c:3141:5: warning: no previous prototype for 'mlx5e_setup_tc' [-Wmissing-prototypes] Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 47bab842c5eea..a64b9226d281e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2994,8 +2994,8 @@ static int mlx5e_setup_tc_block(struct net_device *dev, } #endif -int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, - void *type_data) +static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) { switch (type) { #ifdef CONFIG_MLX5_ESWITCH diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index e159243e0fcfb..857035583ccdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -34,6 +34,7 @@ #include #include #include "en.h" +#include "clock.h" enum { MLX5_CYCLES_SHIFT = 23 From 4f5c02f949973b7c9dfa8a7c23d766b1208d208f Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 1 Jan 2018 13:29:53 +0000 Subject: [PATCH 193/269] net/mlx5: Address static checker warnings on non-constant initializers Address these sparse warnings on drivers/net/ethernet/mellanox/mlx5 [..]/core/diag/fs_tracepoint.c:99:53: warning: non-constant initializer for static object [..]/core/diag/fs_tracepoint.c:102:53: warning: non-constant initializer for static object etc Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index 0be4575b58a27..fd509160c8f6c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -96,10 +96,10 @@ static void print_lyr_2_4_hdrs(struct trace_seq *p, "%pI4"); } else if (ethertype.v == ETH_P_IPV6) { static const struct in6_addr full_ones = { - .in6_u.u6_addr32 = {htonl(0xffffffff), - htonl(0xffffffff), - htonl(0xffffffff), - htonl(0xffffffff)}, + .in6_u.u6_addr32 = {__constant_htonl(0xffffffff), + __constant_htonl(0xffffffff), + __constant_htonl(0xffffffff), + __constant_htonl(0xffffffff)}, }; DECLARE_MASK_VAL(struct in6_addr, src_ipv6); DECLARE_MASK_VAL(struct in6_addr, dst_ipv6); From 001a2fc0c8cc29241305e44ffbce52d1daf8782b Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 30 Jan 2018 13:16:58 +0200 Subject: [PATCH 194/269] net/mlx5e: Return error if prio is specified when offloading eswitch vlan push This isn't supported when we emulate eswitch vlan push action which is the current state of things. Fixes: 8b32580df1cb ('net/mlx5e: Add TC vlan action for SRIOV offloads') Signed-off-by: Or Gerlitz Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fd98b0dc610ff..fa86a14667180 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2529,7 +2529,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) { attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) { - if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q)) + if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) || + tcf_vlan_push_prio(a)) return -EOPNOTSUPP; attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; From 2f0db87901698cd73d828cc6fb1957b8916fc911 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 25 Jan 2018 18:00:41 +0200 Subject: [PATCH 195/269] net/mlx5e: Specify numa node when allocating drop rq When allocating a drop rq, no numa node is explicitly set which means allocations are done on node zero. This is not necessarily the nearest numa node to the HCA, and even worse, might even be a memoryless numa node. Choose the numa_node given to us by the pci device in order to properly allocate the coherent dma memory instead of assuming zero is valid. Fixes: 556dd1b9c313 ("net/mlx5e: Set drop RQ's necessary parameters only") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a64b9226d281e..da94c8cba5ee1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1768,13 +1768,16 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, param->wq.linear = 1; } -static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param) +static void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, + struct mlx5e_rq_param *param) { void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); + + param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev); } static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, @@ -2634,6 +2637,9 @@ static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev, struct mlx5e_cq *cq, struct mlx5e_cq_param *param) { + param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev); + param->wq.db_numa_node = dev_to_node(&mdev->pdev->dev); + return mlx5e_alloc_cq_common(mdev, param, cq); } @@ -2645,7 +2651,7 @@ static int mlx5e_open_drop_rq(struct mlx5_core_dev *mdev, struct mlx5e_cq *cq = &drop_rq->cq; int err; - mlx5e_build_drop_rq_param(&rq_param); + mlx5e_build_drop_rq_param(mdev, &rq_param); err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param); if (err) From c67f100edae0d2f43e8b35955f7710d702efd590 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Fri, 2 Feb 2018 09:32:53 -0600 Subject: [PATCH 196/269] net/mlx5: Use 128B cacheline size for 128B or larger cachelines The adapter uses the cache_line_128byte setting to set the bounds for end padding. On systems where the cacheline size is greater than 128B use 128B instead of the default of 64B. This results in fewer partial cacheline writes. There's a 50% chance it will pad to the end of a 256B cache line vs only 25% when using 64B. Fixes: f32f5bd2eb7e ("net/mlx5: Configure cache line size for start and end padding") Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 2ef641c91c267..ae391e4b70706 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -551,7 +551,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) MLX5_SET(cmd_hca_cap, set_hca_cap, cache_line_128byte, - cache_line_size() == 128 ? 1 : 0); + cache_line_size() >= 128 ? 1 : 0); if (MLX5_CAP_GEN_MAX(dev, dct)) MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1); From 96de67a77293b4da48a05f6ec0385f60006a7ba6 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 11 Feb 2018 13:26:06 +0200 Subject: [PATCH 197/269] net/mlx5: Add header re-write to the checks for conflicting actions We can't allow only some of the rules sharing an FTE to ask for header re-write, add it to the conflicting action checks. Fixes: 0d235c3fabb7 ('net/mlx5: Add hash table to search FTEs in a flow-group') Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index c025c98700e4c..6caa4a7ad8699 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1429,7 +1429,8 @@ static bool check_conflicting_actions(u32 action1, u32 action2) if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_ENCAP | - MLX5_FLOW_CONTEXT_ACTION_DECAP)) + MLX5_FLOW_CONTEXT_ACTION_DECAP | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) return true; return false; From 26a0f6e82997d5c8345782b55d3a7894421f777f Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Wed, 31 Jan 2018 09:36:29 +0200 Subject: [PATCH 198/269] net/mlx5: E-Switch, Fix drop counters use before creation First use of drop counters happens in esw_apply_vport_conf function, while they are allocated later in the flow. Fix that by moving esw_vport_create_drop_counters function to be called before the first use. Fixes: b8a0dbe3a90b ("net/mlx5e: E-switch, Add steering drop counters") Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 5ecf2cddc16df..c2b1d7d351fc2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1529,6 +1529,10 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); + /* Create steering drop counters for ingress and egress ACLs */ + if (vport_num && esw->mode == SRIOV_LEGACY) + esw_vport_create_drop_counters(vport); + /* Restore old vport configuration */ esw_apply_vport_conf(esw, vport); @@ -1545,10 +1549,6 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, if (!vport_num) vport->info.trusted = true; - /* create steering drop counters for ingress and egress ACLs */ - if (vport_num && esw->mode == SRIOV_LEGACY) - esw_vport_create_drop_counters(vport); - esw_vport_change_handle_locked(vport); esw->enabled_vports++; From 9238e380e823a39983ee8d6b6ee8d1a9c4ba8a65 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 6 Feb 2018 10:52:19 +0200 Subject: [PATCH 199/269] net/mlx5: Fix error handling when adding flow rules If building match list or adding existing fg fails when node is locked, function returned without unlocking it. This happened if node version changed or adding existing fg returned with EAGAIN after jumping to search_again_locked label. Fixes: bd71b08ec2ee ("net/mlx5: Support multiple updates of steering rules in parallel") Signed-off-by: Vlad Buslov Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 6caa4a7ad8699..31fc2cfac3b3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1759,8 +1759,11 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, /* Collect all fgs which has a matching match_criteria */ err = build_match_list(&match_head, ft, spec); - if (err) + if (err) { + if (take_write) + up_write_ref_node(&ft->node); return ERR_PTR(err); + } if (!take_write) up_read_ref_node(&ft->node); @@ -1769,8 +1772,11 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, dest_num, version); free_match_list(&match_head); if (!IS_ERR(rule) || - (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) + (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) { + if (take_write) + up_write_ref_node(&ft->node); return rule; + } if (!take_write) { nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT); From b3b12ea3661958bc093e258b7c0dd0a13bdcc719 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 19 Feb 2018 18:59:36 +0100 Subject: [PATCH 200/269] drm/edid: quirk Oculus Rift headsets as non-desktop This uses the EDID info from Oculus Rift DK1 (OVR-0001), DK2 (OVR-0003), and CV1 (OVR-0004) to mark them as non-desktop. Signed-off-by: Philipp Zabel Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_edid.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index ddd5379145758..d6fa56bb6906b 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -162,6 +162,11 @@ static const struct edid_quirk { /* HTC Vive VR Headset */ { "HVR", 0xaa01, EDID_QUIRK_NON_DESKTOP }, + + /* Oculus Rift DK1, DK2, and CV1 VR Headsets */ + { "OVR", 0x0001, EDID_QUIRK_NON_DESKTOP }, + { "OVR", 0x0003, EDID_QUIRK_NON_DESKTOP }, + { "OVR", 0x0004, EDID_QUIRK_NON_DESKTOP }, }; /* From 90eda8fc8016cfe39e2c73222e14665f0e5dabb1 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 19 Feb 2018 18:59:37 +0100 Subject: [PATCH 201/269] drm/edid: quirk Windows Mixed Reality headsets as non-desktop This uses the EDID info from Lenovo Explorer (LEN-b800), Acer AH100 (ACR-7fce), and Samsung Odyssey (SEC-144a) to mark them as non-desktop. The other entries are for the HP Windows Mixed Reality Headset (HPN-3515), the Fujitsu Windows Mixed Reality headset (FUJ-1970), the Dell Visor (DEL-7fce), and the ASUS HC102 (AUS-c102). They are not tested with real hardware, but listed as HMD monitors alongside the tested headsets in the Microsoft HololensSensors driver package. Signed-off-by: Philipp Zabel Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_edid.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index d6fa56bb6906b..bfd89b47b1629 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -167,6 +167,16 @@ static const struct edid_quirk { { "OVR", 0x0001, EDID_QUIRK_NON_DESKTOP }, { "OVR", 0x0003, EDID_QUIRK_NON_DESKTOP }, { "OVR", 0x0004, EDID_QUIRK_NON_DESKTOP }, + + /* Windows Mixed Reality Headsets */ + { "ACR", 0x7fce, EDID_QUIRK_NON_DESKTOP }, + { "HPN", 0x3515, EDID_QUIRK_NON_DESKTOP }, + { "LEN", 0x0408, EDID_QUIRK_NON_DESKTOP }, + { "LEN", 0xb800, EDID_QUIRK_NON_DESKTOP }, + { "FUJ", 0x1970, EDID_QUIRK_NON_DESKTOP }, + { "DEL", 0x7fce, EDID_QUIRK_NON_DESKTOP }, + { "SEC", 0x144a, EDID_QUIRK_NON_DESKTOP }, + { "AUS", 0xc102, EDID_QUIRK_NON_DESKTOP }, }; /* From ccffc9ebfa66e3f2cc5e17b2579202786050b32e Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 19 Feb 2018 18:59:38 +0100 Subject: [PATCH 202/269] drm/edid: quirk Sony PlayStation VR headset as non-desktop This uses the EDID info from the Sony PlayStation VR headset, when connected directly, to mark it as non-desktop. Since the connection box (product id b403) defaults to HDMI pass-through to the TV, it is not marked as non-desktop. Signed-off-by: Philipp Zabel Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index bfd89b47b1629..9796c29dc004c 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -177,6 +177,9 @@ static const struct edid_quirk { { "DEL", 0x7fce, EDID_QUIRK_NON_DESKTOP }, { "SEC", 0x144a, EDID_QUIRK_NON_DESKTOP }, { "AUS", 0xc102, EDID_QUIRK_NON_DESKTOP }, + + /* Sony PlayStation VR Headset */ + { "SNY", 0x0704, EDID_QUIRK_NON_DESKTOP }, }; /* From 5ae437ad5a2ed573b1ebb04e0afa70b8869f88dd Mon Sep 17 00:00:00 2001 From: Roman Kapl Date: Mon, 19 Feb 2018 21:32:51 +0100 Subject: [PATCH 203/269] net: sched: report if filter is too large to dump So far, if the filter was too large to fit in the allocated skb, the kernel did not return any error and stopped dumping. Modify the dumper so that it returns -EMSGSIZE when a filter fails to dump and it is the first filter in the skb. If we are not first, we will get a next chance with more room. I understand this is pretty near to being an API change, but the original design (silent truncation) can be considered a bug. Note: The error case can happen pretty easily if you create a filter with 32 actions and have 4kb pages. Also recent versions of iproute try to be clever with their buffer allocation size, which in turn leads to Signed-off-by: Roman Kapl Acked-by: Jiri Pirko Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_api.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a7dc7271042a2..247b7cc20c131 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1397,13 +1397,18 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) nla_get_u32(tca[TCA_CHAIN]) != chain->index) continue; if (!tcf_chain_dump(chain, q, parent, skb, cb, - index_start, &index)) + index_start, &index)) { + err = -EMSGSIZE; break; + } } cb->args[0] = index; out: + /* If we did no progress, the error (EMSGSIZE) is real */ + if (skb->len == 0 && err) + return err; return skb->len; } From 30a3317ddc2427d173d8bcffaa3f41a61eb66560 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 20 Feb 2018 16:20:08 +0200 Subject: [PATCH 204/269] drm/tve200: fix kernel-doc documentation comment include The DOC: line acts as an identifier for the :doc: include. Fixes: ./drivers/gpu/drm/tve200/tve200_drv.c:1: warning: no structured comments found Cc: Linus Walleij Reviewed-by: Daniel Vetter Reviewed-by: Linus Walleij Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180220142008.9330-1-jani.nikula@intel.com --- Documentation/gpu/tve200.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/gpu/tve200.rst b/Documentation/gpu/tve200.rst index 69b17b324e127..152ea9398f7e5 100644 --- a/Documentation/gpu/tve200.rst +++ b/Documentation/gpu/tve200.rst @@ -3,4 +3,4 @@ ================================== .. kernel-doc:: drivers/gpu/drm/tve200/tve200_drv.c - :doc: Faraday TV Encoder 200 + :doc: Faraday TV Encoder TVE200 DRM Driver From fba4adbbf670577e605f9ad306629db6031cd48b Mon Sep 17 00:00:00 2001 From: Ben Gardner Date: Wed, 14 Feb 2018 09:29:52 -0600 Subject: [PATCH 205/269] i2c: designware: must wait for enable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One I2C bus on my Atom E3845 board has been broken since 4.9. It has two devices, both declared by ACPI and with built-in drivers. There are two back-to-back transactions originating from the kernel, one targeting each device. The first transaction works, the second one locks up the I2C controller. The controller never recovers. These kernel logs show up whenever an I2C transaction is attempted after this failure. i2c-designware-pci 0000:00:18.3: timeout in disabling adapter i2c-designware-pci 0000:00:18.3: timeout waiting for bus ready Waiting for the I2C controller status to indicate that it is enabled before programming it fixes the issue. I have tested this patch on 4.14 and 4.15. Fixes: commit 2702ea7dbec5 ("i2c: designware: wait for disable/enable only if necessary") Cc: linux-stable #4.13+ Signed-off-by: Ben Gardner Acked-by: Jarkko Nikula Reviewed-by: José Roberto de Souza Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index ae691884d0716..55926ef41ef17 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -209,7 +209,7 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev) i2c_dw_disable_int(dev); /* Enable the adapter */ - __i2c_dw_enable(dev, true); + __i2c_dw_enable_and_wait(dev, true); /* Clear and enable interrupts */ dw_readl(dev, DW_IC_CLR_INTR); From 15407798835a94f0936c7cbabb2f611bf20f467a Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 16 Feb 2018 11:24:29 +0200 Subject: [PATCH 206/269] i2c: i801: Add missing documentation entries for Braswell and Kaby Lake Commits adding PCI IDs for Intel Braswell and Kaby Lake PCH-H lacked the respective Kconfig and Documentation/i2c/busses/i2c-i801 change. Add them now. Signed-off-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- Documentation/i2c/busses/i2c-i801 | 2 ++ drivers/i2c/busses/Kconfig | 2 ++ drivers/i2c/busses/i2c-i801.c | 1 + 3 files changed, 5 insertions(+) diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801 index d477024569269..65514c2513187 100644 --- a/Documentation/i2c/busses/i2c-i801 +++ b/Documentation/i2c/busses/i2c-i801 @@ -28,8 +28,10 @@ Supported adapters: * Intel Wildcat Point (PCH) * Intel Wildcat Point-LP (PCH) * Intel BayTrail (SOC) + * Intel Braswell (SOC) * Intel Sunrise Point-H (PCH) * Intel Sunrise Point-LP (PCH) + * Intel Kaby Lake-H (PCH) * Intel DNV (SOC) * Intel Broxton (SOC) * Intel Lewisburg (PCH) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index a9805c7cb305a..e2954fb86d659 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -123,8 +123,10 @@ config I2C_I801 Wildcat Point (PCH) Wildcat Point-LP (PCH) BayTrail (SOC) + Braswell (SOC) Sunrise Point-H (PCH) Sunrise Point-LP (PCH) + Kaby Lake-H (PCH) DNV (SOC) Broxton (SOC) Lewisburg (PCH) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 8eac00efadc1a..692b341258667 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -58,6 +58,7 @@ * Wildcat Point (PCH) 0x8ca2 32 hard yes yes yes * Wildcat Point-LP (PCH) 0x9ca2 32 hard yes yes yes * BayTrail (SOC) 0x0f12 32 hard yes yes yes + * Braswell (SOC) 0x2292 32 hard yes yes yes * Sunrise Point-H (PCH) 0xa123 32 hard yes yes yes * Sunrise Point-LP (PCH) 0x9d23 32 hard yes yes yes * DNV (SOC) 0x19df 32 hard yes yes yes From f45765872e7aae7b81feb3044aaf9886b21885ef Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 21 Feb 2018 10:25:01 +0200 Subject: [PATCH 207/269] RDMA/uverbs: Fix kernel panic while using XRC_TGT QP type Attempt to modify XRC_TGT QP type from the user space (ibv_xsrq_pingpong invocation) will trigger the following kernel panic. It is caused by the fact that such QPs missed uobject initialization. [ 17.408845] BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 [ 17.412645] IP: rdma_lookup_put_uobject+0x9/0x50 [ 17.416567] PGD 0 P4D 0 [ 17.419262] Oops: 0000 [#1] SMP PTI [ 17.422915] CPU: 0 PID: 455 Comm: ibv_xsrq_pingpo Not tainted 4.16.0-rc1+ #86 [ 17.424765] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 [ 17.427399] RIP: 0010:rdma_lookup_put_uobject+0x9/0x50 [ 17.428445] RSP: 0018:ffffb8c7401e7c90 EFLAGS: 00010246 [ 17.429543] RAX: 0000000000000000 RBX: ffffb8c7401e7cf8 RCX: 0000000000000000 [ 17.432426] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000000 [ 17.437448] RBP: 0000000000000000 R08: 00000000000218f0 R09: ffffffff8ebc4cac [ 17.440223] R10: fffff6038052cd80 R11: ffff967694b36400 R12: ffff96769391f800 [ 17.442184] R13: ffffb8c7401e7cd8 R14: 0000000000000000 R15: ffff967699f60000 [ 17.443971] FS: 00007fc29207d700(0000) GS:ffff96769fc00000(0000) knlGS:0000000000000000 [ 17.446623] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 17.448059] CR2: 0000000000000048 CR3: 000000001397a000 CR4: 00000000000006b0 [ 17.449677] Call Trace: [ 17.450247] modify_qp.isra.20+0x219/0x2f0 [ 17.451151] ib_uverbs_modify_qp+0x90/0xe0 [ 17.452126] ib_uverbs_write+0x1d2/0x3c0 [ 17.453897] ? __handle_mm_fault+0x93c/0xe40 [ 17.454938] __vfs_write+0x36/0x180 [ 17.455875] vfs_write+0xad/0x1e0 [ 17.456766] SyS_write+0x52/0xc0 [ 17.457632] do_syscall_64+0x75/0x180 [ 17.458631] entry_SYSCALL_64_after_hwframe+0x21/0x86 [ 17.460004] RIP: 0033:0x7fc29198f5a0 [ 17.460982] RSP: 002b:00007ffccc71f018 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 17.463043] RAX: ffffffffffffffda RBX: 0000000000000078 RCX: 00007fc29198f5a0 [ 17.464581] RDX: 0000000000000078 RSI: 00007ffccc71f050 RDI: 0000000000000003 [ 17.466148] RBP: 0000000000000000 R08: 0000000000000078 R09: 00007ffccc71f050 [ 17.467750] R10: 000055b6cf87c248 R11: 0000000000000246 R12: 00007ffccc71f300 [ 17.469541] R13: 000055b6cf8733a0 R14: 0000000000000000 R15: 0000000000000000 [ 17.471151] Code: 00 00 0f 1f 44 00 00 48 8b 47 48 48 8b 00 48 8b 40 10 e9 0b 8b 68 00 90 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 53 89 f5 <48> 8b 47 48 48 89 fb 40 0f b6 f6 48 8b 00 48 8b 40 20 e8 e0 8a [ 17.475185] RIP: rdma_lookup_put_uobject+0x9/0x50 RSP: ffffb8c7401e7c90 [ 17.476841] CR2: 0000000000000048 [ 17.477764] ---[ end trace 1dbcc5354071a712 ]--- [ 17.478880] Kernel panic - not syncing: Fatal exception [ 17.480277] Kernel Offset: 0xd000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) Fixes: 2f08ee363fe0 ("RDMA/restrack: don't use uaccess_kernel()") Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 25a0e0e083b33..a148de35df8d4 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1553,6 +1553,9 @@ static int create_qp(struct ib_uverbs_file *file, atomic_inc(&attr.srq->usecnt); if (ind_tbl) atomic_inc(&ind_tbl->usecnt); + } else { + /* It is done in _ib_create_qp for other QP types */ + qp->uobject = &obj->uevent.uobject; } obj->uevent.uobject.object = qp; From 7324f5399b06cdbbd1520b8fde8024035953179d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 20 Feb 2018 14:32:04 +0100 Subject: [PATCH 208/269] virtio_net: disable XDP_REDIRECT in receive_mergeable() case The virtio_net code have three different RX code-paths in receive_buf(). Two of these code paths can handle XDP, but one of them is broken for at least XDP_REDIRECT. Function(1): receive_big() does not support XDP. Function(2): receive_small() support XDP fully and uses build_skb(). Function(3): receive_mergeable() broken XDP_REDIRECT uses napi_alloc_skb(). The simple explanation is that receive_mergeable() is broken because it uses napi_alloc_skb(), which violates XDP given XDP assumes packet header+data in single page and enough tail room for skb_shared_info. The longer explaination is that receive_mergeable() tries to work-around and satisfy these XDP requiresments e.g. by having a function xdp_linearize_page() that allocates and memcpy RX buffers around (in case packet is scattered across multiple rx buffers). This does currently satisfy XDP_PASS, XDP_DROP and XDP_TX (but only because we have not implemented bpf_xdp_adjust_tail yet). The XDP_REDIRECT action combined with cpumap is broken, and cause hard to debug crashes. The main issue is that the RX packet does not have the needed tail-room (SKB_DATA_ALIGN(skb_shared_info)), causing skb_shared_info to overlap the next packets head-room (in which cpumap stores info). Reproducing depend on the packet payload length and if RX-buffer size happened to have tail-room for skb_shared_info or not. But to make this even harder to troubleshoot, the RX-buffer size is runtime dynamically change based on an Exponentially Weighted Moving Average (EWMA) over the packet length, when refilling RX rings. This patch only disable XDP_REDIRECT support in receive_mergeable() case, because it can cause a real crash. IMHO we should consider NOT supporting XDP in receive_mergeable() at all, because the principles behind XDP are to gain speed by (1) code simplicity, (2) sacrificing memory and (3) where possible moving runtime checks to setup time. These principles are clearly being violated in receive_mergeable(), that e.g. runtime track average buffer size to save memory consumption. In the longer run, we should consider introducing a separate receive function when attaching an XDP program, and also change the memory model to be compatible with XDP when attaching an XDP prog. Fixes: 186b3c998c50 ("virtio-net: support XDP_REDIRECT") Signed-off-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 626c27352ae24..0ca91942a884a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -677,7 +677,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct bpf_prog *xdp_prog; unsigned int truesize; unsigned int headroom = mergeable_ctx_to_headroom(ctx); - int err; head_skb = NULL; @@ -754,12 +753,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, goto err_xdp; rcu_read_unlock(); goto xdp_xmit; - case XDP_REDIRECT: - err = xdp_do_redirect(dev, &xdp, xdp_prog); - if (!err) - *xdp_xmit = true; - rcu_read_unlock(); - goto xdp_xmit; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: From 95dbe9e7b3720efa5cf83d21f44f6d953f7cf4a2 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 20 Feb 2018 14:32:10 +0100 Subject: [PATCH 209/269] virtio_net: fix XDP code path in receive_small() When configuring virtio_net to use the code path 'receive_small()', in-order to get correct XDP_REDIRECT support, I discovered TCP packets would get silently dropped when loading an XDP program action XDP_PASS. The bug seems to be that receive_small() when XDP is loaded check that hdr->hdr.flags is zero, which seems wrong as hdr.flags contains the flags VIRTIO_NET_HDR_F_* : #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */ #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ TCP got dropped as it had the VIRTIO_NET_HDR_F_DATA_VALID flag set. The flags that are relevant here are the VIRTIO_NET_HDR_GSO_* flags stored in hdr->hdr.gso_type. Thus, the fix is just check that none of the gso_type flags have been set. Fixes: bb91accf2733 ("virtio-net: XDP support for small buffers") Signed-off-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0ca91942a884a..10c8fc46b588d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -558,7 +558,7 @@ static struct sk_buff *receive_small(struct net_device *dev, void *orig_data; u32 act; - if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags)) + if (unlikely(hdr->hdr.gso_type)) goto err_xdp; if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { From 11b7d897ccc1fb5a3d3f9eb1e6b4574671e5dd7d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 20 Feb 2018 14:32:15 +0100 Subject: [PATCH 210/269] virtio_net: fix memory leak in XDP_REDIRECT XDP_REDIRECT calling xdp_do_redirect() can fail for multiple reasons (which can be inspected by tracepoints). The current semantics is that on failure the driver calling xdp_do_redirect() must handle freeing or recycling the page associated with this frame. This can be seen as an optimization, as drivers usually have an optimized XDP_DROP code path for frame recycling in place already. The virtio_net driver didn't handle when xdp_do_redirect() failed. This caused a memory leak as the page refcnt wasn't decremented on failures. The function __virtnet_xdp_xmit() did handle one type of failure, when the xmit queue virtqueue_add_outbuf() is full, which "hides" releasing a refcnt on the page. Instead the function __virtnet_xdp_xmit() must follow API of xdp_do_redirect(), which on errors leave it up to the caller to free the page, of the failed send operation. Fixes: 186b3c998c50 ("virtio-net: support XDP_REDIRECT") Signed-off-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 10c8fc46b588d..1e0e0fce3ab2e 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -443,12 +443,8 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi, sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data); err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp->data, GFP_ATOMIC); - if (unlikely(err)) { - struct page *page = virt_to_head_page(xdp->data); - - put_page(page); - return false; - } + if (unlikely(err)) + return false; /* Caller handle free/refcnt */ return true; } @@ -546,8 +542,11 @@ static struct sk_buff *receive_small(struct net_device *dev, unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); struct page *page = virt_to_head_page(buf); - unsigned int delta = 0, err; + unsigned int delta = 0; struct page *xdp_page; + bool sent; + int err; + len -= vi->hdr_len; rcu_read_lock(); @@ -596,16 +595,19 @@ static struct sk_buff *receive_small(struct net_device *dev, delta = orig_data - xdp.data; break; case XDP_TX: - if (unlikely(!__virtnet_xdp_xmit(vi, &xdp))) + sent = __virtnet_xdp_xmit(vi, &xdp); + if (unlikely(!sent)) { trace_xdp_exception(vi->dev, xdp_prog, act); - else - *xdp_xmit = true; + goto err_xdp; + } + *xdp_xmit = true; rcu_read_unlock(); goto xdp_xmit; case XDP_REDIRECT: err = xdp_do_redirect(dev, &xdp, xdp_prog); - if (!err) - *xdp_xmit = true; + if (err) + goto err_xdp; + *xdp_xmit = true; rcu_read_unlock(); goto xdp_xmit; default: @@ -677,6 +679,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct bpf_prog *xdp_prog; unsigned int truesize; unsigned int headroom = mergeable_ctx_to_headroom(ctx); + bool sent; head_skb = NULL; @@ -745,10 +748,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } break; case XDP_TX: - if (unlikely(!__virtnet_xdp_xmit(vi, &xdp))) + sent = __virtnet_xdp_xmit(vi, &xdp); + if (unlikely(!sent)) { trace_xdp_exception(vi->dev, xdp_prog, act); - else - *xdp_xmit = true; + if (unlikely(xdp_page != page)) + put_page(xdp_page); + goto err_xdp; + } + *xdp_xmit = true; if (unlikely(xdp_page != page)) goto err_xdp; rcu_read_unlock(); From 8dcc5b0ab0ec9a2efb3362d380272546b8b2ee26 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 20 Feb 2018 14:32:20 +0100 Subject: [PATCH 211/269] virtio_net: fix ndo_xdp_xmit crash towards dev not ready for XDP When a driver implements the ndo_xdp_xmit() function, there is (currently) no generic way to determine whether it is safe to call. It is e.g. unsafe to call the drivers ndo_xdp_xmit, if it have not allocated the needed XDP TX queues yet. This is the case for virtio_net, which first allocates the XDP TX queues once an XDP/bpf prog is attached (in virtnet_xdp_set()). Thus, a crash will occur for virtio_net when redirecting to another virtio_net device's ndo_xdp_xmit, which have not attached a XDP prog. The sample xdp_redirect_map tries to attach a dummy XDP prog to take this into account, but it can also easily fail if the virtio_net (or actually underlying vhost driver) have not allocated enough extra queues for the device. Allocating more queue this is currently a manual config. Hint for libvirt XML add: The solution in this patch is to check that the device have loaded an XDP/bpf prog before proceeding. This is similar to the check performed in driver ixgbe. Signed-off-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1e0e0fce3ab2e..9bb9e562b8934 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -452,8 +452,18 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi, static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) { struct virtnet_info *vi = netdev_priv(dev); - bool sent = __virtnet_xdp_xmit(vi, xdp); + struct receive_queue *rq = vi->rq; + struct bpf_prog *xdp_prog; + bool sent; + + /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this + * indicate XDP resources have been successfully allocated. + */ + xdp_prog = rcu_dereference(rq->xdp_prog); + if (!xdp_prog) + return -ENXIO; + sent = __virtnet_xdp_xmit(vi, xdp); if (!sent) return -ENOSPC; return 0; From cfd092f2db8b4b6727e1c03ef68a7842e1023573 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 20 Feb 2018 15:22:05 -0600 Subject: [PATCH 212/269] amd-xgbe: Restore PCI interrupt enablement setting on resume After resuming from suspend, the PCI device support must re-enable the interrupt setting so that interrupts are actually delivered. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index 3e5833cf1faba..eb23f9ba1a9a1 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -426,6 +426,8 @@ static int xgbe_pci_resume(struct pci_dev *pdev) struct net_device *netdev = pdata->netdev; int ret = 0; + XP_IOWRITE(pdata, XP_INT_EN, 0x1fffff); + pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER; XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); From 7ed1c1901fe52e6c5828deb155920b44b0adabb1 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Wed, 21 Feb 2018 14:45:12 -0800 Subject: [PATCH 213/269] tools: fix cross-compile var clobbering Currently a number of Makefiles break when used with toolchains that pass extra flags in CC and other cross-compile related variables (such as --sysroot). Thus we get this error when we use a toolchain that puts --sysroot in the CC var: ~/src/linux/tools$ make iio [snip] iio_event_monitor.c:18:10: fatal error: unistd.h: No such file or directory #include ^~~~~~~~~~ This occurs because we clobber several env vars related to cross-compiling with lines like this: CC = $(CROSS_COMPILE)gcc Although this will point to a valid cross-compiler, we lose any extra flags that might exist in the CC variable, which can break toolchains that rely on them (for example, those that use --sysroot). This easily shows up using a Yocto SDK: $ . [snip]/sdk/environment-setup-cortexa8hf-neon-poky-linux-gnueabi $ echo $CC arm-poky-linux-gnueabi-gcc -march=armv7-a -mfpu=neon -mfloat-abi=hard -mcpu=cortex-a8 --sysroot=[snip]/sdk/sysroots/cortexa8hf-neon-poky-linux-gnueabi $ echo $CROSS_COMPILE arm-poky-linux-gnueabi- $ echo ${CROSS_COMPILE}gcc krm-poky-linux-gnueabi-gcc Although arm-poky-linux-gnueabi-gcc is a cross-compiler, we've lost the --sysroot and other flags that enable us to find the right libraries to link against, so we can't find unistd.h and other libraries and headers. Normally with the --sysroot flag we would find unistd.h in the sdk directory in the sysroot: $ find [snip]/sdk/sysroots -path '*/usr/include/unistd.h' [snip]/sdk/sysroots/cortexa8hf-neon-poky-linux-gnueabi/usr/include/unistd.h The perf Makefile adds CC = $(CROSS_COMPILE)gcc if and only if CC is not already set, and it compiles correctly with the above toolchain. So, generalize the logic that perf uses in the common Makefile and remove the manual CC = $(CROSS_COMPILE)gcc lines from each Makefile. Note that this patch does not fix cross-compile for all the tools (some have other bugs), but it does fix it for all except usb and acpi, which still have other unrelated issues. I tested both with and without the patch on native and cross-build and there appear to be no regressions. Link: http://lkml.kernel.org/r/20180107214028.23771-1-martin@martingkelly.com Signed-off-by: Martin Kelly Acked-by: Mark Brown Cc: Tejun Heo Cc: Li Zefan Cc: Johannes Weiner Cc: Linus Walleij Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Jonathan Cameron Cc: Pali Rohar Cc: Richard Purdie Cc: Jacek Anaszewski Cc: Pavel Machek Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Robert Moore Cc: Lv Zheng Cc: "Rafael J. Wysocki" Cc: Greg Kroah-Hartman Cc: Valentina Manea Cc: Shuah Khan Cc: Mario Limonciello Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/cgroup/Makefile | 1 - tools/gpio/Makefile | 2 -- tools/hv/Makefile | 1 - tools/iio/Makefile | 2 -- tools/laptop/freefall/Makefile | 1 - tools/leds/Makefile | 1 - tools/perf/Makefile.perf | 6 ------ tools/power/acpi/Makefile.config | 3 --- tools/scripts/Makefile.include | 18 ++++++++++++++++++ tools/spi/Makefile | 2 -- tools/usb/Makefile | 1 - tools/vm/Makefile | 1 - tools/wmi/Makefile | 1 - 13 files changed, 18 insertions(+), 22 deletions(-) diff --git a/tools/cgroup/Makefile b/tools/cgroup/Makefile index 860fa151640ab..ffca068e4a761 100644 --- a/tools/cgroup/Makefile +++ b/tools/cgroup/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for cgroup tools -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -Wextra all: cgroup_event_listener diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile index 805a2c0cf4cd3..240eda014b371 100644 --- a/tools/gpio/Makefile +++ b/tools/gpio/Makefile @@ -12,8 +12,6 @@ endif # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r -CC = $(CROSS_COMPILE)gcc -LD = $(CROSS_COMPILE)ld CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include ALL_TARGETS := lsgpio gpio-hammer gpio-event-mon diff --git a/tools/hv/Makefile b/tools/hv/Makefile index 1139d71fa0cf5..5db5e62cebdae 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for Hyper-V tools -CC = $(CROSS_COMPILE)gcc WARNINGS = -Wall -Wextra CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS) diff --git a/tools/iio/Makefile b/tools/iio/Makefile index a08e7a47d6a32..332ed2f6c2c2e 100644 --- a/tools/iio/Makefile +++ b/tools/iio/Makefile @@ -12,8 +12,6 @@ endif # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r -CC = $(CROSS_COMPILE)gcc -LD = $(CROSS_COMPILE)ld CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include ALL_TARGETS := iio_event_monitor lsiio iio_generic_buffer diff --git a/tools/laptop/freefall/Makefile b/tools/laptop/freefall/Makefile index 5f758c489a208..b572d94255f66 100644 --- a/tools/laptop/freefall/Makefile +++ b/tools/laptop/freefall/Makefile @@ -2,7 +2,6 @@ PREFIX ?= /usr SBINDIR ?= sbin INSTALL ?= install -CC = $(CROSS_COMPILE)gcc TARGET = freefall diff --git a/tools/leds/Makefile b/tools/leds/Makefile index c379af003807a..7b6bed13daaae 100644 --- a/tools/leds/Makefile +++ b/tools/leds/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for LEDs tools -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -Wextra -g -I../../include/uapi all: uledmon led_hw_brightness_mon diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 9b0351d3ce348..0123280385940 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -146,12 +146,6 @@ define allow-override $(eval $(1) = $(2))) endef -# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix. -$(call allow-override,CC,$(CROSS_COMPILE)gcc) -$(call allow-override,AR,$(CROSS_COMPILE)ar) -$(call allow-override,LD,$(CROSS_COMPILE)ld) -$(call allow-override,CXX,$(CROSS_COMPILE)g++) - LD += $(EXTRA_LDFLAGS) HOSTCC ?= gcc diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index a1883bbb01447..2cccbba644187 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -56,9 +56,6 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM} # to compile vs uClibc, that can be done here as well. CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- CROSS_COMPILE ?= $(CROSS) -CC = $(CROSS_COMPILE)gcc -LD = $(CROSS_COMPILE)gcc -STRIP = $(CROSS_COMPILE)strip HOSTCC = gcc # check if compiler option is supported diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index fcb3ed0be5f81..dd614463d4d69 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -42,6 +42,24 @@ EXTRA_WARNINGS += -Wformat CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?) +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting various cross-compile vars or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,LD,$(CROSS_COMPILE)ld) +$(call allow-override,CXX,$(CROSS_COMPILE)g++) +$(call allow-override,STRIP,$(CROSS_COMPILE)strip) + ifeq ($(CC_NO_CLANG), 1) EXTRA_WARNINGS += -Wstrict-aliasing=3 endif diff --git a/tools/spi/Makefile b/tools/spi/Makefile index 90615e10c79af..815d155891779 100644 --- a/tools/spi/Makefile +++ b/tools/spi/Makefile @@ -11,8 +11,6 @@ endif # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r -CC = $(CROSS_COMPILE)gcc -LD = $(CROSS_COMPILE)ld CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include ALL_TARGETS := spidev_test spidev_fdx diff --git a/tools/usb/Makefile b/tools/usb/Makefile index 4e6506078494f..01d758d73b6db 100644 --- a/tools/usb/Makefile +++ b/tools/usb/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for USB tools -CC = $(CROSS_COMPILE)gcc PTHREAD_LIBS = -lpthread WARNINGS = -Wall -Wextra CFLAGS = $(WARNINGS) -g -I../include diff --git a/tools/vm/Makefile b/tools/vm/Makefile index be320b905ea75..20f6cf04377f0 100644 --- a/tools/vm/Makefile +++ b/tools/vm/Makefile @@ -6,7 +6,6 @@ TARGETS=page-types slabinfo page_owner_sort LIB_DIR = ../lib/api LIBS = $(LIB_DIR)/libapi.a -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -Wextra -I../lib/ LDFLAGS = $(LIBS) diff --git a/tools/wmi/Makefile b/tools/wmi/Makefile index e664f1167388a..e0e87239126b5 100644 --- a/tools/wmi/Makefile +++ b/tools/wmi/Makefile @@ -2,7 +2,6 @@ PREFIX ?= /usr SBINDIR ?= sbin INSTALL ?= install CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include -CC = $(CROSS_COMPILE)gcc TARGET = dell-smbios-example From d34bc48f8275b6ce0da44f639d68344891268ee9 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 21 Feb 2018 14:45:17 -0800 Subject: [PATCH 214/269] include/linux/sched/mm.h: re-inline mmdrop() As Peter points out, Doing a CALL+RET for just the decrement is a bit silly. Fixes: d70f2a14b72a4bc ("include/linux/sched/mm.h: uninline mmdrop_async(), etc") Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Michal Hocko Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 13 ++++++++++++- kernel/fork.c | 15 ++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 1149533aa2fa2..9806184bb3d54 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -36,7 +36,18 @@ static inline void mmgrab(struct mm_struct *mm) atomic_inc(&mm->mm_count); } -extern void mmdrop(struct mm_struct *mm); +extern void __mmdrop(struct mm_struct *mm); + +static inline void mmdrop(struct mm_struct *mm) +{ + /* + * The implicit full barrier implied by atomic_dec_and_test() is + * required by the membarrier system call before returning to + * user-space, after storing to rq->curr. + */ + if (unlikely(atomic_dec_and_test(&mm->mm_count))) + __mmdrop(mm); +} /** * mmget() - Pin the address space associated with a &struct mm_struct. diff --git a/kernel/fork.c b/kernel/fork.c index be8aa5b986662..e5d9d405ae4e5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -592,7 +592,7 @@ static void check_mm(struct mm_struct *mm) * is dropped: either by a lazy thread or by * mmput. Free the page directory and the mm. */ -static void __mmdrop(struct mm_struct *mm) +void __mmdrop(struct mm_struct *mm) { BUG_ON(mm == &init_mm); mm_free_pgd(mm); @@ -603,18 +603,7 @@ static void __mmdrop(struct mm_struct *mm) put_user_ns(mm->user_ns); free_mm(mm); } - -void mmdrop(struct mm_struct *mm) -{ - /* - * The implicit full barrier implied by atomic_dec_and_test() is - * required by the membarrier system call before returning to - * user-space, after storing to rq->curr. - */ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) - __mmdrop(mm); -} -EXPORT_SYMBOL_GPL(mmdrop); +EXPORT_SYMBOL_GPL(__mmdrop); static void mmdrop_async_fn(struct work_struct *work) { From 101110f6271ce956a049250c907bc960030577f8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Feb 2018 14:45:20 -0800 Subject: [PATCH 215/269] Kbuild: always define endianess in kconfig.h Build testing with LTO found a couple of files that get compiled differently depending on whether asm/byteorder.h gets included early enough or not. In particular, include/asm-generic/qrwlock_types.h is affected by this, but there are probably others as well. The symptom is a series of LTO link time warnings, including these: net/netlabel/netlabel_unlabeled.h:223: error: type of 'netlbl_unlhsh_add' does not match original declaration [-Werror=lto-type-mismatch] int netlbl_unlhsh_add(struct net *net, net/netlabel/netlabel_unlabeled.c:377: note: 'netlbl_unlhsh_add' was previously declared here include/net/ipv6.h:360: error: type of 'ipv6_renew_options_kern' does not match original declaration [-Werror=lto-type-mismatch] ipv6_renew_options_kern(struct sock *sk, net/ipv6/exthdrs.c:1162: note: 'ipv6_renew_options_kern' was previously declared here net/core/dev.c:761: note: 'dev_get_by_name_rcu' was previously declared here struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) net/core/dev.c:761: note: code may be misoptimized unless -fno-strict-aliasing is used drivers/gpu/drm/i915/i915_drv.h:3377: error: type of 'i915_gem_object_set_to_wc_domain' does not match original declaration [-Werror=lto-type-mismatch] i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write); drivers/gpu/drm/i915/i915_gem.c:3639: note: 'i915_gem_object_set_to_wc_domain' was previously declared here include/linux/debugfs.h:92:9: error: type of 'debugfs_attr_read' does not match original declaration [-Werror=lto-type-mismatch] ssize_t debugfs_attr_read(struct file *file, char __user *buf, fs/debugfs/file.c:318: note: 'debugfs_attr_read' was previously declared here include/linux/rwlock_api_smp.h:30: error: type of '_raw_read_unlock' does not match original declaration [-Werror=lto-type-mismatch] void __lockfunc _raw_read_unlock(rwlock_t *lock) __releases(lock); kernel/locking/spinlock.c:246:26: note: '_raw_read_unlock' was previously declared here include/linux/fs.h:3308:5: error: type of 'simple_attr_open' does not match original declaration [-Werror=lto-type-mismatch] int simple_attr_open(struct inode *inode, struct file *file, fs/libfs.c:795: note: 'simple_attr_open' was previously declared here All of the above are caused by include/asm-generic/qrwlock_types.h failing to include asm/byteorder.h after commit e0d02285f16e ("locking/qrwlock: Use 'struct qrwlock' instead of 'struct __qrwlock'") in linux-4.15. Similar bugs may or may not exist in older kernels as well, but there is no easy way to test those with link-time optimizations, and kernels before 4.14 are harder to fix because they don't have Babu's patch series We had similar issues with CONFIG_ symbols in the past and ended up always including the configuration headers though linux/kconfig.h. This works around the issue through that same file, defining either __BIG_ENDIAN or __LITTLE_ENDIAN depending on CONFIG_CPU_BIG_ENDIAN, which is now always set on all architectures since commit 4c97a0c8fee3 ("arch: define CPU_BIG_ENDIAN for all fixed big endian archs"). Link: http://lkml.kernel.org/r/20180202154104.1522809-2-arnd@arndb.de Signed-off-by: Arnd Bergmann Cc: Babu Moger Cc: Andi Kleen Cc: Greg Kroah-Hartman Cc: Masahiro Yamada Cc: Nicolas Pitre Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kconfig.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h index fec5076eda91d..cc8fa109cfa3e 100644 --- a/include/linux/kconfig.h +++ b/include/linux/kconfig.h @@ -4,6 +4,12 @@ #include +#ifdef CONFIG_CPU_BIG_ENDIAN +#define __BIG_ENDIAN 4321 +#else +#define __LITTLE_ENDIAN 1234 +#endif + #define __ARG_PLACEHOLDER_1 0, #define __take_second_arg(__ignored, val, ...) val From c3cc39118c3610eb6ab4711bc624af7fc48a35fe Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 21 Feb 2018 14:45:24 -0800 Subject: [PATCH 216/269] mm: memcontrol: fix NR_WRITEBACK leak in memcg and system stats After commit a983b5ebee57 ("mm: memcontrol: fix excessive complexity in memory.stat reporting"), we observed slowly upward creeping NR_WRITEBACK counts over the course of several days, both the per-memcg stats as well as the system counter in e.g. /proc/meminfo. The conversion from full per-cpu stat counts to per-cpu cached atomic stat counts introduced an irq-unsafe RMW operation into the updates. Most stat updates come from process context, but one notable exception is the NR_WRITEBACK counter. While writebacks are issued from process context, they are retired from (soft)irq context. When writeback completions interrupt the RMW counter updates of new writebacks being issued, the decs from the completions are lost. Since the global updates are routed through the joint lruvec API, both the memcg counters as well as the system counters are affected. This patch makes the joint stat and event API irq safe. Link: http://lkml.kernel.org/r/20180203082353.17284-1-hannes@cmpxchg.org Fixes: a983b5ebee57 ("mm: memcontrol: fix excessive complexity in memory.stat reporting") Signed-off-by: Johannes Weiner Debugged-by: Tejun Heo Reviewed-by: Rik van Riel Reviewed-by: Andrew Morton Cc: Vladimir Davydov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8820468635810..c46016bb25ebe 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -523,9 +523,11 @@ static inline void __mod_memcg_state(struct mem_cgroup *memcg, static inline void mod_memcg_state(struct mem_cgroup *memcg, int idx, int val) { - preempt_disable(); + unsigned long flags; + + local_irq_save(flags); __mod_memcg_state(memcg, idx, val); - preempt_enable(); + local_irq_restore(flags); } /** @@ -606,9 +608,11 @@ static inline void __mod_lruvec_state(struct lruvec *lruvec, static inline void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { - preempt_disable(); + unsigned long flags; + + local_irq_save(flags); __mod_lruvec_state(lruvec, idx, val); - preempt_enable(); + local_irq_restore(flags); } static inline void __mod_lruvec_page_state(struct page *page, @@ -630,9 +634,11 @@ static inline void __mod_lruvec_page_state(struct page *page, static inline void mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { - preempt_disable(); + unsigned long flags; + + local_irq_save(flags); __mod_lruvec_page_state(page, idx, val); - preempt_enable(); + local_irq_restore(flags); } unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, @@ -659,9 +665,11 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg, static inline void count_memcg_events(struct mem_cgroup *memcg, int idx, unsigned long count) { - preempt_disable(); + unsigned long flags; + + local_irq_save(flags); __count_memcg_events(memcg, idx, count); - preempt_enable(); + local_irq_restore(flags); } /* idx can be of type enum memcg_event_item or vm_event_item */ From 9c4e6b1a7027f102990c0395296015a812525f4d Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 21 Feb 2018 14:45:28 -0800 Subject: [PATCH 217/269] mm, mlock, vmscan: no more skipping pagevecs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a thread mlocks an address space backed either by file pages which are currently not present in memory or swapped out anon pages (not in swapcache), a new page is allocated and added to the local pagevec (lru_add_pvec), I/O is triggered and the thread then sleeps on the page. On I/O completion, the thread can wake on a different CPU, the mlock syscall will then sets the PageMlocked() bit of the page but will not be able to put that page in unevictable LRU as the page is on the pagevec of a different CPU. Even on drain, that page will go to evictable LRU because the PageMlocked() bit is not checked on pagevec drain. The page will eventually go to right LRU on reclaim but the LRU stats will remain skewed for a long time. This patch puts all the pages, even unevictable, to the pagevecs and on the drain, the pages will be added on their LRUs correctly by checking their evictability. This resolves the mlocked pages on pagevec of other CPUs issue because when those pagevecs will be drained, the mlocked file pages will go to unevictable LRU. Also this makes the race with munlock easier to resolve because the pagevec drains happen in LRU lock. However there is still one place which makes a page evictable and does PageLRU check on that page without LRU lock and needs special attention. TestClearPageMlocked() and isolate_lru_page() in clear_page_mlock(). #0: __pagevec_lru_add_fn #1: clear_page_mlock SetPageLRU() if (!TestClearPageMlocked()) return smp_mb() // <--required // inside does PageLRU if (!PageMlocked()) if (isolate_lru_page()) move to evictable LRU putback_lru_page() else move to unevictable LRU In '#1', TestClearPageMlocked() provides full memory barrier semantics and thus the PageLRU check (inside isolate_lru_page) can not be reordered before it. In '#0', without explicit memory barrier, the PageMlocked() check can be reordered before SetPageLRU(). If that happens, '#0' can put a page in unevictable LRU and '#1' might have just cleared the Mlocked bit of that page but fails to isolate as PageLRU fails as '#0' still hasn't set PageLRU bit of that page. That page will be stranded on the unevictable LRU. There is one (good) side effect though. Without this patch, the pages allocated for System V shared memory segment are added to evictable LRUs even after shmctl(SHM_LOCK) on that segment. This patch will correctly put such pages to unevictable LRU. Link: http://lkml.kernel.org/r/20171121211241.18877-1-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Vlastimil Babka Cc: Jérôme Glisse Cc: Huang Ying Cc: Tim Chen Cc: Michal Hocko Cc: Greg Thelen Cc: Johannes Weiner Cc: Balbir Singh Cc: Minchan Kim Cc: Shaohua Li Cc: Jan Kara Cc: Nicholas Piggin Cc: Dan Williams Cc: Mel Gorman Cc: Hugh Dickins Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 -- mm/mlock.c | 6 ++++ mm/swap.c | 82 +++++++++++++++++++++++++------------------- mm/vmscan.c | 59 +------------------------------ 4 files changed, 54 insertions(+), 95 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 7b6a59f722a39..a1a3f4ed94cea 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -337,8 +337,6 @@ extern void deactivate_file_page(struct page *page); extern void mark_page_lazyfree(struct page *page); extern void swap_setup(void); -extern void add_page_to_unevictable_list(struct page *page); - extern void lru_cache_add_active_or_unevictable(struct page *page, struct vm_area_struct *vma); diff --git a/mm/mlock.c b/mm/mlock.c index 79398200e423b..74e5a6547c3dd 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -64,6 +64,12 @@ void clear_page_mlock(struct page *page) mod_zone_page_state(page_zone(page), NR_MLOCK, -hpage_nr_pages(page)); count_vm_event(UNEVICTABLE_PGCLEARED); + /* + * The previous TestClearPageMlocked() corresponds to the smp_mb() + * in __pagevec_lru_add_fn(). + * + * See __pagevec_lru_add_fn for more explanation. + */ if (!isolate_lru_page(page)) { putback_lru_page(page); } else { diff --git a/mm/swap.c b/mm/swap.c index 567a7b96e41d6..2d337710218fa 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -445,30 +445,6 @@ void lru_cache_add(struct page *page) __lru_cache_add(page); } -/** - * add_page_to_unevictable_list - add a page to the unevictable list - * @page: the page to be added to the unevictable list - * - * Add page directly to its zone's unevictable list. To avoid races with - * tasks that might be making the page evictable, through eg. munlock, - * munmap or exit, while it's not on the lru, we want to add the page - * while it's locked or otherwise "invisible" to other tasks. This is - * difficult to do when using the pagevec cache, so bypass that. - */ -void add_page_to_unevictable_list(struct page *page) -{ - struct pglist_data *pgdat = page_pgdat(page); - struct lruvec *lruvec; - - spin_lock_irq(&pgdat->lru_lock); - lruvec = mem_cgroup_page_lruvec(page, pgdat); - ClearPageActive(page); - SetPageUnevictable(page); - SetPageLRU(page); - add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE); - spin_unlock_irq(&pgdat->lru_lock); -} - /** * lru_cache_add_active_or_unevictable * @page: the page to be added to LRU @@ -484,13 +460,9 @@ void lru_cache_add_active_or_unevictable(struct page *page, { VM_BUG_ON_PAGE(PageLRU(page), page); - if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) { + if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) SetPageActive(page); - lru_cache_add(page); - return; - } - - if (!TestSetPageMlocked(page)) { + else if (!TestSetPageMlocked(page)) { /* * We use the irq-unsafe __mod_zone_page_stat because this * counter is not modified from interrupt context, and the pte @@ -500,7 +472,7 @@ void lru_cache_add_active_or_unevictable(struct page *page, hpage_nr_pages(page)); count_vm_event(UNEVICTABLE_PGMLOCKED); } - add_page_to_unevictable_list(page); + lru_cache_add(page); } /* @@ -886,15 +858,55 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, void *arg) { - int file = page_is_file_cache(page); - int active = PageActive(page); - enum lru_list lru = page_lru(page); + enum lru_list lru; + int was_unevictable = TestClearPageUnevictable(page); VM_BUG_ON_PAGE(PageLRU(page), page); SetPageLRU(page); + /* + * Page becomes evictable in two ways: + * 1) Within LRU lock [munlock_vma_pages() and __munlock_pagevec()]. + * 2) Before acquiring LRU lock to put the page to correct LRU and then + * a) do PageLRU check with lock [check_move_unevictable_pages] + * b) do PageLRU check before lock [clear_page_mlock] + * + * (1) & (2a) are ok as LRU lock will serialize them. For (2b), we need + * following strict ordering: + * + * #0: __pagevec_lru_add_fn #1: clear_page_mlock + * + * SetPageLRU() TestClearPageMlocked() + * smp_mb() // explicit ordering // above provides strict + * // ordering + * PageMlocked() PageLRU() + * + * + * if '#1' does not observe setting of PG_lru by '#0' and fails + * isolation, the explicit barrier will make sure that page_evictable + * check will put the page in correct LRU. Without smp_mb(), SetPageLRU + * can be reordered after PageMlocked check and can make '#1' to fail + * the isolation of the page whose Mlocked bit is cleared (#0 is also + * looking at the same page) and the evictable page will be stranded + * in an unevictable LRU. + */ + smp_mb(); + + if (page_evictable(page)) { + lru = page_lru(page); + update_page_reclaim_stat(lruvec, page_is_file_cache(page), + PageActive(page)); + if (was_unevictable) + count_vm_event(UNEVICTABLE_PGRESCUED); + } else { + lru = LRU_UNEVICTABLE; + ClearPageActive(page); + SetPageUnevictable(page); + if (!was_unevictable) + count_vm_event(UNEVICTABLE_PGCULLED); + } + add_page_to_lru_list(page, lruvec, lru); - update_page_reclaim_stat(lruvec, file, active); trace_mm_lru_insertion(page, lru); } diff --git a/mm/vmscan.c b/mm/vmscan.c index 444749669187e..bee53495a8292 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -769,64 +769,7 @@ int remove_mapping(struct address_space *mapping, struct page *page) */ void putback_lru_page(struct page *page) { - bool is_unevictable; - int was_unevictable = PageUnevictable(page); - - VM_BUG_ON_PAGE(PageLRU(page), page); - -redo: - ClearPageUnevictable(page); - - if (page_evictable(page)) { - /* - * For evictable pages, we can use the cache. - * In event of a race, worst case is we end up with an - * unevictable page on [in]active list. - * We know how to handle that. - */ - is_unevictable = false; - lru_cache_add(page); - } else { - /* - * Put unevictable pages directly on zone's unevictable - * list. - */ - is_unevictable = true; - add_page_to_unevictable_list(page); - /* - * When racing with an mlock or AS_UNEVICTABLE clearing - * (page is unlocked) make sure that if the other thread - * does not observe our setting of PG_lru and fails - * isolation/check_move_unevictable_pages, - * we see PG_mlocked/AS_UNEVICTABLE cleared below and move - * the page back to the evictable list. - * - * The other side is TestClearPageMlocked() or shmem_lock(). - */ - smp_mb(); - } - - /* - * page's status can change while we move it among lru. If an evictable - * page is on unevictable list, it never be freed. To avoid that, - * check after we added it to the list, again. - */ - if (is_unevictable && page_evictable(page)) { - if (!isolate_lru_page(page)) { - put_page(page); - goto redo; - } - /* This means someone else dropped this page from LRU - * So, it will be freed or putback to LRU again. There is - * nothing to do here. - */ - } - - if (was_unevictable && !is_unevictable) - count_vm_event(UNEVICTABLE_PGRESCUED); - else if (!was_unevictable && is_unevictable) - count_vm_event(UNEVICTABLE_PGCULLED); - + lru_cache_add(page); put_page(page); /* drop ref from isolate */ } From 88913bd8ea2a75d7e460a4bed5f75e1c32660d7e Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 21 Feb 2018 14:45:32 -0800 Subject: [PATCH 218/269] kernel/relay.c: limit kmalloc size to KMALLOC_MAX_SIZE chan->n_subbufs is set by the user and relay_create_buf() does a kmalloc() of chan->n_subbufs * sizeof(size_t *). kmalloc_slab() will generate a warning when this fails if chan->subbufs * sizeof(size_t *) > KMALLOC_MAX_SIZE. Limit chan->n_subbufs to the maximum allowed kmalloc() size. Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1802061216100.122576@chino.kir.corp.google.com Fixes: f6302f1bcd75 ("relay: prevent integer overflow in relay_open()") Signed-off-by: David Rientjes Reviewed-by: Andrew Morton Cc: Jens Axboe Cc: Dave Jiang Cc: Al Viro Cc: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/relay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/relay.c b/kernel/relay.c index c3029402f15c3..c955b10c973c0 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -163,7 +163,7 @@ static struct rchan_buf *relay_create_buf(struct rchan *chan) { struct rchan_buf *buf; - if (chan->n_subbufs > UINT_MAX / sizeof(size_t *)) + if (chan->n_subbufs > KMALLOC_MAX_SIZE / sizeof(size_t *)) return NULL; buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL); From 2be04df5668d81f9a98e57b81bc53f72bd5f4f92 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 21 Feb 2018 14:45:35 -0800 Subject: [PATCH 219/269] certs/blacklist_nohashes.c: fix const confusion in certs blacklist const must be marked __initconst, not __initdata. Link: http://lkml.kernel.org/r/20171222001335.1987-1-andi@firstfloor.org Signed-off-by: Andi Kleen Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- certs/blacklist_nohashes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/certs/blacklist_nohashes.c b/certs/blacklist_nohashes.c index 73fd99098ad7c..753b703ef0ef8 100644 --- a/certs/blacklist_nohashes.c +++ b/certs/blacklist_nohashes.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "blacklist.h" -const char __initdata *const blacklist_hashes[] = { +const char __initconst *const blacklist_hashes[] = { NULL }; From 7ba716698cc53f8d5367766c93c538c7da6c68ce Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 21 Feb 2018 14:45:39 -0800 Subject: [PATCH 220/269] mm, swap, frontswap: fix THP swap if frontswap enabled It was reported by Sergey Senozhatsky that if THP (Transparent Huge Page) and frontswap (via zswap) are both enabled, when memory goes low so that swap is triggered, segfault and memory corruption will occur in random user space applications as follow, kernel: urxvt[338]: segfault at 20 ip 00007fc08889ae0d sp 00007ffc73a7fc40 error 6 in libc-2.26.so[7fc08881a000+1ae000] #0 0x00007fc08889ae0d _int_malloc (libc.so.6) #1 0x00007fc08889c2f3 malloc (libc.so.6) #2 0x0000560e6004bff7 _Z14rxvt_wcstoutf8PKwi (urxvt) #3 0x0000560e6005e75c n/a (urxvt) #4 0x0000560e6007d9f1 _ZN16rxvt_perl_interp6invokeEP9rxvt_term9hook_typez (urxvt) #5 0x0000560e6003d988 _ZN9rxvt_term9cmd_parseEv (urxvt) #6 0x0000560e60042804 _ZN9rxvt_term6pty_cbERN2ev2ioEi (urxvt) #7 0x0000560e6005c10f _Z17ev_invoke_pendingv (urxvt) #8 0x0000560e6005cb55 ev_run (urxvt) #9 0x0000560e6003b9b9 main (urxvt) #10 0x00007fc08883af4a __libc_start_main (libc.so.6) #11 0x0000560e6003f9da _start (urxvt) After bisection, it was found the first bad commit is bd4c82c22c36 ("mm, THP, swap: delay splitting THP after swapped out"). The root cause is as follows: When the pages are written to swap device during swapping out in swap_writepage(), zswap (fontswap) is tried to compress the pages to improve performance. But zswap (frontswap) will treat THP as a normal page, so only the head page is saved. After swapping in, tail pages will not be restored to their original contents, causing memory corruption in the applications. This is fixed by refusing to save page in the frontswap store functions if the page is a THP. So that the THP will be swapped out to swap device. Another choice is to split THP if frontswap is enabled. But it is found that the frontswap enabling isn't flexible. For example, if CONFIG_ZSWAP=y (cannot be module), frontswap will be enabled even if zswap itself isn't enabled. Frontswap has multiple backends, to make it easy for one backend to enable THP support, the THP checking is put in backend frontswap store functions instead of the general interfaces. Link: http://lkml.kernel.org/r/20180209084947.22749-1-ying.huang@intel.com Fixes: bd4c82c22c367e068 ("mm, THP, swap: delay splitting THP after swapped out") Signed-off-by: "Huang, Ying" Reported-by: Sergey Senozhatsky Tested-by: Sergey Senozhatsky Suggested-by: Minchan Kim [put THP checking in backend] Cc: Konrad Rzeszutek Wilk Cc: Dan Streetman Cc: Seth Jennings Cc: Tetsuo Handa Cc: Shaohua Li Cc: Michal Hocko Cc: Johannes Weiner Cc: Mel Gorman Cc: Shakeel Butt Cc: Boris Ostrovsky Cc: Juergen Gross Cc: [4.14] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/xen/tmem.c | 4 ++++ mm/zswap.c | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index bf13d1ec51f3b..04e7b3b29bac8 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -284,6 +284,10 @@ static int tmem_frontswap_store(unsigned type, pgoff_t offset, int pool = tmem_frontswap_poolid; int ret; + /* THP isn't supported */ + if (PageTransHuge(page)) + return -1; + if (pool < 0) return -1; if (ind64 != ind) diff --git a/mm/zswap.c b/mm/zswap.c index c004aa4fd3f48..61a5c41972dba 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1007,6 +1007,12 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, u8 *src, *dst; struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) }; + /* THP isn't supported */ + if (PageTransHuge(page)) { + ret = -EINVAL; + goto reject; + } + if (!zswap_enabled || !tree) { ret = -ENODEV; goto reject; From b1a8a7a70043400d1e685899548c92b92f640d71 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 21 Feb 2018 14:45:43 -0800 Subject: [PATCH 221/269] ida: do zeroing in ida_pre_get() As far as I can tell, the only place the per-cpu ida_bitmap is populated is in ida_pre_get. The pre-allocated element is stolen in two places in ida_get_new_above, in both cases immediately followed by a memset(0). Since ida_get_new_above is called with locks held, do the zeroing in ida_pre_get, or rather let kmalloc() do it. Also, apparently gcc generates ~44 bytes of code to do a memset(, 0, 128): $ scripts/bloat-o-meter vmlinux.{0,1} add/remove: 0/0 grow/shrink: 2/1 up/down: 5/-88 (-83) Function old new delta ida_pre_get 115 119 +4 vermagic 27 28 +1 ida_get_new_above 715 627 -88 Link: http://lkml.kernel.org/r/20180108225634.15340-1-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: Matthew Wilcox Cc: Eric Biggers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 2 -- lib/radix-tree.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/idr.c b/lib/idr.c index c98d77fcf3934..99ec5bc89d252 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -431,7 +431,6 @@ int ida_get_new_above(struct ida *ida, int start, int *id) bitmap = this_cpu_xchg(ida_bitmap, NULL); if (!bitmap) return -EAGAIN; - memset(bitmap, 0, sizeof(*bitmap)); bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT; rcu_assign_pointer(*slot, bitmap); } @@ -464,7 +463,6 @@ int ida_get_new_above(struct ida *ida, int start, int *id) bitmap = this_cpu_xchg(ida_bitmap, NULL); if (!bitmap) return -EAGAIN; - memset(bitmap, 0, sizeof(*bitmap)); __set_bit(bit, bitmap->bitmap); radix_tree_iter_replace(root, &iter, slot, bitmap); } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 0a7ae3288a248..8e00138d593fd 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2125,7 +2125,7 @@ int ida_pre_get(struct ida *ida, gfp_t gfp) preempt_enable(); if (!this_cpu_read(ida_bitmap)) { - struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp); + struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp); if (!bitmap) return 0; if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap)) From 14fec9eba43b05d39825128e4354a2dc50fb59ea Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 21 Feb 2018 14:45:46 -0800 Subject: [PATCH 222/269] mm/zpool.c: zpool_evictable: fix mismatch in parameter name and kernel-doc [akpm@linux-foundation.org: add colon, per Randy] Link: http://lkml.kernel.org/r/1518116984-21141-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Reviewed-by: Andrew Morton Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/zpool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/zpool.c b/mm/zpool.c index f8cb83e7699bb..01a771e304fab 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -360,7 +360,7 @@ u64 zpool_get_total_size(struct zpool *zpool) /** * zpool_evictable() - Test if zpool is potentially evictable - * @pool The zpool to test + * @zpool: The zpool to test * * Zpool is only potentially evictable when it's created with struct * zpool_ops.evict and its driver implements struct zpool_driver.shrink. From cb6f0f34802dd7148d930f4f8d1cce991b8c23be Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 21 Feb 2018 14:45:50 -0800 Subject: [PATCH 223/269] mm/swap.c: make functions and their kernel-doc agree (again) There was a conflict between the commit e02a9f048ef7 ("mm/swap.c: make functions and their kernel-doc agree") and the commit f144c390f905 ("mm: docs: fix parameter names mismatch") that both tried to fix mismatch betweeen pagevec_lookup_entries() parameter names and their description. Since nr_entries is a better name for the parameter, fix the description again. Link: http://lkml.kernel.org/r/1518116946-20947-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Acked-by: Randy Dunlap Reviewed-by: Andrew Morton Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/swap.c b/mm/swap.c index 2d337710218fa..0f17330dd0e5a 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -925,7 +925,7 @@ EXPORT_SYMBOL(__pagevec_lru_add); * @pvec: Where the resulting entries are placed * @mapping: The address_space to search * @start: The starting entry index - * @nr_pages: The maximum number of pages + * @nr_entries: The maximum number of pages * @indices: The cache indices corresponding to the entries in @pvec * * pagevec_lookup_entries() will search for and return a group of up From 173a3efd3edb2ef6ef07471397c5f542a360e9c1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Feb 2018 14:45:54 -0800 Subject: [PATCH 224/269] bug.h: work around GCC PR82365 in BUG() Looking at functions with large stack frames across all architectures led me discovering that BUG() suffers from the same problem as fortify_panic(), which I've added a workaround for already. In short, variables that go out of scope by calling a noreturn function or __builtin_unreachable() keep using stack space in functions afterwards. A workaround that was identified is to insert an empty assembler statement just before calling the function that doesn't return. I'm adding a macro "barrier_before_unreachable()" to document this, and insert calls to that in all instances of BUG() that currently suffer from this problem. The files that saw the largest change from this had these frame sizes before, and much less with my patch: fs/ext4/inode.c:82:1: warning: the frame size of 1672 bytes is larger than 800 bytes [-Wframe-larger-than=] fs/ext4/namei.c:434:1: warning: the frame size of 904 bytes is larger than 800 bytes [-Wframe-larger-than=] fs/ext4/super.c:2279:1: warning: the frame size of 1160 bytes is larger than 800 bytes [-Wframe-larger-than=] fs/ext4/xattr.c:146:1: warning: the frame size of 1168 bytes is larger than 800 bytes [-Wframe-larger-than=] fs/f2fs/inode.c:152:1: warning: the frame size of 1424 bytes is larger than 800 bytes [-Wframe-larger-than=] net/netfilter/ipvs/ip_vs_core.c:1195:1: warning: the frame size of 1068 bytes is larger than 800 bytes [-Wframe-larger-than=] net/netfilter/ipvs/ip_vs_core.c:395:1: warning: the frame size of 1084 bytes is larger than 800 bytes [-Wframe-larger-than=] net/netfilter/ipvs/ip_vs_ftp.c:298:1: warning: the frame size of 928 bytes is larger than 800 bytes [-Wframe-larger-than=] net/netfilter/ipvs/ip_vs_ftp.c:418:1: warning: the frame size of 908 bytes is larger than 800 bytes [-Wframe-larger-than=] net/netfilter/ipvs/ip_vs_lblcr.c:718:1: warning: the frame size of 960 bytes is larger than 800 bytes [-Wframe-larger-than=] drivers/net/xen-netback/netback.c:1500:1: warning: the frame size of 1088 bytes is larger than 800 bytes [-Wframe-larger-than=] In case of ARC and CRIS, it turns out that the BUG() implementation actually does return (or at least the compiler thinks it does), resulting in lots of warnings about uninitialized variable use and leaving noreturn functions, such as: block/cfq-iosched.c: In function 'cfq_async_queue_prio': block/cfq-iosched.c:3804:1: error: control reaches end of non-void function [-Werror=return-type] include/linux/dmaengine.h: In function 'dma_maxpq': include/linux/dmaengine.h:1123:1: error: control reaches end of non-void function [-Werror=return-type] This makes them call __builtin_trap() instead, which should normally dump the stack and kill the current process, like some of the other architectures already do. I tried adding barrier_before_unreachable() to panic() and fortify_panic() as well, but that had very little effect, so I'm not submitting that patch. Vineet said: : For ARC, it is double win. : : 1. Fixes 3 -Wreturn-type warnings : : | ../net/core/ethtool.c:311:1: warning: control reaches end of non-void function : [-Wreturn-type] : | ../kernel/sched/core.c:3246:1: warning: control reaches end of non-void function : [-Wreturn-type] : | ../include/linux/sunrpc/svc_xprt.h:180:1: warning: control reaches end of : non-void function [-Wreturn-type] : : 2. bloat-o-meter reports code size improvements as gcc elides the : generated code for stack return. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365 Link: http://lkml.kernel.org/r/20171219114112.939391-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Acked-by: Vineet Gupta [arch/arc] Tested-by: Vineet Gupta [arch/arc] Cc: Mikael Starvik Cc: Jesper Nilsson Cc: Tony Luck Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: "David S. Miller" Cc: Christopher Li Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Kees Cook Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Will Deacon Cc: "Steven Rostedt (VMware)" Cc: Mark Rutland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/include/asm/bug.h | 3 ++- arch/cris/include/arch-v10/arch/bug.h | 11 +++++++++-- arch/ia64/include/asm/bug.h | 6 +++++- arch/m68k/include/asm/bug.h | 3 +++ arch/sparc/include/asm/bug.h | 6 +++++- include/asm-generic/bug.h | 1 + include/linux/compiler-gcc.h | 15 ++++++++++++++- include/linux/compiler.h | 5 +++++ 8 files changed, 44 insertions(+), 6 deletions(-) diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h index ea022d47896ce..21ec82466d62c 100644 --- a/arch/arc/include/asm/bug.h +++ b/arch/arc/include/asm/bug.h @@ -23,7 +23,8 @@ void die(const char *str, struct pt_regs *regs, unsigned long address); #define BUG() do { \ pr_warn("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \ - dump_stack(); \ + barrier_before_unreachable(); \ + __builtin_trap(); \ } while (0) #define HAVE_ARCH_BUG diff --git a/arch/cris/include/arch-v10/arch/bug.h b/arch/cris/include/arch-v10/arch/bug.h index 905afeacfedf5..06da9d49152a0 100644 --- a/arch/cris/include/arch-v10/arch/bug.h +++ b/arch/cris/include/arch-v10/arch/bug.h @@ -44,18 +44,25 @@ struct bug_frame { * not be used like this with newer versions of gcc. */ #define BUG() \ +do { \ __asm__ __volatile__ ("clear.d [" __stringify(BUG_MAGIC) "]\n\t"\ "movu.w " __stringify(__LINE__) ",$r0\n\t"\ "jump 0f\n\t" \ ".section .rodata\n" \ "0:\t.string \"" __FILE__ "\"\n\t" \ - ".previous") + ".previous"); \ + unreachable(); \ +} while (0) #endif #else /* This just causes an oops. */ -#define BUG() (*(int *)0 = 0) +#define BUG() \ +do { \ + barrier_before_unreachable(); \ + __builtin_trap(); \ +} while (0) #endif diff --git a/arch/ia64/include/asm/bug.h b/arch/ia64/include/asm/bug.h index bd3eeb8d1cfa3..66b37a5327654 100644 --- a/arch/ia64/include/asm/bug.h +++ b/arch/ia64/include/asm/bug.h @@ -4,7 +4,11 @@ #ifdef CONFIG_BUG #define ia64_abort() __builtin_trap() -#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0) +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + barrier_before_unreachable(); \ + ia64_abort(); \ +} while (0) /* should this BUG be made generic? */ #define HAVE_ARCH_BUG diff --git a/arch/m68k/include/asm/bug.h b/arch/m68k/include/asm/bug.h index b7e2bf1ba4a60..275dca1435bf9 100644 --- a/arch/m68k/include/asm/bug.h +++ b/arch/m68k/include/asm/bug.h @@ -8,16 +8,19 @@ #ifndef CONFIG_SUN3 #define BUG() do { \ pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + barrier_before_unreachable(); \ __builtin_trap(); \ } while (0) #else #define BUG() do { \ pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + barrier_before_unreachable(); \ panic("BUG!"); \ } while (0) #endif #else #define BUG() do { \ + barrier_before_unreachable(); \ __builtin_trap(); \ } while (0) #endif diff --git a/arch/sparc/include/asm/bug.h b/arch/sparc/include/asm/bug.h index 6f17528356b2f..ea53e418f6c04 100644 --- a/arch/sparc/include/asm/bug.h +++ b/arch/sparc/include/asm/bug.h @@ -9,10 +9,14 @@ void do_BUG(const char *file, int line); #define BUG() do { \ do_BUG(__FILE__, __LINE__); \ + barrier_before_unreachable(); \ __builtin_trap(); \ } while (0) #else -#define BUG() __builtin_trap() +#define BUG() do { \ + barrier_before_unreachable(); \ + __builtin_trap(); \ +} while (0) #endif #define HAVE_ARCH_BUG diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 963b755d19b03..a7613e1b0c87a 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -52,6 +52,7 @@ struct bug_entry { #ifndef HAVE_ARCH_BUG #define BUG() do { \ printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \ + barrier_before_unreachable(); \ panic("BUG!"); \ } while (0) #endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 73bc63e0a1c4b..901c1ccb3374a 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -207,6 +207,15 @@ #endif #endif +/* + * calling noreturn functions, __builtin_unreachable() and __builtin_trap() + * confuse the stack allocation in gcc, leading to overly large stack + * frames, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365 + * + * Adding an empty inline assembly before it works around the problem + */ +#define barrier_before_unreachable() asm volatile("") + /* * Mark a position in code as unreachable. This can be used to * suppress control flow warnings after asm blocks that transfer @@ -217,7 +226,11 @@ * unreleased. Really, we need to have autoconf for the kernel. */ #define unreachable() \ - do { annotate_unreachable(); __builtin_unreachable(); } while (0) + do { \ + annotate_unreachable(); \ + barrier_before_unreachable(); \ + __builtin_unreachable(); \ + } while (0) /* Mark a function definition as prohibited from being cloned. */ #define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index e835fc0423ecc..ab4711c636014 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -86,6 +86,11 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, # define barrier_data(ptr) barrier() #endif +/* workaround for GCC PR82365 if needed */ +#ifndef barrier_before_unreachable +# define barrier_before_unreachable() do { } while (0) +#endif + /* Unreachable code */ #ifdef CONFIG_STACK_VALIDATION /* From bdefe01a6b14bde268741435ac854fda4ef7e847 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 21 Feb 2018 14:45:58 -0800 Subject: [PATCH 225/269] selftests/memfd: add run_fuse_test.sh to TEST_FILES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While testing memfd tests, there is a missing script, as reported by kselftest: ./run_tests.sh: line 7: ./run_fuse_test.sh: No such file or directory Link: http://lkml.kernel.org/r/1517955779-11386-1-git-send-email-daniel.diaz@linaro.org Signed-off-by: Anders Roxell Signed-off-by: Daniel Díaz Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/memfd/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile index a5276a91dfbfc..0862e6f47a38c 100644 --- a/tools/testing/selftests/memfd/Makefile +++ b/tools/testing/selftests/memfd/Makefile @@ -5,6 +5,7 @@ CFLAGS += -I../../../../include/ CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_tests.sh +TEST_FILES := run_fuse_test.sh TEST_GEN_FILES := memfd_test fuse_mnt fuse_test fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags) From 698d0831ba87b92ae10b15e8203cfd59f5a59a35 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 21 Feb 2018 14:46:01 -0800 Subject: [PATCH 226/269] vmalloc: fix __GFP_HIGHMEM usage for vmalloc_32 on 32b systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kai Heng Feng has noticed that BUG_ON(PageHighMem(pg)) triggers in drivers/media/common/saa7146/saa7146_core.c since 19809c2da28a ("mm, vmalloc: use __GFP_HIGHMEM implicitly"). saa7146_vmalloc_build_pgtable uses vmalloc_32 and it is reasonable to expect that the resulting page is not in highmem. The above commit aimed to add __GFP_HIGHMEM only for those requests which do not specify any zone modifier gfp flag. vmalloc_32 relies on GFP_VMALLOC32 which should do the right thing. Except it has been missed that GFP_VMALLOC32 is an alias for GFP_KERNEL on 32b architectures. Thanks to Matthew to notice this. Fix the problem by unconditionally setting GFP_DMA32 in GFP_VMALLOC32 for !64b arches (as a bailout). This should do the right thing and use ZONE_NORMAL which should be always below 4G on 32b systems. Debugged by Matthew Wilcox. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20180212095019.GX21609@dhcp22.suse.cz Fixes: 19809c2da28a ("mm, vmalloc: use __GFP_HIGHMEM implicitly”) Signed-off-by: Michal Hocko Reported-by: Kai Heng Feng Cc: Matthew Wilcox Cc: Laura Abbott Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 673942094328a..ebff729cc9562 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1943,11 +1943,15 @@ void *vmalloc_exec(unsigned long size) } #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) -#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL +#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL) #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA) -#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL +#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL) #else -#define GFP_VMALLOC32 GFP_KERNEL +/* + * 64b systems should always have either DMA or DMA32 zones. For others + * GFP_DMA32 should do the right thing and use the normal zone. + */ +#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL #endif /** From 908009e832b4e58796ed95d4544e3210bc0ff2c4 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 21 Feb 2018 14:46:05 -0800 Subject: [PATCH 227/269] lib/Kconfig.debug: enable RUNTIME_TESTING_MENU Commit d3deafaa8b5c ("lib/: make RUNTIME_TESTS a menuconfig to ease disabling it all") causes a regression when using runtime tests due to it defaults RUNTIME_TESTING_MENU to not set. Link: http://lkml.kernel.org/r/20180214133015.10090-1-anders.roxell@linaro.org Fixes: d3deafaa8b5c ("lib/: make RUNTIME_TESTS a menuconfig to easedisabling it all") Signed-off-by: Anders Roxell Cc: Vincent Legoll Cc: Ingo Molnar Cc: Byungchul Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6088408ef26c5..64155e310a9f2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1642,6 +1642,7 @@ config DMA_API_DEBUG menuconfig RUNTIME_TESTING_MENU bool "Runtime Testing" + def_bool y if RUNTIME_TESTING_MENU From 895f7b8e90200cf1a5dc313329369adf30e51f9a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 21 Feb 2018 14:46:09 -0800 Subject: [PATCH 228/269] mm: don't defer struct page initialization for Xen pv guests Commit f7f99100d8d9 ("mm: stop zeroing memory during allocation in vmemmap") broke Xen pv domains in some configurations, as the "Pinned" information in struct page of early page tables could get lost. This will lead to the kernel trying to write directly into the page tables instead of asking the hypervisor to do so. The result is a crash like the following: BUG: unable to handle kernel paging request at ffff8801ead19008 IP: xen_set_pud+0x4e/0xd0 PGD 1c0a067 P4D 1c0a067 PUD 23a0067 PMD 1e9de0067 PTE 80100001ead19065 Oops: 0003 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.14.0-default+ #271 Hardware name: Dell Inc. Latitude E6440/0159N7, BIOS A07 06/26/2014 task: ffffffff81c10480 task.stack: ffffffff81c00000 RIP: e030:xen_set_pud+0x4e/0xd0 Call Trace: __pmd_alloc+0x128/0x140 ioremap_page_range+0x3f4/0x410 __ioremap_caller+0x1c3/0x2e0 acpi_os_map_iomem+0x175/0x1b0 acpi_tb_acquire_table+0x39/0x66 acpi_tb_validate_table+0x44/0x7c acpi_tb_verify_temp_table+0x45/0x304 acpi_reallocate_root_table+0x12d/0x141 acpi_early_init+0x4d/0x10a start_kernel+0x3eb/0x4a1 xen_start_kernel+0x528/0x532 Code: 48 01 e8 48 0f 42 15 a2 fd be 00 48 01 d0 48 ba 00 00 00 00 00 ea ff ff 48 c1 e8 0c 48 c1 e0 06 48 01 d0 48 8b 00 f6 c4 02 75 5d <4c> 89 65 00 5b 5d 41 5c c3 65 8b 05 52 9f fe 7e 89 c0 48 0f a3 RIP: xen_set_pud+0x4e/0xd0 RSP: ffffffff81c03cd8 CR2: ffff8801ead19008 ---[ end trace 38eca2e56f1b642e ]--- Avoid this problem by not deferring struct page initialization when running as Xen pv guest. Pavel said: : This is unique for Xen, so this particular issue won't effect other : configurations. I am going to investigate if there is a way to : re-enable deferred page initialization on xen guests. [akpm@linux-foundation.org: explicitly include xen.h] Link: http://lkml.kernel.org/r/20180216154101.22865-1-jgross@suse.com Fixes: f7f99100d8d95d ("mm: stop zeroing memory during allocation in vmemmap") Signed-off-by: Juergen Gross Reviewed-by: Pavel Tatashin Cc: Steven Sistare Cc: Daniel Jordan Cc: Bob Picco Cc: [4.15.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 81e18ceef579c..cb416723538fe 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -347,6 +348,9 @@ static inline bool update_defer_init(pg_data_t *pgdat, /* Always populate low zones for address-constrained allocations */ if (zone_end < pgdat_end_pfn(pgdat)) return true; + /* Xen PV domains need page structures early */ + if (xen_pv_domain()) + return true; (*nr_initialised)++; if ((*nr_initialised > pgdat->static_init_pgcnt) && (pfn & (PAGES_PER_SECTION - 1)) == 0) { From a7dcdf6ea1b264ee7655a8cafe844f06eed3906a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Feb 2018 23:07:33 +0100 Subject: [PATCH 229/269] bpf: clean up unused-variable warning The only user of this variable is inside of an #ifdef, causing a warning without CONFIG_INET: net/core/filter.c: In function '____bpf_sock_ops_cb_flags_set': net/core/filter.c:3382:6: error: unused variable 'val' [-Werror=unused-variable] int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS; This replaces the #ifdef with a nicer IS_ENABLED() check that makes the code more readable and avoids the warning. Fixes: b13d88072172 ("bpf: Adds field bpf_sock_ops_cb_flags to tcp_sock") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Borkmann --- net/core/filter.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 08ab4c65a998d..0c121adbdbaaa 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3381,17 +3381,13 @@ BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock, struct sock *sk = bpf_sock->sk; int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS; - if (!sk_fullsock(sk)) + if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk)) return -EINVAL; -#ifdef CONFIG_INET if (val) tcp_sk(sk)->bpf_sock_ops_cb_flags = val; return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS); -#else - return -EINVAL; -#endif } static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = { From b52db43a3d2e34b4ef2bb563d95227bb755027df Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 21 Feb 2018 17:51:16 +0100 Subject: [PATCH 230/269] selftests/bpf: tcpbpf_kern: use in6_* macros from glibc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both glibc and the kernel have in6_* macros definitions. Build fails because it picks up wrong in6_* macro from the kernel header and not the header from glibc. Fixes build error below: clang -I. -I./include/uapi -I../../../include/uapi -Wno-compare-distinct-pointer-types \ -O2 -target bpf -emit-llvm -c test_tcpbpf_kern.c -o - | \ llc -march=bpf -mcpu=generic -filetype=obj -o .../tools/testing/selftests/bpf/test_tcpbpf_kern.o In file included from test_tcpbpf_kern.c:12: .../netinet/in.h:101:5: error: expected identifier IPPROTO_HOPOPTS = 0, /* IPv6 Hop-by-Hop options. */ ^ .../linux/in6.h:131:26: note: expanded from macro 'IPPROTO_HOPOPTS' ^ In file included from test_tcpbpf_kern.c:12: /usr/include/netinet/in.h:103:5: error: expected identifier IPPROTO_ROUTING = 43, /* IPv6 routing header. */ ^ .../linux/in6.h:132:26: note: expanded from macro 'IPPROTO_ROUTING' ^ In file included from test_tcpbpf_kern.c:12: .../netinet/in.h:105:5: error: expected identifier IPPROTO_FRAGMENT = 44, /* IPv6 fragmentation header. */ ^ Since both glibc and the kernel have in6_* macros definitions, use the one from glibc. Kernel headers will check for previous libc definitions by including include/linux/libc-compat.h. Reported-by: Daniel Díaz Signed-off-by: Anders Roxell Tested-by: Daniel Díaz Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_tcpbpf_kern.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c index 57119ad57a3fb..3e645ee41ed5f 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include From 31a8260d3e34aaddf821388b8e0d589f44401f75 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 21 Feb 2018 22:30:01 +0100 Subject: [PATCH 231/269] selftests/bpf: update gitignore with test_libbpf_open MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bpf builds a test program for loading BPF ELF files. Add the executable to the .gitignore list. Signed-off-by: Anders Roxell Tested-by: Daniel Díaz Acked-by: David S. Miller Acked-by: Shuah Khan Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index cc15af2e54fe8..9cf83f895d988 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -11,3 +11,4 @@ test_progs test_tcpbpf_user test_verifier_log feature +test_libbpf_open From 2a040f9f39d3b020c79e08dec26d12a7ce131c10 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Tue, 20 Feb 2018 19:47:45 -0700 Subject: [PATCH 232/269] seccomp, ptrace: switch get_metadata types to arch independent Commit 26500475ac1b ("ptrace, seccomp: add support for retrieving seccomp metadata") introduced `struct seccomp_metadata`, which contained unsigned longs that should be arch independent. The type of the flags member was chosen to match the corresponding argument to seccomp(), and so we need something at least as big as unsigned long. My understanding is that __u64 should fit the bill, so let's switch both types to that. While this is userspace facing, it was only introduced in 4.16-rc2, and so should be safe assuming it goes in before then. Reported-by: "Dmitry V. Levin" Signed-off-by: Tycho Andersen CC: Kees Cook CC: Oleg Nesterov Reviewed-by: "Dmitry V. Levin" Signed-off-by: Kees Cook --- include/uapi/linux/ptrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index e46d82b911669..d5a1b8a492b93 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -69,8 +69,8 @@ struct ptrace_peeksiginfo_args { #define PTRACE_SECCOMP_GET_METADATA 0x420d struct seccomp_metadata { - unsigned long filter_off; /* Input: which filter */ - unsigned int flags; /* Output: filter's flags */ + __u64 filter_off; /* Input: which filter */ + __u64 flags; /* Output: filter's flags */ }; /* Read signals from a shared (process wide) queue */ From 63bb0045b98ae821e56e27c2250e14bb0ae663e5 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Tue, 20 Feb 2018 19:47:46 -0700 Subject: [PATCH 233/269] ptrace, seccomp: tweak get_metadata behavior slightly Previously if users passed a small size for the input structure size, they would get get odd behavior. It doesn't make sense to pass a structure smaller than at least filter_off size, so let's just give -EINVAL in this case. This changes userspace visible behavior, but was only introduced in commit 26500475ac1b ("ptrace, seccomp: add support for retrieving seccomp metadata") in 4.16-rc2, so should be safe to change if merged before then. Reported-by: Eugene Syromiatnikov Signed-off-by: Tycho Andersen CC: Kees Cook CC: Oleg Nesterov Signed-off-by: Kees Cook --- kernel/seccomp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 61bd9dc260c8e..1245b2338fff1 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -1076,14 +1076,16 @@ long seccomp_get_metadata(struct task_struct *task, size = min_t(unsigned long, size, sizeof(kmd)); - if (copy_from_user(&kmd, data, size)) + if (size < sizeof(kmd.filter_off)) + return -EINVAL; + + if (copy_from_user(&kmd.filter_off, data, sizeof(kmd.filter_off))) return -EFAULT; filter = get_nth_filter(task, kmd.filter_off); if (IS_ERR(filter)) return PTR_ERR(filter); - memset(&kmd, 0, sizeof(kmd)); if (filter->log) kmd.flags |= SECCOMP_FILTER_FLAG_LOG; From d057dc4e35e16050befa3dda943876dab39cbf80 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Tue, 20 Feb 2018 19:47:47 -0700 Subject: [PATCH 234/269] seccomp: add a selftest for get_metadata Let's test that we get the flags correctly, and that we preserve the filter index across the ptrace(PTRACE_SECCOMP_GET_METADATA) correctly. Signed-off-by: Tycho Andersen CC: Kees Cook Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 24dbf634e2dd8..92db48825dc13 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -141,6 +141,15 @@ struct seccomp_data { #define SECCOMP_FILTER_FLAG_LOG 2 #endif +#ifndef PTRACE_SECCOMP_GET_METADATA +#define PTRACE_SECCOMP_GET_METADATA 0x420d + +struct seccomp_metadata { + __u64 filter_off; /* Input: which filter */ + __u64 flags; /* Output: filter's flags */ +}; +#endif + #ifndef seccomp int seccomp(unsigned int op, unsigned int flags, void *args) { @@ -2845,6 +2854,58 @@ TEST(get_action_avail) EXPECT_EQ(errno, EOPNOTSUPP); } +TEST(get_metadata) +{ + pid_t pid; + int pipefd[2]; + char buf; + struct seccomp_metadata md; + + ASSERT_EQ(0, pipe(pipefd)); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) { + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + /* one with log, one without */ + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, + SECCOMP_FILTER_FLAG_LOG, &prog)); + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); + + ASSERT_EQ(0, close(pipefd[0])); + ASSERT_EQ(1, write(pipefd[1], "1", 1)); + ASSERT_EQ(0, close(pipefd[1])); + + while (1) + sleep(100); + } + + ASSERT_EQ(0, close(pipefd[1])); + ASSERT_EQ(1, read(pipefd[0], &buf, 1)); + + ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid)); + ASSERT_EQ(pid, waitpid(pid, NULL, 0)); + + md.filter_off = 0; + ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md)); + EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); + EXPECT_EQ(md.filter_off, 0); + + md.filter_off = 1; + ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md)); + EXPECT_EQ(md.flags, 0); + EXPECT_EQ(md.filter_off, 1); + + ASSERT_EQ(0, kill(pid, SIGKILL)); +} + /* * TODO: * - add microbenchmarks From fe32a815f05c8568669a062587435e15f9345764 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 8 Feb 2018 14:54:05 +0100 Subject: [PATCH 235/269] i2c: bcm2835: Set up the rising/falling edge delays We were leaving them in the power on state (or the state the firmware had set up for some client, if we were taking over from them). The boot state was 30 core clocks, when we actually want to sample some time after (to make sure that the new input bit has actually arrived). Signed-off-by: Eric Anholt Signed-off-by: Boris Brezillon Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/busses/i2c-bcm2835.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index cd07a69e2e935..44deae78913e5 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -50,6 +50,9 @@ #define BCM2835_I2C_S_CLKT BIT(9) #define BCM2835_I2C_S_LEN BIT(10) /* Fake bit for SW error reporting */ +#define BCM2835_I2C_FEDL_SHIFT 16 +#define BCM2835_I2C_REDL_SHIFT 0 + #define BCM2835_I2C_CDIV_MIN 0x0002 #define BCM2835_I2C_CDIV_MAX 0xFFFE @@ -81,7 +84,7 @@ static inline u32 bcm2835_i2c_readl(struct bcm2835_i2c_dev *i2c_dev, u32 reg) static int bcm2835_i2c_set_divider(struct bcm2835_i2c_dev *i2c_dev) { - u32 divider; + u32 divider, redl, fedl; divider = DIV_ROUND_UP(clk_get_rate(i2c_dev->clk), i2c_dev->bus_clk_rate); @@ -100,6 +103,22 @@ static int bcm2835_i2c_set_divider(struct bcm2835_i2c_dev *i2c_dev) bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_DIV, divider); + /* + * Number of core clocks to wait after falling edge before + * outputting the next data bit. Note that both FEDL and REDL + * can't be greater than CDIV/2. + */ + fedl = max(divider / 16, 1u); + + /* + * Number of core clocks to wait after rising edge before + * sampling the next incoming data bit. + */ + redl = max(divider / 4, 1u); + + bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_DEL, + (fedl << BCM2835_I2C_FEDL_SHIFT) | + (redl << BCM2835_I2C_REDL_SHIFT)); return 0; } From c396b9a03e3bb5e95e036bdb0c7d614e0e1a4e3d Mon Sep 17 00:00:00 2001 From: Patryk Kocielnik Date: Fri, 26 Jan 2018 21:19:26 +0100 Subject: [PATCH 236/269] i2c: busses: i2c-sirf: Fix spelling: "formular" -> "formula". Fix spelling. Signed-off-by: Patryk Kocielnik [wsa: fixed "Initialization", too] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-sirf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-sirf.c b/drivers/i2c/busses/i2c-sirf.c index 2fd8b6d003910..87197ece0f903 100644 --- a/drivers/i2c/busses/i2c-sirf.c +++ b/drivers/i2c/busses/i2c-sirf.c @@ -341,7 +341,7 @@ static int i2c_sirfsoc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, adap); init_completion(&siic->done); - /* Controller Initalisation */ + /* Controller initialisation */ writel(SIRFSOC_I2C_RESET, siic->base + SIRFSOC_I2C_CTRL); while (readl(siic->base + SIRFSOC_I2C_CTRL) & SIRFSOC_I2C_RESET) @@ -369,7 +369,7 @@ static int i2c_sirfsoc_probe(struct platform_device *pdev) * but they start to affect the speed when clock is set to faster * frequencies. * Through the actual tests, use the different user_div value(which - * in the divider formular 'Fio / (Fi2c * user_div)') to adapt + * in the divider formula 'Fio / (Fi2c * user_div)') to adapt * the different ranges of i2c bus clock frequency, to make the SCL * more accurate. */ From d1fa74520dcdbeae891b30035e6c51aafa35306d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 17 Feb 2018 22:58:43 +0200 Subject: [PATCH 237/269] i2c: designware: Consider SCL GPIO optional GPIO library can return -ENOSYS for the failed request. Instead of failing ->probe() in this case override error code to 0. Fixes: ca382f5b38f3 ("i2c: designware: add i2c gpio recovery option") Reported-by: Dominik Brodowski Signed-off-by: Andy Shevchenko Tested-by: Dominik Brodowski Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index 55926ef41ef17..05732531829fe 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -644,7 +644,7 @@ static int i2c_dw_init_recovery_info(struct dw_i2c_dev *dev) gpio = devm_gpiod_get(dev->dev, "scl", GPIOD_OUT_HIGH); if (IS_ERR(gpio)) { r = PTR_ERR(gpio); - if (r == -ENOENT) + if (r == -ENOENT || r == -ENOSYS) return 0; return r; } From 15122ee2c515a253b0c66a3e618bc7ebe35105eb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 21 Feb 2018 12:59:27 +0000 Subject: [PATCH 238/269] arm64: Enforce BBM for huge IO/VMAP mappings ioremap_page_range doesn't honour break-before-make and attempts to put down huge mappings (using p*d_set_huge) over the top of pre-existing table entries. This leads to us leaking page table memory and also gives rise to TLB conflicts and spurious aborts, which have been seen in practice on Cortex-A75. Until this has been resolved, refuse to put block mappings when the existing entry is found to be present. Fixes: 324420bf91f60 ("arm64: add support for ioremap() block mappings") Reported-by: Hanjun Guo Reported-by: Lei Li Acked-by: Ard Biesheuvel Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 3161b853f29e1..84a019f550229 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -933,6 +933,11 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + + /* ioremap_page_range doesn't honour BBM */ + if (pud_present(READ_ONCE(*pudp))) + return 0; + BUG_ON(phys & ~PUD_MASK); set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); return 1; @@ -942,6 +947,11 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + + /* ioremap_page_range doesn't honour BBM */ + if (pmd_present(READ_ONCE(*pmdp))) + return 0; + BUG_ON(phys & ~PMD_MASK); set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot)); return 1; From 80475c48c6a8a65171e035e0915dc7996b5a0a65 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Thu, 22 Feb 2018 10:34:02 +0800 Subject: [PATCH 239/269] selftests/bpf/test_maps: exit child process without error in ENOMEM case test_maps contains a series of stress tests, and previously it will break the rest tests when it failed to alloc memory. ----------------------- Failed to create hashmap key=8 value=262144 'Cannot allocate memory' Failed to create hashmap key=16 value=262144 'Cannot allocate memory' Failed to create hashmap key=8 value=262144 'Cannot allocate memory' Failed to create hashmap key=8 value=262144 'Cannot allocate memory' test_maps: test_maps.c:955: run_parallel: Assertion `status == 0' failed. Aborted not ok 1..3 selftests: test_maps [FAIL] ----------------------- after this patch, the rest tests will be continue when it occurs an ENOMEM failure CC: Alexei Starovoitov CC: Philip Li Suggested-by: Daniel Borkmann Signed-off-by: Li Zhijian Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_maps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 436c4c72414f4..9e03a4c356a49 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -126,6 +126,8 @@ static void test_hashmap_sizes(int task, void *data) fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j, 2, map_flags); if (fd < 0) { + if (errno == ENOMEM) + return; printf("Failed to create hashmap key=%d value=%d '%s'\n", i, j, strerror(errno)); exit(1); From 971b42c038dc83e3327872d294fe7131bab152fc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 22 Feb 2018 14:38:33 +0000 Subject: [PATCH 240/269] PKCS#7: fix certificate chain verification When pkcs7_verify_sig_chain() is building the certificate chain for a SignerInfo using the certificates in the PKCS#7 message, it is passing the wrong arguments to public_key_verify_signature(). Consequently, when the next certificate is supposed to be used to verify the previous certificate, the next certificate is actually used to verify itself. An attacker can use this bug to create a bogus certificate chain that has no cryptographic relationship between the beginning and end. Fortunately I couldn't quite find a way to use this to bypass the overall signature verification, though it comes very close. Here's the reasoning: due to the bug, every certificate in the chain beyond the first actually has to be self-signed (where "self-signed" here refers to the actual key and signature; an attacker might still manipulate the certificate fields such that the self_signed flag doesn't actually get set, and thus the chain doesn't end immediately). But to pass trust validation (pkcs7_validate_trust()), either the SignerInfo or one of the certificates has to actually be signed by a trusted key. Since only self-signed certificates can be added to the chain, the only way for an attacker to introduce a trusted signature is to include a self-signed trusted certificate. But, when pkcs7_validate_trust_one() reaches that certificate, instead of trying to verify the signature on that certificate, it will actually look up the corresponding trusted key, which will succeed, and then try to verify the *previous* certificate, which will fail. Thus, disaster is narrowly averted (as far as I could tell). Fixes: 6c2dc5ae4ab7 ("X.509: Extract signature digest and make self-signed cert checks earlier") Cc: # v4.7+ Signed-off-by: Eric Biggers Signed-off-by: David Howells --- crypto/asymmetric_keys/pkcs7_verify.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c index 39e6de0c2761f..2f6a768b91d70 100644 --- a/crypto/asymmetric_keys/pkcs7_verify.c +++ b/crypto/asymmetric_keys/pkcs7_verify.c @@ -270,7 +270,7 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7, sinfo->index); return 0; } - ret = public_key_verify_signature(p->pub, p->sig); + ret = public_key_verify_signature(p->pub, x509->sig); if (ret < 0) return ret; x509->signer = p; From 29f4a67c17e19314b7d74b8569be935e6c7edf50 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 22 Feb 2018 14:38:33 +0000 Subject: [PATCH 241/269] PKCS#7: fix certificate blacklisting If there is a blacklisted certificate in a SignerInfo's certificate chain, then pkcs7_verify_sig_chain() sets sinfo->blacklisted and returns 0. But, pkcs7_verify() fails to handle this case appropriately, as it actually continues on to the line 'actual_ret = 0;', indicating that the SignerInfo has passed verification. Consequently, PKCS#7 signature verification ignores the certificate blacklist. Fix this by not considering blacklisted SignerInfos to have passed verification. Also fix the function comment with regards to when 0 is returned. Fixes: 03bb79315ddc ("PKCS#7: Handle blacklisted certificates") Cc: # v4.12+ Signed-off-by: Eric Biggers Signed-off-by: David Howells --- crypto/asymmetric_keys/pkcs7_verify.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c index 2f6a768b91d70..97c77f66b20d6 100644 --- a/crypto/asymmetric_keys/pkcs7_verify.c +++ b/crypto/asymmetric_keys/pkcs7_verify.c @@ -366,8 +366,7 @@ static int pkcs7_verify_one(struct pkcs7_message *pkcs7, * * (*) -EBADMSG if some part of the message was invalid, or: * - * (*) 0 if no signature chains were found to be blacklisted or to contain - * unsupported crypto, or: + * (*) 0 if a signature chain passed verification, or: * * (*) -EKEYREJECTED if a blacklisted key was encountered, or: * @@ -423,8 +422,11 @@ int pkcs7_verify(struct pkcs7_message *pkcs7, for (sinfo = pkcs7->signed_infos; sinfo; sinfo = sinfo->next) { ret = pkcs7_verify_one(pkcs7, sinfo); - if (sinfo->blacklisted && actual_ret == -ENOPKG) - actual_ret = -EKEYREJECTED; + if (sinfo->blacklisted) { + if (actual_ret == -ENOPKG) + actual_ret = -EKEYREJECTED; + continue; + } if (ret < 0) { if (ret == -ENOPKG) { sinfo->unsupported_crypto = true; From 6459ae386699a5fe0dc52cf30255f75274fa43a4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 22 Feb 2018 14:38:33 +0000 Subject: [PATCH 242/269] PKCS#7: fix direct verification of SignerInfo signature If none of the certificates in a SignerInfo's certificate chain match a trusted key, nor is the last certificate signed by a trusted key, then pkcs7_validate_trust_one() tries to check whether the SignerInfo's signature was made directly by a trusted key. But, it actually fails to set the 'sig' variable correctly, so it actually verifies the last signature seen. That will only be the SignerInfo's signature if the certificate chain is empty; otherwise it will actually be the last certificate's signature. This is not by itself a security problem, since verifying any of the certificates in the chain should be sufficient to verify the SignerInfo. Still, it's not working as intended so it should be fixed. Fix it by setting 'sig' correctly for the direct verification case. Fixes: 757932e6da6d ("PKCS#7: Handle PKCS#7 messages that contain no X.509 certs") Signed-off-by: Eric Biggers Signed-off-by: David Howells --- crypto/asymmetric_keys/pkcs7_trust.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/asymmetric_keys/pkcs7_trust.c b/crypto/asymmetric_keys/pkcs7_trust.c index 1f4e25f10049c..598906b1e28d3 100644 --- a/crypto/asymmetric_keys/pkcs7_trust.c +++ b/crypto/asymmetric_keys/pkcs7_trust.c @@ -106,6 +106,7 @@ static int pkcs7_validate_trust_one(struct pkcs7_message *pkcs7, pr_devel("sinfo %u: Direct signer is key %x\n", sinfo->index, key_serial(key)); x509 = NULL; + sig = sinfo->sig; goto matched; } if (PTR_ERR(key) != -ENOKEY) From 437499eea4291ae9621e8763a41df027c110a1ef Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 22 Feb 2018 14:38:33 +0000 Subject: [PATCH 243/269] X.509: fix BUG_ON() when hash algorithm is unsupported The X.509 parser mishandles the case where the certificate's signature's hash algorithm is not available in the crypto API. In this case, x509_get_sig_params() doesn't allocate the cert->sig->digest buffer; this part seems to be intentional. However, public_key_verify_signature() is still called via x509_check_for_self_signed(), which triggers the 'BUG_ON(!sig->digest)'. Fix this by making public_key_verify_signature() return -ENOPKG if the hash buffer has not been allocated. Reproducer when all the CONFIG_CRYPTO_SHA512* options are disabled: openssl req -new -sha512 -x509 -batch -nodes -outform der \ | keyctl padd asymmetric desc @s Fixes: 6c2dc5ae4ab7 ("X.509: Extract signature digest and make self-signed cert checks earlier") Reported-by: Paolo Valente Cc: Paolo Valente Cc: # v4.7+ Signed-off-by: Eric Biggers Signed-off-by: David Howells --- crypto/asymmetric_keys/public_key.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index de996586762a8..e929fe1e4106c 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -79,9 +79,11 @@ int public_key_verify_signature(const struct public_key *pkey, BUG_ON(!pkey); BUG_ON(!sig); - BUG_ON(!sig->digest); BUG_ON(!sig->s); + if (!sig->digest) + return -ENOPKG; + alg_name = sig->pkey_algo; if (strcmp(sig->pkey_algo, "rsa") == 0) { /* The data wangled by the RSA algorithm is typically padded From 4b34968e77ad09628cfb3c4a7daf2adc2cefc6e8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 22 Feb 2018 14:38:34 +0000 Subject: [PATCH 244/269] X.509: fix NULL dereference when restricting key with unsupported_sig The asymmetric key type allows an X.509 certificate to be added even if its signature's hash algorithm is not available in the crypto API. In that case 'payload.data[asym_auth]' will be NULL. But the key restriction code failed to check for this case before trying to use the signature, resulting in a NULL pointer dereference in key_or_keyring_common() or in restrict_link_by_signature(). Fix this by returning -ENOPKG when the signature is unsupported. Reproducer when all the CONFIG_CRYPTO_SHA512* options are disabled and keyctl has support for the 'restrict_keyring' command: keyctl new_session keyctl restrict_keyring @s asymmetric builtin_trusted openssl req -new -sha512 -x509 -batch -nodes -outform der \ | keyctl padd asymmetric desc @s Fixes: a511e1af8b12 ("KEYS: Move the point of trust determination to __key_link()") Cc: # v4.7+ Signed-off-by: Eric Biggers Signed-off-by: David Howells --- crypto/asymmetric_keys/restrict.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/crypto/asymmetric_keys/restrict.c b/crypto/asymmetric_keys/restrict.c index 86fb685089528..7c93c7728454a 100644 --- a/crypto/asymmetric_keys/restrict.c +++ b/crypto/asymmetric_keys/restrict.c @@ -67,8 +67,9 @@ __setup("ca_keys=", ca_keys_setup); * * Returns 0 if the new certificate was accepted, -ENOKEY if we couldn't find a * matching parent certificate in the trusted list, -EKEYREJECTED if the - * signature check fails or the key is blacklisted and some other error if - * there is a matching certificate but the signature check cannot be performed. + * signature check fails or the key is blacklisted, -ENOPKG if the signature + * uses unsupported crypto, or some other error if there is a matching + * certificate but the signature check cannot be performed. */ int restrict_link_by_signature(struct key *dest_keyring, const struct key_type *type, @@ -88,6 +89,8 @@ int restrict_link_by_signature(struct key *dest_keyring, return -EOPNOTSUPP; sig = payload->data[asym_auth]; + if (!sig) + return -ENOPKG; if (!sig->auth_ids[0] && !sig->auth_ids[1]) return -ENOKEY; @@ -139,6 +142,8 @@ static int key_or_keyring_common(struct key *dest_keyring, return -EOPNOTSUPP; sig = payload->data[asym_auth]; + if (!sig) + return -ENOPKG; if (!sig->auth_ids[0] && !sig->auth_ids[1]) return -ENOKEY; @@ -222,9 +227,9 @@ static int key_or_keyring_common(struct key *dest_keyring, * * Returns 0 if the new certificate was accepted, -ENOKEY if we * couldn't find a matching parent certificate in the trusted list, - * -EKEYREJECTED if the signature check fails, and some other error if - * there is a matching certificate but the signature check cannot be - * performed. + * -EKEYREJECTED if the signature check fails, -ENOPKG if the signature uses + * unsupported crypto, or some other error if there is a matching certificate + * but the signature check cannot be performed. */ int restrict_link_by_key_or_keyring(struct key *dest_keyring, const struct key_type *type, @@ -249,9 +254,9 @@ int restrict_link_by_key_or_keyring(struct key *dest_keyring, * * Returns 0 if the new certificate was accepted, -ENOKEY if we * couldn't find a matching parent certificate in the trusted list, - * -EKEYREJECTED if the signature check fails, and some other error if - * there is a matching certificate but the signature check cannot be - * performed. + * -EKEYREJECTED if the signature check fails, -ENOPKG if the signature uses + * unsupported crypto, or some other error if there is a matching certificate + * but the signature check cannot be performed. */ int restrict_link_by_key_or_keyring_chain(struct key *dest_keyring, const struct key_type *type, From d9f4bb1a0f4db493efe6d7c58ffe696a57de7eb3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Feb 2018 14:38:34 +0000 Subject: [PATCH 245/269] KEYS: Use individual pages in big_key for crypto buffers kmalloc() can't always allocate large enough buffers for big_key to use for crypto (1MB + some metadata) so we cannot use that to allocate the buffer. Further, vmalloc'd pages can't be passed to sg_init_one() and the aead crypto accessors cannot be called progressively and must be passed all the data in one go (which means we can't pass the data in one block at a time). Fix this by allocating the buffer pages individually and passing them through a multientry scatterlist to the crypto layer. This has the bonus advantage that we don't have to allocate a contiguous series of pages. We then vmap() the page list and pass that through to the VFS read/write routines. This can trigger a warning: WARNING: CPU: 0 PID: 60912 at mm/page_alloc.c:3883 __alloc_pages_nodemask+0xb7c/0x15f8 ([<00000000002acbb6>] __alloc_pages_nodemask+0x1ee/0x15f8) [<00000000002dd356>] kmalloc_order+0x46/0x90 [<00000000002dd3e0>] kmalloc_order_trace+0x40/0x1f8 [<0000000000326a10>] __kmalloc+0x430/0x4c0 [<00000000004343e4>] big_key_preparse+0x7c/0x210 [<000000000042c040>] key_create_or_update+0x128/0x420 [<000000000042e52c>] SyS_add_key+0x124/0x220 [<00000000007bba2c>] system_call+0xc4/0x2b0 from the keyctl/padd/useradd test of the keyutils testsuite on s390x. Note that it might be better to shovel data through in page-sized lumps instead as there's no particular need to use a monolithic buffer unless the kernel itself wants to access the data. Fixes: 13100a72f40f ("Security: Keys: Big keys stored encrypted") Reported-by: Paul Bunyan Signed-off-by: David Howells cc: Kirill Marinushkin --- security/keys/big_key.c | 110 +++++++++++++++++++++++++++++++--------- 1 file changed, 87 insertions(+), 23 deletions(-) diff --git a/security/keys/big_key.c b/security/keys/big_key.c index 929e14978c421..fa728f662a6f3 100644 --- a/security/keys/big_key.c +++ b/security/keys/big_key.c @@ -22,6 +22,13 @@ #include #include +struct big_key_buf { + unsigned int nr_pages; + void *virt; + struct scatterlist *sg; + struct page *pages[]; +}; + /* * Layout of key payload words. */ @@ -91,10 +98,9 @@ static DEFINE_MUTEX(big_key_aead_lock); /* * Encrypt/decrypt big_key data */ -static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key) +static int big_key_crypt(enum big_key_op op, struct big_key_buf *buf, size_t datalen, u8 *key) { int ret; - struct scatterlist sgio; struct aead_request *aead_req; /* We always use a zero nonce. The reason we can get away with this is * because we're using a different randomly generated key for every @@ -109,8 +115,7 @@ static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key) return -ENOMEM; memset(zero_nonce, 0, sizeof(zero_nonce)); - sg_init_one(&sgio, data, datalen + (op == BIG_KEY_ENC ? ENC_AUTHTAG_SIZE : 0)); - aead_request_set_crypt(aead_req, &sgio, &sgio, datalen, zero_nonce); + aead_request_set_crypt(aead_req, buf->sg, buf->sg, datalen, zero_nonce); aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); aead_request_set_ad(aead_req, 0); @@ -129,22 +134,82 @@ static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key) return ret; } +/* + * Free up the buffer. + */ +static void big_key_free_buffer(struct big_key_buf *buf) +{ + unsigned int i; + + if (buf->virt) { + memset(buf->virt, 0, buf->nr_pages * PAGE_SIZE); + vunmap(buf->virt); + } + + for (i = 0; i < buf->nr_pages; i++) + if (buf->pages[i]) + __free_page(buf->pages[i]); + + kfree(buf); +} + +/* + * Allocate a buffer consisting of a set of pages with a virtual mapping + * applied over them. + */ +static void *big_key_alloc_buffer(size_t len) +{ + struct big_key_buf *buf; + unsigned int npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned int i, l; + + buf = kzalloc(sizeof(struct big_key_buf) + + sizeof(struct page) * npg + + sizeof(struct scatterlist) * npg, + GFP_KERNEL); + if (!buf) + return NULL; + + buf->nr_pages = npg; + buf->sg = (void *)(buf->pages + npg); + sg_init_table(buf->sg, npg); + + for (i = 0; i < buf->nr_pages; i++) { + buf->pages[i] = alloc_page(GFP_KERNEL); + if (!buf->pages[i]) + goto nomem; + + l = min_t(size_t, len, PAGE_SIZE); + sg_set_page(&buf->sg[i], buf->pages[i], l, 0); + len -= l; + } + + buf->virt = vmap(buf->pages, buf->nr_pages, VM_MAP, PAGE_KERNEL); + if (!buf->virt) + goto nomem; + + return buf; + +nomem: + big_key_free_buffer(buf); + return NULL; +} + /* * Preparse a big key */ int big_key_preparse(struct key_preparsed_payload *prep) { + struct big_key_buf *buf; struct path *path = (struct path *)&prep->payload.data[big_key_path]; struct file *file; u8 *enckey; - u8 *data = NULL; ssize_t written; - size_t datalen = prep->datalen; + size_t datalen = prep->datalen, enclen = datalen + ENC_AUTHTAG_SIZE; int ret; - ret = -EINVAL; if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data) - goto error; + return -EINVAL; /* Set an arbitrary quota */ prep->quotalen = 16; @@ -157,13 +222,12 @@ int big_key_preparse(struct key_preparsed_payload *prep) * * File content is stored encrypted with randomly generated key. */ - size_t enclen = datalen + ENC_AUTHTAG_SIZE; loff_t pos = 0; - data = kmalloc(enclen, GFP_KERNEL); - if (!data) + buf = big_key_alloc_buffer(enclen); + if (!buf) return -ENOMEM; - memcpy(data, prep->data, datalen); + memcpy(buf->virt, prep->data, datalen); /* generate random key */ enckey = kmalloc(ENC_KEY_SIZE, GFP_KERNEL); @@ -176,7 +240,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) goto err_enckey; /* encrypt aligned data */ - ret = big_key_crypt(BIG_KEY_ENC, data, datalen, enckey); + ret = big_key_crypt(BIG_KEY_ENC, buf, datalen, enckey); if (ret) goto err_enckey; @@ -187,7 +251,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) goto err_enckey; } - written = kernel_write(file, data, enclen, &pos); + written = kernel_write(file, buf->virt, enclen, &pos); if (written != enclen) { ret = written; if (written >= 0) @@ -202,7 +266,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) *path = file->f_path; path_get(path); fput(file); - kzfree(data); + big_key_free_buffer(buf); } else { /* Just store the data in a buffer */ void *data = kmalloc(datalen, GFP_KERNEL); @@ -220,7 +284,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) err_enckey: kzfree(enckey); error: - kzfree(data); + big_key_free_buffer(buf); return ret; } @@ -298,15 +362,15 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen) return datalen; if (datalen > BIG_KEY_FILE_THRESHOLD) { + struct big_key_buf *buf; struct path *path = (struct path *)&key->payload.data[big_key_path]; struct file *file; - u8 *data; u8 *enckey = (u8 *)key->payload.data[big_key_data]; size_t enclen = datalen + ENC_AUTHTAG_SIZE; loff_t pos = 0; - data = kmalloc(enclen, GFP_KERNEL); - if (!data) + buf = big_key_alloc_buffer(enclen); + if (!buf) return -ENOMEM; file = dentry_open(path, O_RDONLY, current_cred()); @@ -316,26 +380,26 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen) } /* read file to kernel and decrypt */ - ret = kernel_read(file, data, enclen, &pos); + ret = kernel_read(file, buf->virt, enclen, &pos); if (ret >= 0 && ret != enclen) { ret = -EIO; goto err_fput; } - ret = big_key_crypt(BIG_KEY_DEC, data, enclen, enckey); + ret = big_key_crypt(BIG_KEY_DEC, buf, enclen, enckey); if (ret) goto err_fput; ret = datalen; /* copy decrypted data to user */ - if (copy_to_user(buffer, data, datalen) != 0) + if (copy_to_user(buffer, buf->virt, datalen) != 0) ret = -EFAULT; err_fput: fput(file); error: - kzfree(data); + big_key_free_buffer(buf); } else { ret = datalen; if (copy_to_user(buffer, key->payload.data[big_key_data], From b21ebf2fb4cde1618915a97cc773e287ff49173e Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 7 Feb 2018 14:20:09 -0800 Subject: [PATCH 246/269] x86: Treat R_X86_64_PLT32 as R_X86_64_PC32 On i386, there are 2 types of PLTs, PIC and non-PIC. PIE and shared objects must use PIC PLT. To use PIC PLT, you need to load _GLOBAL_OFFSET_TABLE_ into EBX first. There is no need for that on x86-64 since x86-64 uses PC-relative PLT. On x86-64, for 32-bit PC-relative branches, we can generate PLT32 relocation, instead of PC32 relocation, which can also be used as a marker for 32-bit PC-relative branches. Linker can always reduce PLT32 relocation to PC32 if function is defined locally. Local functions should use PC32 relocation. As far as Linux kernel is concerned, R_X86_64_PLT32 can be treated the same as R_X86_64_PC32 since Linux kernel doesn't use PLT. R_X86_64_PLT32 for 32-bit PC-relative branches has been enabled in binutils master branch which will become binutils 2.31. [ hjl is working on having better documentation on this all, but a few more notes from him: "PLT32 relocation is used as marker for PC-relative branches. Because of EBX, it looks odd to generate PLT32 relocation on i386 when EBX doesn't have GOT. As for symbol resolution, PLT32 and PC32 relocations are almost interchangeable. But when linker sees PLT32 relocation against a protected symbol, it can resolved locally at link-time since it is used on a branch instruction. Linker can't do that for PC32 relocation" but for the kernel use, the two are basically the same, and this commit gets things building and working with the current binutils master - Linus ] Signed-off-by: H.J. Lu Signed-off-by: Linus Torvalds --- arch/x86/kernel/machine_kexec_64.c | 1 + arch/x86/kernel/module.c | 1 + arch/x86/tools/relocs.c | 3 +++ 3 files changed, 5 insertions(+) diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 1f790cf9d38fe..3b7427aa7d850 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -542,6 +542,7 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr, goto overflow; break; case R_X86_64_PC32: + case R_X86_64_PLT32: value -= (u64)address; *(u32 *)location = value; break; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index da0c160e55890..f58336af095c9 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -191,6 +191,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, goto overflow; break; case R_X86_64_PC32: + case R_X86_64_PLT32: if (*(u32 *)loc != 0) goto invalid_relocation; val -= (u64)loc; diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 5d73c443e778b..220e97841e494 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -770,9 +770,12 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, break; case R_X86_64_PC32: + case R_X86_64_PLT32: /* * PC relative relocations don't need to be adjusted unless * referencing a percpu symbol. + * + * NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32. */ if (is_percpu_sym(sym, symname)) add_reloc(&relocs32neg, offset); From 28128c61e08eaeced9cc8ec0e6b5d677b5b94690 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 22 Feb 2018 09:41:40 -0800 Subject: [PATCH 247/269] kconfig.h: Include compiler types to avoid missed struct attributes The header files for some structures could get included in such a way that struct attributes (specifically __randomize_layout from path.h) would be parsed as variable names instead of attributes. This could lead to some instances of a structure being unrandomized, causing nasty GPFs, etc. This patch makes sure the compiler_types.h header is included in kconfig.h so that we've always got types and struct attributes defined, since kconfig.h is included from the compiler command line. Reported-by: Patrick McLean Root-caused-by: Maciej S. Szmigiero Suggested-by: Linus Torvalds Tested-by: Maciej S. Szmigiero Fixes: 3859a271a003 ("randstruct: Mark various structs for randomization") Signed-off-by: Kees Cook Signed-off-by: Linus Torvalds --- include/linux/kconfig.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h index fec5076eda91d..c5fd4ee776ba1 100644 --- a/include/linux/kconfig.h +++ b/include/linux/kconfig.h @@ -64,4 +64,7 @@ */ #define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option)) +/* Make sure we always have all types and struct attributes defined. */ +#include + #endif /* __LINUX_KCONFIG_H */ From bef3efbeb897b56867e271cdbc5f8adaacaeb9cd Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Thu, 22 Feb 2018 09:15:06 -0800 Subject: [PATCH 248/269] efivarfs: Limit the rate for non-root to read files Each read from a file in efivarfs results in two calls to EFI (one to get the file size, another to get the actual data). On X86 these EFI calls result in broadcast system management interrupts (SMI) which affect performance of the whole system. A malicious user can loop performing reads from efivarfs bringing the system to its knees. Linus suggested per-user rate limit to solve this. So we add a ratelimit structure to "user_struct" and initialize it for the root user for no limit. When allocating user_struct for other users we set the limit to 100 per second. This could be used for other places that want to limit the rate of some detrimental user action. In efivarfs if the limit is exceeded when reading, we take an interruptible nap for 50ms and check the rate limit again. Signed-off-by: Tony Luck Acked-by: Ard Biesheuvel Signed-off-by: Linus Torvalds --- fs/efivarfs/file.c | 6 ++++++ include/linux/sched/user.h | 4 ++++ kernel/user.c | 3 +++ 3 files changed, 13 insertions(+) diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index 5f22e74bbadea..8e568428c88be 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -74,6 +75,11 @@ static ssize_t efivarfs_file_read(struct file *file, char __user *userbuf, ssize_t size = 0; int err; + while (!__ratelimit(&file->f_cred->user->ratelimit)) { + if (!msleep_interruptible(50)) + return -EINTR; + } + err = efivar_entry_size(var, &datasize); /* diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 0dcf4e480ef73..96fe289c4c6e4 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -4,6 +4,7 @@ #include #include +#include struct key; @@ -41,6 +42,9 @@ struct user_struct { defined(CONFIG_NET) atomic_long_t locked_vm; #endif + + /* Miscellaneous per-user rate limit */ + struct ratelimit_state ratelimit; }; extern int uids_sysfs_init(void); diff --git a/kernel/user.c b/kernel/user.c index 9a20acce460d5..36288d8406756 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -101,6 +101,7 @@ struct user_struct root_user = { .sigpending = ATOMIC_INIT(0), .locked_shm = 0, .uid = GLOBAL_ROOT_UID, + .ratelimit = RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0), }; /* @@ -191,6 +192,8 @@ struct user_struct *alloc_uid(kuid_t uid) new->uid = uid; atomic_set(&new->__count, 1); + ratelimit_state_init(&new->ratelimit, HZ, 100); + ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE); /* * Before adding this, check whether we raced From b87b6194be631c94785fe93398651e804ed43e28 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 21 Feb 2018 04:41:59 +0100 Subject: [PATCH 249/269] netlink: put module reference if dump start fails Before, if cb->start() failed, the module reference would never be put, because cb->cb_running is intentionally false at this point. Users are generally annoyed by this because they can no longer unload modules that leak references. Also, it may be possible to tediously wrap a reference counter back to zero, especially since module.c still uses atomic_inc instead of refcount_inc. This patch expands the error path to simply call module_put if cb->start() fails. Fixes: 41c87425a1ac ("netlink: do not set cb_running if dump's start() errs") Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2ad445c1d27cc..07e8478068f0a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2308,7 +2308,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, if (cb->start) { ret = cb->start(cb); if (ret) - goto error_unlock; + goto error_put; } nlk->cb_running = true; @@ -2328,6 +2328,8 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, */ return -EINTR; +error_put: + module_put(control->module); error_unlock: sock_put(sk); mutex_unlock(nlk->cb_mutex); From 88e80c62671ceecdbb77c902731ec95a4bfa62f9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Feb 2018 21:42:26 -0800 Subject: [PATCH 250/269] smsc75xx: fix smsc75xx_set_features() If an attempt is made to disable RX checksums, USB adapter is changed but netdev->features is not, because smsc75xx_set_features() returns a non zero value. This throws errors from netdev_rx_csum_fault() : : hw csum failure Signed-off-by: Eric Dumazet Cc: Steve Glendinning Signed-off-by: David S. Miller --- drivers/net/usb/smsc75xx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index d0a113743195a..7a6a1fe793090 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -954,10 +954,11 @@ static int smsc75xx_set_features(struct net_device *netdev, /* it's racing here! */ ret = smsc75xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); - if (ret < 0) + if (ret < 0) { netdev_warn(dev->net, "Error writing RFE_CTL\n"); - - return ret; + return ret; + } + return 0; } static int smsc75xx_wait_ready(struct usbnet *dev, int in_pm) From 350c9f484bde93ef229682eedd98cd5f74350f7f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Feb 2018 06:43:03 -0800 Subject: [PATCH 251/269] tcp_bbr: better deal with suboptimal GSO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BBR uses tcp_tso_autosize() in an attempt to probe what would be the burst sizes and to adjust cwnd in bbr_target_cwnd() with following gold formula : /* Allow enough full-sized skbs in flight to utilize end systems. */ cwnd += 3 * bbr->tso_segs_goal; But GSO can be lacking or be constrained to very small units (ip link set dev ... gso_max_segs 2) What we really want is to have enough packets in flight so that both GSO and GRO are efficient. So in the case GSO is off or downgraded, we still want to have the same number of packets in flight as if GSO/TSO was fully operational, so that GRO can hopefully be working efficiently. To fix this issue, we make tcp_tso_autosize() unaware of sk->sk_gso_max_segs Only tcp_tso_segs() has to enforce the gso_max_segs limit. Tested: ethtool -K eth0 tso off gso off tc qd replace dev eth0 root pfifo_fast Before patch: for f in {1..5}; do ./super_netperf 1 -H lpaa24 -- -K bbr; done     691  (ss -temoi shows cwnd is stuck around 6 )     667     651     631     517 After patch : # for f in {1..5}; do ./super_netperf 1 -H lpaa24 -- -K bbr; done    1733 (ss -temoi shows cwnd is around 386 )    1778    1746    1781    1718 Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Eric Dumazet Reported-by: Oleksandr Natalenko Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b2bca373f8bee..6818042cd8a9a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1730,7 +1730,7 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, */ segs = max_t(u32, bytes / mss_now, min_tso_segs); - return min_t(u32, segs, sk->sk_gso_max_segs); + return segs; } EXPORT_SYMBOL(tcp_tso_autosize); @@ -1742,9 +1742,10 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; - return tso_segs ? : - tcp_tso_autosize(sk, mss_now, - sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + if (!tso_segs) + tso_segs = tcp_tso_autosize(sk, mss_now, + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + return min_t(u32, tso_segs, sk->sk_gso_max_segs); } /* Returns the portion of skb which can be sent right away */ From 1fe4b1184c2ae2bfbf9e8b14c9c0c1945c98f205 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 21 Feb 2018 11:00:54 -0800 Subject: [PATCH 252/269] net: ipv4: Set addr_type in hash_keys for forwarded case The result of the skb flow dissect is copied from keys to hash_keys to ensure only the intended data is hashed. The original L4 hash patch overlooked setting the addr_type for this case; add it. Fixes: bf4e0a3db97eb ("net: ipv4: add support for ECMP hash policy choice") Reported-by: Ido Schimmel Signed-off-by: David Ahern Acked-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 49cc1c1df1bac..a4f44d815a61a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1826,6 +1826,8 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, return skb_get_hash_raw(skb) >> 1; memset(&hash_keys, 0, sizeof(hash_keys)); skb_flow_dissect_flow_keys(skb, &keys, flag); + + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; hash_keys.ports.src = keys.ports.src; From 83090e7d35caaabc8daa65fd698275951455bbec Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 22 Feb 2018 09:24:59 +1100 Subject: [PATCH 253/269] net/smc9194: Remove bogus CONFIG_MAC reference AFAIK the only version of smc9194.c with Mac support is the one in the linux-mac68k CVS repo, which never made it to the mainline. Despite that, from v2.3.45, arch/m68k/config.in listed CONFIG_SMC9194 under CONFIG_MAC. This mistake got carried over into Kconfig in v2.5.55. (See pre-git era "[PATCH] add m68k dependencies to net driver config".) Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig index 63aca9f847e12..4c2f612e4414d 100644 --- a/drivers/net/ethernet/smsc/Kconfig +++ b/drivers/net/ethernet/smsc/Kconfig @@ -20,7 +20,7 @@ if NET_VENDOR_SMSC config SMC9194 tristate "SMC 9194 support" - depends on (ISA || MAC && BROKEN) + depends on ISA select CRC32 ---help--- This is support for the SMC9xxx based Ethernet cards. Choose this From a2c0f039bbd0f9ebf375176d05b056e3f3b5c4f7 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 21 Feb 2018 18:18:30 -0600 Subject: [PATCH 254/269] ibmvnic: Fix early release of login buffer The login buffer is released before the driver can perform sanity checks between resources the driver requested and what firmware will provide. Don't release the login buffer until the sanity check is performed. Fixes: 34f0f4e3f488 ("ibmvnic: Fix login buffer memory leaks") Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 1495cb99f9249..1b3cc8bb07050 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3760,7 +3760,6 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz, DMA_BIDIRECTIONAL); - release_login_buffer(adapter); dma_unmap_single(dev, adapter->login_rsp_buf_token, adapter->login_rsp_buf_sz, DMA_BIDIRECTIONAL); @@ -3791,6 +3790,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, ibmvnic_remove(adapter->vdev); return -EIO; } + release_login_buffer(adapter); complete(&adapter->init_done); return 0; From 657308f73e674e86b60509a430a46e569bf02846 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Feb 2018 20:55:28 +0100 Subject: [PATCH 255/269] regulatory: add NUL to request alpha2 Similar to the ancient commit a5fe8e7695dc ("regulatory: add NUL to alpha2"), add another byte to alpha2 in the request struct so that when we use nla_put_string(), we don't overrun anything. Fixes: 73d54c9e74c4 ("cfg80211: add regulatory netlink multicast group") Reported-by: Kees Cook Signed-off-by: Johannes Berg --- include/net/regulatory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/regulatory.h b/include/net/regulatory.h index ebc5a2ed86317..f83cacce33085 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -78,7 +78,7 @@ struct regulatory_request { int wiphy_idx; enum nl80211_reg_initiator initiator; enum nl80211_user_reg_hint_type user_reg_hint_type; - char alpha2[2]; + char alpha2[3]; enum nl80211_dfs_regions dfs_region; bool intersect; bool processed; From 32fff239de37ef226d5b66329dd133f64d63b22d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2018 08:33:24 -0800 Subject: [PATCH 256/269] bpf: add schedule points in percpu arrays management syszbot managed to trigger RCU detected stalls in bpf_array_free_percpu() It takes time to allocate a huge percpu map, but even more time to free it. Since we run in process context, use cond_resched() to yield cpu if needed. Fixes: a10423b87a7e ("bpf: introduce BPF_MAP_TYPE_PERCPU_ARRAY map") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Daniel Borkmann --- kernel/bpf/arraymap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index a364c408f25a5..14750e7c5ee48 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -26,8 +26,10 @@ static void bpf_array_free_percpu(struct bpf_array *array) { int i; - for (i = 0; i < array->map.max_entries; i++) + for (i = 0; i < array->map.max_entries; i++) { free_percpu(array->pptrs[i]); + cond_resched(); + } } static int bpf_array_alloc_percpu(struct bpf_array *array) @@ -43,6 +45,7 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) return -ENOMEM; } array->pptrs[i] = ptr; + cond_resched(); } return 0; From 6c5f61023c5b0edb0c8a64c902fe97c6453b1852 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 22 Feb 2018 10:10:35 -0800 Subject: [PATCH 257/269] bpf: fix rcu lockdep warning for lpm_trie map_free callback Commit 9a3efb6b661f ("bpf: fix memory leak in lpm_trie map_free callback function") fixed a memory leak and removed unnecessary locks in map_free callback function. Unfortrunately, it introduced a lockdep warning. When lockdep checking is turned on, running tools/testing/selftests/bpf/test_lpm_map will have: [ 98.294321] ============================= [ 98.294807] WARNING: suspicious RCU usage [ 98.295359] 4.16.0-rc2+ #193 Not tainted [ 98.295907] ----------------------------- [ 98.296486] /home/yhs/work/bpf/kernel/bpf/lpm_trie.c:572 suspicious rcu_dereference_check() usage! [ 98.297657] [ 98.297657] other info that might help us debug this: [ 98.297657] [ 98.298663] [ 98.298663] rcu_scheduler_active = 2, debug_locks = 1 [ 98.299536] 2 locks held by kworker/2:1/54: [ 98.300152] #0: ((wq_completion)"events"){+.+.}, at: [<00000000196bc1f0>] process_one_work+0x157/0x5c0 [ 98.301381] #1: ((work_completion)(&map->work)){+.+.}, at: [<00000000196bc1f0>] process_one_work+0x157/0x5c0 Since actual trie tree removal happens only after no other accesses to the tree are possible, replacing rcu_dereference_protected(*slot, lockdep_is_held(&trie->lock)) with rcu_dereference_protected(*slot, 1) fixed the issue. Fixes: 9a3efb6b661f ("bpf: fix memory leak in lpm_trie map_free callback function") Reported-by: Eric Dumazet Suggested-by: Eric Dumazet Signed-off-by: Yonghong Song Reviewed-by: Eric Dumazet Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- kernel/bpf/lpm_trie.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index a75e02c961b5b..b4b5b81e7251e 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -569,8 +569,7 @@ static void trie_free(struct bpf_map *map) slot = &trie->root; for (;;) { - node = rcu_dereference_protected(*slot, - lockdep_is_held(&trie->lock)); + node = rcu_dereference_protected(*slot, 1); if (!node) goto out; From 370c10522e96bf1b2e7fd9e906dbe8fb5be895d2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 22 Feb 2018 12:11:55 +0300 Subject: [PATCH 258/269] net: aquantia: Fix error handling in aq_pci_probe() We should check "self->aq_hw" for allocation failure, and also we should free it on the error paths. Fixes: 23ee07ad3c2f ("net: aquantia: Cleanup pci functions module") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- .../net/ethernet/aquantia/atlantic/aq_pci_func.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 22889fc158f27..87c4308b52a7c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -226,6 +226,10 @@ static int aq_pci_probe(struct pci_dev *pdev, goto err_ioremap; self->aq_hw = kzalloc(sizeof(*self->aq_hw), GFP_KERNEL); + if (!self->aq_hw) { + err = -ENOMEM; + goto err_ioremap; + } self->aq_hw->aq_nic_cfg = aq_nic_get_cfg(self); for (bar = 0; bar < 4; ++bar) { @@ -235,19 +239,19 @@ static int aq_pci_probe(struct pci_dev *pdev, mmio_pa = pci_resource_start(pdev, bar); if (mmio_pa == 0U) { err = -EIO; - goto err_ioremap; + goto err_free_aq_hw; } reg_sz = pci_resource_len(pdev, bar); if ((reg_sz <= 24 /*ATL_REGS_SIZE*/)) { err = -EIO; - goto err_ioremap; + goto err_free_aq_hw; } self->aq_hw->mmio = ioremap_nocache(mmio_pa, reg_sz); if (!self->aq_hw->mmio) { err = -EIO; - goto err_ioremap; + goto err_free_aq_hw; } break; } @@ -255,7 +259,7 @@ static int aq_pci_probe(struct pci_dev *pdev, if (bar == 4) { err = -EIO; - goto err_ioremap; + goto err_free_aq_hw; } numvecs = min((u8)AQ_CFG_VECS_DEF, @@ -290,6 +294,8 @@ static int aq_pci_probe(struct pci_dev *pdev, aq_pci_free_irq_vectors(self); err_hwinit: iounmap(self->aq_hw->mmio); +err_free_aq_hw: + kfree(self->aq_hw); err_ioremap: free_netdev(ndev); err_pci_func: From 93c62c45ed5fad1b87e3a45835b251cd68de9c46 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Feb 2018 14:38:14 +0000 Subject: [PATCH 259/269] rxrpc: Fix send in rxrpc_send_data_packet() All the kernel_sendmsg() calls in rxrpc_send_data_packet() need to send both parts of the iov[] buffer, but one of them does not. Fix it so that it does. Without this, short IPv6 rxrpc DATA packets may be seen that have the rxrpc header included, but no payload. Fixes: 5a924b8951f8 ("rxrpc: Don't store the rxrpc header in the Tx queue sk_buffs") Reported-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 42410e910affb..cf73dc006c3bf 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -445,7 +445,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, (char *)&opt, sizeof(opt)); if (ret == 0) { ret = kernel_sendmsg(conn->params.local->socket, &msg, - iov, 1, iov[0].iov_len); + iov, 2, len); opt = IPV6_PMTUDISC_DO; kernel_setsockopt(conn->params.local->socket, From 9026e820cbd2ea39a06a129ecdddf2739bd3602b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 12 Feb 2018 13:18:38 -0800 Subject: [PATCH 260/269] fs/signalfd: fix build error for BUS_MCEERR_AR Fix build error in fs/signalfd.c by using same method that is used in kernel/signal.c: separate blocks for different signal si_code values. ./fs/signalfd.c: error: 'BUS_MCEERR_AR' undeclared (first use in this function) Reported-by: Geert Uytterhoeven Signed-off-by: Randy Dunlap Cc: Alexander Viro Signed-off-by: Eric W. Biederman --- fs/signalfd.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/signalfd.c b/fs/signalfd.c index 9990957264e3c..76bf9cc620742 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -118,13 +118,22 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, err |= __put_user(kinfo->si_trapno, &uinfo->ssi_trapno); #endif #ifdef BUS_MCEERR_AO - /* + /* + * Other callers might not initialize the si_lsb field, + * so check explicitly for the right codes here. + */ + if (kinfo->si_signo == SIGBUS && + kinfo->si_code == BUS_MCEERR_AO) + err |= __put_user((short) kinfo->si_addr_lsb, + &uinfo->ssi_addr_lsb); +#endif +#ifdef BUS_MCEERR_AR + /* * Other callers might not initialize the si_lsb field, * so check explicitly for the right codes here. */ if (kinfo->si_signo == SIGBUS && - (kinfo->si_code == BUS_MCEERR_AR || - kinfo->si_code == BUS_MCEERR_AO)) + kinfo->si_code == BUS_MCEERR_AR) err |= __put_user((short) kinfo->si_addr_lsb, &uinfo->ssi_addr_lsb); #endif From a493a87f38cfa48caaa95c9347be2d914c6fdf29 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 22 Feb 2018 15:12:53 +0100 Subject: [PATCH 261/269] bpf, x64: implement retpoline for tail call Implement a retpoline [0] for the BPF tail call JIT'ing that converts the indirect jump via jmp %rax that is used to make the long jump into another JITed BPF image. Since this is subject to speculative execution, we need to control the transient instruction sequence here as well when CONFIG_RETPOLINE is set, and direct it into a pause + lfence loop. The latter aligns also with what gcc / clang emits (e.g. [1]). JIT dump after patch: # bpftool p d x i 1 0: (18) r2 = map[id:1] 2: (b7) r3 = 0 3: (85) call bpf_tail_call#12 4: (b7) r0 = 2 5: (95) exit With CONFIG_RETPOLINE: # bpftool p d j i 1 [...] 33: cmp %edx,0x24(%rsi) 36: jbe 0x0000000000000072 |* 38: mov 0x24(%rbp),%eax 3e: cmp $0x20,%eax 41: ja 0x0000000000000072 | 43: add $0x1,%eax 46: mov %eax,0x24(%rbp) 4c: mov 0x90(%rsi,%rdx,8),%rax 54: test %rax,%rax 57: je 0x0000000000000072 | 59: mov 0x28(%rax),%rax 5d: add $0x25,%rax 61: callq 0x000000000000006d |+ 66: pause | 68: lfence | 6b: jmp 0x0000000000000066 | 6d: mov %rax,(%rsp) | 71: retq | 72: mov $0x2,%eax [...] * relative fall-through jumps in error case + retpoline for indirect jump Without CONFIG_RETPOLINE: # bpftool p d j i 1 [...] 33: cmp %edx,0x24(%rsi) 36: jbe 0x0000000000000063 |* 38: mov 0x24(%rbp),%eax 3e: cmp $0x20,%eax 41: ja 0x0000000000000063 | 43: add $0x1,%eax 46: mov %eax,0x24(%rbp) 4c: mov 0x90(%rsi,%rdx,8),%rax 54: test %rax,%rax 57: je 0x0000000000000063 | 59: mov 0x28(%rax),%rax 5d: add $0x25,%rax 61: jmpq *%rax |- 63: mov $0x2,%eax [...] * relative fall-through jumps in error case - plain indirect jump as before [0] https://support.google.com/faqs/answer/7625886 [1] https://github.com/gcc-mirror/gcc/commit/a31e654fa107be968b802786d747e962c2fcdb2b Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- arch/x86/include/asm/nospec-branch.h | 37 ++++++++++++++++++++++++++++ arch/x86/net/bpf_jit_comp.c | 9 ++++--- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 76b058533e473..81a1be3265711 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -177,4 +177,41 @@ static inline void indirect_branch_prediction_barrier(void) } #endif /* __ASSEMBLY__ */ + +/* + * Below is used in the eBPF JIT compiler and emits the byte sequence + * for the following assembly: + * + * With retpolines configured: + * + * callq do_rop + * spec_trap: + * pause + * lfence + * jmp spec_trap + * do_rop: + * mov %rax,(%rsp) + * retq + * + * Without retpolines configured: + * + * jmp *%rax + */ +#ifdef CONFIG_RETPOLINE +# define RETPOLINE_RAX_BPF_JIT_SIZE 17 +# define RETPOLINE_RAX_BPF_JIT() \ + EMIT1_off32(0xE8, 7); /* callq do_rop */ \ + /* spec_trap: */ \ + EMIT2(0xF3, 0x90); /* pause */ \ + EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ + EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ + /* do_rop: */ \ + EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ + EMIT1(0xC3); /* retq */ +#else +# define RETPOLINE_RAX_BPF_JIT_SIZE 2 +# define RETPOLINE_RAX_BPF_JIT() \ + EMIT2(0xFF, 0xE0); /* jmp *%rax */ +#endif + #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4923d92f918d5..45e4eb5bcbb2a 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include /* @@ -290,7 +291,7 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT2(0x89, 0xD2); /* mov edx, edx */ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ offsetof(struct bpf_array, map.max_entries)); -#define OFFSET1 43 /* number of bytes to jump */ +#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; @@ -299,7 +300,7 @@ static void emit_bpf_tail_call(u8 **pprog) */ EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 32 +#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE) EMIT2(X86_JA, OFFSET2); /* ja out */ label2 = cnt; EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ @@ -313,7 +314,7 @@ static void emit_bpf_tail_call(u8 **pprog) * goto out; */ EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ -#define OFFSET3 10 +#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) EMIT2(X86_JE, OFFSET3); /* je out */ label3 = cnt; @@ -326,7 +327,7 @@ static void emit_bpf_tail_call(u8 **pprog) * rdi == ctx (1st arg) * rax == prog->bpf_func + prologue_size */ - EMIT2(0xFF, 0xE0); /* jmp rax */ + RETPOLINE_RAX_BPF_JIT(); /* out: */ BUILD_BUG_ON(cnt - label1 != OFFSET1); From 16338a9b3ac30740d49f5dfed81bac0ffa53b9c7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 Feb 2018 01:03:43 +0100 Subject: [PATCH 262/269] bpf, arm64: fix out of bounds access in tail call I recently noticed a crash on arm64 when feeding a bogus index into BPF tail call helper. The crash would not occur when the interpreter is used, but only in case of JIT. Output looks as follows: [ 347.007486] Unable to handle kernel paging request at virtual address fffb850e96492510 [...] [ 347.043065] [fffb850e96492510] address between user and kernel address ranges [ 347.050205] Internal error: Oops: 96000004 [#1] SMP [...] [ 347.190829] x13: 0000000000000000 x12: 0000000000000000 [ 347.196128] x11: fffc047ebe782800 x10: ffff808fd7d0fd10 [ 347.201427] x9 : 0000000000000000 x8 : 0000000000000000 [ 347.206726] x7 : 0000000000000000 x6 : 001c991738000000 [ 347.212025] x5 : 0000000000000018 x4 : 000000000000ba5a [ 347.217325] x3 : 00000000000329c4 x2 : ffff808fd7cf0500 [ 347.222625] x1 : ffff808fd7d0fc00 x0 : ffff808fd7cf0500 [ 347.227926] Process test_verifier (pid: 4548, stack limit = 0x000000007467fa61) [ 347.235221] Call trace: [ 347.237656] 0xffff000002f3a4fc [ 347.240784] bpf_test_run+0x78/0xf8 [ 347.244260] bpf_prog_test_run_skb+0x148/0x230 [ 347.248694] SyS_bpf+0x77c/0x1110 [ 347.251999] el0_svc_naked+0x30/0x34 [ 347.255564] Code: 9100075a d280220a 8b0a002a d37df04b (f86b694b) [...] In this case the index used in BPF r3 is the same as in r1 at the time of the call, meaning we fed a pointer as index; here, it had the value 0xffff808fd7cf0500 which sits in x2. While I found tail calls to be working in general (also for hitting the error cases), I noticed the following in the code emission: # bpftool p d j i 988 [...] 38: ldr w10, [x1,x10] 3c: cmp w2, w10 40: b.ge 0x000000000000007c <-- signed cmp 44: mov x10, #0x20 // #32 48: cmp x26, x10 4c: b.gt 0x000000000000007c 50: add x26, x26, #0x1 54: mov x10, #0x110 // #272 58: add x10, x1, x10 5c: lsl x11, x2, #3 60: ldr x11, [x10,x11] <-- faulting insn (f86b694b) 64: cbz x11, 0x000000000000007c [...] Meaning, the tests passed because commit ddb55992b04d ("arm64: bpf: implement bpf_tail_call() helper") was using signed compares instead of unsigned which as a result had the test wrongly passing. Change this but also the tail call count test both into unsigned and cap the index as u32. Latter we did as well in 90caccdd8cc0 ("bpf: fix bpf_tail_call() x64 JIT") and is needed in addition here, too. Tested on HiSilicon Hi1616. Result after patch: # bpftool p d j i 268 [...] 38: ldr w10, [x1,x10] 3c: add w2, w2, #0x0 40: cmp w2, w10 44: b.cs 0x0000000000000080 48: mov x10, #0x20 // #32 4c: cmp x26, x10 50: b.hi 0x0000000000000080 54: add x26, x26, #0x1 58: mov x10, #0x110 // #272 5c: add x10, x1, x10 60: lsl x11, x2, #3 64: ldr x11, [x10,x11] 68: cbz x11, 0x0000000000000080 [...] Fixes: ddb55992b04d ("arm64: bpf: implement bpf_tail_call() helper") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- arch/arm64/net/bpf_jit_comp.c | 5 ++-- tools/testing/selftests/bpf/test_verifier.c | 26 +++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 1d4f1da7c58f8..a93350451e8ec 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -250,8 +250,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) off = offsetof(struct bpf_array, map.max_entries); emit_a64_mov_i64(tmp, off, ctx); emit(A64_LDR32(tmp, r2, tmp), ctx); + emit(A64_MOV(0, r3, r3), ctx); emit(A64_CMP(0, r3, tmp), ctx); - emit(A64_B_(A64_COND_GE, jmp_offset), ctx); + emit(A64_B_(A64_COND_CS, jmp_offset), ctx); /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; @@ -259,7 +260,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); emit(A64_CMP(1, tcc, tmp), ctx); - emit(A64_B_(A64_COND_GT, jmp_offset), ctx); + emit(A64_B_(A64_COND_HI, jmp_offset), ctx); emit(A64_ADD_I(1, tcc, tcc, 1), ctx); /* prog = array->ptrs[index]; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index c0f16e93f9bd1..c73592fa3d417 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2586,6 +2586,32 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, .result = ACCEPT, }, + { + "runtime/jit: pass negative index to tail_call", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, -1), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_prog = { 1 }, + .result = ACCEPT, + }, + { + "runtime/jit: pass > 32bit index to tail_call", + .insns = { + BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_prog = { 2 }, + .result = ACCEPT, + }, { "stack pointer arithmetic", .insns = { From 0f9da844d87796ac31b04e81ee95e155e9043132 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 22 Feb 2018 16:59:26 -0800 Subject: [PATCH 263/269] MIPS: boot: Define __ASSEMBLY__ for its.S build The MIPS %.its.S compiler command did not define __ASSEMBLY__, which meant when compiler_types.h was added to kconfig.h, unexpected things appeared (e.g. struct declarations) which should not have been present. As done in the general %.S compiler command, __ASSEMBLY__ is now included here too. The failure was: Error: arch/mips/boot/vmlinux.gz.its:201.1-2 syntax error FATAL ERROR: Unable to parse input tree /usr/bin/mkimage: Can't read arch/mips/boot/vmlinux.gz.itb.tmp: Invalid argument /usr/bin/mkimage Can't add hashes to FIT blob Reported-by: kbuild test robot Fixes: 28128c61e08e ("kconfig.h: Include compiler types to avoid missed struct attributes") Signed-off-by: Kees Cook Signed-off-by: Linus Torvalds --- arch/mips/boot/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile index 1bd5c4f00d19b..c22da16d67b82 100644 --- a/arch/mips/boot/Makefile +++ b/arch/mips/boot/Makefile @@ -126,6 +126,7 @@ $(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS quiet_cmd_cpp_its_S = ITS $@ cmd_cpp_its_S = $(CPP) $(cpp_flags) -P -C -o $@ $< \ + -D__ASSEMBLY__ \ -DKERNEL_NAME="\"Linux $(KERNELRELEASE)\"" \ -DVMLINUX_BINARY="\"$(3)\"" \ -DVMLINUX_COMPRESSION="\"$(2)\"" \ From 120f3b11ef88fc38ce1d0ff9c9a4b37860ad3140 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 12 Feb 2018 17:26:20 -0800 Subject: [PATCH 264/269] integrity/security: fix digsig.c build error with header file security/integrity/digsig.c has build errors on some $ARCH due to a missing header file, so add it. security/integrity/digsig.c:146:2: error: implicit declaration of function 'vfree' [-Werror=implicit-function-declaration] Reported-by: Michael Ellerman Signed-off-by: Randy Dunlap Cc: Mimi Zohar Cc: linux-integrity@vger.kernel.org Link: http://kisskb.ellerman.id.au/kisskb/head/13396/ Signed-off-by: James Morris --- security/integrity/digsig.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c index 6f9e4ce568cd8..9bb0a7f2863e3 100644 --- a/security/integrity/digsig.c +++ b/security/integrity/digsig.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include From 9f416319f40cd857d2bb517630e5855a905ef3fb Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Mon, 5 Feb 2018 14:28:01 +0100 Subject: [PATCH 265/269] arm64: fix unwind_frame() for filtered out fn for function graph tracing do_task_stat() calls get_wchan(), which further does unwind_frame(). unwind_frame() restores frame->pc to original value in case function graph tracer has modified a return address (LR) in a stack frame to hook a function return. However, if function graph tracer has hit a filtered function, then we can't unwind it as ftrace_push_return_trace() has biased the index(frame->graph) with a 'huge negative' offset(-FTRACE_NOTRACE_DEPTH). Moreover, arm64 stack walker defines index(frame->graph) as unsigned int, which can not compare a -ve number. Similar problem we can have with calling of walk_stackframe() from save_stack_trace_tsk() or dump_backtrace(). This patch fixes unwind_frame() to test the index for -ve value and restore index accordingly before we can restore frame->pc. Reproducer: cd /sys/kernel/debug/tracing/ echo schedule > set_graph_notrace echo 1 > options/display-graph echo wakeup > current_tracer ps -ef | grep -i agent Above commands result in: Unable to handle kernel paging request at virtual address ffff801bd3d1e000 pgd = ffff8003cbe97c00 [ffff801bd3d1e000] *pgd=0000000000000000, *pud=0000000000000000 Internal error: Oops: 96000006 [#1] SMP [...] CPU: 5 PID: 11696 Comm: ps Not tainted 4.11.0+ #33 [...] task: ffff8003c21ba000 task.stack: ffff8003cc6c0000 PC is at unwind_frame+0x12c/0x180 LR is at get_wchan+0xd4/0x134 pc : [] lr : [] pstate: 60000145 sp : ffff8003cc6c3ab0 x29: ffff8003cc6c3ab0 x28: 0000000000000001 x27: 0000000000000026 x26: 0000000000000026 x25: 00000000000012d8 x24: 0000000000000000 x23: ffff8003c1c04000 x22: ffff000008c83000 x21: ffff8003c1c00000 x20: 000000000000000f x19: ffff8003c1bc0000 x18: 0000fffffc593690 x17: 0000000000000000 x16: 0000000000000001 x15: 0000b855670e2b60 x14: 0003e97f22cf1d0f x13: 0000000000000001 x12: 0000000000000000 x11: 00000000e8f4883e x10: 0000000154f47ec8 x9 : 0000000070f367c0 x8 : 0000000000000000 x7 : 00008003f7290000 x6 : 0000000000000018 x5 : 0000000000000000 x4 : ffff8003c1c03cb0 x3 : ffff8003c1c03ca0 x2 : 00000017ffe80000 x1 : ffff8003cc6c3af8 x0 : ffff8003d3e9e000 Process ps (pid: 11696, stack limit = 0xffff8003cc6c0000) Stack: (0xffff8003cc6c3ab0 to 0xffff8003cc6c4000) [...] [] unwind_frame+0x12c/0x180 [] do_task_stat+0x864/0x870 [] proc_tgid_stat+0x3c/0x48 [] proc_single_show+0x5c/0xb8 [] seq_read+0x160/0x414 [] __vfs_read+0x58/0x164 [] vfs_read+0x88/0x144 [] SyS_read+0x60/0xc0 [] __sys_trace_return+0x0/0x4 Fixes: 20380bb390a4 (arm64: ftrace: fix a stack tracer's output under function graph tracer) Signed-off-by: Pratyush Anand Signed-off-by: Jerome Marchand [catalin.marinas@arm.com: replace WARN_ON with WARN_ON_ONCE] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/stacktrace.h | 2 +- arch/arm64/kernel/stacktrace.c | 5 +++++ arch/arm64/kernel/time.c | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 472ef944e9326..902f9edacbea9 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -28,7 +28,7 @@ struct stackframe { unsigned long fp; unsigned long pc; #ifdef CONFIG_FUNCTION_GRAPH_TRACER - unsigned int graph; + int graph; #endif }; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 76809ccd309cc..d5718a060672e 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -59,6 +59,11 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (tsk->ret_stack && (frame->pc == (unsigned long)return_to_handler)) { + if (WARN_ON_ONCE(frame->graph == -1)) + return -EINVAL; + if (frame->graph < -1) + frame->graph += FTRACE_NOTRACE_DEPTH; + /* * This is a case where function graph tracer has * modified a return address (LR) in a stack frame diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index a4391280fba96..f258636273c95 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -52,7 +52,7 @@ unsigned long profile_pc(struct pt_regs *regs) frame.fp = regs->regs[29]; frame.pc = regs->pc; #ifdef CONFIG_FUNCTION_GRAPH_TRACER - frame.graph = -1; /* no task info */ + frame.graph = current->curr_ret_stack; #endif do { int ret = unwind_frame(NULL, &frame); From 4e14bf4236490306004782813b8b4494b18f5e60 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 22 Feb 2018 18:20:30 +0300 Subject: [PATCH 266/269] macvlan: fix use-after-free in macvlan_common_newlink() The following use-after-free was reported by KASan when running LTP macvtap01 test on 4.16-rc2: [10642.528443] BUG: KASAN: use-after-free in macvlan_common_newlink+0x12ef/0x14a0 [macvlan] [10642.626607] Read of size 8 at addr ffff880ba49f2100 by task ip/18450 ... [10642.963873] Call Trace: [10642.994352] dump_stack+0x5c/0x7c [10643.035325] print_address_description+0x75/0x290 [10643.092938] kasan_report+0x28d/0x390 [10643.137971] ? macvlan_common_newlink+0x12ef/0x14a0 [macvlan] [10643.207963] macvlan_common_newlink+0x12ef/0x14a0 [macvlan] [10643.275978] macvtap_newlink+0x171/0x260 [macvtap] [10643.334532] rtnl_newlink+0xd4f/0x1300 ... [10646.256176] Allocated by task 18450: [10646.299964] kasan_kmalloc+0xa6/0xd0 [10646.343746] kmem_cache_alloc_trace+0xf1/0x210 [10646.397826] macvlan_common_newlink+0x6de/0x14a0 [macvlan] [10646.464386] macvtap_newlink+0x171/0x260 [macvtap] [10646.522728] rtnl_newlink+0xd4f/0x1300 ... [10647.022028] Freed by task 18450: [10647.061549] __kasan_slab_free+0x138/0x180 [10647.111468] kfree+0x9e/0x1c0 [10647.147869] macvlan_port_destroy+0x3db/0x650 [macvlan] [10647.211411] rollback_registered_many+0x5b9/0xb10 [10647.268715] rollback_registered+0xd9/0x190 [10647.319675] register_netdevice+0x8eb/0xc70 [10647.370635] macvlan_common_newlink+0xe58/0x14a0 [macvlan] [10647.437195] macvtap_newlink+0x171/0x260 [macvtap] Commit d02fd6e7d293 ("macvlan: Fix one possible double free") handles the case when register_netdevice() invokes ndo_uninit() on error and as a result free the port. But 'macvlan_port_get_rtnl(dev))' check (returns dev->rx_handler_data), which was added by this commit in order to prevent double free, is not quite correct: * for macvlan it always returns NULL because 'lowerdev' is the one that was used to register rx handler (port) in macvlan_port_create() as well as to unregister it in macvlan_port_destroy(). * for macvtap it always returns a valid pointer because macvtap registers its own rx handler before macvlan_common_newlink(). Fixes: d02fd6e7d293 ("macvlan: Fix one possible double free") Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index a0f2be81d52e4..8fc02d9db3d01 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1451,7 +1451,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, /* the macvlan port may be freed by macvlan_uninit when fail to register. * so we destroy the macvlan port only when it's valid. */ - if (create && macvlan_port_get_rtnl(dev)) + if (create && macvlan_port_get_rtnl(lowerdev)) macvlan_port_destroy(port->dev); return err; } From ca79bec237f5809a7c3c59bd41cd0880aa889966 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 22 Feb 2018 16:55:34 +0100 Subject: [PATCH 267/269] ipv6 sit: work around bogus gcc-8 -Wrestrict warning gcc-8 has a new warning that detects overlapping input and output arguments in memcpy(). It triggers for sit_init_net() calling ipip6_tunnel_clone_6rd(), which is actually correct: net/ipv6/sit.c: In function 'sit_init_net': net/ipv6/sit.c:192:3: error: 'memcpy' source argument is the same as destination [-Werror=restrict] The problem here is that the logic detecting the memcpy() arguments finds them to be the same, but the conditional that tests for the input and output of ipip6_tunnel_clone_6rd() to be identical is not a compile-time constant. We know that netdev_priv(t->dev) is the same as t for a tunnel device, and comparing "dev" directly here lets the compiler figure out as well that 'dev == sitn->fb_tunnel_dev' when called from sit_init_net(), so it no longer warns. This code is old, so Cc stable to make sure that we don't get the warning for older kernels built with new gcc. Cc: Martin Sebor Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83456 Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3873d38771357..3a1775a62973b 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -182,7 +182,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel *t = netdev_priv(dev); - if (t->dev == sitn->fb_tunnel_dev) { + if (dev == sitn->fb_tunnel_dev) { ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0); t->ip6rd.relay_prefix = 0; t->ip6rd.prefixlen = 16; From d903ec77118c09f93a610b384d83a6df33a64fe6 Mon Sep 17 00:00:00 2001 From: Andy Spencer Date: Thu, 22 Feb 2018 11:05:33 -0800 Subject: [PATCH 268/269] gianfar: simplify FCS handling and fix memory leak Previously, buffer descriptors containing only the frame check sequence (FCS) were skipped and not added to the skb. However, the page reference count was still incremented, leading to a memory leak. Fixing this inside gfar_add_rx_frag() is difficult due to reserved memory handling and page reuse. Instead, move the FCS handling to gfar_process_frame() and trim off the FCS before passing the skb up the networking stack. Signed-off-by: Andy Spencer Signed-off-by: Jim Gruen Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 3bdeb295514bd..f5c87bd35fa1a 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2934,29 +2934,17 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus, { int size = lstatus & BD_LENGTH_MASK; struct page *page = rxb->page; - bool last = !!(lstatus & BD_LFLAG(RXBD_LAST)); - - /* Remove the FCS from the packet length */ - if (last) - size -= ETH_FCS_LEN; if (likely(first)) { skb_put(skb, size); } else { /* the last fragments' length contains the full frame length */ - if (last) + if (lstatus & BD_LFLAG(RXBD_LAST)) size -= skb->len; - /* Add the last fragment if it contains something other than - * the FCS, otherwise drop it and trim off any part of the FCS - * that was already received. - */ - if (size > 0) - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - rxb->page_offset + RXBUF_ALIGNMENT, - size, GFAR_RXB_TRUESIZE); - else if (size < 0) - pskb_trim(skb, skb->len + size); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + rxb->page_offset + RXBUF_ALIGNMENT, + size, GFAR_RXB_TRUESIZE); } /* try reuse page */ @@ -3069,6 +3057,9 @@ static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb) if (priv->padding) skb_pull(skb, priv->padding); + /* Trim off the FCS */ + pskb_trim(skb, skb->len - ETH_FCS_LEN); + if (ndev->features & NETIF_F_RXCSUM) gfar_rx_checksum(skb, fcb); From a5f7add332b4ea6d4b9480971b3b0f5e66466ae9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2018 19:45:27 -0800 Subject: [PATCH 269/269] net_sched: gen_estimator: fix broken estimators based on percpu stats pfifo_fast got percpu stats lately, uncovering a bug I introduced last year in linux-4.10. I missed the fact that we have to clear our temporary storage before calling __gnet_stats_copy_basic() in the case of percpu stats. Without this fix, rate estimators (tc qd replace dev xxx root est 1sec 4sec pfifo_fast) are utterly broken. Fixes: 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate estimators") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 0a3f88f08727f..98fd12721221e 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -66,6 +66,7 @@ struct net_rate_estimator { static void est_fetch_counters(struct net_rate_estimator *e, struct gnet_stats_basic_packed *b) { + memset(b, 0, sizeof(*b)); if (e->stats_lock) spin_lock(e->stats_lock);