From 96f05be37f4ece1dffba92d4ae079a486a4cf6df Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 13 Apr 2015 14:23:29 +0200 Subject: [PATCH 001/104] ASoC: qcom: Return an error for invalid PCM trigger command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a compile warning sound/soc/qcom/lpass-cpu.c: In function ‘lpass_cpu_daiops_trigger’: sound/soc/qcom/lpass-cpu.c:224:2: warning: ‘ret’ may be used uninitialized in this function [-Wmaybe-uninitialized] return ret; ^ Although switch () lists the most of existing cases, it's still better to cover the rest as an error properly. Signed-off-by: Takashi Iwai Acked-by: Kenneth Westfield Signed-off-by: Mark Brown --- sound/soc/qcom/lpass-cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index 6698d058de29..dc790abaa331 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -194,7 +194,7 @@ static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { struct lpass_data *drvdata = snd_soc_dai_get_drvdata(dai); - int ret; + int ret = -EINVAL; switch (cmd) { case SNDRV_PCM_TRIGGER_START: From d1acba2fdebb449bad01e7cf77a9f9730dfaca6e Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 13 Apr 2015 06:00:45 +0000 Subject: [PATCH 002/104] ASoC: rsnd: set dmaen->chan = NULL when error case rsnd_dmaen_quit() is assuming dmaen->chan is NULL if it failed to get DMAEngine channel. but, current dmaen->chan might have error value when error case (this driver is checking it by IS_ERR_OR_NULL()) This patch makes sure dmaen->chan is NULL when error case. Otherwise, it will contact to unknown address in rsnd_dmaen_quit() Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/dma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c index ac3756f6af60..144308f15fb3 100644 --- a/sound/soc/sh/rcar/dma.c +++ b/sound/soc/sh/rcar/dma.c @@ -156,6 +156,7 @@ static int rsnd_dmaen_init(struct rsnd_priv *priv, struct rsnd_dma *dma, int id, (void *)id); } if (IS_ERR_OR_NULL(dmaen->chan)) { + dmaen->chan = NULL; dev_err(dev, "can't get dma channel\n"); goto rsnd_dma_channel_err; } From ac98b4c015b50b1e452f8d55b612320be7f80825 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 13 Apr 2015 14:20:54 +0800 Subject: [PATCH 003/104] ASoC: Intel: Remove invalid kfree of devm allocated data kbuild robot reports following warning: "sound/soc/intel/haswell/sst-haswell-ipc.c:2204:1-6: WARNING: invalid free of devm_ allocated data" As julia explains to me, the memory allocated with devm_kalloc is freed automatically on failure of a probe function. So this kfree should be removed otherwise the double free will be got in error handler path. Signed-off-by: Jin Yao Signed-off-by: Mark Brown --- sound/soc/intel/haswell/sst-haswell-ipc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/intel/haswell/sst-haswell-ipc.c b/sound/soc/intel/haswell/sst-haswell-ipc.c index 344a1e9bbce5..324eceb07b25 100644 --- a/sound/soc/intel/haswell/sst-haswell-ipc.c +++ b/sound/soc/intel/haswell/sst-haswell-ipc.c @@ -2201,7 +2201,6 @@ int sst_hsw_dsp_init(struct device *dev, struct sst_pdata *pdata) dsp_new_err: sst_ipc_fini(ipc); ipc_init_err: - kfree(hsw); return ret; } EXPORT_SYMBOL_GPL(sst_hsw_dsp_init); From 28ecc0b658e2ac882faa80e7ff1d72d144299bd0 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 15 Apr 2015 00:08:15 -0300 Subject: [PATCH 004/104] ASoC: fsl_ssi: Fix platform_get_irq() error handling We should check whether platform_get_irq() returns a negative number and propagate the error in this case. Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index e8bb8eef1d16..0d48804218b1 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -1357,7 +1357,7 @@ static int fsl_ssi_probe(struct platform_device *pdev) } ssi_private->irq = platform_get_irq(pdev, 0); - if (!ssi_private->irq) { + if (ssi_private->irq < 0) { dev_err(&pdev->dev, "no irq for node %s\n", pdev->name); return ssi_private->irq; } From 427ced4b203dfea4f08b1298cd1f88e19039fca4 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 16 Apr 2015 20:17:46 +0800 Subject: [PATCH 005/104] ASoC: tfa9879: Fix return value check in tfa9879_i2c_probe() In case of error, the function devm_kzalloc() returns NULL not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. Signed-off-by: Wei Yongjun Acked-by: Peter Rosin Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/tfa9879.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tfa9879.c b/sound/soc/codecs/tfa9879.c index 16f1b71edb55..aab0af681e8c 100644 --- a/sound/soc/codecs/tfa9879.c +++ b/sound/soc/codecs/tfa9879.c @@ -280,8 +280,8 @@ static int tfa9879_i2c_probe(struct i2c_client *i2c, int i; tfa9879 = devm_kzalloc(&i2c->dev, sizeof(*tfa9879), GFP_KERNEL); - if (IS_ERR(tfa9879)) - return PTR_ERR(tfa9879); + if (!tfa9879) + return -ENOMEM; i2c_set_clientdata(i2c, tfa9879); From c479163a1b6ab424786fbcd9225b4e3c1c58eb0b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 16 Apr 2015 20:18:02 +0800 Subject: [PATCH 006/104] ASoC: samsung: s3c24xx-i2s: Fix return value check in s3c24xx_iis_dev_probe() In case of error, the function devm_ioremap_resource() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Signed-off-by: Wei Yongjun Reviewed-by: Krzysztof Kozlowski Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/samsung/s3c24xx-i2s.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/samsung/s3c24xx-i2s.c b/sound/soc/samsung/s3c24xx-i2s.c index 326d3c3804e3..5bf723689692 100644 --- a/sound/soc/samsung/s3c24xx-i2s.c +++ b/sound/soc/samsung/s3c24xx-i2s.c @@ -461,8 +461,8 @@ static int s3c24xx_iis_dev_probe(struct platform_device *pdev) return -ENOENT; } s3c24xx_i2s.regs = devm_ioremap_resource(&pdev->dev, res); - if (s3c24xx_i2s.regs == NULL) - return -ENXIO; + if (IS_ERR(s3c24xx_i2s.regs)) + return PTR_ERR(s3c24xx_i2s.regs); s3c24xx_i2s_pcm_stereo_out.dma_addr = res->start + S3C2410_IISFIFO; s3c24xx_i2s_pcm_stereo_in.dma_addr = res->start + S3C2410_IISFIFO; From d09a6b4a1412149c133a58b53f50e9c05a95e834 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 16 Apr 2015 21:46:29 +0800 Subject: [PATCH 007/104] ASoC: Intel: sst_byt: remove kfree for memory allocated with devm_kzalloc It's not necessary to free memory allocated with devm_kzalloc and using kfree leads to a double free. Signed-off-by: Wei Yongjun Acked-by: Jarkko Nikula Signed-off-by: Mark Brown --- sound/soc/intel/baytrail/sst-baytrail-ipc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/intel/baytrail/sst-baytrail-ipc.c b/sound/soc/intel/baytrail/sst-baytrail-ipc.c index 1efb33b36303..a839dbfa5218 100644 --- a/sound/soc/intel/baytrail/sst-baytrail-ipc.c +++ b/sound/soc/intel/baytrail/sst-baytrail-ipc.c @@ -759,7 +759,6 @@ int sst_byt_dsp_init(struct device *dev, struct sst_pdata *pdata) dsp_new_err: sst_ipc_fini(ipc); ipc_init_err: - kfree(byt); return err; } From aae013d646aae8787b76255d1a1cadd7f64a47dd Mon Sep 17 00:00:00 2001 From: Jie Yang Date: Sun, 19 Apr 2015 09:45:48 +0800 Subject: [PATCH 008/104] ASoC: add static inline funcs to fix a compiling issue When CONFIG_PM_SLEEP is not selected, calling funcs snd_soc_suspend and _resume will generate a compiling issue. Here add static inline stub functions to fix it. Signed-off-by: Jie Yang Signed-off-by: Mark Brown --- include/sound/soc.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/sound/soc.h b/include/sound/soc.h index 0d1ade195628..cd8a12508f16 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -387,8 +387,20 @@ int snd_soc_codec_set_pll(struct snd_soc_codec *codec, int pll_id, int source, int snd_soc_register_card(struct snd_soc_card *card); int snd_soc_unregister_card(struct snd_soc_card *card); int devm_snd_soc_register_card(struct device *dev, struct snd_soc_card *card); +#ifdef CONFIG_PM_SLEEP int snd_soc_suspend(struct device *dev); int snd_soc_resume(struct device *dev); +#else +static inline int snd_soc_suspend(struct device *dev) +{ + return 0; +} + +static inline int snd_soc_resume(struct device *dev) +{ + return 0; +} +#endif int snd_soc_poweroff(struct device *dev); int snd_soc_register_platform(struct device *dev, const struct snd_soc_platform_driver *platform_drv); From a2d97723cb3a7741af81868427b36bba274b681b Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 22 Apr 2015 13:58:47 +0100 Subject: [PATCH 009/104] ASoC: dapm: Enable autodisable on SOC_DAPM_SINGLE_TLV_AUTODISABLE Correct small copy and paste error where autodisable was not being enabled for the SOC_DAPM_SINGLE_TLV_AUTODISABLE control. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- include/sound/soc-dapm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 8d7416e46861..15355892a0ff 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -287,7 +287,7 @@ struct device; .access = SNDRV_CTL_ELEM_ACCESS_TLV_READ | SNDRV_CTL_ELEM_ACCESS_READWRITE,\ .tlv.p = (tlv_array), \ .get = snd_soc_dapm_get_volsw, .put = snd_soc_dapm_put_volsw, \ - .private_value = SOC_SINGLE_VALUE(reg, shift, max, invert, 0) } + .private_value = SOC_SINGLE_VALUE(reg, shift, max, invert, 1) } #define SOC_DAPM_SINGLE_TLV_VIRT(xname, max, tlv_array) \ SOC_DAPM_SINGLE(xname, SND_SOC_NOPM, 0, max, 0, tlv_array) #define SOC_DAPM_ENUM(xname, xenum) \ From 8faf141a9903477910387af062ece04ea7d730ed Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 23 Apr 2015 14:38:13 +0530 Subject: [PATCH 010/104] ASoC: Intel: fix the makefile for atom code The tom code should be using SND_SST_MFLD_PLATFORM and not the baytrail one. So fix it now Signed-off-by: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/Makefile b/sound/soc/intel/Makefile index cd9aee9871a3..3853ec2ddbc7 100644 --- a/sound/soc/intel/Makefile +++ b/sound/soc/intel/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_SND_SOC_INTEL_SST) += common/ # Platform Support obj-$(CONFIG_SND_SOC_INTEL_HASWELL) += haswell/ obj-$(CONFIG_SND_SOC_INTEL_BAYTRAIL) += baytrail/ -obj-$(CONFIG_SND_SOC_INTEL_BAYTRAIL) += atom/ +obj-$(CONFIG_SND_SST_MFLD_PLATFORM) += atom/ # Machine support obj-$(CONFIG_SND_SOC_INTEL_SST) += boards/ From 7e01b5acd88b3f3108d8c4ce44e3205d67437202 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 16 Apr 2015 14:47:33 +0200 Subject: [PATCH 011/104] kexec: allocate the kexec control page with KEXEC_CONTROL_MEMORY_GFP Introduce KEXEC_CONTROL_MEMORY_GFP to allow the architecture code to override the gfp flags of the allocation for the kexec control page. The loop in kimage_alloc_normal_control_pages allocates pages with GFP_KERNEL until a page is found that happens to have an address smaller than the KEXEC_CONTROL_MEMORY_LIMIT. On systems with a large memory size but a small KEXEC_CONTROL_MEMORY_LIMIT the loop will keep allocating memory until the oom killer steps in. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/kexec.h | 3 +++ include/linux/kexec.h | 4 ++++ kernel/kexec.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 694bcd6bd927..2f924bc30e35 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -26,6 +26,9 @@ /* Not more than 2GB */ #define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31) +/* Allocate control page with GFP_DMA */ +#define KEXEC_CONTROL_MEMORY_GFP GFP_DMA + /* Maximum address we can use for the crash control pages */ #define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index e60a745ac198..e804306ef5e8 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -40,6 +40,10 @@ #error KEXEC_CONTROL_MEMORY_LIMIT not defined #endif +#ifndef KEXEC_CONTROL_MEMORY_GFP +#define KEXEC_CONTROL_MEMORY_GFP GFP_KERNEL +#endif + #ifndef KEXEC_CONTROL_PAGE_SIZE #error KEXEC_CONTROL_PAGE_SIZE not defined #endif diff --git a/kernel/kexec.c b/kernel/kexec.c index 38c25b1f2fd5..7a36fdcca5bf 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -707,7 +707,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image, do { unsigned long pfn, epfn, addr, eaddr; - pages = kimage_alloc_pages(GFP_KERNEL, order); + pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order); if (!pages) break; pfn = page_to_pfn(pages); From 0b46e0a3ec0d7a04af6a091354f1b5e1b952d70a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 15 Apr 2015 13:23:26 +0200 Subject: [PATCH 012/104] s390/kvm: remove delayed reallocation of page tables for KVM Replacing a 2K page table with a 4K page table while a VMA is active for the affected memory region is fundamentally broken. Rip out the page table reallocation code and replace it with a simple system control 'vm.allocate_pgste'. If the system control is set the page tables for all processes are allocated as full 4K pages, even for processes that do not need it. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu.h | 4 +- arch/s390/include/asm/mmu_context.h | 3 + arch/s390/include/asm/pgalloc.h | 1 + arch/s390/include/asm/pgtable.h | 9 ++ arch/s390/mm/pgtable.c | 142 +++++++++------------------- 5 files changed, 59 insertions(+), 100 deletions(-) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index a5e656260a70..d29ad9545b41 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -14,7 +14,9 @@ typedef struct { unsigned long asce_bits; unsigned long asce_limit; unsigned long vdso_base; - /* The mmu context has extended page tables. */ + /* The mmu context allocates 4K page tables. */ + unsigned int alloc_pgste:1; + /* The mmu context uses extended page tables. */ unsigned int has_pgste:1; /* The mmu context uses storage keys. */ unsigned int use_skey:1; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index d25d9ff10ba8..fb1b93ea3e3f 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -20,8 +20,11 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.flush_mm = 0; mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; mm->context.asce_bits |= _ASCE_TYPE_REGION3; +#ifdef CONFIG_PGSTE + mm->context.alloc_pgste = page_table_allocate_pgste; mm->context.has_pgste = 0; mm->context.use_skey = 0; +#endif mm->context.asce_limit = STACK_TOP_MAX; crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 51e7fb634ebc..7b7858f158b4 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -21,6 +21,7 @@ void crst_table_free(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); +extern int page_table_allocate_pgste; int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned long key, bool nq); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 989cfae9e202..1fba63997d50 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -423,6 +423,15 @@ static inline int mm_has_pgste(struct mm_struct *mm) return 0; } +static inline int mm_alloc_pgste(struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + if (unlikely(mm->context.alloc_pgste)) + return 1; +#endif + return 0; +} + /* * In the case that a guest uses storage keys * faults should no longer be backed by zero pages diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 33f589459113..b33f66110ca9 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -920,6 +921,40 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) } EXPORT_SYMBOL(get_guest_storage_key); +static int page_table_allocate_pgste_min = 0; +static int page_table_allocate_pgste_max = 1; +int page_table_allocate_pgste = 0; +EXPORT_SYMBOL(page_table_allocate_pgste); + +static struct ctl_table page_table_sysctl[] = { + { + .procname = "allocate_pgste", + .data = &page_table_allocate_pgste, + .maxlen = sizeof(int), + .mode = S_IRUGO | S_IWUSR, + .proc_handler = proc_dointvec, + .extra1 = &page_table_allocate_pgste_min, + .extra2 = &page_table_allocate_pgste_max, + }, + { } +}; + +static struct ctl_table page_table_sysctl_dir[] = { + { + .procname = "vm", + .maxlen = 0, + .mode = 0555, + .child = page_table_sysctl, + }, + { } +}; + +static int __init page_table_register_sysctl(void) +{ + return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM; +} +__initcall(page_table_register_sysctl); + #else /* CONFIG_PGSTE */ static inline int page_table_with_pgste(struct page *page) @@ -963,7 +998,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) struct page *uninitialized_var(page); unsigned int mask, bit; - if (mm_has_pgste(mm)) + if (mm_alloc_pgste(mm)) return page_table_alloc_pgste(mm); /* Allocate fragments of a 4K page as 1K/2K page table */ spin_lock_bh(&mm->context.list_lock); @@ -1165,116 +1200,25 @@ static inline void thp_split_mm(struct mm_struct *mm) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb, - struct mm_struct *mm, pud_t *pud, - unsigned long addr, unsigned long end) -{ - unsigned long next, *table, *new; - struct page *page; - spinlock_t *ptl; - pmd_t *pmd; - - pmd = pmd_offset(pud, addr); - do { - next = pmd_addr_end(addr, end); -again: - if (pmd_none_or_clear_bad(pmd)) - continue; - table = (unsigned long *) pmd_deref(*pmd); - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - if (page_table_with_pgste(page)) - continue; - /* Allocate new page table with pgstes */ - new = page_table_alloc_pgste(mm); - if (!new) - return -ENOMEM; - - ptl = pmd_lock(mm, pmd); - if (likely((unsigned long *) pmd_deref(*pmd) == table)) { - /* Nuke pmd entry pointing to the "short" page table */ - pmdp_flush_lazy(mm, addr, pmd); - pmd_clear(pmd); - /* Copy ptes from old table to new table */ - memcpy(new, table, PAGE_SIZE/2); - clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); - /* Establish new table */ - pmd_populate(mm, pmd, (pte_t *) new); - /* Free old table with rcu, there might be a walker! */ - page_table_free_rcu(tlb, table, addr); - new = NULL; - } - spin_unlock(ptl); - if (new) { - page_table_free_pgste(new); - goto again; - } - } while (pmd++, addr = next, addr != end); - - return addr; -} - -static unsigned long page_table_realloc_pud(struct mmu_gather *tlb, - struct mm_struct *mm, pgd_t *pgd, - unsigned long addr, unsigned long end) -{ - unsigned long next; - pud_t *pud; - - pud = pud_offset(pgd, addr); - do { - next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) - continue; - next = page_table_realloc_pmd(tlb, mm, pud, addr, next); - if (unlikely(IS_ERR_VALUE(next))) - return next; - } while (pud++, addr = next, addr != end); - - return addr; -} - -static unsigned long page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long addr, unsigned long end) -{ - unsigned long next; - pgd_t *pgd; - - pgd = pgd_offset(mm, addr); - do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - next = page_table_realloc_pud(tlb, mm, pgd, addr, next); - if (unlikely(IS_ERR_VALUE(next))) - return next; - } while (pgd++, addr = next, addr != end); - - return 0; -} - /* * switch on pgstes for its userspace process (for kvm) */ int s390_enable_sie(void) { - struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; - struct mmu_gather tlb; + struct mm_struct *mm = current->mm; /* Do we have pgstes? if yes, we are done */ - if (mm_has_pgste(tsk->mm)) + if (mm_has_pgste(mm)) return 0; - + /* Fail if the page tables are 2K */ + if (!mm_alloc_pgste(mm)) + return -EINVAL; down_write(&mm->mmap_sem); + mm->context.has_pgste = 1; /* split thp mappings and disable thp for future mappings */ thp_split_mm(mm); - /* Reallocate the page tables with pgstes */ - tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE); - if (!page_table_realloc(&tlb, mm, 0, TASK_SIZE)) - mm->context.has_pgste = 1; - tlb_finish_mmu(&tlb, 0, TASK_SIZE); up_write(&mm->mmap_sem); - return mm->context.has_pgste ? 0 : -ENOMEM; + return 0; } EXPORT_SYMBOL_GPL(s390_enable_sie); From 0a7c501e6759db49d9dffb10ed62142d705e3f90 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Mon, 20 Apr 2015 10:26:52 +0200 Subject: [PATCH 013/104] s390/3215: free memory in error path If one memory allocation fails, there is a memory leak. Signed-off-by: Christophe Jaillet Signed-off-by: Martin Schwidefsky --- drivers/s390/char/con3215.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index c43aca69fb30..0fc3fe5fd5b8 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -667,6 +667,8 @@ static struct raw3215_info *raw3215_alloc_info(void) info->buffer = kzalloc(RAW3215_BUFFER_SIZE, GFP_KERNEL | GFP_DMA); info->inbuf = kzalloc(RAW3215_INBUF_SIZE, GFP_KERNEL | GFP_DMA); if (!info->buffer || !info->inbuf) { + kfree(info->inbuf); + kfree(info->buffer); kfree(info); return NULL; } From 77a87f0cb1a57237860754525d4e8cb2789e6e12 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 21 Apr 2015 16:50:08 +0200 Subject: [PATCH 014/104] s390/bpf: add dependency to z196 features The new ebpf code uses e.g. the laal instruction which is part of the interlocked-access facility 1 which again was introduced with z196. So we must make sure the ebpf code generator depends on MARCH_Z196_FEATURES. Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8e58c614c37d..b06dc3839268 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -115,7 +115,7 @@ config S390 select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE - select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z9_109_FEATURES + select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_DEBUG_KMEMLEAK From b7d14f3a92223c3f5e52e9f20c74cb96dc130e87 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 22 Apr 2015 10:26:20 +0200 Subject: [PATCH 015/104] s390/mm: correct transfer of dirty & young bits in __pmd_to_pte The dirty & young bit from the pmd is not copied correctly to the pseudo pte in __pmd_to_pte. In fact it is not copied at all, the bits get lost. As the old style huge page currently does not need the dirty & young information this has no effect, but may be needed in the future. Signed-off-by: Martin Schwidefsky --- arch/s390/mm/hugetlbpage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 210ffede0153..c67e8bf012b6 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -70,8 +70,8 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4; pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5; pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT); - pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10; } else pte_val(pte) = _PAGE_INVALID; return pte; From a1c843b82541fdd4c4644607c942dabc7c7e6f6c Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 22 Apr 2015 13:55:59 +0200 Subject: [PATCH 016/104] s390/mm: change swap pte encoding and pgtable cleanup After the file ptes have been removed the bit combination used to encode non-linear mappings can be reused for the swap ptes. This frees up a precious pte software bit. Reflect the change in the swap encoding in the comments and do some cleanup while we are at it. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 158 +++++++++++++------------------- arch/s390/mm/hugetlbpage.c | 62 +++++++------ 2 files changed, 97 insertions(+), 123 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 1fba63997d50..fc642399b489 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -12,12 +12,9 @@ #define _ASM_S390_PGTABLE_H /* - * The Linux memory management assumes a three-level page table setup. For - * s390 31 bit we "fold" the mid level into the top-level page table, so - * that we physically have the same two-level page table as the s390 mmu - * expects in 31 bit mode. For s390 64 bit we use three of the five levels - * the hardware provides (region first and region second tables are not - * used). + * The Linux memory management assumes a three-level page table setup. + * For s390 64 bit we use up to four of the five levels the hardware + * provides (region first tables are not used). * * The "pgd_xxx()" functions are trivial for a folded two-level * setup: the pgd is never bad, and a pmd always exists (as it's folded @@ -101,8 +98,8 @@ extern unsigned long zero_page_mask; #ifndef __ASSEMBLY__ /* - * The vmalloc and module area will always be on the topmost area of the kernel - * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc and modules. + * The vmalloc and module area will always be on the topmost area of the + * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules. * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where * modules will reside. That makes sure that inter module branches always * happen without trampolines and in addition the placement within a 2GB frame @@ -131,38 +128,6 @@ static inline int is_module_addr(void *addr) } /* - * A 31 bit pagetable entry of S390 has following format: - * | PFRA | | OS | - * 0 0IP0 - * 00000000001111111111222222222233 - * 01234567890123456789012345678901 - * - * I Page-Invalid Bit: Page is not available for address-translation - * P Page-Protection Bit: Store access not possible for page - * - * A 31 bit segmenttable entry of S390 has following format: - * | P-table origin | |PTL - * 0 IC - * 00000000001111111111222222222233 - * 01234567890123456789012345678901 - * - * I Segment-Invalid Bit: Segment is not available for address-translation - * C Common-Segment Bit: Segment is not private (PoP 3-30) - * PTL Page-Table-Length: Page-table length (PTL+1*16 entries -> up to 256) - * - * The 31 bit segmenttable origin of S390 has following format: - * - * |S-table origin | | STL | - * X **GPS - * 00000000001111111111222222222233 - * 01234567890123456789012345678901 - * - * X Space-Switch event: - * G Segment-Invalid Bit: * - * P Private-Space Bit: Segment is not private (PoP 3-30) - * S Storage-Alteration: - * STL Segment-Table-Length: Segment-table length (STL+1*16 entries -> up to 2048) - * * A 64 bit pagetable entry of S390 has following format: * | PFRA |0IPC| OS | * 0000000000111111111122222222223333333333444444444455555555556666 @@ -220,7 +185,6 @@ static inline int is_module_addr(void *addr) /* Software bits in the page table entry */ #define _PAGE_PRESENT 0x001 /* SW pte present bit */ -#define _PAGE_TYPE 0x002 /* SW pte type bit */ #define _PAGE_YOUNG 0x004 /* SW pte young bit */ #define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ #define _PAGE_READ 0x010 /* SW pte read bit */ @@ -240,31 +204,34 @@ static inline int is_module_addr(void *addr) * table lock held. * * The following table gives the different possible bit combinations for - * the pte hardware and software bits in the last 12 bits of a pte: + * the pte hardware and software bits in the last 12 bits of a pte + * (. unassigned bit, x don't care, t swap type): * * 842100000000 * 000084210000 * 000000008421 - * .IR...wrdytp - * empty .10...000000 - * swap .10...xxxx10 - * file .11...xxxxx0 - * prot-none, clean, old .11...000001 - * prot-none, clean, young .11...000101 - * prot-none, dirty, old .10...001001 - * prot-none, dirty, young .10...001101 - * read-only, clean, old .11...010001 - * read-only, clean, young .01...010101 - * read-only, dirty, old .11...011001 - * read-only, dirty, young .01...011101 - * read-write, clean, old .11...110001 - * read-write, clean, young .01...110101 - * read-write, dirty, old .10...111001 - * read-write, dirty, young .00...111101 + * .IR.uswrdy.p + * empty .10.00000000 + * swap .11..ttttt.0 + * prot-none, clean, old .11.xx0000.1 + * prot-none, clean, young .11.xx0001.1 + * prot-none, dirty, old .10.xx0010.1 + * prot-none, dirty, young .10.xx0011.1 + * read-only, clean, old .11.xx0100.1 + * read-only, clean, young .01.xx0101.1 + * read-only, dirty, old .11.xx0110.1 + * read-only, dirty, young .01.xx0111.1 + * read-write, clean, old .11.xx1100.1 + * read-write, clean, young .01.xx1101.1 + * read-write, dirty, old .10.xx1110.1 + * read-write, dirty, young .00.xx1111.1 + * HW-bits: R read-only, I invalid + * SW-bits: p present, y young, d dirty, r read, w write, s special, + * u unused, l large * - * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 - * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 - * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402 + * pte_none is true for the bit pattern .10.00000000, pte == 0x400 + * pte_swap is true for the bit pattern .11..ooooo.0, (pte & 0x201) == 0x200 + * pte_present is true for the bit pattern .xx.xxxxxx.1, (pte & 0x001) == 0x001 */ /* Bits in the segment/region table address-space-control-element */ @@ -335,6 +302,8 @@ static inline int is_module_addr(void *addr) * read-write, dirty, young 11..0...0...11 * The segment table origin is used to distinguish empty (origin==0) from * read-write, old segment table entries (origin!=0) + * HW-bits: R read-only, I invalid + * SW-bits: y young, d dirty, r read, w write */ #define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */ @@ -591,10 +560,9 @@ static inline int pte_none(pte_t pte) static inline int pte_swap(pte_t pte) { - /* Bit pattern: (pte & 0x603) == 0x402 */ - return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | - _PAGE_TYPE | _PAGE_PRESENT)) - == (_PAGE_INVALID | _PAGE_TYPE); + /* Bit pattern: (pte & 0x201) == 0x200 */ + return (pte_val(pte) & (_PAGE_PROTECT | _PAGE_PRESENT)) + == _PAGE_PROTECT; } static inline int pte_special(pte_t pte) @@ -1595,51 +1563,51 @@ static inline int has_transparent_hugepage(void) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* - * 31 bit swap entry format: - * A page-table entry has some bits we have to treat in a special way. - * Bits 0, 20 and bit 23 have to be zero, otherwise an specification - * exception will occur instead of a page translation exception. The - * specifiation exception has the bad habit not to store necessary - * information in the lowcore. - * Bits 21, 22, 30 and 31 are used to indicate the page type. - * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 - * This leaves the bits 1-19 and bits 24-29 to store type and offset. - * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19 - * plus 24 for the offset. - * 0| offset |0110|o|type |00| - * 0 0000000001111111111 2222 2 22222 33 - * 0 1234567890123456789 0123 4 56789 01 - * * 64 bit swap entry format: * A page-table entry has some bits we have to treat in a special way. * Bits 52 and bit 55 have to be zero, otherwise an specification * exception will occur instead of a page translation exception. The * specifiation exception has the bad habit not to store necessary * information in the lowcore. - * Bits 53, 54, 62 and 63 are used to indicate the page type. - * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 - * This leaves the bits 0-51 and bits 56-61 to store type and offset. - * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 - * plus 56 for the offset. - * | offset |0110|o|type |00| - * 0000000000111111111122222222223333333333444444444455 5555 5 55566 66 - * 0123456789012345678901234567890123456789012345678901 2345 6 78901 23 + * Bits 54 and 63 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x201) == 0x200 + * This leaves the bits 0-51 and bits 56-62 to store type and offset. + * We use the 5 bits from 57-61 for the type and the 52 bits from 0-51 + * for the offset. + * | offset |01100|type |00| + * |0000000000111111111122222222223333333333444444444455|55555|55566|66| + * |0123456789012345678901234567890123456789012345678901|23456|78901|23| */ -#define __SWP_OFFSET_MASK (~0UL >> 11) +#define __SWP_OFFSET_MASK ((1UL << 52) - 1) +#define __SWP_OFFSET_SHIFT 12 +#define __SWP_TYPE_MASK ((1UL << 5) - 1) +#define __SWP_TYPE_SHIFT 2 static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) { pte_t pte; - offset &= __SWP_OFFSET_MASK; - pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) | - ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); + + pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT; + pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT; + pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT; return pte; } -#define __swp_type(entry) (((entry).val >> 2) & 0x1f) -#define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1)) -#define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) +static inline unsigned long __swp_type(swp_entry_t entry) +{ + return (entry.val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK; +} + +static inline unsigned long __swp_offset(swp_entry_t entry) +{ + return (entry.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK; +} + +static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) +{ + return (swp_entry_t) { pte_val(mk_swap_pte(type, offset)) }; +} #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index c67e8bf012b6..e617e74b7be2 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -14,20 +14,23 @@ static inline pmd_t __pte_to_pmd(pte_t pte) /* * Convert encoding pte bits pmd bits - * .IR...wrdytp dy..R...I...wr - * empty .10...000000 -> 00..0...1...00 - * prot-none, clean, old .11...000001 -> 00..1...1...00 - * prot-none, clean, young .11...000101 -> 01..1...1...00 - * prot-none, dirty, old .10...001001 -> 10..1...1...00 - * prot-none, dirty, young .10...001101 -> 11..1...1...00 - * read-only, clean, old .11...010001 -> 00..1...1...01 - * read-only, clean, young .01...010101 -> 01..1...0...01 - * read-only, dirty, old .11...011001 -> 10..1...1...01 - * read-only, dirty, young .01...011101 -> 11..1...0...01 - * read-write, clean, old .11...110001 -> 00..0...1...11 - * read-write, clean, young .01...110101 -> 01..0...0...11 - * read-write, dirty, old .10...111001 -> 10..0...1...11 - * read-write, dirty, young .00...111101 -> 11..0...0...11 + * lIR.uswrdy.p dy..R...I...wr + * empty 010.000000.0 -> 00..0...1...00 + * prot-none, clean, old 111.000000.1 -> 00..1...1...00 + * prot-none, clean, young 111.000001.1 -> 01..1...1...00 + * prot-none, dirty, old 111.000010.1 -> 10..1...1...00 + * prot-none, dirty, young 111.000011.1 -> 11..1...1...00 + * read-only, clean, old 111.000100.1 -> 00..1...1...01 + * read-only, clean, young 101.000101.1 -> 01..1...0...01 + * read-only, dirty, old 111.000110.1 -> 10..1...1...01 + * read-only, dirty, young 101.000111.1 -> 11..1...0...01 + * read-write, clean, old 111.001100.1 -> 00..1...1...11 + * read-write, clean, young 101.001101.1 -> 01..1...0...11 + * read-write, dirty, old 110.001110.1 -> 10..0...1...11 + * read-write, dirty, young 100.001111.1 -> 11..0...0...11 + * HW-bits: R read-only, I invalid + * SW-bits: p present, y young, d dirty, r read, w write, s special, + * u unused, l large */ if (pte_present(pte)) { pmd_val(pmd) = pte_val(pte) & PAGE_MASK; @@ -48,20 +51,23 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) /* * Convert encoding pmd bits pte bits - * dy..R...I...wr .IR...wrdytp - * empty 00..0...1...00 -> .10...001100 - * prot-none, clean, old 00..0...1...00 -> .10...000001 - * prot-none, clean, young 01..0...1...00 -> .10...000101 - * prot-none, dirty, old 10..0...1...00 -> .10...001001 - * prot-none, dirty, young 11..0...1...00 -> .10...001101 - * read-only, clean, old 00..1...1...01 -> .11...010001 - * read-only, clean, young 01..1...1...01 -> .11...010101 - * read-only, dirty, old 10..1...1...01 -> .11...011001 - * read-only, dirty, young 11..1...1...01 -> .11...011101 - * read-write, clean, old 00..0...1...11 -> .10...110001 - * read-write, clean, young 01..0...1...11 -> .10...110101 - * read-write, dirty, old 10..0...1...11 -> .10...111001 - * read-write, dirty, young 11..0...1...11 -> .10...111101 + * dy..R...I...wr lIR.uswrdy.p + * empty 00..0...1...00 -> 010.000000.0 + * prot-none, clean, old 00..1...1...00 -> 111.000000.1 + * prot-none, clean, young 01..1...1...00 -> 111.000001.1 + * prot-none, dirty, old 10..1...1...00 -> 111.000010.1 + * prot-none, dirty, young 11..1...1...00 -> 111.000011.1 + * read-only, clean, old 00..1...1...01 -> 111.000100.1 + * read-only, clean, young 01..1...0...01 -> 101.000101.1 + * read-only, dirty, old 10..1...1...01 -> 111.000110.1 + * read-only, dirty, young 11..1...0...01 -> 101.000111.1 + * read-write, clean, old 00..1...1...11 -> 111.001100.1 + * read-write, clean, young 01..1...0...11 -> 101.001101.1 + * read-write, dirty, old 10..0...1...11 -> 110.001110.1 + * read-write, dirty, young 11..0...0...11 -> 100.001111.1 + * HW-bits: R read-only, I invalid + * SW-bits: p present, y young, d dirty, r read, w write, s special, + * u unused, l large */ if (pmd_present(pmd)) { pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE; From 57127645d79d2e83e801f141f7d03f64accf28aa Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 16 Mar 2015 14:52:52 +0100 Subject: [PATCH 017/104] s390/zcrypt: Introduce new SHA-512 based Pseudo Random Generator. Rework of the prandom device with introduction of a new SHA-512 based NIST SP 800-90 conform deterministic random bit generator. Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/crypt_s390.h | 122 +++-- arch/s390/crypto/prng.c | 850 +++++++++++++++++++++++++++++++--- 2 files changed, 870 insertions(+), 102 deletions(-) diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index ba3b2aefddf5..d9c4c313fbc6 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -3,9 +3,10 @@ * * Support for s390 cryptographic instructions. * - * Copyright IBM Corp. 2003, 2007 + * Copyright IBM Corp. 2003, 2015 * Author(s): Thomas Spatzier * Jan Glauber (jan.glauber@de.ibm.com) + * Harald Freudenberger (freude@de.ibm.com) * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -28,15 +29,17 @@ #define CRYPT_S390_MSA 0x1 #define CRYPT_S390_MSA3 0x2 #define CRYPT_S390_MSA4 0x4 +#define CRYPT_S390_MSA5 0x8 /* s390 cryptographic operations */ enum crypt_s390_operations { - CRYPT_S390_KM = 0x0100, - CRYPT_S390_KMC = 0x0200, - CRYPT_S390_KIMD = 0x0300, - CRYPT_S390_KLMD = 0x0400, - CRYPT_S390_KMAC = 0x0500, - CRYPT_S390_KMCTR = 0x0600 + CRYPT_S390_KM = 0x0100, + CRYPT_S390_KMC = 0x0200, + CRYPT_S390_KIMD = 0x0300, + CRYPT_S390_KLMD = 0x0400, + CRYPT_S390_KMAC = 0x0500, + CRYPT_S390_KMCTR = 0x0600, + CRYPT_S390_PPNO = 0x0700 }; /* @@ -138,6 +141,16 @@ enum crypt_s390_kmac_func { KMAC_TDEA_192 = CRYPT_S390_KMAC | 3 }; +/* + * function codes for PPNO (PERFORM PSEUDORANDOM NUMBER + * OPERATION) instruction + */ +enum crypt_s390_ppno_func { + PPNO_QUERY = CRYPT_S390_PPNO | 0, + PPNO_SHA512_DRNG_GEN = CRYPT_S390_PPNO | 3, + PPNO_SHA512_DRNG_SEED = CRYPT_S390_PPNO | 0x83 +}; + /** * crypt_s390_km: * @func: the function code passed to KM; see crypt_s390_km_func @@ -162,11 +175,11 @@ static inline int crypt_s390_km(long func, void *param, int ret; asm volatile( - "0: .insn rre,0xb92e0000,%3,%1 \n" /* KM opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb92e0000,%3,%1\n" /* KM opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest) : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); if (ret < 0) @@ -198,11 +211,11 @@ static inline int crypt_s390_kmc(long func, void *param, int ret; asm volatile( - "0: .insn rre,0xb92f0000,%3,%1 \n" /* KMC opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb92f0000,%3,%1\n" /* KMC opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest) : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); if (ret < 0) @@ -233,11 +246,11 @@ static inline int crypt_s390_kimd(long func, void *param, int ret; asm volatile( - "0: .insn rre,0xb93e0000,%1,%1 \n" /* KIMD opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb93e0000,%1,%1\n" /* KIMD opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "=d" (ret), "+a" (__src), "+d" (__src_len) : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); if (ret < 0) @@ -267,11 +280,11 @@ static inline int crypt_s390_klmd(long func, void *param, int ret; asm volatile( - "0: .insn rre,0xb93f0000,%1,%1 \n" /* KLMD opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb93f0000,%1,%1\n" /* KLMD opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "=d" (ret), "+a" (__src), "+d" (__src_len) : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); if (ret < 0) @@ -302,11 +315,11 @@ static inline int crypt_s390_kmac(long func, void *param, int ret; asm volatile( - "0: .insn rre,0xb91e0000,%1,%1 \n" /* KLAC opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb91e0000,%1,%1\n" /* KLAC opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "=d" (ret), "+a" (__src), "+d" (__src_len) : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); if (ret < 0) @@ -340,11 +353,11 @@ static inline int crypt_s390_kmctr(long func, void *param, u8 *dest, int ret = -1; asm volatile( - "0: .insn rrf,0xb92d0000,%3,%1,%4,0 \n" /* KMCTR opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rrf,0xb92d0000,%3,%1,%4,0\n" /* KMCTR opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest), "+a" (__ctr) : "d" (__func), "a" (__param) : "cc", "memory"); @@ -353,6 +366,47 @@ static inline int crypt_s390_kmctr(long func, void *param, u8 *dest, return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; } +/** + * crypt_s390_ppno: + * @func: the function code passed to PPNO; see crypt_s390_ppno_func + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @dest_len: size of destination memory area in bytes + * @seed: address of seed data + * @seed_len: size of seed data in bytes + * + * Executes the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION) + * operation of the CPU. + * + * Returns -1 for failure, 0 for the query func, number of random + * bytes stored in dest buffer for generate function + */ +static inline int crypt_s390_ppno(long func, void *param, + u8 *dest, long dest_len, + const u8 *seed, long seed_len) +{ + register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; + register void *__param asm("1") = param; /* param block (240 bytes) */ + register u8 *__dest asm("2") = dest; /* buf for recv random bytes */ + register long __dest_len asm("3") = dest_len; /* requested random bytes */ + register const u8 *__seed asm("4") = seed; /* buf with seed data */ + register long __seed_len asm("5") = seed_len; /* bytes in seed buf */ + int ret = -1; + + asm volatile ( + "0: .insn rre,0xb93c0000,%1,%5\n" /* PPNO opcode */ + "1: brc 1,0b\n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "+d" (ret), "+a"(__dest), "+d"(__dest_len) + : "d"(__func), "a"(__param), "a"(__seed), "d"(__seed_len) + : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? dest_len - __dest_len : 0; +} + /** * crypt_s390_func_available: * @func: the function code of the specific function; 0 if op in general @@ -373,6 +427,9 @@ static inline int crypt_s390_func_available(int func, return 0; if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77)) return 0; + if (facility_mask & CRYPT_S390_MSA5 && !test_facility(57)) + return 0; + switch (func & CRYPT_S390_OP_MASK) { case CRYPT_S390_KM: ret = crypt_s390_km(KM_QUERY, &status, NULL, NULL, 0); @@ -390,8 +447,12 @@ static inline int crypt_s390_func_available(int func, ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0); break; case CRYPT_S390_KMCTR: - ret = crypt_s390_kmctr(KMCTR_QUERY, &status, NULL, NULL, 0, - NULL); + ret = crypt_s390_kmctr(KMCTR_QUERY, &status, + NULL, NULL, 0, NULL); + break; + case CRYPT_S390_PPNO: + ret = crypt_s390_ppno(PPNO_QUERY, &status, + NULL, 0, NULL, 0); break; default: return 0; @@ -419,15 +480,14 @@ static inline int crypt_s390_pcc(long func, void *param) int ret = -1; asm volatile( - "0: .insn rre,0xb92c0000,0,0 \n" /* PCC opcode */ - "1: brc 1,0b \n" /* handle partial completion */ + "0: .insn rre,0xb92c0000,0,0\n" /* PCC opcode */ + "1: brc 1,0b\n" /* handle partial completion */ " la %0,0\n" "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) : "+d" (ret) : "d" (__func), "a" (__param) : "cc", "memory"); return ret; } - #endif /* _CRYPTO_ARCH_S390_CRYPT_S390_H */ diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 94a35a4c1b48..1f374b39a4ec 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -1,106 +1,529 @@ /* - * Copyright IBM Corp. 2006, 2007 + * Copyright IBM Corp. 2006, 2015 * Author(s): Jan Glauber + * Harald Freudenberger * Driver for the s390 pseudo random number generator */ + +#define KMSG_COMPONENT "prng" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include +#include #include #include +#include #include #include #include +#include #include #include #include #include +#include #include "crypt_s390.h" MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jan Glauber "); +MODULE_AUTHOR("IBM Corporation"); MODULE_DESCRIPTION("s390 PRNG interface"); -static int prng_chunk_size = 256; -module_param(prng_chunk_size, int, S_IRUSR | S_IRGRP | S_IROTH); + +#define PRNG_MODE_AUTO 0 +#define PRNG_MODE_TDES 1 +#define PRNG_MODE_SHA512 2 + +static unsigned int prng_mode = PRNG_MODE_AUTO; +module_param_named(mode, prng_mode, int, 0); +MODULE_PARM_DESC(prng_mode, "PRNG mode: 0 - auto, 1 - TDES, 2 - SHA512"); + + +#define PRNG_CHUNKSIZE_TDES_MIN 8 +#define PRNG_CHUNKSIZE_TDES_MAX (64*1024) +#define PRNG_CHUNKSIZE_SHA512_MIN 64 +#define PRNG_CHUNKSIZE_SHA512_MAX (64*1024) + +static unsigned int prng_chunk_size = 256; +module_param_named(chunksize, prng_chunk_size, int, 0); MODULE_PARM_DESC(prng_chunk_size, "PRNG read chunk size in bytes"); -static int prng_entropy_limit = 4096; -module_param(prng_entropy_limit, int, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR); -MODULE_PARM_DESC(prng_entropy_limit, - "PRNG add entropy after that much bytes were produced"); + +#define PRNG_RESEED_LIMIT_TDES 4096 +#define PRNG_RESEED_LIMIT_TDES_LOWER 4096 +#define PRNG_RESEED_LIMIT_SHA512 100000 +#define PRNG_RESEED_LIMIT_SHA512_LOWER 10000 + +static unsigned int prng_reseed_limit; +module_param_named(reseed_limit, prng_reseed_limit, int, 0); +MODULE_PARM_DESC(prng_reseed_limit, "PRNG reseed limit"); + /* * Any one who considers arithmetical methods of producing random digits is, * of course, in a state of sin. -- John von Neumann */ -struct s390_prng_data { - unsigned long count; /* how many bytes were produced */ - char *buf; +static int prng_errorflag; + +#define PRNG_GEN_ENTROPY_FAILED 1 +#define PRNG_SELFTEST_FAILED 2 +#define PRNG_INSTANTIATE_FAILED 3 +#define PRNG_SEED_FAILED 4 +#define PRNG_RESEED_FAILED 5 +#define PRNG_GEN_FAILED 6 + +struct prng_ws_s { + u8 parm_block[32]; + u32 reseed_counter; + u64 byte_counter; }; -static struct s390_prng_data *p; +struct ppno_ws_s { + u32 res; + u32 reseed_counter; + u64 stream_bytes; + u8 V[112]; + u8 C[112]; +}; -/* copied from libica, use a non-zero initial parameter block */ -static unsigned char parm_block[32] = { -0x0F,0x2B,0x8E,0x63,0x8C,0x8E,0xD2,0x52,0x64,0xB7,0xA0,0x7B,0x75,0x28,0xB8,0xF4, -0x75,0x5F,0xD2,0xA6,0x8D,0x97,0x11,0xFF,0x49,0xD8,0x23,0xF3,0x7E,0x21,0xEC,0xA0, +struct prng_data_s { + struct mutex mutex; + union { + struct prng_ws_s prngws; + struct ppno_ws_s ppnows; + }; + u8 *buf; + u32 rest; + u8 *prev; }; -static int prng_open(struct inode *inode, struct file *file) +static struct prng_data_s *prng_data; + +/* initial parameter block for tdes mode, copied from libica */ +static const u8 initial_parm_block[32] __initconst = { + 0x0F, 0x2B, 0x8E, 0x63, 0x8C, 0x8E, 0xD2, 0x52, + 0x64, 0xB7, 0xA0, 0x7B, 0x75, 0x28, 0xB8, 0xF4, + 0x75, 0x5F, 0xD2, 0xA6, 0x8D, 0x97, 0x11, 0xFF, + 0x49, 0xD8, 0x23, 0xF3, 0x7E, 0x21, 0xEC, 0xA0 }; + + +/*** helper functions ***/ + +static int generate_entropy(u8 *ebuf, size_t nbytes) { - return nonseekable_open(inode, file); + int n, ret = 0; + u8 *pg, *h, hash[32]; + + pg = (u8 *) __get_free_page(GFP_KERNEL); + if (!pg) { + prng_errorflag = PRNG_GEN_ENTROPY_FAILED; + return -ENOMEM; + } + + while (nbytes) { + /* fill page with urandom bytes */ + get_random_bytes(pg, PAGE_SIZE); + /* exor page with stckf values */ + for (n = 0; n < sizeof(PAGE_SIZE/sizeof(u64)); n++) { + u64 *p = ((u64 *)pg) + n; + *p ^= get_tod_clock_fast(); + } + n = (nbytes < sizeof(hash)) ? nbytes : sizeof(hash); + if (n < sizeof(hash)) + h = hash; + else + h = ebuf; + /* generate sha256 from this page */ + if (crypt_s390_kimd(KIMD_SHA_256, h, + pg, PAGE_SIZE) != PAGE_SIZE) { + prng_errorflag = PRNG_GEN_ENTROPY_FAILED; + ret = -EIO; + goto out; + } + if (n < sizeof(hash)) + memcpy(ebuf, hash, n); + ret += n; + ebuf += n; + nbytes -= n; + } + +out: + free_page((unsigned long)pg); + return ret; } -static void prng_add_entropy(void) + +/*** tdes functions ***/ + +static void prng_tdes_add_entropy(void) { __u64 entropy[4]; unsigned int i; int ret; for (i = 0; i < 16; i++) { - ret = crypt_s390_kmc(KMC_PRNG, parm_block, (char *)entropy, - (char *)entropy, sizeof(entropy)); + ret = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block, + (char *)entropy, (char *)entropy, + sizeof(entropy)); BUG_ON(ret < 0 || ret != sizeof(entropy)); - memcpy(parm_block, entropy, sizeof(entropy)); + memcpy(prng_data->prngws.parm_block, entropy, sizeof(entropy)); } } -static void prng_seed(int nbytes) + +static void prng_tdes_seed(int nbytes) { char buf[16]; int i = 0; - BUG_ON(nbytes > 16); + BUG_ON(nbytes > sizeof(buf)); + get_random_bytes(buf, nbytes); /* Add the entropy */ while (nbytes >= 8) { - *((__u64 *)parm_block) ^= *((__u64 *)(buf+i)); - prng_add_entropy(); + *((__u64 *)prng_data->prngws.parm_block) ^= *((__u64 *)(buf+i)); + prng_tdes_add_entropy(); i += 8; nbytes -= 8; } - prng_add_entropy(); + prng_tdes_add_entropy(); + prng_data->prngws.reseed_counter = 0; +} + + +static int __init prng_tdes_instantiate(void) +{ + int datalen; + + pr_debug("prng runs in TDES mode with " + "chunksize=%d and reseed_limit=%u\n", + prng_chunk_size, prng_reseed_limit); + + /* memory allocation, prng_data struct init, mutex init */ + datalen = sizeof(struct prng_data_s) + prng_chunk_size; + prng_data = kzalloc(datalen, GFP_KERNEL); + if (!prng_data) { + prng_errorflag = PRNG_INSTANTIATE_FAILED; + return -ENOMEM; + } + mutex_init(&prng_data->mutex); + prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s); + memcpy(prng_data->prngws.parm_block, initial_parm_block, 32); + + /* initialize the PRNG, add 128 bits of entropy */ + prng_tdes_seed(16); + + return 0; } -static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes, - loff_t *ppos) + +static void prng_tdes_deinstantiate(void) +{ + pr_debug("The prng module stopped " + "after running in triple DES mode\n"); + kzfree(prng_data); +} + + +/*** sha512 functions ***/ + +static int __init prng_sha512_selftest(void) { - int chunk, n; + /* NIST DRBG testvector for Hash Drbg, Sha-512, Count #0 */ + static const u8 seed[] __initconst = { + 0x6b, 0x50, 0xa7, 0xd8, 0xf8, 0xa5, 0x5d, 0x7a, + 0x3d, 0xf8, 0xbb, 0x40, 0xbc, 0xc3, 0xb7, 0x22, + 0xd8, 0x70, 0x8d, 0xe6, 0x7f, 0xda, 0x01, 0x0b, + 0x03, 0xc4, 0xc8, 0x4d, 0x72, 0x09, 0x6f, 0x8c, + 0x3e, 0xc6, 0x49, 0xcc, 0x62, 0x56, 0xd9, 0xfa, + 0x31, 0xdb, 0x7a, 0x29, 0x04, 0xaa, 0xf0, 0x25 }; + static const u8 V0[] __initconst = { + 0x00, 0xad, 0xe3, 0x6f, 0x9a, 0x01, 0xc7, 0x76, + 0x61, 0x34, 0x35, 0xf5, 0x4e, 0x24, 0x74, 0x22, + 0x21, 0x9a, 0x29, 0x89, 0xc7, 0x93, 0x2e, 0x60, + 0x1e, 0xe8, 0x14, 0x24, 0x8d, 0xd5, 0x03, 0xf1, + 0x65, 0x5d, 0x08, 0x22, 0x72, 0xd5, 0xad, 0x95, + 0xe1, 0x23, 0x1e, 0x8a, 0xa7, 0x13, 0xd9, 0x2b, + 0x5e, 0xbc, 0xbb, 0x80, 0xab, 0x8d, 0xe5, 0x79, + 0xab, 0x5b, 0x47, 0x4e, 0xdd, 0xee, 0x6b, 0x03, + 0x8f, 0x0f, 0x5c, 0x5e, 0xa9, 0x1a, 0x83, 0xdd, + 0xd3, 0x88, 0xb2, 0x75, 0x4b, 0xce, 0x83, 0x36, + 0x57, 0x4b, 0xf1, 0x5c, 0xca, 0x7e, 0x09, 0xc0, + 0xd3, 0x89, 0xc6, 0xe0, 0xda, 0xc4, 0x81, 0x7e, + 0x5b, 0xf9, 0xe1, 0x01, 0xc1, 0x92, 0x05, 0xea, + 0xf5, 0x2f, 0xc6, 0xc6, 0xc7, 0x8f, 0xbc, 0xf4 }; + static const u8 C0[] __initconst = { + 0x00, 0xf4, 0xa3, 0xe5, 0xa0, 0x72, 0x63, 0x95, + 0xc6, 0x4f, 0x48, 0xd0, 0x8b, 0x5b, 0x5f, 0x8e, + 0x6b, 0x96, 0x1f, 0x16, 0xed, 0xbc, 0x66, 0x94, + 0x45, 0x31, 0xd7, 0x47, 0x73, 0x22, 0xa5, 0x86, + 0xce, 0xc0, 0x4c, 0xac, 0x63, 0xb8, 0x39, 0x50, + 0xbf, 0xe6, 0x59, 0x6c, 0x38, 0x58, 0x99, 0x1f, + 0x27, 0xa7, 0x9d, 0x71, 0x2a, 0xb3, 0x7b, 0xf9, + 0xfb, 0x17, 0x86, 0xaa, 0x99, 0x81, 0xaa, 0x43, + 0xe4, 0x37, 0xd3, 0x1e, 0x6e, 0xe5, 0xe6, 0xee, + 0xc2, 0xed, 0x95, 0x4f, 0x53, 0x0e, 0x46, 0x8a, + 0xcc, 0x45, 0xa5, 0xdb, 0x69, 0x0d, 0x81, 0xc9, + 0x32, 0x92, 0xbc, 0x8f, 0x33, 0xe6, 0xf6, 0x09, + 0x7c, 0x8e, 0x05, 0x19, 0x0d, 0xf1, 0xb6, 0xcc, + 0xf3, 0x02, 0x21, 0x90, 0x25, 0xec, 0xed, 0x0e }; + static const u8 random[] __initconst = { + 0x95, 0xb7, 0xf1, 0x7e, 0x98, 0x02, 0xd3, 0x57, + 0x73, 0x92, 0xc6, 0xa9, 0xc0, 0x80, 0x83, 0xb6, + 0x7d, 0xd1, 0x29, 0x22, 0x65, 0xb5, 0xf4, 0x2d, + 0x23, 0x7f, 0x1c, 0x55, 0xbb, 0x9b, 0x10, 0xbf, + 0xcf, 0xd8, 0x2c, 0x77, 0xa3, 0x78, 0xb8, 0x26, + 0x6a, 0x00, 0x99, 0x14, 0x3b, 0x3c, 0x2d, 0x64, + 0x61, 0x1e, 0xee, 0xb6, 0x9a, 0xcd, 0xc0, 0x55, + 0x95, 0x7c, 0x13, 0x9e, 0x8b, 0x19, 0x0c, 0x7a, + 0x06, 0x95, 0x5f, 0x2c, 0x79, 0x7c, 0x27, 0x78, + 0xde, 0x94, 0x03, 0x96, 0xa5, 0x01, 0xf4, 0x0e, + 0x91, 0x39, 0x6a, 0xcf, 0x8d, 0x7e, 0x45, 0xeb, + 0xdb, 0xb5, 0x3b, 0xbf, 0x8c, 0x97, 0x52, 0x30, + 0xd2, 0xf0, 0xff, 0x91, 0x06, 0xc7, 0x61, 0x19, + 0xae, 0x49, 0x8e, 0x7f, 0xbc, 0x03, 0xd9, 0x0f, + 0x8e, 0x4c, 0x51, 0x62, 0x7a, 0xed, 0x5c, 0x8d, + 0x42, 0x63, 0xd5, 0xd2, 0xb9, 0x78, 0x87, 0x3a, + 0x0d, 0xe5, 0x96, 0xee, 0x6d, 0xc7, 0xf7, 0xc2, + 0x9e, 0x37, 0xee, 0xe8, 0xb3, 0x4c, 0x90, 0xdd, + 0x1c, 0xf6, 0xa9, 0xdd, 0xb2, 0x2b, 0x4c, 0xbd, + 0x08, 0x6b, 0x14, 0xb3, 0x5d, 0xe9, 0x3d, 0xa2, + 0xd5, 0xcb, 0x18, 0x06, 0x69, 0x8c, 0xbd, 0x7b, + 0xbb, 0x67, 0xbf, 0xe3, 0xd3, 0x1f, 0xd2, 0xd1, + 0xdb, 0xd2, 0xa1, 0xe0, 0x58, 0xa3, 0xeb, 0x99, + 0xd7, 0xe5, 0x1f, 0x1a, 0x93, 0x8e, 0xed, 0x5e, + 0x1c, 0x1d, 0xe2, 0x3a, 0x6b, 0x43, 0x45, 0xd3, + 0x19, 0x14, 0x09, 0xf9, 0x2f, 0x39, 0xb3, 0x67, + 0x0d, 0x8d, 0xbf, 0xb6, 0x35, 0xd8, 0xe6, 0xa3, + 0x69, 0x32, 0xd8, 0x10, 0x33, 0xd1, 0x44, 0x8d, + 0x63, 0xb4, 0x03, 0xdd, 0xf8, 0x8e, 0x12, 0x1b, + 0x6e, 0x81, 0x9a, 0xc3, 0x81, 0x22, 0x6c, 0x13, + 0x21, 0xe4, 0xb0, 0x86, 0x44, 0xf6, 0x72, 0x7c, + 0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 }; + int ret = 0; - int tmp; + u8 buf[sizeof(random)]; + struct ppno_ws_s ws; + + memset(&ws, 0, sizeof(ws)); + + /* initial seed */ + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED, + &ws, NULL, 0, + seed, sizeof(seed)); + if (ret < 0) { + pr_err("The prng self test seed operation for the " + "SHA-512 mode failed with rc=%d\n", ret); + prng_errorflag = PRNG_SELFTEST_FAILED; + return -EIO; + } + + /* check working states V and C */ + if (memcmp(ws.V, V0, sizeof(V0)) != 0 + || memcmp(ws.C, C0, sizeof(C0)) != 0) { + pr_err("The prng self test state test " + "for the SHA-512 mode failed\n"); + prng_errorflag = PRNG_SELFTEST_FAILED; + return -EIO; + } + + /* generate random bytes */ + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN, + &ws, buf, sizeof(buf), + NULL, 0); + if (ret < 0) { + pr_err("The prng self test generate operation for " + "the SHA-512 mode failed with rc=%d\n", ret); + prng_errorflag = PRNG_SELFTEST_FAILED; + return -EIO; + } + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN, + &ws, buf, sizeof(buf), + NULL, 0); + if (ret < 0) { + pr_err("The prng self test generate operation for " + "the SHA-512 mode failed with rc=%d\n", ret); + prng_errorflag = PRNG_SELFTEST_FAILED; + return -EIO; + } + + /* check against expected data */ + if (memcmp(buf, random, sizeof(random)) != 0) { + pr_err("The prng self test data test " + "for the SHA-512 mode failed\n"); + prng_errorflag = PRNG_SELFTEST_FAILED; + return -EIO; + } + + return 0; +} + + +static int __init prng_sha512_instantiate(void) +{ + int ret, datalen; + u8 seed[64]; + + pr_debug("prng runs in SHA-512 mode " + "with chunksize=%d and reseed_limit=%u\n", + prng_chunk_size, prng_reseed_limit); + + /* memory allocation, prng_data struct init, mutex init */ + datalen = sizeof(struct prng_data_s) + prng_chunk_size; + if (fips_enabled) + datalen += prng_chunk_size; + prng_data = kzalloc(datalen, GFP_KERNEL); + if (!prng_data) { + prng_errorflag = PRNG_INSTANTIATE_FAILED; + return -ENOMEM; + } + mutex_init(&prng_data->mutex); + prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s); + + /* selftest */ + ret = prng_sha512_selftest(); + if (ret) + goto outfree; + + /* generate initial seed bytestring, first 48 bytes of entropy */ + ret = generate_entropy(seed, 48); + if (ret != 48) + goto outfree; + /* followed by 16 bytes of unique nonce */ + get_tod_clock_ext(seed + 48); + + /* initial seed of the ppno drng */ + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED, + &prng_data->ppnows, NULL, 0, + seed, sizeof(seed)); + if (ret < 0) { + prng_errorflag = PRNG_SEED_FAILED; + ret = -EIO; + goto outfree; + } + + /* if fips mode is enabled, generate a first block of random + bytes for the FIPS 140-2 Conditional Self Test */ + if (fips_enabled) { + prng_data->prev = prng_data->buf + prng_chunk_size; + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN, + &prng_data->ppnows, + prng_data->prev, + prng_chunk_size, + NULL, 0); + if (ret < 0 || ret != prng_chunk_size) { + prng_errorflag = PRNG_GEN_FAILED; + ret = -EIO; + goto outfree; + } + } + + return 0; + +outfree: + kfree(prng_data); + return ret; +} + + +static void prng_sha512_deinstantiate(void) +{ + pr_debug("The prng module stopped after running in SHA-512 mode\n"); + kzfree(prng_data); +} + + +static int prng_sha512_reseed(void) +{ + int ret; + u8 seed[32]; + + /* generate 32 bytes of fresh entropy */ + ret = generate_entropy(seed, sizeof(seed)); + if (ret != sizeof(seed)) + return ret; + + /* do a reseed of the ppno drng with this bytestring */ + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED, + &prng_data->ppnows, NULL, 0, + seed, sizeof(seed)); + if (ret) { + prng_errorflag = PRNG_RESEED_FAILED; + return -EIO; + } + + return 0; +} + + +static int prng_sha512_generate(u8 *buf, size_t nbytes) +{ + int ret; + + /* reseed needed ? */ + if (prng_data->ppnows.reseed_counter > prng_reseed_limit) { + ret = prng_sha512_reseed(); + if (ret) + return ret; + } + + /* PPNO generate */ + ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN, + &prng_data->ppnows, buf, nbytes, + NULL, 0); + if (ret < 0 || ret != nbytes) { + prng_errorflag = PRNG_GEN_FAILED; + return -EIO; + } + + /* FIPS 140-2 Conditional Self Test */ + if (fips_enabled) { + if (!memcmp(prng_data->prev, buf, nbytes)) { + prng_errorflag = PRNG_GEN_FAILED; + return -EILSEQ; + } + memcpy(prng_data->prev, buf, nbytes); + } + + return ret; +} + + +/*** file io functions ***/ + +static int prng_open(struct inode *inode, struct file *file) +{ + return nonseekable_open(inode, file); +} + + +static ssize_t prng_tdes_read(struct file *file, char __user *ubuf, + size_t nbytes, loff_t *ppos) +{ + int chunk, n, tmp, ret = 0; + + /* lock prng_data struct */ + if (mutex_lock_interruptible(&prng_data->mutex)) + return -ERESTARTSYS; - /* nbytes can be arbitrary length, we split it into chunks */ while (nbytes) { - /* same as in extract_entropy_user in random.c */ if (need_resched()) { if (signal_pending(current)) { if (ret == 0) ret = -ERESTARTSYS; break; } + /* give mutex free before calling schedule() */ + mutex_unlock(&prng_data->mutex); schedule(); + /* occopy mutex again */ + if (mutex_lock_interruptible(&prng_data->mutex)) { + if (ret == 0) + ret = -ERESTARTSYS; + return ret; + } } /* @@ -112,12 +535,11 @@ static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes, /* PRNG only likes multiples of 8 bytes */ n = (chunk + 7) & -8; - if (p->count > prng_entropy_limit) - prng_seed(8); + if (prng_data->prngws.reseed_counter > prng_reseed_limit) + prng_tdes_seed(8); /* if the CPU supports PRNG stckf is present too */ - asm volatile(".insn s,0xb27c0000,%0" - : "=m" (*((unsigned long long *)p->buf)) : : "cc"); + *((unsigned long long *)prng_data->buf) = get_tod_clock_fast(); /* * Beside the STCKF the input for the TDES-EDE is the output @@ -132,35 +554,259 @@ static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes, * Note: you can still get strict X9.17 conformity by setting * prng_chunk_size to 8 bytes. */ - tmp = crypt_s390_kmc(KMC_PRNG, parm_block, p->buf, p->buf, n); - BUG_ON((tmp < 0) || (tmp != n)); + tmp = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block, + prng_data->buf, prng_data->buf, n); + if (tmp < 0 || tmp != n) { + ret = -EIO; + break; + } - p->count += n; + prng_data->prngws.byte_counter += n; + prng_data->prngws.reseed_counter += n; - if (copy_to_user(ubuf, p->buf, chunk)) + if (copy_to_user(ubuf, prng_data->buf, chunk)) return -EFAULT; nbytes -= chunk; ret += chunk; ubuf += chunk; } + + /* unlock prng_data struct */ + mutex_unlock(&prng_data->mutex); + return ret; } -static const struct file_operations prng_fops = { + +static ssize_t prng_sha512_read(struct file *file, char __user *ubuf, + size_t nbytes, loff_t *ppos) +{ + int n, ret = 0; + u8 *p; + + /* if errorflag is set do nothing and return 'broken pipe' */ + if (prng_errorflag) + return -EPIPE; + + /* lock prng_data struct */ + if (mutex_lock_interruptible(&prng_data->mutex)) + return -ERESTARTSYS; + + while (nbytes) { + if (need_resched()) { + if (signal_pending(current)) { + if (ret == 0) + ret = -ERESTARTSYS; + break; + } + /* give mutex free before calling schedule() */ + mutex_unlock(&prng_data->mutex); + schedule(); + /* occopy mutex again */ + if (mutex_lock_interruptible(&prng_data->mutex)) { + if (ret == 0) + ret = -ERESTARTSYS; + return ret; + } + } + if (prng_data->rest) { + /* push left over random bytes from the previous read */ + p = prng_data->buf + prng_chunk_size - prng_data->rest; + n = (nbytes < prng_data->rest) ? + nbytes : prng_data->rest; + prng_data->rest -= n; + } else { + /* generate one chunk of random bytes into read buf */ + p = prng_data->buf; + n = prng_sha512_generate(p, prng_chunk_size); + if (n < 0) { + ret = n; + break; + } + if (nbytes < prng_chunk_size) { + n = nbytes; + prng_data->rest = prng_chunk_size - n; + } else { + n = prng_chunk_size; + prng_data->rest = 0; + } + } + if (copy_to_user(ubuf, p, n)) { + ret = -EFAULT; + break; + } + ubuf += n; + nbytes -= n; + ret += n; + } + + /* unlock prng_data struct */ + mutex_unlock(&prng_data->mutex); + + return ret; +} + + +/*** sysfs stuff ***/ + +static const struct file_operations prng_sha512_fops = { + .owner = THIS_MODULE, + .open = &prng_open, + .release = NULL, + .read = &prng_sha512_read, + .llseek = noop_llseek, +}; +static const struct file_operations prng_tdes_fops = { .owner = THIS_MODULE, .open = &prng_open, .release = NULL, - .read = &prng_read, + .read = &prng_tdes_read, .llseek = noop_llseek, }; -static struct miscdevice prng_dev = { +static struct miscdevice prng_sha512_dev = { + .name = "prandom", + .minor = MISC_DYNAMIC_MINOR, + .fops = &prng_sha512_fops, +}; +static struct miscdevice prng_tdes_dev = { .name = "prandom", .minor = MISC_DYNAMIC_MINOR, - .fops = &prng_fops, + .fops = &prng_tdes_fops, }; + +/* chunksize attribute (ro) */ +static ssize_t prng_chunksize_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size); +} +static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL); + +/* counter attribute (ro) */ +static ssize_t prng_counter_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u64 counter; + + if (mutex_lock_interruptible(&prng_data->mutex)) + return -ERESTARTSYS; + if (prng_mode == PRNG_MODE_SHA512) + counter = prng_data->ppnows.stream_bytes; + else + counter = prng_data->prngws.byte_counter; + mutex_unlock(&prng_data->mutex); + + return snprintf(buf, PAGE_SIZE, "%llu\n", counter); +} +static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL); + +/* errorflag attribute (ro) */ +static ssize_t prng_errorflag_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag); +} +static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL); + +/* mode attribute (ro) */ +static ssize_t prng_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (prng_mode == PRNG_MODE_TDES) + return snprintf(buf, PAGE_SIZE, "TDES\n"); + else + return snprintf(buf, PAGE_SIZE, "SHA512\n"); +} +static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL); + +/* reseed attribute (w) */ +static ssize_t prng_reseed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + if (mutex_lock_interruptible(&prng_data->mutex)) + return -ERESTARTSYS; + prng_sha512_reseed(); + mutex_unlock(&prng_data->mutex); + + return count; +} +static DEVICE_ATTR(reseed, 0200, NULL, prng_reseed_store); + +/* reseed limit attribute (rw) */ +static ssize_t prng_reseed_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit); +} +static ssize_t prng_reseed_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned limit; + + if (sscanf(buf, "%u\n", &limit) != 1) + return -EINVAL; + + if (prng_mode == PRNG_MODE_SHA512) { + if (limit < PRNG_RESEED_LIMIT_SHA512_LOWER) + return -EINVAL; + } else { + if (limit < PRNG_RESEED_LIMIT_TDES_LOWER) + return -EINVAL; + } + + prng_reseed_limit = limit; + + return count; +} +static DEVICE_ATTR(reseed_limit, 0644, + prng_reseed_limit_show, prng_reseed_limit_store); + +/* strength attribute (ro) */ +static ssize_t prng_strength_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "256\n"); +} +static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL); + +static struct attribute *prng_sha512_dev_attrs[] = { + &dev_attr_errorflag.attr, + &dev_attr_chunksize.attr, + &dev_attr_byte_counter.attr, + &dev_attr_mode.attr, + &dev_attr_reseed.attr, + &dev_attr_reseed_limit.attr, + &dev_attr_strength.attr, + NULL +}; +static struct attribute *prng_tdes_dev_attrs[] = { + &dev_attr_chunksize.attr, + &dev_attr_byte_counter.attr, + &dev_attr_mode.attr, + NULL +}; + +static struct attribute_group prng_sha512_dev_attr_group = { + .attrs = prng_sha512_dev_attrs +}; +static struct attribute_group prng_tdes_dev_attr_group = { + .attrs = prng_tdes_dev_attrs +}; + + +/*** module init and exit ***/ + static int __init prng_init(void) { int ret; @@ -169,43 +815,105 @@ static int __init prng_init(void) if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA)) return -EOPNOTSUPP; - if (prng_chunk_size < 8) - return -EINVAL; + /* choose prng mode */ + if (prng_mode != PRNG_MODE_TDES) { + /* check for MSA5 support for PPNO operations */ + if (!crypt_s390_func_available(PPNO_SHA512_DRNG_GEN, + CRYPT_S390_MSA5)) { + if (prng_mode == PRNG_MODE_SHA512) { + pr_err("The prng module cannot " + "start in SHA-512 mode\n"); + return -EOPNOTSUPP; + } + prng_mode = PRNG_MODE_TDES; + } else + prng_mode = PRNG_MODE_SHA512; + } - p = kmalloc(sizeof(struct s390_prng_data), GFP_KERNEL); - if (!p) - return -ENOMEM; - p->count = 0; + if (prng_mode == PRNG_MODE_SHA512) { - p->buf = kmalloc(prng_chunk_size, GFP_KERNEL); - if (!p->buf) { - ret = -ENOMEM; - goto out_free; - } + /* SHA512 mode */ - /* initialize the PRNG, add 128 bits of entropy */ - prng_seed(16); + if (prng_chunk_size < PRNG_CHUNKSIZE_SHA512_MIN + || prng_chunk_size > PRNG_CHUNKSIZE_SHA512_MAX) + return -EINVAL; + prng_chunk_size = (prng_chunk_size + 0x3f) & ~0x3f; - ret = misc_register(&prng_dev); - if (ret) - goto out_buf; - return 0; + if (prng_reseed_limit == 0) + prng_reseed_limit = PRNG_RESEED_LIMIT_SHA512; + else if (prng_reseed_limit < PRNG_RESEED_LIMIT_SHA512_LOWER) + return -EINVAL; + + ret = prng_sha512_instantiate(); + if (ret) + goto out; + + ret = misc_register(&prng_sha512_dev); + if (ret) { + prng_sha512_deinstantiate(); + goto out; + } + ret = sysfs_create_group(&prng_sha512_dev.this_device->kobj, + &prng_sha512_dev_attr_group); + if (ret) { + misc_deregister(&prng_sha512_dev); + prng_sha512_deinstantiate(); + goto out; + } -out_buf: - kfree(p->buf); -out_free: - kfree(p); + } else { + + /* TDES mode */ + + if (prng_chunk_size < PRNG_CHUNKSIZE_TDES_MIN + || prng_chunk_size > PRNG_CHUNKSIZE_TDES_MAX) + return -EINVAL; + prng_chunk_size = (prng_chunk_size + 0x07) & ~0x07; + + if (prng_reseed_limit == 0) + prng_reseed_limit = PRNG_RESEED_LIMIT_TDES; + else if (prng_reseed_limit < PRNG_RESEED_LIMIT_TDES_LOWER) + return -EINVAL; + + ret = prng_tdes_instantiate(); + if (ret) + goto out; + + ret = misc_register(&prng_tdes_dev); + if (ret) { + prng_tdes_deinstantiate(); + goto out; + } + ret = sysfs_create_group(&prng_tdes_dev.this_device->kobj, + &prng_tdes_dev_attr_group); + if (ret) { + misc_deregister(&prng_tdes_dev); + prng_tdes_deinstantiate(); + goto out; + } + + } + +out: return ret; } + static void __exit prng_exit(void) { - /* wipe me */ - kzfree(p->buf); - kfree(p); - - misc_deregister(&prng_dev); + if (prng_mode == PRNG_MODE_SHA512) { + sysfs_remove_group(&prng_sha512_dev.this_device->kobj, + &prng_sha512_dev_attr_group); + misc_deregister(&prng_sha512_dev); + prng_sha512_deinstantiate(); + } else { + sysfs_remove_group(&prng_tdes_dev.this_device->kobj, + &prng_tdes_dev_attr_group); + misc_deregister(&prng_tdes_dev); + prng_tdes_deinstantiate(); + } } + module_init(prng_init); module_exit(prng_exit); From 74d6ea52aeef0236242221c6eff6d892565c5a92 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Fri, 24 Apr 2015 15:19:29 +0800 Subject: [PATCH 018/104] ASoC: rt5677: add register patch for PLL The PLL output will be unstable in some cases. We can fix it by setting some registers. Signed-off-by: Bard Liao Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/rt5677.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c index 5d0bb8748dd1..c6d4e8fa8bd3 100644 --- a/sound/soc/codecs/rt5677.c +++ b/sound/soc/codecs/rt5677.c @@ -62,6 +62,9 @@ static const struct reg_default init_list[] = { {RT5677_PR_BASE + 0x1e, 0x0000}, {RT5677_PR_BASE + 0x12, 0x0eaa}, {RT5677_PR_BASE + 0x14, 0x018a}, + {RT5677_PR_BASE + 0x15, 0x0490}, + {RT5677_PR_BASE + 0x38, 0x0f71}, + {RT5677_PR_BASE + 0x39, 0x0f71}, }; #define RT5677_INIT_REG_LEN ARRAY_SIZE(init_list) From 3168c201f7ca333d12f80f8d98bbbe3a33746f8b Mon Sep 17 00:00:00 2001 From: "Fang, Yang A" Date: Thu, 23 Apr 2015 16:35:17 -0700 Subject: [PATCH 019/104] ASoC: rt5645: Add ACPI match ID This patch adds the ACPI match ID for rt5645/5650 codec Signed-off-by: Fang, Yang A Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index c9a4c5be083b..e16724a11c47 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -2618,6 +2619,15 @@ static const struct i2c_device_id rt5645_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, rt5645_i2c_id); +#ifdef CONFIG_ACPI +static struct acpi_device_id rt5645_acpi_match[] = { + { "10EC5645", 0 }, + { "10EC5650", 0 }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, rt5645_acpi_match); +#endif + static int rt5645_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { @@ -2834,6 +2844,7 @@ static struct i2c_driver rt5645_i2c_driver = { .driver = { .name = "rt5645", .owner = THIS_MODULE, + .acpi_match_table = ACPI_PTR(rt5645_acpi_match), }, .probe = rt5645_i2c_probe, .remove = rt5645_i2c_remove, From 4c4ed0748f82e26d8b884383e6737cf5a861ed6f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 14 Apr 2015 16:44:14 +0200 Subject: [PATCH 020/104] netfilter: nf_tables: fix wrong length for jump/goto verdicts NFT_JUMP/GOTO erronously sets length to sizeof(void *). We then allocate insufficient memory when such element is added to a vmap. Suggested-by: Patrick McHardy Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 78af83bc9c8e..ad9d11fb29fd 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4340,7 +4340,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, case NFT_CONTINUE: case NFT_BREAK: case NFT_RETURN: - desc->len = sizeof(data->verdict); break; case NFT_JUMP: case NFT_GOTO: @@ -4355,10 +4354,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, chain->use++; data->verdict.chain = chain; - desc->len = sizeof(data); break; } + desc->len = sizeof(data->verdict); desc->type = NFT_DATA_VERDICT; return 0; } From 547c4b547e07dcc60874b6ef6252dd49ff74aec1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 20 Apr 2015 12:35:47 +0200 Subject: [PATCH 021/104] netfilter: bridge: fix NULL deref in physin/out ifindex helpers Might not have an outdev yet. We'll oops when iface goes down while skbs are still nfqueue'd: RIP: 0010:[] [] dev_cmp+0x4f/0x80 nfqnl_rcv_dev_event+0xe2/0x150 notifier_call_chain+0x53/0xa0 Fixes: c737b7c4510026 ("netfilter: bridge: add helpers for fetching physin/outdev") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index ab8f76dba668..f2fdb5a52070 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -39,12 +39,24 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb) static inline int nf_bridge_get_physinif(const struct sk_buff *skb) { - return skb->nf_bridge ? skb->nf_bridge->physindev->ifindex : 0; + struct nf_bridge_info *nf_bridge; + + if (skb->nf_bridge == NULL) + return 0; + + nf_bridge = skb->nf_bridge; + return nf_bridge->physindev ? nf_bridge->physindev->ifindex : 0; } static inline int nf_bridge_get_physoutif(const struct sk_buff *skb) { - return skb->nf_bridge ? skb->nf_bridge->physoutdev->ifindex : 0; + struct nf_bridge_info *nf_bridge; + + if (skb->nf_bridge == NULL) + return 0; + + nf_bridge = skb->nf_bridge; + return nf_bridge->physoutdev ? nf_bridge->physoutdev->ifindex : 0; } static inline struct net_device * From b58d1a9ef92031a6fc2f418c01abafb4458ba009 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 25 Apr 2015 18:29:16 +0100 Subject: [PATCH 022/104] Btrfs: fix race between start dirty bg cache writeout and bg deletion While running xfstests I ran into the following: [20892.242791] ------------[ cut here ]------------ [20892.243776] WARNING: CPU: 0 PID: 13299 at fs/btrfs/super.c:260 __btrfs_abort_transaction+0x52/0x114 [btrfs]() [20892.245874] BTRFS: Transaction aborted (error -2) [20892.247329] Modules linked in: btrfs dm_snapshot dm_bufio dm_flakey dm_mod crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse$ [20892.258488] CPU: 0 PID: 13299 Comm: fsstress Tainted: G W 4.0.0-rc5-btrfs-next-9+ #2 [20892.262011] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 [20892.264738] 0000000000000009 ffff880427f8bc18 ffffffff8142fa46 ffffffff8108b6a2 [20892.266244] ffff880427f8bc68 ffff880427f8bc58 ffffffff81045ea5 ffff880427f8bc48 [20892.267761] ffffffffa0509a6d 00000000fffffffe ffff8803545d6f40 ffffffffa05a15a0 [20892.269378] Call Trace: [20892.269915] [] dump_stack+0x4f/0x7b [20892.271097] [] ? console_unlock+0x361/0x3ad [20892.272173] [] warn_slowpath_common+0xa1/0xbb [20892.273386] [] ? __btrfs_abort_transaction+0x52/0x114 [btrfs] [20892.274857] [] warn_slowpath_fmt+0x46/0x48 [20892.275851] [] __btrfs_abort_transaction+0x52/0x114 [btrfs] [20892.277341] [] write_one_cache_group+0x68/0xaf [btrfs] [20892.278628] [] btrfs_start_dirty_block_groups+0x18d/0x29b [btrfs] [20892.280191] [] btrfs_commit_transaction+0x130/0x9c9 [btrfs] [20892.281781] [] ? trace_hardirqs_on+0xd/0xf [20892.282873] [] btrfs_sync_file+0x313/0x387 [btrfs] [20892.284111] [] vfs_fsync_range+0x95/0xa4 [20892.285203] [] ? time_hardirqs_on+0x15/0x28 [20892.286290] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [20892.287469] [] vfs_fsync+0x1c/0x1e [20892.288412] [] do_fsync+0x34/0x4e [20892.289348] [] SyS_fsync+0x10/0x14 [20892.290255] [] system_call_fastpath+0x12/0x17 [20892.291316] ---[ end trace 597f77e664245373 ]--- [20892.293955] BTRFS: error (device sdg) in write_one_cache_group:3184: errno=-2 No such entry [20892.297390] BTRFS info (device sdg): forced readonly This happens because in btrfs_start_dirty_block_groups() we splice the transaction's list of dirty block groups into a local list and then we keep extracting the first element of the list without holding the cache_write_mutex mutex. This means that before we acquire that mutex the first block group on the list might be removed by a conurrent task running btrfs_remove_block_group(). So make sure we extract the first element (and test the list emptyness) while holding that mutex. Fixes: 1bbc621ef284 ("Btrfs: allow block group cache writeout outside critical section in commit") Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 44 ++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1eef4ee01d1a..fc0db9887c0e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3408,17 +3408,14 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans, int loops = 0; spin_lock(&cur_trans->dirty_bgs_lock); - if (!list_empty(&cur_trans->dirty_bgs)) { - list_splice_init(&cur_trans->dirty_bgs, &dirty); + if (list_empty(&cur_trans->dirty_bgs)) { + spin_unlock(&cur_trans->dirty_bgs_lock); + return 0; } + list_splice_init(&cur_trans->dirty_bgs, &dirty); spin_unlock(&cur_trans->dirty_bgs_lock); again: - if (list_empty(&dirty)) { - btrfs_free_path(path); - return 0; - } - /* * make sure all the block groups on our dirty list actually * exist @@ -3431,18 +3428,16 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans, return -ENOMEM; } + /* + * cache_write_mutex is here only to save us from balance or automatic + * removal of empty block groups deleting this block group while we are + * writing out the cache + */ + mutex_lock(&trans->transaction->cache_write_mutex); while (!list_empty(&dirty)) { cache = list_first_entry(&dirty, struct btrfs_block_group_cache, dirty_list); - - /* - * cache_write_mutex is here only to save us from balance - * deleting this block group while we are writing out the - * cache - */ - mutex_lock(&trans->transaction->cache_write_mutex); - /* * this can happen if something re-dirties a block * group that is already under IO. Just wait for it to @@ -3495,7 +3490,6 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans, } if (!ret) ret = write_one_cache_group(trans, root, path, cache); - mutex_unlock(&trans->transaction->cache_write_mutex); /* if its not on the io list, we need to put the block group */ if (should_put) @@ -3503,7 +3497,16 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans, if (ret) break; + + /* + * Avoid blocking other tasks for too long. It might even save + * us from writing caches for block groups that are going to be + * removed. + */ + mutex_unlock(&trans->transaction->cache_write_mutex); + mutex_lock(&trans->transaction->cache_write_mutex); } + mutex_unlock(&trans->transaction->cache_write_mutex); /* * go through delayed refs for all the stuff we've just kicked off @@ -3514,8 +3517,15 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans, loops++; spin_lock(&cur_trans->dirty_bgs_lock); list_splice_init(&cur_trans->dirty_bgs, &dirty); + /* + * dirty_bgs_lock protects us from concurrent block group + * deletes too (not just cache_write_mutex). + */ + if (!list_empty(&dirty)) { + spin_unlock(&cur_trans->dirty_bgs_lock); + goto again; + } spin_unlock(&cur_trans->dirty_bgs_lock); - goto again; } btrfs_free_path(path); From 24b89d08ef63f9c62e55593e27a55e57b0062046 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 25 Apr 2015 18:31:05 +0100 Subject: [PATCH 023/104] Btrfs: fix deadlock when starting writeback of bg caches While starting the writes of the dirty block group caches, if we don't find a block group item in the extent tree we were leaving without releasing our path, running delayed references and then looping again to process any new dirty block groups. However this second iteration of the loop could cause a deadlock because it tries to lock some other extent tree node/leaf which another task already locked and it's blocked because it's waiting for a lock on some node/leaf that is in our path that was not released before. We could also deadlock when running the delayed references - as we could end up trying to lock the same nodes/leafs that we have in our local path (with a different lock type). Got into such case when running xfstests: [20892.242791] ------------[ cut here ]------------ [20892.243776] WARNING: CPU: 0 PID: 13299 at fs/btrfs/super.c:260 __btrfs_abort_transaction+0x52/0x114 [btrfs]() [20892.245874] BTRFS: Transaction aborted (error -2) (...) [20892.269378] Call Trace: [20892.269915] [] dump_stack+0x4f/0x7b [20892.271097] [] ? console_unlock+0x361/0x3ad [20892.272173] [] warn_slowpath_common+0xa1/0xbb [20892.273386] [] ? __btrfs_abort_transaction+0x52/0x114 [btrfs] [20892.274857] [] warn_slowpath_fmt+0x46/0x48 [20892.275851] [] __btrfs_abort_transaction+0x52/0x114 [btrfs] [20892.277341] [] write_one_cache_group+0x68/0xaf [btrfs] [20892.278628] [] btrfs_start_dirty_block_groups+0x18d/0x29b [btrfs] [20892.280191] [] btrfs_commit_transaction+0x130/0x9c9 [btrfs] (...) [20892.291316] ---[ end trace 597f77e664245373 ]--- [20892.293955] BTRFS: error (device sdg) in write_one_cache_group:3184: errno=-2 No such entry [20892.297390] BTRFS info (device sdg): forced readonly [20892.298222] ------------[ cut here ]------------ [20892.299190] WARNING: CPU: 0 PID: 13299 at fs/btrfs/ctree.c:2683 btrfs_search_slot+0x7e/0x7d2 [btrfs]() (...) [20892.326253] Call Trace: [20892.326904] [] dump_stack+0x4f/0x7b [20892.329503] [] ? console_unlock+0x361/0x3ad [20892.330815] [] warn_slowpath_common+0xa1/0xbb [20892.332556] [] ? btrfs_search_slot+0x7e/0x7d2 [btrfs] [20892.333955] [] warn_slowpath_null+0x1a/0x1c [20892.335562] [] btrfs_search_slot+0x7e/0x7d2 [btrfs] [20892.336849] [] ? arch_local_irq_save+0x9/0xc [20892.338222] [] ? cache_save_setup+0x43/0x2a5 [btrfs] [20892.339823] [] ? cache_save_setup+0x57/0x2a5 [btrfs] [20892.341275] [] ? _raw_spin_unlock+0x32/0x46 [20892.342810] [] write_one_cache_group+0x3f/0xaf [btrfs] [20892.344184] [] btrfs_start_dirty_block_groups+0x18d/0x29b [btrfs] [20892.347162] [] btrfs_commit_transaction+0x130/0x9c9 [btrfs] (...) [20892.361015] ---[ end trace 597f77e664245374 ]--- [21120.688097] INFO: task kworker/u8:17:29854 blocked for more than 120 seconds. [21120.689881] Tainted: G W 4.0.0-rc5-btrfs-next-9+ #2 [21120.691384] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. (...) [21120.703696] Call Trace: [21120.704310] [] schedule+0x74/0x83 [21120.705490] [] btrfs_tree_lock+0xd7/0x236 [btrfs] [21120.706757] [] ? signal_pending_state+0x31/0x31 [21120.708156] [] lock_extent_buffer_for_io+0x3e/0x194 [btrfs] [21120.709892] [] ? btree_write_cache_pages+0x273/0x385 [btrfs] [21120.711605] [] btree_write_cache_pages+0x32f/0x385 [btrfs] [21120.723440] [] btree_writepages+0x23/0x5c [btrfs] [21120.724943] [] do_writepages+0x23/0x2c [21120.726008] [] __writeback_single_inode+0x73/0x2fa [21120.727230] [] ? writeback_sb_inodes+0xe5/0x38b [21120.728526] [] ? writeback_sb_inodes+0x196/0x38b [21120.729701] [] writeback_sb_inodes+0x205/0x38b (...) [21120.747853] INFO: task btrfs:13282 blocked for more than 120 seconds. [21120.749459] Tainted: G W 4.0.0-rc5-btrfs-next-9+ #2 [21120.751137] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. (...) [21120.768457] Call Trace: [21120.769039] [] schedule+0x74/0x83 [21120.770107] [] btrfs_commit_transaction+0x315/0x9c9 [btrfs] [21120.771558] [] ? signal_pending_state+0x31/0x31 [21120.773659] [] prepare_to_relocate+0xcb/0xd2 [btrfs] [21120.776257] [] relocate_block_group+0x44/0x4a9 [btrfs] [21120.777755] [] ? btrfs_relocate_block_group+0x161/0x288 [btrfs] [21120.779459] [] btrfs_relocate_block_group+0x169/0x288 [btrfs] [21120.781153] [] btrfs_relocate_chunk.isra.29+0x3e/0xa7 [btrfs] [21120.783918] [] btrfs_balance+0xaa4/0xc52 [btrfs] [21120.785436] [] ? cpu_cache_get.isra.39+0xe/0x1f [21120.786434] [] btrfs_ioctl_balance+0x23f/0x2b0 [btrfs] (...) [21120.889251] INFO: task fsstress:13288 blocked for more than 120 seconds. [21120.890526] Tainted: G W 4.0.0-rc5-btrfs-next-9+ #2 [21120.891773] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. (...) [21120.899960] Call Trace: [21120.900743] [] schedule+0x74/0x83 [21120.903004] [] btrfs_tree_lock+0xd7/0x236 [btrfs] [21120.904383] [] ? signal_pending_state+0x31/0x31 [21120.905608] [] btrfs_search_slot+0x766/0x7d2 [btrfs] [21120.906812] [] ? virt_to_head_page+0x9/0x2c [21120.907874] [] ? cache_alloc_debugcheck_after.isra.42+0x16c/0x1cb [21120.909551] [] btrfs_insert_empty_items+0x5d/0xa8 [btrfs] [21120.910914] [] btrfs_insert_item+0x5a/0xa5 [btrfs] [21120.912181] [] ? btrfs_create_pending_block_groups+0x96/0x130 [btrfs] [21120.913784] [] btrfs_create_pending_block_groups+0xaf/0x130 [btrfs] [21120.915374] [] __btrfs_end_transaction+0x84/0x366 [btrfs] [21120.916735] [] btrfs_end_transaction+0x10/0x12 [btrfs] [21120.917996] [] btrfs_check_data_free_space+0x11f/0x27c [btrfs] [21120.919478] [] btrfs_delalloc_reserve_space+0x1e/0x51 [btrfs] [21120.921226] [] btrfs_truncate_page+0x85/0x2c4 [btrfs] [21120.923121] [] btrfs_cont_expand+0x41/0x3ef [btrfs] [21120.924449] [] ? btrfs_file_write_iter+0x19a/0x431 [btrfs] [21120.926602] [] ? arch_local_irq_save+0x9/0xc [21120.927769] [] ? btrfs_file_write_iter+0x19a/0x431 [btrfs] [21120.929324] [] ? btrfs_file_write_iter+0x1a9/0x431 [btrfs] [21120.930723] [] btrfs_file_write_iter+0x1e2/0x431 [btrfs] [21120.931897] [] ? get_parent_ip+0xe/0x3e [21120.934446] [] new_sync_write+0x7c/0xa0 [21120.935528] [] vfs_write+0xb2/0x117 (...) Fixes: 1bbc621ef284 ("Btrfs: allow block group cache writeout outside critical section in commit") Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fc0db9887c0e..eeb429cafcb5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3178,8 +3178,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, bi = btrfs_item_ptr_offset(leaf, path->slots[0]); write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); btrfs_mark_buffer_dirty(leaf); - btrfs_release_path(path); fail: + btrfs_release_path(path); if (ret) btrfs_abort_transaction(trans, root, ret); return ret; From e4c88f007be78d38eaef316c599a1ee2f0272c15 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 18 Apr 2015 05:22:48 -0700 Subject: [PATCH 024/104] Btrfs: don't check for delalloc_bytes in cache_save_setup Now that we're doing free space cache writeback outside the critical section in the commit, there is a bigger window for delalloc_bytes to be added after a cache has been written. find_free_extent may do this without putting the block group back into the dirty list, and also without a transaction running. Checking for delalloc_bytes in cache_save_setup means we might leave the cache marked as written without invalidating it. Consistency checks during mount will toss the cache, but it's better to get rid of the check in cache_save_setup and let it get invalidated by the checks already done during cache write out. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index eeb429cafcb5..6f420c8efdaf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3305,8 +3305,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, spin_lock(&block_group->lock); if (block_group->cached != BTRFS_CACHE_FINISHED || - !btrfs_test_opt(root, SPACE_CACHE) || - block_group->delalloc_bytes) { + !btrfs_test_opt(root, SPACE_CACHE)) { /* * don't bother trying to write stuff out _if_ * a) we're not cached, From 1b9845081633072c18f30d8cfd09c267adf0b109 Mon Sep 17 00:00:00 2001 From: Forrest Liu Date: Mon, 9 Feb 2015 17:30:47 +0800 Subject: [PATCH 025/104] Btrfs: fix find_free_dev_extent() malfunction in case device tree has hole If device tree has hole, find_free_dev_extent() cannot find available address properly. The problem can be reproduce by following script. mntpath=/btrfs loopdev=/dev/loop0 filepath=/home/forrest/image umount $mntpath losetup -d $loopdev truncate --size 100g $filepath losetup $loopdev $filepath mkfs.btrfs -f $loopdev mount $loopdev $mntpath # make device tree with one big hole for i in `seq 1 1 100`; do fallocate -l 1g $mntpath/$i done sync for i in `seq 1 1 95`; do rm $mntpath/$i done sync # wait cleaner thread remove unused block group sleep 300 fallocate -l 1g $mntpath/aaa # failed to allocate new chunk fallocate -l 1g $mntpath/bbb Above script will make device tree with one big hole, and can only allocate just one chunk in a transaction, so failed to allocate new chunk for $mntpath/bbb item 8 key (1 DEV_EXTENT 2185232384) itemoff 15859 itemsize 48 dev extent chunk_tree 3 chunk objectid 256 chunk offset 106292051968 length 1073741824 item 9 key (1 DEV_EXTENT 104190705664) itemoff 15811 itemsize 48 dev extent chunk_tree 3 chunk objectid 256 chunk offset 103108575232 length 1073741824 Signed-off-by: Forrest Liu Reviewed-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8bcd2a007517..96aebf3bcd5b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1058,6 +1058,7 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans, struct extent_map *em; struct list_head *search_list = &trans->transaction->pending_chunks; int ret = 0; + u64 physical_start = *start; again: list_for_each_entry(em, search_list, list) { @@ -1068,9 +1069,9 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans, for (i = 0; i < map->num_stripes; i++) { if (map->stripes[i].dev != device) continue; - if (map->stripes[i].physical >= *start + len || + if (map->stripes[i].physical >= physical_start + len || map->stripes[i].physical + em->orig_block_len <= - *start) + physical_start) continue; *start = map->stripes[i].physical + em->orig_block_len; @@ -1193,8 +1194,14 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, */ if (contains_pending_extent(trans, device, &search_start, - hole_size)) - hole_size = 0; + hole_size)) { + if (key.offset >= search_start) { + hole_size = key.offset - search_start; + } else { + WARN_ON_ONCE(1); + hole_size = 0; + } + } if (hole_size > max_hole_size) { max_hole_start = search_start; From 67b7859e9bfa0dcee5a8256932e393e98739be1b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 24 Feb 2015 02:47:04 -0800 Subject: [PATCH 026/104] btrfs: handle ENOMEM in btrfs_alloc_tree_block This is one of the first places to give out when memory is tight. Handle it properly rather than with a BUG_ON. Also fix the comment about the return value, which is an ERR_PTR, not NULL, on error. Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6f420c8efdaf..0ec8e228b89f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7546,7 +7546,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info, * returns the key for the extent through ins, and a tree buffer for * the first block of the extent through buf. * - * returns the tree buffer or NULL. + * returns the tree buffer or an ERR_PTR on error. */ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -7557,6 +7557,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, struct btrfs_key ins; struct btrfs_block_rsv *block_rsv; struct extent_buffer *buf; + struct btrfs_delayed_extent_op *extent_op; u64 flags = 0; int ret; u32 blocksize = root->nodesize; @@ -7577,13 +7578,14 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, ret = btrfs_reserve_extent(root, blocksize, blocksize, empty_size, hint, &ins, 0, 0); - if (ret) { - unuse_block_rsv(root->fs_info, block_rsv, blocksize); - return ERR_PTR(ret); - } + if (ret) + goto out_unuse; buf = btrfs_init_new_buffer(trans, root, ins.objectid, level); - BUG_ON(IS_ERR(buf)); /* -ENOMEM */ + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto out_free_reserved; + } if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { if (parent == 0) @@ -7593,9 +7595,11 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, BUG_ON(parent > 0); if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { - struct btrfs_delayed_extent_op *extent_op; extent_op = btrfs_alloc_delayed_extent_op(); - BUG_ON(!extent_op); /* -ENOMEM */ + if (!extent_op) { + ret = -ENOMEM; + goto out_free_buf; + } if (key) memcpy(&extent_op->key, key, sizeof(extent_op->key)); else @@ -7610,13 +7614,24 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, extent_op->level = level; ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, - ins.objectid, - ins.offset, parent, root_objectid, - level, BTRFS_ADD_DELAYED_EXTENT, - extent_op, 0); - BUG_ON(ret); /* -ENOMEM */ + ins.objectid, ins.offset, + parent, root_objectid, level, + BTRFS_ADD_DELAYED_EXTENT, + extent_op, 0); + if (ret) + goto out_free_delayed; } return buf; + +out_free_delayed: + btrfs_free_delayed_extent_op(extent_op); +out_free_buf: + free_extent_buffer(buf); +out_free_reserved: + btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0); +out_unuse: + unuse_block_rsv(root->fs_info, block_rsv, blocksize); + return ERR_PTR(ret); } struct walk_control { From 5ca64f45e92dc52bd3bc1ad93f4f9e5a57955f28 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 24 Feb 2015 02:47:05 -0800 Subject: [PATCH 027/104] btrfs: fix race on ENOMEM in alloc_extent_buffer Consider the following interleaving of overlapping calls to alloc_extent_buffer: Call 1: - Successfully allocates a few pages with find_or_create_page - find_or_create_page fails, goto free_eb - Unlocks the allocated pages Call 2: - Calls find_or_create_page and gets a page in call 1's extent_buffer - Finds that the page is already associated with an extent_buffer - Grabs a reference to the half-written extent_buffer and calls mark_extent_buffer_accessed on it mark_extent_buffer_accessed will then try to call mark_page_accessed on a null page and panic. The fix is to decrement the reference count on the half-written extent_buffer before unlocking the pages so call 2 won't use it. We should also set exists = NULL in the case that we don't use exists to avoid accidentally returning a freed extent_buffer in an error case. Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Reviewed-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 782f3bc4651d..ea100eb188de 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4870,6 +4870,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, mark_extent_buffer_accessed(exists, p); goto free_eb; } + exists = NULL; /* * Do this so attach doesn't complain and we need to @@ -4933,12 +4934,12 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, return eb; free_eb: + WARN_ON(!atomic_dec_and_test(&eb->refs)); for (i = 0; i < num_pages; i++) { if (eb->pages[i]) unlock_page(eb->pages[i]); } - WARN_ON(!atomic_dec_and_test(&eb->refs)); btrfs_release_extent_buffer(eb); return exists; } From b86054540e4ad7d8df67f3a80658d037a2ce1c02 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 24 Feb 2015 02:47:06 -0800 Subject: [PATCH 028/104] btrfs: check io_ctl_prepare_pages return in __btrfs_write_out_cache If io_ctl_prepare_pages fails, the pages in io_ctl.pages are not valid. When we try to access them later, things will blow up in various ways. Also fix the comment about the return value, which is an errno on error, not -1, and update the cases where it was not. Reviewed-by: Liu Bo Signed-off-by: Omar Sandoval Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 81fa75a8e1f3..41c510b7cc11 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1218,7 +1218,7 @@ int btrfs_wait_cache_io(struct btrfs_root *root, * * This function writes out a free space cache struct to disk for quick recovery * on mount. This will return 0 if it was successfull in writing the cache out, - * and -1 if it was not. + * or an errno if it was not. */ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, struct btrfs_free_space_ctl *ctl, @@ -1235,12 +1235,12 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, int must_iput = 0; if (!i_size_read(inode)) - return -1; + return -EIO; WARN_ON(io_ctl->pages); ret = io_ctl_init(io_ctl, inode, root, 1); if (ret) - return -1; + return ret; if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { down_write(&block_group->data_rwsem); @@ -1258,7 +1258,9 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, } /* Lock all pages first so we can lock the extent safely. */ - io_ctl_prepare_pages(io_ctl, inode, 0); + ret = io_ctl_prepare_pages(io_ctl, inode, 0); + if (ret) + goto out; lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 0, &cached_state); From 909e26dce3f7600f5e293ac0522c28790a0c8c9c Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 10 Apr 2015 14:20:40 -0700 Subject: [PATCH 029/104] btrfs: unlock i_mutex after attempting to delete subvolume during send Whenever the check for a send in progress introduced in commit 521e0546c970 (btrfs: protect snapshots from deleting during send) is hit, we return without unlocking inode->i_mutex. This is easy to see with lockdep enabled: [ +0.000059] ================================================ [ +0.000028] [ BUG: lock held when returning to user space! ] [ +0.000029] 4.0.0-rc5-00096-g3c435c1 #93 Not tainted [ +0.000026] ------------------------------------------------ [ +0.000029] btrfs/211 is leaving the kernel with locks still held! [ +0.000029] 1 lock held by btrfs/211: [ +0.000023] #0: (&type->i_mutex_dir_key){+.+.+.}, at: [] btrfs_ioctl_snap_destroy+0x2df/0x7a0 Make sure we unlock it in the error path. Reviewed-by: Filipe Manana Reviewed-by: David Sterba Cc: stable@vger.kernel.org Signed-off-by: Omar Sandoval Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ca5d968f4c37..cdcd98ceddea 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2410,7 +2410,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, "Attempt to delete subvolume %llu during send", dest->root_key.objectid); err = -EPERM; - goto out_dput; + goto out_unlock_inode; } d_invalidate(dentry); @@ -2505,6 +2505,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, root_flags & ~BTRFS_ROOT_SUBVOL_DEAD); spin_unlock(&dest->root_item_lock); } +out_unlock_inode: mutex_unlock(&inode->i_mutex); if (!err) { shrink_dcache_sb(root->fs_info->sb); From 6e17d30bfaf43e04d991392d8484f1c556810c33 Mon Sep 17 00:00:00 2001 From: Yang Dongsheng Date: Thu, 9 Apr 2015 12:08:43 +0800 Subject: [PATCH 030/104] Btrfs: fill ->last_trans for delayed inode in btrfs_fill_inode. We need to fill inode when we found a node for it in delayed_nodes_tree. But we did not fill the ->last_trans currently, it will cause the test of xfstest/generic/311 fail. Scenario of the 311 is shown as below: Problem: (1). test_fd = open(fname, O_RDWR|O_DIRECT) (2). pwrite(test_fd, buf, 4096, 0) (3). close(test_fd) (4). drop_all_caches() <-------- "echo 3 > /proc/sys/vm/drop_caches" (5). test_fd = open(fname, O_RDWR|O_DIRECT) (6). fsync(test_fd); <-------- we did not get the correct log entry for the file Reason: When we re-open this file in (5), we would find a node in delayed_nodes_tree and fill the inode we are lookup with the information. But the ->last_trans is not filled, then the fsync() will check the ->last_trans and found it's 0 then say this inode is already in our tree which is commited, not recording the extents for it. Fix: This patch fill the ->last_trans properly and set the runtime_flags if needed in this situation. Then we can get the log entries we expected after (6) and generic/311 passed. Signed-off-by: Dongsheng Yang Reviewed-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/delayed-inode.c | 2 ++ fs/btrfs/inode.c | 21 ++++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index cde698a07d21..a2ae42720a6a 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1802,6 +1802,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); + BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item); + inode->i_version = btrfs_stack_inode_sequence(inode_item); inode->i_rdev = 0; *rdev = btrfs_stack_inode_rdev(inode_item); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 27b59b8362f9..0020b5675fa9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3633,25 +3633,28 @@ static void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item); + inode->i_version = btrfs_inode_sequence(leaf, inode_item); + inode->i_generation = BTRFS_I(inode)->generation; + inode->i_rdev = 0; + rdev = btrfs_inode_rdev(leaf, inode_item); + + BTRFS_I(inode)->index_cnt = (u64)-1; + BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); + +cache_index: /* * If we were modified in the current generation and evicted from memory * and then re-read we need to do a full sync since we don't have any * idea about which extents were modified before we were evicted from * cache. + * + * This is required for both inode re-read from disk and delayed inode + * in delayed_nodes_tree. */ if (BTRFS_I(inode)->last_trans == root->fs_info->generation) set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); - inode->i_version = btrfs_inode_sequence(leaf, inode_item); - inode->i_generation = BTRFS_I(inode)->generation; - inode->i_rdev = 0; - rdev = btrfs_inode_rdev(leaf, inode_item); - - BTRFS_I(inode)->index_cnt = (u64)-1; - BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); - -cache_index: path->slots[0]++; if (inode->i_nlink != 1 || path->slots[0] >= btrfs_header_nritems(leaf)) From 41b4b3bc7905615f7617c88bf9cf4b3b04bda78f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 Feb 2015 16:31:39 +0100 Subject: [PATCH 031/104] drivers: sh: Disable PM runtime for multi-platform sh73a0 with genpd If the default PM domain using PM_CLK is used for PM runtime, the real PM domain(s) cannot be registered from DT later. Hence do not enable it when running a multi-platform kernel with genpd support on an sh73a0. The R-Mobile PM domain driver will take care of PM runtime management of the module clocks. The default PM domain is still needed for: - platforms without genpd support, - the legacy (non-DT) case, where genpd may take over later, except for the C5 "always on" PM domain. Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- drivers/sh/pm_runtime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/sh/pm_runtime.c b/drivers/sh/pm_runtime.c index cd4c293f0dd0..6b16a67ba618 100644 --- a/drivers/sh/pm_runtime.c +++ b/drivers/sh/pm_runtime.c @@ -83,6 +83,7 @@ static int __init sh_pm_runtime_init(void) !of_machine_is_compatible("renesas,r8a73a4") && #ifndef CONFIG_PM_GENERIC_DOMAINS_OF !of_machine_is_compatible("renesas,r8a7740") && + !of_machine_is_compatible("renesas,sh73a0") && #endif !of_machine_is_compatible("renesas,r8a7778") && !of_machine_is_compatible("renesas,r8a7779") && @@ -91,8 +92,7 @@ static int __init sh_pm_runtime_init(void) !of_machine_is_compatible("renesas,r8a7792") && !of_machine_is_compatible("renesas,r8a7793") && !of_machine_is_compatible("renesas,r8a7794") && - !of_machine_is_compatible("renesas,sh7372") && - !of_machine_is_compatible("renesas,sh73a0")) + !of_machine_is_compatible("renesas,sh7372")) return 0; } From 230f259ffe4ad6d334bb100ce1c0c6a73ff18989 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 Feb 2015 16:43:03 +0100 Subject: [PATCH 032/104] drivers: sh: Disable PM runtime for multi-platform r8a73a4 with genpd If the default PM domain using PM_CLK is used for PM runtime, the real PM domain(s) cannot be registered from DT later. Hence do not enable it when running a multi-platform kernel with genpd support on an r8a73a4. The R-Mobile PM domain driver will take care of PM runtime management of the module clocks. The default PM domain is still needed for: - platforms without genpd support, - the legacy (non-DT) case, where genpd may take over later, except for the C5 "always on" PM domain. Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- drivers/sh/pm_runtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/sh/pm_runtime.c b/drivers/sh/pm_runtime.c index 6b16a67ba618..89b7785de1fc 100644 --- a/drivers/sh/pm_runtime.c +++ b/drivers/sh/pm_runtime.c @@ -80,8 +80,8 @@ static int __init sh_pm_runtime_init(void) if (IS_ENABLED(CONFIG_ARCH_SHMOBILE_MULTI)) { if (!of_machine_is_compatible("renesas,emev2") && !of_machine_is_compatible("renesas,r7s72100") && - !of_machine_is_compatible("renesas,r8a73a4") && #ifndef CONFIG_PM_GENERIC_DOMAINS_OF + !of_machine_is_compatible("renesas,r8a73a4") && !of_machine_is_compatible("renesas,r8a7740") && !of_machine_is_compatible("renesas,sh73a0") && #endif From 00170528f0486c08a18475af7fefa0a7679e042d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 19 Feb 2015 18:46:46 +0100 Subject: [PATCH 033/104] drivers: sh: Remove test for now unsupported sh7372 Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- drivers/sh/pm_runtime.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/sh/pm_runtime.c b/drivers/sh/pm_runtime.c index 89b7785de1fc..fe8875f0d7be 100644 --- a/drivers/sh/pm_runtime.c +++ b/drivers/sh/pm_runtime.c @@ -91,8 +91,7 @@ static int __init sh_pm_runtime_init(void) !of_machine_is_compatible("renesas,r8a7791") && !of_machine_is_compatible("renesas,r8a7792") && !of_machine_is_compatible("renesas,r8a7793") && - !of_machine_is_compatible("renesas,r8a7794") && - !of_machine_is_compatible("renesas,sh7372")) + !of_machine_is_compatible("renesas,r8a7794")) return 0; } From 3e1b0c4a9d563d7fc6e22dc92613cd3237bb5ce0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Apr 2015 10:43:22 +0200 Subject: [PATCH 034/104] ALSA: hda - Fix click noise at start on Dell XPS13 Dell XPS13 produces a click noise at boot up, and Gabriele spotted out that it's triggered by the initial pin control of the mic (NID 0x19). This has to be set to Hi-Z Vref while the driver initializes to Vref 80% as a normal mic. This patch fixes the generic parser code not to override the target vref if it has been already set by the driver, and adds a proper initialization of the target vref for this pin in the Realtek driver side. Reported-and-tested-by: Gabriele Mazzotta Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_generic.c | 3 ++- sound/pci/hda/patch_realtek.c | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 3d2597b7037b..788f969b1a68 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -3259,7 +3259,8 @@ static int create_input_ctls(struct hda_codec *codec) val = PIN_IN; if (cfg->inputs[i].type == AUTO_PIN_MIC) val |= snd_hda_get_default_vref(codec, pin); - if (pin != spec->hp_mic_pin) + if (pin != spec->hp_mic_pin && + !snd_hda_codec_get_pin_target(codec, pin)) set_pin_target(codec, pin, val, false); if (mixer) { diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 06199e4e930f..e2afd53cc14c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4190,11 +4190,18 @@ static void alc_shutup_dell_xps13(struct hda_codec *codec) static void alc_fixup_dell_xps13(struct hda_codec *codec, const struct hda_fixup *fix, int action) { - if (action == HDA_FIXUP_ACT_PROBE) { - struct alc_spec *spec = codec->spec; - struct hda_input_mux *imux = &spec->gen.input_mux; - int i; + struct alc_spec *spec = codec->spec; + struct hda_input_mux *imux = &spec->gen.input_mux; + int i; + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + /* mic pin 0x19 must be initialized with Vref Hi-Z, otherwise + * it causes a click noise at start up + */ + snd_hda_codec_set_pin_target(codec, 0x19, PIN_VREFHIZ); + break; + case HDA_FIXUP_ACT_PROBE: spec->shutup = alc_shutup_dell_xps13; /* Make the internal mic the default input source. */ @@ -4204,6 +4211,7 @@ static void alc_fixup_dell_xps13(struct hda_codec *codec, break; } } + break; } } From ee52e56e7b12834476cd0031c5986254ba1b6317 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Apr 2015 10:36:11 +0200 Subject: [PATCH 035/104] ALSA: hda - Fix mute-LED fixed mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mute-LED mode control has the fixed on/off states that are supposed to remain on/off regardless of the master switch. However, this doesn't work actually because the vmaster hook is called in the vmaster code itself. This patch fixes it by calling the hook indirectly after checking the mute LED mode. Reported-and-tested-by: Pali Rohár Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 873ed1bce12b..27333e0d8ebe 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2082,6 +2082,16 @@ static struct snd_kcontrol_new vmaster_mute_mode = { .put = vmaster_mute_mode_put, }; +/* meta hook to call each driver's vmaster hook */ +static void vmaster_hook(void *private_data, int enabled) +{ + struct hda_vmaster_mute_hook *hook = private_data; + + if (hook->mute_mode != HDA_VMUTE_FOLLOW_MASTER) + enabled = hook->mute_mode; + hook->hook(hook->codec, enabled); +} + /** * snd_hda_add_vmaster_hook - Add a vmaster hook for mute-LED * @codec: the HDA codec @@ -2100,9 +2110,9 @@ int snd_hda_add_vmaster_hook(struct hda_codec *codec, if (!hook->hook || !hook->sw_kctl) return 0; - snd_ctl_add_vmaster_hook(hook->sw_kctl, hook->hook, codec); hook->codec = codec; hook->mute_mode = HDA_VMUTE_FOLLOW_MASTER; + snd_ctl_add_vmaster_hook(hook->sw_kctl, vmaster_hook, hook); if (!expose_enum_ctl) return 0; kctl = snd_ctl_new1(&vmaster_mute_mode, hook); @@ -2128,14 +2138,7 @@ void snd_hda_sync_vmaster_hook(struct hda_vmaster_mute_hook *hook) */ if (hook->codec->bus->shutdown) return; - switch (hook->mute_mode) { - case HDA_VMUTE_FOLLOW_MASTER: - snd_ctl_sync_vmaster_hook(hook->sw_kctl); - break; - default: - hook->hook(hook->codec, hook->mute_mode); - break; - } + snd_ctl_sync_vmaster_hook(hook->sw_kctl); } EXPORT_SYMBOL_GPL(snd_hda_sync_vmaster_hook); From 7290006d8c0900c56d8c58428134f02c35109d17 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Apr 2015 10:40:45 +0200 Subject: [PATCH 036/104] ALSA: hda - Add mute-LED mode control to Thinkpad MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds the missing flag to enable "Mute-LED Mode" mixer enum ctl for Thinkpads that have also the software mute-LED control. Reported-and-tested-by: Pali Rohár Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/thinkpad_helper.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/thinkpad_helper.c b/sound/pci/hda/thinkpad_helper.c index 0a4ad5feb82e..d51703e30523 100644 --- a/sound/pci/hda/thinkpad_helper.c +++ b/sound/pci/hda/thinkpad_helper.c @@ -72,6 +72,7 @@ static void hda_fixup_thinkpad_acpi(struct hda_codec *codec, if (led_set_func(TPACPI_LED_MUTE, false) >= 0) { old_vmaster_hook = spec->vmaster_mute.hook; spec->vmaster_mute.hook = update_tpacpi_mute_led; + spec->vmaster_mute_enum = 1; removefunc = false; } if (led_set_func(TPACPI_LED_MICMUTE, false) >= 0) { From 878a84d5a8a18a4ab241d40cebb791d6aedf5605 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 20 Apr 2015 11:14:19 +0100 Subject: [PATCH 037/104] arm64: add missing data types in smp_load_acquire/smp_store_release Commit 8053871d0f7f ("smp: Fix smp_call_function_single_async() locking") introduced a call to smp_load_acquire() with a u16 argument, but we only cared about u32 and u64 types in that function so far. This resulted in a compiler warning fortunately, pointing at an uninitialized use. Due to the implementation structure the compiler misses that bug in the smp_store_release(), though. Add the u16 and u8 variants using ldarh/stlrh and ldarb/stlrb, respectively. Together with the compiletime_assert_atomic_type() check this should cover all cases now. Acked-by: Will Deacon Signed-off-by: Andre Przywara Signed-off-by: Will Deacon --- arch/arm64/include/asm/barrier.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index a5abb0062d6e..71f19c4dc0de 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -65,6 +65,14 @@ do { \ do { \ compiletime_assert_atomic_type(*p); \ switch (sizeof(*p)) { \ + case 1: \ + asm volatile ("stlrb %w1, %0" \ + : "=Q" (*p) : "r" (v) : "memory"); \ + break; \ + case 2: \ + asm volatile ("stlrh %w1, %0" \ + : "=Q" (*p) : "r" (v) : "memory"); \ + break; \ case 4: \ asm volatile ("stlr %w1, %0" \ : "=Q" (*p) : "r" (v) : "memory"); \ @@ -81,6 +89,14 @@ do { \ typeof(*p) ___p1; \ compiletime_assert_atomic_type(*p); \ switch (sizeof(*p)) { \ + case 1: \ + asm volatile ("ldarb %w0, %1" \ + : "=r" (___p1) : "Q" (*p) : "memory"); \ + break; \ + case 2: \ + asm volatile ("ldarh %w0, %1" \ + : "=r" (___p1) : "Q" (*p) : "memory"); \ + break; \ case 4: \ asm volatile ("ldar %w0, %1" \ : "=r" (___p1) : "Q" (*p) : "memory"); \ From 6544e67bfb1bf55bcf3c0f6b37631917e9acfb74 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 22 Apr 2015 18:16:33 +0100 Subject: [PATCH 038/104] ARM64: Enable CONFIG_GENERIC_IRQ_SHOW_LEVEL Since several interrupt controllers including GIC support both edge and level triggered interrupts, it's useful to provide that information in /proc/interrupts even on ARM64 similar to ARM and PPC. This is based on Geert Uytterhoeven's commit 7c07005eea96 ("ARM: 8339/1: Enable CONFIG_GENERIC_IRQ_SHOW_LEVEL") Signed-off-by: Sudeep Holla Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4269dba63cf1..7796af4b1d6f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -31,6 +31,7 @@ config ARM64 select GENERIC_EARLY_IOREMAP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW + select GENERIC_IRQ_SHOW_LEVEL select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD From 6829e274a623187c24f7cfc0e3d35f25d087fcc5 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 23 Apr 2015 12:46:16 +0100 Subject: [PATCH 039/104] arm64: dma-mapping: always clear allocated buffers Buffers allocated by dma_alloc_coherent() are always zeroed on Alpha, ARM (32bit), MIPS, PowerPC, x86/x86_64 and probably other architectures. It turned out that some drivers rely on this 'feature'. Allocated buffer might be also exposed to userspace with dma_mmap() call, so clearing it is desired from security point of view to avoid exposing random memory to userspace. This patch unifies dma_alloc_coherent() behavior on ARM64 architecture with other implementations by unconditionally zeroing allocated buffer. Cc: # v3.14+ Signed-off-by: Marek Szyprowski Signed-off-by: Will Deacon --- arch/arm64/mm/dma-mapping.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index ef7d112f5ce0..e0f14ee26b68 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -67,8 +67,7 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags) *ret_page = phys_to_page(phys); ptr = (void *)val; - if (flags & __GFP_ZERO) - memset(ptr, 0, size); + memset(ptr, 0, size); } return ptr; @@ -113,8 +112,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, *dma_handle = phys_to_dma(dev, page_to_phys(page)); addr = page_address(page); - if (flags & __GFP_ZERO) - memset(addr, 0, size); + memset(addr, 0, size); return addr; } else { return swiotlb_alloc_coherent(dev, size, dma_handle, flags); From d02260824e2cad626fb2a9d62e27006d34b6dedc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Apr 2015 13:00:09 +0200 Subject: [PATCH 040/104] ALSA: emu10k1: Fix card shortname string buffer overflow Some models provide too long string for the shortname that has 32bytes including the terminator, and it results in a non-terminated string exposed to the user-space. This isn't too critical, though, as the string is stopped at the succeeding longname string. This patch fixes such entries by dropping "SB" prefix (it's enough to fit within 32 bytes, so far). Meanwhile, it also changes strcpy() with strlcpy() to make sure that this kind of problem won't happen in future, too. Cc: Signed-off-by: Takashi Iwai --- sound/pci/emu10k1/emu10k1.c | 6 ++++-- sound/pci/emu10k1/emu10k1_main.c | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c index 37d0220a094c..db7a2e5e4a14 100644 --- a/sound/pci/emu10k1/emu10k1.c +++ b/sound/pci/emu10k1/emu10k1.c @@ -183,8 +183,10 @@ static int snd_card_emu10k1_probe(struct pci_dev *pci, } #endif - strcpy(card->driver, emu->card_capabilities->driver); - strcpy(card->shortname, emu->card_capabilities->name); + strlcpy(card->driver, emu->card_capabilities->driver, + sizeof(card->driver)); + strlcpy(card->shortname, emu->card_capabilities->name, + sizeof(card->shortname)); snprintf(card->longname, sizeof(card->longname), "%s (rev.%d, serial:0x%x) at 0x%lx, irq %i", card->shortname, emu->revision, emu->serial, emu->port, emu->irq); diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 54079f5d5673..4f8cf5e7e45f 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -1446,7 +1446,7 @@ static struct snd_emu_chip_details emu_chip_details[] = { * */ {.vendor = 0x1102, .device = 0x0008, .subsystem = 0x20011102, - .driver = "Audigy2", .name = "SB Audigy 2 ZS Notebook [SB0530]", + .driver = "Audigy2", .name = "Audigy 2 ZS Notebook [SB0530]", .id = "Audigy2", .emu10k2_chip = 1, .ca0108_chip = 1, @@ -1596,7 +1596,7 @@ static struct snd_emu_chip_details emu_chip_details[] = { .adc_1361t = 1, /* 24 bit capture instead of 16bit */ .ac97_chip = 1} , {.vendor = 0x1102, .device = 0x0004, .subsystem = 0x10051102, - .driver = "Audigy2", .name = "SB Audigy 2 Platinum EX [SB0280]", + .driver = "Audigy2", .name = "Audigy 2 Platinum EX [SB0280]", .id = "Audigy2", .emu10k2_chip = 1, .ca0102_chip = 1, From 07b0e5d49d227e3950cb13a3e8caf248ef2a310e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Apr 2015 14:50:39 +0200 Subject: [PATCH 041/104] ALSA: emux: Fix mutex deadlock at unloading The emux-synth driver has a possible AB/BA mutex deadlock at unloading the emu10k1 driver: snd_emux_free() -> snd_emux_detach_seq(): mutex_lock(&emu->register_mutex) -> snd_seq_delete_kernel_client() -> snd_seq_free_client(): mutex_lock(®ister_mutex) snd_seq_release() -> snd_seq_free_client(): mutex_lock(®ister_mutex) -> snd_seq_delete_all_ports() -> snd_emux_unuse(): mutex_lock(&emu->register_mutex) Basically snd_emux_detach_seq() doesn't need a protection of emu->register_mutex as it's already being unregistered. So, we can get rid of this for avoiding the deadlock. Cc: Signed-off-by: Takashi Iwai --- sound/synth/emux/emux_seq.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/synth/emux/emux_seq.c b/sound/synth/emux/emux_seq.c index 7778b8e19782..188fda0effb0 100644 --- a/sound/synth/emux/emux_seq.c +++ b/sound/synth/emux/emux_seq.c @@ -124,12 +124,10 @@ snd_emux_detach_seq(struct snd_emux *emu) if (emu->voices) snd_emux_terminate_all(emu); - mutex_lock(&emu->register_mutex); if (emu->client >= 0) { snd_seq_delete_kernel_client(emu->client); emu->client = -1; } - mutex_unlock(&emu->register_mutex); } From 5dca0d9147458be9b9363b8a484aa77d710b412a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 25 Mar 2015 12:08:14 +0100 Subject: [PATCH 042/104] kvm: x86: fix kvmclock update protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kvmclock spec says that the host will increment a version field to an odd number, then update stuff, then increment it to an even number. The host is buggy and doesn't do this, and the result is observable when one vcpu reads another vcpu's kvmclock data. There's no good way for a guest kernel to keep its vdso from reading a different vcpu's kvmclock data, but we don't need to care about changing VCPUs as long as we read a consistent data from kvmclock. (VCPU can change outside of this loop too, so it doesn't matter if we return a value not fit for this VCPU.) Based on a patch by Radim Krčmář. Reviewed-by: Radim Krčmář Acked-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ed31c31b2485..c73efcd03e29 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1669,12 +1669,28 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) &guest_hv_clock, sizeof(guest_hv_clock)))) return 0; - /* - * The interface expects us to write an even number signaling that the - * update is finished. Since the guest won't see the intermediate - * state, we just increase by 2 at the end. + /* This VCPU is paused, but it's legal for a guest to read another + * VCPU's kvmclock, so we really have to follow the specification where + * it says that version is odd if data is being modified, and even after + * it is consistent. + * + * Version field updates must be kept separate. This is because + * kvm_write_guest_cached might use a "rep movs" instruction, and + * writes within a string instruction are weakly ordered. So there + * are three writes overall. + * + * As a small optimization, only write the version field in the first + * and third write. The vcpu->pv_time cache is still valid, because the + * version field is the first in the struct. */ - vcpu->hv_clock.version = guest_hv_clock.version + 2; + BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); + + vcpu->hv_clock.version = guest_hv_clock.version + 1; + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock.version)); + + smp_wmb(); /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); @@ -1695,6 +1711,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) kvm_write_guest_cached(v->kvm, &vcpu->pv_time, &vcpu->hv_clock, sizeof(vcpu->hv_clock)); + + smp_wmb(); + + vcpu->hv_clock.version++; + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock.version)); return 0; } From 73459e2a1ada09a68c02cc5b73f3116fc8194b3d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 23 Apr 2015 13:20:18 +0200 Subject: [PATCH 043/104] x86: pvclock: Really remove the sched notifier for cross-cpu migrations This reverts commits 0a4e6be9ca17c54817cf814b4b5aa60478c6df27 and 80f7fdb1c7f0f9266421f823964fd1962681f6ce. The task migration notifier was originally introduced in order to support the pvclock vsyscall with non-synchronized TSC, but KVM only supports it with synchronized TSC. Hence, on KVM the race condition is only needed due to a bad implementation on the host side, and even then it's so rare that it's mostly theoretical. As far as KVM is concerned it's possible to fix the host, avoiding the additional complexity in the vDSO and the (re)introduction of the task migration notifier. Xen, on the other hand, hasn't yet implemented vsyscall support at all, so we do not care about its plans for non-synchronized TSC. Reported-by: Peter Zijlstra Suggested-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/pvclock.h | 1 - arch/x86/kernel/pvclock.c | 44 ---------------------------------- arch/x86/vdso/vclock_gettime.c | 34 ++++++++++++-------------- include/linux/sched.h | 8 ------- kernel/sched/core.c | 15 ------------ 5 files changed, 15 insertions(+), 87 deletions(-) diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 25b1cc07d496..d6b078e9fa28 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -95,7 +95,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, struct pvclock_vsyscall_time_info { struct pvclock_vcpu_time_info pvti; - u32 migrate_count; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index e5ecd20e72dd..2f355d229a58 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -141,46 +141,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); } -static struct pvclock_vsyscall_time_info *pvclock_vdso_info; - -static struct pvclock_vsyscall_time_info * -pvclock_get_vsyscall_user_time_info(int cpu) -{ - if (!pvclock_vdso_info) { - BUG(); - return NULL; - } - - return &pvclock_vdso_info[cpu]; -} - -struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) -{ - return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; -} - #ifdef CONFIG_X86_64 -static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, - void *v) -{ - struct task_migration_notifier *mn = v; - struct pvclock_vsyscall_time_info *pvti; - - pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); - - /* this is NULL when pvclock vsyscall is not initialized */ - if (unlikely(pvti == NULL)) - return NOTIFY_DONE; - - pvti->migrate_count++; - - return NOTIFY_DONE; -} - -static struct notifier_block pvclock_migrate = { - .notifier_call = pvclock_task_migrate, -}; - /* * Initialize the generic pvclock vsyscall state. This will allocate * a/some page(s) for the per-vcpu pvclock information, set up a @@ -194,17 +155,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); - pvclock_vdso_info = i; - for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, __pa(i) + (idx*PAGE_SIZE), PAGE_KERNEL_VVAR); } - - register_task_migration_notifier(&pvclock_migrate); - return 0; } #endif diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 40d2473836c9..9793322751e0 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -82,15 +82,18 @@ static notrace cycle_t vread_pvclock(int *mode) cycle_t ret; u64 last; u32 version; - u32 migrate_count; u8 flags; unsigned cpu, cpu1; /* - * When looping to get a consistent (time-info, tsc) pair, we - * also need to deal with the possibility we can switch vcpus, - * so make sure we always re-fetch time-info for the current vcpu. + * Note: hypervisor must guarantee that: + * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. + * 2. that per-CPU pvclock time info is updated if the + * underlying CPU changes. + * 3. that version is increased whenever underlying CPU + * changes. + * */ do { cpu = __getcpu() & VGETCPU_CPU_MASK; @@ -99,27 +102,20 @@ static notrace cycle_t vread_pvclock(int *mode) * __getcpu() calls (Gleb). */ - /* Make sure migrate_count will change if we leave the VCPU. */ - do { - pvti = get_pvti(cpu); - migrate_count = pvti->migrate_count; - - cpu1 = cpu; - cpu = __getcpu() & VGETCPU_CPU_MASK; - } while (unlikely(cpu != cpu1)); + pvti = get_pvti(cpu); version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); /* * Test we're still on the cpu as well as the version. - * - We must read TSC of pvti's VCPU. - * - KVM doesn't follow the versioning protocol, so data could - * change before version if we left the VCPU. + * We could have been migrated just after the first + * vgetcpu but before fetching the version, so we + * wouldn't notice a version change. */ - smp_rmb(); - } while (unlikely((pvti->pvti.version & 1) || - pvti->pvti.version != version || - pvti->migrate_count != migrate_count)); + cpu1 = __getcpu() & VGETCPU_CPU_MASK; + } while (unlikely(cpu != cpu1 || + (pvti->pvti.version & 1) || + pvti->pvti.version != version)); if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) *mode = VCLOCK_NONE; diff --git a/include/linux/sched.h b/include/linux/sched.h index 8222ae40ecb0..26a2e6122734 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -175,14 +175,6 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); extern void update_cpu_load_nohz(void); -/* Notifier for when a task gets migrated to a new CPU */ -struct task_migration_notifier { - struct task_struct *task; - int from_cpu; - int to_cpu; -}; -extern void register_task_migration_notifier(struct notifier_block *n); - extern unsigned long get_parent_ip(unsigned long addr); extern void dump_cpu_task(int cpu); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f9123a82cbb6..fe22f7510bce 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1016,13 +1016,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) rq_clock_skip_update(rq, true); } -static ATOMIC_NOTIFIER_HEAD(task_migration_notifier); - -void register_task_migration_notifier(struct notifier_block *n) -{ - atomic_notifier_chain_register(&task_migration_notifier, n); -} - #ifdef CONFIG_SMP void set_task_cpu(struct task_struct *p, unsigned int new_cpu) { @@ -1053,18 +1046,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) trace_sched_migrate_task(p, new_cpu); if (task_cpu(p) != new_cpu) { - struct task_migration_notifier tmn; - if (p->sched_class->migrate_task_rq) p->sched_class->migrate_task_rq(p, new_cpu); p->se.nr_migrations++; perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0); - - tmn.task = p; - tmn.from_cpu = task_cpu(p); - tmn.to_cpu = new_cpu; - - atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn); } __set_task_cpu(p, new_cpu); From 30e5f003ff4b2be86f71733b6c9b11355d66584c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Apr 2015 16:39:19 +0200 Subject: [PATCH 044/104] ALSA: hda - Fix missing va_end() call in snd_hda_codec_pcm_new() Reported by coverity CID 1296024. Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 27333e0d8ebe..b49feff0a319 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -873,14 +873,15 @@ struct hda_pcm *snd_hda_codec_pcm_new(struct hda_codec *codec, struct hda_pcm *pcm; va_list args; - va_start(args, fmt); pcm = kzalloc(sizeof(*pcm), GFP_KERNEL); if (!pcm) return NULL; pcm->codec = codec; kref_init(&pcm->kref); + va_start(args, fmt); pcm->name = kvasprintf(GFP_KERNEL, fmt, args); + va_end(args); if (!pcm->name) { kfree(pcm); return NULL; From 53f9b3baa937e0cbdd75ea11b3c824462e4359b2 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 21 Apr 2015 12:19:49 +0800 Subject: [PATCH 045/104] ASoC: rt5645: Fix mask for setting RT5645_DMIC_2_DP_GPIO12 bit Current code uses wrong mask when setting RT5645_DMIC_2_DP_GPIO12 bit, fix it. Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index e16724a11c47..3153aa0fe51b 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -2742,7 +2742,7 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, case RT5645_DMIC_DATA_GPIO12: regmap_update_bits(rt5645->regmap, RT5645_DMIC_CTRL1, - RT5645_DMIC_1_DP_MASK, RT5645_DMIC_2_DP_GPIO12); + RT5645_DMIC_2_DP_MASK, RT5645_DMIC_2_DP_GPIO12); regmap_update_bits(rt5645->regmap, RT5645_GPIO_CTRL1, RT5645_GP12_PIN_MASK, RT5645_GP12_PIN_DMIC2_SDA); From 8e046d68ba719a5bb95912c154c6314472edccfa Mon Sep 17 00:00:00 2001 From: "Karicheri, Muralidharan" Date: Mon, 27 Apr 2015 14:12:43 -0400 Subject: [PATCH 046/104] net: netcp: remove call to netif_carrier_(on/off) for MAC to Phy interface Currently when interface type is MAC to Phy, netif_carrier_(on/off) is called which is not needed as Phy lib already updates the carrier status to net stack. This is needed only for other interface types Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_ethss.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 2bef655279f3..9b7e0a34c98b 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -1765,7 +1765,9 @@ static void netcp_ethss_link_state_action(struct gbe_priv *gbe_dev, ALE_PORT_STATE, ALE_PORT_STATE_FORWARD); - if (ndev && slave->open) + if (ndev && slave->open && + slave->link_interface != SGMII_LINK_MAC_PHY && + slave->link_interface != XGMII_LINK_MAC_PHY) netif_carrier_on(ndev); } else { writel(mac_control, GBE_REG_ADDR(slave, emac_regs, @@ -1773,7 +1775,9 @@ static void netcp_ethss_link_state_action(struct gbe_priv *gbe_dev, cpsw_ale_control_set(gbe_dev->ale, slave->port_num, ALE_PORT_STATE, ALE_PORT_STATE_DISABLE); - if (ndev) + if (ndev && + slave->link_interface != SGMII_LINK_MAC_PHY && + slave->link_interface != XGMII_LINK_MAC_PHY) netif_carrier_off(ndev); } From 876a7ae65b86d8cec8efe7d15d050ac61116874e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 27 Apr 2015 14:40:37 -0700 Subject: [PATCH 047/104] bpf: fix 64-bit divide ALU64_DIV instruction should be dividing 64-bit by 64-bit, whereas do_div() does 64-bit by 32-bit divide. x64 and arm64 JITs correctly implement 64 by 64 unsigned divide. llvm BPF backend emits code assuming that ALU64_DIV does 64 by 64. Fixes: 89aa075832b0 ("net: sock: allow eBPF programs to be attached to sockets") Reported-by: Michael Holzheu Acked-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 4139a0f8b558..54f0e7fcd0e2 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -357,8 +357,8 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) ALU64_MOD_X: if (unlikely(SRC == 0)) return 0; - tmp = DST; - DST = do_div(tmp, SRC); + div64_u64_rem(DST, SRC, &tmp); + DST = tmp; CONT; ALU_MOD_X: if (unlikely(SRC == 0)) @@ -367,8 +367,8 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) DST = do_div(tmp, (u32) SRC); CONT; ALU64_MOD_K: - tmp = DST; - DST = do_div(tmp, IMM); + div64_u64_rem(DST, IMM, &tmp); + DST = tmp; CONT; ALU_MOD_K: tmp = (u32) DST; @@ -377,7 +377,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) ALU64_DIV_X: if (unlikely(SRC == 0)) return 0; - do_div(DST, SRC); + DST = div64_u64(DST, SRC); CONT; ALU_DIV_X: if (unlikely(SRC == 0)) @@ -387,7 +387,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) DST = (u32) tmp; CONT; ALU64_DIV_K: - do_div(DST, IMM); + DST = div64_u64(DST, IMM); CONT; ALU_DIV_K: tmp = (u32) DST; From a5011d44f0e1117a6db14b19b57c51f8be5673a0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 21 Apr 2015 11:20:30 +0200 Subject: [PATCH 048/104] uas: Allow uas_use_uas_driver to return usb-storage flags uas_use_uas_driver may set some US_FL_foo flags during detection, currently these are stored in a local variable and then throw away, but these may be of interest to the caller, so add an extra parameter to (optionally) return the detected flags, and use this in the uas driver. Cc: stable@vger.kernel.org # 3.16 Signed-off-by: Hans de Goede Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas-detect.h | 6 +++++- drivers/usb/storage/uas.c | 6 +++--- drivers/usb/storage/usb.c | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/usb/storage/uas-detect.h b/drivers/usb/storage/uas-detect.h index 9893d696fc97..63ae1619fdb8 100644 --- a/drivers/usb/storage/uas-detect.h +++ b/drivers/usb/storage/uas-detect.h @@ -51,7 +51,8 @@ static int uas_find_endpoints(struct usb_host_interface *alt, } static int uas_use_uas_driver(struct usb_interface *intf, - const struct usb_device_id *id) + const struct usb_device_id *id, + unsigned long *flags_ret) { struct usb_host_endpoint *eps[4] = { }; struct usb_device *udev = interface_to_usbdev(intf); @@ -132,5 +133,8 @@ static int uas_use_uas_driver(struct usb_interface *intf, return 0; } + if (flags_ret) + *flags_ret = flags; + return 1; } diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 6cdabdc119a7..c6109c111aab 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -887,8 +887,9 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) struct Scsi_Host *shost = NULL; struct uas_dev_info *devinfo; struct usb_device *udev = interface_to_usbdev(intf); + unsigned long dev_flags; - if (!uas_use_uas_driver(intf, id)) + if (!uas_use_uas_driver(intf, id, &dev_flags)) return -ENODEV; if (uas_switch_interface(udev, intf)) @@ -910,8 +911,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) devinfo->udev = udev; devinfo->resetting = 0; devinfo->shutdown = 0; - devinfo->flags = id->driver_info; - usb_stor_adjust_quirks(udev, &devinfo->flags); + devinfo->flags = dev_flags; init_usb_anchor(&devinfo->cmd_urbs); init_usb_anchor(&devinfo->sense_urbs); init_usb_anchor(&devinfo->data_urbs); diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 5600c33fcadb..db6f6b5ec745 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -1080,7 +1080,7 @@ static int storage_probe(struct usb_interface *intf, /* If uas is enabled and this device can do uas then ignore it. */ #if IS_ENABLED(CONFIG_USB_UAS) - if (uas_use_uas_driver(intf, id)) + if (uas_use_uas_driver(intf, id, NULL)) return -ENXIO; #endif From ee136af4a064c2f61e2025873584d2c7ec93f4ae Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 21 Apr 2015 11:20:31 +0200 Subject: [PATCH 049/104] uas: Add US_FL_MAX_SECTORS_240 flag The usb-storage driver sets max_sectors = 240 in its scsi-host template, for uas we do not want to do that for all devices, but testing has shown that some devices need it. This commit adds a US_FL_MAX_SECTORS_240 flag for such devices, and implements support for it in uas.c, while at it it also adds support for US_FL_MAX_SECTORS_64 to uas.c. Cc: stable@vger.kernel.org # 3.16 Signed-off-by: Hans de Goede Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 2 ++ drivers/usb/storage/uas.c | 10 +++++++++- drivers/usb/storage/usb.c | 6 +++++- include/linux/usb_usual.h | 2 ++ 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f6befa9855c1..61ab1628a057 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3787,6 +3787,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. READ_CAPACITY_16 command); f = NO_REPORT_OPCODES (don't use report opcodes command, uas only); + g = MAX_SECTORS_240 (don't transfer more than + 240 sectors at a time, uas only); h = CAPACITY_HEURISTICS (decrease the reported device capacity by one sector if the number is odd); diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index c6109c111aab..6d3122afeed3 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -759,7 +759,10 @@ static int uas_eh_bus_reset_handler(struct scsi_cmnd *cmnd) static int uas_slave_alloc(struct scsi_device *sdev) { - sdev->hostdata = (void *)sdev->host->hostdata; + struct uas_dev_info *devinfo = + (struct uas_dev_info *)sdev->host->hostdata; + + sdev->hostdata = devinfo; /* USB has unusual DMA-alignment requirements: Although the * starting address of each scatter-gather element doesn't matter, @@ -778,6 +781,11 @@ static int uas_slave_alloc(struct scsi_device *sdev) */ blk_queue_update_dma_alignment(sdev->request_queue, (512 - 1)); + if (devinfo->flags & US_FL_MAX_SECTORS_64) + blk_queue_max_hw_sectors(sdev->request_queue, 64); + else if (devinfo->flags & US_FL_MAX_SECTORS_240) + blk_queue_max_hw_sectors(sdev->request_queue, 240); + return 0; } diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index db6f6b5ec745..6c10c888f35f 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -479,7 +479,8 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags) US_FL_SINGLE_LUN | US_FL_NO_WP_DETECT | US_FL_NO_READ_DISC_INFO | US_FL_NO_READ_CAPACITY_16 | US_FL_INITIAL_READ10 | US_FL_WRITE_CACHE | - US_FL_NO_ATA_1X | US_FL_NO_REPORT_OPCODES); + US_FL_NO_ATA_1X | US_FL_NO_REPORT_OPCODES | + US_FL_MAX_SECTORS_240); p = quirks; while (*p) { @@ -520,6 +521,9 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags) case 'f': f |= US_FL_NO_REPORT_OPCODES; break; + case 'g': + f |= US_FL_MAX_SECTORS_240; + break; case 'h': f |= US_FL_CAPACITY_HEURISTICS; break; diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index a7f2604c5f25..7f5f78bd15ad 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -77,6 +77,8 @@ /* Cannot handle ATA_12 or ATA_16 CDBs */ \ US_FLAG(NO_REPORT_OPCODES, 0x04000000) \ /* Cannot handle MI_REPORT_SUPPORTED_OPERATION_CODES */ \ + US_FLAG(MAX_SECTORS_240, 0x08000000) \ + /* Sets max_sectors to 240 */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; From 8e779c6c4a398763c21371fe40f649206041dc1e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 21 Apr 2015 11:20:32 +0200 Subject: [PATCH 050/104] uas: Set max_sectors_240 quirk for ASM1053 devices Testing has shown that ASM1053 devices do not work properly with transfers larger than 240 sectors, so set max_sectors to 240 on these. Cc: stable@vger.kernel.org # 3.16 Reported-by: Steve Bangert Signed-off-by: Hans de Goede Tested-by: Steve Bangert Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas-detect.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/storage/uas-detect.h b/drivers/usb/storage/uas-detect.h index 63ae1619fdb8..f58caa9e6a27 100644 --- a/drivers/usb/storage/uas-detect.h +++ b/drivers/usb/storage/uas-detect.h @@ -74,7 +74,7 @@ static int uas_use_uas_driver(struct usb_interface *intf, * this writing the following versions exist: * ASM1051 - no uas support version * ASM1051 - with broken (*) uas support - * ASM1053 - with working uas support + * ASM1053 - with working uas support, but problems with large xfers * ASM1153 - with working uas support * * Devices with these chips re-use a number of device-ids over the @@ -104,6 +104,9 @@ static int uas_use_uas_driver(struct usb_interface *intf, } else if (usb_ss_max_streams(&eps[1]->ss_ep_comp) == 32) { /* Possibly an ASM1051, disable uas */ flags |= US_FL_IGNORE_UAS; + } else { + /* ASM1053, these have issues with large transfers */ + flags |= US_FL_MAX_SECTORS_240; } } From a5a356cee89f86ff86cc3ce24136ca1f802c1bf1 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Sun, 12 Apr 2015 17:51:02 +0800 Subject: [PATCH 051/104] usb: chipidea: otg: remove mutex unlock and lock while stop and start role Wrongly release mutex lock during otg_statemachine may result in re-enter otg_statemachine, which is not allowed, we should do next state transtition after previous one completed. Fixes: 826cfe751f3e ("usb: chipidea: add OTG fsm operation functions implementation") Cc: # v3.16+ Signed-off-by: Li Jun Signed-off-by: Peter Chen Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/otg_fsm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/chipidea/otg_fsm.c b/drivers/usb/chipidea/otg_fsm.c index 083acf45ad5a..19d655a743b5 100644 --- a/drivers/usb/chipidea/otg_fsm.c +++ b/drivers/usb/chipidea/otg_fsm.c @@ -520,7 +520,6 @@ static int ci_otg_start_host(struct otg_fsm *fsm, int on) { struct ci_hdrc *ci = container_of(fsm, struct ci_hdrc, fsm); - mutex_unlock(&fsm->lock); if (on) { ci_role_stop(ci); ci_role_start(ci, CI_ROLE_HOST); @@ -529,7 +528,6 @@ static int ci_otg_start_host(struct otg_fsm *fsm, int on) hw_device_reset(ci); ci_role_start(ci, CI_ROLE_GADGET); } - mutex_lock(&fsm->lock); return 0; } @@ -537,12 +535,10 @@ static int ci_otg_start_gadget(struct otg_fsm *fsm, int on) { struct ci_hdrc *ci = container_of(fsm, struct ci_hdrc, fsm); - mutex_unlock(&fsm->lock); if (on) usb_gadget_vbus_connect(&ci->gadget); else usb_gadget_vbus_disconnect(&ci->gadget); - mutex_lock(&fsm->lock); return 0; } From bb304b71f8dbcb284f0f876dfb4eecbadf079773 Mon Sep 17 00:00:00 2001 From: "Ivan T. Ivanov" Date: Mon, 27 Apr 2015 15:27:36 +0300 Subject: [PATCH 052/104] Revert "usb: host: ehci-msm: Use devm_ioremap_resource instead of devm_ioremap" This reverts commit 70843f623b58 ("usb: host: ehci-msm: Use devm_ioremap_resource instead of devm_ioremap") and commit e507bf577e5a ("host: ehci-msm: remove duplicate check on resource"), because msm_otg and this driver are using same address space to access AHB mode and USB command registers. Signed-off-by: Ivan T. Ivanov Acked-by: Alan Stern Acked-by: Vivek Gautam Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-msm.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/ehci-msm.c b/drivers/usb/host/ehci-msm.c index 9db74ca7e5b9..275c92e53a59 100644 --- a/drivers/usb/host/ehci-msm.c +++ b/drivers/usb/host/ehci-msm.c @@ -88,13 +88,20 @@ static int ehci_msm_probe(struct platform_device *pdev) } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - hcd->regs = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(hcd->regs)) { - ret = PTR_ERR(hcd->regs); + if (!res) { + dev_err(&pdev->dev, "Unable to get memory resource\n"); + ret = -ENODEV; goto put_hcd; } + hcd->rsrc_start = res->start; hcd->rsrc_len = resource_size(res); + hcd->regs = devm_ioremap(&pdev->dev, hcd->rsrc_start, hcd->rsrc_len); + if (!hcd->regs) { + dev_err(&pdev->dev, "ioremap failed\n"); + ret = -ENOMEM; + goto put_hcd; + } /* * OTG driver takes care of PHY initialization, clock management, From 0d3bba0287d4e284c3ec7d3397e81eec920d5e7e Mon Sep 17 00:00:00 2001 From: Quentin Casasnovas Date: Tue, 14 Apr 2015 11:25:43 +0200 Subject: [PATCH 053/104] cdc-acm: prevent infinite loop when parsing CDC headers. Phil and I found out a problem with commit: 7e860a6e7aa6 ("cdc-acm: add sanity checks") It added some sanity checks to ignore potential garbage in CDC headers but also introduced a potential infinite loop. This can happen at the first loop iteration (elength = 0 in that case) if the description isn't a DT_CS_INTERFACE or later if 'buffer[0]' is zero. It should also be noted that the wrong length was being added to 'buffer' in case 'buffer[1]' was not a DT_CS_INTERFACE descriptor, since elength was assigned after that check in the loop. A specially crafted USB device could be used to trigger this infinite loop. Fixes: 7e860a6e7aa6 ("cdc-acm: add sanity checks") Signed-off-by: Phil Turnbull Signed-off-by: Quentin Casasnovas CC: Sergei Shtylyov CC: Oliver Neukum CC: Adam Lee CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 3e15add665e2..5c8f58114677 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1142,11 +1142,16 @@ static int acm_probe(struct usb_interface *intf, } while (buflen > 0) { + elength = buffer[0]; + if (!elength) { + dev_err(&intf->dev, "skipping garbage byte\n"); + elength = 1; + goto next_desc; + } if (buffer[1] != USB_DT_CS_INTERFACE) { dev_err(&intf->dev, "skipping garbage\n"); goto next_desc; } - elength = buffer[0]; switch (buffer[2]) { case USB_CDC_UNION_TYPE: /* we've found it */ From b00f5c2dc01450bed9fed1a41a637fa917e03c5c Mon Sep 17 00:00:00 2001 From: Frederic Danis Date: Fri, 10 Apr 2015 15:13:05 +0200 Subject: [PATCH 054/104] tty: Re-add external interface for tty_set_termios() This is needed by Bluetooth hci_uart module to be able to change speed of Bluetooth controller and local UART. Signed-off-by: Frederic Danis Reviewed-by: Peter Hurley Cc: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ioctl.c | 3 ++- include/linux/tty.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 632fc8152061..8e53fe469664 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -536,7 +536,7 @@ EXPORT_SYMBOL(tty_termios_hw_change); * Locking: termios_rwsem */ -static int tty_set_termios(struct tty_struct *tty, struct ktermios *new_termios) +int tty_set_termios(struct tty_struct *tty, struct ktermios *new_termios) { struct ktermios old_termios; struct tty_ldisc *ld; @@ -569,6 +569,7 @@ static int tty_set_termios(struct tty_struct *tty, struct ktermios *new_termios) up_write(&tty->termios_rwsem); return 0; } +EXPORT_SYMBOL_GPL(tty_set_termios); /** * set_termios - set termios values for a tty diff --git a/include/linux/tty.h b/include/linux/tty.h index 358a337af598..fe5623c9af71 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -491,6 +491,7 @@ static inline speed_t tty_get_baud_rate(struct tty_struct *tty) extern void tty_termios_copy_hw(struct ktermios *new, struct ktermios *old); extern int tty_termios_hw_change(struct ktermios *a, struct ktermios *b); +extern int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *); extern void tty_ldisc_deref(struct tty_ldisc *); From 10afbe346bba125e2eedfd1a89b7bd5c92900859 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 11 Apr 2015 10:05:07 -0400 Subject: [PATCH 055/104] serial: core: Fix kernel-doc build warnings Fix uart_console_write() kernel-doc build warnings. Warning(drivers/tty/serial/serial_core.c:1778): No description found for parameter 'putchar' Warning(drivers/tty/serial/serial_core.c:1778): Excess function parameter 'write' description in 'uart_console_write' Fixes: 1cfe42b7fd29 ("serial: core: Fix kernel doc for uart_console_write()") Reported-by: Fengguang Wu Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index eb5b03be9dfd..0b7bb12dfc68 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1770,7 +1770,7 @@ static const struct file_operations uart_proc_fops = { * @port: the port to write the message * @s: array of characters * @count: number of characters in string to write - * @write: function to write character to port + * @putchar: function to write character to port */ void uart_console_write(struct uart_port *port, const char *s, unsigned int count, From 5c90c07b98c02198d9777a7c4f3047b0a94bf7ed Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Mon, 13 Apr 2015 16:34:21 +0200 Subject: [PATCH 056/104] serial: xilinx: Use platform_get_irq to get irq description structure For systems with CONFIG_SERIAL_OF_PLATFORM=y and device_type = "serial"; property in DT of_serial.c driver maps and unmaps IRQ (because driver probe fails). Then a driver is called but irq mapping is not created that's why driver is failing again in again on request_irq(). Based on this use platform_get_irq() instead of platform_get_resource() which is doing irq_desc allocation and driver itself can request IRQ. Fix both xilinx serial drivers in the tree. Signed-off-by: Michal Simek CC: Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/uartlite.c | 11 ++++++----- drivers/tty/serial/xilinx_uartps.c | 12 ++++++------ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c index 708eead850b0..b1c6bd3d483f 100644 --- a/drivers/tty/serial/uartlite.c +++ b/drivers/tty/serial/uartlite.c @@ -632,7 +632,8 @@ MODULE_DEVICE_TABLE(of, ulite_of_match); static int ulite_probe(struct platform_device *pdev) { - struct resource *res, *res2; + struct resource *res; + int irq; int id = pdev->id; #ifdef CONFIG_OF const __be32 *prop; @@ -646,11 +647,11 @@ static int ulite_probe(struct platform_device *pdev) if (!res) return -ENODEV; - res2 = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!res2) - return -ENODEV; + irq = platform_get_irq(pdev, 0); + if (irq <= 0) + return -ENXIO; - return ulite_assign(&pdev->dev, id, res->start, res2->start); + return ulite_assign(&pdev->dev, id, res->start, irq); } static int ulite_remove(struct platform_device *pdev) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index f218ec658f5d..3ddbac767db3 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -1331,9 +1331,9 @@ static SIMPLE_DEV_PM_OPS(cdns_uart_dev_pm_ops, cdns_uart_suspend, */ static int cdns_uart_probe(struct platform_device *pdev) { - int rc, id; + int rc, id, irq; struct uart_port *port; - struct resource *res, *res2; + struct resource *res; struct cdns_uart *cdns_uart_data; cdns_uart_data = devm_kzalloc(&pdev->dev, sizeof(*cdns_uart_data), @@ -1380,9 +1380,9 @@ static int cdns_uart_probe(struct platform_device *pdev) goto err_out_clk_disable; } - res2 = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!res2) { - rc = -ENODEV; + irq = platform_get_irq(pdev, 0); + if (irq <= 0) { + rc = -ENXIO; goto err_out_clk_disable; } @@ -1411,7 +1411,7 @@ static int cdns_uart_probe(struct platform_device *pdev) * and triggers invocation of the config_port() entry point. */ port->mapbase = res->start; - port->irq = res2->start; + port->irq = irq; port->dev = &pdev->dev; port->uartclk = clk_get_rate(cdns_uart_data->uartclk); port->private_data = cdns_uart_data; From 6befa9d883385c580369a2cc9e53fbf329771f6d Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 14 Apr 2015 12:03:09 +0200 Subject: [PATCH 057/104] serial: of-serial: Remove device_type = "serial" registration Do not probe all serial drivers by of_serial.c which are using device_type = "serial"; property. Only drivers which have valid compatible strings listed in the driver should be probed. When PORT_UNKNOWN is setup probe will fail anyway. Arnd quotation about driver historical background: "when I wrote that driver initially, the idea was that it would get used as a stub to hook up all other serial drivers but after that, the common code learned to create platform devices from DT" This patch fix the problem with on the system with xilinx_uartps and 16550a where of_serial failed to register for xilinx_uartps and because of irq_dispose_mapping() removed irq_desc. Then when xilinx_uartps was asking for irq with request_irq() EINVAL is returned. Signed-off-by: Michal Simek CC: Acked-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/of_serial.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/serial/of_serial.c b/drivers/tty/serial/of_serial.c index 5b73afb9f9f3..137381e649e5 100644 --- a/drivers/tty/serial/of_serial.c +++ b/drivers/tty/serial/of_serial.c @@ -346,7 +346,6 @@ static const struct of_device_id of_platform_serial_table[] = { { .compatible = "ibm,qpace-nwp-serial", .data = (void *)PORT_NWPSERIAL, }, #endif - { .type = "serial", .data = (void *)PORT_UNKNOWN, }, { /* end of list */ }, }; From a8d4e01637902311c5643b69a5c80e2805f04054 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Thu, 16 Apr 2015 16:58:12 +0200 Subject: [PATCH 058/104] tty/serial: at91: maxburst was missing for dma transfers Maxburst was not set when doing the dma slave configuration. This value is checked by the recently introduced xdmac. It causes an error when doing the slave configuration and so prevents from using dma. Signed-off-by: Ludovic Desroches Cc: # 3.12 and later Acked-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index d58fe4763d9e..27dade29646b 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -880,6 +880,7 @@ static int atmel_prepare_tx_dma(struct uart_port *port) config.direction = DMA_MEM_TO_DEV; config.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; config.dst_addr = port->mapbase + ATMEL_US_THR; + config.dst_maxburst = 1; ret = dmaengine_slave_config(atmel_port->chan_tx, &config); @@ -1059,6 +1060,7 @@ static int atmel_prepare_rx_dma(struct uart_port *port) config.direction = DMA_DEV_TO_MEM; config.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; config.src_addr = port->mapbase + ATMEL_US_RHR; + config.src_maxburst = 1; ret = dmaengine_slave_config(atmel_port->chan_rx, &config); From 31c6ba97d1671b0a853197fe54a05e0b07ce2631 Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Fri, 17 Apr 2015 08:43:09 +0200 Subject: [PATCH 059/104] serial: samsung: fix serial console break This patch fixes problems with serial console break. When function s3c64xx_serial_startup() was started while serial console has been working, it caused lose of characters written to TX FIFO. This effect was particularly observable with systemd, which closes serial port every time when it's not currently needed, hence function s3c64xx_serial_startup() is called quite often there. To fix this problem we avoid resetting TX FIFO if port is used as serial console. Example of broken console log: [ 1086.7 Expecting device dev-ttySAC1.device... [ 1086.[ OK ] Reached target Paths. [ 1086.756416] s[ OK ] Reached target Swap. [ 1086.776413] systemd[1]: Reached target Swap. [ 1086.776642] systemd[1]: Starting Root Slice. [ 5.53403[ OK ] Created slice Root Slice. [ 5.548433] systemd[1]: Create[ OK ] Created slice User and Session Slice. [ 5.568414] sys[ OK ] Listening on /dev/initctl Compatibility Named Pipe. [ 5.588388] s[ OK ] Listening on Delayed Shutdown Socket. [ 5.608376] sy[ OK ] Listening on Journal Socket (/dev/log). [ 5.628361] [ OK ] Listening on udev Kernel Socket. [ 5.648357] s[ OK ] Listening on udev Control Socket. [ 5.668353] s[ OK ] Listening on Journal Socket. [ 5.688366] systemd[1]: Listeni[ OK ] Created slice System Slice. [ 5.708393] Mounting Temporary Directory... [ 7139.067436] Starting prepare device daemon... [ 7139.091726] sy Starting Generate environment from /etc/profile.d... [ 5.792867] system Starting Create Static Device Nodes in /dev... [ 7848.718 Mounting Debug File System... [ 7848.7384 Mounting Configuration File System... [ 5.852 Starting Apply Kernel Variables... [ 5.8720 Starting Setup Virtual Console... [ 7848.798 Starting udev Coldplug all Devices... [ 7848.817 Starting Journal Service... [ OK ] Started Journal Service. [ 7848.854222] s[ OK ] Reached target Slices. Starting Remount Root and Kernel File Systems... [ OK ] Mounted Configuration File System. Reported-by: Chanwoo Choi Signed-off-by: Robert Baldyga Reviewed-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index cf08876922f1..a0ae942d9562 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -1068,8 +1068,9 @@ static int s3c64xx_serial_startup(struct uart_port *port) spin_lock_irqsave(&port->lock, flags); ufcon = rd_regl(port, S3C2410_UFCON); - ufcon |= S3C2410_UFCON_RESETRX | S3C2410_UFCON_RESETTX | - S5PV210_UFCON_RXTRIG8; + ufcon |= S3C2410_UFCON_RESETRX | S5PV210_UFCON_RXTRIG8; + if (!uart_console(port)) + ufcon |= S3C2410_UFCON_RESETTX; wr_regl(port, S3C2410_UFCON, ufcon); enable_rx_pio(ourport); From 9b0f5d63e74a987bf56cc1774baca80a291c9d8d Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 28 Apr 2015 10:36:45 -0400 Subject: [PATCH 060/104] tile: properly use node_isset() on a nodemask_t The code accidentally used cpu_isset() previously in one place (though properly node_isset() elsewhere). Signed-off-by: Chris Metcalf --- arch/tile/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 6873f006f7d0..d366675e4bf8 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -774,7 +774,7 @@ static void __init zone_sizes_init(void) * though, there'll be no lowmem, so we just alloc_bootmem * the memmap. There will be no percpu memory either. */ - if (i != 0 && cpumask_test_cpu(i, &isolnodes)) { + if (i != 0 && node_isset(i, isolnodes)) { node_memmap_pfn[i] = alloc_bootmem_pfn(0, memmap_size, 0); BUG_ON(node_percpu[i] != 0); From 9d7dd6cd2a1d0d307be6bf783e40e7fda17d6dba Mon Sep 17 00:00:00 2001 From: Rajeev Kumar Date: Tue, 28 Apr 2015 11:30:08 +0530 Subject: [PATCH 061/104] ASoC: Update email-id of Rajeev Kumar rajeev-dlh.kumar@st.com email-id doesn't exist anymore as I have left the company. Replace ST's id with Rajeev Kumar Signed-off-by: Rajeev Kumar Signed-off-by: Mark Brown --- include/sound/designware_i2s.h | 2 +- include/sound/spear_dma.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/sound/designware_i2s.h b/include/sound/designware_i2s.h index 26f406e0f673..3a8fca9409a7 100644 --- a/include/sound/designware_i2s.h +++ b/include/sound/designware_i2s.h @@ -1,5 +1,5 @@ /* - * Copyright (ST) 2012 Rajeev Kumar (rajeev-dlh.kumar@st.com) + * Copyright (ST) 2012 Rajeev Kumar (rajeevkumar.linux@gmail.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/include/sound/spear_dma.h b/include/sound/spear_dma.h index 65aca51fe255..e290de4e7e82 100644 --- a/include/sound/spear_dma.h +++ b/include/sound/spear_dma.h @@ -1,7 +1,7 @@ /* * linux/spear_dma.h * -* Copyright (ST) 2012 Rajeev Kumar (rajeev-dlh.kumar@st.com) +* Copyright (ST) 2012 Rajeev Kumar (rajeevkumar.linux@gmail.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by From 1c94e65c668f44d2c69ae7e7fc268ab3268fba3e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Apr 2015 17:11:44 +0200 Subject: [PATCH 062/104] ALSA: emux: Fix mutex deadlock in OSS emulation The OSS emulation in synth-emux helper has a potential AB/BA deadlock at the simultaneous closing and opening: close -> snd_seq_release() -> sne_seq_free_client() -> snd_seq_delete_all_ports(): takes client->ports_mutex -> port_delete() -> snd_emux_unuse(): takes emux->register_mutex open -> snd_seq_oss_open() -> snd_emux_open_seq_oss(): takes emux->register_mutex -> snd_seq_event_port_attach() -> snd_seq_create_port(): takes client->ports_mutex This patch addresses the deadlock by reducing the rance taking emux->register_mutex in snd_emux_open_seq_oss(). The lock is needed for the refcount handling, so move it locally. The calls in emux_seq.c are already with the mutex, thus they are replaced with the version without mutex lock/unlock. Cc: Signed-off-by: Takashi Iwai --- sound/synth/emux/emux_oss.c | 11 +---------- sound/synth/emux/emux_seq.c | 27 +++++++++++++++++++++------ 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/sound/synth/emux/emux_oss.c b/sound/synth/emux/emux_oss.c index ab37add269ae..82e350e9501c 100644 --- a/sound/synth/emux/emux_oss.c +++ b/sound/synth/emux/emux_oss.c @@ -118,12 +118,8 @@ snd_emux_open_seq_oss(struct snd_seq_oss_arg *arg, void *closure) if (snd_BUG_ON(!arg || !emu)) return -ENXIO; - mutex_lock(&emu->register_mutex); - - if (!snd_emux_inc_count(emu)) { - mutex_unlock(&emu->register_mutex); + if (!snd_emux_inc_count(emu)) return -EFAULT; - } memset(&callback, 0, sizeof(callback)); callback.owner = THIS_MODULE; @@ -135,7 +131,6 @@ snd_emux_open_seq_oss(struct snd_seq_oss_arg *arg, void *closure) if (p == NULL) { snd_printk(KERN_ERR "can't create port\n"); snd_emux_dec_count(emu); - mutex_unlock(&emu->register_mutex); return -ENOMEM; } @@ -148,8 +143,6 @@ snd_emux_open_seq_oss(struct snd_seq_oss_arg *arg, void *closure) reset_port_mode(p, arg->seq_mode); snd_emux_reset_port(p); - - mutex_unlock(&emu->register_mutex); return 0; } @@ -195,13 +188,11 @@ snd_emux_close_seq_oss(struct snd_seq_oss_arg *arg) if (snd_BUG_ON(!emu)) return -ENXIO; - mutex_lock(&emu->register_mutex); snd_emux_sounds_off_all(p); snd_soundfont_close_check(emu->sflist, SF_CLIENT_NO(p->chset.port)); snd_seq_event_port_detach(p->chset.client, p->chset.port); snd_emux_dec_count(emu); - mutex_unlock(&emu->register_mutex); return 0; } diff --git a/sound/synth/emux/emux_seq.c b/sound/synth/emux/emux_seq.c index 188fda0effb0..a0209204ae48 100644 --- a/sound/synth/emux/emux_seq.c +++ b/sound/synth/emux/emux_seq.c @@ -267,8 +267,8 @@ snd_emux_event_input(struct snd_seq_event *ev, int direct, void *private_data, /* * increment usage count */ -int -snd_emux_inc_count(struct snd_emux *emu) +static int +__snd_emux_inc_count(struct snd_emux *emu) { emu->used++; if (!try_module_get(emu->ops.owner)) @@ -282,12 +282,21 @@ snd_emux_inc_count(struct snd_emux *emu) return 1; } +int snd_emux_inc_count(struct snd_emux *emu) +{ + int ret; + + mutex_lock(&emu->register_mutex); + ret = __snd_emux_inc_count(emu); + mutex_unlock(&emu->register_mutex); + return ret; +} /* * decrease usage count */ -void -snd_emux_dec_count(struct snd_emux *emu) +static void +__snd_emux_dec_count(struct snd_emux *emu) { module_put(emu->card->module); emu->used--; @@ -296,6 +305,12 @@ snd_emux_dec_count(struct snd_emux *emu) module_put(emu->ops.owner); } +void snd_emux_dec_count(struct snd_emux *emu) +{ + mutex_lock(&emu->register_mutex); + __snd_emux_dec_count(emu); + mutex_unlock(&emu->register_mutex); +} /* * Routine that is called upon a first use of a particular port @@ -315,7 +330,7 @@ snd_emux_use(void *private_data, struct snd_seq_port_subscribe *info) mutex_lock(&emu->register_mutex); snd_emux_init_port(p); - snd_emux_inc_count(emu); + __snd_emux_inc_count(emu); mutex_unlock(&emu->register_mutex); return 0; } @@ -338,7 +353,7 @@ snd_emux_unuse(void *private_data, struct snd_seq_port_subscribe *info) mutex_lock(&emu->register_mutex); snd_emux_sounds_off_all(p); - snd_emux_dec_count(emu); + __snd_emux_dec_count(emu); mutex_unlock(&emu->register_mutex); return 0; } From 96a5d18bc1338786fecac73599f1681f59a59a8e Mon Sep 17 00:00:00 2001 From: Soeren Grunewald Date: Tue, 28 Apr 2015 16:29:49 +0200 Subject: [PATCH 063/104] serial: 8250_pci: Add support for 16 port Exar boards The Exar XR17V358 chip usually provides only 8 ports. But two chips can be combined to act as a single 16 port chip. Therefor one chip is configured as master the second as slave by connecting the mode pin to VCC (master) or GND (slave). Then the master chip is reporting a different device-id depending on whether a slave is detected or not. The UARTs 8-15 are addressed from 0x2000-0x3fff. So the offset of 0x400 from UART to UART can be used to address all 16 ports as before. See: https://www.exar.com/common/content/document.ashx?id=1587 page 11 Signed-off-by: Soeren Grunewald Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 08da4d3e2162..46bcebba54b2 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1998,6 +1998,8 @@ pci_wch_ch38x_setup(struct serial_private *priv, #define PCIE_DEVICE_ID_WCH_CH382_2S1P 0x3250 #define PCIE_DEVICE_ID_WCH_CH384_4S 0x3470 +#define PCI_DEVICE_ID_EXAR_XR17V8358 0x8358 + /* Unknown vendors/cards - this should not be in linux/pci_ids.h */ #define PCI_SUBDEVICE_ID_UNKNOWN_0x1584 0x1584 #define PCI_SUBDEVICE_ID_UNKNOWN_0x1588 0x1588 @@ -2520,6 +2522,13 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .subdevice = PCI_ANY_ID, .setup = pci_xr17v35x_setup, }, + { + .vendor = PCI_VENDOR_ID_EXAR, + .device = PCI_DEVICE_ID_EXAR_XR17V8358, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_xr17v35x_setup, + }, /* * Xircom cards */ @@ -2999,6 +3008,7 @@ enum pci_board_num_t { pbn_exar_XR17V352, pbn_exar_XR17V354, pbn_exar_XR17V358, + pbn_exar_XR17V8358, pbn_exar_ibm_saturn, pbn_pasemi_1682M, pbn_ni8430_2, @@ -3685,6 +3695,14 @@ static struct pciserial_board pci_boards[] = { .reg_shift = 0, .first_offset = 0, }, + [pbn_exar_XR17V8358] = { + .flags = FL_BASE0, + .num_ports = 16, + .base_baud = 7812500, + .uart_offset = 0x400, + .reg_shift = 0, + .first_offset = 0, + }, [pbn_exar_ibm_saturn] = { .flags = FL_BASE0, .num_ports = 1, @@ -5080,7 +5098,7 @@ static struct pci_device_id serial_pci_tbl[] = { 0, 0, pbn_exar_XR17C158 }, /* - * Exar Corp. XR17V35[248] Dual/Quad/Octal PCIe UARTs + * Exar Corp. XR17V[48]35[248] Dual/Quad/Octal/Hexa PCIe UARTs */ { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V352, PCI_ANY_ID, PCI_ANY_ID, @@ -5094,7 +5112,10 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_exar_XR17V358 }, - + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V8358, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_exar_XR17V8358 }, /* * Topic TP560 Data/Fax/Voice 56k modem (reported by Evan Clarke) */ From 9e9d55e69a95f8583283d9f01b04562d1278c95d Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Tue, 28 Apr 2015 16:54:04 +0200 Subject: [PATCH 064/104] ACPICA: remove duplicate u8 typedef During commit e252652fb266 ("ACPICA: acpidump: Remove integer types translation protection.") two 'unsigned char' types got converted to 'u8'. The result does not compile with gcc-4.5, it can not cope with duplicate typedefs. Signed-off-by: Olaf Hering Signed-off-by: Rafael J. Wysocki --- include/acpi/actypes.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index f5ca0e989bba..1c3002e1db20 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -123,7 +123,6 @@ #ifndef ACPI_USE_SYSTEM_INTTYPES -typedef unsigned char u8; typedef unsigned char u8; typedef unsigned short u16; typedef short s16; From 61f8ff693923e4b19748b0e8287b99778f2661c7 Mon Sep 17 00:00:00 2001 From: Chris Bainbridge Date: Wed, 22 Apr 2015 16:40:21 +0100 Subject: [PATCH 065/104] ACPI / SBS: Enable battery manager when present Commit 9faf6136ff46 (ACPI / SBS: Disable smart battery manager on Apple) introduced a regression disabling the SBS battery manager. The battery manager should be marked as present when acpi_manager_get_info() returns 0. Fixes: 9faf6136ff46 (ACPI / SBS: Disable smart battery manager on Apple) Signed-off-by: Chris Bainbridge Cc: 3.18+ # 3.18+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index cd827625cf07..01504c819e8f 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -684,7 +684,7 @@ static int acpi_sbs_add(struct acpi_device *device) if (!sbs_manager_broken) { result = acpi_manager_get_info(sbs); if (!result) { - sbs->manager_present = 0; + sbs->manager_present = 1; for (id = 0; id < MAX_SBS_BAT; ++id) if ((sbs->batteries_supported & (1 << id))) acpi_battery_add(sbs, id); From 7241ea558c6715501e777396b5fc312c372e11d9 Mon Sep 17 00:00:00 2001 From: Peter Zubaj Date: Tue, 28 Apr 2015 21:57:29 +0200 Subject: [PATCH 066/104] ALSA: emu10k1: Emu10k2 32 bit DMA mode Looks like audigy emu10k2 (probably emu10k1 - sb live too) support two modes for DMA. Second mode is useful for 64 bit os with more then 2 GB of ram (fixes problems with big soundfont loading) 1) 32MB from 2 GB address space using 8192 pages (used now as default) 2) 16MB from 4 GB address space using 4096 pages Mode is set using HCFG_EXPANDED_MEM flag in HCFG register. Also format of emu10k2 page table is then different. Signed-off-by: Peter Zubaj Tested-by: Takashi Iwai Cc: Signed-off-by: Takashi Iwai --- include/sound/emu10k1.h | 14 +++++++++----- sound/pci/emu10k1/emu10k1_callback.c | 4 ++-- sound/pci/emu10k1/emu10k1_main.c | 17 ++++++++++++----- sound/pci/emu10k1/emupcm.c | 2 +- sound/pci/emu10k1/memory.c | 11 ++++++----- 5 files changed, 30 insertions(+), 18 deletions(-) diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 0de95ccb92cf..5bd134651f5e 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -41,7 +41,8 @@ #define EMUPAGESIZE 4096 #define MAXREQVOICES 8 -#define MAXPAGES 8192 +#define MAXPAGES0 4096 /* 32 bit mode */ +#define MAXPAGES1 8192 /* 31 bit mode */ #define RESERVED 0 #define NUM_MIDI 16 #define NUM_G 64 /* use all channels */ @@ -50,8 +51,7 @@ /* FIXME? - according to the OSS driver the EMU10K1 needs a 29 bit DMA mask */ #define EMU10K1_DMA_MASK 0x7fffffffUL /* 31bit */ -#define AUDIGY_DMA_MASK 0x7fffffffUL /* 31bit FIXME - 32 should work? */ - /* See ALSA bug #1276 - rlrevell */ +#define AUDIGY_DMA_MASK 0xffffffffUL /* 32bit mode */ #define TMEMSIZE 256*1024 #define TMEMSIZEREG 4 @@ -466,8 +466,11 @@ #define MAPB 0x0d /* Cache map B */ -#define MAP_PTE_MASK 0xffffe000 /* The 19 MSBs of the PTE indexed by the PTI */ -#define MAP_PTI_MASK 0x00001fff /* The 13 bit index to one of the 8192 PTE dwords */ +#define MAP_PTE_MASK0 0xfffff000 /* The 20 MSBs of the PTE indexed by the PTI */ +#define MAP_PTI_MASK0 0x00000fff /* The 12 bit index to one of the 4096 PTE dwords */ + +#define MAP_PTE_MASK1 0xffffe000 /* The 19 MSBs of the PTE indexed by the PTI */ +#define MAP_PTI_MASK1 0x00001fff /* The 13 bit index to one of the 8192 PTE dwords */ /* 0x0e, 0x0f: Not used */ @@ -1704,6 +1707,7 @@ struct snd_emu10k1 { unsigned short model; /* subsystem id */ unsigned int card_type; /* EMU10K1_CARD_* */ unsigned int ecard_ctrl; /* ecard control bits */ + unsigned int address_mode; /* address mode */ unsigned long dma_mask; /* PCI DMA mask */ unsigned int delay_pcm_irq; /* in samples */ int max_cache_pages; /* max memory size / PAGE_SIZE */ diff --git a/sound/pci/emu10k1/emu10k1_callback.c b/sound/pci/emu10k1/emu10k1_callback.c index 874cd76c7b7f..d2c7ea3a7610 100644 --- a/sound/pci/emu10k1/emu10k1_callback.c +++ b/sound/pci/emu10k1/emu10k1_callback.c @@ -415,7 +415,7 @@ start_voice(struct snd_emux_voice *vp) snd_emu10k1_ptr_write(hw, Z2, ch, 0); /* invalidate maps */ - temp = (hw->silent_page.addr << 1) | MAP_PTI_MASK; + temp = (hw->silent_page.addr << hw->address_mode) | (hw->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0); snd_emu10k1_ptr_write(hw, MAPA, ch, temp); snd_emu10k1_ptr_write(hw, MAPB, ch, temp); #if 0 @@ -436,7 +436,7 @@ start_voice(struct snd_emux_voice *vp) snd_emu10k1_ptr_write(hw, CDF, ch, sample); /* invalidate maps */ - temp = ((unsigned int)hw->silent_page.addr << 1) | MAP_PTI_MASK; + temp = ((unsigned int)hw->silent_page.addr << hw_address_mode) | (hw->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0); snd_emu10k1_ptr_write(hw, MAPA, ch, temp); snd_emu10k1_ptr_write(hw, MAPB, ch, temp); diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 4f8cf5e7e45f..a4548147c621 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -282,7 +282,7 @@ static int snd_emu10k1_init(struct snd_emu10k1 *emu, int enable_ir, int resume) snd_emu10k1_ptr_write(emu, TCB, 0, 0); /* taken from original driver */ snd_emu10k1_ptr_write(emu, TCBS, 0, 4); /* taken from original driver */ - silent_page = (emu->silent_page.addr << 1) | MAP_PTI_MASK; + silent_page = (emu->silent_page.addr << emu->address_mode) | (emu->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0); for (ch = 0; ch < NUM_G; ch++) { snd_emu10k1_ptr_write(emu, MAPA, ch, silent_page); snd_emu10k1_ptr_write(emu, MAPB, ch, silent_page); @@ -348,6 +348,11 @@ static int snd_emu10k1_init(struct snd_emu10k1 *emu, int enable_ir, int resume) outl(reg | A_IOCFG_GPOUT0, emu->port + A_IOCFG); } + if (emu->address_mode == 0) { + /* use 16M in 4G */ + outl(inl(emu->port + HCFG) | HCFG_EXPANDED_MEM, emu->port + HCFG); + } + return 0; } @@ -1902,8 +1907,10 @@ int snd_emu10k1_create(struct snd_card *card, is_audigy = emu->audigy = c->emu10k2_chip; + /* set addressing mode */ + emu->address_mode = is_audigy ? 0 : 1; /* set the DMA transfer mask */ - emu->dma_mask = is_audigy ? AUDIGY_DMA_MASK : EMU10K1_DMA_MASK; + emu->dma_mask = emu->address_mode ? EMU10K1_DMA_MASK : AUDIGY_DMA_MASK; if (pci_set_dma_mask(pci, emu->dma_mask) < 0 || pci_set_consistent_dma_mask(pci, emu->dma_mask) < 0) { dev_err(card->dev, @@ -1928,7 +1935,7 @@ int snd_emu10k1_create(struct snd_card *card, emu->max_cache_pages = max_cache_bytes >> PAGE_SHIFT; if (snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(pci), - 32 * 1024, &emu->ptb_pages) < 0) { + (emu->address_mode ? 32 : 16) * 1024, &emu->ptb_pages) < 0) { err = -ENOMEM; goto error; } @@ -2027,8 +2034,8 @@ int snd_emu10k1_create(struct snd_card *card, /* Clear silent pages and set up pointers */ memset(emu->silent_page.area, 0, PAGE_SIZE); - silent_page = emu->silent_page.addr << 1; - for (idx = 0; idx < MAXPAGES; idx++) + silent_page = emu->silent_page.addr << emu->address_mode; + for (idx = 0; idx < (emu->address_mode ? MAXPAGES1 : MAXPAGES0); idx++) ((u32 *)emu->ptb_pages.area)[idx] = cpu_to_le32(silent_page | idx); /* set up voice indices */ diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index 0dc07385af0e..14a305bd8a98 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -380,7 +380,7 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, snd_emu10k1_ptr_write(emu, Z1, voice, 0); snd_emu10k1_ptr_write(emu, Z2, voice, 0); /* invalidate maps */ - silent_page = ((unsigned int)emu->silent_page.addr << 1) | MAP_PTI_MASK; + silent_page = ((unsigned int)emu->silent_page.addr << emu->address_mode) | (emu->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0); snd_emu10k1_ptr_write(emu, MAPA, voice, silent_page); snd_emu10k1_ptr_write(emu, MAPB, voice, silent_page); /* modulation envelope */ diff --git a/sound/pci/emu10k1/memory.c b/sound/pci/emu10k1/memory.c index c68e6dd2fa67..4f1f69be1865 100644 --- a/sound/pci/emu10k1/memory.c +++ b/sound/pci/emu10k1/memory.c @@ -34,10 +34,11 @@ * aligned pages in others */ #define __set_ptb_entry(emu,page,addr) \ - (((u32 *)(emu)->ptb_pages.area)[page] = cpu_to_le32(((addr) << 1) | (page))) + (((u32 *)(emu)->ptb_pages.area)[page] = cpu_to_le32(((addr) << (emu->address_mode)) | (page))) #define UNIT_PAGES (PAGE_SIZE / EMUPAGESIZE) -#define MAX_ALIGN_PAGES (MAXPAGES / UNIT_PAGES) +#define MAX_ALIGN_PAGES0 (MAXPAGES0 / UNIT_PAGES) +#define MAX_ALIGN_PAGES1 (MAXPAGES1 / UNIT_PAGES) /* get aligned page from offset address */ #define get_aligned_page(offset) ((offset) >> PAGE_SHIFT) /* get offset address from aligned page */ @@ -124,7 +125,7 @@ static int search_empty_map_area(struct snd_emu10k1 *emu, int npages, struct lis } page = blk->mapped_page + blk->pages; } - size = MAX_ALIGN_PAGES - page; + size = (emu->address_mode ? MAX_ALIGN_PAGES1 : MAX_ALIGN_PAGES0) - page; if (size >= max_size) { *nextp = pos; return page; @@ -181,7 +182,7 @@ static int unmap_memblk(struct snd_emu10k1 *emu, struct snd_emu10k1_memblk *blk) q = get_emu10k1_memblk(p, mapped_link); end_page = q->mapped_page; } else - end_page = MAX_ALIGN_PAGES; + end_page = (emu->address_mode ? MAX_ALIGN_PAGES1 : MAX_ALIGN_PAGES0); /* remove links */ list_del(&blk->mapped_link); @@ -307,7 +308,7 @@ snd_emu10k1_alloc_pages(struct snd_emu10k1 *emu, struct snd_pcm_substream *subst if (snd_BUG_ON(!emu)) return NULL; if (snd_BUG_ON(runtime->dma_bytes <= 0 || - runtime->dma_bytes >= MAXPAGES * EMUPAGESIZE)) + runtime->dma_bytes >= (emu->address_mode ? MAXPAGES1 : MAXPAGES0) * EMUPAGESIZE)) return NULL; hdr = emu->memhdr; if (snd_BUG_ON(!hdr)) From 60a8d62b8497c23eb3d48149af7e55dac2dd83a2 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 28 Apr 2015 11:27:39 +0800 Subject: [PATCH 067/104] ASoC: rt5677: fixed wrong DMIC ref clock DMIC clock source is not from codec system clock directly. it is generated from the division of system clock. And it should be 256 * sample rate of AIF1. Signed-off-by: Bard Liao Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/rt5677.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c index c6d4e8fa8bd3..84d162d91ff6 100644 --- a/sound/soc/codecs/rt5677.c +++ b/sound/soc/codecs/rt5677.c @@ -904,7 +904,7 @@ static int set_dmic_clk(struct snd_soc_dapm_widget *w, { struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); struct rt5677_priv *rt5677 = snd_soc_codec_get_drvdata(codec); - int idx = rl6231_calc_dmic_clk(rt5677->sysclk); + int idx = rl6231_calc_dmic_clk(rt5677->lrck[RT5677_AIF1] << 8); if (idx < 0) dev_err(codec->dev, "Failed to set DMIC clock\n"); From df8d9eeadd0f7a216f2476351d5aee43c6550bf0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 29 Apr 2015 15:19:21 +0200 Subject: [PATCH 068/104] cpuidle: Run tick_broadcast_exit() with disabled interrupts Commit 335f49196fd6 (sched/idle: Use explicit broadcast oneshot control function) replaced clockevents_notify() invocations in cpuidle_idle_call() with direct calls to tick_broadcast_enter() and tick_broadcast_exit(), but it overlooked the fact that interrupts were already enabled before calling the latter which led to functional breakage on systems using idle states with the CPUIDLE_FLAG_TIMER_STOP flag set. Fix that by moving the invocations of tick_broadcast_enter() and tick_broadcast_exit() down into cpuidle_enter_state() where interrupts are still disabled when tick_broadcast_exit() is called. Also ensure that interrupts will be disabled before running tick_broadcast_exit() even if they have been enabled by the idle state's ->enter callback. Trigger a WARN_ON_ONCE() in that case, as we generally don't want that to happen for states with CPUIDLE_FLAG_TIMER_STOP set. Fixes: 335f49196fd6 (sched/idle: Use explicit broadcast oneshot control function) Reported-and-tested-by: Linus Walleij Acked-by: Peter Zijlstra (Intel) Acked-by: Daniel Lezcano Reported-and-tested-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 16 ++++++++++++++++ kernel/sched/idle.c | 16 ++-------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 7a73a279e179..61c417b9e53f 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -158,9 +158,18 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, int entered_state; struct cpuidle_state *target_state = &drv->states[index]; + bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP); ktime_t time_start, time_end; s64 diff; + /* + * Tell the time framework to switch to a broadcast timer because our + * local timer will be shut down. If a local timer is used from another + * CPU as a broadcast timer, this call may fail if it is not available. + */ + if (broadcast && tick_broadcast_enter()) + return -EBUSY; + trace_cpu_idle_rcuidle(index, dev->cpu); time_start = ktime_get(); @@ -169,6 +178,13 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, time_end = ktime_get(); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); + if (broadcast) { + if (WARN_ON_ONCE(!irqs_disabled())) + local_irq_disable(); + + tick_broadcast_exit(); + } + if (!cpuidle_state_is_coupled(dev, drv, entered_state)) local_irq_enable(); diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index deef1caa94c6..fefcb1fa5160 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -81,7 +81,6 @@ static void cpuidle_idle_call(void) struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int next_state, entered_state; - unsigned int broadcast; bool reflect; /* @@ -150,17 +149,6 @@ static void cpuidle_idle_call(void) goto exit_idle; } - broadcast = drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP; - - /* - * Tell the time framework to switch to a broadcast timer - * because our local timer will be shutdown. If a local timer - * is used from another cpu as a broadcast timer, this call may - * fail if it is not available - */ - if (broadcast && tick_broadcast_enter()) - goto use_default; - /* Take note of the planned idle state. */ idle_set_state(this_rq(), &drv->states[next_state]); @@ -174,8 +162,8 @@ static void cpuidle_idle_call(void) /* The cpu is no longer idle or about to enter idle. */ idle_set_state(this_rq(), NULL); - if (broadcast) - tick_broadcast_exit(); + if (entered_state == -EBUSY) + goto use_default; /* * Give the governor an opportunity to reflect on the outcome From 2cff98b99c469880ce830cbcde015b53b67e0a7b Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Wed, 29 Apr 2015 16:09:18 +0100 Subject: [PATCH 069/104] arm64: add missing PAGE_ALIGN() to __dma_free() __dma_alloc() does a PAGE_ALIGN() on the passed in size argument before doing anything else. __dma_free() does not. And because it doesn't, it is possible to leak memory should size not be an integer multiple of PAGE_SIZE. The solution is to add a PAGE_ALIGN() to __dma_free() like is done in __dma_alloc(). Additionally, this patch removes a redundant PAGE_ALIGN() from __dma_alloc_coherent(), since __dma_alloc_coherent() can only be called from __dma_alloc(), which already does a PAGE_ALIGN() before the call. Cc: stable@vger.kernel.org Acked-by: Catalin Marinas Signed-off-by: Dean Nelson Signed-off-by: Will Deacon --- arch/arm64/mm/dma-mapping.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index e0f14ee26b68..b0bd4e5fd5cf 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -104,7 +104,6 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, struct page *page; void *addr; - size = PAGE_ALIGN(size); page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, get_order(size)); if (!page) @@ -193,6 +192,8 @@ static void __dma_free(struct device *dev, size_t size, { void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle)); + size = PAGE_ALIGN(size); + if (!is_device_dma_coherent(dev)) { if (__free_from_pool(vaddr, size)) return; From cb6ccf09d6b94bec4def1ac5cf4678d12b216474 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 28 Apr 2015 11:43:15 +0800 Subject: [PATCH 070/104] route: Use ipv4_mtu instead of raw rt_pmtu The commit 3cdaa5be9e81a914e633a6be7b7d2ef75b528562 ("ipv4: Don't increase PMTU with Datagram Too Big message") broke PMTU in cases where the rt_pmtu value has expired but is smaller than the new PMTU value. This obsolete rt_pmtu then prevents the new PMTU value from being installed. Fixes: 3cdaa5be9e81 ("ipv4: Don't increase PMTU with Datagram Too Big message") Reported-by: Gerd v. Egidy Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/route.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a78540f28276..bff62fc87b8e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -962,10 +962,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (dst_metric_locked(dst, RTAX_MTU)) return; - if (dst->dev->mtu < mtu) - return; - - if (rt->rt_pmtu && rt->rt_pmtu < mtu) + if (ipv4_mtu(dst) < mtu) return; if (mtu < ip_rt_min_pmtu) From 3f300ff41d89fe9674b8dbab950ba2572639ee8d Mon Sep 17 00:00:00 2001 From: Simon Xiao Date: Tue, 28 Apr 2015 01:05:17 -0700 Subject: [PATCH 071/104] hv_netvsc: introduce netif-msg into netvsc module 1. Introduce netif-msg to netvsc to control debug logging output and keep msg_enable in netvsc_device_context so that it is kept persistently. 2. Only call dump_rndis_message() when NETIF_MSG_RX_ERR or above is specified in netvsc module debug param. In non-debug mode, in current code, dump_rndis_message() will not dump anything but it still initialize some local variables and process the switch logic which is unnecessary, especially in high network throughput situation. Signed-off-by: Simon Xiao Reviewed-by: K. Y. Srinivasan Reviewed-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 12 ++++++++++++ drivers/net/hyperv/netvsc.c | 3 +++ drivers/net/hyperv/netvsc_drv.c | 20 ++++++++++++++------ drivers/net/hyperv/rndis_filter.c | 3 ++- 4 files changed, 31 insertions(+), 7 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index a10b31664709..e55c8f4f55ef 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -612,6 +612,15 @@ struct multi_send_data { u32 count; /* counter of batched packets */ }; +/* The context of the netvsc device */ +struct net_device_context { + /* point back to our device context */ + struct hv_device *device_ctx; + struct delayed_work dwork; + struct work_struct work; + u32 msg_enable; /* debug level */ +}; + /* Per netvsc device */ struct netvsc_device { struct hv_device *dev; @@ -667,6 +676,9 @@ struct netvsc_device { struct multi_send_data msd[NR_CPUS]; u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ u32 pkt_align; /* alignment bytes, e.g. 8 */ + + /* The net device context */ + struct net_device_context *nd_ctx; }; /* NdisInitialize message */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 2e8ad0636b46..c651d4d8b747 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1197,6 +1197,9 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) */ ndev = net_device->ndev; + /* Add netvsc_device context to netvsc_device */ + net_device->nd_ctx = netdev_priv(ndev); + /* Initialize the NetVSC channel extension */ init_completion(&net_device->channel_init_wait); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index a3a9d3898a6e..66c4b0c8d108 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -40,18 +40,21 @@ #include "hyperv_net.h" -struct net_device_context { - /* point back to our device context */ - struct hv_device *device_ctx; - struct delayed_work dwork; - struct work_struct work; -}; #define RING_SIZE_MIN 64 static int ring_size = 128; module_param(ring_size, int, S_IRUGO); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); +static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | + NETIF_MSG_LINK | NETIF_MSG_IFUP | + NETIF_MSG_IFDOWN | NETIF_MSG_RX_ERR | + NETIF_MSG_TX_ERR; + +static int debug = -1; +module_param(debug, int, S_IRUGO); +MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); + static void do_set_multicast(struct work_struct *w) { struct net_device_context *ndevctx = @@ -888,6 +891,11 @@ static int netvsc_probe(struct hv_device *dev, net_device_ctx = netdev_priv(net); net_device_ctx->device_ctx = dev; + net_device_ctx->msg_enable = netif_msg_init(debug, default_msg); + if (netif_msg_probe(net_device_ctx)) + netdev_dbg(net, "netvsc msg_enable: %d\n", + net_device_ctx->msg_enable); + hv_set_drvdata(dev, net); INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change); INIT_WORK(&net_device_ctx->work, do_set_multicast); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 0d92efefd796..9118cea91882 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -429,7 +429,8 @@ int rndis_filter_receive(struct hv_device *dev, rndis_msg = pkt->data; - dump_rndis_message(dev, rndis_msg); + if (netif_msg_rx_err(net_dev->nd_ctx)) + dump_rndis_message(dev, rndis_msg); switch (rndis_msg->ndis_msg_type) { case RNDIS_MSG_PACKET: From d9b9e860cef1d904832bc6e92e444adfeabe49e9 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Tue, 28 Apr 2015 11:34:21 +0200 Subject: [PATCH 072/104] bnx2x: mark LRO as a fixed disabled feature if disable_tpa is set If disable_tpa is set, remove NETIF_F_LRO from hw_features, so ethtool sees it as "off [fixed]". Note that setting the NETIF_F_LRO bit in dev->features in the 'else' branch is not needed, because the bit was already set by bnx2x_init_dev(). Then the check for disable_tpa in in bnx2x_fix_features() becomes unnecessary. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 4 ---- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 3558a36b1c2d..dcdbd00d0259 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4834,10 +4834,6 @@ netdev_features_t bnx2x_fix_features(struct net_device *dev, features &= ~NETIF_F_GRO; } - /* Note: do not disable SW GRO in kernel when HW GRO is off */ - if (bp->disable_tpa) - features &= ~NETIF_F_LRO; - return features; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index b9f85fccb419..2d067c937bd1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12108,10 +12108,10 @@ static int bnx2x_init_bp(struct bnx2x *bp) /* Set TPA flags */ if (bp->disable_tpa) { bp->flags &= ~(TPA_ENABLE_FLAG | GRO_ENABLE_FLAG); + bp->dev->hw_features &= ~NETIF_F_LRO; bp->dev->features &= ~NETIF_F_LRO; } else { bp->flags |= (TPA_ENABLE_FLAG | GRO_ENABLE_FLAG); - bp->dev->features |= NETIF_F_LRO; } if (CHIP_IS_E1(bp)) From 7e6b4d440b0ae9062b84dfb417ea6d51a45dab76 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Tue, 28 Apr 2015 11:34:22 +0200 Subject: [PATCH 073/104] bnx2x: merge fp->disable_tpa with fp->mode It is simpler to have the TPA mode as one three-state variable. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 2 +- .../net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 26 +++++++++---------- .../net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 2 +- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 4 +-- .../net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 2 +- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 355d5fea5be9..2a41604cdd0e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -521,6 +521,7 @@ struct bnx2x_fp_txdata { }; enum bnx2x_tpa_mode_t { + TPA_MODE_DISABLED, TPA_MODE_LRO, TPA_MODE_GRO }; @@ -589,7 +590,6 @@ struct bnx2x_fastpath { /* TPA related */ struct bnx2x_agg_info *tpa_info; - u8 disable_tpa; #ifdef BNX2X_STOP_ON_ERROR u64 tpa_queue_used; #endif diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index dcdbd00d0259..84612b082b86 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -947,10 +947,10 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget) u16 frag_size, pages; #ifdef BNX2X_STOP_ON_ERROR /* sanity check */ - if (fp->disable_tpa && + if (fp->mode == TPA_MODE_DISABLED && (CQE_TYPE_START(cqe_fp_type) || CQE_TYPE_STOP(cqe_fp_type))) - BNX2X_ERR("START/STOP packet while disable_tpa type %x\n", + BNX2X_ERR("START/STOP packet while TPA disabled, type %x\n", CQE_TYPE(cqe_fp_type)); #endif @@ -1396,7 +1396,7 @@ void bnx2x_init_rx_rings(struct bnx2x *bp) DP(NETIF_MSG_IFUP, "mtu %d rx_buf_size %d\n", bp->dev->mtu, fp->rx_buf_size); - if (!fp->disable_tpa) { + if (fp->mode != TPA_MODE_DISABLED) { /* Fill the per-aggregation pool */ for (i = 0; i < MAX_AGG_QS(bp); i++) { struct bnx2x_agg_info *tpa_info = @@ -1410,7 +1410,7 @@ void bnx2x_init_rx_rings(struct bnx2x *bp) BNX2X_ERR("Failed to allocate TPA skb pool for queue[%d] - disabling TPA on this queue!\n", j); bnx2x_free_tpa_pool(bp, fp, i); - fp->disable_tpa = 1; + fp->mode = TPA_MODE_DISABLED; break; } dma_unmap_addr_set(first_buf, mapping, 0); @@ -1438,7 +1438,7 @@ void bnx2x_init_rx_rings(struct bnx2x *bp) ring_prod); bnx2x_free_tpa_pool(bp, fp, MAX_AGG_QS(bp)); - fp->disable_tpa = 1; + fp->mode = TPA_MODE_DISABLED; ring_prod = 0; break; } @@ -1560,7 +1560,7 @@ static void bnx2x_free_rx_skbs(struct bnx2x *bp) bnx2x_free_rx_bds(fp); - if (!fp->disable_tpa) + if (fp->mode != TPA_MODE_DISABLED) bnx2x_free_tpa_pool(bp, fp, MAX_AGG_QS(bp)); } } @@ -2477,19 +2477,19 @@ static void bnx2x_bz_fp(struct bnx2x *bp, int index) /* set the tpa flag for each queue. The tpa flag determines the queue * minimal size so it must be set prior to queue memory allocation */ - fp->disable_tpa = !(bp->flags & TPA_ENABLE_FLAG || - (bp->flags & GRO_ENABLE_FLAG && - bnx2x_mtu_allows_gro(bp->dev->mtu))); if (bp->flags & TPA_ENABLE_FLAG) fp->mode = TPA_MODE_LRO; - else if (bp->flags & GRO_ENABLE_FLAG) + else if (bp->flags & GRO_ENABLE_FLAG && + bnx2x_mtu_allows_gro(bp->dev->mtu)) fp->mode = TPA_MODE_GRO; + else + fp->mode = TPA_MODE_DISABLED; /* We don't want TPA if it's disabled in bp * or if this is an FCoE L2 ring. */ if (bp->disable_tpa || IS_FCOE_FP(fp)) - fp->disable_tpa = 1; + fp->mode = TPA_MODE_DISABLED; } int bnx2x_load_cnic(struct bnx2x *bp) @@ -2610,7 +2610,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) /* * Zero fastpath structures preserving invariants like napi, which are * allocated only once, fp index, max_cos, bp pointer. - * Also set fp->disable_tpa and txdata_ptr. + * Also set fp->mode and txdata_ptr. */ DP(NETIF_MSG_IFUP, "num queues: %d", bp->num_queues); for_each_queue(bp, i) @@ -4545,7 +4545,7 @@ static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index) * In these cases we disable the queue * Min size is different for OOO, TPA and non-TPA queues */ - if (ring_size < (fp->disable_tpa ? + if (ring_size < (fp->mode == TPA_MODE_DISABLED ? MIN_RX_SIZE_NONTPA : MIN_RX_SIZE_TPA)) { /* release memory allocated for this queue */ bnx2x_free_fp_mem_at(bp, index); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index adcacda7af7b..d7a71758e876 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -969,7 +969,7 @@ static inline void bnx2x_free_rx_sge_range(struct bnx2x *bp, { int i; - if (fp->disable_tpa) + if (fp->mode == TPA_MODE_DISABLED) return; for (i = 0; i < last; i++) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 2d067c937bd1..64ea5adf72eb 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -3128,7 +3128,7 @@ static unsigned long bnx2x_get_q_flags(struct bnx2x *bp, __set_bit(BNX2X_Q_FLG_FORCE_DEFAULT_PRI, &flags); } - if (!fp->disable_tpa) { + if (fp->mode != TPA_MODE_DISABLED) { __set_bit(BNX2X_Q_FLG_TPA, &flags); __set_bit(BNX2X_Q_FLG_TPA_IPV6, &flags); if (fp->mode == TPA_MODE_GRO) @@ -3176,7 +3176,7 @@ static void bnx2x_pf_rx_q_prep(struct bnx2x *bp, u16 sge_sz = 0; u16 tpa_agg_size = 0; - if (!fp->disable_tpa) { + if (fp->mode != TPA_MODE_DISABLED) { pause->sge_th_lo = SGE_TH_LO(bp); pause->sge_th_hi = SGE_TH_HI(bp); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index 15b2d1647560..06b8c0d8fd3b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -594,7 +594,7 @@ int bnx2x_vfpf_setup_q(struct bnx2x *bp, struct bnx2x_fastpath *fp, bnx2x_vfpf_prep(bp, &req->first_tlv, CHANNEL_TLV_SETUP_Q, sizeof(*req)); /* select tpa mode to request */ - if (!fp->disable_tpa) { + if (fp->mode != TPA_MODE_DISABLED) { flags |= VFPF_QUEUE_FLG_TPA; flags |= VFPF_QUEUE_FLG_TPA_IPV6; if (fp->mode == TPA_MODE_GRO) From f8dcb5e3365a23b620f5744f23f1918b9c38d883 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Tue, 28 Apr 2015 11:34:23 +0200 Subject: [PATCH 074/104] bnx2x: remove {TPA,GRO}_ENABLE_FLAG These flags are redundant with dev->features. Remove them. Just make sure to set dev->features ourselves in bnx2x_set_features() before performing the reload of the card. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 2 - .../net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 39 +++++++------------ .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 5 +-- 3 files changed, 15 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 2a41604cdd0e..a3b0f7a0c61e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1545,9 +1545,7 @@ struct bnx2x { #define USING_MSIX_FLAG (1 << 5) #define USING_MSI_FLAG (1 << 6) #define DISABLE_MSI_FLAG (1 << 7) -#define TPA_ENABLE_FLAG (1 << 8) #define NO_MCP_FLAG (1 << 9) -#define GRO_ENABLE_FLAG (1 << 10) #define MF_FUNC_DIS (1 << 11) #define OWN_CNIC_IRQ (1 << 12) #define NO_ISCSI_OOO_FLAG (1 << 13) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 84612b082b86..a8bb8f664d3d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2477,9 +2477,9 @@ static void bnx2x_bz_fp(struct bnx2x *bp, int index) /* set the tpa flag for each queue. The tpa flag determines the queue * minimal size so it must be set prior to queue memory allocation */ - if (bp->flags & TPA_ENABLE_FLAG) + if (bp->dev->features & NETIF_F_LRO) fp->mode = TPA_MODE_LRO; - else if (bp->flags & GRO_ENABLE_FLAG && + else if (bp->dev->features & NETIF_F_GRO && bnx2x_mtu_allows_gro(bp->dev->mtu)) fp->mode = TPA_MODE_GRO; else @@ -3249,7 +3249,7 @@ int bnx2x_low_latency_recv(struct napi_struct *napi) if ((bp->state == BNX2X_STATE_CLOSED) || (bp->state == BNX2X_STATE_ERROR) || - (bp->flags & (TPA_ENABLE_FLAG | GRO_ENABLE_FLAG))) + (bp->dev->features & (NETIF_F_LRO | NETIF_F_GRO))) return LL_FLUSH_FAILED; if (!bnx2x_fp_lock_poll(fp)) @@ -4840,19 +4840,9 @@ netdev_features_t bnx2x_fix_features(struct net_device *dev, int bnx2x_set_features(struct net_device *dev, netdev_features_t features) { struct bnx2x *bp = netdev_priv(dev); - u32 flags = bp->flags; - u32 changes; + netdev_features_t changes = features ^ dev->features; bool bnx2x_reload = false; - - if (features & NETIF_F_LRO) - flags |= TPA_ENABLE_FLAG; - else - flags &= ~TPA_ENABLE_FLAG; - - if (features & NETIF_F_GRO) - flags |= GRO_ENABLE_FLAG; - else - flags &= ~GRO_ENABLE_FLAG; + int rc; /* VFs or non SRIOV PFs should be able to change loopback feature */ if (!pci_num_vf(bp->pdev)) { @@ -4869,24 +4859,23 @@ int bnx2x_set_features(struct net_device *dev, netdev_features_t features) } } - changes = flags ^ bp->flags; - /* if GRO is changed while LRO is enabled, don't force a reload */ - if ((changes & GRO_ENABLE_FLAG) && (flags & TPA_ENABLE_FLAG)) - changes &= ~GRO_ENABLE_FLAG; + if ((changes & NETIF_F_GRO) && (features & NETIF_F_LRO)) + changes &= ~NETIF_F_GRO; /* if GRO is changed while HW TPA is off, don't force a reload */ - if ((changes & GRO_ENABLE_FLAG) && bp->disable_tpa) - changes &= ~GRO_ENABLE_FLAG; + if ((changes & NETIF_F_GRO) && bp->disable_tpa) + changes &= ~NETIF_F_GRO; if (changes) bnx2x_reload = true; - bp->flags = flags; - if (bnx2x_reload) { - if (bp->recovery_state == BNX2X_RECOVERY_DONE) - return bnx2x_reload_if_running(dev); + if (bp->recovery_state == BNX2X_RECOVERY_DONE) { + dev->features = features; + rc = bnx2x_reload_if_running(dev); + return rc ? rc : 1; + } /* else: bnx2x_nic_load() will be called at end of recovery */ } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 64ea5adf72eb..60d2b8ebba16 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -3304,7 +3304,7 @@ static void bnx2x_pf_init(struct bnx2x *bp) /* This flag is relevant for E1x only. * E2 doesn't have a TPA configuration in a function level. */ - flags |= (bp->flags & TPA_ENABLE_FLAG) ? FUNC_FLG_TPA : 0; + flags |= (bp->dev->features & NETIF_F_LRO) ? FUNC_FLG_TPA : 0; func_init.func_flgs = flags; func_init.pf_id = BP_FUNC(bp); @@ -12107,11 +12107,8 @@ static int bnx2x_init_bp(struct bnx2x *bp) /* Set TPA flags */ if (bp->disable_tpa) { - bp->flags &= ~(TPA_ENABLE_FLAG | GRO_ENABLE_FLAG); bp->dev->hw_features &= ~NETIF_F_LRO; bp->dev->features &= ~NETIF_F_LRO; - } else { - bp->flags |= (TPA_ENABLE_FLAG | GRO_ENABLE_FLAG); } if (CHIP_IS_E1(bp)) From 18fe369a5a4f175ee652ba8b7d8d7e49f140e281 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Tue, 28 Apr 2015 13:11:27 +0200 Subject: [PATCH 075/104] trivial: net: atl1e: atl1e_hw.h: fix 0x0x prefix Fix the 0x0x prefix in an integer constant. Signed-off-by: Antonio Ospite Cc: Jay Cliburn Cc: Chris Snook Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/atl1e/atl1e_hw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_hw.h b/drivers/net/ethernet/atheros/atl1e/atl1e_hw.h index 74df16aef793..88a6271de5bc 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_hw.h +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_hw.h @@ -129,7 +129,7 @@ s32 atl1e_restart_autoneg(struct atl1e_hw *hw); #define TWSI_CTRL_LD_SLV_ADDR_SHIFT 8 #define TWSI_CTRL_SW_LDSTART 0x800 #define TWSI_CTRL_HW_LDSTART 0x1000 -#define TWSI_CTRL_SMB_SLV_ADDR_MASK 0x0x7F +#define TWSI_CTRL_SMB_SLV_ADDR_MASK 0x7F #define TWSI_CTRL_SMB_SLV_ADDR_SHIFT 15 #define TWSI_CTRL_LD_EXIST 0x400000 #define TWSI_CTRL_READ_FREQ_SEL_MASK 0x3 From 165996bd351f0e711feff92479c0796098afaa38 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Tue, 28 Apr 2015 13:11:29 +0200 Subject: [PATCH 076/104] trivial: net: systemport: bcmsysport.h: fix 0x0x prefix Fix the 0x0x prefix in an integer constant. In this case, while at it, also fix a typo (s/unitcast/unicast/). Signed-off-by: Antonio Ospite Cc: Florian Fainelli Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 7e3d87a88c76..e2c043eabbf3 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -543,7 +543,7 @@ struct bcm_sysport_tx_counters { u32 jbr; /* RO # of xmited jabber count*/ u32 bytes; /* RO # of xmited byte count */ u32 pok; /* RO # of xmited good pkt */ - u32 uc; /* RO (0x0x4f0)# of xmited unitcast pkt */ + u32 uc; /* RO (0x4f0) # of xmited unicast pkt */ }; struct bcm_sysport_mib { From 2b70fe5aba0dd00d81173243c7ab04c66aeb67d8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 28 Apr 2015 13:33:21 +0200 Subject: [PATCH 077/104] net: sched: act_connmark: don't zap skb->nfct This action is meant to be passive, i.e. we should not alter skb->nfct: If nfct is present just leave it alone. Compile tested only. Cc: Jamal Hadi Salim Signed-off-by: Florian Westphal Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/sched/act_connmark.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 8e472518f9f6..295d14bd6c67 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -63,7 +63,6 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, skb->mark = c->mark; /* using overlimits stats to count how many packets marked */ ca->tcf_qstats.overlimits++; - nf_ct_put(c); goto out; } @@ -82,7 +81,6 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, nf_ct_put(c); out: - skb->nfct = NULL; spin_unlock(&ca->tcf_lock); return ca->tcf_action; } From 821996795973fd52703c35811a03db9fec1ac141 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 28 Apr 2015 18:33:48 +0200 Subject: [PATCH 078/104] bridge/mdb: remove wrong use of NLM_F_MULTI NLM_F_MULTI must be used only when a NLMSG_DONE message is sent. In fact, it is sent only at the end of a dump. Libraries like libnl will wait forever for NLMSG_DONE. Fixes: 37a393bc4932 ("bridge: notify mdb changes via netlink") CC: Cong Wang CC: Stephen Hemminger CC: bridge@lists.linux-foundation.org Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 409608960899..e29ad70b3000 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -170,7 +170,7 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb, struct br_port_msg *bpm; struct nlattr *nest, *nest2; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0); if (!nlh) return -EMSGSIZE; From 46c264daaaa569e24f8aba877d0fd8167c42a9a4 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 28 Apr 2015 18:33:49 +0200 Subject: [PATCH 079/104] bridge/nl: remove wrong use of NLM_F_MULTI NLM_F_MULTI must be used only when a NLMSG_DONE message is sent. In fact, it is sent only at the end of a dump. Libraries like libnl will wait forever for NLMSG_DONE. Fixes: e5a55a898720 ("net: create generic bridge ops") Fixes: 815cccbf10b2 ("ixgbe: add setlink, getlink support to ixgbe and ixgbevf") CC: John Fastabend CC: Sathya Perla CC: Subbu Seetharaman CC: Ajit Khaparde CC: Jeff Kirsher CC: intel-wired-lan@lists.osuosl.org CC: Jiri Pirko CC: Scott Feldman CC: Stephen Hemminger CC: bridge@lists.linux-foundation.org Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 5 +++-- drivers/net/ethernet/intel/i40e/i40e_main.c | 7 ++++--- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++-- drivers/net/ethernet/rocker/rocker.c | 5 +++-- include/linux/netdevice.h | 6 ++++-- include/linux/rtnetlink.h | 2 +- net/bridge/br_netlink.c | 4 ++-- net/bridge/br_private.h | 2 +- net/core/rtnetlink.c | 12 +++++++----- 9 files changed, 27 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index fb0bc3c3620e..a6dcbf850c1f 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4846,7 +4846,8 @@ static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, } static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev, u32 filter_mask) + struct net_device *dev, u32 filter_mask, + int nlflags) { struct be_adapter *adapter = netdev_priv(dev); int status = 0; @@ -4868,7 +4869,7 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return ndo_dflt_bridge_getlink(skb, pid, seq, dev, hsw_mode == PORT_FWD_TYPE_VEPA ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB, - 0, 0); + 0, 0, nlflags); } #ifdef CONFIG_BE2NET_VXLAN diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 24481cd7e59a..a54c14491e3b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8053,10 +8053,10 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, #ifdef HAVE_BRIDGE_FILTER static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, - u32 __always_unused filter_mask) + u32 __always_unused filter_mask, int nlflags) #else static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) + struct net_device *dev, int nlflags) #endif /* HAVE_BRIDGE_FILTER */ { struct i40e_netdev_priv *np = netdev_priv(dev); @@ -8078,7 +8078,8 @@ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, if (!veb) return 0; - return ndo_dflt_bridge_getlink(skb, pid, seq, dev, veb->bridge_mode); + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, veb->bridge_mode, + nlflags); } #endif /* HAVE_BRIDGE_ATTRIBS */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index d3f4b0ceb3f7..5be12a00e1f4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8044,7 +8044,7 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev, static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, - u32 filter_mask) + u32 filter_mask, int nlflags) { struct ixgbe_adapter *adapter = netdev_priv(dev); @@ -8052,7 +8052,7 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return 0; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, - adapter->bridge_mode, 0, 0); + adapter->bridge_mode, 0, 0, nlflags); } static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index a570a60533be..ec251531bd9f 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4176,14 +4176,15 @@ static int rocker_port_bridge_setlink(struct net_device *dev, static int rocker_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, - u32 filter_mask) + u32 filter_mask, int nlflags) { struct rocker_port *rocker_port = netdev_priv(dev); u16 mode = BRIDGE_MODE_UNDEF; u32 mask = BR_LEARNING | BR_LEARNING_SYNC; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, - rocker_port->brport_flags, mask); + rocker_port->brport_flags, mask, + nlflags); } static int rocker_port_get_phys_port_name(struct net_device *dev, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dbad4d728b4b..1899c74a7127 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -977,7 +977,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, * u16 flags) * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, - * struct net_device *dev, u32 filter_mask) + * struct net_device *dev, u32 filter_mask, + * int nlflags) * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, * u16 flags); * @@ -1173,7 +1174,8 @@ struct net_device_ops { int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, - u32 filter_mask); + u32 filter_mask, + int nlflags); int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 2da5d1081ad9..7b8e260c4a27 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -122,5 +122,5 @@ extern int ndo_dflt_fdb_del(struct ndmsg *ndm, extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, - u32 flags, u32 mask); + u32 flags, u32 mask, int nlflags); #endif /* __LINUX_RTNETLINK_H */ diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0e4ddb81610d..4b5c236998ff 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -394,7 +394,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) * Dump information about all ports, in response to GETLINK */ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev, u32 filter_mask) + struct net_device *dev, u32 filter_mask, int nlflags) { struct net_bridge_port *port = br_port_get_rtnl(dev); @@ -402,7 +402,7 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) return 0; - return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, nlflags, filter_mask, dev); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 6ca0251cb478..3362c29400f1 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -828,7 +828,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port); int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, - u32 filter_mask); + u32 filter_mask, int nlflags); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 358d52a38533..666e0928ba40 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2854,7 +2854,7 @@ static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask, int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, - u32 flags, u32 mask) + u32 flags, u32 mask, int nlflags) { struct nlmsghdr *nlh; struct ifinfomsg *ifm; @@ -2863,7 +2863,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; struct net_device *br_dev = netdev_master_upper_dev_get(dev); - nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags); if (nlh == NULL) return -EMSGSIZE; @@ -2969,7 +2969,8 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && br_dev->netdev_ops->ndo_bridge_getlink( - skb, portid, seq, dev, filter_mask) < 0) + skb, portid, seq, dev, filter_mask, + NLM_F_MULTI) < 0) break; idx++; } @@ -2977,7 +2978,8 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) if (ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && ops->ndo_bridge_getlink(skb, portid, seq, dev, - filter_mask) < 0) + filter_mask, + NLM_F_MULTI) < 0) break; idx++; } @@ -3018,7 +3020,7 @@ static int rtnl_bridge_notify(struct net_device *dev) goto errout; } - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0, 0); if (err < 0) goto errout; From f2f67390a4b961dae83733732e96e1a394a53c4e Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 28 Apr 2015 18:33:50 +0200 Subject: [PATCH 080/104] tipc: remove wrong use of NLM_F_MULTI NLM_F_MULTI must be used only when a NLMSG_DONE message is sent. In fact, it is sent only at the end of a dump. Libraries like libnl will wait forever for NLMSG_DONE. Fixes: 35b9dd7607f0 ("tipc: add bearer get/dump to new netlink api") Fixes: 7be57fc69184 ("tipc: add link get/dump to new netlink api") Fixes: 46f15c6794fb ("tipc: add media get/dump to new netlink api") CC: Richard Alpe CC: Jon Maloy CC: Ying Xue CC: tipc-discussion@lists.sourceforge.net Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/tipc/bearer.c | 17 +++++++++-------- net/tipc/link.c | 8 ++++---- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 3613e72e858e..70e3dacbf84a 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -591,14 +591,14 @@ void tipc_bearer_stop(struct net *net) /* Caller should hold rtnl_lock to protect the bearer */ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, - struct tipc_bearer *bearer) + struct tipc_bearer *bearer, int nlflags) { void *hdr; struct nlattr *attrs; struct nlattr *prop; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, - NLM_F_MULTI, TIPC_NL_BEARER_GET); + nlflags, TIPC_NL_BEARER_GET); if (!hdr) return -EMSGSIZE; @@ -657,7 +657,7 @@ int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!bearer) continue; - err = __tipc_nl_add_bearer(&msg, bearer); + err = __tipc_nl_add_bearer(&msg, bearer, NLM_F_MULTI); if (err) break; } @@ -705,7 +705,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) goto err_out; } - err = __tipc_nl_add_bearer(&msg, bearer); + err = __tipc_nl_add_bearer(&msg, bearer, 0); if (err) goto err_out; rtnl_unlock(); @@ -857,14 +857,14 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) } static int __tipc_nl_add_media(struct tipc_nl_msg *msg, - struct tipc_media *media) + struct tipc_media *media, int nlflags) { void *hdr; struct nlattr *attrs; struct nlattr *prop; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, - NLM_F_MULTI, TIPC_NL_MEDIA_GET); + nlflags, TIPC_NL_MEDIA_GET); if (!hdr) return -EMSGSIZE; @@ -916,7 +916,8 @@ int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); for (; media_info_array[i] != NULL; i++) { - err = __tipc_nl_add_media(&msg, media_info_array[i]); + err = __tipc_nl_add_media(&msg, media_info_array[i], + NLM_F_MULTI); if (err) break; } @@ -963,7 +964,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) goto err_out; } - err = __tipc_nl_add_media(&msg, media); + err = __tipc_nl_add_media(&msg, media, 0); if (err) goto err_out; rtnl_unlock(); diff --git a/net/tipc/link.c b/net/tipc/link.c index 57be6e6aff99..a11cc5e5e0ae 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2013,7 +2013,7 @@ static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) /* Caller should hold appropriate locks to protect the link */ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, - struct tipc_link *link) + struct tipc_link *link, int nlflags) { int err; void *hdr; @@ -2022,7 +2022,7 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, struct tipc_net *tn = net_generic(net, tipc_net_id); hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, - NLM_F_MULTI, TIPC_NL_LINK_GET); + nlflags, TIPC_NL_LINK_GET); if (!hdr) return -EMSGSIZE; @@ -2095,7 +2095,7 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, if (!node->links[i]) continue; - err = __tipc_nl_add_link(net, msg, node->links[i]); + err = __tipc_nl_add_link(net, msg, node->links[i], NLM_F_MULTI); if (err) return err; } @@ -2209,7 +2209,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) goto err_out; } - err = __tipc_nl_add_link(net, &msg, link); + err = __tipc_nl_add_link(net, &msg, link, 0); if (err) goto err_out; From 0d699f28ee5d0641470a603ab5904e463cb1532a Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Tue, 28 Apr 2015 16:59:04 -0400 Subject: [PATCH 081/104] tipc: fix problem with parallel link synchronization mechanism Currently, we try to accumulate arrived packets in the links's 'deferred' queue during the parallel link syncronization phase. This entails two problems: - With an unlucky combination of arriving packets the algorithm may go into a lockstep with the out-of-sequence handling function, where the synch mechanism is adding a packet to the deferred queue, while the out-of-sequence handling is retrieving it again, thus ending up in a loop inside the node_lock scope. - Even if this is avoided, the link will very often send out unnecessary protocol messages, in the worst case leading to redundant retransmissions. We fix this by just dropping arriving packets on the upcoming link during the synchronization phase, thus relying on the retransmission protocol to resolve the situation once the two links have arrived to a synchronized state. Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index a11cc5e5e0ae..43a515dc97b0 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1145,11 +1145,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) } /* Synchronize with parallel link if applicable */ if (unlikely((l_ptr->flags & LINK_SYNCHING) && !msg_dup(msg))) { - link_handle_out_of_seq_msg(l_ptr, skb); - if (link_synch(l_ptr)) - link_retrieve_defq(l_ptr, &head); - skb = NULL; - goto unlock; + if (!link_synch(l_ptr)) + goto unlock; } l_ptr->next_in_no++; if (unlikely(!skb_queue_empty(&l_ptr->deferdq))) From 42eab005a5dd5d7ea2b0328aecc4d6cc0c23c9c2 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Tue, 28 Apr 2015 14:49:29 -0700 Subject: [PATCH 082/104] mlx4: Fix tx ring affinity_mask creation By default, the number of tx queues is limited by the number of online cpus in mlx4_en_get_profile(). However, this limit no longer holds after the ethtool .set_channels method has been called. In that situation, the driver may access invalid bits of certain cpumask variables when queue_index >= nr_cpu_ids. Signed-off-by: Benjamin Poirier Acked-by: Ido Shamay Fixes: d03a68f ("net/mlx4_en: Configure the XPS queue mapping on driver load") Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 1783705273d8..f7bf312fb443 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -143,8 +143,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->hwtstamp_tx_type = priv->hwtstamp_config.tx_type; ring->queue_index = queue_index; - if (queue_index < priv->num_tx_rings_p_up && cpu_online(queue_index)) - cpumask_set_cpu(queue_index, &ring->affinity_mask); + if (queue_index < priv->num_tx_rings_p_up) + cpumask_set_cpu_local_first(queue_index, + priv->mdev->dev->numa_node, + &ring->affinity_mask); *pring = ring; return 0; @@ -213,7 +215,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context, &ring->qp, &ring->qp_state); - if (!user_prio && cpu_online(ring->queue_index)) + if (!cpumask_empty(&ring->affinity_mask)) netif_set_xps_queue(priv->dev, &ring->affinity_mask, ring->queue_index); From 48734994ac268eb391a66dca4cde7d08a08aba08 Mon Sep 17 00:00:00 2001 From: Vlastimil Setka Date: Wed, 29 Apr 2015 00:17:11 +0200 Subject: [PATCH 083/104] altera_tse: Correct rx packet length Altera TSE MAC rx DMA transfer starts with the 2 additional bytes for IP payload alignment. This patch fixes tse_rx() function loop which reads DMA rx status and extracts packet length from it. Status signalises a whole DMA transfer length, which is 2 bytes longer than the packet itself. Signed-off-by: Vlastimil Setka Signed-off-by: David S. Miller --- drivers/net/ethernet/altera/altera_tse_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 0533c051a3e5..da48e66377b5 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -391,6 +391,12 @@ static int tse_rx(struct altera_tse_private *priv, int limit) "RCV pktstatus %08X pktlength %08X\n", pktstatus, pktlength); + /* DMA trasfer from TSE starts with 2 aditional bytes for + * IP payload alignment. Status returned by get_rx_status() + * contains DMA transfer length. Packet is 2 bytes shorter. + */ + pktlength -= 2; + count++; next_entry = (++priv->rx_cons) % priv->rx_ring_size; From b56fc3c536541c8081cd5f1f1d101a16c002a365 Mon Sep 17 00:00:00 2001 From: KY Srinivasan Date: Tue, 28 Apr 2015 17:59:48 -0700 Subject: [PATCH 084/104] hv_netvsc: Fix a bug in netvsc_start_xmit() Commit b08cc79155fc26d0d112b1470d1ece5034651a4b eliminated memory allocation in the packet send path: "hv_netvsc: Eliminate memory allocation in the packet send path The network protocol used to communicate with the host is the remote ndis (rndis) protocol. We need to decorate each outgoing packet with a rndis header and additional rndis state (rndis per-packet state). To manage this state, we currently allocate memory in the transmit path. Eliminate this allocation by requesting additional head room in the skb." This commit introduced a bug since it did not account for the case if the skb was cloned. Fix this bug. Signed-off-by: K. Y. Srinivasan Tested-by: Dexuan Cui Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 1 - drivers/net/hyperv/netvsc.c | 5 ----- drivers/net/hyperv/netvsc_drv.c | 27 +++++++-------------------- 3 files changed, 7 insertions(+), 26 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index e55c8f4f55ef..41071d32bc8e 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -128,7 +128,6 @@ struct ndis_tcp_ip_checksum_info; struct hv_netvsc_packet { /* Bookkeeping stuff */ u32 status; - bool part_of_skb; bool is_data_pkt; bool xmit_more; /* from skb */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index c651d4d8b747..2d9ef533cc48 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -889,11 +889,6 @@ int netvsc_send(struct hv_device *device, } else { packet->page_buf_cnt = 0; packet->total_data_buflen += msd_len; - if (!packet->part_of_skb) { - skb = (struct sk_buff *)(unsigned long)packet-> - send_completion_tid; - packet->send_completion_tid = 0; - } } if (msdp->pkt) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 66c4b0c8d108..5993c7e2d723 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -238,9 +238,6 @@ void netvsc_xmit_completion(void *context) struct sk_buff *skb = (struct sk_buff *) (unsigned long)packet->send_completion_tid; - if (!packet->part_of_skb) - kfree(packet); - if (skb) dev_kfree_skb_any(skb); } @@ -392,7 +389,6 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) u32 net_trans_info; u32 hash; u32 skb_length; - u32 head_room; u32 pkt_sz; struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT]; @@ -405,7 +401,6 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) check_size: skb_length = skb->len; - head_room = skb_headroom(skb); num_data_pgs = netvsc_get_slots(skb) + 2; if (num_data_pgs > MAX_PAGE_BUFFER_COUNT && linear) { net_alert_ratelimited("packet too big: %u pages (%u bytes)\n", @@ -424,20 +419,14 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) pkt_sz = sizeof(struct hv_netvsc_packet) + RNDIS_AND_PPI_SIZE; - if (head_room < pkt_sz) { - packet = kmalloc(pkt_sz, GFP_ATOMIC); - if (!packet) { - /* out of memory, drop packet */ - netdev_err(net, "unable to alloc hv_netvsc_packet\n"); - ret = -ENOMEM; - goto drop; - } - packet->part_of_skb = false; - } else { - /* Use the headroom for building up the packet */ - packet = (struct hv_netvsc_packet *)skb->head; - packet->part_of_skb = true; + ret = skb_cow_head(skb, pkt_sz); + if (ret) { + netdev_err(net, "unable to alloc hv_netvsc_packet\n"); + ret = -ENOMEM; + goto drop; } + /* Use the headroom for building up the packet */ + packet = (struct hv_netvsc_packet *)skb->head; packet->status = 0; packet->xmit_more = skb->xmit_more; @@ -594,8 +583,6 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) net->stats.tx_bytes += skb_length; net->stats.tx_packets++; } else { - if (packet && !packet->part_of_skb) - kfree(packet); if (ret != -EAGAIN) { dev_kfree_skb_any(skb); net->stats.tx_dropped++; From d4c216c54197d741ed8b7ca54f13645dfb3eacde Mon Sep 17 00:00:00 2001 From: Punnaiah Choudary Kalluri Date: Wed, 29 Apr 2015 08:34:46 +0530 Subject: [PATCH 085/104] net: macb: Fix race condition in driver when Rx frame is dropped Under heavy Rx load, observed that the Hw is updating the USED bit and it is not updating the received frame status to the BD control field. This could be lack of resources for processing the BDs at high data rates. Driver drops the frame associated with this BD but not clearing the USED bit. So, this is causing hang condition as Hw expects USED bit to be cleared for this BD. Signed-off-by: Punnaiah Choudary Kalluri Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 665c29098e3c..4104d49f005d 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -707,6 +707,9 @@ static void gem_rx_refill(struct macb *bp) /* properly align Ethernet header */ skb_reserve(skb, NET_IP_ALIGN); + } else { + bp->rx_ring[entry].addr &= ~MACB_BIT(RX_USED); + bp->rx_ring[entry].ctrl = 0; } } From 50d4964f1d0411d82cca593f2664bfab7f82dbbf Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 29 Apr 2015 10:56:15 -0700 Subject: [PATCH 086/104] net: dsa: Fix scope of eeprom-length property eeprom-length is a switch property, not a dsa property, and thus needs to be attached to the switch node, not to the dsa node. Reported-by: Andrew Lunn Fixes: 6793abb4e849 ("net: dsa: Add support for switch EEPROM access") Signed-off-by: Guenter Roeck Acked-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/dsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 079a224471e7..e6f6cc3a1bcf 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -633,7 +633,7 @@ static int dsa_of_probe(struct device *dev) if (cd->sw_addr > PHY_MAX_ADDR) continue; - if (!of_property_read_u32(np, "eeprom-length", &eeprom_len)) + if (!of_property_read_u32(child, "eeprom-length", &eeprom_len)) cd->eeprom_len = eeprom_len; for_each_available_child_of_node(child, port) { From e913fb279c564f2af05658b3f01041757d2e9303 Mon Sep 17 00:00:00 2001 From: Pai Date: Wed, 29 Apr 2015 14:24:23 -0400 Subject: [PATCH 087/104] net: Fix Kernel Panic in bonding driver debugfs file: rlb_hash_table This patch fixes a Kernel Panic in bonding driver debugfs file: rlb_hash_table. $> modprobe bonding mode=6 $> cat /sys/kernel/debug/bonding/bond0/rlb_hash_table This will crash the kernel. The struct alb_bond_info is initialized only when the bonding interface is initialized (ip link set bond0 up) and not at the time it is allocated. If we try to read the table before that, it'll result in a kernel panic. The patch applies against both net and net-next Signed-off-by: Vishwanath Pai Signed-off-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3a10551d64cf..d5fe5d5f490f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4544,6 +4544,8 @@ unsigned int bond_get_num_tx_queues(void) int bond_create(struct net *net, const char *name) { struct net_device *bond_dev; + struct bonding *bond; + struct alb_bond_info *bond_info; int res; rtnl_lock(); @@ -4557,6 +4559,14 @@ int bond_create(struct net *net, const char *name) return -ENOMEM; } + /* + * Initialize rx_hashtbl_used_head to RLB_NULL_INDEX. + * It is set to 0 by default which is wrong. + */ + bond = netdev_priv(bond_dev); + bond_info = &(BOND_ALB_INFO(bond)); + bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; + dev_net_set(bond_dev, net); bond_dev->rtnl_link_ops = &bond_link_ops; From 12a8541d5c82d17d42424d46ec36929cfef06a10 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 29 Apr 2015 08:09:49 +0300 Subject: [PATCH 088/104] bnx2x: Delay during kdump load In a kdump environment interfaces might be re-loaded without a proper unload sequence in the previous running kernel. bnx2x management FW and driver maintains a `pulse' that notifies the FW that the driver is still up and running. Driver load on the kdump kernel should be performed only after the pulse has been out-of-sync long enough for the management FW to identify that the driver has crashed, on which point it will perform some necessary cleanup of the HW. In today's distros kdump loading is quite fast, sometimes too fast for our FW to get out-of-sync. This patch delays the bnx2x's probe during kdump to allow a proper re-load on the kdump kernel. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 60d2b8ebba16..556dcc162a62 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13368,6 +13368,12 @@ static int bnx2x_init_one(struct pci_dev *pdev, bool is_vf; int cnic_cnt; + /* Management FW 'remembers' living interfaces. Allow it some time + * to forget previously living interfaces, allowing a proper re-load. + */ + if (is_kdump_kernel()) + msleep(5000); + /* An estimated maximum supported CoS number according to the chip * version. * We will try to roughly estimate the maximum number of CoSes this chip From 7f0b8a56c978b0a3315ac84c6cbb065413afb8e9 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Wed, 29 Apr 2015 17:19:05 +0530 Subject: [PATCH 089/104] cxgb4: Fix MC1 memory offset calculation Commit 6559a7e8296002b4 ("cxgb4: Cleanup macros so they follow the same style and look consistent") introduced a regression where reading MC1 memory in adapters where MC0 isn't present or MC0 size is not equal to MC1 size caused the adapter to crash due to incorrect computation of memoffset. Fix is to read the size of MC0 instead of MC1 for offset calculation Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 5959e3ae72da..e8578a742f2a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -492,7 +492,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, memoffset = (mtype * (edc_size * 1024 * 1024)); else { mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap, - MA_EXT_MEMORY1_BAR_A)); + MA_EXT_MEMORY0_BAR_A)); memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024; } From 5d2361db48899789fb466ff62db5d5fc7b070e86 Mon Sep 17 00:00:00 2001 From: Forrest Liu Date: Mon, 9 Feb 2015 17:31:45 +0800 Subject: [PATCH 090/104] Btrfs: btrfs_release_extent_buffer_page didn't free pages of dummy extent btrfs_release_extent_buffer_page() can't handle dummy extent that allocated by btrfs_clone_extent_buffer() properly. That is because reference count of pages that allocated by btrfs_clone_extent_buffer() was 2, 1 by alloc_page(), and another by attach_extent_buffer_page(). Running following command repeatly can check this memory leak problem btrfs inspect-internal inode-resolve 256 /mnt/btrfs Signed-off-by: Chien-Kuan Yeh Signed-off-by: Forrest Liu Reviewed-by: Filipe Manana Tested-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 51 ++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ea100eb188de..43af5a61ad25 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4560,36 +4560,37 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb) do { index--; page = eb->pages[index]; - if (page && mapped) { + if (!page) + continue; + if (mapped) spin_lock(&page->mapping->private_lock); + /* + * We do this since we'll remove the pages after we've + * removed the eb from the radix tree, so we could race + * and have this page now attached to the new eb. So + * only clear page_private if it's still connected to + * this eb. + */ + if (PagePrivate(page) && + page->private == (unsigned long)eb) { + BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); + BUG_ON(PageDirty(page)); + BUG_ON(PageWriteback(page)); /* - * We do this since we'll remove the pages after we've - * removed the eb from the radix tree, so we could race - * and have this page now attached to the new eb. So - * only clear page_private if it's still connected to - * this eb. + * We need to make sure we haven't be attached + * to a new eb. */ - if (PagePrivate(page) && - page->private == (unsigned long)eb) { - BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); - BUG_ON(PageDirty(page)); - BUG_ON(PageWriteback(page)); - /* - * We need to make sure we haven't be attached - * to a new eb. - */ - ClearPagePrivate(page); - set_page_private(page, 0); - /* One for the page private */ - page_cache_release(page); - } - spin_unlock(&page->mapping->private_lock); - - } - if (page) { - /* One for when we alloced the page */ + ClearPagePrivate(page); + set_page_private(page, 0); + /* One for the page private */ page_cache_release(page); } + + if (mapped) + spin_unlock(&page->mapping->private_lock); + + /* One for when we alloced the page */ + page_cache_release(page); } while (index != 0); } From d795ef9aa8311ca3c5158bda1edbcd14479c101c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 17 Apr 2015 14:41:29 +0100 Subject: [PATCH 091/104] arm64: perf: don't warn about missing interrupt-affinity property for PPIs PPIs are affine by nature, so the interrupt-affinity property is not used and therefore we shouldn't print a warning in its absence. Reported-by: Maxime Ripard Reviewed-by: Maxime Ripard Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 195991dadc37..2a9cbcb61126 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1310,7 +1310,7 @@ static const struct of_device_id armpmu_of_device_ids[] = { static int armpmu_device_probe(struct platform_device *pdev) { - int i, *irqs; + int i, irq, *irqs; if (!cpu_pmu) return -ENODEV; @@ -1319,6 +1319,11 @@ static int armpmu_device_probe(struct platform_device *pdev) if (!irqs) return -ENOMEM; + /* Don't bother with PPIs; they're already affine */ + irq = platform_get_irq(pdev, 0); + if (irq >= 0 && irq_is_percpu(irq)) + return 0; + for (i = 0; i < pdev->num_resources; ++i) { struct device_node *dn; int cpu; From 8291fd04d86b97869bd34e796bcac3141b9d5432 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 13 Apr 2015 10:17:55 +0100 Subject: [PATCH 092/104] arm64: perf: Fix the pmu node name in warning message With commit d5efd9cc9cf2 ("arm64: pmu: add support for interrupt-affinity property"), we print a warning when we find a PMU SPI with a missing missing interrupt-affinity property in a pmu node. Unfortunately, we pass the wrong (NULL) device node to of_node_full_name, resulting in unhelpful messages such as: hw perfevents: Failed to parse /interrupt-affinity[0] This patch fixes the name to that of the pmu node. Fixes: d5efd9cc9cf2 (arm64: pmu: add support for interrupt-affinity property) Acked-by: Mark Rutland Signed-off-by: Suzuki K. Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 2a9cbcb61126..23f25acf43a9 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1332,7 +1332,7 @@ static int armpmu_device_probe(struct platform_device *pdev) i); if (!dn) { pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", - of_node_full_name(dn), i); + of_node_full_name(pdev->dev.of_node), i); break; } From 3e6180f0c82b3790a9ec6d13d67aae359bf1ce84 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Apr 2015 10:10:36 -0400 Subject: [PATCH 093/104] dm: only initialize the request_queue once Commit bfebd1cdb4 ("dm: add full blk-mq support to request-based DM") didn't properly account for the need to short-circuit re-initializing DM's blk-mq request_queue if it was already initialized. Otherwise, reloading a blk-mq request-based DM table (either manually or via multipathd) resulted in errors, see: https://www.redhat.com/archives/dm-devel/2015-April/msg00132.html Fix is to only initialize the request_queue on the initial table load (when the mapped_device type is assigned). This is better than having dm_init_request_based_blk_mq_queue() return early if the queue was already initialized because it elevates the constraint to a more meaningful location in DM core. As such the pre-existing early return in dm_init_request_based_queue() can now be removed. Fixes: bfebd1cdb4 ("dm: add full blk-mq support to request-based DM") Signed-off-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- drivers/md/dm-ioctl.c | 17 +++++++++-------- drivers/md/dm.c | 3 --- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index c8a18e4ee9dc..720ceeb7fa9b 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1298,21 +1298,22 @@ static int table_load(struct dm_ioctl *param, size_t param_size) goto err_unlock_md_type; } - if (dm_get_md_type(md) == DM_TYPE_NONE) + if (dm_get_md_type(md) == DM_TYPE_NONE) { /* Initial table load: acquire type of table. */ dm_set_md_type(md, dm_table_get_type(t)); - else if (dm_get_md_type(md) != dm_table_get_type(t)) { + + /* setup md->queue to reflect md's type (may block) */ + r = dm_setup_md_queue(md); + if (r) { + DMWARN("unable to set up device queue for new table."); + goto err_unlock_md_type; + } + } else if (dm_get_md_type(md) != dm_table_get_type(t)) { DMWARN("can't change device type after initial table load."); r = -EINVAL; goto err_unlock_md_type; } - /* setup md->queue to reflect md's type (may block) */ - r = dm_setup_md_queue(md); - if (r) { - DMWARN("unable to set up device queue for new table."); - goto err_unlock_md_type; - } dm_unlock_md_type(md); /* stage inactive table */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f8c7ca3e8947..923496ba72a0 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2662,9 +2662,6 @@ static int dm_init_request_based_queue(struct mapped_device *md) { struct request_queue *q = NULL; - if (md->queue->elevator) - return 0; - /* Fully initialize the queue */ q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL); if (!q) From aa6df8dd28c01d9a3d2cfcfe9dd0a4a334d1cd81 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 29 Apr 2015 10:48:09 -0400 Subject: [PATCH 094/104] dm: fix free_rq_clone() NULL pointer when requeueing unmapped request Commit 022333427a ("dm: optimize dm_mq_queue_rq to _not_ use kthread if using pure blk-mq") mistakenly removed free_rq_clone()'s clone->q check before testing clone->q->mq_ops. It was an oversight to discontinue that check for 1 of the 2 use-cases for free_rq_clone(): 1) free_rq_clone() called when an unmapped original request is requeued 2) free_rq_clone() called in the request-based IO completion path The clone->q check made sense for case #1 but not for #2. However, we cannot just reinstate the check as it'd mask a serious bug in the IO completion case #2 -- no in-flight request should have an uninitialized request_queue (basic block layer refcounting _should_ ensure this). The NULL pointer seen for case #1 is detailed here: https://www.redhat.com/archives/dm-devel/2015-April/msg00160.html Fix this free_rq_clone() NULL pointer by simply checking if the mapped_device's type is DM_TYPE_MQ_REQUEST_BASED (clone's queue is blk-mq) rather than checking clone->q->mq_ops. This avoids the need to dereference clone->q, but a WARN_ON_ONCE is added to let us know if an uninitialized clone request is being completed. Reported-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 923496ba72a0..a930b72314ac 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1082,18 +1082,26 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) dm_put(md); } -static void free_rq_clone(struct request *clone) +static void free_rq_clone(struct request *clone, bool must_be_mapped) { struct dm_rq_target_io *tio = clone->end_io_data; struct mapped_device *md = tio->md; + WARN_ON_ONCE(must_be_mapped && !clone->q); + blk_rq_unprep_clone(clone); - if (clone->q->mq_ops) + if (md->type == DM_TYPE_MQ_REQUEST_BASED) + /* stacked on blk-mq queue(s) */ tio->ti->type->release_clone_rq(clone); else if (!md->queue->mq_ops) /* request_fn queue stacked on request_fn queue(s) */ free_clone_request(md, clone); + /* + * NOTE: for the blk-mq queue stacked on request_fn queue(s) case: + * no need to call free_clone_request() because we leverage blk-mq by + * allocating the clone at the end of the blk-mq pdu (see: clone_rq) + */ if (!md->queue->mq_ops) free_rq_tio(tio); @@ -1124,7 +1132,7 @@ static void dm_end_request(struct request *clone, int error) rq->sense_len = clone->sense_len; } - free_rq_clone(clone); + free_rq_clone(clone, true); if (!rq->q->mq_ops) blk_end_request_all(rq, error); else @@ -1143,7 +1151,7 @@ static void dm_unprep_request(struct request *rq) } if (clone) - free_rq_clone(clone); + free_rq_clone(clone, false); } /* From 9c4249c8e0221e5cfae758d35b768aee84abf6c0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 30 Apr 2015 14:58:43 +0100 Subject: [PATCH 095/104] modsign: change default key details Change default key details to be more obviously unspecified. Reported-by: Linus Torvalds Signed-off-by: David Howells Acked-by: James Morris Signed-off-by: Linus Torvalds --- Documentation/module-signing.txt | 6 +++--- kernel/Makefile | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/module-signing.txt b/Documentation/module-signing.txt index 09c2382ad055..c72702ec1ded 100644 --- a/Documentation/module-signing.txt +++ b/Documentation/module-signing.txt @@ -119,9 +119,9 @@ Most notably, in the x509.genkey file, the req_distinguished_name section should be altered from the default: [ req_distinguished_name ] - O = Magrathea - CN = Glacier signing key - emailAddress = slartibartfast@magrathea.h2g2 + #O = Unspecified company + CN = Build time autogenerated kernel key + #emailAddress = unspecified.user@unspecified.company The generated RSA key size can also be set with: diff --git a/kernel/Makefile b/kernel/Makefile index 0f8f8b0bc1bf..60c302cfb4d3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -197,9 +197,9 @@ x509.genkey: @echo >>x509.genkey "x509_extensions = myexts" @echo >>x509.genkey @echo >>x509.genkey "[ req_distinguished_name ]" - @echo >>x509.genkey "O = Magrathea" - @echo >>x509.genkey "CN = Glacier signing key" - @echo >>x509.genkey "emailAddress = slartibartfast@magrathea.h2g2" + @echo >>x509.genkey "#O = Unspecified company" + @echo >>x509.genkey "CN = Build time autogenerated kernel key" + @echo >>x509.genkey "#emailAddress = unspecified.user@unspecified.company" @echo >>x509.genkey @echo >>x509.genkey "[ myexts ]" @echo >>x509.genkey "basicConstraints=critical,CA:FALSE" From f94813f3c1d02090cc02dcfcbed339897830acb8 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 29 Apr 2015 15:59:35 -0700 Subject: [PATCH 096/104] mlx4_en: Use correct loop cursor in error path. Signed-off-by: Benjamin Poirier Fixes: 9e311e7 ("net/mlx4_en: Use affinity hint") Acked-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 0f1afc085d58..bf173d7b873b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1721,7 +1721,7 @@ int mlx4_en_start_port(struct net_device *dev) cq_err: while (rx_index--) { mlx4_en_deactivate_cq(priv, priv->rx_cq[rx_index]); - mlx4_en_free_affinity_hint(priv, i); + mlx4_en_free_affinity_hint(priv, rx_index); } for (i = 0; i < priv->rx_ring_num; i++) mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); From 17d5ceb6e43ea545d6d92db2f3ddb035233ba335 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 29 Apr 2015 16:52:51 -0400 Subject: [PATCH 097/104] net/mlx4_core: Fix unaligned accesses Addresses the following kernel logs seen during boot: Kernel unaligned access at TPC[100ee150] mlx4_QUERY_HCA+0x80/0x248 [mlx4_core] Kernel unaligned access at TPC[100f071c] mlx4_QUERY_ADAPTER+0x100/0x12c [mlx4_core] Kernel unaligned access at TPC[100f071c] mlx4_QUERY_ADAPTER+0x100/0x12c [mlx4_core] Kernel unaligned access at TPC[100f071c] mlx4_QUERY_ADAPTER+0x100/0x12c [mlx4_core] Kernel unaligned access at TPC[100f071c] mlx4_QUERY_ADAPTER+0x100/0x12c [mlx4_core] Signed-off-by: David Ahern Acked-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index a4079811b176..e30bf57ad7a1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -56,11 +56,13 @@ MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: on)"); #define MLX4_GET(dest, source, offset) \ do { \ void *__p = (char *) (source) + (offset); \ + u64 val; \ switch (sizeof (dest)) { \ case 1: (dest) = *(u8 *) __p; break; \ case 2: (dest) = be16_to_cpup(__p); break; \ case 4: (dest) = be32_to_cpup(__p); break; \ - case 8: (dest) = be64_to_cpup(__p); break; \ + case 8: val = get_unaligned((u64 *)__p); \ + (dest) = be64_to_cpu(val); break; \ default: __buggy_use_of_MLX4_GET(); \ } \ } while (0) @@ -1605,9 +1607,17 @@ static void get_board_id(void *vsd, char *board_id) * swaps each 4-byte word before passing it back to * us. Therefore we need to swab it before printing. */ - for (i = 0; i < 4; ++i) - ((u32 *) board_id)[i] = - swab32(*(u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4)); + u32 *bid_u32 = (u32 *)board_id; + + for (i = 0; i < 4; ++i) { + u32 *addr; + u32 val; + + addr = (u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4); + val = get_unaligned(addr); + val = swab32(val); + put_unaligned(val, &bid_u32[i]); + } } } From c232d8a8bb1416f7ec21bb1aabc7c4ec8a5a899e Mon Sep 17 00:00:00 2001 From: Tony Camuso Date: Thu, 30 Apr 2015 07:51:27 -0400 Subject: [PATCH 098/104] netxen_nic: use spin_[un]lock_bh around tx_clean_lock While testing this driver with DEBUG_LOCKDEP and DEBUG_SPINLOCK enabled did not produce any traces, it would be more prudent in the case of tx_clean_lock to use spin_[un]lock_bh, since this lock is manipulated in both the process and softirq contexts. This patch was tested for functionality and regressions with netperf and DEBUG_LOCKDEP and DEBUG_SPINLOCK enabled. Signed-off-by: Tony Camuso Acked-by: Neil Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c index 5c4068353f66..8da7c3faf817 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c @@ -135,7 +135,7 @@ void netxen_release_tx_buffers(struct netxen_adapter *adapter) int i, j; struct nx_host_tx_ring *tx_ring = adapter->tx_ring; - spin_lock(&adapter->tx_clean_lock); + spin_lock_bh(&adapter->tx_clean_lock); cmd_buf = tx_ring->cmd_buf_arr; for (i = 0; i < tx_ring->num_desc; i++) { buffrag = cmd_buf->frag_array; @@ -159,7 +159,7 @@ void netxen_release_tx_buffers(struct netxen_adapter *adapter) } cmd_buf++; } - spin_unlock(&adapter->tx_clean_lock); + spin_unlock_bh(&adapter->tx_clean_lock); } void netxen_free_sw_resources(struct netxen_adapter *adapter) From 07841f9d94c11afe00c0498cf242edf4075729f4 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 30 Apr 2015 17:32:46 +0300 Subject: [PATCH 099/104] net/mlx4_en: Schedule napi when RX buffers allocation fails When system is out of memory, refilling of RX buffers fails while the driver continue to pass the received packets to the kernel stack. At some point, when all RX buffers deplete, driver may fall into a sleep, and not recover when memory for new RX buffers is once again availible. This is because hardware does not have valid descriptors, so no interrupt will be generated for the driver to return to work in napi context. Fix it by schedule the napi poll function from stats_task delayed workqueue, as long as the allocations fail. Signed-off-by: Ido Shamay Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx4/en_netdev.c | 1 + drivers/net/ethernet/mellanox/mlx4/en_rx.c | 26 +++++++++++++++++-- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index bf173d7b873b..32f5ec737472 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1467,6 +1467,7 @@ static void mlx4_en_service_task(struct work_struct *work) if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) mlx4_en_ptp_overflow_check(mdev); + mlx4_en_recover_from_oom(priv); queue_delayed_work(mdev->workqueue, &priv->service_task, SERVICE_TASK_DELAY); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 4fdd3c37e47b..2a77a6b19121 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -244,6 +244,12 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp); } +static inline bool mlx4_en_is_ring_empty(struct mlx4_en_rx_ring *ring) +{ + BUG_ON((u32)(ring->prod - ring->cons) > ring->actual_size); + return ring->prod == ring->cons; +} + static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) { *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff); @@ -315,8 +321,7 @@ static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv, ring->cons, ring->prod); /* Unmap and free Rx buffers */ - BUG_ON((u32) (ring->prod - ring->cons) > ring->actual_size); - while (ring->cons != ring->prod) { + while (!mlx4_en_is_ring_empty(ring)) { index = ring->cons & ring->size_mask; en_dbg(DRV, priv, "Processing descriptor:%d\n", index); mlx4_en_free_rx_desc(priv, ring, index); @@ -491,6 +496,23 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) return err; } +/* We recover from out of memory by scheduling our napi poll + * function (mlx4_en_process_cq), which tries to allocate + * all missing RX buffers (call to mlx4_en_refill_rx_buffers). + */ +void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv) +{ + int ring; + + if (!priv->port_up) + return; + + for (ring = 0; ring < priv->rx_ring_num; ring++) { + if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) + napi_reschedule(&priv->rx_cq[ring]->napi); + } +} + void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 9de30216b146..d021f079f181 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -774,6 +774,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring); void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev); +void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv); int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride, int node); From e813bb2b955d9f72c94be7d592746b49929a499b Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Thu, 30 Apr 2015 17:07:50 +0200 Subject: [PATCH 100/104] net: fec: Fix RGMII-ID mode RGMII-ID uses an internal delay within the transmitter or receiver. This feature is phy specific. The rest of the communication is normal RGMII. So the fec driver has to check for all RGMII modes, not only 'PHY_INTERFACE_MODE_RGMII'. Signed-off-by: Markus Pargmann Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index f6a3a7abd468..66d47e448e4d 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -988,7 +988,10 @@ fec_restart(struct net_device *ndev) rcntl |= 0x40000000 | 0x00000020; /* RGMII, RMII or MII */ - if (fep->phy_interface == PHY_INTERFACE_MODE_RGMII) + if (fep->phy_interface == PHY_INTERFACE_MODE_RGMII || + fep->phy_interface == PHY_INTERFACE_MODE_RGMII_ID || + fep->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID || + fep->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) rcntl |= (1 << 6); else if (fep->phy_interface == PHY_INTERFACE_MODE_RMII) rcntl |= (1 << 8); From 082a75dad84d79d1c15ea9e50f31cb4bb4fa7fd6 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 25 Apr 2015 15:56:15 +0300 Subject: [PATCH 101/104] rbd: end I/O the entire obj_request on error When we end I/O struct request with error, we need to pass obj_request->length as @nr_bytes so that the entire obj_request worth of bytes is completed. Otherwise block layer ends up confused and we trip on rbd_assert(more ^ (which == img_request->obj_request_count)); in rbd_img_obj_callback() due to more being true no matter what. We already do it in most cases but we are missing some, in particular those where we don't even get a chance to submit any obj_requests, due to an early -ENOMEM for example. A number of obj_request->xferred assignments seem to be redundant but I haven't touched any of obj_request->xferred stuff to keep this small and isolated. Cc: Alex Elder Cc: stable@vger.kernel.org # 3.10+ Reported-by: Shawn Edwards Reviewed-by: Sage Weil Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 812523330a78..ec6c5c6e1ac9 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2264,6 +2264,11 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request) result, xferred); if (!img_request->result) img_request->result = result; + /* + * Need to end I/O on the entire obj_request worth of + * bytes in case of error. + */ + xferred = obj_request->length; } /* Image object requests don't own their page array */ From a134f083e79fb4c3d0a925691e732c56911b4326 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 1 May 2015 22:02:47 -0400 Subject: [PATCH 102/104] ipv4: Missing sk_nulls_node_init() in ping_unhash(). If we don't do that, then the poison value is left in the ->pprev backlink. This can cause crashes if we do a disconnect, followed by a connect(). Tested-by: Linus Torvalds Reported-by: Wen Xu Signed-off-by: David S. Miller --- net/ipv4/ping.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index a93f260cf24c..05ff44b758df 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -158,6 +158,7 @@ void ping_unhash(struct sock *sk) if (sk_hashed(sk)) { write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); + sk_nulls_node_init(&sk->sk_nulls_node); sock_put(sk); isk->inet_num = 0; isk->inet_sport = 0; From feda5f939eafa4af94dfb547847806e0f2df73b8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 2 May 2015 08:42:38 +0930 Subject: [PATCH 103/104] virtio: pass baton to Michael Tsirkin With my job change kernel work will be "own time"; I'm keeping lguest and modules (and the virtio standards work), but virtio kernel has to go. This makes it clear that Michael is in charge. He's good, but having me watch over his shoulder won't help. Good luck Michael! Signed-off-by: Rusty Russell Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2e5bbc0d68b2..16227759dfa8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10523,7 +10523,6 @@ F: include/linux/virtio_console.h F: include/uapi/linux/virtio_console.h VIRTIO CORE, NET AND BLOCK DRIVERS -M: Rusty Russell M: "Michael S. Tsirkin" L: virtualization@lists.linux-foundation.org S: Maintained From e412d3a32badcf17541d7443b033769fdf39b545 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Sat, 2 May 2015 08:42:29 +0930 Subject: [PATCH 104/104] virtio: fix typo in vring_need_event() doc comment Here the "other side" refers to the guest or host. Signed-off-by: Stefan Hajnoczi Signed-off-by: Rusty Russell Signed-off-by: Linus Torvalds --- include/uapi/linux/virtio_ring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index a3318f31e8e7..915980ac68df 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -155,7 +155,7 @@ static inline unsigned vring_size(unsigned int num, unsigned long align) } /* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ -/* Assuming a given event_idx value from the other size, if +/* Assuming a given event_idx value from the other side, if * we have just incremented index from old to new_idx, * should we trigger an event? */ static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)