From a95a9e5f0fdf9ef7c1b1cbf2788cb0df28a97bfb Mon Sep 17 00:00:00 2001 From: Jagadeesh Pagadala Date: Thu, 28 Mar 2019 02:58:45 +0530 Subject: [PATCH 01/14] arch:sparc:kernel/uprobes.c : Remove duplicate header Remove duplicate header which is included twice. Signed-off-by: Jagadeesh Pagadala Reviewed-by: Mukesh Ojha Signed-off-by: David S. Miller --- arch/sparc/kernel/uprobes.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sparc/kernel/uprobes.c b/arch/sparc/kernel/uprobes.c index d852ae56ddc1a..c44bf5b85de8c 100644 --- a/arch/sparc/kernel/uprobes.c +++ b/arch/sparc/kernel/uprobes.c @@ -29,7 +29,6 @@ #include #include -#include /* Compute the address of the breakpoint instruction and return it. * From 269fe56551c68cde57e477a6810ed57921dfe54f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 3 Apr 2019 17:32:24 +0900 Subject: [PATCH 02/14] sparc: vdso: add FORCE to the build rule of %.so $(call if_changed,...) must have FORCE as a prerequisite. Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Signed-off-by: David S. Miller --- arch/sparc/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index 74e97f77e23b2..83c4b463cb3de 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -68,7 +68,7 @@ CFLAGS_REMOVE_vdso-note.o = -pg CFLAGS_REMOVE_vclock_gettime.o = -pg $(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg +$(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) From 031abf0b70cb6804eefb11340463a2277e52f853 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 16 Apr 2019 20:23:39 +0200 Subject: [PATCH 03/14] sparc/iommu: use !PageHighMem to check if a page has a kernel mapping This deobfuscates the check a bit, and prepares for future changes. Signed-off-by: Christoph Hellwig Reported-by: Guenter Roeck Signed-off-by: David S. 
Miller --- arch/sparc/mm/iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index e8d5d73ca40d6..dcdadac03fdfe 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -273,7 +273,8 @@ static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl, * XXX Is this a good assumption? * XXX What if someone else unmaps it here and races us? */ - if ((page = (unsigned long) page_address(sg_page(sg))) != 0) { + if (!PageHighMem(sg_page(sg))) { + page = (unsigned long)page_address(sg_page(sg)); for (i = 0; i < n; i++) { if (page != oldpage) { /* Already flushed? */ flush_page_for_dma(page); From a7fce1f7ca2f092fe44a17cb158deda97060aab4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 16 Apr 2019 20:23:40 +0200 Subject: [PATCH 04/14] sparc/iommu: use sbus_iommu_unmap_page in sbus_iommu_unmap_sg Use the page-level helper instead of duplicating the logic, while also fixing the incorrect handling of larger than page sized offsets in the sg variant. Signed-off-by: Christoph Hellwig Reported-by: Guenter Roeck Signed-off-by: David S. 
Miller --- arch/sparc/mm/iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index dcdadac03fdfe..f47a6ce0acaa0 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -321,11 +321,11 @@ static void sbus_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { struct scatterlist *sg; - int i, n; + int i; for_each_sg(sgl, sg, nents, i) { - n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; - iommu_release_one(dev, sg->dma_address & PAGE_MASK, n); + sbus_iommu_unmap_page(dev, sg->dma_address, sg->length, dir, + attrs); sg->dma_address = 0x21212121; } } From f25b23bc156fef3211fe4adf9692eca5ce2fd082 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 16 Apr 2019 20:23:41 +0200 Subject: [PATCH 05/14] sparc/iommu: merge iommu_release_one and sbus_iommu_unmap_page There is only one caller of iommu_release_one left, so merge it into that one to clean things up a bit. Signed-off-by: Christoph Hellwig Reported-by: Guenter Roeck Signed-off-by: David S. 
Miller --- arch/sparc/mm/iommu.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index f47a6ce0acaa0..7cb9ddda75318 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -291,14 +291,17 @@ static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl, return nents; } -static void iommu_release_one(struct device *dev, u32 busa, int npages) +static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t len, enum dma_data_direction dir, unsigned long attrs) { struct iommu_struct *iommu = dev->archdata.iommu; - int ioptex; - int i; + unsigned int busa = dma_addr & PAGE_MASK; + unsigned long off = dma_addr & ~PAGE_MASK; + unsigned int npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT; + unsigned int ioptex = (busa - iommu->start) >> PAGE_SHIFT; + unsigned int i; BUG_ON(busa < iommu->start); - ioptex = (busa - iommu->start) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { iopte_val(iommu->page_table[ioptex + i]) = 0; iommu_invalidate_page(iommu->regs, busa); @@ -307,16 +310,6 @@ static void iommu_release_one(struct device *dev, u32 busa, int npages) bit_map_clear(&iommu->usemap, ioptex, npages); } -static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr, - size_t len, enum dma_data_direction dir, unsigned long attrs) -{ - unsigned long off = dma_addr & ~PAGE_MASK; - int npages; - - npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT; - iommu_release_one(dev, dma_addr & PAGE_MASK, npages); -} - static void sbus_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { From ff5cbec0c3ea8b96c4cb7bcd9f484d8665d394e6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 16 Apr 2019 20:23:42 +0200 Subject: [PATCH 06/14] sparc/iommu: create a common helper for map_sg Share the code for the global and per-page flush map_sg loops using a simple bool parameter to 
disable the per-page flush for the former variant. Signed-off-by: Christoph Hellwig Reported-by: Guenter Roeck Signed-off-by: David S. Miller --- arch/sparc/mm/iommu.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 7cb9ddda75318..f90d943a3a272 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -241,25 +241,9 @@ static dma_addr_t sbus_iommu_map_page_pflush(struct device *dev, return __sbus_iommu_map_page(dev, page, offset, len); } -static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *sg; - int i, n; - - flush_page_for_dma(0); - - for_each_sg(sgl, sg, nents, i) { - n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; - sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset; - sg->dma_length = sg->length; - } - - return nents; -} - -static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) +static int __sbus_iommu_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs, + bool per_page_flush) { unsigned long page, oldpage = 0; struct scatterlist *sg; @@ -273,7 +257,7 @@ static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl, * XXX Is this a good assumption? * XXX What if someone else unmaps it here and races us? */ - if (!PageHighMem(sg_page(sg))) { + if (per_page_flush && !PageHighMem(sg_page(sg))) { page = (unsigned long)page_address(sg_page(sg)); for (i = 0; i < n; i++) { if (page != oldpage) { /* Already flushed? 
*/ @@ -291,6 +275,19 @@ static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl, return nents; } +static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + flush_page_for_dma(0); + return __sbus_iommu_map_sg(dev, sgl, nents, dir, attrs, false); +} + +static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + return __sbus_iommu_map_sg(dev, sgl, nents, dir, attrs, true); +} + static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t len, enum dma_data_direction dir, unsigned long attrs) { From b82059428c0577c2ec082974d7956291d5eae2cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 16 Apr 2019 20:23:43 +0200 Subject: [PATCH 07/14] sparc/iommu: pass a physical address to iommu_get_one No need for the page structure, just the paddr / pfn. This is going to simplify fixes to the callers. Signed-off-by: Christoph Hellwig Reported-by: Guenter Roeck Signed-off-by: David S. 
Miller --- arch/sparc/mm/iommu.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index f90d943a3a272..19d9266e4049d 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -175,16 +175,17 @@ static void iommu_flush_iotlb(iopte_t *iopte, unsigned int niopte) } } -static u32 iommu_get_one(struct device *dev, struct page *page, int npages) +static u32 iommu_get_one(struct device *dev, phys_addr_t paddr, int npages) { struct iommu_struct *iommu = dev->archdata.iommu; int ioptex; iopte_t *iopte, *iopte0; unsigned int busa, busa0; + unsigned long pfn = __phys_to_pfn(paddr); int i; /* page color = pfn of page */ - ioptex = bit_map_string_get(&iommu->usemap, npages, page_to_pfn(page)); + ioptex = bit_map_string_get(&iommu->usemap, npages, pfn); if (ioptex < 0) panic("iommu out"); busa0 = iommu->start + (ioptex << PAGE_SHIFT); @@ -193,11 +194,11 @@ static u32 iommu_get_one(struct device *dev, struct page *page, int npages) busa = busa0; iopte = iopte0; for (i = 0; i < npages; i++) { - iopte_val(*iopte) = MKIOPTE(page_to_pfn(page), IOPERM); + iopte_val(*iopte) = MKIOPTE(pfn, IOPERM); iommu_invalidate_page(iommu->regs, busa); busa += PAGE_SIZE; iopte++; - page++; + pfn++; } iommu_flush_iotlb(iopte0, npages); @@ -215,7 +216,7 @@ static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page, /* XXX So what is maxphys for us and how do drivers know it? 
*/ if (!len || len > 256 * 1024) return DMA_MAPPING_ERROR; - return iommu_get_one(dev, virt_to_page(vaddr), npages) + off; + return iommu_get_one(dev, virt_to_phys(vaddr), npages) + off; } static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev, @@ -268,7 +269,7 @@ static int __sbus_iommu_map_sg(struct device *dev, struct scatterlist *sgl, } } - sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset; + sg->dma_address = iommu_get_one(dev, sg_phys(sg), n) + sg->offset; sg->dma_length = sg->length; } From 8668b38c1c7720baf76da15a7a7eef43ae0c65a4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 16 Apr 2019 20:23:44 +0200 Subject: [PATCH 08/14] sparc/iommu: move per-page flushing into __sbus_iommu_map_page This prepares for reusing __sbus_iommu_map_page in the map_sg path. Signed-off-by: Christoph Hellwig Reported-by: Guenter Roeck Signed-off-by: David S. Miller --- arch/sparc/mm/iommu.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 19d9266e4049d..7e191c8ae46a3 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -207,15 +207,25 @@ static u32 iommu_get_one(struct device *dev, phys_addr_t paddr, int npages) } static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t len) + unsigned long offset, size_t len, bool per_page_flush) { void *vaddr = page_address(page) + offset; unsigned long off = (unsigned long)vaddr & ~PAGE_MASK; unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - + /* XXX So what is maxphys for us and how do drivers know it? 
*/ if (!len || len > 256 * 1024) return DMA_MAPPING_ERROR; + + if (per_page_flush) { + unsigned long p = (unsigned long)vaddr & PAGE_MASK; + + while (p < (unsigned long)vaddr + len) { + flush_page_for_dma(p); + p += PAGE_SIZE; + } + } + return iommu_get_one(dev, virt_to_phys(vaddr), npages) + off; } @@ -224,22 +234,14 @@ static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev, enum dma_data_direction dir, unsigned long attrs) { flush_page_for_dma(0); - return __sbus_iommu_map_page(dev, page, offset, len); + return __sbus_iommu_map_page(dev, page, offset, len, false); } static dma_addr_t sbus_iommu_map_page_pflush(struct device *dev, struct page *page, unsigned long offset, size_t len, enum dma_data_direction dir, unsigned long attrs) { - void *vaddr = page_address(page) + offset; - unsigned long p = ((unsigned long)vaddr) & PAGE_MASK; - - while (p < (unsigned long)vaddr + len) { - flush_page_for_dma(p); - p += PAGE_SIZE; - } - - return __sbus_iommu_map_page(dev, page, offset, len); + return __sbus_iommu_map_page(dev, page, offset, len, true); } static int __sbus_iommu_map_sg(struct device *dev, struct scatterlist *sgl, From 7e996890b88078011bfb55ce072712d464207dad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 16 Apr 2019 20:23:45 +0200 Subject: [PATCH 09/14] sparc/iommu: fix __sbus_iommu_map_page for highmem pages __sbus_iommu_map_page currently assumes all pages are mapped into the kernel direct mapping. Switch to using physical address instead of virtual ones for all the normal mapping operations, and only use the virtual addresses for cache flushing when not operating on a highmem page. Signed-off-by: Christoph Hellwig Reported-by: Guenter Roeck Signed-off-by: David S. 
Miller --- arch/sparc/mm/iommu.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 7e191c8ae46a3..37b5ce7657f61 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -209,24 +209,23 @@ static u32 iommu_get_one(struct device *dev, phys_addr_t paddr, int npages) static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t len, bool per_page_flush) { - void *vaddr = page_address(page) + offset; - unsigned long off = (unsigned long)vaddr & ~PAGE_MASK; + phys_addr_t paddr = page_to_phys(page) + offset; + unsigned long off = paddr & ~PAGE_MASK; unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; /* XXX So what is maxphys for us and how do drivers know it? */ if (!len || len > 256 * 1024) return DMA_MAPPING_ERROR; - if (per_page_flush) { - unsigned long p = (unsigned long)vaddr & PAGE_MASK; + if (per_page_flush && !PageHighMem(page)) { + unsigned long vaddr, p; - while (p < (unsigned long)vaddr + len) { + vaddr = (unsigned long)page_address(page) + offset; + for (p = vaddr & PAGE_MASK; p < vaddr + len; p += PAGE_SIZE) flush_page_for_dma(p); - p += PAGE_SIZE; - } } - return iommu_get_one(dev, virt_to_phys(vaddr), npages) + off; + return iommu_get_one(dev, paddr, npages) + off; } static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev, From edb1f07203ba8856b24bcddf8326386ba6a03291 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 16 Apr 2019 20:23:46 +0200 Subject: [PATCH 10/14] sparc/iommu: use __sbus_iommu_map_page to implement the map_sg path This means we handle > PAGE_SIZE offsets fine, and grow the size check so far only performed in the map_page path. We lose the optimization to not double flush a page if it appears in multiple consecutive SG list entries. But at least for block I/O those don't happen anymore since we properly merge in higher layers anyway. 
Signed-off-by: Christoph Hellwig Reported-by: Guenter Roeck Signed-off-by: David S. Miller --- arch/sparc/mm/iommu.c | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 37b5ce7657f61..8fbc08d148361 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -217,6 +217,11 @@ static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page, if (!len || len > 256 * 1024) return DMA_MAPPING_ERROR; + /* + * We expect unmapped highmem pages to be not in the cache. + * XXX Is this a good assumption? + * XXX What if someone else unmaps it here and races us? + */ if (per_page_flush && !PageHighMem(page)) { unsigned long vaddr, p; @@ -247,30 +252,14 @@ static int __sbus_iommu_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs, bool per_page_flush) { - unsigned long page, oldpage = 0; struct scatterlist *sg; - int i, j, n; + int j; for_each_sg(sgl, sg, nents, j) { - n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; - - /* - * We expect unmapped highmem pages to be not in the cache. - * XXX Is this a good assumption? - * XXX What if someone else unmaps it here and races us? - */ - if (per_page_flush && !PageHighMem(sg_page(sg))) { - page = (unsigned long)page_address(sg_page(sg)); - for (i = 0; i < n; i++) { - if (page != oldpage) { /* Already flushed? 
*/ - flush_page_for_dma(page); - oldpage = page; - } - page += PAGE_SIZE; - } - } - - sg->dma_address = iommu_get_one(dev, sg_phys(sg), n) + sg->offset; + sg->dma_address =__sbus_iommu_map_page(dev, sg_page(sg), + sg->offset, sg->length, per_page_flush); + if (sg->dma_address == DMA_MAPPING_ERROR) + return 0; sg->dma_length = sg->length; } From 376b1371a9f29112ae000cc0cade174a9a670053 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 16 Apr 2019 20:23:47 +0200 Subject: [PATCH 11/14] sparc/iommu: merge iommu_get_one and __sbus_iommu_map_page There is only one caller of iommu_get_one left, so merge it into that one to clean things up a bit. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- arch/sparc/mm/iommu.c | 56 +++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 8fbc08d148361..71ac353032b68 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -175,43 +175,17 @@ static void iommu_flush_iotlb(iopte_t *iopte, unsigned int niopte) } } -static u32 iommu_get_one(struct device *dev, phys_addr_t paddr, int npages) -{ - struct iommu_struct *iommu = dev->archdata.iommu; - int ioptex; - iopte_t *iopte, *iopte0; - unsigned int busa, busa0; - unsigned long pfn = __phys_to_pfn(paddr); - int i; - - /* page color = pfn of page */ - ioptex = bit_map_string_get(&iommu->usemap, npages, pfn); - if (ioptex < 0) - panic("iommu out"); - busa0 = iommu->start + (ioptex << PAGE_SHIFT); - iopte0 = &iommu->page_table[ioptex]; - - busa = busa0; - iopte = iopte0; - for (i = 0; i < npages; i++) { - iopte_val(*iopte) = MKIOPTE(pfn, IOPERM); - iommu_invalidate_page(iommu->regs, busa); - busa += PAGE_SIZE; - iopte++; - pfn++; - } - - iommu_flush_iotlb(iopte0, npages); - - return busa0; -} - static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t len, bool per_page_flush) { + struct iommu_struct 
*iommu = dev->archdata.iommu; phys_addr_t paddr = page_to_phys(page) + offset; unsigned long off = paddr & ~PAGE_MASK; unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long pfn = __phys_to_pfn(paddr); + unsigned int busa, busa0; + iopte_t *iopte, *iopte0; + int ioptex, i; /* XXX So what is maxphys for us and how do drivers know it? */ if (!len || len > 256 * 1024) @@ -230,7 +204,25 @@ static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page, flush_page_for_dma(p); } - return iommu_get_one(dev, paddr, npages) + off; + /* page color = pfn of page */ + ioptex = bit_map_string_get(&iommu->usemap, npages, pfn); + if (ioptex < 0) + panic("iommu out"); + busa0 = iommu->start + (ioptex << PAGE_SHIFT); + iopte0 = &iommu->page_table[ioptex]; + + busa = busa0; + iopte = iopte0; + for (i = 0; i < npages; i++) { + iopte_val(*iopte) = MKIOPTE(pfn, IOPERM); + iommu_invalidate_page(iommu->regs, busa); + busa += PAGE_SIZE; + iopte++; + pfn++; + } + + iommu_flush_iotlb(iopte0, npages); + return busa0 + off; } static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev, From 5d5db1c94f8c412d1c9755bd13194701301b9046 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 22 Apr 2019 10:28:02 -0300 Subject: [PATCH 12/14] docs: sparc: convert to ReST Rename the sparc documentation files to ReST, add an index for them and adjust in order to produce a nice html output via the Sphinx build system. There is an excerpt from a document under oradax dir. It doesn't seem to make much sense to convert this one to ReST, so let's add it as an included document. At its new index.rst, let's add a :orphan: while this is not linked to the main index.rst file, in order to avoid build warnings. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: David S. 
Miller --- Documentation/sparc/{adi.txt => adi.rst} | 188 +++++++++--------- .../sparc/{console.txt => console.rst} | 4 +- Documentation/sparc/index.rst | 13 ++ .../oradax/{oracle-dax.txt => oracle-dax.rst} | 58 ++++-- drivers/sbus/char/oradax.c | 2 +- 5 files changed, 151 insertions(+), 114 deletions(-) rename Documentation/sparc/{adi.txt => adi.rst} (70%) rename Documentation/sparc/{console.txt => console.rst} (53%) create mode 100644 Documentation/sparc/index.rst rename Documentation/sparc/oradax/{oracle-dax.txt => oracle-dax.rst} (95%) diff --git a/Documentation/sparc/adi.txt b/Documentation/sparc/adi.rst similarity index 70% rename from Documentation/sparc/adi.txt rename to Documentation/sparc/adi.rst index e1aed155fb89e..857ad30f95693 100644 --- a/Documentation/sparc/adi.txt +++ b/Documentation/sparc/adi.rst @@ -1,3 +1,4 @@ +================================ Application Data Integrity (ADI) ================================ @@ -44,12 +45,15 @@ provided by the hypervisor to the kernel. Kernel returns the value of ADI block size to userspace using auxiliary vector along with other ADI info. Following auxiliary vectors are provided by the kernel: + ============ =========================================== AT_ADI_BLKSZ ADI block size. This is the granularity and alignment, in bytes, of ADI versioning. AT_ADI_NBITS Number of ADI version bits in the VA + ============ =========================================== -IMPORTANT NOTES: +IMPORTANT NOTES +=============== - Version tag values of 0x0 and 0xf are reserved. These values match any tag in virtual address and never generate a mismatch exception. 
@@ -86,11 +90,12 @@ IMPORTANT NOTES: ADI related traps ------------------ +================= With ADI enabled, following new traps may occur: Disrupting memory corruption +---------------------------- When a store accesses a memory localtion that has TTE.mcd=1, the task is running with ADI enabled (PSTATE.mcde=1), and the ADI @@ -100,7 +105,7 @@ Disrupting memory corruption first. Hypervisor creates a sun4v error report and sends a resumable error (TT=0x7e) trap to the kernel. The kernel sends a SIGSEGV to the task that resulted in this trap with the following - info: + info:: siginfo.si_signo = SIGSEGV; siginfo.errno = 0; @@ -110,6 +115,7 @@ Disrupting memory corruption Precise memory corruption +------------------------- When a store accesses a memory location that has TTE.mcd=1, the task is running with ADI enabled (PSTATE.mcde=1), and the ADI @@ -118,7 +124,7 @@ Precise memory corruption MCD precise exception is enabled (MCDPERR=1), a precise exception is sent to the kernel with TT=0x1a. The kernel sends a SIGSEGV to the task that resulted in this trap with the following - info: + info:: siginfo.si_signo = SIGSEGV; siginfo.errno = 0; @@ -126,17 +132,19 @@ Precise memory corruption siginfo.si_addr = addr; /* address that caused trap */ siginfo.si_trapno = 0; - NOTE: ADI tag mismatch on a load always results in precise trap. + NOTE: + ADI tag mismatch on a load always results in precise trap. MCD disabled +------------ When a task has not enabled ADI and attempts to set ADI version on a memory address, processor sends an MCD disabled trap. This trap is handled by hypervisor first and the hypervisor vectors this trap through to the kernel as Data Access Exception trap with fault type set to 0xa (invalid ASI). 
When this occurs, the kernel - sends the task SIGSEGV signal with following info: + sends the task SIGSEGV signal with following info:: siginfo.si_signo = SIGSEGV; siginfo.errno = 0; @@ -149,35 +157,35 @@ Sample program to use ADI ------------------------- Following sample program is meant to illustrate how to use the ADI -functionality. - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef AT_ADI_BLKSZ -#define AT_ADI_BLKSZ 48 -#endif -#ifndef AT_ADI_NBITS -#define AT_ADI_NBITS 49 -#endif - -#ifndef PROT_ADI -#define PROT_ADI 0x10 -#endif - -#define BUFFER_SIZE 32*1024*1024UL - -main(int argc, char* argv[], char* envp[]) -{ - unsigned long i, mcde, adi_blksz, adi_nbits; - char *shmaddr, *tmp_addr, *end, *veraddr, *clraddr; - int shmid, version; +functionality:: + + #include + #include + #include + #include + #include + #include + #include + #include + + #ifndef AT_ADI_BLKSZ + #define AT_ADI_BLKSZ 48 + #endif + #ifndef AT_ADI_NBITS + #define AT_ADI_NBITS 49 + #endif + + #ifndef PROT_ADI + #define PROT_ADI 0x10 + #endif + + #define BUFFER_SIZE 32*1024*1024UL + + main(int argc, char* argv[], char* envp[]) + { + unsigned long i, mcde, adi_blksz, adi_nbits; + char *shmaddr, *tmp_addr, *end, *veraddr, *clraddr; + int shmid, version; Elf64_auxv_t *auxv; adi_blksz = 0; @@ -202,77 +210,77 @@ main(int argc, char* argv[], char* envp[]) printf("\tBlock size = %ld\n", adi_blksz); printf("\tNumber of bits = %ld\n", adi_nbits); - if ((shmid = shmget(2, BUFFER_SIZE, - IPC_CREAT | SHM_R | SHM_W)) < 0) { - perror("shmget failed"); - exit(1); - } + if ((shmid = shmget(2, BUFFER_SIZE, + IPC_CREAT | SHM_R | SHM_W)) < 0) { + perror("shmget failed"); + exit(1); + } - shmaddr = shmat(shmid, NULL, 0); - if (shmaddr == (char *)-1) { - perror("shm attach failed"); - shmctl(shmid, IPC_RMID, NULL); - exit(1); - } + shmaddr = shmat(shmid, NULL, 0); + if (shmaddr == (char *)-1) { + perror("shm attach failed"); + shmctl(shmid, IPC_RMID, NULL); + exit(1); + 
} if (mprotect(shmaddr, BUFFER_SIZE, PROT_READ|PROT_WRITE|PROT_ADI)) { perror("mprotect failed"); goto err_out; } - /* Set the ADI version tag on the shm segment - */ - version = 10; - tmp_addr = shmaddr; - end = shmaddr + BUFFER_SIZE; - while (tmp_addr < end) { - asm volatile( - "stxa %1, [%0]0x90\n\t" - : - : "r" (tmp_addr), "r" (version)); - tmp_addr += adi_blksz; - } + /* Set the ADI version tag on the shm segment + */ + version = 10; + tmp_addr = shmaddr; + end = shmaddr + BUFFER_SIZE; + while (tmp_addr < end) { + asm volatile( + "stxa %1, [%0]0x90\n\t" + : + : "r" (tmp_addr), "r" (version)); + tmp_addr += adi_blksz; + } asm volatile("membar #Sync\n\t"); - /* Create a versioned address from the normal address by placing + /* Create a versioned address from the normal address by placing * version tag in the upper adi_nbits bits - */ - tmp_addr = (void *) ((unsigned long)shmaddr << adi_nbits); - tmp_addr = (void *) ((unsigned long)tmp_addr >> adi_nbits); - veraddr = (void *) (((unsigned long)version << (64-adi_nbits)) - | (unsigned long)tmp_addr); - - printf("Starting the writes:\n"); - for (i = 0; i < BUFFER_SIZE; i++) { - veraddr[i] = (char)(i); - if (!(i % (1024 * 1024))) - printf("."); - } - printf("\n"); - - printf("Verifying data..."); + */ + tmp_addr = (void *) ((unsigned long)shmaddr << adi_nbits); + tmp_addr = (void *) ((unsigned long)tmp_addr >> adi_nbits); + veraddr = (void *) (((unsigned long)version << (64-adi_nbits)) + | (unsigned long)tmp_addr); + + printf("Starting the writes:\n"); + for (i = 0; i < BUFFER_SIZE; i++) { + veraddr[i] = (char)(i); + if (!(i % (1024 * 1024))) + printf("."); + } + printf("\n"); + + printf("Verifying data..."); fflush(stdout); - for (i = 0; i < BUFFER_SIZE; i++) - if (veraddr[i] != (char)i) - printf("\nIndex %lu mismatched\n", i); - printf("Done.\n"); + for (i = 0; i < BUFFER_SIZE; i++) + if (veraddr[i] != (char)i) + printf("\nIndex %lu mismatched\n", i); + printf("Done.\n"); - /* Disable ADI and clean up - */ + /* 
Disable ADI and clean up + */ if (mprotect(shmaddr, BUFFER_SIZE, PROT_READ|PROT_WRITE)) { perror("mprotect failed"); goto err_out; } - if (shmdt((const void *)shmaddr) != 0) - perror("Detach failure"); - shmctl(shmid, IPC_RMID, NULL); + if (shmdt((const void *)shmaddr) != 0) + perror("Detach failure"); + shmctl(shmid, IPC_RMID, NULL); - exit(0); + exit(0); -err_out: - if (shmdt((const void *)shmaddr) != 0) - perror("Detach failure"); - shmctl(shmid, IPC_RMID, NULL); - exit(1); -} + err_out: + if (shmdt((const void *)shmaddr) != 0) + perror("Detach failure"); + shmctl(shmid, IPC_RMID, NULL); + exit(1); + } diff --git a/Documentation/sparc/console.txt b/Documentation/sparc/console.rst similarity index 53% rename from Documentation/sparc/console.txt rename to Documentation/sparc/console.rst index 5aa735a44e022..73132db83ece7 100644 --- a/Documentation/sparc/console.txt +++ b/Documentation/sparc/console.rst @@ -1,5 +1,5 @@ -Steps for sending 'break' on sunhv console: -=========================================== +Steps for sending 'break' on sunhv console +========================================== On Baremetal: 1. press Esc + 'B' diff --git a/Documentation/sparc/index.rst b/Documentation/sparc/index.rst new file mode 100644 index 0000000000000..91f7d6643dd5d --- /dev/null +++ b/Documentation/sparc/index.rst @@ -0,0 +1,13 @@ +:orphan: + +================== +Sparc Architecture +================== + +.. 
toctree:: + :maxdepth: 1 + + console + adi + + oradax/oracle-dax diff --git a/Documentation/sparc/oradax/oracle-dax.txt b/Documentation/sparc/oradax/oracle-dax.rst similarity index 95% rename from Documentation/sparc/oradax/oracle-dax.txt rename to Documentation/sparc/oradax/oracle-dax.rst index 9d53ac93286fc..d1e14d572918c 100644 --- a/Documentation/sparc/oradax/oracle-dax.txt +++ b/Documentation/sparc/oradax/oracle-dax.rst @@ -1,5 +1,6 @@ +======================================= Oracle Data Analytics Accelerator (DAX) ---------------------------------------- +======================================= DAX is a coprocessor which resides on the SPARC M7 (DAX1) and M8 (DAX2) processor chips, and has direct access to the CPU's L3 caches @@ -17,6 +18,7 @@ code sufficient to write user or kernel applications that use DAX functionality. The user library is open source and available at: + https://oss.oracle.com/git/gitweb.cgi?p=libdax.git The Hypervisor interface to the coprocessor is described in detail in @@ -26,7 +28,7 @@ Specification" version 3.0.20+15, dated 2017-09-25. High Level Overview -------------------- +=================== A coprocessor request is described by a Command Control Block (CCB). The CCB contains an opcode and various parameters. The opcode @@ -52,7 +54,7 @@ thread. Addressing Memory ------------------ +================= The kernel does not have access to physical memory in the Sun4v architecture, as there is an additional level of memory virtualization @@ -77,7 +79,7 @@ the request. The Driver API --------------- +============== An application makes requests to the driver via the write() system call, and gets results (if any) via read(). The completion areas are @@ -108,6 +110,7 @@ equal to the number of bytes given in the call. Otherwise -1 is returned and errno is set. CCB_DEQUEUE +----------- Tells the driver to clean up resources associated with past requests. 
Since no interrupt is generated upon the completion of a @@ -116,12 +119,14 @@ further status information is returned, so the user should not subsequently call read(). CCB_KILL +-------- Kills a CCB during execution. The CCB is guaranteed to not continue executing once this call returns successfully. On success, read() must be called to retrieve the result of the action. CCB_INFO +-------- Retrieves information about a currently executing CCB. Note that some Hypervisors might return 'notfound' when the CCB is in 'inprogress' @@ -130,6 +135,7 @@ CCB_KILL must be invoked on that CCB. Upon success, read() must be called to retrieve the details of the action. Submission of an array of CCBs for execution +--------------------------------------------- A write() whose length is a multiple of the CCB size is treated as a submit operation. The file offset is treated as the index of the @@ -146,6 +152,7 @@ status will reflect the error caused by the first CCB that was not accepted, and status_data will provide additional data in some cases. MMAP +---- The mmap() function provides access to the completion area allocated in the driver. Note that the completion area is not writeable by the @@ -153,7 +160,7 @@ user process, and the mmap call must not specify PROT_WRITE. Completion of a Request ------------------------ +======================= The first byte in each completion area is the command status which is updated by the coprocessor hardware. Software may take advantage of @@ -172,7 +179,7 @@ and resumption of execution may be just a few nanoseconds. Application Life Cycle of a DAX Submission ------------------------------------------- +========================================== - open dax device - call mmap() to get the completion area address @@ -187,7 +194,7 @@ Application Life Cycle of a DAX Submission Memory Constraints ------------------- +================== The DAX hardware operates only on physical addresses. 
Therefore, it is not aware of virtual memory mappings and the discontiguities that may @@ -226,7 +233,7 @@ CCB Structure ------------- A CCB is an array of 8 64-bit words. Several of these words provide command opcodes, parameters, flags, etc., and the rest are addresses -for the completion area, output buffer, and various inputs: +for the completion area, output buffer, and various inputs:: struct ccb { u64 control; @@ -252,7 +259,7 @@ The first word (control) is examined by the driver for the following: Example Code ------------- +============ The DAX is accessible to both user and kernel code. The kernel code can make hypercalls directly while the user code must use wrappers @@ -265,7 +272,7 @@ arch/sparc/include/uapi/asm/oradax.h must be included. First, the proper device must be opened. For M7 it will be /dev/oradax1 and for M8 it will be /dev/oradax2. The simplest -procedure is to attempt to open both, as only one will succeed: +procedure is to attempt to open both, as only one will succeed:: fd = open("/dev/oradax1", O_RDWR); if (fd < 0) @@ -273,7 +280,7 @@ procedure is to attempt to open both, as only one will succeed: if (fd < 0) /* No DAX found */ -Next, the completion area must be mapped: +Next, the completion area must be mapped:: completion_area = mmap(NULL, DAX_MMAP_LEN, PROT_READ, MAP_SHARED, fd, 0); @@ -295,7 +302,7 @@ is the input bitmap inverted. For details of all the parameters and bits used in this CCB, please refer to section 36.2.1.3 of the DAX Hypervisor API document, which -describes the Scan command in detail. +describes the Scan command in detail:: ccb->control = /* Table 36.1, CCB Header Format */ (2L << 48) /* command = Scan Value */ @@ -326,7 +333,7 @@ describes the Scan command in detail. The CCB submission is a write() or pwrite() system call to the driver. 
If the call fails, then a read() must be used to retrieve the -status: +status:: if (pwrite(fd, ccb, 64, 0) != 64) { struct ccb_exec_result status; @@ -337,7 +344,7 @@ status: After a successful submission of the CCB, the completion area may be polled to determine when the DAX is finished. Detailed information on the contents of the completion area can be found in section 36.2.2 of -the DAX HV API document. +the DAX HV API document:: while (1) { /* Monitored Load */ @@ -355,7 +362,7 @@ the DAX HV API document. A completion area status of 1 indicates successful completion of the CCB and validity of the output bitmap, which may be used immediately. All other non-zero values indicate error conditions which are -described in section 36.2.2. +described in section 36.2.2:: if (completion_area[0] != 1) { /* section 36.2.2, 1 = command ran and succeeded */ /* completion_area[0] contains the completion status */ @@ -364,7 +371,7 @@ described in section 36.2.2. After the completion area has been processed, the driver must be notified that it can release any resources associated with the -request. This is done via the dequeue operation: +request. This is done via the dequeue operation:: struct dax_command cmd; cmd.command = CCB_DEQUEUE; @@ -375,13 +382,14 @@ request. This is done via the dequeue operation: Finally, normal program cleanup should be done, i.e., unmapping completion area, closing the dax device, freeing memory etc. -[Kernel example] +Kernel example +-------------- The only difference in using the DAX in kernel code is the treatment of the completion area. 
Unlike user applications which mmap the completion area allocated by the driver, kernel code must allocate its own memory to use for the completion area, and this address and its -type must be given in the CCB: +type must be given in the CCB:: ccb->control |= /* Table 36.1, CCB Header Format */ (3L << 32); /* completion area address type = primary virtual */ @@ -389,9 +397,11 @@ type must be given in the CCB: ccb->completion = (unsigned long) completion_area; /* Completion area address */ The dax submit hypercall is made directly. The flags used in the -ccb_submit call are documented in the DAX HV API in section 36.3.1. +ccb_submit call are documented in the DAX HV API in section 36.3.1. -#include +:: + + #include hv_rv = sun4v_ccb_submit((unsigned long)ccb, 64, HV_CCB_QUERY_CMD | @@ -405,7 +415,7 @@ ccb_submit call are documented in the DAX HV API in section 36.3.1. } After the submission, the completion area polling code is identical to -that in user land: +that in user land:: while (1) { /* Monitored Load */ @@ -427,3 +437,9 @@ that in user land: The output bitmap is ready for consumption immediately after the completion status indicates success. + +Excerpt from UltraSPARC Virtual Machine Specification +===================================================== + + .. include:: dax-hv-api.txt + :literal: diff --git a/drivers/sbus/char/oradax.c b/drivers/sbus/char/oradax.c index 6516bc3cb58b0..acd9ba40eabe0 100644 --- a/drivers/sbus/char/oradax.c +++ b/drivers/sbus/char/oradax.c @@ -30,7 +30,7 @@ * the recommended way for applications to use the coprocessor, and * the driver interface is not intended for general use. * - * See Documentation/sparc/oradax/oracle-dax.txt for more details. + * See Documentation/sparc/oradax/oracle-dax.rst for more details. */ #include From bc0025b6107c011e8f9411a275d8442a56bd573a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Tue, 8 Jan 2019 10:13:56 -0600 Subject: [PATCH 13/14] sparc: use struct_size() in kzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; void *entry[]; }; instance = kzalloc(sizeof(struct foo) + sizeof(void *) * count, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- arch/sparc/kernel/cpumap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c index d1d52822603d0..1cb62bfeaa1ff 100644 --- a/arch/sparc/kernel/cpumap.c +++ b/arch/sparc/kernel/cpumap.c @@ -194,8 +194,7 @@ static struct cpuinfo_tree *build_cpuinfo_tree(void) n = enumerate_cpuinfo_nodes(tmp_level); - new_tree = kzalloc(sizeof(struct cpuinfo_tree) + - (sizeof(struct cpuinfo_node) * n), GFP_ATOMIC); + new_tree = kzalloc(struct_size(new_tree, nodes, n), GFP_ATOMIC); if (!new_tree) return NULL; From f4d9a23d3dad0252f375901bf4ff6523a2c97241 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 12 Feb 2019 11:32:36 +0200 Subject: [PATCH 14/14] sparc64: simplify reduce_memory() function The reduce_memory() function clamps the available memory to a limit defined by the "mem=" command line parameter. It takes into account the amount of already reserved memory and excludes it from the limit calculations. Rather than traverse memblocks and remove them by hand, use memblock_reserved_size() to account the reserved memory and memblock_enforce_memory_limit() to clamp the available memory. Signed-off-by: Mike Rapoport Acked-by: David S. Miller Signed-off-by: David S. 
Miller --- arch/sparc/mm/init_64.c | 42 ++--------------------------------------- 1 file changed, 2 insertions(+), 40 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index f2d70ff7a2842..bc2aaa47bc8ac 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2269,19 +2269,6 @@ static unsigned long last_valid_pfn; static void sun4u_pgprot_init(void); static void sun4v_pgprot_init(void); -static phys_addr_t __init available_memory(void) -{ - phys_addr_t available = 0ULL; - phys_addr_t pa_start, pa_end; - u64 i; - - for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &pa_start, - &pa_end, NULL) - available = available + (pa_end - pa_start); - - return available; -} - #define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) #define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) #define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) @@ -2295,33 +2282,8 @@ static phys_addr_t __init available_memory(void) */ static void __init reduce_memory(phys_addr_t limit_ram) { - phys_addr_t avail_ram = available_memory(); - phys_addr_t pa_start, pa_end; - u64 i; - - if (limit_ram >= avail_ram) - return; - - for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &pa_start, - &pa_end, NULL) { - phys_addr_t region_size = pa_end - pa_start; - phys_addr_t clip_start = pa_start; - - avail_ram = avail_ram - region_size; - /* Are we consuming too much? */ - if (avail_ram < limit_ram) { - phys_addr_t give_back = limit_ram - avail_ram; - - region_size = region_size - give_back; - clip_start = clip_start + give_back; - } - - memblock_remove(clip_start, region_size); - - if (avail_ram <= limit_ram) - break; - i = 0UL; - } + limit_ram += memblock_reserved_size(); + memblock_enforce_memory_limit(limit_ram); } void __init paging_init(void)