Skip to content

Commit

Permalink
[SPARC64]: Fix streaming buffer flushing on PCI and SBUS.
Browse files Browse the repository at this point in the history
Firstly, if the direction is TODEVICE, then dirty data in the
streaming cache is impossible so we can elide the flush-flag
synchronization in that case.

Next, the context allocator is broken.  It is highly likely
that contexts get used multiple times for different dma
mappings, which confuses the strbuf flushing code and makes
it run inefficiently.

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed May 31, 2005
1 parent 2e3e80c commit 7c963ad
Show file tree
Hide file tree
Showing 7 changed files with 94 additions and 32 deletions.
90 changes: 70 additions & 20 deletions arch/sparc64/kernel/pci_iommu.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,34 @@ static iopte_t *alloc_consistent_cluster(struct pci_iommu *iommu, unsigned long
return NULL;
}

static int iommu_alloc_ctx(struct pci_iommu *iommu)
{
int lowest = iommu->ctx_lowest_free;
int sz = IOMMU_NUM_CTXS - lowest;
int n = find_next_zero_bit(iommu->ctx_bitmap, sz, lowest);

if (unlikely(n == sz)) {
n = find_next_zero_bit(iommu->ctx_bitmap, lowest, 1);
if (unlikely(n == lowest)) {
printk(KERN_WARNING "IOMMU: Ran out of contexts.\n");
n = 0;
}
}
if (n)
__set_bit(n, iommu->ctx_bitmap);

return n;
}

static inline void iommu_free_ctx(struct pci_iommu *iommu, int ctx)
{
if (likely(ctx)) {
__clear_bit(ctx, iommu->ctx_bitmap);
if (ctx < iommu->ctx_lowest_free)
iommu->ctx_lowest_free = ctx;
}
}

/* Allocate and map kernel buffer of size SIZE using consistent mode
* DMA for PCI device PDEV. Return non-NULL cpu-side address if
* successful and set *DMA_ADDRP to the PCI side dma address.
Expand Down Expand Up @@ -236,7 +264,7 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad
npages = size >> IO_PAGE_SHIFT;
ctx = 0;
if (iommu->iommu_ctxflush)
ctx = iommu->iommu_cur_ctx++;
ctx = iommu_alloc_ctx(iommu);
first_page = __pa(first_page);
while (npages--) {
iopte_val(*iopte) = (IOPTE_CONSISTENT(ctx) |
Expand Down Expand Up @@ -317,6 +345,8 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_
}
}

iommu_free_ctx(iommu, ctx);

spin_unlock_irqrestore(&iommu->lock, flags);

order = get_order(size);
Expand Down Expand Up @@ -360,7 +390,7 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct
base_paddr = __pa(oaddr & IO_PAGE_MASK);
ctx = 0;
if (iommu->iommu_ctxflush)
ctx = iommu->iommu_cur_ctx++;
ctx = iommu_alloc_ctx(iommu);
if (strbuf->strbuf_enabled)
iopte_protection = IOPTE_STREAMING(ctx);
else
Expand All @@ -380,39 +410,55 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct
return PCI_DMA_ERROR_CODE;
}

static void pci_strbuf_flush(struct pci_strbuf *strbuf, struct pci_iommu *iommu, u32 vaddr, unsigned long ctx, unsigned long npages)
static void pci_strbuf_flush(struct pci_strbuf *strbuf, struct pci_iommu *iommu, u32 vaddr, unsigned long ctx, unsigned long npages, int direction)
{
int limit;

PCI_STC_FLUSHFLAG_INIT(strbuf);
if (strbuf->strbuf_ctxflush &&
iommu->iommu_ctxflush) {
unsigned long matchreg, flushreg;
u64 val;

flushreg = strbuf->strbuf_ctxflush;
matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);

limit = 100000;
if (pci_iommu_read(matchreg) == 0)
goto do_flush_sync;

pci_iommu_write(flushreg, ctx);
for(;;) {
if (((long)pci_iommu_read(matchreg)) >= 0L)
break;
limit--;
if (!limit)
break;
udelay(1);
if ((val = pci_iommu_read(matchreg)) == 0)
goto do_flush_sync;

val &= 0xffff;
while (val) {
if (val & 0x1)
pci_iommu_write(flushreg, ctx);
val >>= 1;
}
if (!limit)
val = pci_iommu_read(matchreg);
if (unlikely(val)) {
printk(KERN_WARNING "pci_strbuf_flush: ctx flush "
"timeout vaddr[%08x] ctx[%lx]\n",
vaddr, ctx);
"timeout matchreg[%lx] ctx[%lx]\n",
val, ctx);
goto do_page_flush;
}
} else {
unsigned long i;

do_page_flush:
for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE)
pci_iommu_write(strbuf->strbuf_pflush, vaddr);
}

do_flush_sync:
/* If the device could not have possibly put dirty data into
* the streaming cache, no flush-flag synchronization needs
* to be performed.
*/
if (direction == PCI_DMA_TODEVICE)
return;

PCI_STC_FLUSHFLAG_INIT(strbuf);
pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
(void) pci_iommu_read(iommu->write_complete_reg);

Expand Down Expand Up @@ -466,14 +512,16 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int

/* Step 1: Kick data out of streaming buffers if necessary. */
if (strbuf->strbuf_enabled)
pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);
pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);

/* Step 2: Clear out first TSB entry. */
iopte_make_dummy(iommu, base);

free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base,
npages, ctx);

iommu_free_ctx(iommu, ctx);

spin_unlock_irqrestore(&iommu->lock, flags);
}

Expand Down Expand Up @@ -613,7 +661,7 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int
/* Step 4: Choose a context if necessary. */
ctx = 0;
if (iommu->iommu_ctxflush)
ctx = iommu->iommu_cur_ctx++;
ctx = iommu_alloc_ctx(iommu);

/* Step 5: Create the mappings. */
if (strbuf->strbuf_enabled)
Expand Down Expand Up @@ -678,14 +726,16 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,

/* Step 1: Kick data out of streaming buffers if necessary. */
if (strbuf->strbuf_enabled)
pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);
pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);

/* Step 2: Clear out first TSB entry. */
iopte_make_dummy(iommu, base);

free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base,
npages, ctx);

iommu_free_ctx(iommu, ctx);

spin_unlock_irqrestore(&iommu->lock, flags);
}

Expand Down Expand Up @@ -724,7 +774,7 @@ void pci_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size
}

/* Step 2: Kick data out of streaming buffers. */
pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);
pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);

spin_unlock_irqrestore(&iommu->lock, flags);
}
Expand Down Expand Up @@ -768,7 +818,7 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, i
i--;
npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length)
- bus_addr) >> IO_PAGE_SHIFT;
pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);
pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);

spin_unlock_irqrestore(&iommu->lock, flags);
}
Expand Down
2 changes: 1 addition & 1 deletion arch/sparc64/kernel/pci_psycho.c
Original file line number Diff line number Diff line change
Expand Up @@ -1212,7 +1212,7 @@ static void __init psycho_iommu_init(struct pci_controller_info *p)

/* Setup initial software IOMMU state. */
spin_lock_init(&iommu->lock);
iommu->iommu_cur_ctx = 0;
iommu->ctx_lowest_free = 1;

/* Register addresses. */
iommu->iommu_control = p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL;
Expand Down
2 changes: 1 addition & 1 deletion arch/sparc64/kernel/pci_sabre.c
Original file line number Diff line number Diff line change
Expand Up @@ -1265,7 +1265,7 @@ static void __init sabre_iommu_init(struct pci_controller_info *p,

/* Setup initial software IOMMU state. */
spin_lock_init(&iommu->lock);
iommu->iommu_cur_ctx = 0;
iommu->ctx_lowest_free = 1;

/* Register addresses. */
iommu->iommu_control = p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL;
Expand Down
2 changes: 1 addition & 1 deletion arch/sparc64/kernel/pci_schizo.c
Original file line number Diff line number Diff line change
Expand Up @@ -1753,7 +1753,7 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm)

/* Setup initial software IOMMU state. */
spin_lock_init(&iommu->lock);
iommu->iommu_cur_ctx = 0;
iommu->ctx_lowest_free = 1;

/* Register addresses, SCHIZO has iommu ctx flushing. */
iommu->iommu_control = pbm->pbm_regs + SCHIZO_IOMMU_CONTROL;
Expand Down
20 changes: 14 additions & 6 deletions arch/sparc64/kernel/sbus.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,17 +117,25 @@ static void iommu_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages

#define STRBUF_TAG_VALID 0x02UL

static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages)
static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages, int direction)
{
unsigned long n;
int limit;

iommu->strbuf_flushflag = 0UL;
n = npages;
while (n--)
upa_writeq(base + (n << IO_PAGE_SHIFT),
iommu->strbuf_regs + STRBUF_PFLUSH);

/* If the device could not have possibly put dirty data into
* the streaming cache, no flush-flag synchronization needs
* to be performed.
*/
if (direction == SBUS_DMA_TODEVICE)
return;

iommu->strbuf_flushflag = 0UL;

/* Whoopee cushion! */
upa_writeq(__pa(&iommu->strbuf_flushflag),
iommu->strbuf_regs + STRBUF_FSYNC);
Expand Down Expand Up @@ -421,7 +429,7 @@ void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t dma_addr, size_t size,

spin_lock_irqsave(&iommu->lock, flags);
free_streaming_cluster(iommu, dma_base, size >> IO_PAGE_SHIFT);
sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT);
sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT, direction);
spin_unlock_irqrestore(&iommu->lock, flags);
}

Expand Down Expand Up @@ -584,7 +592,7 @@ void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int
iommu = sdev->bus->iommu;
spin_lock_irqsave(&iommu->lock, flags);
free_streaming_cluster(iommu, dvma_base, size >> IO_PAGE_SHIFT);
sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT);
sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT, direction);
spin_unlock_irqrestore(&iommu->lock, flags);
}

Expand All @@ -596,7 +604,7 @@ void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t base, size_t
size = (IO_PAGE_ALIGN(base + size) - (base & IO_PAGE_MASK));

spin_lock_irqsave(&iommu->lock, flags);
sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT);
sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT, direction);
spin_unlock_irqrestore(&iommu->lock, flags);
}

Expand All @@ -620,7 +628,7 @@ void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sg, int
size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - base;

spin_lock_irqsave(&iommu->lock, flags);
sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT);
sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT, direction);
spin_unlock_irqrestore(&iommu->lock, flags);
}

Expand Down
2 changes: 2 additions & 0 deletions include/asm-sparc64/iommu.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,6 @@
#define IOPTE_CACHE 0x0000000000000010UL /* Cached (in UPA E-cache) */
#define IOPTE_WRITE 0x0000000000000002UL /* Writeable */

#define IOMMU_NUM_CTXS 4096

#endif /* !(_SPARC_IOMMU_H) */
8 changes: 5 additions & 3 deletions include/asm-sparc64/pbm.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <asm/io.h>
#include <asm/page.h>
#include <asm/oplib.h>
#include <asm/iommu.h>

/* The abstraction used here is that there are PCI controllers,
* each with one (Sabre) or two (PSYCHO/SCHIZO) PCI bus modules
Expand All @@ -40,9 +41,6 @@ struct pci_iommu {
*/
spinlock_t lock;

/* Context allocator. */
unsigned int iommu_cur_ctx;

/* IOMMU page table, a linear array of ioptes. */
iopte_t *page_table; /* The page table itself. */
int page_table_sz_bits; /* log2 of ow many pages does it map? */
Expand Down Expand Up @@ -87,6 +85,10 @@ struct pci_iommu {
u16 flush;
} alloc_info[PBM_NCLUSTERS];

/* CTX allocation. */
unsigned long ctx_lowest_free;
unsigned long ctx_bitmap[IOMMU_NUM_CTXS / (sizeof(unsigned long) * 8)];

/* Here a PCI controller driver describes the areas of
* PCI memory space where DMA to/from physical memory
* are addressed. Drivers interrogate the PCI layer
Expand Down

0 comments on commit 7c963ad

Please sign in to comment.