-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
arch/tile: common DMA code for the GXIO IORPC subsystem
The dma_queue support is used by both the mPipe (networking) and Trio (PCI) hardware shims on tilegx. This common code is selected when either of those drivers is built. Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
- Loading branch information
Chris Metcalf
committed
Jul 11, 2012
1 parent
44e5696
commit 6369798
Showing
4 changed files
with
344 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,3 +3,4 @@ | |
# | ||
|
||
obj-$(CONFIG_TILE_GXIO) += iorpc_globals.o kiorpc.o | ||
obj-$(CONFIG_TILE_GXIO_DMA) += dma_queue.o |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
/* | ||
* Copyright 2012 Tilera Corporation. All Rights Reserved. | ||
* | ||
* This program is free software; you can redistribute it and/or | ||
* modify it under the terms of the GNU General Public License | ||
* as published by the Free Software Foundation, version 2. | ||
* | ||
* This program is distributed in the hope that it will be useful, but | ||
* WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
* NON INFRINGEMENT. See the GNU General Public License for | ||
* more details. | ||
*/ | ||
|
||
#include <linux/io.h> | ||
#include <linux/atomic.h> | ||
#include <linux/module.h> | ||
#include <gxio/dma_queue.h> | ||
|
||
/*
 * Wait for a memory read to complete.
 *
 * "val" is handed to the asm as a register input operand, so the
 * instruction cannot be issued until the value is actually available
 * in a register.  The "move" copies the register onto itself and so
 * has no other effect; __volatile__ keeps the compiler from deleting
 * the statement.
 */
#define wait_for_value(val)					\
	__asm__ __volatile__("move %0, %0" : : "r"(val))
|
||
/*
 * The ring index is in the low 16 bits (see "ring_idx" in the hardware
 * ring word).
 *
 * NOTE(review): nothing in the visible code uses this mask, and the
 * software "slot" kept in credits_and_next_index is documented as
 * 24 bits wide -- confirm before using this mask on that field.
 */
#define DMA_QUEUE_INDEX_MASK ((1 << 16) - 1)
|
||
/*
 * The hardware descriptor-ring type.
 * This matches the types used by mpipe (MPIPE_EDMA_POST_REGION_VAL_t)
 * and trio (TRIO_PUSH_DMA_REGION_VAL_t or TRIO_PULL_DMA_REGION_VAL_t).
 * See those types for more documentation on the individual fields.
 *
 * The two field orders below describe the same 64-bit layout for
 * little- and big-endian bit-field allocation:
 *   bits  0..15  ring_idx
 *   bits 16..31  count (the 16-bit completion counter read by
 *                __gxio_dma_queue_update_credits())
 *   bit  32      gen
 *   bits 33..63  reserved
 */
typedef union {
	struct {
#ifndef __BIG_ENDIAN__
		uint64_t ring_idx:16;
		uint64_t count:16;
		uint64_t gen:1;
		uint64_t __reserved:31;
#else
		uint64_t __reserved:31;
		uint64_t gen:1;
		uint64_t count:16;
		uint64_t ring_idx:16;
#endif
	};
	/* The whole register value, as returned by one MMIO read. */
	uint64_t word;
} __gxio_ring_t;
|
||
/*
 * Initialize a DMA queue state object.
 *
 * @dma_queue:        State object to initialize; fully overwritten.
 * @post_region_addr: Address of the hardware post-region register
 *                    that is later read for completion counts.
 * @num_entries:      Number of entries in the descriptor ring.
 *
 * On return the queue has zero completions recorded, a next index of
 * zero, and min(num_entries, 65535) credits.
 */
void __gxio_dma_queue_init(__gxio_dma_queue_t *dma_queue,
			   void *post_region_addr, unsigned int num_entries)
{
	/*
	 * Limit 65536 entry rings to 65535 credits because we only have a
	 * 16 bit completion counter.
	 */
	int64_t credits = (num_entries < 65536) ? num_entries : 65535;

	/* This also leaves hw_complete_count at zero. */
	memset(dma_queue, 0, sizeof(*dma_queue));

	dma_queue->post_region_addr = post_region_addr;
	dma_queue->credits_and_next_index = credits << DMA_QUEUE_CREDIT_SHIFT;
}
EXPORT_SYMBOL_GPL(__gxio_dma_queue_init);
|
||
/*
 * Fold newly completed hardware commands into the software state.
 *
 * Reads the 16-bit hardware completion counter, advances the 64-bit
 * "hw_complete_count" by the number of completions seen since the last
 * update, and returns that many credits to "credits_and_next_index".
 *
 * Returns without changing anything if there are no new completions,
 * or if another thread updated "hw_complete_count" first; in the
 * latter case that thread is the one that adds the credits back.
 */
void __gxio_dma_queue_update_credits(__gxio_dma_queue_t *dma_queue)
{
	__gxio_ring_t val;
	uint64_t count;
	uint64_t delta;
	uint64_t new_count;

	/*
	 * Read the 64-bit completion count without touching the cache, so
	 * we later avoid having to evict any sharers of this cache line
	 * when we update it below.  The compare-exchange uses the same
	 * "old" and "new" value, so it can never change the stored count;
	 * it is used purely as a read.
	 */
	uint64_t orig_hw_complete_count =
		cmpxchg(&dma_queue->hw_complete_count,
			-1, -1);

	/* Make sure the load completes before we access the hardware. */
	wait_for_value(orig_hw_complete_count);

	/* Read the 16-bit count of how many packets it has completed. */
	val.word = __gxio_mmio_read(dma_queue->post_region_addr);
	count = val.count;

	/*
	 * Calculate the number of completions since we last updated the
	 * 64-bit counter.  It's safe to ignore the high bits because the
	 * maximum credit value is 65535.
	 */
	delta = (count - orig_hw_complete_count) & 0xffff;
	if (delta == 0)
		return;

	/*
	 * Try to write back the count, advanced by delta.  If we race with
	 * another thread, this might fail, in which case we return
	 * immediately on the assumption that some credits are (or at least
	 * were) available.
	 */
	new_count = orig_hw_complete_count + delta;
	if (cmpxchg(&dma_queue->hw_complete_count,
		    orig_hw_complete_count,
		    new_count) != orig_hw_complete_count)
		return;

	/*
	 * We succeeded in advancing the completion count; add back the
	 * corresponding number of egress credits.
	 */
	__insn_fetchadd(&dma_queue->credits_and_next_index,
			(delta << DMA_QUEUE_CREDIT_SHIFT));
}
EXPORT_SYMBOL_GPL(__gxio_dma_queue_update_credits);
|
||
/* | ||
* A separate 'blocked' method for put() so that backtraces and | ||
* profiles will clearly indicate that we're wasting time spinning on | ||
* egress availability rather than actually posting commands. | ||
*/ | ||
int64_t __gxio_dma_queue_wait_for_credits(__gxio_dma_queue_t *dma_queue, | ||
int64_t modifier) | ||
{ | ||
int backoff = 16; | ||
int64_t old; | ||
|
||
do { | ||
int i; | ||
/* Back off to avoid spamming memory networks. */ | ||
for (i = backoff; i > 0; i--) | ||
__insn_mfspr(SPR_PASS); | ||
|
||
/* Check credits again. */ | ||
__gxio_dma_queue_update_credits(dma_queue); | ||
old = __insn_fetchaddgez(&dma_queue->credits_and_next_index, | ||
modifier); | ||
|
||
/* Calculate bounded exponential backoff for next iteration. */ | ||
if (backoff < 256) | ||
backoff *= 2; | ||
} while (old + modifier < 0); | ||
|
||
return old; | ||
} | ||
|
||
EXPORT_SYMBOL_GPL(__gxio_dma_queue_wait_for_credits); | ||
|
||
/*
 * Out-of-line wrapper around __gxio_dma_queue_reserve() that always
 * asks for a "completion slot".
 *
 * @dma_queue: Queue to reserve from.
 * @num:       Number of slots to reserve.
 * @wait:      Nonzero to wait for credits when none are available.
 *
 * Returns whatever __gxio_dma_queue_reserve() returns.
 */
int64_t __gxio_dma_queue_reserve_aux(__gxio_dma_queue_t *dma_queue,
				     unsigned int num, int wait)
{
	return __gxio_dma_queue_reserve(dma_queue, num, wait != 0, true);
}
EXPORT_SYMBOL_GPL(__gxio_dma_queue_reserve_aux);
|
||
/*
 * Check whether a particular "completion slot" has completed.
 *
 * @dma_queue:       Queue the slot was reserved from.
 * @completion_slot: Value to compare against "hw_complete_count".
 * @update:          If nonzero and the slot does not yet look
 *                   complete, refresh "hw_complete_count" from the
 *                   hardware before the final check.
 *
 * Returns nonzero if "hw_complete_count" is greater than
 * "completion_slot", zero otherwise.
 */
int __gxio_dma_queue_is_complete(__gxio_dma_queue_t *dma_queue,
				 int64_t completion_slot, int update)
{
	if (update) {
		/* Fast path: the cached count already covers the slot. */
		if (ACCESS_ONCE(dma_queue->hw_complete_count) >
		    completion_slot)
			return 1;

		__gxio_dma_queue_update_credits(dma_queue);
	}

	return ACCESS_ONCE(dma_queue->hw_complete_count) > completion_slot;
}
EXPORT_SYMBOL_GPL(__gxio_dma_queue_is_complete);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
/* | ||
* Copyright 2012 Tilera Corporation. All Rights Reserved. | ||
* | ||
* This program is free software; you can redistribute it and/or | ||
* modify it under the terms of the GNU General Public License | ||
* as published by the Free Software Foundation, version 2. | ||
* | ||
* This program is distributed in the hope that it will be useful, but | ||
* WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
* NON INFRINGEMENT. See the GNU General Public License for | ||
* more details. | ||
*/ | ||
|
||
#ifndef _GXIO_DMA_QUEUE_H_ | ||
#define _GXIO_DMA_QUEUE_H_ | ||
|
||
/* | ||
* DMA queue management APIs shared between TRIO and mPIPE. | ||
*/ | ||
|
||
#include "common.h" | ||
|
||
/*
 * The credit counter lives in the high 32 bits of
 * "credits_and_next_index"; shift a credit count left by this amount
 * to position it in that field.
 */
#define DMA_QUEUE_CREDIT_SHIFT 32
|
||
/*
 * State object that tracks a DMA queue's head and tail indices, as
 * well as the number of commands posted and completed.  The
 * structure is accessed via a thread-safe, lock-free algorithm.
 */
typedef struct {
	/*
	 * Address of a MPIPE_EDMA_POST_REGION_VAL_t,
	 * TRIO_PUSH_DMA_REGION_VAL_t, or TRIO_PULL_DMA_REGION_VAL_t
	 * register.  These registers have identical encodings and provide
	 * information about how many commands have been processed.
	 */
	void *post_region_addr;

	/*
	 * A lazily-updated count of how many edescs the hardware has
	 * completed.  It is forced to a 64-byte boundary, which keeps it
	 * (and the field that follows it) apart from post_region_addr.
	 */
	uint64_t hw_complete_count __attribute__ ((aligned(64)));

	/*
	 * High 32 bits are a count of available egress command credits,
	 * low 24 bits are the next egress "slot".  Bits 24..31 sit between
	 * the two fields and are cleared from time to time by
	 * __gxio_dma_queue_reserve() so the slot never carries into the
	 * credits.
	 */
	int64_t credits_and_next_index;

} __gxio_dma_queue_t;
|
||
/* Initialize a dma queue. */ | ||
extern void __gxio_dma_queue_init(__gxio_dma_queue_t *dma_queue, | ||
void *post_region_addr, | ||
unsigned int num_entries); | ||
|
||
/* | ||
* Update the "credits_and_next_index" and "hw_complete_count" fields | ||
* based on pending hardware completions. Note that some other thread | ||
* may have already done this and, importantly, may still be in the | ||
* process of updating "credits_and_next_index". | ||
*/ | ||
extern void __gxio_dma_queue_update_credits(__gxio_dma_queue_t *dma_queue); | ||
|
||
/* Wait for credits to become available. */ | ||
extern int64_t __gxio_dma_queue_wait_for_credits(__gxio_dma_queue_t *dma_queue, | ||
int64_t modifier); | ||
|
||
/* Reserve slots in the queue, optionally waiting for slots to become | ||
* available, and optionally returning a "completion_slot" suitable for | ||
* direct comparison to "hw_complete_count". | ||
*/ | ||
static inline int64_t __gxio_dma_queue_reserve(__gxio_dma_queue_t *dma_queue, | ||
unsigned int num, bool wait, | ||
bool completion) | ||
{ | ||
uint64_t slot; | ||
|
||
/* | ||
* Try to reserve 'num' egress command slots. We do this by | ||
* constructing a constant that subtracts N credits and adds N to | ||
* the index, and using fetchaddgez to only apply it if the credits | ||
* count doesn't go negative. | ||
*/ | ||
int64_t modifier = (((int64_t)(-num)) << DMA_QUEUE_CREDIT_SHIFT) | num; | ||
int64_t old = | ||
__insn_fetchaddgez(&dma_queue->credits_and_next_index, | ||
modifier); | ||
|
||
if (unlikely(old + modifier < 0)) { | ||
/* | ||
* We're out of credits. Try once to get more by checking for | ||
* completed egress commands. If that fails, wait or fail. | ||
*/ | ||
__gxio_dma_queue_update_credits(dma_queue); | ||
old = __insn_fetchaddgez(&dma_queue->credits_and_next_index, | ||
modifier); | ||
if (old + modifier < 0) { | ||
if (wait) | ||
old = __gxio_dma_queue_wait_for_credits | ||
(dma_queue, modifier); | ||
else | ||
return GXIO_ERR_DMA_CREDITS; | ||
} | ||
} | ||
|
||
/* The bottom 24 bits of old encode the "slot". */ | ||
slot = (old & 0xffffff); | ||
|
||
if (completion) { | ||
/* | ||
* A "completion_slot" is a "slot" which can be compared to | ||
* "hw_complete_count" at any time in the future. To convert | ||
* "slot" into a "completion_slot", we access "hw_complete_count" | ||
* once (knowing that we have reserved a slot, and thus, it will | ||
* be "basically" accurate), and combine its high 40 bits with | ||
* the 24 bit "slot", and handle "wrapping" by adding "1 << 24" | ||
* if the result is LESS than "hw_complete_count". | ||
*/ | ||
uint64_t complete; | ||
complete = ACCESS_ONCE(dma_queue->hw_complete_count); | ||
slot |= (complete & 0xffffffffff000000); | ||
if (slot < complete) | ||
slot += 0x1000000; | ||
} | ||
|
||
/* | ||
* If any of our slots mod 256 were equivalent to 0, go ahead and | ||
* collect some egress credits, and update "hw_complete_count", and | ||
* make sure the index doesn't overflow into the credits. | ||
*/ | ||
if (unlikely(((old + num) & 0xff) < num)) { | ||
__gxio_dma_queue_update_credits(dma_queue); | ||
|
||
/* Make sure the index doesn't overflow into the credits. */ | ||
#ifdef __BIG_ENDIAN__ | ||
*(((uint8_t *)&dma_queue->credits_and_next_index) + 4) = 0; | ||
#else | ||
*(((uint8_t *)&dma_queue->credits_and_next_index) + 3) = 0; | ||
#endif | ||
} | ||
|
||
return slot; | ||
} | ||
|
||
/* Non-inlinable "__gxio_dma_queue_reserve(..., true)". */ | ||
extern int64_t __gxio_dma_queue_reserve_aux(__gxio_dma_queue_t *dma_queue, | ||
unsigned int num, int wait); | ||
|
||
/* Check whether a particular "completion slot" has completed. | ||
* | ||
* Note that this function requires a "completion slot", and thus | ||
* cannot be used with the result of any "reserve_fast" function. | ||
*/ | ||
extern int __gxio_dma_queue_is_complete(__gxio_dma_queue_t *dma_queue, | ||
int64_t completion_slot, int update); | ||
|
||
#endif /* !_GXIO_DMA_QUEUE_H_ */ |