Skip to content

Commit

Permalink
USB: EHCI: split ehci_qh into hw and sw parts
Browse files Browse the repository at this point in the history
The ehci_qh structure merges the hw and sw parts together, which is not good:
1. More and more items are being added to ehci_qh, and the software part of
   ehci_qh does not need to be allocated from the DMA qh_pool.
2. If the HCD has local SRAM, the sw part will consume it too, which brings
   no benefit.
3. On non-cache-coherent systems, the entire ehci_qh is uncacheable, although
   only the hw part actually needs to be uncacheable. Splitting them lets the
   sw part be cacheable.

Signed-off-by: Alek Du <alek.du@intel.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
CC: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
  • Loading branch information
Alek Du authored and Greg Kroah-Hartman committed Sep 23, 2009
1 parent 403dbd3 commit 3807e26
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 81 deletions.
43 changes: 24 additions & 19 deletions drivers/usb/host/ehci-dbg.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,11 @@ dbg_qtd (const char *label, struct ehci_hcd *ehci, struct ehci_qtd *qtd)
static void __maybe_unused
dbg_qh (const char *label, struct ehci_hcd *ehci, struct ehci_qh *qh)
{
struct ehci_qh_hw *hw = qh->hw;

ehci_dbg (ehci, "%s qh %p n%08x info %x %x qtd %x\n", label,
qh, qh->hw_next, qh->hw_info1, qh->hw_info2,
qh->hw_current);
dbg_qtd ("overlay", ehci, (struct ehci_qtd *) &qh->hw_qtd_next);
qh, hw->hw_next, hw->hw_info1, hw->hw_info2, hw->hw_current);
dbg_qtd("overlay", ehci, (struct ehci_qtd *) &hw->hw_qtd_next);
}

static void __maybe_unused
Expand Down Expand Up @@ -400,31 +401,32 @@ static void qh_lines (
char *next = *nextp;
char mark;
__le32 list_end = EHCI_LIST_END(ehci);
struct ehci_qh_hw *hw = qh->hw;

if (qh->hw_qtd_next == list_end) /* NEC does this */
if (hw->hw_qtd_next == list_end) /* NEC does this */
mark = '@';
else
mark = token_mark(ehci, qh->hw_token);
mark = token_mark(ehci, hw->hw_token);
if (mark == '/') { /* qh_alt_next controls qh advance? */
if ((qh->hw_alt_next & QTD_MASK(ehci))
== ehci->async->hw_alt_next)
if ((hw->hw_alt_next & QTD_MASK(ehci))
== ehci->async->hw->hw_alt_next)
mark = '#'; /* blocked */
else if (qh->hw_alt_next == list_end)
else if (hw->hw_alt_next == list_end)
mark = '.'; /* use hw_qtd_next */
/* else alt_next points to some other qtd */
}
scratch = hc32_to_cpup(ehci, &qh->hw_info1);
hw_curr = (mark == '*') ? hc32_to_cpup(ehci, &qh->hw_current) : 0;
scratch = hc32_to_cpup(ehci, &hw->hw_info1);
hw_curr = (mark == '*') ? hc32_to_cpup(ehci, &hw->hw_current) : 0;
temp = scnprintf (next, size,
"qh/%p dev%d %cs ep%d %08x %08x (%08x%c %s nak%d)",
qh, scratch & 0x007f,
speed_char (scratch),
(scratch >> 8) & 0x000f,
scratch, hc32_to_cpup(ehci, &qh->hw_info2),
hc32_to_cpup(ehci, &qh->hw_token), mark,
(cpu_to_hc32(ehci, QTD_TOGGLE) & qh->hw_token)
scratch, hc32_to_cpup(ehci, &hw->hw_info2),
hc32_to_cpup(ehci, &hw->hw_token), mark,
(cpu_to_hc32(ehci, QTD_TOGGLE) & hw->hw_token)
? "data1" : "data0",
(hc32_to_cpup(ehci, &qh->hw_alt_next) >> 1) & 0x0f);
(hc32_to_cpup(ehci, &hw->hw_alt_next) >> 1) & 0x0f);
size -= temp;
next += temp;

Expand All @@ -435,10 +437,10 @@ static void qh_lines (
mark = ' ';
if (hw_curr == td->qtd_dma)
mark = '*';
else if (qh->hw_qtd_next == cpu_to_hc32(ehci, td->qtd_dma))
else if (hw->hw_qtd_next == cpu_to_hc32(ehci, td->qtd_dma))
mark = '+';
else if (QTD_LENGTH (scratch)) {
if (td->hw_alt_next == ehci->async->hw_alt_next)
if (td->hw_alt_next == ehci->async->hw->hw_alt_next)
mark = '#';
else if (td->hw_alt_next != list_end)
mark = '/';
Expand Down Expand Up @@ -550,12 +552,15 @@ static ssize_t fill_periodic_buffer(struct debug_buffer *buf)
next += temp;

do {
struct ehci_qh_hw *hw;

switch (hc32_to_cpu(ehci, tag)) {
case Q_TYPE_QH:
hw = p.qh->hw;
temp = scnprintf (next, size, " qh%d-%04x/%p",
p.qh->period,
hc32_to_cpup(ehci,
&p.qh->hw_info2)
&hw->hw_info2)
/* uframe masks */
& (QH_CMASK | QH_SMASK),
p.qh);
Expand All @@ -576,7 +581,7 @@ static ssize_t fill_periodic_buffer(struct debug_buffer *buf)
/* show more info the first time around */
if (temp == seen_count) {
u32 scratch = hc32_to_cpup(ehci,
&p.qh->hw_info1);
&hw->hw_info1);
struct ehci_qtd *qtd;
char *type = "";

Expand Down Expand Up @@ -609,7 +614,7 @@ static ssize_t fill_periodic_buffer(struct debug_buffer *buf)
} else
temp = 0;
if (p.qh) {
tag = Q_NEXT_TYPE(ehci, p.qh->hw_next);
tag = Q_NEXT_TYPE(ehci, hw->hw_next);
p = p.qh->qh_next;
}
break;
Expand Down
14 changes: 8 additions & 6 deletions drivers/usb/host/ehci-hcd.c
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,7 @@ static int ehci_init(struct usb_hcd *hcd)
u32 temp;
int retval;
u32 hcc_params;
struct ehci_qh_hw *hw;

spin_lock_init(&ehci->lock);

Expand Down Expand Up @@ -550,12 +551,13 @@ static int ehci_init(struct usb_hcd *hcd)
* from automatically advancing to the next td after short reads.
*/
ehci->async->qh_next.qh = NULL;
ehci->async->hw_next = QH_NEXT(ehci, ehci->async->qh_dma);
ehci->async->hw_info1 = cpu_to_hc32(ehci, QH_HEAD);
ehci->async->hw_token = cpu_to_hc32(ehci, QTD_STS_HALT);
ehci->async->hw_qtd_next = EHCI_LIST_END(ehci);
hw = ehci->async->hw;
hw->hw_next = QH_NEXT(ehci, ehci->async->qh_dma);
hw->hw_info1 = cpu_to_hc32(ehci, QH_HEAD);
hw->hw_token = cpu_to_hc32(ehci, QTD_STS_HALT);
hw->hw_qtd_next = EHCI_LIST_END(ehci);
ehci->async->qh_state = QH_STATE_LINKED;
ehci->async->hw_alt_next = QTD_NEXT(ehci, ehci->async->dummy->qtd_dma);
hw->hw_alt_next = QTD_NEXT(ehci, ehci->async->dummy->qtd_dma);

/* clear interrupt enables, set irq latency */
if (log2_irq_thresh < 0 || log2_irq_thresh > 6)
Expand Down Expand Up @@ -985,7 +987,7 @@ ehci_endpoint_disable (struct usb_hcd *hcd, struct usb_host_endpoint *ep)
/* endpoints can be iso streams. for now, we don't
* accelerate iso completions ... so spin a while.
*/
if (qh->hw_info1 == 0) {
if (qh->hw->hw_info1 == 0) {
ehci_vdbg (ehci, "iso delay\n");
goto idle_timeout;
}
Expand Down
26 changes: 17 additions & 9 deletions drivers/usb/host/ehci-mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,20 +75,23 @@ static void qh_destroy(struct ehci_qh *qh)
}
if (qh->dummy)
ehci_qtd_free (ehci, qh->dummy);
dma_pool_free (ehci->qh_pool, qh, qh->qh_dma);
dma_pool_free(ehci->qh_pool, qh->hw, qh->qh_dma);
kfree(qh);
}

static struct ehci_qh *ehci_qh_alloc (struct ehci_hcd *ehci, gfp_t flags)
{
struct ehci_qh *qh;
dma_addr_t dma;

qh = (struct ehci_qh *)
dma_pool_alloc (ehci->qh_pool, flags, &dma);
qh = kzalloc(sizeof *qh, GFP_ATOMIC);
if (!qh)
return qh;

memset (qh, 0, sizeof *qh);
goto done;
qh->hw = (struct ehci_qh_hw *)
dma_pool_alloc(ehci->qh_pool, flags, &dma);
if (!qh->hw)
goto fail;
memset(qh->hw, 0, sizeof *qh->hw);
qh->refcount = 1;
qh->ehci = ehci;
qh->qh_dma = dma;
Expand All @@ -99,10 +102,15 @@ static struct ehci_qh *ehci_qh_alloc (struct ehci_hcd *ehci, gfp_t flags)
qh->dummy = ehci_qtd_alloc (ehci, flags);
if (qh->dummy == NULL) {
ehci_dbg (ehci, "no dummy td\n");
dma_pool_free (ehci->qh_pool, qh, qh->qh_dma);
qh = NULL;
goto fail1;
}
done:
return qh;
fail1:
dma_pool_free(ehci->qh_pool, qh->hw, qh->qh_dma);
fail:
kfree(qh);
return NULL;
}

/* to share a qh (cpu threads, or hc) */
Expand Down Expand Up @@ -180,7 +188,7 @@ static int ehci_mem_init (struct ehci_hcd *ehci, gfp_t flags)
/* QHs for control/bulk/intr transfers */
ehci->qh_pool = dma_pool_create ("ehci_qh",
ehci_to_hcd(ehci)->self.controller,
sizeof (struct ehci_qh),
sizeof(struct ehci_qh_hw),
32 /* byte alignment (for hw parts) */,
4096 /* can't cross 4K */);
if (!ehci->qh_pool) {
Expand Down
50 changes: 28 additions & 22 deletions drivers/usb/host/ehci-q.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,31 +87,33 @@ qtd_fill(struct ehci_hcd *ehci, struct ehci_qtd *qtd, dma_addr_t buf,
static inline void
qh_update (struct ehci_hcd *ehci, struct ehci_qh *qh, struct ehci_qtd *qtd)
{
struct ehci_qh_hw *hw = qh->hw;

/* writes to an active overlay are unsafe */
BUG_ON(qh->qh_state != QH_STATE_IDLE);

qh->hw_qtd_next = QTD_NEXT(ehci, qtd->qtd_dma);
qh->hw_alt_next = EHCI_LIST_END(ehci);
hw->hw_qtd_next = QTD_NEXT(ehci, qtd->qtd_dma);
hw->hw_alt_next = EHCI_LIST_END(ehci);

/* Except for control endpoints, we make hardware maintain data
* toggle (like OHCI) ... here (re)initialize the toggle in the QH,
* and set the pseudo-toggle in udev. Only usb_clear_halt() will
* ever clear it.
*/
if (!(qh->hw_info1 & cpu_to_hc32(ehci, 1 << 14))) {
if (!(hw->hw_info1 & cpu_to_hc32(ehci, 1 << 14))) {
unsigned is_out, epnum;

is_out = !(qtd->hw_token & cpu_to_hc32(ehci, 1 << 8));
epnum = (hc32_to_cpup(ehci, &qh->hw_info1) >> 8) & 0x0f;
epnum = (hc32_to_cpup(ehci, &hw->hw_info1) >> 8) & 0x0f;
if (unlikely (!usb_gettoggle (qh->dev, epnum, is_out))) {
qh->hw_token &= ~cpu_to_hc32(ehci, QTD_TOGGLE);
hw->hw_token &= ~cpu_to_hc32(ehci, QTD_TOGGLE);
usb_settoggle (qh->dev, epnum, is_out, 1);
}
}

/* HC must see latest qtd and qh data before we clear ACTIVE+HALT */
wmb ();
qh->hw_token &= cpu_to_hc32(ehci, QTD_TOGGLE | QTD_STS_PING);
hw->hw_token &= cpu_to_hc32(ehci, QTD_TOGGLE | QTD_STS_PING);
}

/* if it weren't for a common silicon quirk (writing the dummy into the qh
Expand All @@ -129,7 +131,7 @@ qh_refresh (struct ehci_hcd *ehci, struct ehci_qh *qh)
qtd = list_entry (qh->qtd_list.next,
struct ehci_qtd, qtd_list);
/* first qtd may already be partially processed */
if (cpu_to_hc32(ehci, qtd->qtd_dma) == qh->hw_current)
if (cpu_to_hc32(ehci, qtd->qtd_dma) == qh->hw->hw_current)
qtd = NULL;
}

Expand Down Expand Up @@ -260,7 +262,7 @@ __acquires(ehci->lock)
struct ehci_qh *qh = (struct ehci_qh *) urb->hcpriv;

/* S-mask in a QH means it's an interrupt urb */
if ((qh->hw_info2 & cpu_to_hc32(ehci, QH_SMASK)) != 0) {
if ((qh->hw->hw_info2 & cpu_to_hc32(ehci, QH_SMASK)) != 0) {

/* ... update hc-wide periodic stats (for usbfs) */
ehci_to_hcd(ehci)->self.bandwidth_int_reqs--;
Expand Down Expand Up @@ -315,6 +317,7 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
unsigned count = 0;
u8 state;
__le32 halt = HALT_BIT(ehci);
struct ehci_qh_hw *hw = qh->hw;

if (unlikely (list_empty (&qh->qtd_list)))
return count;
Expand Down Expand Up @@ -392,7 +395,8 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
qtd->hw_token = cpu_to_hc32(ehci,
token);
wmb();
qh->hw_token = cpu_to_hc32(ehci, token);
hw->hw_token = cpu_to_hc32(ehci,
token);
goto retry_xacterr;
}
stopped = 1;
Expand Down Expand Up @@ -435,8 +439,8 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
/* qh unlinked; token in overlay may be most current */
if (state == QH_STATE_IDLE
&& cpu_to_hc32(ehci, qtd->qtd_dma)
== qh->hw_current) {
token = hc32_to_cpu(ehci, qh->hw_token);
== hw->hw_current) {
token = hc32_to_cpu(ehci, hw->hw_token);

/* An unlink may leave an incomplete
* async transaction in the TT buffer.
Expand All @@ -449,9 +453,9 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
* patch the qh later and so that completions can't
* activate it while we "know" it's stopped.
*/
if ((halt & qh->hw_token) == 0) {
if ((halt & hw->hw_token) == 0) {
halt:
qh->hw_token |= halt;
hw->hw_token |= halt;
wmb ();
}
}
Expand Down Expand Up @@ -510,7 +514,7 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
* it after fault cleanup, or recovering from silicon wrongly
* overlaying the dummy qtd (which reduces DMA chatter).
*/
if (stopped != 0 || qh->hw_qtd_next == EHCI_LIST_END(ehci)) {
if (stopped != 0 || hw->hw_qtd_next == EHCI_LIST_END(ehci)) {
switch (state) {
case QH_STATE_IDLE:
qh_refresh(ehci, qh);
Expand All @@ -528,7 +532,7 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
* except maybe high bandwidth ...
*/
if ((cpu_to_hc32(ehci, QH_SMASK)
& qh->hw_info2) != 0) {
& hw->hw_info2) != 0) {
intr_deschedule (ehci, qh);
(void) qh_schedule (ehci, qh);
} else
Expand Down Expand Up @@ -649,7 +653,7 @@ qh_urb_transaction (
* (this will usually be overridden later.)
*/
if (is_input)
qtd->hw_alt_next = ehci->async->hw_alt_next;
qtd->hw_alt_next = ehci->async->hw->hw_alt_next;

/* qh makes control packets use qtd toggle; maybe switch it */
if ((maxpacket & (this_qtd_len + (maxpacket - 1))) == 0)
Expand Down Expand Up @@ -744,6 +748,7 @@ qh_make (
int is_input, type;
int maxp = 0;
struct usb_tt *tt = urb->dev->tt;
struct ehci_qh_hw *hw;

if (!qh)
return qh;
Expand Down Expand Up @@ -890,8 +895,9 @@ qh_make (

/* init as live, toggle clear, advance to dummy */
qh->qh_state = QH_STATE_IDLE;
qh->hw_info1 = cpu_to_hc32(ehci, info1);
qh->hw_info2 = cpu_to_hc32(ehci, info2);
hw = qh->hw;
hw->hw_info1 = cpu_to_hc32(ehci, info1);
hw->hw_info2 = cpu_to_hc32(ehci, info2);
usb_settoggle (urb->dev, usb_pipeendpoint (urb->pipe), !is_input, 1);
qh_refresh (ehci, qh);
return qh;
Expand Down Expand Up @@ -933,11 +939,11 @@ static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh)

/* splice right after start */
qh->qh_next = head->qh_next;
qh->hw_next = head->hw_next;
qh->hw->hw_next = head->hw->hw_next;
wmb ();

head->qh_next.qh = qh;
head->hw_next = dma;
head->hw->hw_next = dma;

qh_get(qh);
qh->xacterrs = 0;
Expand Down Expand Up @@ -984,7 +990,7 @@ static struct ehci_qh *qh_append_tds (

/* usb_reset_device() briefly reverts to address 0 */
if (usb_pipedevice (urb->pipe) == 0)
qh->hw_info1 &= ~qh_addr_mask;
qh->hw->hw_info1 &= ~qh_addr_mask;
}

/* just one way to queue requests: swap with the dummy qtd.
Expand Down Expand Up @@ -1169,7 +1175,7 @@ static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
while (prev->qh_next.qh != qh)
prev = prev->qh_next.qh;

prev->hw_next = qh->hw_next;
prev->hw->hw_next = qh->hw->hw_next;
prev->qh_next = qh->qh_next;
wmb ();

Expand Down
Loading

0 comments on commit 3807e26

Please sign in to comment.