Skip to content

Commit

Permalink
IPoIB: Use netif_tx_lock() and get rid of private tx_lock, LLTX
Browse files Browse the repository at this point in the history
Currently, IPoIB is an LLTX driver that uses its own IRQ-disabling
tx_lock.  Not only do we want to get rid of LLTX, this actually causes
problems because of the skb_orphan() done with this tx_lock held: some
skb destructors expect to be run with interrupts enabled.

The simplest fix for this is to get rid of the driver-private tx_lock
and stop using LLTX.  We kill off priv->tx_lock and use
netif_tx_lock[_bh]() instead; the patch to do this is a tiny bit
tricky because we need to update places that take priv->lock inside
the tx_lock to disable IRQs, rather than relying on tx_lock having
already disabled IRQs.

Also, there are a couple of places where we need to disable BHs to
make sure we have a consistent context to call netif_tx_lock() (since
we no longer can use _irqsave() variants), and we also have to change
ipoib_send_comp_handler() to call drain_tx_cq() through a timer rather
than directly, because ipoib_send_comp_handler() runs in interrupt
context and drain_tx_cq() must run in BH context so it can call
netif_tx_lock().

Signed-off-by: Roland Dreier <rolandd@cisco.com>
  • Loading branch information
Roland Dreier committed Sep 30, 2008
1 parent c9da4ba commit 943c246
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 107 deletions.
8 changes: 3 additions & 5 deletions drivers/infiniband/ulp/ipoib/ipoib.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,10 +268,9 @@ struct ipoib_lro {
};

/*
* Device private locking: tx_lock protects members used in TX fast
* path (and we use LLTX so upper layers don't do extra locking).
* lock protects everything else. lock nests inside of tx_lock (ie
* tx_lock must be acquired first if needed).
* Device private locking: network stack tx_lock protects members used
* in TX fast path, lock protects everything else. lock nests inside
* of tx_lock (ie tx_lock must be acquired first if needed).
*/
struct ipoib_dev_priv {
spinlock_t lock;
Expand Down Expand Up @@ -320,7 +319,6 @@ struct ipoib_dev_priv {

struct ipoib_rx_buf *rx_ring;

spinlock_t tx_lock;
struct ipoib_tx_buf *tx_ring;
unsigned tx_head;
unsigned tx_tail;
Expand Down
88 changes: 52 additions & 36 deletions drivers/infiniband/ulp/ipoib/ipoib_cm.c
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,8 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)

dev_kfree_skb_any(tx_req->skb);

spin_lock_irqsave(&priv->tx_lock, flags);
netif_tx_lock(dev);

++tx->tx_tail;
if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
netif_queue_stopped(dev) &&
Expand All @@ -801,7 +802,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
"(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err);

spin_lock(&priv->lock);
spin_lock_irqsave(&priv->lock, flags);
neigh = tx->neigh;

if (neigh) {
Expand All @@ -821,10 +822,10 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)

clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);

spin_unlock(&priv->lock);
spin_unlock_irqrestore(&priv->lock, flags);
}

spin_unlock_irqrestore(&priv->tx_lock, flags);
netif_tx_unlock(dev);
}

int ipoib_cm_dev_open(struct net_device *dev)
Expand Down Expand Up @@ -1149,7 +1150,6 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
{
struct ipoib_dev_priv *priv = netdev_priv(p->dev);
struct ipoib_cm_tx_buf *tx_req;
unsigned long flags;
unsigned long begin;

ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
Expand Down Expand Up @@ -1180,12 +1180,12 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
DMA_TO_DEVICE);
dev_kfree_skb_any(tx_req->skb);
++p->tx_tail;
spin_lock_irqsave(&priv->tx_lock, flags);
netif_tx_lock_bh(p->dev);
if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
netif_queue_stopped(p->dev) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
netif_wake_queue(p->dev);
spin_unlock_irqrestore(&priv->tx_lock, flags);
netif_tx_unlock_bh(p->dev);
}

if (p->qp)
Expand All @@ -1202,6 +1202,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
struct net_device *dev = priv->dev;
struct ipoib_neigh *neigh;
unsigned long flags;
int ret;

switch (event->event) {
Expand All @@ -1220,8 +1221,8 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
case IB_CM_REJ_RECEIVED:
case IB_CM_TIMEWAIT_EXIT:
ipoib_dbg(priv, "CM error %d.\n", event->event);
spin_lock_irq(&priv->tx_lock);
spin_lock(&priv->lock);
netif_tx_lock_bh(dev);
spin_lock_irqsave(&priv->lock, flags);
neigh = tx->neigh;

if (neigh) {
Expand All @@ -1239,8 +1240,8 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
queue_work(ipoib_workqueue, &priv->cm.reap_task);
}

spin_unlock(&priv->lock);
spin_unlock_irq(&priv->tx_lock);
spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);
break;
default:
break;
Expand Down Expand Up @@ -1294,19 +1295,24 @@ static void ipoib_cm_tx_start(struct work_struct *work)
struct ib_sa_path_rec pathrec;
u32 qpn;

spin_lock_irqsave(&priv->tx_lock, flags);
spin_lock(&priv->lock);
netif_tx_lock_bh(dev);
spin_lock_irqsave(&priv->lock, flags);

while (!list_empty(&priv->cm.start_list)) {
p = list_entry(priv->cm.start_list.next, typeof(*p), list);
list_del_init(&p->list);
neigh = p->neigh;
qpn = IPOIB_QPN(neigh->neighbour->ha);
memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
spin_unlock(&priv->lock);
spin_unlock_irqrestore(&priv->tx_lock, flags);

spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);

ret = ipoib_cm_tx_init(p, qpn, &pathrec);
spin_lock_irqsave(&priv->tx_lock, flags);
spin_lock(&priv->lock);

netif_tx_lock_bh(dev);
spin_lock_irqsave(&priv->lock, flags);

if (ret) {
neigh = p->neigh;
if (neigh) {
Expand All @@ -1320,56 +1326,66 @@ static void ipoib_cm_tx_start(struct work_struct *work)
kfree(p);
}
}
spin_unlock(&priv->lock);
spin_unlock_irqrestore(&priv->tx_lock, flags);

spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);
}

static void ipoib_cm_tx_reap(struct work_struct *work)
{
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
cm.reap_task);
struct net_device *dev = priv->dev;
struct ipoib_cm_tx *p;
unsigned long flags;

netif_tx_lock_bh(dev);
spin_lock_irqsave(&priv->lock, flags);

spin_lock_irq(&priv->tx_lock);
spin_lock(&priv->lock);
while (!list_empty(&priv->cm.reap_list)) {
p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
list_del(&p->list);
spin_unlock(&priv->lock);
spin_unlock_irq(&priv->tx_lock);
spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);
ipoib_cm_tx_destroy(p);
spin_lock_irq(&priv->tx_lock);
spin_lock(&priv->lock);
netif_tx_lock_bh(dev);
spin_lock_irqsave(&priv->lock, flags);
}
spin_unlock(&priv->lock);
spin_unlock_irq(&priv->tx_lock);

spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);
}

static void ipoib_cm_skb_reap(struct work_struct *work)
{
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
cm.skb_task);
struct net_device *dev = priv->dev;
struct sk_buff *skb;

unsigned long flags;
unsigned mtu = priv->mcast_mtu;

spin_lock_irq(&priv->tx_lock);
spin_lock(&priv->lock);
netif_tx_lock_bh(dev);
spin_lock_irqsave(&priv->lock, flags);

while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
spin_unlock(&priv->lock);
spin_unlock_irq(&priv->tx_lock);
spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);

if (skb->protocol == htons(ETH_P_IP))
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else if (skb->protocol == htons(ETH_P_IPV6))
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, priv->dev);
#endif
dev_kfree_skb_any(skb);
spin_lock_irq(&priv->tx_lock);
spin_lock(&priv->lock);

netif_tx_lock_bh(dev);
spin_lock_irqsave(&priv->lock, flags);
}
spin_unlock(&priv->lock);
spin_unlock_irq(&priv->tx_lock);

spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);
}

void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
Expand Down
30 changes: 22 additions & 8 deletions drivers/infiniband/ulp/ipoib/ipoib_ib.c
Original file line number Diff line number Diff line change
Expand Up @@ -468,21 +468,22 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
static void drain_tx_cq(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
unsigned long flags;

spin_lock_irqsave(&priv->tx_lock, flags);
netif_tx_lock(dev);
while (poll_tx(priv))
; /* nothing */

if (netif_queue_stopped(dev))
mod_timer(&priv->poll_timer, jiffies + 1);

spin_unlock_irqrestore(&priv->tx_lock, flags);
netif_tx_unlock(dev);
}

void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
{
drain_tx_cq((struct net_device *)dev_ptr);
struct ipoib_dev_priv *priv = netdev_priv(dev_ptr);

mod_timer(&priv->poll_timer, jiffies);
}

static inline int post_send(struct ipoib_dev_priv *priv,
Expand Down Expand Up @@ -614,17 +615,20 @@ static void __ipoib_reap_ah(struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_ah *ah, *tah;
LIST_HEAD(remove_list);
unsigned long flags;

netif_tx_lock_bh(dev);
spin_lock_irqsave(&priv->lock, flags);

spin_lock_irq(&priv->tx_lock);
spin_lock(&priv->lock);
list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
list_del(&ah->list);
ib_destroy_ah(ah->ah);
kfree(ah);
}
spin_unlock(&priv->lock);
spin_unlock_irq(&priv->tx_lock);

spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);
}

void ipoib_reap_ah(struct work_struct *work)
Expand Down Expand Up @@ -761,6 +765,14 @@ void ipoib_drain_cq(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int i, n;

/*
* We call completion handling routines that expect to be
* called from the BH-disabled NAPI poll context, so disable
* BHs here too.
*/
local_bh_disable();

do {
n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
for (i = 0; i < n; ++i) {
Expand All @@ -784,6 +796,8 @@ void ipoib_drain_cq(struct net_device *dev)

while (poll_tx(priv))
; /* nothing */

local_bh_enable();
}

int ipoib_ib_dev_stop(struct net_device *dev, int flush)
Expand Down
Loading

0 comments on commit 943c246

Please sign in to comment.