Skip to content

Commit

Permalink
PPPoE: Fix flush/close races.
Browse files Browse the repository at this point in the history
Be more careful about the state of pointers during tear-down.
The "pppoe_dev" field can only be looked at safely while holding socket locks.
This subsequently allows for the flush_lock to be killed.

We depend on the PPPOX_CONNECTED state to tell us that that those fields are
valid, so whoever clears that state (pppox_unbind_sock()) is responsible for
the dev_put() call.

We also have to ensure that we delete_item() on all sockets before they are
cleaned up.

The need for these changes has been exposed by scenarios wherein namespace
bindings of ethernet devices change while there are ongoing PPPoE sessions,
which resulted in oopses due to unusual socket connection termination paths,
exposing these issues.

Signed-off-by: Michal Ostrowski <mostrows@gmail.com>
Reviewed-by: Cyril Gorcunov <gorcunov@gmail.com>
Reported-by: Denys Fedoryschenko <denys@visp.net.lb>
Tested-by: Denys Fedoryschenko <denys@visp.net.lb>
  • Loading branch information
Michal Ostrowski authored and David S. Miller committed Oct 26, 2009
1 parent 5ccdcec commit fb64bb5
Showing 1 changed file with 68 additions and 61 deletions.
129 changes: 68 additions & 61 deletions drivers/net/pppoe.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,6 @@ struct pppoe_net {
rwlock_t hash_lock;
};

/* to eliminate a race btw pppoe_flush_dev and pppoe_release */
static DEFINE_SPINLOCK(flush_lock);

/*
* PPPoE could be in the following stages:
* 1) Discovery stage (to obtain remote MAC and Session ID)
Expand Down Expand Up @@ -303,45 +300,48 @@ static void pppoe_flush_dev(struct net_device *dev)
write_lock_bh(&pn->hash_lock);
for (i = 0; i < PPPOE_HASH_SIZE; i++) {
struct pppox_sock *po = pn->hash_table[i];
struct sock *sk;

while (po != NULL) {
struct sock *sk;
if (po->pppoe_dev != dev) {
while (po) {
while (po && po->pppoe_dev != dev) {
po = po->next;
continue;
}

if (!po)
break;

sk = sk_pppox(po);
spin_lock(&flush_lock);
po->pppoe_dev = NULL;
spin_unlock(&flush_lock);
dev_put(dev);

/* We always grab the socket lock, followed by the
* hash_lock, in that order. Since we should
* hold the sock lock while doing any unbinding,
* we need to release the lock we're holding.
* Hold a reference to the sock so it doesn't disappear
* as we're jumping between locks.
* hash_lock, in that order. Since we should hold the
* sock lock while doing any unbinding, we need to
* release the lock we're holding. Hold a reference to
* the sock so it doesn't disappear as we're jumping
* between locks.
*/

sock_hold(sk);

write_unlock_bh(&pn->hash_lock);
lock_sock(sk);

if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
if (po->pppoe_dev == dev
&& sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
pppox_unbind_sock(sk);
sk->sk_state = PPPOX_ZOMBIE;
sk->sk_state_change(sk);
po->pppoe_dev = NULL;
dev_put(dev);
}

release_sock(sk);
sock_put(sk);

/* Restart scan at the beginning of this hash chain.
* While the lock was dropped the chain contents may
* have changed.
/* Restart the process from the start of the current
* hash chain. We dropped locks so the world may have
* change from underneath us.
*/

BUG_ON(pppoe_pernet(dev_net(dev)) == NULL);
write_lock_bh(&pn->hash_lock);
po = pn->hash_table[i];
}
Expand Down Expand Up @@ -388,11 +388,16 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb)
struct pppox_sock *po = pppox_sk(sk);
struct pppox_sock *relay_po;

/* Backlog receive. Semantics of backlog rcv preclude any code from
* executing in lock_sock()/release_sock() bounds; meaning sk->sk_state
* can't change.
*/

if (sk->sk_state & PPPOX_BOUND) {
ppp_input(&po->chan, skb);
} else if (sk->sk_state & PPPOX_RELAY) {
relay_po = get_item_by_addr(dev_net(po->pppoe_dev),
&po->pppoe_relay);
relay_po = get_item_by_addr(sock_net(sk),
&po->pppoe_relay);
if (relay_po == NULL)
goto abort_kfree;

Expand Down Expand Up @@ -447,6 +452,10 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
goto drop;

pn = pppoe_pernet(dev_net(dev));

/* Note that get_item does a sock_hold(), so sk_pppox(po)
* is known to be safe.
*/
po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex);
if (!po)
goto drop;
Expand Down Expand Up @@ -561,6 +570,7 @@ static int pppoe_release(struct socket *sock)
struct sock *sk = sock->sk;
struct pppox_sock *po;
struct pppoe_net *pn;
struct net *net = NULL;

if (!sk)
return 0;
Expand All @@ -571,44 +581,28 @@ static int pppoe_release(struct socket *sock)
return -EBADF;
}

po = pppox_sk(sk);

if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
dev_put(po->pppoe_dev);
po->pppoe_dev = NULL;
}

pppox_unbind_sock(sk);

/* Signal the death of the socket. */
sk->sk_state = PPPOX_DEAD;

/*
* pppoe_flush_dev could lead to a race with
* this routine so we use flush_lock to eliminate
* such a case (we only need per-net specific data)
*/
spin_lock(&flush_lock);
po = pppox_sk(sk);
if (!po->pppoe_dev) {
spin_unlock(&flush_lock);
goto out;
}
pn = pppoe_pernet(dev_net(po->pppoe_dev));
spin_unlock(&flush_lock);
net = sock_net(sk);
pn = pppoe_pernet(net);

/*
* protect "po" from concurrent updates
* on pppoe_flush_dev
*/
write_lock_bh(&pn->hash_lock);
delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote,
po->pppoe_ifindex);

po = pppox_sk(sk);
if (stage_session(po->pppoe_pa.sid))
__delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote,
po->pppoe_ifindex);

if (po->pppoe_dev) {
dev_put(po->pppoe_dev);
po->pppoe_dev = NULL;
}

write_unlock_bh(&pn->hash_lock);

out:
sock_orphan(sk);
sock->sk = NULL;

Expand All @@ -625,8 +619,9 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
struct sock *sk = sock->sk;
struct sockaddr_pppox *sp = (struct sockaddr_pppox *)uservaddr;
struct pppox_sock *po = pppox_sk(sk);
struct net_device *dev;
struct net_device *dev = NULL;
struct pppoe_net *pn;
struct net *net = NULL;
int error;

lock_sock(sk);
Expand All @@ -652,12 +647,14 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
/* Delete the old binding */
if (stage_session(po->pppoe_pa.sid)) {
pppox_unbind_sock(sk);
pn = pppoe_pernet(sock_net(sk));
delete_item(pn, po->pppoe_pa.sid,
po->pppoe_pa.remote, po->pppoe_ifindex);
if (po->pppoe_dev) {
pn = pppoe_pernet(dev_net(po->pppoe_dev));
delete_item(pn, po->pppoe_pa.sid,
po->pppoe_pa.remote, po->pppoe_ifindex);
dev_put(po->pppoe_dev);
po->pppoe_dev = NULL;
}

memset(sk_pppox(po) + 1, 0,
sizeof(struct pppox_sock) - sizeof(struct sock));
sk->sk_state = PPPOX_NONE;
Expand All @@ -666,23 +663,23 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
/* Re-bind in session stage only */
if (stage_session(sp->sa_addr.pppoe.sid)) {
error = -ENODEV;
dev = dev_get_by_name(sock_net(sk), sp->sa_addr.pppoe.dev);
net = sock_net(sk);
dev = dev_get_by_name(net, sp->sa_addr.pppoe.dev);
if (!dev)
goto end;
goto err_put;

po->pppoe_dev = dev;
po->pppoe_ifindex = dev->ifindex;
pn = pppoe_pernet(dev_net(dev));
write_lock_bh(&pn->hash_lock);
pn = pppoe_pernet(net);
if (!(dev->flags & IFF_UP)) {
write_unlock_bh(&pn->hash_lock);
goto err_put;
}

memcpy(&po->pppoe_pa,
&sp->sa_addr.pppoe,
sizeof(struct pppoe_addr));

write_lock_bh(&pn->hash_lock);
error = __set_item(pn, po);
write_unlock_bh(&pn->hash_lock);
if (error < 0)
Expand All @@ -696,8 +693,11 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
po->chan.ops = &pppoe_chan_ops;

error = ppp_register_net_channel(dev_net(dev), &po->chan);
if (error)
if (error) {
delete_item(pn, po->pppoe_pa.sid,
po->pppoe_pa.remote, po->pppoe_ifindex);
goto err_put;
}

sk->sk_state = PPPOX_CONNECTED;
}
Expand Down Expand Up @@ -915,6 +915,14 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
struct pppoe_hdr *ph;
int data_len = skb->len;

/* The higher-level PPP code (ppp_unregister_channel()) ensures the PPP
* xmit operations conclude prior to an unregistration call. Thus
* sk->sk_state cannot change, so we don't need to do lock_sock().
* But, we also can't do a lock_sock since that introduces a potential
* deadlock as we'd reverse the lock ordering used when calling
* ppp_unregister_channel().
*/

if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
goto abort;

Expand Down Expand Up @@ -944,7 +952,6 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
po->pppoe_pa.remote, NULL, data_len);

dev_queue_xmit(skb);

return 1;

abort:
Expand Down

0 comments on commit fb64bb5

Please sign in to comment.