Skip to content

Commit

Permalink
netvm: set PF_MEMALLOC as appropriate during SKB processing
Browse files Browse the repository at this point in the history
In order to make sure pfmemalloc packets receive all memory needed to
proceed, ensure processing of pfmemalloc SKBs happens under PF_MEMALLOC.
This is limited to a subset of protocols that are expected to be used for
writing to swap.  Taps are not allowed to use PF_MEMALLOC as these are
expected to communicate with userspace processes which could be paged out.

[a.p.zijlstra@chello.nl: Ideas taken from various patches]
[jslaby@suse.cz: Lock imbalance fix]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Neil Brown <neilb@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
Mel Gorman authored and Linus Torvalds committed Aug 1, 2012
1 parent 0614002 commit b4b9e35
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 6 deletions.
5 changes: 5 additions & 0 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -754,8 +754,13 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s
return 0;
}

extern int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);

/*
 * Hand a backlogged skb to the socket's receive callback.
 *
 * When memalloc sockets are in use and this skb is flagged pfmemalloc,
 * delivery is routed through __sk_backlog_rcv(); every other skb goes
 * straight to sk->sk_backlog_rcv().  The callback's return value is
 * passed back unchanged in both cases.
 */
static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	/* Ordinary skb: no special allocation context required. */
	if (!sk_memalloc_socks() || !skb_pfmemalloc(skb))
		return sk->sk_backlog_rcv(sk, skb);

	return __sk_backlog_rcv(sk, skb);
}

Expand Down
53 changes: 47 additions & 6 deletions net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -3155,6 +3155,23 @@ void netdev_rx_handler_unregister(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);

/*
* Limit the use of PFMEMALLOC reserves to those protocols that implement
* the special handling of PFMEMALLOC skbs.
*/
static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
{
switch (skb->protocol) {
case __constant_htons(ETH_P_ARP):
case __constant_htons(ETH_P_IP):
case __constant_htons(ETH_P_IPV6):
case __constant_htons(ETH_P_8021Q):
return true;
default:
return false;
}
}

static int __netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
Expand All @@ -3164,14 +3181,27 @@ static int __netif_receive_skb(struct sk_buff *skb)
bool deliver_exact = false;
int ret = NET_RX_DROP;
__be16 type;
unsigned long pflags = current->flags;

net_timestamp_check(!netdev_tstamp_prequeue, skb);

trace_netif_receive_skb(skb);

/*
* PFMEMALLOC skbs are special, they should
* - be delivered to SOCK_MEMALLOC sockets only
* - stay away from userspace
* - have bounded memory usage
*
* Use PF_MEMALLOC as this saves us from propagating the allocation
* context down to all allocation sites.
*/
if (sk_memalloc_socks() && skb_pfmemalloc(skb))
current->flags |= PF_MEMALLOC;

/* if we've gotten here through NAPI, check netpoll */
if (netpoll_receive_skb(skb))
return NET_RX_DROP;
goto out;

orig_dev = skb->dev;

Expand All @@ -3191,7 +3221,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
skb = vlan_untag(skb);
if (unlikely(!skb))
goto out;
goto unlock;
}

#ifdef CONFIG_NET_CLS_ACT
Expand All @@ -3201,6 +3231,9 @@ static int __netif_receive_skb(struct sk_buff *skb)
}
#endif

if (sk_memalloc_socks() && skb_pfmemalloc(skb))
goto skip_taps;

list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
Expand All @@ -3209,13 +3242,18 @@ static int __netif_receive_skb(struct sk_buff *skb)
}
}

skip_taps:
#ifdef CONFIG_NET_CLS_ACT
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
if (!skb)
goto out;
goto unlock;
ncls:
#endif

if (sk_memalloc_socks() && skb_pfmemalloc(skb)
&& !skb_pfmemalloc_protocol(skb))
goto drop;

rx_handler = rcu_dereference(skb->dev->rx_handler);
if (vlan_tx_tag_present(skb)) {
if (pt_prev) {
Expand All @@ -3225,7 +3263,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
if (vlan_do_receive(&skb, !rx_handler))
goto another_round;
else if (unlikely(!skb))
goto out;
goto unlock;
}

if (rx_handler) {
Expand All @@ -3235,7 +3273,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
}
switch (rx_handler(&skb)) {
case RX_HANDLER_CONSUMED:
goto out;
goto unlock;
case RX_HANDLER_ANOTHER:
goto another_round;
case RX_HANDLER_EXACT:
Expand Down Expand Up @@ -3268,6 +3306,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
else
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
drop:
atomic_long_inc(&skb->dev->rx_dropped);
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
Expand All @@ -3276,8 +3315,10 @@ static int __netif_receive_skb(struct sk_buff *skb)
ret = NET_RX_DROP;
}

out:
unlock:
rcu_read_unlock();
out:
tsk_restore_flags(current, pflags, PF_MEMALLOC);
return ret;
}

Expand Down
16 changes: 16 additions & 0 deletions net/core/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,22 @@ void sk_clear_memalloc(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_clear_memalloc);

/*
 * Run the socket's backlog receive callback with PF_MEMALLOC set on the
 * current task.
 *
 * The task flags are sampled on entry and handed to tsk_restore_flags()
 * after the callback returns, so the caller's PF_MEMALLOC state is put
 * back however the callback behaved.
 *
 * The socket must have SOCK_MEMALLOC set; anything else is treated as a
 * bug because such skbs should have been dropped before being queued.
 *
 * Returns whatever sk->sk_backlog_rcv() returned.
 */
int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	unsigned long saved_flags = current->flags;
	int rc;

	/* these should have been dropped before queueing */
	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));

	current->flags |= PF_MEMALLOC;
	rc = sk->sk_backlog_rcv(sk, skb);
	tsk_restore_flags(current, saved_flags, PF_MEMALLOC);

	return rc;
}
EXPORT_SYMBOL(__sk_backlog_rcv);

#if defined(CONFIG_CGROUPS)
#if !defined(CONFIG_NET_CLS_CGROUP)
int net_cls_subsys_id = -1;
Expand Down

0 comments on commit b4b9e35

Please sign in to comment.