Skip to content

Commit

Permalink
Merge branch 'tcp-udp-misc'
Browse files Browse the repository at this point in the history
Eric Dumazet says:

====================
net: various udp/tcp changes

First round of patches for linux-4.7

Add a generic facility for sockets to be freed after an RCU grace
period, if they need to.

Then the UDP stack is changed to no longer use SLAB_DESTROY_BY_RCU,
in order to speed up rx processing for traffic encapsulated in UDP.
It gives a 17 % speedup for normal UDP reception in stress conditions.

Then TCP listeners are changed to use SOCK_RCU_FREE as well,
to avoid touching sk_refcnt in the synflood case:
I got up to a 30 % performance increase for a mono listener.

Then three patches add SK_MEMINFO_DROPS to sock_diag
and add per socket rx drops accounting to TCP.

The last patch adds rate limiting on ACKs sent on behalf of SYN_RECV,
to better resist SYN floods targeting one or a few flows.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Apr 5, 2016
2 parents 43e2dfb + 4ce7e93 commit 15f41e2
Show file tree
Hide file tree
Showing 23 changed files with 401 additions and 578 deletions.
8 changes: 4 additions & 4 deletions include/linux/udp.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,11 @@ static inline bool udp_get_no_check6_rx(struct sock *sk)
return udp_sk(sk)->no_check6_rx;
}

#define udp_portaddr_for_each_entry(__sk, node, list) \
hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node)
#define udp_portaddr_for_each_entry(__sk, list) \
hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node)

#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \
hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node)
#define udp_portaddr_for_each_entry_rcu(__sk, list) \
hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)

#define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag)

Expand Down
12 changes: 8 additions & 4 deletions include/net/inet6_hashtables.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,15 @@ static inline struct sock *__inet6_lookup(struct net *net,
const __be16 sport,
const struct in6_addr *daddr,
const u16 hnum,
const int dif)
const int dif,
bool *refcounted)
{
struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr,
sport, daddr, hnum, dif);
*refcounted = true;
if (sk)
return sk;

*refcounted = false;
return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
daddr, hnum, dif);
}
Expand All @@ -81,17 +83,19 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be16 sport,
const __be16 dport,
int iif)
int iif,
bool *refcounted)
{
struct sock *sk = skb_steal_sock(skb);

*refcounted = true;
if (sk)
return sk;

return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
doff, &ipv6_hdr(skb)->saddr, sport,
&ipv6_hdr(skb)->daddr, ntohs(dport),
iif);
iif, refcounted);
}

struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
Expand Down
47 changes: 25 additions & 22 deletions include/net/inet_hashtables.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,10 @@ struct inet_bind_hashbucket {

/*
* Sockets can be hashed in established or listening table
* We must use different 'nulls' end-of-chain value for listening
* hash table, or we might find a socket that was closed and
* reallocated/inserted into established hash table
*/
#define LISTENING_NULLS_BASE (1U << 29)
struct inet_listen_hashbucket {
spinlock_t lock;
struct hlist_nulls_head head;
struct hlist_head head;
};

/* This is for listening sockets, thus all sockets which possess wildcards. */
Expand Down Expand Up @@ -280,11 +276,8 @@ static inline struct sock *inet_lookup_listener(struct net *net,
net_eq(sock_net(__sk), (__net)))
#endif /* 64-bit arch */

/*
* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
* not check it for lookups anymore, thanks Alexey. -DaveM
*
* Local BH must be disabled here.
*/
struct sock *__inet_lookup_established(struct net *net,
struct inet_hashinfo *hashinfo,
Expand All @@ -307,14 +300,20 @@ static inline struct sock *__inet_lookup(struct net *net,
struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const __be16 dport,
const int dif)
const int dif,
bool *refcounted)
{
u16 hnum = ntohs(dport);
struct sock *sk = __inet_lookup_established(net, hashinfo,
saddr, sport, daddr, hnum, dif);
struct sock *sk;

return sk ? : __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
sport, daddr, hnum, dif);
sk = __inet_lookup_established(net, hashinfo, saddr, sport,
daddr, hnum, dif);
*refcounted = true;
if (sk)
return sk;
*refcounted = false;
return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
sport, daddr, hnum, dif);
}

static inline struct sock *inet_lookup(struct net *net,
Expand All @@ -325,30 +324,34 @@ static inline struct sock *inet_lookup(struct net *net,
const int dif)
{
struct sock *sk;
bool refcounted;

local_bh_disable();
sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
dport, dif);
local_bh_enable();
dport, dif, &refcounted);

if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
}

static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb,
int doff,
const __be16 sport,
const __be16 dport)
const __be16 dport,
bool *refcounted)
{
struct sock *sk = skb_steal_sock(skb);
const struct iphdr *iph = ip_hdr(skb);

*refcounted = true;
if (sk)
return sk;
else
return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
doff, iph->saddr, sport,
iph->daddr, dport, inet_iif(skb));

return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
doff, iph->saddr, sport,
iph->daddr, dport, inet_iif(skb),
refcounted);
}

u32 sk_ehashfn(const struct sock *sk);
Expand Down
31 changes: 15 additions & 16 deletions include/net/request_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,24 +85,23 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
struct request_sock *req;

req = kmem_cache_alloc(ops->slab, GFP_ATOMIC | __GFP_NOWARN);

if (req) {
req->rsk_ops = ops;
if (attach_listener) {
sock_hold(sk_listener);
req->rsk_listener = sk_listener;
} else {
req->rsk_listener = NULL;
if (!req)
return NULL;
req->rsk_listener = NULL;
if (attach_listener) {
if (unlikely(!atomic_inc_not_zero(&sk_listener->sk_refcnt))) {
kmem_cache_free(ops->slab, req);
return NULL;
}
req_to_sk(req)->sk_prot = sk_listener->sk_prot;
sk_node_init(&req_to_sk(req)->sk_node);
sk_tx_queue_clear(req_to_sk(req));
req->saved_syn = NULL;
/* Following is temporary. It is coupled with debugging
* helpers in reqsk_put() & reqsk_free()
*/
atomic_set(&req->rsk_refcnt, 0);
req->rsk_listener = sk_listener;
}
req->rsk_ops = ops;
req_to_sk(req)->sk_prot = sk_listener->sk_prot;
sk_node_init(&req_to_sk(req)->sk_node);
sk_tx_queue_clear(req_to_sk(req));
req->saved_syn = NULL;
atomic_set(&req->rsk_refcnt, 0);

return req;
}

Expand Down
21 changes: 15 additions & 6 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ struct sock_common {
int skc_bound_dev_if;
union {
struct hlist_node skc_bind_node;
struct hlist_nulls_node skc_portaddr_node;
struct hlist_node skc_portaddr_node;
};
struct proto *skc_prot;
possible_net_t skc_net;
Expand Down Expand Up @@ -438,6 +438,7 @@ struct sock {
struct sk_buff *skb);
void (*sk_destruct)(struct sock *sk);
struct sock_reuseport __rcu *sk_reuseport_cb;
struct rcu_head sk_rcu;
};

#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
Expand Down Expand Up @@ -669,18 +670,18 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_for_each_entry(__sk, list, sk_bind_node)

/**
* sk_nulls_for_each_entry_offset - iterate over a list at a given struct offset
* sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_node to use as a loop cursor.
* @head: the head for your list.
* @offset: offset of hlist_node within the struct.
*
*/
#define sk_nulls_for_each_entry_offset(tpos, pos, head, offset) \
for (pos = (head)->first; \
(!is_a_nulls(pos)) && \
#define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \
for (pos = rcu_dereference((head)->first); \
pos != NULL && \
({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \
pos = pos->next)
pos = rcu_dereference(pos->next))

static inline struct user_namespace *sk_user_ns(struct sock *sk)
{
Expand Down Expand Up @@ -720,6 +721,7 @@ enum sock_flags {
*/
SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */
SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */
};

#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
Expand Down Expand Up @@ -2010,6 +2012,13 @@ sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb)
SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops);
}

/* Charge the drop of @skb to @sk: sk_drops grows by the skb's gso_segs
 * count, or by 1 when gso_segs is 0.
 */
static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)
{
	int dropped;

	dropped = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
	atomic_add(dropped, &sk->sk_drops);
}

void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb);
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
Expand Down
13 changes: 13 additions & 0 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -1836,4 +1836,17 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb)
tp->data_segs_in += segs_in;
}

/*
 * Account one dropped connection attempt on a listener: bump the socket's
 * sk_drops counter and the LINUX_MIB_LISTENDROPS statistic of its netns.
 *
 * The TCP listen path runs lockless, and the listener is handed around as
 * a const "struct sock" so that accidental writes to its fields are caught.
 * sk_drops is an atomic_t, so dropping the const qualifier just to
 * increment it is safe.
 */
static inline void tcp_listendrop(const struct sock *sk)
{
	struct sock *listener = (struct sock *)sk;

	atomic_inc(&listener->sk_drops);
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
}

#endif /* _TCP_H */
2 changes: 1 addition & 1 deletion include/net/udp.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ struct udp_skb_cb {
* @lock: spinlock protecting changes to head/count
*/
struct udp_hslot {
struct hlist_nulls_head head;
struct hlist_head head;
int count;
spinlock_t lock;
} __attribute__((aligned(2 * sizeof(long))));
Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/sock_diag.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ enum {
SK_MEMINFO_WMEM_QUEUED,
SK_MEMINFO_OPTMEM,
SK_MEMINFO_BACKLOG,
SK_MEMINFO_DROPS,

SK_MEMINFO_VARS,
};
Expand Down
15 changes: 14 additions & 1 deletion net/core/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -1419,8 +1419,12 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
}
EXPORT_SYMBOL(sk_alloc);

void sk_destruct(struct sock *sk)
/* Sockets having SOCK_RCU_FREE will call this function after one RCU
* grace period. This is the case for UDP sockets and TCP listeners.
*/
static void __sk_destruct(struct rcu_head *head)
{
struct sock *sk = container_of(head, struct sock, sk_rcu);
struct sk_filter *filter;

if (sk->sk_destruct)
Expand Down Expand Up @@ -1449,6 +1453,14 @@ void sk_destruct(struct sock *sk)
sk_prot_free(sk->sk_prot_creator, sk);
}

/* Destroy @sk. Sockets flagged SOCK_RCU_FREE hand __sk_destruct() to
 * call_rcu(); every other socket runs __sk_destruct() right away.
 */
void sk_destruct(struct sock *sk)
{
	if (!sock_flag(sk, SOCK_RCU_FREE)) {
		__sk_destruct(&sk->sk_rcu);
		return;
	}

	call_rcu(&sk->sk_rcu, __sk_destruct);
}

static void __sk_free(struct sock *sk)
{
if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
Expand Down Expand Up @@ -1513,6 +1525,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_dst_cache = NULL;
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
atomic_set(&newsk->sk_drops, 0);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

Expand Down
1 change: 1 addition & 0 deletions net/core/sock_diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);

return nla_put(skb, attrtype, sizeof(mem), &mem);
}
Expand Down
7 changes: 5 additions & 2 deletions net/dccp/ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -764,6 +764,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
{
const struct dccp_hdr *dh;
const struct iphdr *iph;
bool refcounted;
struct sock *sk;
int min_cov;

Expand Down Expand Up @@ -801,7 +802,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)

lookup:
sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
dh->dccph_sport, dh->dccph_dport);
dh->dccph_sport, dh->dccph_dport, &refcounted);
if (!sk) {
dccp_pr_debug("failed to look up flow ID in table and "
"get corresponding socket\n");
Expand Down Expand Up @@ -830,6 +831,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
goto lookup;
}
sock_hold(sk);
refcounted = true;
nsk = dccp_check_req(sk, skb, req);
if (!nsk) {
reqsk_put(req);
Expand Down Expand Up @@ -886,7 +888,8 @@ static int dccp_v4_rcv(struct sk_buff *skb)
return 0;

discard_and_relse:
sock_put(sk);
if (refcounted)
sock_put(sk);
goto discard_it;
}

Expand Down
Loading

0 comments on commit 15f41e2

Please sign in to comment.