Skip to content

Commit

Permalink
Merge branch 'listener_refactor'
Browse files Browse the repository at this point in the history
Eric Dumazet says:

====================
inet: tcp listener refactoring, part 10

We are getting close to the point where request sockets will be hashed
into the generic hash table. Some follow-ups are needed for netfilter and
will be handled in the next patch series.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Mar 16, 2015
2 parents f00bbd2 + 13854e5 commit b35f504
Show file tree
Hide file tree
Showing 13 changed files with 99 additions and 102 deletions.
5 changes: 5 additions & 0 deletions include/net/inet_connection_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,11 @@ static inline void inet_csk_reqsk_queue_add(struct sock *sk,
struct sock *child)
{
reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
/* before letting lookups find us, make sure all req fields
* are committed to memory.
*/
smp_wmb();
atomic_set(&req->rsk_refcnt, 1);
}

void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
Expand Down
5 changes: 5 additions & 0 deletions include/net/inet_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,11 @@ static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops
ireq->opt = NULL;
atomic64_set(&ireq->ir_cookie, 0);
ireq->ireq_state = TCP_NEW_SYN_RECV;

/* Following is temporary. It is coupled with debugging
* helpers in reqsk_put() & reqsk_free()
*/
atomic_set(&ireq->ireq_refcnt, 0);
}

return req;
Expand Down
13 changes: 7 additions & 6 deletions include/net/request_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,19 +82,20 @@ static inline struct request_sock *inet_reqsk(struct sock *sk)
return (struct request_sock *)sk;
}

/* Return the memory of @req to the slab cache named in its ops table
 * (req->rsk_ops->slab). The ops destructor is not invoked here.
 */
static inline void __reqsk_free(struct request_sock *req)
{
kmem_cache_free(req->rsk_ops->slab, req);
}

/* Destroy a request sock: run the destructor from its ops table, then
 * hand the memory back to the slab cache named in the same ops table.
 *
 * The object is released exactly once, and req->rsk_ops is not
 * dereferenced after the release.
 */
static inline void reqsk_free(struct request_sock *req)
{
	/* temporary debugging: warn once if the refcount is not zero */
	WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0);

	req->rsk_ops->destructor(req);
	kmem_cache_free(req->rsk_ops->slab, req);
}

/* Drop one reference on @req; when the count reaches zero the request
 * sock is destroyed through reqsk_free().
 */
static inline void reqsk_put(struct request_sock *req)
{
/* temporary debugging, until request socks are put into the ehash table:
 * warn once if the refcount is anything other than 1 on entry.
 */
WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 1);

if (atomic_dec_and_test(&req->rsk_refcnt))
reqsk_free(req);
}
Expand Down
9 changes: 9 additions & 0 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
#include <linux/atomic.h>
#include <net/dst.h>
#include <net/checksum.h>
#include <net/tcp_states.h>
#include <linux/net_tstamp.h>

struct cgroup;
Expand Down Expand Up @@ -2218,6 +2219,14 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb)
return NULL;
}

/* Tell whether @sk is a full socket, i.e. neither a timewait socket nor
 * a request socket. The decision is taken from sk->sk_state alone.
 */
static inline bool sk_fullsock(const struct sock *sk)
{
	const int partial_states = TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV;

	/* exactly one bit is set on the left, so "not in the partial set"
	 * is the same as "still set after masking the partial set out".
	 */
	return ((1 << sk->sk_state) & partial_states) == 0;
}

void sock_enable_timestamp(struct sock *sk, int flag);
int sock_get_timestamp(struct sock *, struct timeval __user *);
int sock_get_timestampns(struct sock *, struct timespec __user *);
Expand Down
4 changes: 2 additions & 2 deletions net/core/request_sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
while ((req = lopt->syn_table[i]) != NULL) {
lopt->syn_table[i] = req->dl_next;
lopt->qlen--;
reqsk_free(req);
reqsk_put(req);
}
}
}
Expand Down Expand Up @@ -180,7 +180,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
*/
spin_unlock_bh(&fastopenq->lock);
sock_put(lsk);
reqsk_free(req);
reqsk_put(req);
return;
}
/* Wait for 60secs before removing a req that has triggered RST.
Expand Down
15 changes: 0 additions & 15 deletions net/core/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -1661,21 +1661,6 @@ void sock_efree(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_efree);

#ifdef CONFIG_INET
/* Hand skb->sk to the put helper that matches the kind of socket
 * indicated by its sk_state: timewait, request, or any other socket.
 */
void sock_edemux(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	switch (sk->sk_state) {
	case TCP_TIME_WAIT:
		inet_twsk_put(inet_twsk(sk));
		break;
	case TCP_NEW_SYN_RECV:
		reqsk_put(inet_reqsk(sk));
		break;
	default:
		sock_put(sk);
		break;
	}
}
EXPORT_SYMBOL(sock_edemux);
#endif

kuid_t sock_i_uid(struct sock *sk)
{
kuid_t uid;
Expand Down
8 changes: 4 additions & 4 deletions net/ipv4/inet_connection_sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
out:
release_sock(sk);
if (req)
__reqsk_free(req);
reqsk_put(req);
return newsk;
out_err:
newsk = NULL;
Expand Down Expand Up @@ -635,7 +635,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
/* Drop this request */
inet_csk_reqsk_queue_unlink(parent, req, reqp);
reqsk_queue_removed(queue, req);
reqsk_free(req);
reqsk_put(req);
continue;
}
reqp = &req->dl_next;
Expand Down Expand Up @@ -837,7 +837,7 @@ void inet_csk_listen_stop(struct sock *sk)
sock_put(child);

sk_acceptq_removed(sk);
__reqsk_free(req);
reqsk_put(req);
}
if (queue->fastopenq != NULL) {
/* Free all the reqs queued in rskq_rst_head. */
Expand All @@ -847,7 +847,7 @@ void inet_csk_listen_stop(struct sock *sk)
spin_unlock_bh(&queue->fastopenq->lock);
while ((req = acc_req) != NULL) {
acc_req = req->dl_next;
__reqsk_free(req);
reqsk_put(req);
}
}
WARN_ON(sk->sk_ack_backlog);
Expand Down
120 changes: 53 additions & 67 deletions net/ipv4/inet_diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,14 +113,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
return -EMSGSIZE;

r = nlmsg_data(nlh);
BUG_ON((1 << sk->sk_state) & (TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV));
BUG_ON(!sk_fullsock(sk));

inet_diag_msg_common_fill(r, sk);
r->idiag_state = sk->sk_state;
r->idiag_timer = 0;
r->idiag_retrans = 0;


if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
goto errout;

Expand Down Expand Up @@ -229,7 +228,6 @@ static int inet_csk_diag_fill(struct sock *sk,

static int inet_twsk_diag_fill(struct sock *sk,
struct sk_buff *skb,
const struct inet_diag_req_v2 *req,
u32 portid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
Expand Down Expand Up @@ -265,16 +263,53 @@ static int inet_twsk_diag_fill(struct sock *sk,
return 0;
}

/* Append one inet_diag message describing a request socket to @skb.
 *
 * @sk is converted with inet_reqsk() to reach the request fields.
 * The message always reports state TCP_SYN_RECV and timer value 1, the
 * retransmit count of the request, and the time left until the request
 * expires in milliseconds (0 when the expiry is already in the past).
 * Queue sizes, uid and inode are reported as 0.
 *
 * Returns 0 on success, or -EMSGSIZE when nlmsg_put() fails.
 */
static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
			      u32 portid, u32 seq, u16 nlmsg_flags,
			      const struct nlmsghdr *unlh)
{
	struct request_sock *req = inet_reqsk(sk);
	struct inet_diag_msg *msg;
	struct nlmsghdr *hdr;
	long remaining;

	/* NOTE(review): presumably needed because the common fill helper
	 * below is given the request socket as a struct sock and reads
	 * the cookie through it - confirm against that helper.
	 */
	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
		     offsetof(struct sock, sk_cookie));

	hdr = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*msg),
			nlmsg_flags);
	if (hdr == NULL)
		return -EMSGSIZE;

	msg = nlmsg_data(hdr);
	inet_diag_msg_common_fill(msg, sk);

	msg->idiag_state = TCP_SYN_RECV;
	msg->idiag_timer = 1;
	msg->idiag_retrans = req->num_retrans;

	/* signed difference: a negative value means already expired */
	remaining = req->expires - jiffies;
	if (remaining >= 0)
		msg->idiag_expires = jiffies_to_msecs(remaining);
	else
		msg->idiag_expires = 0;

	msg->idiag_rqueue = 0;
	msg->idiag_wqueue = 0;
	msg->idiag_uid = 0;
	msg->idiag_inode = 0;

	nlmsg_end(skb, hdr);
	return 0;
}

/* Fill one inet_diag message for @sk, choosing the filler from sk_state:
 * timewait sockets go to inet_twsk_diag_fill(), request sockets to
 * inet_req_diag_fill(), everything else to inet_csk_diag_fill().
 *
 * @r and @user_ns are forwarded only on the inet_csk_diag_fill() path.
 * Returns the value returned by the selected filler.
 */
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
			const struct inet_diag_req_v2 *r,
			struct user_namespace *user_ns,
			u32 portid, u32 seq, u16 nlmsg_flags,
			const struct nlmsghdr *unlh)
{
	if (sk->sk_state == TCP_TIME_WAIT)
		return inet_twsk_diag_fill(sk, skb, portid, seq,
					   nlmsg_flags, unlh);

	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return inet_req_diag_fill(sk, skb, portid, seq,
					  nlmsg_flags, unlh);

	return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
				  nlmsg_flags, unlh);
}
Expand Down Expand Up @@ -502,7 +537,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
entry_fill_addrs(&entry, sk);
entry.sport = inet->inet_num;
entry.dport = ntohs(inet->inet_dport);
entry.userlocks = (sk->sk_state != TCP_TIME_WAIT) ? sk->sk_userlocks : 0;
entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;

return inet_diag_bc_run(bc, &entry);
}
Expand Down Expand Up @@ -661,61 +696,6 @@ static void twsk_build_assert(void)
#endif
}

/* Dump helper that forwards @sk to inet_twsk_diag_fill().
 *
 * The socket is first checked against @bc with inet_diag_bc_sk(); when
 * that check returns zero nothing is written and 0 is returned.
 * Otherwise a message flagged NLM_F_MULTI is filled into @skb, using the
 * port id and sequence number taken from the netlink callback @cb, and
 * the result of inet_twsk_diag_fill() is returned.
 */
static int inet_twsk_diag_dump(struct sock *sk,
struct sk_buff *skb,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *r,
const struct nlattr *bc)
{
twsk_build_assert();

if (!inet_diag_bc_sk(bc, sk))
return 0;

return inet_twsk_diag_fill(sk, skb, r,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}

/* Append one NLM_F_MULTI inet_diag message describing the request sock
 * @req to @skb.
 *
 * The common part of the message is filled from the request itself; @sk
 * is only used to obtain the uid, which is translated into @user_ns.
 * The message reports state TCP_SYN_RECV, timer value 1, the retransmit
 * count of the request and the time left until it expires in
 * milliseconds (clamped at 0). Queue sizes and inode are reported as 0.
 *
 * Returns 0 on success, or -EMSGSIZE when nlmsg_put() fails.
 */
static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
			      struct request_sock *req,
			      struct user_namespace *user_ns,
			      u32 portid, u32 seq,
			      const struct nlmsghdr *unlh)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct inet_diag_msg *msg;
	struct nlmsghdr *hdr;
	long remaining;

	hdr = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*msg),
			NLM_F_MULTI);
	if (hdr == NULL)
		return -EMSGSIZE;

	msg = nlmsg_data(hdr);
	inet_diag_msg_common_fill(msg, (struct sock *)ireq);
	msg->idiag_state = TCP_SYN_RECV;
	msg->idiag_timer = 1;
	msg->idiag_retrans = req->num_retrans;

	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
		     offsetof(struct sock, sk_cookie));

	/* signed difference: never report a negative time to expiry */
	remaining = req->expires - jiffies;
	msg->idiag_expires = jiffies_to_msecs(remaining < 0 ? 0 : remaining);

	msg->idiag_rqueue = 0;
	msg->idiag_wqueue = 0;
	msg->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
	msg->idiag_inode = 0;

	nlmsg_end(skb, hdr);
	return 0;
}

static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *r,
Expand Down Expand Up @@ -769,10 +749,10 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
continue;
}

err = inet_diag_fill_req(skb, sk, req,
sk_user_ns(NETLINK_CB(cb->skb).sk),
err = inet_req_diag_fill((struct sock *)req, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, cb->nlh);
cb->nlh->nlmsg_seq,
NLM_F_MULTI, cb->nlh);
if (err < 0) {
cb->args[3] = j + 1;
cb->args[4] = reqnum;
Expand Down Expand Up @@ -903,10 +883,16 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
if (r->id.idiag_dport != sk->sk_dport &&
r->id.idiag_dport)
goto next_normal;
if (sk->sk_state == TCP_TIME_WAIT)
res = inet_twsk_diag_dump(sk, skb, cb, r, bc);
else
res = inet_csk_diag_dump(sk, skb, cb, r, bc);
twsk_build_assert();

if (!inet_diag_bc_sk(bc, sk))
goto next_normal;

res = sk_diag_fill(sk, skb, r,
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
cb->nlh);
if (res < 0) {
spin_unlock_bh(lock);
goto done;
Expand Down
6 changes: 6 additions & 0 deletions net/ipv4/inet_hashtables.c
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,12 @@ void sock_gen_put(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sock_gen_put);

/* skb destructor installed by the early demux path: hands the socket
 * stored in skb->sk to sock_gen_put().
 */
void sock_edemux(struct sk_buff *skb)
{
sock_gen_put(skb->sk);
}
EXPORT_SYMBOL(sock_edemux);

struct sock *__inet_lookup_established(struct net *net,
struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
Expand Down
10 changes: 5 additions & 5 deletions net/ipv4/syncookies.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,9 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
}
EXPORT_SYMBOL_GPL(__cookie_v4_check);

static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst)
static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct sock *child;
Expand Down Expand Up @@ -357,7 +357,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->opt = tcp_v4_save_options(skb);

if (security_inet_conn_request(sk, skb, req)) {
reqsk_free(req);
reqsk_put(req);
goto out;
}

Expand All @@ -378,7 +378,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
security_req_classify_flow(req, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(sock_net(sk), &fl4);
if (IS_ERR(rt)) {
reqsk_free(req);
reqsk_put(req);
goto out;
}

Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/tcp_fastopen.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
fastopenq->rskq_rst_head = req1->dl_next;
fastopenq->qlen--;
spin_unlock(&fastopenq->lock);
reqsk_free(req1);
reqsk_put(req1);
}
return true;
}
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/tcp_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -1518,7 +1518,7 @@ void tcp_v4_early_demux(struct sk_buff *skb)
if (sk) {
skb->sk = sk;
skb->destructor = sock_edemux;
if (sk->sk_state != TCP_TIME_WAIT) {
if (sk_fullsock(sk)) {
struct dst_entry *dst = sk->sk_rx_dst;

if (dst)
Expand Down
Loading

0 comments on commit b35f504

Please sign in to comment.