diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b9a6b0a94cc6b..191feec60205d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -275,6 +275,11 @@ static inline void inet_csk_reqsk_queue_add(struct sock *sk, struct sock *child) { reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); + /* before letting lookups find us, make sure all req fields + * are committed to memory. + */ + smp_wmb(); + atomic_set(&req->rsk_refcnt, 1); } void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index b3053fdd871e8..3d8c09abb097e 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -255,6 +255,11 @@ static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops ireq->opt = NULL; atomic64_set(&ireq->ir_cookie, 0); ireq->ireq_state = TCP_NEW_SYN_RECV; + + /* Following is temporary. It is coupled with debugging + * helpers in reqsk_put() & reqsk_free() + */ + atomic_set(&ireq->ireq_refcnt, 0); } return req; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 3275cf31f7318..56dc2faba47ec 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -82,19 +82,20 @@ static inline struct request_sock *inet_reqsk(struct sock *sk) return (struct request_sock *)sk; } -static inline void __reqsk_free(struct request_sock *req) -{ - kmem_cache_free(req->rsk_ops->slab, req); -} - static inline void reqsk_free(struct request_sock *req) { + /* temporary debugging */ + WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0); + req->rsk_ops->destructor(req); - __reqsk_free(req); + kmem_cache_free(req->rsk_ops->slab, req); } static inline void reqsk_put(struct request_sock *req) { + /* temporary debugging, until req sock are put into ehash table */ + WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 1); + if (atomic_dec_and_test(&req->rsk_refcnt)) reqsk_free(req); } diff --git a/include/net/sock.h b/include/net/sock.h index f10832ca2e90a..e0360f5a53e94 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -67,6 +67,7 @@ #include #include #include +#include #include struct cgroup; @@ -2218,6 +2219,14 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb) return NULL; } +/* This helper checks if a socket is a full socket, + * ie _not_ a timewait or request socket. + */ +static inline bool sk_fullsock(const struct sock *sk) +{ + return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV); +} + void sock_enable_timestamp(struct sock *sk, int flag); int sock_get_timestamp(struct sock *, struct timeval __user *); int sock_get_timestampns(struct sock *, struct timespec __user *); diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 04db318e6218d..e910317ef6d90 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -103,7 +103,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) while ((req = lopt->syn_table[i]) != NULL) { lopt->syn_table[i] = req->dl_next; lopt->qlen--; - reqsk_free(req); + reqsk_put(req); } } } @@ -180,7 +180,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, */ spin_unlock_bh(&fastopenq->lock); sock_put(lsk); - reqsk_free(req); + reqsk_put(req); return; } /* Wait for 60secs before removing a req that has triggered RST. diff --git a/net/core/sock.c b/net/core/sock.c index 4bc42efb3e408..a950b54248da3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1661,21 +1661,6 @@ void sock_efree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_efree); -#ifdef CONFIG_INET -void sock_edemux(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - - if (sk->sk_state == TCP_TIME_WAIT) - inet_twsk_put(inet_twsk(sk)); - else if (sk->sk_state == TCP_NEW_SYN_RECV) - reqsk_put(inet_reqsk(sk)); - else - sock_put(sk); -} -EXPORT_SYMBOL(sock_edemux); -#endif - kuid_t sock_i_uid(struct sock *sk) { kuid_t uid; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 34581f928afa7..3390ba6f96b20 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -340,7 +340,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) out: release_sock(sk); if (req) - __reqsk_free(req); + reqsk_put(req); return newsk; out_err: newsk = NULL; @@ -635,7 +635,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, /* Drop this request */ inet_csk_reqsk_queue_unlink(parent, req, reqp); reqsk_queue_removed(queue, req); - reqsk_free(req); + reqsk_put(req); continue; } reqp = &req->dl_next; @@ -837,7 +837,7 @@ void inet_csk_listen_stop(struct sock *sk) sock_put(child); sk_acceptq_removed(sk); - __reqsk_free(req); + reqsk_put(req); } if (queue->fastopenq != NULL) { /* Free all the reqs queued in rskq_rst_head. */ @@ -847,7 +847,7 @@ void inet_csk_listen_stop(struct sock *sk) spin_unlock_bh(&queue->fastopenq->lock); while ((req = acc_req) != NULL) { acc_req = req->dl_next; - __reqsk_free(req); + reqsk_put(req); } } WARN_ON(sk->sk_ack_backlog); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index ac7b5c909fe7b..e7ba59038c8de 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -113,14 +113,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, return -EMSGSIZE; r = nlmsg_data(nlh); - BUG_ON((1 << sk->sk_state) & (TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV)); + BUG_ON(!sk_fullsock(sk)); inet_diag_msg_common_fill(r, sk); r->idiag_state = sk->sk_state; r->idiag_timer = 0; r->idiag_retrans = 0; - if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) goto errout; @@ -229,7 +228,6 @@ static int inet_csk_diag_fill(struct sock *sk, static int inet_twsk_diag_fill(struct sock *sk, struct sk_buff *skb, - const struct inet_diag_req_v2 *req, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { @@ -265,6 +263,39 @@ static int inet_twsk_diag_fill(struct sock *sk, return 0; } +static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, + u32 portid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) +{ + struct inet_diag_msg *r; + struct nlmsghdr *nlh; + long tmo; + + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), + nlmsg_flags); + if (!nlh) + return -EMSGSIZE; + + r = nlmsg_data(nlh); + inet_diag_msg_common_fill(r, sk); + r->idiag_state = TCP_SYN_RECV; + r->idiag_timer = 1; + r->idiag_retrans = inet_reqsk(sk)->num_retrans; + + BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != + offsetof(struct sock, sk_cookie)); + + tmo = inet_reqsk(sk)->expires - jiffies; + r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0; + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; + r->idiag_uid = 0; + r->idiag_inode = 0; + + nlmsg_end(skb, nlh); + return 0; +} + static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, const struct inet_diag_req_v2 *r, struct user_namespace *user_ns, @@ -272,9 +303,13 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) - return inet_twsk_diag_fill(sk, skb, r, portid, seq, + return inet_twsk_diag_fill(sk, skb, portid, seq, nlmsg_flags, unlh); + if (sk->sk_state == TCP_NEW_SYN_RECV) + return inet_req_diag_fill(sk, skb, portid, seq, + nlmsg_flags, unlh); + return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh); } @@ -502,7 +537,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) entry_fill_addrs(&entry, sk); entry.sport = inet->inet_num; entry.dport = ntohs(inet->inet_dport); - entry.userlocks = (sk->sk_state != TCP_TIME_WAIT) ? sk->sk_userlocks : 0; + entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0; return inet_diag_bc_run(bc, &entry); } @@ -661,61 +696,6 @@ static void twsk_build_assert(void) #endif } -static int inet_twsk_diag_dump(struct sock *sk, - struct sk_buff *skb, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - const struct nlattr *bc) -{ - twsk_build_assert(); - - if (!inet_diag_bc_sk(bc, sk)) - return 0; - - return inet_twsk_diag_fill(sk, skb, r, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); -} - -static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, - struct request_sock *req, - struct user_namespace *user_ns, - u32 portid, u32 seq, - const struct nlmsghdr *unlh) -{ - const struct inet_request_sock *ireq = inet_rsk(req); - struct inet_diag_msg *r; - struct nlmsghdr *nlh; - long tmo; - - nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), - NLM_F_MULTI); - if (!nlh) - return -EMSGSIZE; - - r = nlmsg_data(nlh); - inet_diag_msg_common_fill(r, (struct sock *)ireq); - r->idiag_state = TCP_SYN_RECV; - r->idiag_timer = 1; - r->idiag_retrans = req->num_retrans; - - BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != - offsetof(struct sock, sk_cookie)); - - tmo = req->expires - jiffies; - if (tmo < 0) - tmo = 0; - - r->idiag_expires = jiffies_to_msecs(tmo); - r->idiag_rqueue = 0; - r->idiag_wqueue = 0; - r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); - r->idiag_inode = 0; - - nlmsg_end(skb, nlh); - return 0; -} - static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, @@ -769,10 +749,10 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, continue; } - err = inet_diag_fill_req(skb, sk, req, - sk_user_ns(NETLINK_CB(cb->skb).sk), + err = inet_req_diag_fill((struct sock *)req, skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, cb->nlh); + cb->nlh->nlmsg_seq, + NLM_F_MULTI, cb->nlh); if (err < 0) { cb->args[3] = j + 1; cb->args[4] = reqnum; @@ -903,10 +883,16 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, if (r->id.idiag_dport != sk->sk_dport && r->id.idiag_dport) goto next_normal; - if (sk->sk_state == TCP_TIME_WAIT) - res = inet_twsk_diag_dump(sk, skb, cb, r, bc); - else - res = inet_csk_diag_dump(sk, skb, cb, r, bc); + twsk_build_assert(); + + if (!inet_diag_bc_sk(bc, sk)) + goto next_normal; + + res = sk_diag_fill(sk, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + cb->nlh); if (res < 0) { spin_unlock_bh(lock); goto done; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 64401a2fdd33c..c28bca4cc15bf 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -269,6 +269,12 @@ void sock_gen_put(struct sock *sk) } EXPORT_SYMBOL_GPL(sock_gen_put); +void sock_edemux(struct sk_buff *skb) +{ + sock_gen_put(skb->sk); +} +EXPORT_SYMBOL(sock_edemux); + struct sock *__inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f17db898ed266..5ae0c49f5e2ee 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -219,9 +219,9 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, } EXPORT_SYMBOL_GPL(__cookie_v4_check); -static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst) +static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst) { struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; @@ -357,7 +357,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->opt = tcp_v4_save_options(skb); if (security_inet_conn_request(sk, skb, req)) { - reqsk_free(req); + reqsk_put(req); goto out; } @@ -378,7 +378,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { - reqsk_free(req); + reqsk_put(req); goto out; } diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index fe77417fc137e..84381319e1bc1 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -253,7 +253,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk) fastopenq->rskq_rst_head = req1->dl_next; fastopenq->qlen--; spin_unlock(&fastopenq->lock); - reqsk_free(req1); + reqsk_put(req1); } return true; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1f514a0c5e60f..80067d5858b4a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1518,7 +1518,7 @@ void tcp_v4_early_demux(struct sk_buff *skb) if (sk) { skb->sk = sk; skb->destructor = sock_edemux; - if (sk->sk_state != TCP_TIME_WAIT) { + if (sk_fullsock(sk)) { struct dst_entry *dst = sk->sk_rx_dst; if (dst) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d89f028dc8c4c..e4761b22307b2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1583,7 +1583,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) if (sk) { skb->sk = sk; skb->destructor = sock_edemux; - if (sk->sk_state != TCP_TIME_WAIT) { + if (sk_fullsock(sk)) { struct dst_entry *dst = sk->sk_rx_dst; if (dst)