Commit 7596c3c

Jerry Chu authored and David S. Miller committed Sep 1, 2012
1 parent 25a72dd commit 7596c3c
Showing 14 changed files with 331 additions and 51 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
---
-refs/heads/master: 1046716368979dee857a2b8a91c4a8833f21b9cb
refs/heads/master: 8336886f786fdacbc19b719c1f7ea91eb70706d4
13 changes: 0 additions & 13 deletions trunk/include/net/request_sock.h
@@ -226,19 +226,6 @@ static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue
return req;
}

-static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue,
-                                                 struct sock *parent)
-{
-        struct request_sock *req = reqsk_queue_remove(queue);
-        struct sock *child = req->sk;
-
-        WARN_ON(child == NULL);
-
-        sk_acceptq_removed(parent);
-        __reqsk_free(req);
-        return child;
-}
-
static inline int reqsk_queue_removed(struct request_sock_queue *queue,
struct request_sock *req)
{
6 changes: 4 additions & 2 deletions trunk/include/net/tcp.h
@@ -424,7 +424,8 @@ extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *
const struct tcphdr *th);
extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
struct request_sock *req,
-struct request_sock **prev);
struct request_sock **prev,
bool fastopen);
extern int tcp_child_process(struct sock *parent, struct sock *child,
struct sk_buff *skb);
extern bool tcp_use_frto(struct sock *sk);
@@ -478,7 +479,8 @@ extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
extern int tcp_connect(struct sock *sk);
extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
-struct request_values *rvp);
struct request_values *rvp,
struct tcp_fastopen_cookie *foc);
extern int tcp_disconnect(struct sock *sk, int flags);

void tcp_connect_init(struct sock *sk);
95 changes: 95 additions & 0 deletions trunk/net/core/request_sock.c
@@ -15,6 +15,7 @@
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/tcp.h>
#include <linux/vmalloc.h>

#include <net/request_sock.h>
@@ -130,3 +131,97 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
kfree(lopt);
}

/*
* This function is called to set a Fast Open socket's "fastopen_rsk" field
* to NULL when a TFO socket no longer needs to access the request_sock.
* This happens only after 3WHS has been either completed or aborted (e.g.,
* RST is received).
*
* Before TFO, a child socket is created only after 3WHS is completed,
* hence it never needs to access the request_sock. Things get a lot more
* complex with TFO. A child socket, accepted or not, has to access its
* request_sock for 3WHS processing, e.g., to retransmit SYN-ACK pkts,
* until 3WHS is either completed or aborted. Afterwards the req will stay
* around until either the child socket is accepted or, in the rare case,
* until the listener is closed before the child is accepted.
*
* In short, a request socket is only freed after BOTH 3WHS has completed
* (or aborted) and the child socket has been accepted (or listener closed).
* When a child socket is accepted, its corresponding req->sk is set to
* NULL since it's no longer needed. More importantly, "req->sk == NULL"
* will be used by the code below to determine if a child socket has been
* accepted or not, and the check is protected by the fastopenq->lock
* described below.
*
* Note that fastopen_rsk is only accessed from the child socket's context
* with its socket lock held. But a request_sock (req) can be accessed by
* both its child socket through fastopen_rsk, and a listener socket through
* icsk_accept_queue.rskq_accept_head. To protect this access, a simple spin
* lock per listener, "icsk->icsk_accept_queue.fastopenq->lock", is created.
* Only in the rare case when both the listener's and the child's locks are
* held, e.g., in inet_csk_listen_stop(), do we not need to acquire the lock.
* The lock also protects other fields such as fastopenq->qlen, which is
* decremented by this function when fastopen_rsk is no longer needed.
*
* Note that another solution was to simply use the existing socket lock
* from the listener. But first, the socket lock is difficult to use: it is
* not a simple spin lock - one must consider sock_owned_by_user() and
* arrange to use the sk_add_backlog() machinery. What really makes it
* infeasible, though, is the locking-hierarchy violation: e.g.,
* inet_csk_listen_stop() may try to acquire a child's lock while holding
* the listener's socket lock. A corner case in tcp_v4_hnd_req() might also
* trigger this locking order.
*
* When a TFO req is created, it needs to sock_hold its listener to prevent
* the latter from going away.
*
* This function also sets "treq->listener" to NULL and releases the
* reference on the listener socket. treq->listener is also used by the
* listener, so it is protected by the fastopenq->lock in this function.
*/
void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
bool reset)
{
struct sock *lsk = tcp_rsk(req)->listener;
struct fastopen_queue *fastopenq =
inet_csk(lsk)->icsk_accept_queue.fastopenq;

BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk));

tcp_sk(sk)->fastopen_rsk = NULL;
spin_lock_bh(&fastopenq->lock);
fastopenq->qlen--;
tcp_rsk(req)->listener = NULL;
if (req->sk) /* the child socket hasn't been accepted yet */
goto out;

if (!reset || lsk->sk_state != TCP_LISTEN) {
/* If the listener has been closed, don't bother with the
* special RST handling below.
*/
spin_unlock_bh(&fastopenq->lock);
sock_put(lsk);
reqsk_free(req);
return;
}
/* Wait for 60 seconds before removing a req that has triggered RST.
* This is a simple defense against a TFO spoofing attack: the req is
* counted against fastopenq->qlen, and TFO is disabled when the qlen
* exceeds max_qlen.
*
* For more details, see the CoNEXT 2011 paper "TCP Fast Open".
*/
req->expires = jiffies + 60*HZ;
if (fastopenq->rskq_rst_head == NULL)
fastopenq->rskq_rst_head = req;
else
fastopenq->rskq_rst_tail->dl_next = req;

req->dl_next = NULL;
fastopenq->rskq_rst_tail = req;
fastopenq->qlen++;
out:
spin_unlock_bh(&fastopenq->lock);
sock_put(lsk);
return;
}
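
To make the lifecycle described in the comment above easier to follow, here is a minimal userspace sketch (plain pthreads C, not kernel code; every name in it is hypothetical) of the dual-ownership rule: the request is freed only after BOTH the 3WHS has finished (or aborted) AND the child has been accepted, with req->sk == NULL standing in for "child already taken".

#include <pthread.h>
#include <stdlib.h>

struct fake_req {
        void *sk;            /* NULL once the child has been accepted */
        int handshake_done;  /* set when the 3WHS completes or aborts */
};

static pthread_mutex_t fastopenq_lock = PTHREAD_MUTEX_INITIALIZER;

/* Accept path: take the child; free the req only if the handshake
 * side is already finished with it, otherwise hand ownership over. */
static void fake_accept(struct fake_req *req)
{
        pthread_mutex_lock(&fastopenq_lock);
        if (!req->handshake_done) {
                req->sk = NULL;  /* "child taken"; handshake side frees */
                req = NULL;
        }
        pthread_mutex_unlock(&fastopenq_lock);
        free(req);               /* no-op when ownership was handed over */
}

/* Handshake path: called when the 3WHS completes or is aborted. */
static void fake_handshake_done(struct fake_req *req)
{
        pthread_mutex_lock(&fastopenq_lock);
        req->handshake_done = 1;
        if (req->sk) {           /* child not accepted yet: accept() frees */
                pthread_mutex_unlock(&fastopenq_lock);
                return;
        }
        pthread_mutex_unlock(&fastopenq_lock);
        free(req);               /* both conditions met: safe to free */
}

Whichever side runs second under the lock sees the other side's mark and performs the free, which is exactly how inet_csk_accept() and reqsk_fastopen_remove() coordinate through fastopenq->lock.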
28 changes: 27 additions & 1 deletion trunk/net/ipv4/af_inet.c
@@ -149,6 +149,11 @@ void inet_sock_destruct(struct sock *sk)
pr_err("Attempt to release alive inet socket %p\n", sk);
return;
}
if (sk->sk_type == SOCK_STREAM) {
struct fastopen_queue *fastopenq =
inet_csk(sk)->icsk_accept_queue.fastopenq;
kfree(fastopenq);
}

WARN_ON(atomic_read(&sk->sk_rmem_alloc));
WARN_ON(atomic_read(&sk->sk_wmem_alloc));
@@ -212,6 +217,26 @@ int inet_listen(struct socket *sock, int backlog)
* we can only allow the backlog to be adjusted.
*/
if (old_state != TCP_LISTEN) {
/* Check special setups for testing purposes to enable TFO
* without requiring the TCP_FASTOPEN sockopt.
* Note that only TCP sockets (SOCK_STREAM) will reach here.
* Also, fastopenq may already have been allocated because this
* socket was previously in TCP_LISTEN state but was shut down
* via shutdown() rather than close().
*/
if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 &&
inet_csk(sk)->icsk_accept_queue.fastopenq == NULL) {
if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0)
err = fastopen_init_queue(sk, backlog);
else if ((sysctl_tcp_fastopen &
TFO_SERVER_WO_SOCKOPT2) != 0)
err = fastopen_init_queue(sk,
((uint)sysctl_tcp_fastopen) >> 16);
else
err = 0;
if (err)
goto out;
}
err = inet_csk_listen_start(sk, backlog);
if (err)
goto out;
@@ -701,7 +726,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags)

sock_rps_record_flow(sk2);
WARN_ON(!((1 << sk2->sk_state) &
-(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)));
(TCPF_ESTABLISHED | TCPF_SYN_RECV |
TCPF_CLOSE_WAIT | TCPF_CLOSE)));

sock_graft(sk2, newsock);

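As an aside, the backlog encoding that the inet_listen() hunk above decodes can be illustrated with a small standalone C sketch. The TFO_SERVER_* values below are assumed from the defines this patch series adds to include/net/tcp.h; they do not appear in this diff, so treat the exact bits as an assumption.

#include <stdio.h>

/* Assumed flag layout, mirroring the patch series' TFO_SERVER_* defines. */
#define TFO_SERVER_ENABLE       0x2
#define TFO_SERVER_WO_SOCKOPT1  0x400  /* reuse listen()'s backlog for TFO */
#define TFO_SERVER_WO_SOCKOPT2  0x800  /* TFO backlog in the top 16 bits   */

int main(void)
{
        /* e.g. sysctl net.ipv4.tcp_fastopen = (16 << 16) | 0x800 | 0x2 */
        unsigned int sysctl_tcp_fastopen =
                (16u << 16) | TFO_SERVER_WO_SOCKOPT2 | TFO_SERVER_ENABLE;
        int listen_backlog = 128;
        int tfo_backlog = 0;

        if (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) {
                if (sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1)
                        tfo_backlog = listen_backlog;
                else if (sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT2)
                        tfo_backlog = (int)(sysctl_tcp_fastopen >> 16);
        }
        printf("TFO backlog: %d\n", tfo_backlog);  /* prints 16 */
        return 0;
}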
57 changes: 51 additions & 6 deletions trunk/net/ipv4/inet_connection_sock.c
@@ -283,7 +283,9 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
struct sock *newsk;
struct request_sock *req;
int error;

lock_sock(sk);
@@ -296,7 +298,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
goto out_err;

/* Find already established connection */
-if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {
if (reqsk_queue_empty(queue)) {
long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);

/* If this is a non blocking socket don't sleep */
@@ -308,14 +310,32 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
if (error)
goto out_err;
}

-newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
-WARN_ON(newsk->sk_state == TCP_SYN_RECV);
req = reqsk_queue_remove(queue);
newsk = req->sk;

sk_acceptq_removed(sk);
if (sk->sk_type == SOCK_STREAM && queue->fastopenq != NULL) {
spin_lock_bh(&queue->fastopenq->lock);
if (tcp_rsk(req)->listener) {
/* We are still waiting for the final ACK from the 3WHS,
* so we can't free req now. Instead, we set req->sk to
* NULL to signify that the child socket is taken
* so reqsk_fastopen_remove() will free the req
* when 3WHS finishes (or is aborted).
*/
req->sk = NULL;
req = NULL;
}
spin_unlock_bh(&queue->fastopenq->lock);
}
out:
release_sock(sk);
if (req)
__reqsk_free(req);
return newsk;
out_err:
newsk = NULL;
req = NULL;
*err = error;
goto out;
}
@@ -720,13 +740,14 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start);
void inet_csk_listen_stop(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
struct request_sock *acc_req;
struct request_sock *req;

inet_csk_delete_keepalive_timer(sk);

/* make all the listen_opt local to us */
-acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
acc_req = reqsk_queue_yank_acceptq(queue);

/* Following specs, it would be better either to send FIN
* (and enter FIN-WAIT-1, it is normal close)
@@ -736,7 +757,7 @@ void inet_csk_listen_stop(struct sock *sk)
* To be honest, we are not able to make either
* of the variants now. --ANK
*/
-reqsk_queue_destroy(&icsk->icsk_accept_queue);
reqsk_queue_destroy(queue);

while ((req = acc_req) != NULL) {
struct sock *child = req->sk;
@@ -754,6 +775,19 @@

percpu_counter_inc(sk->sk_prot->orphan_count);

if (sk->sk_type == SOCK_STREAM && tcp_rsk(req)->listener) {
BUG_ON(tcp_sk(child)->fastopen_rsk != req);
BUG_ON(sk != tcp_rsk(req)->listener);

/* Paranoid, to prevent a race condition if
* an inbound pkt destined for the child is
* blocked by the sock lock in tcp_v4_rcv().
* Also to satisfy an assertion in
* tcp_v4_destroy_sock().
*/
tcp_sk(child)->fastopen_rsk = NULL;
sock_put(sk);
}
inet_csk_destroy_sock(child);

bh_unlock_sock(child);
@@ -763,6 +797,17 @@
sk_acceptq_removed(sk);
__reqsk_free(req);
}
if (queue->fastopenq != NULL) {
/* Free all the reqs queued in rskq_rst_head. */
spin_lock_bh(&queue->fastopenq->lock);
acc_req = queue->fastopenq->rskq_rst_head;
queue->fastopenq->rskq_rst_head = NULL;
spin_unlock_bh(&queue->fastopenq->lock);
while ((req = acc_req) != NULL) {
acc_req = req->dl_next;
__reqsk_free(req);
}
}
WARN_ON(sk->sk_ack_backlog);
}
EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
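For orientation, the ordinary sockopt path that the inet_listen() comment says these sysctl modes bypass looks roughly like this from userspace. This is a hedged sketch with error handling elided; it assumes the TCP_FASTOPEN sockopt introduced elsewhere in this patch series, and qlen is the value capped via fastopenq as described in reqsk_fastopen_remove() above.

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        int qlen = 16;  /* cap on pending TFO requests (fastopenq->qlen) */
        int fd = socket(AF_INET, SOCK_STREAM, 0);
        struct sockaddr_in addr;

        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_addr.s_addr = htonl(INADDR_ANY);
        addr.sin_port = htons(8080);
        bind(fd, (struct sockaddr *)&addr, sizeof(addr));

        /* Assumed to allocate the fastopen_queue that
         * inet_sock_destruct() above frees. */
        setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen));
        listen(fd, 128);

        for (;;) {
                /* With TFO the child may still be in TCP_SYN_RECV here,
                 * which is why inet_accept()'s WARN_ON gains
                 * TCPF_SYN_RECV in this patch. */
                int cfd = accept(fd, NULL, NULL);
                if (cfd < 0)
                        break;
                close(cfd);
        }
        close(fd);
        return 0;
}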
1 change: 1 addition & 0 deletions trunk/net/ipv4/syncookies.c
@@ -319,6 +319,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
treq->listener = NULL;

/* We threw the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)