Skip to content

Commit

Permalink
tcp: TCP Fast Open Server - header & support functions
Browse files Browse the repository at this point in the history
This patch adds all the necessary data structure and support
functions to implement TFO server side. It also documents a number
of flags for the sysctl_tcp_fastopen knob, and adds a few Linux
extension MIBs.

In addition, it includes the following:

1. A new TCP_FASTOPEN socket option that an application must set, supplying
the maximum allowed backlog, in order to enable TFO on its listener.

2. A number of key data structures:
"fastopen_rsk" in tcp_sock - for a big socket to access its
request_sock for retransmission and ACK processing purposes. It is
non-NULL iff the 3WHS has not completed.

"fastopenq" in request_sock_queue - points to a per Fast Open
listener data structure "fastopen_queue" to keep track of qlen (# of
outstanding Fast Open requests) and max_qlen, among other things.

"listener" in tcp_request_sock - points to the original listener
for book-keeping purposes, i.e., to maintain qlen against max_qlen
as part of the defense against IP spoofing attacks.

3. Various data structures and functions, many in tcp_fastopen.c, to
support server side Fast Open cookie operations, including
/proc/sys/net/ipv4/tcp_fastopen_key to allow manual rekeying.

Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Jerry Chu authored and David S. Miller committed Sep 1, 2012
1 parent 2a35cfa commit 1046716
Show file tree
Hide file tree
Showing 9 changed files with 276 additions and 20 deletions.
29 changes: 22 additions & 7 deletions Documentation/networking/ip-sysctl.txt
Original file line number Diff line number Diff line change
Expand Up @@ -467,16 +467,31 @@ tcp_syncookies - BOOLEAN
tcp_fastopen - INTEGER
Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data
in the opening SYN packet. To use this feature, the client application
must not use connect(). Instead, it should use sendmsg() or sendto()
with MSG_FASTOPEN flag which performs a TCP handshake automatically.

The values (bitmap) are:
1: Enables sending data in the opening SYN on the client
5: Enables sending data in the opening SYN on the client regardless
of cookie availability.
must use sendmsg() or sendto() with MSG_FASTOPEN flag rather than
connect() to perform a TCP handshake automatically.

The values (bitmap) are:
1: Enables sending data in the opening SYN on the client.
2: Enables TCP Fast Open on the server side, i.e., allowing data in
a SYN packet to be accepted and passed to the application before
the 3-way handshake finishes.
4: Send data in the opening SYN regardless of cookie availability and
without a cookie option.
0x100: Accept SYN data w/o validating the cookie.
0x200: Accept data-in-SYN w/o any cookie option present.
0x400/0x800: Enable Fast Open on all listeners regardless of the
TCP_FASTOPEN socket option. The two different flags designate two
different ways of setting max_qlen without the TCP_FASTOPEN socket
option.

Default: 0

Note that the client & server side Fast Open flags (1 and 2
respectively) must also be enabled before the rest of the flags can
take effect.

See include/net/tcp.h and the code for more details.

tcp_syn_retries - INTEGER
Number of times initial SYNs for an active TCP connection attempt
will be retransmitted. Should not be higher than 255. Default value
Expand Down
4 changes: 4 additions & 0 deletions include/linux/snmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,10 @@ enum
LINUX_MIB_TCPCHALLENGEACK, /* TCPChallengeACK */
LINUX_MIB_TCPSYNCHALLENGE, /* TCPSYNChallenge */
LINUX_MIB_TCPFASTOPENACTIVE, /* TCPFastOpenActive */
LINUX_MIB_TCPFASTOPENPASSIVE, /* TCPFastOpenPassive*/
LINUX_MIB_TCPFASTOPENPASSIVEFAIL, /* TCPFastOpenPassiveFail */
LINUX_MIB_TCPFASTOPENLISTENOVERFLOW, /* TCPFastOpenListenOverflow */
LINUX_MIB_TCPFASTOPENCOOKIEREQD, /* TCPFastOpenCookieReqd */
__LINUX_MIB_MAX
};

Expand Down
45 changes: 42 additions & 3 deletions include/linux/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ enum {
#define TCP_REPAIR_QUEUE 20
#define TCP_QUEUE_SEQ 21
#define TCP_REPAIR_OPTIONS 22
#define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */

struct tcp_repair_opt {
__u32 opt_code;
Expand Down Expand Up @@ -246,6 +247,7 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb)
/* TCP Fast Open */
#define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */
#define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */
#define TCP_FASTOPEN_COOKIE_SIZE 8 /* the size employed by this impl. */

/* TCP Fast Open Cookie as stored in memory */
struct tcp_fastopen_cookie {
Expand Down Expand Up @@ -312,9 +314,14 @@ struct tcp_request_sock {
/* Only used by TCP MD5 Signature so far. */
const struct tcp_request_sock_ops *af_specific;
#endif
struct sock *listener; /* needed for TFO */
u32 rcv_isn;
u32 snt_isn;
u32 snt_synack; /* synack sent time */
u32 rcv_nxt; /* the ack # by SYNACK. For
* FastOpen it's the seq#
* after data-in-SYN.
*/
};

static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
Expand Down Expand Up @@ -505,14 +512,18 @@ struct tcp_sock {
struct tcp_md5sig_info __rcu *md5sig_info;
#endif

/* TCP fastopen related information */
struct tcp_fastopen_request *fastopen_req;

/* When the cookie options are generated and exchanged, then this
* object holds a reference to them (cookie_values->kref). Also
* contains related tcp_cookie_transactions fields.
*/
struct tcp_cookie_values *cookie_values;

/* TCP fastopen related information */
struct tcp_fastopen_request *fastopen_req;
/* fastopen_rsk points to request_sock that resulted in this big
* socket. Used to retransmit SYNACKs etc.
*/
struct request_sock *fastopen_rsk;
};

enum tsq_flags {
Expand Down Expand Up @@ -552,6 +563,34 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
return (struct tcp_timewait_sock *)sk;
}

/*
 * Tell whether @sk is a passive (server side) Fast Open socket that is
 * still in TCP_SYN_RECV and still has its originating request_sock
 * attached through tcp_sock.fastopen_rsk.
 */
static inline bool tcp_passive_fastopen(const struct sock *sk)
{
	/* Only a socket in SYN_RECV can qualify; bail out before touching tcp_sk(). */
	if (sk->sk_state != TCP_SYN_RECV)
		return false;

	return tcp_sk(sk)->fastopen_rsk != NULL;
}

/*
 * Return true when @foc describes a Fast Open cookie, i.e. its length
 * field does not hold the -1 "absent" sentinel.
 */
static inline bool fastopen_cookie_present(struct tcp_fastopen_cookie *foc)
{
	const bool absent = (foc->len == -1);

	return !absent;
}

static inline int fastopen_init_queue(struct sock *sk, int backlog)
{
struct request_sock_queue *queue =
&inet_csk(sk)->icsk_accept_queue;

if (queue->fastopenq == NULL) {
queue->fastopenq = kzalloc(
sizeof(struct fastopen_queue),
sk->sk_allocation);
if (queue->fastopenq == NULL)
return -ENOMEM;
spin_lock_init(&queue->fastopenq->lock);
}
queue->fastopenq->max_qlen = backlog;
return 0;
}

#endif /* __KERNEL__ */

#endif /* _LINUX_TCP_H */
36 changes: 36 additions & 0 deletions include/net/request_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,34 @@ struct listen_sock {
struct request_sock *syn_table[0];
};

/*
 * Per-listener state for a TCP Fast Open listener, reached through
 * request_sock_queue.fastopenq -
 *	lock - protects the access to all the reqsk, which is co-owned by
 *		the listener and the child socket.
 *	qlen - pending TFO requests (still in TCP_SYN_RECV).
 *	max_qlen - max TFO reqs allowed before TFO is disabled.
 *	rskq_rst_head, rskq_rst_tail - first and last of the remembered past
 *		TFO requests that caused a RST (part of the defense against
 *		spoofing attacks).
 *
 * XXX (TFO) - ideally these fields can be made as part of "listen_sock"
 * structure above. But there is some implementation difficulty due to
 * listen_sock being part of request_sock_queue hence will be freed when
 * a listener is stopped. But TFO related fields may continue to be
 * accessed even after a listener is closed, until its sk_refcnt drops
 * to 0 implying no more outstanding TFO reqs. One solution is to keep
 * listen_opt around until sk_refcnt drops to 0. But there is some other
 * complexity that needs to be resolved. E.g., a listener can be disabled
 * temporarily through shutdown()->tcp_disconnect(), and re-enabled later.
 */
struct fastopen_queue {
struct request_sock *rskq_rst_head; /* Keep track of past TFO */
struct request_sock *rskq_rst_tail; /* requests that caused RST.
 * This is part of the defense
 * against spoofing attacks.
 */
spinlock_t lock; /* guards the reqsks shared by listener and child */
int qlen; /* # of pending (TCP_SYN_RECV) reqs */
int max_qlen; /* max TFO reqs allowed; != 0 iff TFO is currently enabled */
};

/** struct request_sock_queue - queue of request_socks
*
* @rskq_accept_head - FIFO head of established children
Expand All @@ -129,13 +157,21 @@ struct request_sock_queue {
u8 rskq_defer_accept;
/* 3 bytes hole, try to pack */
struct listen_sock *listen_opt;
struct fastopen_queue *fastopenq; /* This is non-NULL iff TFO has been
* enabled on this listener. Check
* max_qlen != 0 in fastopen_queue
* to determine if TFO is enabled
* right at this moment.
*/
};

extern int reqsk_queue_alloc(struct request_sock_queue *queue,
unsigned int nr_table_entries);

extern void __reqsk_queue_destroy(struct request_sock_queue *queue);
extern void reqsk_queue_destroy(struct request_sock_queue *queue);
extern void reqsk_fastopen_remove(struct sock *sk,
struct request_sock *req, bool reset);

static inline struct request_sock *
reqsk_queue_yank_acceptq(struct request_sock_queue *queue)
Expand Down
46 changes: 39 additions & 7 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,24 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);

/* Bit Flags for sysctl_tcp_fastopen */
#define TFO_CLIENT_ENABLE 1
#define TFO_SERVER_ENABLE 2
#define TFO_CLIENT_NO_COOKIE 4 /* Data in SYN w/o cookie option */

/* Process SYN data but skip cookie validation */
#define TFO_SERVER_COOKIE_NOT_CHKED 0x100
/* Accept SYN data w/o any cookie option */
#define TFO_SERVER_COOKIE_NOT_REQD 0x200

/* Force enable TFO on all listeners, i.e., not requiring the
* TCP_FASTOPEN socket option. SOCKOPT1/2 determine how to set max_qlen.
*/
#define TFO_SERVER_WO_SOCKOPT1 0x400
#define TFO_SERVER_WO_SOCKOPT2 0x800
/* Always create TFO child sockets on a TFO listener even when
* cookie/data not present. (For testing purpose!)
*/
#define TFO_SERVER_ALWAYS 0x1000

extern struct inet_timewait_death_row tcp_death_row;

/* sysctl variables for tcp */
Expand Down Expand Up @@ -421,12 +437,6 @@ extern void tcp_metrics_init(void);
extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check);
extern bool tcp_remember_stamp(struct sock *sk);
extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw);
extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie,
int *syn_loss, unsigned long *last_syn_loss);
extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
struct tcp_fastopen_cookie *cookie,
bool syn_lost);
extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst);
extern void tcp_disable_fack(struct tcp_sock *tp);
extern void tcp_close(struct sock *sk, long timeout);
Expand Down Expand Up @@ -537,6 +547,7 @@ extern void tcp_send_delayed_ack(struct sock *sk);
extern void tcp_cwnd_application_limited(struct sock *sk);
extern void tcp_resume_early_retransmit(struct sock *sk);
extern void tcp_rearm_rto(struct sock *sk);
extern void tcp_reset(struct sock *sk);

/* tcp_timer.c */
extern void tcp_init_xmit_timers(struct sock *);
Expand Down Expand Up @@ -586,6 +597,7 @@ extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
extern int tcp_mss_to_mtu(struct sock *sk, int mss);
extern void tcp_mtup_init(struct sock *sk);
extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt);
extern void tcp_init_buffer_space(struct sock *sk);

static inline void tcp_bound_rto(const struct sock *sk)
{
Expand Down Expand Up @@ -1104,6 +1116,7 @@ static inline void tcp_openreq_init(struct request_sock *req,
req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
req->cookie_ts = 0;
tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
req->mss = rx_opt->mss_clamp;
req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
ireq->tstamp_ok = rx_opt->tstamp_ok;
Expand Down Expand Up @@ -1308,15 +1321,34 @@ extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff
extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
const struct tcp_md5sig_key *key);

/* From tcp_fastopen.c */
extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie,
int *syn_loss, unsigned long *last_syn_loss);
extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
struct tcp_fastopen_cookie *cookie,
bool syn_lost);
/* Fast Open request state; tcp_sock.fastopen_req points to one of these. */
struct tcp_fastopen_request {
/* Fast Open cookie. Size 0 means a cookie request */
struct tcp_fastopen_cookie cookie;
struct msghdr *data; /* data in MSG_FASTOPEN */
u16 copied; /* amount queued in tcp_connect() (presumably bytes of @data - confirm) */
};

void tcp_free_fastopen_req(struct tcp_sock *tp);

extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
int tcp_fastopen_reset_cipher(void *key, unsigned int len);
void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc);

#define TCP_FASTOPEN_KEY_LENGTH 16

/* Fastopen key context.
 *
 * Carries the TCP_FASTOPEN_KEY_LENGTH-byte Fast Open key plus a crypto
 * cipher handle. The active context is published through the __rcu
 * pointer tcp_fastopen_ctx; readers such as proc_tcp_fastopen_key() fetch
 * it with rcu_dereference() inside rcu_read_lock().
 */
struct tcp_fastopen_context {
/* NOTE(review): presumably the cipher keyed with key[]; the __rcu
 * annotation on this member looks redundant when the whole context is
 * what gets RCU-published - confirm against tcp_fastopen.c.
 */
struct crypto_cipher __rcu *tfm;
__u8 key[TCP_FASTOPEN_KEY_LENGTH]; /* raw key bytes, as shown by the sysctl */
struct rcu_head rcu; /* presumably for RCU-deferred freeing of a replaced context - confirm */
};

/* write queue abstraction */
static inline void tcp_write_queue_purge(struct sock *sk)
{
Expand Down
4 changes: 4 additions & 0 deletions net/ipv4/proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,10 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK),
SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE),
SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE),
SNMP_MIB_ITEM("TCPFastOpenPassive", LINUX_MIB_TCPFASTOPENPASSIVE),
SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL),
SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD),
SNMP_MIB_SENTINEL
};

Expand Down
45 changes: 45 additions & 0 deletions net/ipv4/sysctl_net_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,45 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write,
return 0;
}

/*
 * sysctl handler for /proc/sys/net/ipv4/tcp_fastopen_key.
 *
 * Read:  formats the current Fast Open key as four dash-separated 32-bit
 *        hex words ("%08x-%08x-%08x-%08x"). When no key context has been
 *        installed yet, an all-zero key is reported.
 * Write: parses a string in the same format and hands the 16-byte key to
 *        tcp_fastopen_reset_cipher().
 *
 * @ctl:    the registered table entry (unused; a private scratch table
 *          backed by a temporary buffer is passed to proc_dostring()).
 * @write:  non-zero for a write access.
 * @buffer: user buffer to read from / write to.
 * @lenp:   in/out length, as for proc_dostring().
 * @ppos:   in/out file position, as for proc_dostring().
 *
 * Returns 0 on success, -ENOMEM if the scratch buffer cannot be
 * allocated, -EINVAL if the written string does not contain four hex
 * words, or whatever proc_dostring() / tcp_fastopen_reset_cipher()
 * report.
 */
int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer,
			  size_t *lenp, loff_t *ppos)
{
	ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
	struct tcp_fastopen_context *ctxt;
	int ret;
	/* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH. Zero-initialised so
	 * that uninitialised stack contents are never formatted out to user
	 * space (or logged) while no key context exists.
	 */
	u32 user_key[4] = { 0 };

	tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
	if (!tbl.data)
		return -ENOMEM;

	/* Snapshot the key; the context may be replaced concurrently. */
	rcu_read_lock();
	ctxt = rcu_dereference(tcp_fastopen_ctx);
	if (ctxt)
		memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
	rcu_read_unlock();

	snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
		 user_key[0], user_key[1], user_key[2], user_key[3]);
	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);

	if (write && ret == 0) {
		if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
			   user_key + 2, user_key + 3) != 4) {
			ret = -EINVAL;
		} else {
			/* Propagate a failed rekey instead of reporting
			 * success. NOTE(review): assumes the usual
			 * 0 / negative-errno return convention - confirm
			 * against tcp_fastopen.c.
			 */
			ret = tcp_fastopen_reset_cipher(user_key,
							TCP_FASTOPEN_KEY_LENGTH);
		}
	}

	/* ret is a signed errno, so print it with %d. */
	pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %d\n",
		 user_key[0], user_key[1], user_key[2], user_key[3],
		 (char *)tbl.data, ret);
	kfree(tbl.data);
	return ret;
}

static struct ctl_table ipv4_table[] = {
{
.procname = "tcp_timestamps",
Expand Down Expand Up @@ -385,6 +424,12 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_fastopen_key",
.mode = 0600,
.maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
.proc_handler = proc_tcp_fastopen_key,
},
{
.procname = "tcp_tw_recycle",
.data = &tcp_death_row.sysctl_tw_recycle,
Expand Down
Loading

0 comments on commit 1046716

Please sign in to comment.