From 85f7e7508a1d288b513493196ef406c6c06134e1 Mon Sep 17 00:00:00 2001
From: Florian Westphal
Date: Mon, 21 Nov 2016 14:18:37 +0100
Subject: [PATCH 1/2] tcp: add cwnd_undo functions to various tcp cc algorithms

congestion control algorithms that do not halve cwnd in their .ssthresh
should provide a .cwnd_undo rather than rely on current fallback which
assumes reno halving (and thus doubles the cwnd).

All of these do 'something else' in their .ssthresh implementation, thus
store the cwnd on loss and provide .undo_cwnd to restore it again.

A followup patch will remove the fallback and all algorithms will need
to provide a .cwnd_undo function.

Signed-off-by: Florian Westphal
Signed-off-by: David S. Miller
---
 net/ipv4/tcp_highspeed.c | 11 ++++++++++-
 net/ipv4/tcp_illinois.c  | 10 ++++++++++
 net/ipv4/tcp_scalable.c  | 15 +++++++++++++++
 net/ipv4/tcp_veno.c      | 10 ++++++++++
 net/ipv4/tcp_yeah.c      | 10 ++++++++++
 5 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index db7842495a641..6d9879e93648a 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -94,6 +94,7 @@ static const struct hstcp_aimd_val {
 
 struct hstcp {
 	u32	ai;
+	u32	loss_cwnd;
 };
 
 static void hstcp_init(struct sock *sk)
@@ -150,16 +151,24 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 static u32 hstcp_ssthresh(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
-	const struct hstcp *ca = inet_csk_ca(sk);
+	struct hstcp *ca = inet_csk_ca(sk);
 
+	ca->loss_cwnd = tp->snd_cwnd;
 	/* Do multiplicative decrease */
 	return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8),
 		   2U);
 }
 
+static u32 hstcp_cwnd_undo(struct sock *sk)
+{
+	const struct hstcp *ca = inet_csk_ca(sk);
+
+	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
 static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
 	.init		= hstcp_init,
 	.ssthresh	= hstcp_ssthresh,
+	.undo_cwnd	= hstcp_cwnd_undo,
 	.cong_avoid	= hstcp_cong_avoid,
 
 	.owner		= THIS_MODULE,
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index c8e6d86be1142..60352ff4f5a85 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -48,6 +48,7 @@ struct illinois {
 	u32	end_seq;	/* right edge of current RTT */
 	u32	alpha;		/* Additive increase */
 	u32	beta;		/* Muliplicative decrease */
+	u32	loss_cwnd;	/* cwnd on loss */
 	u16	acked;		/* # packets acked by current ACK */
 	u8	rtt_above;	/* average rtt has gone above threshold */
 	u8	rtt_low;	/* # of rtts measurements below threshold */
@@ -296,10 +297,18 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct illinois *ca = inet_csk_ca(sk);
 
+	ca->loss_cwnd = tp->snd_cwnd;
 	/* Multiplicative decrease */
 	return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
 }
 
+static u32 tcp_illinois_cwnd_undo(struct sock *sk)
+{
+	const struct illinois *ca = inet_csk_ca(sk);
+
+	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
+
 /* Extract info for Tcp socket info provided via netlink. */
 static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
 				union tcp_cc_info *info)
@@ -327,6 +336,7 @@ static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
 static struct tcp_congestion_ops tcp_illinois __read_mostly = {
 	.init		= tcp_illinois_init,
 	.ssthresh	= tcp_illinois_ssthresh,
+	.undo_cwnd	= tcp_illinois_cwnd_undo,
 	.cong_avoid	= tcp_illinois_cong_avoid,
 	.set_state	= tcp_illinois_state,
 	.get_info	= tcp_illinois_info,
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index bf5ea9e9bbc1e..f2123075ce6e1 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -15,6 +15,10 @@
 #define TCP_SCALABLE_AI_CNT	50U
 #define TCP_SCALABLE_MD_SCALE	3
 
+struct scalable {
+	u32 loss_cwnd;
+};
+
 static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -32,12 +36,23 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 static u32 tcp_scalable_ssthresh(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
+	struct scalable *ca = inet_csk_ca(sk);
+
+	ca->loss_cwnd = tp->snd_cwnd;
 
 	return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
 }
 
+static u32 tcp_scalable_cwnd_undo(struct sock *sk)
+{
+	const struct scalable *ca = inet_csk_ca(sk);
+
+	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
+
 static struct tcp_congestion_ops tcp_scalable __read_mostly = {
 	.ssthresh	= tcp_scalable_ssthresh,
+	.undo_cwnd	= tcp_scalable_cwnd_undo,
 	.cong_avoid	= tcp_scalable_cong_avoid,
 
 	.owner		= THIS_MODULE,
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 40171e163cffa..76005d4b8dfc2 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -30,6 +30,7 @@ struct veno {
 	u32 basertt;	/* the min of all Veno rtt measurements seen (in usec) */
 	u32 inc;	/* decide whether to increase cwnd */
 	u32 diff;	/* calculate the diff rate */
+	u32 loss_cwnd;	/* cwnd when loss occured */
 };
 
 /* There are several situations when we must "re-start" Veno:
@@ -193,6 +194,7 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct veno *veno = inet_csk_ca(sk);
 
+	veno->loss_cwnd = tp->snd_cwnd;
 	if (veno->diff < beta)
 		/* in "non-congestive state", cut cwnd by 1/5 */
 		return max(tp->snd_cwnd * 4 / 5, 2U);
@@ -201,9 +203,17 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
 		return max(tp->snd_cwnd >> 1U, 2U);
 }
 
+static u32 tcp_veno_cwnd_undo(struct sock *sk)
+{
+	const struct veno *veno = inet_csk_ca(sk);
+
+	return max(tcp_sk(sk)->snd_cwnd, veno->loss_cwnd);
+}
+
 static struct tcp_congestion_ops tcp_veno __read_mostly = {
 	.init		= tcp_veno_init,
 	.ssthresh	= tcp_veno_ssthresh,
+	.undo_cwnd	= tcp_veno_cwnd_undo,
 	.cong_avoid	= tcp_veno_cong_avoid,
 	.pkts_acked	= tcp_veno_pkts_acked,
 	.set_state	= tcp_veno_state,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 9c5fc973267fe..e6ff99c4bd3b6 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -37,6 +37,7 @@ struct yeah {
 	u32 fast_count;
 
 	u32 pkts_acked;
+	u32 loss_cwnd;
 };
 
 static void tcp_yeah_init(struct sock *sk)
@@ -219,13 +220,22 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
 
 	yeah->fast_count = 0;
 	yeah->reno_count = max(yeah->reno_count>>1, 2U);
 
+	yeah->loss_cwnd = tp->snd_cwnd;
 	return max_t(int, tp->snd_cwnd - reduction, 2);
 }
 
+static u32 tcp_yeah_cwnd_undo(struct sock *sk)
+{
+	const struct yeah *yeah = inet_csk_ca(sk);
+
+	return max(tcp_sk(sk)->snd_cwnd, yeah->loss_cwnd);
+}
+
 static struct tcp_congestion_ops tcp_yeah __read_mostly = {
 	.init		= tcp_yeah_init,
 	.ssthresh	= tcp_yeah_ssthresh,
+	.undo_cwnd	= tcp_yeah_cwnd_undo,
 	.cong_avoid	= tcp_yeah_cong_avoid,
 	.set_state	= tcp_vegas_state,
 	.cwnd_event	= tcp_vegas_cwnd_event,

From e97991832a4ea4a5f47d65f068a4c966a2eb5730 Mon Sep 17 00:00:00 2001
From: Florian Westphal
Date: Mon, 21 Nov 2016 14:18:38 +0100
Subject: [PATCH 2/2] tcp: make undo_cwnd mandatory for congestion modules

The undo_cwnd fallback in the stack doubles cwnd based on ssthresh,
which un-does reno halving behaviour.

It seems more appropriate to let congctl algorithms pair .ssthresh
and .undo_cwnd properly. Add a 'tcp_reno_undo_cwnd' function and wire it
up for all congestion algorithms that used to rely on the fallback.

Cc: Eric Dumazet
Cc: Yuchung Cheng
Cc: Neal Cardwell
Signed-off-by: Florian Westphal
Signed-off-by: David S. Miller
---
 include/net/tcp.h       |  1 +
 net/ipv4/tcp_cong.c     | 14 ++++++++++++--
 net/ipv4/tcp_dctcp.c    |  1 +
 net/ipv4/tcp_hybla.c    |  1 +
 net/ipv4/tcp_input.c    |  5 +----
 net/ipv4/tcp_lp.c       |  1 +
 net/ipv4/tcp_vegas.c    |  1 +
 net/ipv4/tcp_westwood.c |  1 +
 8 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 123979fe12bf7..7de80739adaba 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -958,6 +958,7 @@ u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
 
 u32 tcp_reno_ssthresh(struct sock *sk);
+u32 tcp_reno_undo_cwnd(struct sock *sk);
 void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
 extern struct tcp_congestion_ops tcp_reno;
 
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 1294af4e0127b..38905ec5f5081 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -68,8 +68,9 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
 {
 	int ret = 0;
 
-	/* all algorithms must implement ssthresh and cong_avoid ops */
-	if (!ca->ssthresh || !(ca->cong_avoid || ca->cong_control)) {
+	/* all algorithms must implement these */
+	if (!ca->ssthresh || !ca->undo_cwnd ||
+	    !(ca->cong_avoid || ca->cong_control)) {
 		pr_err("%s does not implement required ops\n", ca->name);
 		return -EINVAL;
 	}
@@ -441,10 +442,19 @@ u32 tcp_reno_ssthresh(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
 
+u32 tcp_reno_undo_cwnd(struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	return max(tp->snd_cwnd, tp->snd_ssthresh << 1);
+}
+EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);
+
 struct tcp_congestion_ops tcp_reno = {
 	.flags		= TCP_CONG_NON_RESTRICTED,
 	.name		= "reno",
 	.owner		= THIS_MODULE,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_reno_cong_avoid,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 };
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 51139175bf614..bde22ebb92a8f 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -342,6 +342,7 @@ static struct tcp_congestion_ops dctcp __read_mostly = {
 static struct tcp_congestion_ops dctcp_reno __read_mostly = {
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_reno_cong_avoid,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.get_info	= dctcp_get_info,
 	.owner		= THIS_MODULE,
 	.name		= "dctcp-reno",
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 083831e359df9..0f7175c3338e0 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -166,6 +166,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 static struct tcp_congestion_ops tcp_hybla __read_mostly = {
 	.init		= hybla_init,
 	.ssthresh	= tcp_reno_ssthresh,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.cong_avoid	= hybla_cong_avoid,
 	.set_state	= hybla_state,
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a70046fea0e89..22e6a2097ff60 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2394,10 +2394,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
 	if (tp->prior_ssthresh) {
 		const struct inet_connection_sock *icsk = inet_csk(sk);
 
-		if (icsk->icsk_ca_ops->undo_cwnd)
-			tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
-		else
-			tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
+		tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
 
 		if (tp->prior_ssthresh > tp->snd_ssthresh) {
 			tp->snd_ssthresh = tp->prior_ssthresh;
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index c67ece1390c25..046fd39108733 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -316,6 +316,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
 static struct tcp_congestion_ops tcp_lp __read_mostly = {
 	.init		= tcp_lp_init,
 	.ssthresh	= tcp_reno_ssthresh,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.cong_avoid	= tcp_lp_cong_avoid,
 	.pkts_acked	= tcp_lp_pkts_acked,
 
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 4c4bac1b5eab2..218cfcc776500 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -307,6 +307,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
 static struct tcp_congestion_ops tcp_vegas __read_mostly = {
 	.init		= tcp_vegas_init,
 	.ssthresh	= tcp_reno_ssthresh,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.cong_avoid	= tcp_vegas_cong_avoid,
 	.pkts_acked	= tcp_vegas_pkts_acked,
 	.set_state	= tcp_vegas_state,
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 4b03a2e2a0504..fed66dc0e0f5f 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -278,6 +278,7 @@ static struct tcp_congestion_ops tcp_westwood __read_mostly = {
 	.init		= tcp_westwood_init,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_reno_cong_avoid,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.cwnd_event	= tcp_westwood_event,
 	.in_ack_event	= tcp_westwood_ack,
 	.get_info	= tcp_westwood_info,
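
The behaviour the two patches establish is small enough to demonstrate outside the kernel. The sketch below is illustrative only and is not part of the series; every name in it (fake_sock, example_ssthresh, example_undo_cwnd, example_reno_undo_cwnd) is invented for this example. It mimics what the converted modules now do, namely store snd_cwnd in .ssthresh and hand it back from .undo_cwnd, and contrasts that with the reno-style "2 * ssthresh" undo that the removed fallback applied to every module and that tcp_reno_undo_cwnd now provides only to the reno-like ones.

/*
 * Illustrative sketch only; not part of the patches above. All names here
 * (fake_sock, example_*) are invented for this example.
 */
#include <stdio.h>
#include <stdint.h>

struct fake_sock {
	uint32_t snd_cwnd;
	uint32_t snd_ssthresh;
	uint32_t loss_cwnd;	/* cwnd remembered at loss, as the converted modules do */
};

static uint32_t max_u32(uint32_t a, uint32_t b)
{
	return a > b ? a : b;
}

/* like a module .ssthresh hook that does not halve: save cwnd, cut by ~1/8 */
static uint32_t example_ssthresh(struct fake_sock *sk)
{
	sk->loss_cwnd = sk->snd_cwnd;
	return max_u32(sk->snd_cwnd - (sk->snd_cwnd >> 3), 2U);
}

/* like the per-module .undo_cwnd hooks added in patch 1 */
static uint32_t example_undo_cwnd(const struct fake_sock *sk)
{
	return max_u32(sk->snd_cwnd, sk->loss_cwnd);
}

/* like tcp_reno_undo_cwnd() from patch 2: undo a reno halving by doubling ssthresh */
static uint32_t example_reno_undo_cwnd(const struct fake_sock *sk)
{
	return max_u32(sk->snd_cwnd, sk->snd_ssthresh << 1);
}

int main(void)
{
	struct fake_sock sk = { .snd_cwnd = 40 };

	sk.snd_ssthresh = example_ssthresh(&sk);	/* ~1/8 cut: 35 */
	sk.snd_cwnd = sk.snd_ssthresh;			/* pretend the loss reduced cwnd */

	printf("after loss:       cwnd=%u\n", (unsigned)sk.snd_cwnd);
	printf("stored-cwnd undo: cwnd=%u\n", (unsigned)example_undo_cwnd(&sk));	/* 40, exact restore */
	printf("reno-style undo:  cwnd=%u\n", (unsigned)example_reno_undo_cwnd(&sk));	/* 70, overshoots */
	return 0;
}

Built with a plain C compiler, the sketch prints 40 for the stored-cwnd undo (an exact restore) versus 70 for the reno-style doubling, which is the overshoot the first commit message describes for algorithms such as scalable that cut cwnd by much less than half.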