-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Neal Cardwell says: ==================== tcp: BBR congestion control algorithm This patch series implements a new TCP congestion control algorithm: BBR (Bottleneck Bandwidth and RTT). A paper with a detailed description of BBR will be published in ACM Queue, September-October 2016, as "BBR: Congestion-Based Congestion Control". BBR is widely deployed in production at Google. The patch series starts with a set of supporting infrastructure changes, including a few that extend the congestion control framework. The last patch adds BBR as a TCP congestion control module. Please see individual patches for the details. - v3 -> v4: - Updated tcp_bbr.c in "tcp_bbr: add BBR congestion control" to use const to qualify all the constant parameters. Thanks to Stephen Hemminger. - In "tcp_bbr: add BBR congestion control", remove the bbr_rate_kbps() function, which had a 64-bit divide that would be problematic on some architectures, and just use bbr_rate_bytes_per_sec() directly. Thanks to Kenneth Klette Jonassen for suggesting this. - In "tcp: switch back to proper tcp_skb_cb size check in tcp_init()", switched from sizeof(skb->cb) to FIELD_SIZEOF. Thanks to Lance Richardson for suggesting this. - Updated "tcp_bbr: add BBR congestion control" commit message with performance data, more details about deployment at Google, and another reminder to use fq with BBR. - Updated tcp_bbr.c in "tcp_bbr: add BBR congestion control" to use MODULE_LICENSE("Dual BSD/GPL"). 
- v2 -> v3: fix another issue caught by build bots: - adjust rate_sample struct initialization syntax to allow gcc-4.4 to compile the "tcp: track data delivery rate for a TCP connection" patch; also adjusted some similar syntax in "tcp_bbr: add BBR congestion control" - v1 -> v2: fix issues caught by build bots: - fix "tcp: export data delivery rate" to use rate64 instead of rate, so there is a 64-bit numerator for the do_div call - fix conflicting definitions for minmax caused by "tcp: use windowed min filter library for TCP min_rtt estimation" with a new commit: tcp: cdg: rename struct minmax in tcp_cdg.c to avoid a naming conflict - fix warning about the use of __packed in "tcp: track data delivery rate for a TCP connection", which involves the addition of a new commit: tcp: switch back to proper tcp_skb_cb size check in tcp_init() ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
- Loading branch information
Showing
20 changed files
with
1,470 additions
and
107 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/** | ||
* linux/win_minmax.h: windowed min/max tracker by Kathleen Nichols. | |
* | ||
*/ | ||
#ifndef MINMAX_H | ||
#define MINMAX_H | ||
|
||
#include <linux/types.h> | ||
|
||
/* A single data point for our parameterized min-max tracker */ | ||
struct minmax_sample { | ||
u32 t; /* time measurement was taken */ | ||
u32 v; /* value measured */ | ||
}; | ||
|
||
/* State for the parameterized min-max tracker */ | ||
struct minmax { | ||
struct minmax_sample s[3]; | ||
}; | ||
|
||
static inline u32 minmax_get(const struct minmax *m) | ||
{ | ||
return m->s[0].v; | ||
} | ||
|
||
/*
 * Discard all history: every one of the three slots is overwritten with
 * the single sample (@t, @meas).  Returns the new best value, i.e. @meas.
 */
static inline u32 minmax_reset(struct minmax *m, u32 t, u32 meas)
{
	const struct minmax_sample seed = { .t = t, .v = meas };

	m->s[0] = seed;
	m->s[1] = seed;
	m->s[2] = seed;

	return meas;
}
|
||
/*
 * Feed the measurement @meas taken at time @t into tracker @m and return
 * the updated windowed maximum (minmax_running_max) or windowed minimum
 * (minmax_running_min).  @win is the window length; it is compared
 * against differences of @t values, so it must be in the same units as @t.
 */
u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas);
u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas);
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
/** | ||
* lib/minmax.c: windowed min/max tracker | ||
* | ||
* Kathleen Nichols' algorithm for tracking the minimum (or maximum) | ||
* value of a data stream over some fixed time interval. (E.g., | ||
* the minimum RTT over the past five minutes.) It uses constant | ||
* space and constant time per update yet almost always delivers | ||
* the same minimum as an implementation that has to keep all the | ||
* data in the window. | ||
* | ||
* The algorithm keeps track of the best, 2nd best & 3rd best min | ||
* values, maintaining an invariant that the measurement time of | ||
* the n'th best >= n-1'th best. It also makes sure that the three | ||
* values are widely separated in the time window since that bounds | ||
* the worst-case error when that data is monotonically increasing | |
* over the window. | ||
* | ||
* Upon getting a new min, we can forget everything earlier because | ||
* it has no value - the new min is <= everything else in the window | ||
* by definition and it's the most recent. So we restart fresh on | ||
* every new min and overwrite the 2nd & 3rd choices. The same property | |
* holds for 2nd & 3rd best. | ||
*/ | ||
#include <linux/module.h> | ||
#include <linux/win_minmax.h> | ||
|
||
/* As time advances, update the 1st, 2nd, and 3rd choices. */ | ||
static u32 minmax_subwin_update(struct minmax *m, u32 win, | ||
const struct minmax_sample *val) | ||
{ | ||
u32 dt = val->t - m->s[0].t; | ||
|
||
if (unlikely(dt > win)) { | ||
/* | ||
* Passed entire window without a new val so make 2nd | ||
* choice the new val & 3rd choice the new 2nd choice. | ||
* we may have to iterate this since our 2nd choice | ||
* may also be outside the window (we checked on entry | ||
* that the third choice was in the window). | ||
*/ | ||
m->s[0] = m->s[1]; | ||
m->s[1] = m->s[2]; | ||
m->s[2] = *val; | ||
if (unlikely(val->t - m->s[0].t > win)) { | ||
m->s[0] = m->s[1]; | ||
m->s[1] = m->s[2]; | ||
m->s[2] = *val; | ||
} | ||
} else if (unlikely(m->s[1].t == m->s[0].t) && dt > win/4) { | ||
/* | ||
* We've passed a quarter of the window without a new val | ||
* so take a 2nd choice from the 2nd quarter of the window. | ||
*/ | ||
m->s[2] = m->s[1] = *val; | ||
} else if (unlikely(m->s[2].t == m->s[1].t) && dt > win/2) { | ||
/* | ||
* We've passed half the window without finding a new val | ||
* so take a 3rd choice from the last half of the window | ||
*/ | ||
m->s[2] = *val; | ||
} | ||
return m->s[0].v; | ||
} | ||
|
||
/* Check if new measurement updates the 1st, 2nd or 3rd choice max. */ | ||
u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas) | ||
{ | ||
struct minmax_sample val = { .t = t, .v = meas }; | ||
|
||
if (unlikely(val.v >= m->s[0].v) || /* found new max? */ | ||
unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */ | ||
return minmax_reset(m, t, meas); /* forget earlier samples */ | ||
|
||
if (unlikely(val.v >= m->s[1].v)) | ||
m->s[2] = m->s[1] = val; | ||
else if (unlikely(val.v >= m->s[2].v)) | ||
m->s[2] = val; | ||
|
||
return minmax_subwin_update(m, win, &val); | ||
} | ||
EXPORT_SYMBOL(minmax_running_max); | ||
|
||
/* Check if new measurement updates the 1st, 2nd or 3rd choice min. */ | ||
u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas) | ||
{ | ||
struct minmax_sample val = { .t = t, .v = meas }; | ||
|
||
if (unlikely(val.v <= m->s[0].v) || /* found new min? */ | ||
unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */ | ||
return minmax_reset(m, t, meas); /* forget earlier samples */ | ||
|
||
if (unlikely(val.v <= m->s[1].v)) | ||
m->s[2] = m->s[1] = val; | ||
else if (unlikely(val.v <= m->s[2].v)) | ||
m->s[2] = val; | ||
|
||
return minmax_subwin_update(m, win, &val); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.