Skip to content

Commit

Permalink
Merge branch 'mptcp-introduce-support-for-real-multipath-xmit'
Browse files Browse the repository at this point in the history
Paolo Abeni says:

====================
mptcp: introduce support for real multipath xmit

This series enable MPTCP socket to transmit data on multiple subflows
concurrently in a load balancing scenario.

First the receive code path is refactored to better deal with out-of-order
data (patches 1-7). An RB-tree is introduced to queue MPTCP-level out-of-order
data, closely resembling the TCP level OoO handling.

When data is sent on multiple subflows, the peer can easily see OoO - "future"
data at the MPTCP level, especially if speeds, delay, or jitter are not
symmetric.

The other major change regards the netlink PM, which is extended to allow
creating non backup subflows in patches 9-11.

There are a few smaller additions, like the introduction of OoO related mibs,
send buffer autotuning and better ack handling.

Finally a bunch of new self-tests is introduced. The new feature is tested
ensuring that the B/W used by an MPTCP socket using multiple subflows matches
the link aggregated B/W - we use low B/W virtual links, to ensure the tests
are not CPU bounded.

v1 -> v2:
  - fix 32 bit build breakage
  - fix a bunch of checkpatch issues
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Sep 14, 2020
2 parents 26cdb8f + 1a418cb commit b91c06c
Show file tree
Hide file tree
Showing 10 changed files with 798 additions and 182 deletions.
2 changes: 2 additions & 0 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -1414,6 +1414,8 @@ static inline int tcp_full_space(const struct sock *sk)
return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}

void tcp_cleanup_rbuf(struct sock *sk, int copied);

/* We provision sk_rcvbuf around 200% of sk_rcvlowat.
* If 87.5 % (7/8) of the space has been consumed, we want to override
* SO_RCVLOWAT constraint, since we are receiving skbs with too small
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1527,7 +1527,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
* calculation of whether or not we must ACK for the sake of
* a window update.
*/
static void tcp_cleanup_rbuf(struct sock *sk, int copied)
void tcp_cleanup_rbuf(struct sock *sk, int copied)
{
struct tcp_sock *tp = tcp_sk(sk);
bool time_to_ack = false;
Expand Down
5 changes: 5 additions & 0 deletions net/mptcp/mib.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL),
SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE),
SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE),
SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW),
SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA),
SNMP_MIB_SENTINEL
};

Expand Down
5 changes: 5 additions & 0 deletions net/mptcp/mib.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
MPTCP_MIB_OFOQUEUETAIL, /* Segments inserted into OoO queue tail */
MPTCP_MIB_OFOQUEUE, /* Segments inserted into OoO queue */
MPTCP_MIB_OFOMERGE, /* Segments merged in OoO queue */
MPTCP_MIB_NODSSWINDOW, /* Segments not in MPTCP windows */
MPTCP_MIB_DUPDATA, /* Segments discarded due to duplicate DSS */
__MPTCP_MIB_MAX
};

Expand Down
39 changes: 20 additions & 19 deletions net/mptcp/pm_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ static int pm_nl_pernet_id;

struct mptcp_pm_addr_entry {
struct list_head list;
unsigned int flags;
int ifindex;
struct mptcp_addr_info addr;
struct rcu_head rcu;
};
Expand Down Expand Up @@ -119,7 +117,7 @@ select_local_address(const struct pm_nl_pernet *pernet,
rcu_read_lock();
spin_lock_bh(&msk->join_list_lock);
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
continue;

/* avoid any address already in use by subflows and
Expand Down Expand Up @@ -150,7 +148,7 @@ select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
* can lead to additional addresses not being announced.
*/
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
continue;
if (i++ == pos) {
ret = entry;
Expand Down Expand Up @@ -210,8 +208,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
msk->pm.subflows++;
check_work_pending(msk);
spin_unlock_bh(&msk->pm.lock);
__mptcp_subflow_connect(sk, local->ifindex,
&local->addr, &remote);
__mptcp_subflow_connect(sk, &local->addr, &remote);
spin_lock_bh(&msk->pm.lock);
return;
}
Expand Down Expand Up @@ -257,13 +254,13 @@ void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
local.family = remote.family;

spin_unlock_bh(&msk->pm.lock);
__mptcp_subflow_connect((struct sock *)msk, 0, &local, &remote);
__mptcp_subflow_connect((struct sock *)msk, &local, &remote);
spin_lock_bh(&msk->pm.lock);
}

static bool address_use_port(struct mptcp_pm_addr_entry *entry)
{
return (entry->flags &
return (entry->addr.flags &
(MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) ==
MPTCP_PM_ADDR_FLAG_SIGNAL;
}
Expand Down Expand Up @@ -293,9 +290,9 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
goto out;
}

if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)
if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)
pernet->add_addr_signal_max++;
if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
pernet->local_addr_max++;

entry->addr.id = pernet->next_id++;
Expand Down Expand Up @@ -345,8 +342,9 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
if (!entry)
return -ENOMEM;

entry->flags = 0;
entry->addr = skc_local;
entry->addr.ifindex = 0;
entry->addr.flags = 0;
ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
if (ret < 0)
kfree(entry);
Expand Down Expand Up @@ -460,14 +458,17 @@ static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
entry->addr.addr.s_addr = nla_get_in_addr(tb[addr_addr]);

skip_family:
if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX])
entry->ifindex = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]);
if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) {
u32 val = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]);

entry->addr.ifindex = val;
}

if (tb[MPTCP_PM_ADDR_ATTR_ID])
entry->addr.id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]);

if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
entry->addr.flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);

return 0;
}
Expand Down Expand Up @@ -535,9 +536,9 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
ret = -EINVAL;
goto out;
}
if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)
if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)
pernet->add_addr_signal_max--;
if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
pernet->local_addr_max--;

pernet->addrs--;
Expand Down Expand Up @@ -593,10 +594,10 @@ static int mptcp_nl_fill_addr(struct sk_buff *skb,
goto nla_put_failure;
if (nla_put_u8(skb, MPTCP_PM_ADDR_ATTR_ID, addr->id))
goto nla_put_failure;
if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->flags))
if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->addr.flags))
goto nla_put_failure;
if (entry->ifindex &&
nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->ifindex))
if (entry->addr.ifindex &&
nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->addr.ifindex))
goto nla_put_failure;

if (addr->family == AF_INET &&
Expand Down
Loading

0 comments on commit b91c06c

Please sign in to comment.