From d7889cfa0b892dfe2a44f72c866ddf41ca19f048 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 2 Feb 2022 17:03:37 -0800 Subject: [PATCH 1/7] mptcp: move the declarations of ssk and subflow Move the declarations of ssk and subflow in MP_FAIL and MP_PRIO to the beginning of the function mptcp_write_options(). Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 645dd984fef03..5d0b3c3e46552 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1264,10 +1264,10 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext) void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts) { - if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) { - const struct sock *ssk = (const struct sock *)tp; - struct mptcp_subflow_context *subflow; + const struct sock *ssk = (const struct sock *)tp; + struct mptcp_subflow_context *subflow; + if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) { subflow = mptcp_subflow_ctx(ssk); subflow->send_mp_fail = 0; @@ -1489,9 +1489,6 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, } if (OPTION_MPTCP_PRIO & opts->suboptions) { - const struct sock *ssk = (const struct sock *)tp; - struct mptcp_subflow_context *subflow; - subflow = mptcp_subflow_ctx(ssk); subflow->send_mp_prio = 0; From 902c8f8648828c9293533cc54393f163fb594d06 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 2 Feb 2022 17:03:38 -0800 Subject: [PATCH 2/7] mptcp: reduce branching when writing MP_FAIL option MP_FAIL should be use in very rare cases, either when the TCP RST flag is set -- with or without an MP_RST -- or with a DSS, see mptcp_established_options(). Here, we do the same in mptcp_write_options(). Co-developed-by: Geliang Tang Signed-off-by: Geliang Tang Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 5d0b3c3e46552..ab054c389a5f0 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1267,17 +1267,6 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, const struct sock *ssk = (const struct sock *)tp; struct mptcp_subflow_context *subflow; - if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) { - subflow = mptcp_subflow_ctx(ssk); - subflow->send_mp_fail = 0; - - *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL, - TCPOLEN_MPTCP_FAIL, - 0, 0); - put_unaligned_be64(opts->fail_seq, ptr); - ptr += 2; - } - /* DSS, MPC, MPJ, ADD_ADDR, FASTCLOSE and RST are mutually exclusive, * see mptcp_established_options*() */ @@ -1336,6 +1325,10 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, } ptr += 1; } + + /* We might need to add MP_FAIL options in rare cases */ + if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) + goto mp_fail; } else if (OPTIONS_MPTCP_MPC & opts->suboptions) { u8 len, flag = MPTCP_CAP_HMAC_SHA256; @@ -1476,6 +1469,21 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, put_unaligned_be64(opts->rcvr_key, ptr); ptr += 2; + if (OPTION_MPTCP_RST & opts->suboptions) + goto mp_rst; + return; + } else if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) { +mp_fail: + /* MP_FAIL is mutually exclusive with others except RST */ + subflow = mptcp_subflow_ctx(ssk); + subflow->send_mp_fail = 0; + + *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL, + TCPOLEN_MPTCP_FAIL, + 0, 0); + put_unaligned_be64(opts->fail_seq, ptr); + ptr += 2; + if (OPTION_MPTCP_RST & opts->suboptions) goto mp_rst; return; From 8cca39e251718baec13a5999bf3d18c05af865be Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 2 Feb 2022 17:03:39 -0800 Subject: [PATCH 3/7] mptcp: clarify when options can be used RFC8684 doesn't seem to clearly specify which MPTCP options can be used together. Some options are mutually exclusive -- e.g. MP_CAPABLE and MP_JOIN --, some can be used together -- e.g. DSS + MP_PRIO --, some can but we prefer not to -- e.g. DSS + ADD_ADDR -- and some have to be used together at some points -- e.g. MP_FAIL and DSS. We need to clarify this as a base before allowing other modifications. For example, does it make sense to send a RM_ADDR with an MPC or MPJ? This remains open for possible future discussions. Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index ab054c389a5f0..7345f28f3de1c 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1267,8 +1267,27 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, const struct sock *ssk = (const struct sock *)tp; struct mptcp_subflow_context *subflow; - /* DSS, MPC, MPJ, ADD_ADDR, FASTCLOSE and RST are mutually exclusive, - * see mptcp_established_options*() + /* Which options can be used together? + * + * X: mutually exclusive + * O: often used together + * C: can be used together in some cases + * P: could be used together but we prefer not to (optimisations) + * + * Opt: | MPC | MPJ | DSS | ADD | RM | PRIO | FAIL | FC | + * ------|------|------|------|------|------|------|------|------| + * MPC |------|------|------|------|------|------|------|------| + * MPJ | X |------|------|------|------|------|------|------| + * DSS | X | X |------|------|------|------|------|------| + * ADD | X | X | P |------|------|------|------|------| + * RM | C | C | C | P |------|------|------|------| + * PRIO | X | C | C | C | C |------|------|------| + * FAIL | X | X | C | X | X | X |------|------| + * FC | X | X | X | X | X | X | X |------| + * RST | X | X | X | X | X | X | O | O | + * ------|------|------|------|------|------|------|------|------| + * + * The same applies in mptcp_established_options() function. */ if (likely(OPTION_MPTCP_DSS & opts->suboptions)) { struct mptcp_ext *mpext = &opts->ext_copy; From 9ddd1cac6fe1f8464f54ab0d5af9bc8260caca12 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 2 Feb 2022 17:03:40 -0800 Subject: [PATCH 4/7] mptcp: print out reset infos of MP_RST This patch printed out the reset infos, reset_transient and reset_reason, of MP_RST in mptcp_parse_option() to show that MP_RST is received. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 7345f28f3de1c..3e82ac24d548a 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -336,6 +336,8 @@ static void mptcp_parse_option(const struct sk_buff *skb, flags = *ptr++; mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT; mp_opt->reset_reason = *ptr; + pr_debug("MP_RST: transient=%u reason=%u", + mp_opt->reset_transient, mp_opt->reset_reason); break; case MPTCPOPT_MP_FAIL: From 73c762c1f07dacba4fd1cefd15e24b419d42320d Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 2 Feb 2022 17:03:41 -0800 Subject: [PATCH 5/7] mptcp: set fullmesh flag in pm_netlink This patch added the fullmesh flag setting support in pm_netlink. If the fullmesh flag of the address is changed, remove all the related subflows, update the fullmesh flag and create subflows again. Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 782b1d4522697..d47795748ad7e 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1728,9 +1728,20 @@ mptcp_nl_cmd_get_limits(struct sk_buff *skb, struct genl_info *info) return -EMSGSIZE; } -static int mptcp_nl_addr_backup(struct net *net, - struct mptcp_addr_info *addr, - u8 bkup) +static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, + struct mptcp_addr_info *addr) +{ + struct mptcp_rm_list list = { .nr = 0 }; + + list.ids[list.nr++] = addr->id; + + mptcp_pm_nl_rm_subflow_received(msk, &list); + mptcp_pm_create_subflow_or_signal_addr(msk); +} + +static int mptcp_nl_set_flags(struct net *net, + struct mptcp_addr_info *addr, + u8 bkup, u8 changed) { long s_slot = 0, s_num = 0; struct mptcp_sock *msk; @@ -1744,7 +1755,10 @@ static int mptcp_nl_addr_backup(struct net *net, lock_sock(sk); spin_lock_bh(&msk->pm.lock); - ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup); + if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) + ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup); + if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH) + mptcp_pm_nl_fullmesh(msk, addr); spin_unlock_bh(&msk->pm.lock); release_sock(sk); @@ -1761,6 +1775,8 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry; struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); + u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | + MPTCP_PM_ADDR_FLAG_FULLMESH; struct net *net = sock_net(skb->sk); u8 bkup = 0, lookup_by_id = 0; int ret; @@ -1783,15 +1799,18 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&pernet->lock); return -EINVAL; } + if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && + (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + spin_unlock_bh(&pernet->lock); + return -EINVAL; + } - if (bkup) - entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; - else - entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; + changed = (addr.flags ^ entry->flags) & mask; + entry->flags = (entry->flags & ~mask) | (addr.flags & mask); addr = *entry; spin_unlock_bh(&pernet->lock); - mptcp_nl_addr_backup(net, &addr.addr, bkup); + mptcp_nl_set_flags(net, &addr.addr, bkup, changed); return 0; } From c25d29be00c1a1c53549b71d06a1fb666b598d3b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 2 Feb 2022 17:03:42 -0800 Subject: [PATCH 6/7] selftests: mptcp: set fullmesh flag in pm_nl_ctl This patch added the fullmesh flag setting and clearing support in pm_nl_ctl: # pm_nl_ctl set ip flags fullmesh # pm_nl_ctl set ip flags nofullmesh Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 354784512748a..152b84e44d69d 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -28,7 +28,7 @@ static void syntax(char *argv[]) fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id ] [dev ] \n"); fprintf(stderr, "\tdel []\n"); fprintf(stderr, "\tget \n"); - fprintf(stderr, "\tset [flags backup|nobackup]\n"); + fprintf(stderr, "\tset [flags backup|nobackup|fullmesh|nofullmesh]\n"); fprintf(stderr, "\tflush\n"); fprintf(stderr, "\tdump\n"); fprintf(stderr, "\tlimits [ ]\n"); @@ -704,12 +704,14 @@ int set_flags(int fd, int pm_family, int argc, char *argv[]) if (++arg >= argc) error(1, 0, " missing flags value"); - /* do not support flag list yet */ for (str = argv[arg]; (tok = strtok(str, ",")); str = NULL) { if (!strcmp(tok, "backup")) flags |= MPTCP_PM_ADDR_FLAG_BACKUP; - else if (strcmp(tok, "nobackup")) + else if (!strcmp(tok, "fullmesh")) + flags |= MPTCP_PM_ADDR_FLAG_FULLMESH; + else if (strcmp(tok, "nobackup") && + strcmp(tok, "nofullmesh")) error(1, errno, "unknown flag %s", argv[arg]); } From 6a0653b96f5d103d5579475304d76e7017165090 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 2 Feb 2022 17:03:43 -0800 Subject: [PATCH 7/7] selftests: mptcp: add fullmesh setting tests This patch added the fullmesh setting and clearing selftests in mptcp_join.sh. Now we can set both backup and fullmesh flags, so avoid using the words 'backup' and 'bkup'. Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- .../testing/selftests/net/mptcp/mptcp_join.sh | 49 ++++++++++++++++--- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index b8bdbec0cf69c..bd106c7ec2327 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -289,7 +289,7 @@ do_transfer() addr_nr_ns1="$7" addr_nr_ns2="$8" speed="$9" - bkup="${10}" + sflags="${10}" port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) @@ -461,14 +461,13 @@ do_transfer() fi fi - if [ ! -z $bkup ]; then + if [ ! -z $sflags ]; then sleep 1 for netns in "$ns1" "$ns2"; do dump=(`ip netns exec $netns ./pm_nl_ctl dump`) if [ ${#dump[@]} -gt 0 ]; then addr=${dump[${#dump[@]} - 1]} - backup="ip netns exec $netns ./pm_nl_ctl set $addr flags $bkup" - $backup + ip netns exec $netns ./pm_nl_ctl set $addr flags $sflags fi done fi @@ -545,7 +544,7 @@ run_tests() addr_nr_ns1="${5:-0}" addr_nr_ns2="${6:-0}" speed="${7:-fast}" - bkup="${8:-""}" + sflags="${8:-""}" lret=0 oldin="" @@ -574,7 +573,7 @@ run_tests() fi do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \ - ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup} + ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${sflags} lret=$? } @@ -1888,6 +1887,44 @@ fullmesh_tests() run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow chk_join_nr "fullmesh test 1x2, limited" 4 4 4 chk_add_nr 1 1 + + # set fullmesh flag + reset + ip netns exec $ns1 ./pm_nl_ctl limits 4 4 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow + ip netns exec $ns2 ./pm_nl_ctl limits 4 4 + run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow fullmesh + chk_join_nr "set fullmesh flag test" 2 2 2 + chk_rm_nr 0 1 + + # set nofullmesh flag + reset + ip netns exec $ns1 ./pm_nl_ctl limits 4 4 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow,fullmesh + ip netns exec $ns2 ./pm_nl_ctl limits 4 4 + run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow nofullmesh + chk_join_nr "set nofullmesh flag test" 2 2 2 + chk_rm_nr 0 1 + + # set backup,fullmesh flags + reset + ip netns exec $ns1 ./pm_nl_ctl limits 4 4 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow + ip netns exec $ns2 ./pm_nl_ctl limits 4 4 + run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow backup,fullmesh + chk_join_nr "set backup,fullmesh flags test" 2 2 2 + chk_prio_nr 0 1 + chk_rm_nr 0 1 + + # set nobackup,nofullmesh flags + reset + ip netns exec $ns1 ./pm_nl_ctl limits 4 4 + ip netns exec $ns2 ./pm_nl_ctl limits 4 4 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,backup,fullmesh + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup,nofullmesh + chk_join_nr "set nobackup,nofullmesh flags test" 2 2 2 + chk_prio_nr 0 1 + chk_rm_nr 0 1 } all_tests()