diff options
author | David S. Miller <davem@davemloft.net> | 2022-02-03 11:44:08 +0000 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2022-02-03 11:44:08 +0000 |
commit | 83a18b8e69542e1a1107218afa4b0f6d95d4dda0 (patch) | |
tree | de42dc075e0e63728c7791c0025ca2eda922aff4 | |
parent | 156a532b48125896c8518a64ada2afdc05017eee (diff) | |
parent | 6a0653b96f5d103d5579475304d76e7017165090 (diff) |
Merge branch 'mptcp-next'
Mat Martineau says:
====================
mptcp: Miscellaneous changes for 5.18
Patch 1 has some minor cleanup in mptcp_write_options().
Patch 2 moves a rarely-needed branch to optimize mptcp_write_options().
Patch 3 adds a comment explaining which combinations of MPTCP option
headers are expected.
Patch 4 adds a pr_debug() for the MPTCP_RST option.
Patches 5-7 allow setting MPTCP_PM_ADDR_FLAG_FULLMESH with the "set
flags" netlink command. This allows changing the behavior of existing
path manager endpoints. The flag was previously only set at endpoint
creation time. Associated selftests also updated.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/mptcp/options.c | 64 | ||||
-rw-r--r-- | net/mptcp/pm_netlink.c | 37 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/mptcp_join.sh | 49 | ||||
-rw-r--r-- | tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 8 |
4 files changed, 121 insertions, 37 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 645dd984fef0..3e82ac24d548 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -336,6 +336,8 @@ static void mptcp_parse_option(const struct sk_buff *skb, flags = *ptr++; mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT; mp_opt->reset_reason = *ptr; + pr_debug("MP_RST: transient=%u reason=%u", + mp_opt->reset_transient, mp_opt->reset_reason); break; case MPTCPOPT_MP_FAIL: @@ -1264,22 +1266,30 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext) void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts) { - if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) { - const struct sock *ssk = (const struct sock *)tp; - struct mptcp_subflow_context *subflow; - - subflow = mptcp_subflow_ctx(ssk); - subflow->send_mp_fail = 0; - - *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL, - TCPOLEN_MPTCP_FAIL, - 0, 0); - put_unaligned_be64(opts->fail_seq, ptr); - ptr += 2; - } - - /* DSS, MPC, MPJ, ADD_ADDR, FASTCLOSE and RST are mutually exclusive, - * see mptcp_established_options*() + const struct sock *ssk = (const struct sock *)tp; + struct mptcp_subflow_context *subflow; + + /* Which options can be used together? + * + * X: mutually exclusive + * O: often used together + * C: can be used together in some cases + * P: could be used together but we prefer not to (optimisations) + * + * Opt: | MPC | MPJ | DSS | ADD | RM | PRIO | FAIL | FC | + * ------|------|------|------|------|------|------|------|------| + * MPC |------|------|------|------|------|------|------|------| + * MPJ | X |------|------|------|------|------|------|------| + * DSS | X | X |------|------|------|------|------|------| + * ADD | X | X | P |------|------|------|------|------| + * RM | C | C | C | P |------|------|------|------| + * PRIO | X | C | C | C | C |------|------|------| + * FAIL | X | X | C | X | X | X |------|------| + * FC | X | X | X | X | X | X | X |------| + * RST | X | X | X | X | X | X | O | O | + * ------|------|------|------|------|------|------|------|------| + * + * The same applies in mptcp_established_options() function. */ if (likely(OPTION_MPTCP_DSS & opts->suboptions)) { struct mptcp_ext *mpext = &opts->ext_copy; @@ -1336,6 +1346,10 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, } ptr += 1; } + + /* We might need to add MP_FAIL options in rare cases */ + if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) + goto mp_fail; } else if (OPTIONS_MPTCP_MPC & opts->suboptions) { u8 len, flag = MPTCP_CAP_HMAC_SHA256; @@ -1479,6 +1493,21 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, if (OPTION_MPTCP_RST & opts->suboptions) goto mp_rst; return; + } else if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) { +mp_fail: + /* MP_FAIL is mutually exclusive with others except RST */ + subflow = mptcp_subflow_ctx(ssk); + subflow->send_mp_fail = 0; + + *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL, + TCPOLEN_MPTCP_FAIL, + 0, 0); + put_unaligned_be64(opts->fail_seq, ptr); + ptr += 2; + + if (OPTION_MPTCP_RST & opts->suboptions) + goto mp_rst; + return; } else if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) { mp_rst: *ptr++ = mptcp_option(MPTCPOPT_RST, @@ -1489,9 +1518,6 @@ mp_rst: } if (OPTION_MPTCP_PRIO & opts->suboptions) { - const struct sock *ssk = (const struct sock *)tp; - struct mptcp_subflow_context *subflow; - subflow = mptcp_subflow_ctx(ssk); subflow->send_mp_prio = 0; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 782b1d452269..d47795748ad7 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1728,9 +1728,20 @@ fail: return -EMSGSIZE; } -static int mptcp_nl_addr_backup(struct net *net, - struct mptcp_addr_info *addr, - u8 bkup) +static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, + struct mptcp_addr_info *addr) +{ + struct mptcp_rm_list list = { .nr = 0 }; + + list.ids[list.nr++] = addr->id; + + mptcp_pm_nl_rm_subflow_received(msk, &list); + mptcp_pm_create_subflow_or_signal_addr(msk); +} + +static int mptcp_nl_set_flags(struct net *net, + struct mptcp_addr_info *addr, + u8 bkup, u8 changed) { long s_slot = 0, s_num = 0; struct mptcp_sock *msk; @@ -1744,7 +1755,10 @@ static int mptcp_nl_addr_backup(struct net *net, lock_sock(sk); spin_lock_bh(&msk->pm.lock); - ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup); + if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) + ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup); + if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH) + mptcp_pm_nl_fullmesh(msk, addr); spin_unlock_bh(&msk->pm.lock); release_sock(sk); @@ -1761,6 +1775,8 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry; struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); + u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | + MPTCP_PM_ADDR_FLAG_FULLMESH; struct net *net = sock_net(skb->sk); u8 bkup = 0, lookup_by_id = 0; int ret; @@ -1783,15 +1799,18 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&pernet->lock); return -EINVAL; } + if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && + (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + spin_unlock_bh(&pernet->lock); + return -EINVAL; + } - if (bkup) - entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; - else - entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; + changed = (addr.flags ^ entry->flags) & mask; + entry->flags = (entry->flags & ~mask) | (addr.flags & mask); addr = *entry; spin_unlock_bh(&pernet->lock); - mptcp_nl_addr_backup(net, &addr.addr, bkup); + mptcp_nl_set_flags(net, &addr.addr, bkup, changed); return 0; } diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index b8bdbec0cf69..bd106c7ec232 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -289,7 +289,7 @@ do_transfer() addr_nr_ns1="$7" addr_nr_ns2="$8" speed="$9" - bkup="${10}" + sflags="${10}" port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) @@ -461,14 +461,13 @@ do_transfer() fi fi - if [ ! -z $bkup ]; then + if [ ! -z $sflags ]; then sleep 1 for netns in "$ns1" "$ns2"; do dump=(`ip netns exec $netns ./pm_nl_ctl dump`) if [ ${#dump[@]} -gt 0 ]; then addr=${dump[${#dump[@]} - 1]} - backup="ip netns exec $netns ./pm_nl_ctl set $addr flags $bkup" - $backup + ip netns exec $netns ./pm_nl_ctl set $addr flags $sflags fi done fi @@ -545,7 +544,7 @@ run_tests() addr_nr_ns1="${5:-0}" addr_nr_ns2="${6:-0}" speed="${7:-fast}" - bkup="${8:-""}" + sflags="${8:-""}" lret=0 oldin="" @@ -574,7 +573,7 @@ run_tests() fi do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \ - ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup} + ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${sflags} lret=$? } @@ -1888,6 +1887,44 @@ fullmesh_tests() run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow chk_join_nr "fullmesh test 1x2, limited" 4 4 4 chk_add_nr 1 1 + + # set fullmesh flag + reset + ip netns exec $ns1 ./pm_nl_ctl limits 4 4 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow + ip netns exec $ns2 ./pm_nl_ctl limits 4 4 + run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow fullmesh + chk_join_nr "set fullmesh flag test" 2 2 2 + chk_rm_nr 0 1 + + # set nofullmesh flag + reset + ip netns exec $ns1 ./pm_nl_ctl limits 4 4 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow,fullmesh + ip netns exec $ns2 ./pm_nl_ctl limits 4 4 + run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow nofullmesh + chk_join_nr "set nofullmesh flag test" 2 2 2 + chk_rm_nr 0 1 + + # set backup,fullmesh flags + reset + ip netns exec $ns1 ./pm_nl_ctl limits 4 4 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow + ip netns exec $ns2 ./pm_nl_ctl limits 4 4 + run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow backup,fullmesh + chk_join_nr "set backup,fullmesh flags test" 2 2 2 + chk_prio_nr 0 1 + chk_rm_nr 0 1 + + # set nobackup,nofullmesh flags + reset + ip netns exec $ns1 ./pm_nl_ctl limits 4 4 + ip netns exec $ns2 ./pm_nl_ctl limits 4 4 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,backup,fullmesh + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup,nofullmesh + chk_join_nr "set nobackup,nofullmesh flags test" 2 2 2 + chk_prio_nr 0 1 + chk_rm_nr 0 1 } all_tests() diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 354784512748..152b84e44d69 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -28,7 +28,7 @@ static void syntax(char *argv[]) fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n"); fprintf(stderr, "\tdel <id> [<ip>]\n"); fprintf(stderr, "\tget <id>\n"); - fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n"); + fprintf(stderr, "\tset <ip> [flags backup|nobackup|fullmesh|nofullmesh]\n"); fprintf(stderr, "\tflush\n"); fprintf(stderr, "\tdump\n"); fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n"); @@ -704,12 +704,14 @@ int set_flags(int fd, int pm_family, int argc, char *argv[]) if (++arg >= argc) error(1, 0, " missing flags value"); - /* do not support flag list yet */ for (str = argv[arg]; (tok = strtok(str, ",")); str = NULL) { if (!strcmp(tok, "backup")) flags |= MPTCP_PM_ADDR_FLAG_BACKUP; - else if (strcmp(tok, "nobackup")) + else if (!strcmp(tok, "fullmesh")) + flags |= MPTCP_PM_ADDR_FLAG_FULLMESH; + else if (strcmp(tok, "nobackup") && + strcmp(tok, "nofullmesh")) error(1, errno, "unknown flag %s", argv[arg]); } |