diff options
-rw-r--r-- | include/net/sock.h | 2 | ||||
-rw-r--r-- | net/core/sock.c | 7 | ||||
-rw-r--r-- | net/mptcp/mib.c | 4 | ||||
-rw-r--r-- | net/mptcp/mib.h | 4 | ||||
-rw-r--r-- | net/mptcp/options.c | 81 | ||||
-rw-r--r-- | net/mptcp/pm.c | 91 | ||||
-rw-r--r-- | net/mptcp/pm_netlink.c | 276 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 30 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 39 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 3 | ||||
-rw-r--r-- | tools/testing/selftests/net/mptcp/mptcp_connect.c | 18 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/mptcp_join.sh | 189 |
12 files changed, 674 insertions, 70 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index eaa5cac5e836..a5c6ae78df77 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2195,6 +2195,8 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer, void sk_stop_timer(struct sock *sk, struct timer_list *timer); +void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer); + int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, struct sk_buff *skb, unsigned int flags, void (*destructor)(struct sock *sk, diff --git a/net/core/sock.c b/net/core/sock.c index ba9e7d91e2ef..d9a537e6876a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2947,6 +2947,13 @@ void sk_stop_timer(struct sock *sk, struct timer_list* timer) } EXPORT_SYMBOL(sk_stop_timer); +void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer) +{ + if (del_timer_sync(timer)) + __sock_put(sk); +} +EXPORT_SYMBOL(sk_stop_timer_sync); + void sock_init_data(struct socket *sock, struct sock *sk) { sk_init_common(sk); diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 056986c7a228..84d119436b22 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -27,6 +27,10 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE), SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW), SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA), + SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR), + SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD), + SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR), + SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), SNMP_MIB_SENTINEL }; diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 937a177729f1..47bcecce1106 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -20,6 +20,10 @@ enum linux_mptcp_mib_field { MPTCP_MIB_OFOMERGE, /* Segments merged in OoO queue */ MPTCP_MIB_NODSSWINDOW, /* Segments not in MPTCP windows */ MPTCP_MIB_DUPDATA, /* Segments discarded due to duplicate DSS */ + MPTCP_MIB_ADDADDR, /* Received ADD_ADDR with echo-flag=0 */ + MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */ + MPTCP_MIB_RMADDR, /* Received RM_ADDR */ + MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */ __MPTCP_MIB_MAX }; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 7fa822b55c34..14a290fae767 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -11,6 +11,7 @@ #include <net/tcp.h> #include <net/mptcp.h> #include "protocol.h" +#include "mib.h" static bool mptcp_cap_flag_sha256(u8 flags) { @@ -242,7 +243,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->add_addr = 1; mp_opt->port = 0; mp_opt->addr_id = *ptr++; - pr_debug("ADD_ADDR: id=%d", mp_opt->addr_id); + pr_debug("ADD_ADDR: id=%d, echo=%d", mp_opt->addr_id, mp_opt->echo); if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) { memcpy((u8 *)&mp_opt->addr.s_addr, (u8 *)ptr, 4); ptr += 4; @@ -571,18 +572,19 @@ static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id, } #endif -static bool mptcp_established_options_addr(struct sock *sk, - unsigned int *size, - unsigned int remaining, - struct mptcp_out_options *opts) +static bool mptcp_established_options_add_addr(struct sock *sk, + unsigned int *size, + unsigned int remaining, + struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_addr_info saddr; + bool echo; int len; - if (!mptcp_pm_should_signal(msk) || - !(mptcp_pm_addr_signal(msk, remaining, &saddr))) + if (!mptcp_pm_should_add_signal(msk) || + !(mptcp_pm_add_addr_signal(msk, remaining, &saddr, &echo))) return false; len = mptcp_add_addr_len(saddr.family); @@ -594,22 +596,51 @@ static bool mptcp_established_options_addr(struct sock *sk, if (saddr.family == AF_INET) { opts->suboptions |= OPTION_MPTCP_ADD_ADDR; opts->addr = saddr.addr; - opts->ahmac = add_addr_generate_hmac(msk->local_key, - msk->remote_key, - opts->addr_id, - &opts->addr); + if (!echo) { + opts->ahmac = add_addr_generate_hmac(msk->local_key, + msk->remote_key, + opts->addr_id, + &opts->addr); + } } #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (saddr.family == AF_INET6) { opts->suboptions |= OPTION_MPTCP_ADD_ADDR6; opts->addr6 = saddr.addr6; - opts->ahmac = add_addr6_generate_hmac(msk->local_key, - msk->remote_key, - opts->addr_id, - &opts->addr6); + if (!echo) { + opts->ahmac = add_addr6_generate_hmac(msk->local_key, + msk->remote_key, + opts->addr_id, + &opts->addr6); + } } #endif - pr_debug("addr_id=%d, ahmac=%llu", opts->addr_id, opts->ahmac); + pr_debug("addr_id=%d, ahmac=%llu, echo=%d", opts->addr_id, opts->ahmac, echo); + + return true; +} + +static bool mptcp_established_options_rm_addr(struct sock *sk, + unsigned int *size, + unsigned int remaining, + struct mptcp_out_options *opts) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); + u8 rm_id; + + if (!mptcp_pm_should_rm_signal(msk) || + !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_id))) + return false; + + if (remaining < TCPOLEN_MPTCP_RM_ADDR_BASE) + return false; + + *size = TCPOLEN_MPTCP_RM_ADDR_BASE; + opts->suboptions |= OPTION_MPTCP_RM_ADDR; + opts->rm_id = rm_id; + + pr_debug("rm_id=%d", opts->rm_id); return true; } @@ -640,7 +671,11 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, *size += opt_size; remaining -= opt_size; - if (mptcp_established_options_addr(sk, &opt_size, remaining, opts)) { + if (mptcp_established_options_add_addr(sk, &opt_size, remaining, opts)) { + *size += opt_size; + remaining -= opt_size; + ret = true; + } else if (mptcp_established_options_rm_addr(sk, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; ret = true; @@ -854,11 +889,21 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, addr.addr6 = mp_opt.addr6; } #endif - if (!mp_opt.echo) + if (!mp_opt.echo) { mptcp_pm_add_addr_received(msk, &addr); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); + } else { + mptcp_pm_del_add_timer(msk, &addr); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD); + } mp_opt.add_addr = 0; } + if (mp_opt.rm_addr) { + mptcp_pm_rm_addr_received(msk, mp_opt.rm_id); + mp_opt.rm_addr = 0; + } + if (!mp_opt.dss) return; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index a8ad20559aaa..7e81f53d1e5d 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -13,23 +13,34 @@ /* path manager command handlers */ int mptcp_pm_announce_addr(struct mptcp_sock *msk, - const struct mptcp_addr_info *addr) + const struct mptcp_addr_info *addr, + bool echo) { pr_debug("msk=%p, local_id=%d", msk, addr->id); msk->pm.local = *addr; - WRITE_ONCE(msk->pm.addr_signal, true); + WRITE_ONCE(msk->pm.add_addr_echo, echo); + WRITE_ONCE(msk->pm.add_addr_signal, true); return 0; } int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id) { - return -ENOTSUPP; + pr_debug("msk=%p, local_id=%d", msk, local_id); + + msk->pm.rm_id = local_id; + WRITE_ONCE(msk->pm.rm_addr_signal, true); + return 0; } -int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 remote_id) +int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id) { - return -ENOTSUPP; + pr_debug("msk=%p, local_id=%d", msk, local_id); + + spin_lock_bh(&msk->pm.lock); + mptcp_pm_nl_rm_subflow_received(msk, local_id); + spin_unlock_bh(&msk->pm.lock); + return 0; } /* path manager event handlers */ @@ -46,7 +57,7 @@ void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side) bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) { struct mptcp_pm_data *pm = &msk->pm; - int ret; + int ret = 0; pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows, pm->subflows_max, READ_ONCE(pm->accept_subflow)); @@ -56,9 +67,11 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) return false; spin_lock_bh(&pm->lock); - ret = pm->subflows < pm->subflows_max; - if (ret && ++pm->subflows == pm->subflows_max) - WRITE_ONCE(pm->accept_subflow, false); + if (READ_ONCE(pm->accept_subflow)) { + ret = pm->subflows < pm->subflows_max; + if (ret && ++pm->subflows == pm->subflows_max) + WRITE_ONCE(pm->accept_subflow, false); + } spin_unlock_bh(&pm->lock); return ret; @@ -135,38 +148,70 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id, READ_ONCE(pm->accept_addr)); - /* avoid acquiring the lock if there is no room for fouther addresses */ - if (!READ_ONCE(pm->accept_addr)) - return; - spin_lock_bh(&pm->lock); - /* be sure there is something to signal re-checking under PM lock */ - if (READ_ONCE(pm->accept_addr) && - mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) + if (!READ_ONCE(pm->accept_addr)) + mptcp_pm_announce_addr(msk, addr, true); + else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) pm->remote = *addr; spin_unlock_bh(&pm->lock); } +void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id) +{ + struct mptcp_pm_data *pm = &msk->pm; + + pr_debug("msk=%p remote_id=%d", msk, rm_id); + + spin_lock_bh(&pm->lock); + mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED); + pm->rm_id = rm_id; + spin_unlock_bh(&pm->lock); +} + /* path manager helpers */ -bool mptcp_pm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, - struct mptcp_addr_info *saddr) +bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, + struct mptcp_addr_info *saddr, bool *echo) { int ret = false; spin_lock_bh(&msk->pm.lock); /* double check after the lock is acquired */ - if (!mptcp_pm_should_signal(msk)) + if (!mptcp_pm_should_add_signal(msk)) goto out_unlock; if (remaining < mptcp_add_addr_len(msk->pm.local.family)) goto out_unlock; *saddr = msk->pm.local; - WRITE_ONCE(msk->pm.addr_signal, false); + *echo = READ_ONCE(msk->pm.add_addr_echo); + WRITE_ONCE(msk->pm.add_addr_signal, false); + ret = true; + +out_unlock: + spin_unlock_bh(&msk->pm.lock); + return ret; +} + +bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, + u8 *rm_id) +{ + int ret = false; + + spin_lock_bh(&msk->pm.lock); + + /* double check after the lock is acquired */ + if (!mptcp_pm_should_rm_signal(msk)) + goto out_unlock; + + if (remaining < TCPOLEN_MPTCP_RM_ADDR_BASE) + goto out_unlock; + + *rm_id = msk->pm.rm_id; + WRITE_ONCE(msk->pm.rm_addr_signal, false); ret = true; out_unlock: @@ -185,13 +230,17 @@ void mptcp_pm_data_init(struct mptcp_sock *msk) msk->pm.add_addr_accepted = 0; msk->pm.local_addr_used = 0; msk->pm.subflows = 0; + msk->pm.rm_id = 0; WRITE_ONCE(msk->pm.work_pending, false); - WRITE_ONCE(msk->pm.addr_signal, false); + WRITE_ONCE(msk->pm.add_addr_signal, false); + WRITE_ONCE(msk->pm.rm_addr_signal, false); WRITE_ONCE(msk->pm.accept_addr, false); WRITE_ONCE(msk->pm.accept_subflow, false); + WRITE_ONCE(msk->pm.add_addr_echo, false); msk->pm.status = 0; spin_lock_init(&msk->pm.lock); + INIT_LIST_HEAD(&msk->pm.anno_list); mptcp_pm_nl_data_init(msk); } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index b4a9624d7bf2..5a0e4d11bcc3 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -15,6 +15,7 @@ #include <uapi/linux/mptcp.h> #include "protocol.h" +#include "mib.h" /* forward declaration */ static struct genl_family mptcp_genl_family; @@ -27,6 +28,14 @@ struct mptcp_pm_addr_entry { struct rcu_head rcu; }; +struct mptcp_pm_add_entry { + struct list_head list; + struct mptcp_addr_info addr; + struct timer_list add_timer; + struct mptcp_sock *sock; + u8 retrans_times; +}; + struct pm_nl_pernet { /* protects pernet updates */ spinlock_t lock; @@ -40,6 +49,7 @@ struct pm_nl_pernet { }; #define MPTCP_PM_ADDR_MAX 8 +#define ADD_ADDR_RETRANS_MAX 3 static bool addresses_equal(const struct mptcp_addr_info *a, struct mptcp_addr_info *b, bool use_port) @@ -177,6 +187,121 @@ static void check_work_pending(struct mptcp_sock *msk) WRITE_ONCE(msk->pm.work_pending, false); } +static struct mptcp_pm_add_entry * +lookup_anno_list_by_saddr(struct mptcp_sock *msk, + struct mptcp_addr_info *addr) +{ + struct mptcp_pm_add_entry *entry; + + list_for_each_entry(entry, &msk->pm.anno_list, list) { + if (addresses_equal(&entry->addr, addr, false)) + return entry; + } + + return NULL; +} + +static void mptcp_pm_add_timer(struct timer_list *timer) +{ + struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer); + struct mptcp_sock *msk = entry->sock; + struct sock *sk = (struct sock *)msk; + + pr_debug("msk=%p", msk); + + if (!msk) + return; + + if (inet_sk_state_load(sk) == TCP_CLOSE) + return; + + if (!entry->addr.id) + return; + + if (mptcp_pm_should_add_signal(msk)) { + sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); + goto out; + } + + spin_lock_bh(&msk->pm.lock); + + if (!mptcp_pm_should_add_signal(msk)) { + pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id); + mptcp_pm_announce_addr(msk, &entry->addr, false); + entry->retrans_times++; + } + + if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) + sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX); + + spin_unlock_bh(&msk->pm.lock); + +out: + __sock_put(sk); +} + +struct mptcp_pm_add_entry * +mptcp_pm_del_add_timer(struct mptcp_sock *msk, + struct mptcp_addr_info *addr) +{ + struct mptcp_pm_add_entry *entry; + struct sock *sk = (struct sock *)msk; + + spin_lock_bh(&msk->pm.lock); + entry = lookup_anno_list_by_saddr(msk, addr); + if (entry) + entry->retrans_times = ADD_ADDR_RETRANS_MAX; + spin_unlock_bh(&msk->pm.lock); + + if (entry) + sk_stop_timer_sync(sk, &entry->add_timer); + + return entry; +} + +static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *entry) +{ + struct mptcp_pm_add_entry *add_entry = NULL; + struct sock *sk = (struct sock *)msk; + + if (lookup_anno_list_by_saddr(msk, &entry->addr)) + return false; + + add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); + if (!add_entry) + return false; + + list_add(&add_entry->list, &msk->pm.anno_list); + + add_entry->addr = entry->addr; + add_entry->sock = msk; + add_entry->retrans_times = 0; + + timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); + sk_reset_timer(sk, &add_entry->add_timer, jiffies + TCP_RTO_MAX); + + return true; +} + +void mptcp_pm_free_anno_list(struct mptcp_sock *msk) +{ + struct mptcp_pm_add_entry *entry, *tmp; + struct sock *sk = (struct sock *)msk; + LIST_HEAD(free_list); + + pr_debug("msk=%p", msk); + + spin_lock_bh(&msk->pm.lock); + list_splice_init(&msk->pm.anno_list, &free_list); + spin_unlock_bh(&msk->pm.lock); + + list_for_each_entry_safe(entry, tmp, &free_list, list) { + sk_stop_timer_sync(sk, &entry->add_timer); + kfree(entry); + } +} + static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { struct mptcp_addr_info remote = { 0 }; @@ -197,8 +322,10 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) msk->pm.add_addr_signaled); if (local) { - msk->pm.add_addr_signaled++; - mptcp_pm_announce_addr(msk, &local->addr); + if (mptcp_pm_alloc_anno_list(msk, local)) { + msk->pm.add_addr_signaled++; + mptcp_pm_announce_addr(msk, &local->addr, false); + } } else { /* pick failed, avoid fourther attempts later */ msk->pm.local_addr_used = msk->pm.add_addr_signal_max; @@ -266,6 +393,79 @@ void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) spin_unlock_bh(&msk->pm.lock); __mptcp_subflow_connect((struct sock *)msk, &local, &remote); spin_lock_bh(&msk->pm.lock); + + mptcp_pm_announce_addr(msk, &remote, true); +} + +void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow, *tmp; + struct sock *sk = (struct sock *)msk; + + pr_debug("address rm_id %d", msk->pm.rm_id); + + if (!msk->pm.rm_id) + return; + + if (list_empty(&msk->conn_list)) + return; + + list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int how = RCV_SHUTDOWN | SEND_SHUTDOWN; + long timeout = 0; + + if (msk->pm.rm_id != subflow->remote_id) + continue; + + spin_unlock_bh(&msk->pm.lock); + mptcp_subflow_shutdown(sk, ssk, how); + __mptcp_close_ssk(sk, ssk, subflow, timeout); + spin_lock_bh(&msk->pm.lock); + + msk->pm.add_addr_accepted--; + msk->pm.subflows--; + WRITE_ONCE(msk->pm.accept_addr, true); + + __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMADDR); + + break; + } +} + +void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id) +{ + struct mptcp_subflow_context *subflow, *tmp; + struct sock *sk = (struct sock *)msk; + + pr_debug("subflow rm_id %d", rm_id); + + if (!rm_id) + return; + + if (list_empty(&msk->conn_list)) + return; + + list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int how = RCV_SHUTDOWN | SEND_SHUTDOWN; + long timeout = 0; + + if (rm_id != subflow->local_id) + continue; + + spin_unlock_bh(&msk->pm.lock); + mptcp_subflow_shutdown(sk, ssk, how); + __mptcp_close_ssk(sk, ssk, subflow, timeout); + spin_lock_bh(&msk->pm.lock); + + msk->pm.local_addr_used--; + msk->pm.subflows--; + + __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW); + + break; + } } static bool address_use_port(struct mptcp_pm_addr_entry *entry) @@ -531,6 +731,68 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) return NULL; } +static bool remove_anno_list_by_saddr(struct mptcp_sock *msk, + struct mptcp_addr_info *addr) +{ + struct mptcp_pm_add_entry *entry; + + entry = mptcp_pm_del_add_timer(msk, addr); + if (entry) { + list_del(&entry->list); + kfree(entry); + return true; + } + + return false; +} + +static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, + struct mptcp_addr_info *addr, + bool force) +{ + bool ret; + + ret = remove_anno_list_by_saddr(msk, addr); + if (ret || force) { + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, addr->id); + spin_unlock_bh(&msk->pm.lock); + } + return ret; +} + +static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, + struct mptcp_addr_info *addr) +{ + struct mptcp_sock *msk; + long s_slot = 0, s_num = 0; + + pr_debug("remove_id=%d", addr->id); + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + bool remove_subflow; + + if (list_empty(&msk->conn_list)) { + mptcp_pm_remove_anno_addr(msk, addr, false); + goto next; + } + + lock_sock(sk); + remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr); + mptcp_pm_remove_anno_addr(msk, addr, remove_subflow); + if (remove_subflow) + mptcp_pm_remove_subflow(msk, addr->id); + release_sock(sk); + +next: + sock_put(sk); + cond_resched(); + } + + return 0; +} + static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; @@ -546,8 +808,8 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) entry = __lookup_addr_by_id(pernet, addr.addr.id); if (!entry) { GENL_SET_ERR_MSG(info, "address not found"); - ret = -EINVAL; - goto out; + spin_unlock_bh(&pernet->lock); + return -EINVAL; } if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) pernet->add_addr_signal_max--; @@ -556,9 +818,11 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) pernet->addrs--; list_del_rcu(&entry->list); - kfree_rcu(entry, rcu); -out: spin_unlock_bh(&pernet->lock); + + mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr); + kfree_rcu(entry, rcu); + return ret; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 386cd4e60250..34c037731f35 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1652,9 +1652,9 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) * so we need to use tcp_close() after detaching them from the mptcp * parent socket. */ -static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, - struct mptcp_subflow_context *subflow, - long timeout) +void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, + struct mptcp_subflow_context *subflow, + long timeout) { struct socket *sock = READ_ONCE(ssk->sk_socket); @@ -1685,6 +1685,10 @@ static void pm_work(struct mptcp_sock *msk) pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); mptcp_pm_nl_add_addr_received(msk); } + if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { + pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); + mptcp_pm_nl_rm_addr_received(msk); + } if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); mptcp_pm_nl_fully_established(msk); @@ -1806,16 +1810,16 @@ static int mptcp_init_sock(struct sock *sk) struct net *net = sock_net(sk); int ret; + ret = __mptcp_init_sock(sk); + if (ret) + return ret; + if (!mptcp_is_enabled(net)) return -ENOPROTOOPT; if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net)) return -ENOMEM; - ret = __mptcp_init_sock(sk); - if (ret) - return ret; - ret = __mptcp_socket_create(mptcp_sk(sk)); if (ret) return ret; @@ -1846,7 +1850,7 @@ static void mptcp_cancel_work(struct sock *sk) sock_put(sk); } -static void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) +void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) { lock_sock(ssk); @@ -2124,15 +2128,21 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, return newsk; } +void mptcp_destroy_common(struct mptcp_sock *msk) +{ + skb_rbtree_purge(&msk->out_of_order_queue); + mptcp_token_destroy(msk); + mptcp_pm_free_anno_list(msk); +} + static void mptcp_destroy(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - skb_rbtree_purge(&msk->out_of_order_queue); - mptcp_token_destroy(msk); if (msk->cached_ext) __skb_ext_put(msk->cached_ext); + mptcp_destroy_common(msk); sk_sockets_allocated_dec(sk); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 493bd2c13bc6..7cfe52aeb2b8 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -152,6 +152,7 @@ struct mptcp_addr_info { enum mptcp_pm_status { MPTCP_PM_ADD_ADDR_RECEIVED, + MPTCP_PM_RM_ADDR_RECEIVED, MPTCP_PM_ESTABLISHED, MPTCP_PM_SUBFLOW_ESTABLISHED, }; @@ -159,14 +160,17 @@ enum mptcp_pm_status { struct mptcp_pm_data { struct mptcp_addr_info local; struct mptcp_addr_info remote; + struct list_head anno_list; spinlock_t lock; /*protects the whole PM data */ - bool addr_signal; + bool add_addr_signal; + bool rm_addr_signal; bool server_side; bool work_pending; bool accept_addr; bool accept_subflow; + bool add_addr_echo; u8 add_addr_signaled; u8 add_addr_accepted; u8 local_addr_used; @@ -176,6 +180,7 @@ struct mptcp_pm_data { u8 local_addr_max; u8 subflows_max; u8 status; + u8 rm_id; }; struct mptcp_data_frag { @@ -360,6 +365,10 @@ void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool mptcp_subflow_data_available(struct sock *sk); void __init mptcp_subflow_init(void); +void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); +void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, + struct mptcp_subflow_context *subflow, + long timeout); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, @@ -399,6 +408,7 @@ bool mptcp_finish_join(struct sock *sk); void mptcp_data_acked(struct sock *sk); void mptcp_subflow_eof(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq); +void mptcp_destroy_common(struct mptcp_sock *msk); void __init mptcp_token_init(void); static inline void mptcp_token_init_request(struct request_sock *req) @@ -432,15 +442,26 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk, void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id); void mptcp_pm_add_addr_received(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); +void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id); +void mptcp_pm_free_anno_list(struct mptcp_sock *msk); +struct mptcp_pm_add_entry * +mptcp_pm_del_add_timer(struct mptcp_sock *msk, + struct mptcp_addr_info *addr); int mptcp_pm_announce_addr(struct mptcp_sock *msk, - const struct mptcp_addr_info *addr); + const struct mptcp_addr_info *addr, + bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id); -int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 remote_id); +int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id); -static inline bool mptcp_pm_should_signal(struct mptcp_sock *msk) +static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { - return READ_ONCE(msk->pm.addr_signal); + return READ_ONCE(msk->pm.add_addr_signal); +} + +static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk) +{ + return READ_ONCE(msk->pm.rm_addr_signal); } static inline unsigned int mptcp_add_addr_len(int family) @@ -450,8 +471,10 @@ static inline unsigned int mptcp_add_addr_len(int family) return TCPOLEN_MPTCP_ADD_ADDR6; } -bool mptcp_pm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, - struct mptcp_addr_info *saddr); +bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, + struct mptcp_addr_info *saddr, bool *echo); +bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, + u8 *rm_id); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); void __init mptcp_pm_nl_init(void); @@ -459,6 +482,8 @@ void mptcp_pm_nl_data_init(struct mptcp_sock *msk); void mptcp_pm_nl_fully_established(struct mptcp_sock *msk); void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk); void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk); +void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk); +void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 141d555b7bd2..ac2b19993f1a 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -435,8 +435,7 @@ static void mptcp_sock_destruct(struct sock *sk) sock_orphan(sk); } - skb_rbtree_purge(&mptcp_sk(sk)->out_of_order_queue); - mptcp_token_destroy(mptcp_sk(sk)); + mptcp_destroy_common(mptcp_sk(sk)); inet_sock_destruct(sk); } diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index a54966531a64..77bb62feb872 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -54,6 +54,7 @@ static int pf = AF_INET; static int cfg_sndbuf; static int cfg_rcvbuf; static bool cfg_join; +static bool cfg_remove; static int cfg_wait; static void die_usage(void) @@ -271,6 +272,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len) if (cfg_join && first && do_w > 100) do_w = 100; + if (cfg_remove && do_w > 50) + do_w = 50; + bw = write(fd, buf, do_w); if (bw < 0) perror("write"); @@ -281,6 +285,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len) first = false; } + if (cfg_remove) + usleep(200000); + return bw; } @@ -428,7 +435,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } /* leave some time for late join/announce */ - if (cfg_join) + if (cfg_join || cfg_remove) usleep(cfg_wait); close(peerfd); @@ -686,7 +693,7 @@ static void maybe_close(int fd) { unsigned int r = rand(); - if (!cfg_join && (r & 1)) + if (!(cfg_join || cfg_remove) && (r & 1)) close(fd); } @@ -822,13 +829,18 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:w:")) != -1) { + while ((c = getopt(argc, argv, "6jrlp:s:hut:m:S:R:w:")) != -1) { switch (c) { case 'j': cfg_join = true; cfg_mode = CFG_MODE_POLL; cfg_wait = 400000; break; + case 'r': + cfg_remove = true; + cfg_mode = CFG_MODE_POLL; + cfg_wait = 400000; + break; case 'l': listen_mode = true; break; diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index c2943e4dfcfe..08f53d86dedc 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -8,6 +8,7 @@ cin="" cout="" ksft_skip=4 timeout=30 +mptcp_connect="" capture=0 TEST_COUNT=0 @@ -132,6 +133,8 @@ do_transfer() cl_proto="$3" srv_proto="$4" connect_addr="$5" + rm_nr_ns1="$6" + rm_nr_ns2="$7" port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) @@ -156,14 +159,44 @@ do_transfer() sleep 1 fi - ip netns exec ${listener_ns} ./mptcp_connect -j -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" & + if [[ $rm_nr_ns1 -eq 0 && $rm_nr_ns2 -eq 0 ]]; then + mptcp_connect="./mptcp_connect -j" + else + mptcp_connect="./mptcp_connect -r" + fi + + ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" & spid=$! sleep 1 - ip netns exec ${connector_ns} ./mptcp_connect -j -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & + ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & cpid=$! + if [ $rm_nr_ns1 -gt 0 ]; then + counter=1 + sleep 1 + + while [ $counter -le $rm_nr_ns1 ] + do + ip netns exec ${listener_ns} ./pm_nl_ctl del $counter + sleep 1 + let counter+=1 + done + fi + + if [ $rm_nr_ns2 -gt 0 ]; then + counter=1 + sleep 1 + + while [ $counter -le $rm_nr_ns2 ] + do + ip netns exec ${connector_ns} ./pm_nl_ctl del $counter + sleep 1 + let counter+=1 + done + fi + wait $cpid retc=$? wait $spid @@ -219,7 +252,24 @@ run_tests() connect_addr="$3" lret=0 - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} 0 0 + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + return + fi +} + +run_remove_tests() +{ + listener_ns="$1" + connector_ns="$2" + connect_addr="$3" + rm_nr_ns1="$4" + rm_nr_ns2="$5" + lret=0 + + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${rm_nr_ns1} ${rm_nr_ns2} lret=$? if [ $lret -ne 0 ]; then ret=$lret @@ -276,6 +326,80 @@ chk_join_nr() fi } +chk_add_nr() +{ + local add_nr=$1 + local echo_nr=$2 + local count + local dump_stats + + printf "%-39s %s" " " "add" + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtAddAddr | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$add_nr" ]; then + echo "[fail] got $count ADD_ADDR[s] expected $add_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - echo " + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$echo_nr" ]; then + echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + + if [ "${dump_stats}" = 1 ]; then + echo Server ns stats + ip netns exec $ns1 nstat -as | grep MPTcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep MPTcp + fi +} + +chk_rm_nr() +{ + local rm_addr_nr=$1 + local rm_subflow_nr=$2 + local count + local dump_stats + + printf "%-39s %s" " " "rm " + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$rm_addr_nr" ]; then + echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - sf " + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$rm_subflow_nr" ]; then + echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + + if [ "${dump_stats}" = 1 ]; then + echo Server ns stats + ip netns exec $ns1 nstat -as | grep MPTcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep MPTcp + fi +} + sin=$(mktemp) sout=$(mktemp) cin=$(mktemp) @@ -332,6 +456,7 @@ reset ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "unused signal address" 0 0 0 +chk_add_nr 1 1 # accept and use add_addr reset @@ -340,6 +465,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address" 1 1 1 +chk_add_nr 1 1 # accept and use add_addr with an additional subflow # note: signal address in server ns and local addresses in client ns must @@ -352,6 +478,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and signal" 2 2 2 +chk_add_nr 1 1 # accept and use add_addr with additional subflows reset @@ -362,6 +489,59 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows and signal" 3 3 3 +chk_add_nr 1 1 + +# single subflow, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_remove_tests $ns1 $ns2 10.0.1.1 0 1 +chk_join_nr "remove single subflow" 1 1 1 +chk_rm_nr 1 1 + +# multiple subflows, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_remove_tests $ns1 $ns2 10.0.1.1 0 2 +chk_join_nr "remove multiple subflows" 2 2 2 +chk_rm_nr 2 2 + +# single address, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +run_remove_tests $ns1 $ns2 10.0.1.1 1 0 +chk_join_nr "remove single address" 1 1 1 +chk_add_nr 1 1 +chk_rm_nr 0 0 + +# subflow and signal, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_remove_tests $ns1 $ns2 10.0.1.1 1 1 +chk_join_nr "remove subflow and signal" 2 2 2 +chk_add_nr 1 1 +chk_rm_nr 1 1 + +# subflows and signal, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 3 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow +run_remove_tests $ns1 $ns2 10.0.1.1 1 2 +chk_join_nr "remove subflows and signal" 3 3 3 +chk_add_nr 1 1 +chk_rm_nr 2 2 # single subflow, syncookies reset_with_cookies @@ -396,6 +576,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address with syn cookies" 1 1 1 +chk_add_nr 1 1 # test cookie with subflow and signal reset_with_cookies @@ -405,6 +586,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and signal w cookies" 2 2 2 +chk_add_nr 1 1 # accept and use add_addr with additional subflows reset_with_cookies @@ -415,5 +597,6 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflows and signal w. cookies" 3 3 3 +chk_add_nr 1 1 exit $ret |