diff options
Diffstat (limited to 'net/mptcp/subflow.c')
-rw-r--r-- | net/mptcp/subflow.c | 143 |
1 files changed, 55 insertions, 88 deletions
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index ec54413fb31f..a0041360ee9d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -26,6 +26,7 @@ #include "mib.h" #include <trace/events/mptcp.h> +#include <trace/events/sock.h> static void mptcp_subflow_ops_undo_override(struct sock *ssk); @@ -396,10 +397,15 @@ void mptcp_subflow_reset(struct sock *ssk) struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = subflow->conn; + /* mptcp_mp_fail_no_response() can reach here on an already closed + * socket + */ + if (ssk->sk_state == TCP_CLOSE) + return; + /* must hold: tcp_done() could drop last reference on parent */ sock_hold(sk); - tcp_set_state(ssk, TCP_CLOSE); tcp_send_active_reset(ssk, GFP_ATOMIC); tcp_done(ssk); if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) && @@ -621,7 +627,7 @@ static struct request_sock_ops mptcp_subflow_v6_request_sock_ops __ro_after_init static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init; static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init; static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init; -static struct proto tcpv6_prot_override; +static struct proto tcpv6_prot_override __ro_after_init; static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) { @@ -692,9 +698,10 @@ static bool subflow_hmac_valid(const struct request_sock *req, static void mptcp_force_close(struct sock *sk) { - /* the msk is not yet exposed to user-space */ + /* the msk is not yet exposed to user-space, and refcount is 2 */ inet_sk_state_store(sk, TCP_CLOSE); sk_common_release(sk); + sock_put(sk); } static void subflow_ulp_fallback(struct sock *sk, @@ -710,7 +717,7 @@ static void subflow_ulp_fallback(struct sock *sk, mptcp_subflow_ops_undo_override(sk); } -static void subflow_drop_ctx(struct sock *ssk) +void mptcp_subflow_drop_ctx(struct sock *ssk) { struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk); @@ -749,6 +756,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_options_received mp_opt; bool fallback, fallback_is_fatal; struct sock *new_msk = NULL; + struct mptcp_sock *owner; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); @@ -815,7 +823,7 @@ create_child: if (new_msk) mptcp_copy_inaddrs(new_msk, child); - subflow_drop_ctx(child); + mptcp_subflow_drop_ctx(child); goto out; } @@ -823,6 +831,8 @@ create_child: ctx->setsockopt_seq = listener->setsockopt_seq; if (ctx->mp_capable) { + owner = mptcp_sk(new_msk); + /* this can't race with mptcp_close(), as the msk is * not yet exposted to user-space */ @@ -831,14 +841,14 @@ create_child: /* record the newly created socket as the first msk * subflow, but don't link it yet into conn_list */ - WRITE_ONCE(mptcp_sk(new_msk)->first, child); + WRITE_ONCE(owner->first, child); /* new mpc subflow takes ownership of the newly * created mptcp socket */ mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq; - mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1); - mptcp_token_accept(subflow_req, mptcp_sk(new_msk)); + mptcp_pm_new_connection(owner, child, 1); + mptcp_token_accept(subflow_req, owner); ctx->conn = new_msk; new_msk = NULL; @@ -846,15 +856,21 @@ create_child: * uses the correct data */ mptcp_copy_inaddrs(ctx->conn, child); + mptcp_propagate_sndbuf(ctx->conn, child); + + mptcp_rcv_space_init(owner, child); + list_add(&ctx->node, &owner->conn_list); + sock_hold(child); /* with OoO packets we can reach here without ingress * mpc option */ - if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) + if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { mptcp_subflow_fully_established(ctx, &mp_opt); + mptcp_pm_fully_established(owner, child, GFP_ATOMIC); + ctx->pm_notified = 1; + } } else if (ctx->mp_join) { - struct mptcp_sock *owner; - owner = subflow_req->msk; if (!owner) { subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); @@ -898,7 +914,7 @@ out: return child; dispose_child: - subflow_drop_ctx(child); + mptcp_subflow_drop_ctx(child); tcp_rsk(req)->drop_req = true; inet_csk_prepare_for_destroy_sock(child); tcp_done(child); @@ -909,7 +925,7 @@ dispose_child: } static struct inet_connection_sock_af_ops subflow_specific __ro_after_init; -static struct proto tcp_prot_override; +static struct proto tcp_prot_override __ro_after_init; enum mapping_status { MAPPING_OK, @@ -1399,6 +1415,7 @@ void __mptcp_error_report(struct sock *sk) mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); int err = sock_error(ssk); + int ssk_state; if (!err) continue; @@ -1409,7 +1426,14 @@ void __mptcp_error_report(struct sock *sk) if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) continue; - inet_sk_state_store(sk, inet_sk_state_load(ssk)); + /* We need to propagate only transition to CLOSE state. + * Orphaned socket will see such state change via + * subflow_sched_work_if_closed() and that path will properly + * destroy the msk as needed. + */ + ssk_state = inet_sk_state_load(ssk); + if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) + inet_sk_state_store(sk, ssk_state); sk->sk_err = -err; /* This barrier is coupled with smp_rmb() in mptcp_poll() */ @@ -1423,6 +1447,13 @@ static void subflow_error_report(struct sock *ssk) { struct sock *sk = mptcp_subflow_ctx(ssk)->conn; + /* bail early if this is a no-op, so that we avoid introducing a + * problematic lockdep dependency between TCP accept queue lock + * and msk socket spinlock + */ + if (!sk->sk_socket) + return; + mptcp_data_lock(sk); if (!sock_owned_by_user(sk)) __mptcp_error_report(sk); @@ -1438,6 +1469,8 @@ static void subflow_data_ready(struct sock *sk) struct sock *parent = subflow->conn; struct mptcp_sock *msk; + trace_sk_data_ready(sk); + msk = mptcp_sk(parent); if (state & TCPF_LISTEN) { /* MPJ subflow are removed from accept queue before reaching here, @@ -1679,7 +1712,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, if (err) return err; - lock_sock(sf->sk); + lock_sock_nested(sf->sk, SINGLE_DEPTH_NESTING); /* the newly created socket has to be in the same cgroup as its parent */ mptcp_attach_cgroup(sk, sf->sk); @@ -1792,79 +1825,6 @@ static void subflow_state_change(struct sock *sk) } } -void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) -{ - struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; - struct mptcp_sock *msk, *next, *head = NULL; - struct request_sock *req; - - /* build a list of all unaccepted mptcp sockets */ - spin_lock_bh(&queue->rskq_lock); - for (req = queue->rskq_accept_head; req; req = req->dl_next) { - struct mptcp_subflow_context *subflow; - struct sock *ssk = req->sk; - struct mptcp_sock *msk; - - if (!sk_is_mptcp(ssk)) - continue; - - subflow = mptcp_subflow_ctx(ssk); - if (!subflow || !subflow->conn) - continue; - - /* skip if already in list */ - msk = mptcp_sk(subflow->conn); - if (msk->dl_next || msk == head) - continue; - - msk->dl_next = head; - head = msk; - } - spin_unlock_bh(&queue->rskq_lock); - if (!head) - return; - - /* can't acquire the msk socket lock under the subflow one, - * or will cause ABBA deadlock - */ - release_sock(listener_ssk); - - for (msk = head; msk; msk = next) { - struct sock *sk = (struct sock *)msk; - bool do_cancel_work; - - sock_hold(sk); - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - next = msk->dl_next; - msk->first = NULL; - msk->dl_next = NULL; - - do_cancel_work = __mptcp_close(sk, 0); - release_sock(sk); - if (do_cancel_work) { - /* lockdep will report a false positive ABBA deadlock - * between cancel_work_sync and the listener socket. - * The involved locks belong to different sockets WRT - * the existing AB chain. - * Using a per socket key is problematic as key - * deregistration requires process context and must be - * performed at socket disposal time, in atomic - * context. - * Just tell lockdep to consider the listener socket - * released here. - */ - mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_); - mptcp_cancel_work(sk); - mutex_acquire(&listener_sk->sk_lock.dep_map, - SINGLE_DEPTH_NESTING, 0, _RET_IP_); - } - sock_put(sk); - } - - /* we are still under the listener msk socket lock */ - lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING); -} - static int subflow_ulp_init(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -1921,6 +1881,13 @@ static void subflow_ulp_release(struct sock *ssk) * when the subflow is still unaccepted */ release = ctx->disposable || list_empty(&ctx->node); + + /* inet_child_forget() does not call sk_state_change(), + * explicitly trigger the socket close machinery + */ + if (!release && !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, + &mptcp_sk(sk)->flags)) + mptcp_schedule_work(sk); sock_put(sk); } |