diff options
author | Eric Dumazet <edumazet@google.com> | 2025-03-02 12:42:34 +0000 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2025-03-04 17:46:26 -0800 |
commit | ae9d5b19b322d4b557efda684da2f4df21670ef8 (patch) | |
tree | e79540fcc50c21b6d6b9547ca2da86694719fcc1 | |
parent | 7ff1c88fc89688c27f773ba956f65f0c11367269 (diff) |
tcp: use RCU in __inet{6}_check_established()
When __inet_hash_connect() has to try many 4-tuples before
finding an available one, we see a high spinlock cost from
__inet_check_established() and/or __inet6_check_established().
This patch adds an RCU lookup to avoid the spinlock
acquisition when the 4-tuple is found in the hash table.
Note that there are still spin_lock_bh() calls in
__inet_hash_connect() to protect inet_bind_hashbucket,
this will be fixed later in this series.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Tested-by: Jason Xing <kerneljasonxing@gmail.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250302124237.3913746-2-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- | net/ipv4/inet_hashtables.c | 19 | ||||
-rw-r--r-- | net/ipv6/inet6_hashtables.c | 19 |
2 files changed, 32 insertions, 6 deletions
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 9bfcfd016e18..46d39aa2199e 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -551,11 +551,24 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); - spinlock_t *lock = inet_ehash_lockp(hinfo, hash); - struct sock *sk2; - const struct hlist_nulls_node *node; struct inet_timewait_sock *tw = NULL; + const struct hlist_nulls_node *node; + struct sock *sk2; + spinlock_t *lock; + + rcu_read_lock(); + sk_nulls_for_each(sk2, node, &head->chain) { + if (sk2->sk_hash != hash || + !inet_match(net, sk2, acookie, ports, dif, sdif)) + continue; + if (sk2->sk_state == TCP_TIME_WAIT) + break; + rcu_read_unlock(); + return -EADDRNOTAVAIL; + } + rcu_read_unlock(); + lock = inet_ehash_lockp(hinfo, hash); spin_lock(lock); sk_nulls_for_each(sk2, node, &head->chain) { diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 9ec05e354baa..3604a5cae5d2 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -276,11 +276,24 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); - spinlock_t *lock = inet_ehash_lockp(hinfo, hash); - struct sock *sk2; - const struct hlist_nulls_node *node; struct inet_timewait_sock *tw = NULL; + const struct hlist_nulls_node *node; + struct sock *sk2; + spinlock_t *lock; + + rcu_read_lock(); + sk_nulls_for_each(sk2, node, &head->chain) { + if (sk2->sk_hash != hash || + !inet6_match(net, sk2, saddr, daddr, ports, dif, sdif)) + continue; + if (sk2->sk_state == TCP_TIME_WAIT) + break; + rcu_read_unlock(); + return -EADDRNOTAVAIL; + } + rcu_read_unlock(); + lock = inet_ehash_lockp(hinfo, hash); spin_lock(lock); sk_nulls_for_each(sk2, node, &head->chain) { |