summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2025-01-13 13:55:57 +0000
committerJakub Kicinski <kuba@kernel.org>2025-01-14 13:28:13 -0800
commit124c4c32e9f3b4d89f5be3342897adc8db5c27b8 (patch)
tree381c296d5f1b745626a9956a3510d614a9f99c61
parentea98b61bddf41e9a9308ff8f931e6077907b1587 (diff)
tcp: add TCP_RFC7323_PAWS_ACK drop reason
XPS can cause reorders because of the relaxed OOO conditions for pure ACK packets. For hosts not using RFS, what can happpen is that ACK packets are sent on behalf of the cpu processing NIC interrupts, selecting TX queue A for ACK packet P1. Then a subsequent sendmsg() can run on another cpu. TX queue selection uses the socket hash and can choose another queue B for packets P2 (with payload). If queue A is more congested than queue B, the ACK packet P1 could be sent on the wire after P2. A linux receiver when processing P1 (after P2) currently increments LINUX_MIB_PAWSESTABREJECTED (TcpExtPAWSEstab) and use TCP_RFC7323_PAWS drop reason. It might also send a DUPACK if not rate limited. In order to better understand this pattern, this patch adds a new drop_reason : TCP_RFC7323_PAWS_ACK. For old ACKS like these, we no longer increment LINUX_MIB_PAWSESTABREJECTED and no longer sends a DUPACK, keeping credit for other more interesting DUPACK. perf record -e skb:kfree_skb -a perf script ... swapper 0 [148] 27475.438637: skb:kfree_skb: ... location=tcp_validate_incoming+0x4f0 reason: TCP_RFC7323_PAWS_ACK swapper 0 [208] 27475.438706: skb:kfree_skb: ... location=tcp_validate_incoming+0x4f0 reason: TCP_RFC7323_PAWS_ACK swapper 0 [208] 27475.438908: skb:kfree_skb: ... location=tcp_validate_incoming+0x4f0 reason: TCP_RFC7323_PAWS_ACK swapper 0 [148] 27475.439010: skb:kfree_skb: ... location=tcp_validate_incoming+0x4f0 reason: TCP_RFC7323_PAWS_ACK swapper 0 [148] 27475.439214: skb:kfree_skb: ... location=tcp_validate_incoming+0x4f0 reason: TCP_RFC7323_PAWS_ACK swapper 0 [208] 27475.439286: skb:kfree_skb: ... location=tcp_validate_incoming+0x4f0 reason: TCP_RFC7323_PAWS_ACK ... Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Neal Cardwell <ncardwell@google.com> Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com> Reviewed-by: Jason Xing <kerneljasonxing@gmail.com> Link: https://patch.msgid.link/20250113135558.3180360-3-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--include/net/dropreason-core.h5
-rw-r--r--net/ipv4/tcp_input.c10
2 files changed, 14 insertions, 1 deletions
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 3a6602f37978..28555109f9bd 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -36,6 +36,7 @@
FN(TCP_OVERWINDOW) \
FN(TCP_OFOMERGE) \
FN(TCP_RFC7323_PAWS) \
+ FN(TCP_RFC7323_PAWS_ACK) \
FN(TCP_OLD_SEQUENCE) \
FN(TCP_INVALID_SEQUENCE) \
FN(TCP_INVALID_ACK_SEQUENCE) \
@@ -259,6 +260,10 @@ enum skb_drop_reason {
* LINUX_MIB_PAWSESTABREJECTED, LINUX_MIB_PAWSACTIVEREJECTED
*/
SKB_DROP_REASON_TCP_RFC7323_PAWS,
+ /**
+ * @SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK: PAWS check, old ACK packet.
+ */
+ SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK,
/** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */
SKB_DROP_REASON_TCP_OLD_SEQUENCE,
/** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 24966dd3e49f..dc0e88bcc535 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4465,7 +4465,9 @@ static enum skb_drop_reason tcp_disordered_ack_check(const struct sock *sk,
/* 2. Is its sequence not the expected one ? */
if (seq != tp->rcv_nxt)
- return reason;
+ return before(seq, tp->rcv_nxt) ?
+ SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK :
+ reason;
/* 3. Is this not a duplicate ACK ? */
if (ack != tp->snd_una)
@@ -5967,6 +5969,12 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
if (unlikely(th->syn))
goto syn_challenge;
+ /* Old ACK are common, do not change PAWSESTABREJECTED
+ * and do not send a dupack.
+ */
+ if (reason == SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK)
+ goto discard;
+
NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
if (!tcp_oow_rate_limited(sock_net(sk), skb,
LINUX_MIB_TCPACKSKIPPEDPAWS,