From 3f6c65d6255a872846c44182c82c78d3dc6239f5 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 19 Jun 2018 21:42:50 -0700 Subject: tcp: ignore rcv_rtt sample with old ts ecr value When receiving multiple packets with the same ts ecr value, only try to compute rcv_rtt sample with the earliest received packet. This is because the rcv_rtt calculated by later received packets could possibly include long idle time or other types of delay. For example: (1) server sends last packet of reply with TS val V1 (2) client ACKs last packet of reply with TS ecr V1 (3) long idle time passes (4) client sends next request data packet with TS ecr V1 (again!) At this time, the rcv_rtt computed on server with TS ecr V1 will be inflated with the idle time and should get ignored. Signed-off-by: Wei Wang Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 355d3dffd021..76ca88f63b70 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -582,9 +582,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, { struct tcp_sock *tp = tcp_sk(sk); - if (tp->rx_opt.rcv_tsecr && - (TCP_SKB_CB(skb)->end_seq - - TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) { + if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr) + return; + tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr; + + if (TCP_SKB_CB(skb)->end_seq - + TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) { u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; u32 delta_us; @@ -5475,6 +5478,11 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) tcp_ack(sk, skb, 0); __kfree_skb(skb); tcp_data_snd_check(sk); + /* When receiving pure ack in fast path, update + * last ts ecr directly instead of calling + * tcp_rcv_rtt_measure_ts() + */ + tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr; return; } else { /* Header too small */ TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); -- cgit v1.2.3 From fb223502ec0889444965f602f57b1f45f9e9845e Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 24 Jun 2018 10:02:54 -0400 Subject: tcp: add SNMP counter for zero-window drops It will be helpful if we could display the drops due to zero window or no enough window space. So a new SNMP MIB entry is added to track this behavior. This entry is named LINUX_MIB_TCPZEROWINDOWDROP and published in /proc/net/netstat in TcpExt line as TCPZeroWindowDrop. Signed-off-by: Yafang Shao Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 1 + net/ipv4/proc.c | 1 + net/ipv4/tcp_input.c | 8 ++++++-- 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 750d89120335..97517f36a5f9 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -279,6 +279,7 @@ enum LINUX_MIB_TCPDELIVERED, /* TCPDelivered */ LINUX_MIB_TCPDELIVEREDCE, /* TCPDeliveredCE */ LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */ + LINUX_MIB_TCPZEROWINDOWDROP, /* TCPZeroWindowDrop */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 77350c1256ce..225ef3433fe5 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -287,6 +287,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDelivered", LINUX_MIB_TCPDELIVERED), SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE), SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED), + SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 76ca88f63b70..9c5b3415413f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4668,8 +4668,10 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) * Out of sequence packets to the out_of_order_queue. */ if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { - if (tcp_receive_window(tp) == 0) + if (tcp_receive_window(tp) == 0) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP); goto out_of_window; + } /* Ok. In sequence. In window. */ queue_and_out: @@ -4735,8 +4737,10 @@ drop: /* If window is closed, drop tail of packet. But after * remembering D-SACK for its head made in previous line. */ - if (!tcp_receive_window(tp)) + if (!tcp_receive_window(tp)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP); goto out_of_window; + } goto queue_and_out; } -- cgit v1.2.3 From ea5d0c32498e1a08ff5f3dbeafa4d74895851b0d Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Thu, 28 Jun 2018 00:22:56 -0400 Subject: tcp: add new SNMP counter for drops when try to queue in rcv queue When sk_rmem_alloc is larger than the receive buffer and we can't schedule more memory for it, the skb will be dropped. In above situation, if this skb is put into the ofo queue, LINUX_MIB_TCPOFODROP is incremented to track it. While if this skb is put into the receive queue, there's no record. So a new SNMP counter is introduced to track this behavior. LINUX_MIB_TCPRCVQDROP: Number of packets meant to be queued in rcv queue but dropped because socket rcvbuf limit hit. Signed-off-by: Yafang Shao Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 1 + net/ipv4/proc.c | 1 + net/ipv4/tcp_input.c | 8 ++++++-- 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 97517f36a5f9..e5ebc83827ab 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -280,6 +280,7 @@ enum LINUX_MIB_TCPDELIVEREDCE, /* TCPDeliveredCE */ LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */ LINUX_MIB_TCPZEROWINDOWDROP, /* TCPZeroWindowDrop */ + LINUX_MIB_TCPRCVQDROP, /* TCPRcvQDrop */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 225ef3433fe5..b46e4cf9a55a 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -288,6 +288,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE), SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED), SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP), + SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9c5b3415413f..eecd359595fc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4611,8 +4611,10 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) skb->data_len = data_len; skb->len = size; - if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) + if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP); goto err_free; + } err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); if (err) @@ -4677,8 +4679,10 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) queue_and_out: if (skb_queue_len(&sk->sk_receive_queue) == 0) sk_forced_mem_schedule(sk, skb->truesize); - else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) + else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP); goto drop; + } eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); -- cgit v1.2.3 From c6345ce7d361dce1b5d02a2181ccb598c27fd7ae Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 29 Jun 2018 21:26:57 -0700 Subject: net: Record receive queue number for a connection This patch adds a new field to sock_common 'skc_rx_queue_mapping' which holds the receive queue number for the connection. The Rx queue is marked in tcp_finish_connect() to allow a client app to do SO_INCOMING_NAPI_ID after a connect() call to get the right queue association for a socket. Rx queue is also marked in tcp_conn_request() to allow syn-ack to go on the right tx-queue associated with the queue on which syn is received. Signed-off-by: Amritha Nambiar Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/busy_poll.h | 1 + include/net/sock.h | 28 ++++++++++++++++++++++++++++ net/core/sock.c | 2 ++ net/ipv4/tcp_input.c | 3 +++ 4 files changed, 34 insertions(+) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index c5187438af38..9e36fda652b7 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -151,6 +151,7 @@ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) #ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = skb->napi_id; #endif + sk_rx_queue_set(sk, skb); } /* variant used for unconnected sockets */ diff --git a/include/net/sock.h b/include/net/sock.h index 37b09c84504b..2b097cc89727 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -139,6 +139,7 @@ typedef __u64 __bitwise __addrpair; * @skc_node: main hash linkage for various protocol lookup tables * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol * @skc_tx_queue_mapping: tx queue number for this connection + * @skc_rx_queue_mapping: rx queue number for this connection * @skc_flags: place holder for sk_flags * %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings @@ -215,6 +216,9 @@ struct sock_common { struct hlist_nulls_node skc_nulls_node; }; unsigned short skc_tx_queue_mapping; +#ifdef CONFIG_XPS + unsigned short skc_rx_queue_mapping; +#endif union { int skc_incoming_cpu; u32 skc_rcv_wnd; @@ -326,6 +330,9 @@ struct sock { #define sk_nulls_node __sk_common.skc_nulls_node #define sk_refcnt __sk_common.skc_refcnt #define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping +#ifdef CONFIG_XPS +#define sk_rx_queue_mapping __sk_common.skc_rx_queue_mapping +#endif #define sk_dontcopy_begin __sk_common.skc_dontcopy_begin #define sk_dontcopy_end __sk_common.skc_dontcopy_end @@ -1702,6 +1709,27 @@ static inline int sk_tx_queue_get(const struct sock *sk) return -1; } +static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) +{ +#ifdef CONFIG_XPS + if (skb_rx_queue_recorded(skb)) { + u16 rx_queue = skb_get_rx_queue(skb); + + if (WARN_ON_ONCE(rx_queue == NO_QUEUE_MAPPING)) + return; + + sk->sk_rx_queue_mapping = rx_queue; + } +#endif +} + +static inline void sk_rx_queue_clear(struct sock *sk) +{ +#ifdef CONFIG_XPS + sk->sk_rx_queue_mapping = NO_QUEUE_MAPPING; +#endif +} + static inline void sk_set_socket(struct sock *sk, struct socket *sock) { sk_tx_queue_clear(sk); diff --git a/net/core/sock.c b/net/core/sock.c index bcc41829a16d..dac6d785186b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2818,6 +2818,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_pacing_rate = ~0U; sk->sk_pacing_shift = 10; sk->sk_incoming_cpu = -1; + + sk_rx_queue_clear(sk); /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eecd359595fc..a4731995e899 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -78,6 +78,7 @@ #include #include #include +#include int sysctl_tcp_max_orphans __read_mostly = NR_FILE; @@ -5592,6 +5593,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) if (skb) { icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); security_inet_conn_established(sk, skb); + sk_mark_napi_id(sk, skb); } tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB); @@ -6420,6 +6422,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->snt_isn = isn; tcp_rsk(req)->txhash = net_tx_rndhash(); tcp_openreq_init_rwin(req, sk, dst); + sk_rx_queue_set(req_to_sk(req), skb); if (!want_cookie) { tcp_reqsk_record_syn(sk, req, skb); fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst); -- cgit v1.2.3 From cca9bab1b72cd2296097c75f59ef11ef80461279 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jul 2018 12:16:12 +0200 Subject: tcp: use monotonic timestamps for PAWS Using get_seconds() for timestamps is deprecated since it can lead to overflows on 32-bit systems. While the interface generally doesn't overflow until year 2106, the specific implementation of the TCP PAWS algorithm breaks in 2038 when the intermediate signed 32-bit timestamps overflow. A related problem is that the local timestamps in CLOCK_REALTIME form lead to unexpected behavior when settimeofday is called to set the system clock backwards or forwards by more than 24 days. While the first problem could be solved by using an overflow-safe method of comparing the timestamps, a nicer solution is to use a monotonic clocksource with ktime_get_seconds() that simply doesn't overflow (at least not until 136 years after boot) and that doesn't change during settimeofday(). To make 32-bit and 64-bit architectures behave the same way here, and also save a few bytes in the tcp_options_received structure, I'm changing the type to a 32-bit integer, which is now safe on all architectures. Finally, the ts_recent_stamp field also (confusingly) gets used to store a jiffies value in tcp_synq_overflow()/tcp_synq_no_recent_overflow(). This is currently safe, but changing the type to 32-bit requires some small changes there to keep it working. Signed-off-by: Arnd Bergmann Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/crypto/chelsio/chtls/chtls_cm.c | 2 +- include/linux/tcp.h | 4 ++-- include/net/tcp.h | 17 ++++++++++------- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 3 ++- net/ipv4/tcp_minisocks.c | 8 ++++---- 6 files changed, 20 insertions(+), 16 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index 2bb6f0380758..0997e166ea57 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -1673,7 +1673,7 @@ static void chtls_timewait(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); tp->rcv_nxt++; - tp->rx_opt.ts_recent_stamp = get_seconds(); + tp->rx_opt.ts_recent_stamp = ktime_get_seconds(); tp->srtt_us = 0; tcp_time_wait(sk, TCP_TIME_WAIT, 0); } diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3dbea6610304..58a8d7d71354 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -89,7 +89,7 @@ struct tcp_sack_block { struct tcp_options_received { /* PAWS/RTTM data */ - long ts_recent_stamp;/* Time we stored ts_recent (for aging) */ + int ts_recent_stamp;/* Time we stored ts_recent (for aging) */ u32 ts_recent; /* Time stamp to echo next */ u32 rcv_tsval; /* Time stamp value */ u32 rcv_tsecr; /* Time stamp echo reply */ @@ -426,7 +426,7 @@ struct tcp_timewait_sock { /* The time we sent the last out-of-window ACK: */ u32 tw_last_oow_ack_time; - long tw_ts_recent_stamp; + int tw_ts_recent_stamp; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; #endif diff --git a/include/net/tcp.h b/include/net/tcp.h index f6cb20e6e524..582304955087 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -472,19 +472,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); */ static inline void tcp_synq_overflow(const struct sock *sk) { - unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; - unsigned long now = jiffies; + unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + unsigned int now = jiffies; - if (time_after(now, last_overflow + HZ)) + if (time_after32(now, last_overflow + HZ)) tcp_sk(sk)->rx_opt.ts_recent_stamp = now; } /* syncookies: no recent synqueue overflow on this listening socket? */ static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) { - unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + unsigned int now = jiffies; - return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID); + return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID); } static inline u32 tcp_cookie_time(void) @@ -1375,7 +1376,8 @@ static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt, { if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win) return true; - if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)) + if (unlikely(!time_before32(ktime_get_seconds(), + rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))) return true; /* * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0, @@ -1405,7 +1407,8 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt, However, we can relax time bounds for RST segments to MSL. */ - if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL) + if (rst && !time_before32(ktime_get_seconds(), + rx_opt->ts_recent_stamp + TCP_PAWS_MSL)) return false; return true; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 814ea43dd12f..d3b6390ecf23 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3462,7 +3462,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) static void tcp_store_ts_recent(struct tcp_sock *tp) { tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; - tp->rx_opt.ts_recent_stamp = get_seconds(); + tp->rx_opt.ts_recent_stamp = ktime_get_seconds(); } static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bea17f1e8302..dc415c66a33a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -155,7 +155,8 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) and use initial timestamp retrieved from peer table. */ if (tcptw->tw_ts_recent_stamp && - (!twp || (reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { + (!twp || (reuse && time_after32(ktime_get_seconds(), + tcptw->tw_ts_recent_stamp)))) { tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; if (tp->write_seq == 0) tp->write_seq = 1; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index dac5893a52b4..75ef332a7caf 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -144,7 +144,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, tw->tw_substate = TCP_TIME_WAIT; tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; if (tmp_opt.saw_tstamp) { - tcptw->tw_ts_recent_stamp = get_seconds(); + tcptw->tw_ts_recent_stamp = ktime_get_seconds(); tcptw->tw_ts_recent = tmp_opt.rcv_tsval; } @@ -189,7 +189,7 @@ kill: if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent = tmp_opt.rcv_tsval; - tcptw->tw_ts_recent_stamp = get_seconds(); + tcptw->tw_ts_recent_stamp = ktime_get_seconds(); } inet_twsk_put(tw); @@ -537,7 +537,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, if (newtp->rx_opt.tstamp_ok) { newtp->rx_opt.ts_recent = req->ts_recent; - newtp->rx_opt.ts_recent_stamp = get_seconds(); + newtp->rx_opt.ts_recent_stamp = ktime_get_seconds(); newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; } else { newtp->rx_opt.ts_recent_stamp = 0; @@ -603,7 +603,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * it can be estimated (approximately) * from another data. */ - tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<num_timeout); + tmp_opt.ts_recent_stamp = ktime_get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<num_timeout); paws_reject = tcp_paws_reject(&tmp_opt, th->rst); } } -- cgit v1.2.3 From ff0432e5a8025df895813408325b2afdfa21f946 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sat, 14 Jul 2018 16:36:29 +0800 Subject: tcp: remove redundant rcv_nxt update tcp_rcv_nxt_update() is already executed in tcp_data_queue(). This line is redundant. See bellow, tcp_queue_rcv tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); <<<< redundant Signed-off-by: Yafang Shao Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d3b6390ecf23..fac5d03d4528 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4695,7 +4695,6 @@ queue_and_out: } eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); - tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); if (skb->len) tcp_event_data_recv(sk, skb); if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) -- cgit v1.2.3 From 41ed9c04aac2f8c6ee922e29ce5e69f185c5125b Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Fri, 13 Jul 2018 14:33:38 +0300 Subject: tcp: Don't coalesce decrypted and encrypted SKBs Prevent coalescing of decrypted and encrypted SKBs in GRO and TCP layer. Signed-off-by: Boris Pismenny Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 12 ++++++++++++ net/ipv4/tcp_offload.c | 3 +++ 2 files changed, 15 insertions(+) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fac5d03d4528..91dbb9afb950 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4343,6 +4343,11 @@ static bool tcp_try_coalesce(struct sock *sk, if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq) return false; +#ifdef CONFIG_TLS_DEVICE + if (from->decrypted != to->decrypted) + return false; +#endif + if (!skb_try_coalesce(to, from, fragstolen, &delta)) return false; @@ -4871,6 +4876,9 @@ restart: break; memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); +#ifdef CONFIG_TLS_DEVICE + nskb->decrypted = skb->decrypted; +#endif TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; if (list) __skb_queue_before(list, skb, nskb); @@ -4898,6 +4906,10 @@ restart: skb == tail || (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) goto end; +#ifdef CONFIG_TLS_DEVICE + if (skb->decrypted != nskb->decrypted) + goto end; +#endif } } } diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index f5aee641f825..870b0a335061 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -262,6 +262,9 @@ found: flush |= (len - 1) >= mss; flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); +#ifdef CONFIG_TLS_DEVICE + flush |= p->decrypted ^ skb->decrypted; +#endif if (flush || skb_gro_receive(p, skb)) { mss = 1; -- cgit v1.2.3 From 7e10b6554ff2ce7f86d5d3eec3af5db8db482caa Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 31 Jul 2018 17:46:23 -0700 Subject: tcp: add dsack blocks received stats Introduce a new TCP stat to record the number of DSACK blocks received (RFC4989 tcpEStatsStackDSACKDups) and expose it in both tcp_info (TCP_INFO) and opt_stats (SOF_TIMESTAMPING_OPT_STATS). Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 +++ include/uapi/linux/tcp.h | 2 ++ net/ipv4/tcp.c | 4 ++++ net/ipv4/tcp_input.c | 1 + 4 files changed, 10 insertions(+) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fb67f9a51b95..da6281c549a5 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -188,6 +188,9 @@ struct tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. */ + u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups + * total number of DSACK blocks received + */ u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index c31f5100b744..0e1c0aec0153 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -238,6 +238,7 @@ struct tcp_info { __u64 tcpi_bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut */ __u64 tcpi_bytes_retrans; /* RFC4898 tcpEStatsPerfOctetsRetrans */ + __u32 tcpi_dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups */ }; /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ @@ -262,6 +263,7 @@ enum { TCP_NLA_DELIVERED_CE, /* Like above but only ones w/ CE marks */ TCP_NLA_BYTES_SENT, /* Data bytes sent including retransmission */ TCP_NLA_BYTES_RETRANS, /* Data bytes retransmitted */ + TCP_NLA_DSACK_DUPS, /* DSACK blocks received */ }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5ed1be88e922..d6232b598cae 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2596,6 +2596,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->compressed_ack = 0; tp->bytes_sent = 0; tp->bytes_retrans = 0; + tp->dsack_dups = 0; /* Clean up fastopen related fields */ tcp_free_fastopen_req(tp); @@ -3205,6 +3206,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_delivered_ce = tp->delivered_ce; info->tcpi_bytes_sent = tp->bytes_sent; info->tcpi_bytes_retrans = tp->bytes_retrans; + info->tcpi_dsack_dups = tp->dsack_dups; unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(tcp_get_info); @@ -3231,6 +3233,7 @@ static size_t tcp_opt_stats_get_size(void) nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED_CE */ nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_SENT */ nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_RETRANS */ + nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */ 0; } @@ -3282,6 +3285,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans, TCP_NLA_PAD); + nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups); return stats; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d51fa358b2b1..fbc85ff7d71d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -874,6 +874,7 @@ static void tcp_dsack_seen(struct tcp_sock *tp) { tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; tp->rack.dsack_seen = 1; + tp->dsack_dups++; } /* It's reordering when higher sequence was delivered (i.e. sacked) before -- cgit v1.2.3 From 7ec65372ca534217b53fd208500cf7aac223a383 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 31 Jul 2018 17:46:24 -0700 Subject: tcp: add stat of data packet reordering events Introduce a new TCP stats to record the number of reordering events seen and expose it in both tcp_info (TCP_INFO) and opt_stats (SOF_TIMESTAMPING_OPT_STATS). Application can use this stats to track the frequency of the reordering events in addition to the existing reordering stats which tracks the magnitude of the latest reordering event. Note: this new stats tracks reordering events triggered by ACKs, which could often be fewer than the actual number of packets being delivered out-of-order. Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ++-- include/uapi/linux/tcp.h | 2 ++ net/ipv4/tcp.c | 4 ++++ net/ipv4/tcp_input.c | 3 ++- net/ipv4/tcp_recovery.c | 2 +- 5 files changed, 11 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index da6281c549a5..263e37271afd 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -220,8 +220,7 @@ struct tcp_sock { #define TCP_RACK_RECOVERY_THRESH 16 u8 reo_wnd_persist:5, /* No. of recovery since last adj */ dsack_seen:1, /* Whether DSACK seen after last adj */ - advanced:1, /* mstamp advanced since last lost marking */ - reord:1; /* reordering detected */ + advanced:1; /* mstamp advanced since last lost marking */ } rack; u16 advmss; /* Advertised MSS */ u8 compressed_ack; @@ -267,6 +266,7 @@ struct tcp_sock { u8 ecn_flags; /* ECN status bits. */ u8 keepalive_probes; /* num of allowed keep alive probes */ u32 reordering; /* Packet reordering metric. */ + u32 reord_seen; /* number of data packet reordering events */ u32 snd_up; /* Urgent pointer */ /* diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 0e1c0aec0153..e02d31986ff9 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -239,6 +239,7 @@ struct tcp_info { __u64 tcpi_bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut */ __u64 tcpi_bytes_retrans; /* RFC4898 tcpEStatsPerfOctetsRetrans */ __u32 tcpi_dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups */ + __u32 tcpi_reord_seen; /* reordering events seen */ }; /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ @@ -264,6 +265,7 @@ enum { TCP_NLA_BYTES_SENT, /* Data bytes sent including retransmission */ TCP_NLA_BYTES_RETRANS, /* Data bytes retransmitted */ TCP_NLA_DSACK_DUPS, /* DSACK blocks received */ + TCP_NLA_REORD_SEEN, /* reordering events seen */ }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d6232b598cae..31fa1c080f28 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2597,6 +2597,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->bytes_sent = 0; tp->bytes_retrans = 0; tp->dsack_dups = 0; + tp->reord_seen = 0; /* Clean up fastopen related fields */ tcp_free_fastopen_req(tp); @@ -3207,6 +3208,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_bytes_sent = tp->bytes_sent; info->tcpi_bytes_retrans = tp->bytes_retrans; info->tcpi_dsack_dups = tp->dsack_dups; + info->tcpi_reord_seen = tp->reord_seen; unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(tcp_get_info); @@ -3234,6 +3236,7 @@ static size_t tcp_opt_stats_get_size(void) nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_SENT */ nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_RETRANS */ nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */ + nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */ 0; } @@ -3286,6 +3289,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans, TCP_NLA_PAD); nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups); + nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen); return stats; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fbc85ff7d71d..3d6156f07a8d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -906,8 +906,8 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, sock_net(sk)->ipv4.sysctl_tcp_max_reordering); } - tp->rack.reord = 1; /* This exciting event is worth to be remembered. 8) */ + tp->reord_seen++; NET_INC_STATS(sock_net(sk), ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER); } @@ -1871,6 +1871,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) tp->reordering = min_t(u32, tp->packets_out + addend, sock_net(sk)->ipv4.sysctl_tcp_max_reordering); + tp->reord_seen++; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); } diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 71593e4400ab..c81aadff769b 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -25,7 +25,7 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - if (!tp->rack.reord) { + if (!tp->reord_seen) { /* If reordering has not been observed, be aggressive during * the recovery or starting the recovery by DUPACK threshold. */ -- cgit v1.2.3 From 466466dc6c28ca9dc401f10e235b9cde9a7c9162 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 9 Aug 2018 09:38:09 -0700 Subject: tcp: mandate a one-time immediate ACK Add a new flag to indicate a one-time immediate ACK. This flag is occasionaly set under specific TCP protocol states in addition to the more common quickack mechanism for interactive application. In several cases in the TCP code we want to force an immediate ACK but do not want to call tcp_enter_quickack_mode() because we do not want to forget the icsk_ack.pingpong or icsk_ack.ato state. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 3 ++- net/ipv4/tcp_input.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 0a6c9e0f2b5a..fa43b82607d9 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -167,7 +167,8 @@ enum inet_csk_ack_state_t { ICSK_ACK_SCHED = 1, ICSK_ACK_TIMER = 2, ICSK_ACK_PUSHED = 4, - ICSK_ACK_PUSHED2 = 8 + ICSK_ACK_PUSHED2 = 8, + ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */ }; void inet_csk_init_xmit_timers(struct sock *sk, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 715d541b52dd..b8849588c440 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5179,7 +5179,9 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || __tcp_select_window(sk) >= tp->rcv_wnd)) || /* We ACK each frame or... */ - tcp_in_quickack_mode(sk)) { + tcp_in_quickack_mode(sk) || + /* Protocol state mandates a one-time immediate ACK */ + inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) { send_now: tcp_send_ack(sk); return; -- cgit v1.2.3 From 15bdd5686c2c61373680b9015e95abf31778e4fd Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 9 Aug 2018 09:38:11 -0700 Subject: tcp: always ACK immediately on hole repairs RFC 5681 sec 4.2: To provide feedback to senders recovering from losses, the receiver SHOULD send an immediate ACK when it receives a data segment that fills in all or part of a gap in the sequence space. When a gap is partially filled, __tcp_ack_snd_check already checks the out-of-order queue and correctly send an immediate ACK. However when a gap is fully filled, the previous implementation only resets pingpong mode which does not guarantee an immediate ACK because the quick ACK counter may be zero. This patch addresses this issue by marking the one-time immediate ACK flag instead. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b8849588c440..9a09ff3afef2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4735,11 +4735,11 @@ queue_and_out: if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) { tcp_ofo_queue(sk); - /* RFC2581. 4.2. SHOULD send immediate ACK, when + /* RFC5681. 4.2. SHOULD send immediate ACK, when * gap in queue is filled. */ if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) - inet_csk(sk)->icsk_ack.pingpong = 0; + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; } if (tp->rx_opt.num_sacks) -- cgit v1.2.3 From fd2123a3d7527d4c7092633d55e877c0cc1d84a3 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 9 Aug 2018 09:38:12 -0700 Subject: tcp: avoid resetting ACK timer upon receiving packet with ECN CWR flag Previously commit 9aee40006190 ("tcp: ack immediately when a cwr packet arrives") calls tcp_enter_quickack_mode to force sending two immediate ACKs upon receiving a packet w/ CWR flag. The side effect is it'll also reset the delayed ACK timer and interactive session tracking. This patch removes that side effect by using the new ACK_NOW flag to force an immmediate ACK. Packetdrill to demonstrate: 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 setsockopt(3, SOL_TCP, TCP_CONGESTION, "dctcp", 5) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 +0 < [ect0] SEW 0:0(0) win 32792 +0 > SE. 0:0(0) ack 1 +.1 < [ect0] . 1:1(0) ack 1 win 257 +0 accept(3, ..., ...) = 4 +0 < [ect0] . 1:1001(1000) ack 1 win 257 +0 > [ect01] . 1:1(0) ack 1001 +0 write(4, ..., 1) = 1 +0 > [ect01] P. 1:2(1) ack 1001 +0 < [ect0] . 1001:2001(1000) ack 2 win 257 +0 write(4, ..., 1) = 1 +0 > [ect01] P. 2:3(1) ack 2001 +0 < [ect0] . 2001:3001(1000) ack 3 win 257 +0 < [ect0] . 3001:4001(1000) ack 3 win 257 // Ack delayed ... +.01 < [ce] P. 4001:4501(500) ack 3 win 257 +0 > [ect01] . 3:3(0) ack 4001 +0 > [ect01] E. 3:3(0) ack 4501 +.001 read(4, ..., 4500) = 4500 +0 write(4, ..., 1) = 1 +0 > [ect01] PE. 3:4(1) ack 4501 win 100 +.01 < [ect0] W. 4501:5501(1000) ack 4 win 257 // No delayed ACK on CWR flag +0 > [ect01] . 4:4(0) ack 5501 +.31 < [ect0] . 5501:6501(1000) ack 4 win 257 +0 > [ect01] . 4:4(0) ack 6501 Fixes: 9aee40006190 ("tcp: ack immediately when a cwr packet arrives") Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9a09ff3afef2..4c2dd9f863f7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -245,16 +245,16 @@ static void tcp_ecn_queue_cwr(struct tcp_sock *tp) tp->ecn_flags |= TCP_ECN_QUEUE_CWR; } -static void tcp_ecn_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb) +static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb) { if (tcp_hdr(skb)->cwr) { - tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; + tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR; /* If the sender is telling us it has entered CWR, then its * cwnd may be very low (even just 1 packet), so we should ACK * immediately. */ - tcp_enter_quickack_mode((struct sock *)tp, 2); + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; } } @@ -4703,7 +4703,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) skb_dst_drop(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); - tcp_ecn_accept_cwr(tp, skb); + tcp_ecn_accept_cwr(sk, skb); tp->rx_opt.dsack = 0; -- cgit v1.2.3