Diffstat (limited to 'net/ipv4/tcp_timer.c')
-rw-r--r--  net/ipv4/tcp_timer.c  68
1 files changed, 61 insertions, 7 deletions
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index cdb2ca7684d4..a17629b8912e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -29,6 +29,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
int sysctl_tcp_orphan_retries __read_mostly;
+int sysctl_tcp_thin_linear_timeouts __read_mostly;
static void tcp_write_timer(unsigned long);
static void tcp_delack_timer(unsigned long);
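The new sysctl_tcp_thin_linear_timeouts switch (exposed as /proc/sys/net/ipv4/tcp_thin_linear_timeouts once the companion sysctl table entry is in place) enables linear timeouts for thin streams system-wide; the tp->thin_lto flag checked further down enables it per connection, set from userspace through the TCP_THIN_LINEAR_TIMEOUTS socket option added elsewhere in this series. A minimal userland sketch of the per-socket path, with the option value treated as an assumption in case the installed headers predate it:

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#ifndef TCP_THIN_LINEAR_TIMEOUTS
#define TCP_THIN_LINEAR_TIMEOUTS 16	/* assumed value from this series */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int one = 1;

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* Request linear (non-backed-off) retransmission timeouts for this
	 * connection while the kernel classifies it as a thin stream.
	 */
	if (setsockopt(fd, IPPROTO_TCP, TCP_THIN_LINEAR_TIMEOUTS,
		       &one, sizeof(one)) < 0)
		perror("setsockopt(TCP_THIN_LINEAR_TIMEOUTS)");

	return 0;
}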
@@ -132,6 +133,35 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
}
}
+/* This function calculates a "timeout" which is equivalent to the timeout of a
+ * TCP connection after "boundary" unsuccessful, exponentially backed-off
+ * retransmissions with an initial RTO of TCP_RTO_MIN.
+ */
+static bool retransmits_timed_out(struct sock *sk,
+ unsigned int boundary)
+{
+ unsigned int timeout, linear_backoff_thresh;
+ unsigned int start_ts;
+
+ if (!inet_csk(sk)->icsk_retransmits)
+ return false;
+
+ if (unlikely(!tcp_sk(sk)->retrans_stamp))
+ start_ts = TCP_SKB_CB(tcp_write_queue_head(sk))->when;
+ else
+ start_ts = tcp_sk(sk)->retrans_stamp;
+
+ linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN);
+
+ if (boundary <= linear_backoff_thresh)
+ timeout = ((2 << boundary) - 1) * TCP_RTO_MIN;
+ else
+ timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN +
+ (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
+
+ return (tcp_time_stamp - start_ts) >= timeout;
+}
+
/* A write timeout has occurred. Process the after effects. */
static int tcp_write_timeout(struct sock *sk)
{
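The timeout that retransmits_timed_out() compares against is easy to reproduce in userspace. A small sketch of the same formula, assuming the conventional TCP_RTO_MIN of 200 ms and TCP_RTO_MAX of 120 s, which give linear_backoff_thresh = ilog2(600) = 9; with the default sysctls, a boundary of tcp_retries1 = 3 works out to 3 s and tcp_retries2 = 15 to roughly 924.6 s:

#include <stdio.h>

#define RTO_MIN_MS		200	/* assumed TCP_RTO_MIN (HZ/5) in ms */
#define RTO_MAX_MS		120000	/* assumed TCP_RTO_MAX (120*HZ) in ms */
#define LINEAR_BACKOFF_THRESH	9	/* ilog2(120000 / 200) */

/* Mirror of the formula in retransmits_timed_out(), in milliseconds */
static unsigned int timeout_ms(unsigned int boundary)
{
	if (boundary <= LINEAR_BACKOFF_THRESH)
		return ((2 << boundary) - 1) * RTO_MIN_MS;
	return ((2 << LINEAR_BACKOFF_THRESH) - 1) * RTO_MIN_MS +
	       (boundary - LINEAR_BACKOFF_THRESH) * RTO_MAX_MS;
}

int main(void)
{
	printf("tcp_retries1=3:  %u ms\n", timeout_ms(3));	/* 3000 ms */
	printf("tcp_retries2=15: %u ms\n", timeout_ms(15));	/* 924600 ms */
	return 0;
}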
@@ -141,14 +171,14 @@ static int tcp_write_timeout(struct sock *sk)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits)
- dst_negative_advice(&sk->sk_dst_cache);
+ dst_negative_advice(&sk->sk_dst_cache, sk);
retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
} else {
if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
- dst_negative_advice(&sk->sk_dst_cache);
+ dst_negative_advice(&sk->sk_dst_cache, sk);
}
retry_until = sysctl_tcp_retries2;
@@ -303,15 +333,15 @@ void tcp_retransmit_timer(struct sock *sk)
struct inet_sock *inet = inet_sk(sk);
if (sk->sk_family == AF_INET) {
LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
- &inet->daddr, ntohs(inet->dport),
- inet->num, tp->snd_una, tp->snd_nxt);
+ &inet->inet_daddr, ntohs(inet->inet_dport),
+ inet->inet_num, tp->snd_una, tp->snd_nxt);
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
- &np->daddr, ntohs(inet->dport),
- inet->num, tp->snd_una, tp->snd_nxt);
+ &np->daddr, ntohs(inet->inet_dport),
+ inet->inet_num, tp->snd_una, tp->snd_nxt);
}
#endif
#endif
@@ -386,7 +416,25 @@ void tcp_retransmit_timer(struct sock *sk)
icsk->icsk_retransmits++;
out_reset_timer:
- icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
+ /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
+ * used to reset timer, set to 0. Recalculate 'icsk_rto' as this
+ * might be increased if the stream oscillates between thin and thick,
+ * thus the old value might already be too high compared to the value
+ * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
+ * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
+ * exponential backoff behaviour to avoid continuing to hammer
+ * linear-timeout retransmissions into a black hole.
+ */
+ if (sk->sk_state == TCP_ESTABLISHED &&
+ (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
+ tcp_stream_is_thin(tp) &&
+ icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
+ icsk->icsk_backoff = 0;
+ icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
+ } else {
+ /* Use normal (exponential) backoff */
+ icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
+ }
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1))
__sk_dst_reset(sk);
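The effect of the new branch is easiest to see by comparing the two retransmission schedules side by side. In this series tcp_stream_is_thin() treats a stream as thin roughly when fewer than four packets are in flight and it has left initial slow start, and TCP_THIN_LINEAR_RETRIES is defined as 6. A sketch under assumed values (a 300 ms RTT-derived RTO, TCP_RTO_MAX of 120 s):

#include <stdio.h>

#define RTO_MAX_MS		120000	/* assumed TCP_RTO_MAX in ms */
#define THIN_LINEAR_RETRIES	6	/* TCP_THIN_LINEAR_RETRIES in this series */

int main(void)
{
	unsigned int base_rto = 300;	/* assumed RTT-derived RTO in ms */
	unsigned int exp_rto = base_rto, lin_rto = base_rto;
	unsigned int exp_wait = 0, lin_wait = 0;
	int retrans;

	for (retrans = 1; retrans <= 8; retrans++) {
		/* Classic behaviour: double the RTO after every timeout */
		exp_wait += exp_rto;
		exp_rto = exp_rto * 2 < RTO_MAX_MS ? exp_rto * 2 : RTO_MAX_MS;

		/* Thin-stream behaviour: keep the RTO flat for the first
		 * THIN_LINEAR_RETRIES timeouts, then fall back to doubling.
		 */
		lin_wait += lin_rto;
		if (retrans > THIN_LINEAR_RETRIES)
			lin_rto = lin_rto * 2 < RTO_MAX_MS ? lin_rto * 2 : RTO_MAX_MS;

		printf("retrans %d: exponential waited %u ms, linear waited %u ms\n",
		       retrans, exp_wait, lin_wait);
	}
	return 0;
}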
@@ -445,6 +493,12 @@ static void tcp_synack_timer(struct sock *sk)
TCP_TIMEOUT_INIT, TCP_RTO_MAX);
}
+void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req)
+{
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
+}
+EXPORT_SYMBOL(tcp_syn_ack_timeout);
+
void tcp_set_keepalive(struct sock *sk, int val)
{
if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
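tcp_syn_ack_timeout() itself only bumps the TCPTIMEOUTS counter; it takes effect once registered as the syn_ack_timeout callback of the request-socket operations, which the companion tcp_ipv4.c change in this series is expected to do. A sketch showing only the field relevant here (the real initializer also sets .rtx_syn_ack, .send_ack, .send_reset and .destructor):

/* net/ipv4/tcp_ipv4.c (sketch): account retransmitted SYN-ACKs for this
 * listener's request sockets via the new callback.
 */
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		 = PF_INET,
	.obj_size	 = sizeof(struct tcp_request_sock),
	.syn_ack_timeout = tcp_syn_ack_timeout,
};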