From e0f802fbcaa3bffe4728e37a8fa1279b5d554173 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Tue, 17 Jun 2014 11:25:37 +0300 Subject: tcp: move ir_mark initialization to tcp_openreq_init ir_mark initialization is done for both TCP v4 and v6, move it in the common tcp_openreq_init function. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 229239ad96b1..08ae3da0db4a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1025,7 +1025,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; - tcp_openreq_init(req, &tmp_opt, skb); + tcp_openreq_init(req, &tmp_opt, skb, sk); ireq = inet_rsk(req); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; @@ -1034,7 +1034,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) TCP_ECN_create_request(req, skb, sock_net(sk)); ireq->ir_iif = sk->sk_bound_dev_if; - ireq->ir_mark = inet_request_mark(sk, skb); /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && -- cgit v1.2.3 From aa27fc501850030fb5d1ee705feb836ee6a21f2a Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 25 Jun 2014 17:09:51 +0300 Subject: tcp: tcp_v[46]_conn_request: fix snt_synack initialization Commit 016818d07 (tcp: TCP Fast Open Server - take SYNACK RTT after completing 3WHS) changes the code to only take a snt_synack timestamp when a SYNACK transmit or retransmit succeeds. This behaviour is later broken by commit 843f4a55e (tcp: use tcp_v4_send_synack on first SYN-ACK), as snt_synack is now updated even if tcp_v4_send_synack fails. Also, commit 3a19ce0ee (tcp: IPv6 support for fastopen server) misses the required IPv6 updates for 016818d07. This patch makes sure that snt_synack is updated only when the SYNACK trasnmit/retransmit succeeds, for both IPv4 and IPv6. Cc: Cardwell Cc: Daniel Lee Cc: Yuchung Cheng Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 2 -- net/ipv6/tcp_ipv6.c | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 180336d47df6..145f6402c560 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1370,7 +1370,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; tcp_rsk(req)->snt_isn = isn; - tcp_rsk(req)->snt_synack = tcp_time_stamp; tcp_openreq_init_rwin(req, sk, dst); fastopen = !want_cookie && tcp_try_fastopen(sk, skb, req, &foc, dst); @@ -1380,7 +1379,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (err || want_cookie) goto drop_and_free; - tcp_rsk(req)->snt_synack = tcp_time_stamp; tcp_rsk(req)->listener = NULL; inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 08ae3da0db4a..a962455471ba 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -497,6 +497,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, skb_set_queue_mapping(skb, queue_mapping); err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); err = net_xmit_eval(err); + if (!tcp_rsk(req)->snt_synack && !err) + tcp_rsk(req)->snt_synack = tcp_time_stamp; } done: @@ -1100,7 +1102,6 @@ have_isn: goto drop_and_free; tcp_rsk(req)->snt_isn = isn; - tcp_rsk(req)->snt_synack = tcp_time_stamp; tcp_openreq_init_rwin(req, sk, dst); fastopen = !want_cookie && tcp_try_fastopen(sk, skb, req, &foc, dst); -- cgit v1.2.3 From 476eab8251641ea2ae4666ca8a1436ebc2b8e9c3 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 25 Jun 2014 17:09:52 +0300 Subject: net: remove inet6_reqsk_alloc Since pktops is only used for IPv6 only and opts is used for IPv4 only, we can move these fields into a union and this allows us to drop the inet6_reqsk_alloc function as after this change it becomes equivalent with inet_reqsk_alloc. This patch also fixes a kmemcheck issue in the IPv6 stack: the flags field was not annotated after a request_sock was allocated. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/linux/ipv6.h | 10 ---------- include/net/inet_sock.h | 6 ++++-- net/dccp/ipv6.c | 2 +- net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 5 files changed, 7 insertions(+), 15 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 2faef339d8f2..c811300b0b0c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -256,16 +256,6 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) return inet_sk(__sk)->pinet6; } -static inline struct request_sock *inet6_reqsk_alloc(struct request_sock_ops *ops) -{ - struct request_sock *req = reqsk_alloc(ops); - - if (req) - inet_rsk(req)->pktopts = NULL; - - return req; -} - static inline struct raw6_sock *raw6_sk(const struct sock *sk) { return (struct raw6_sock *)sk; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index b1edf17bec01..a829b77523cf 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -88,8 +88,10 @@ struct inet_request_sock { acked : 1, no_srccheck: 1; kmemcheck_bitfield_end(flags); - struct ip_options_rcu *opt; - struct sk_buff *pktopts; + union { + struct ip_options_rcu *opt; + struct sk_buff *pktopts; + }; u32 ir_mark; }; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4db3c2a1679c..04cb17d4b0ce 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -386,7 +386,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = inet6_reqsk_alloc(&dccp6_request_sock_ops); + req = inet_reqsk_alloc(&dccp6_request_sock_ops); if (req == NULL) goto drop; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index a822b880689b..83cea1d39466 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -187,7 +187,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet6_reqsk_alloc(&tcp6_request_sock_ops); + req = inet_reqsk_alloc(&tcp6_request_sock_ops); if (!req) goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a962455471ba..5e2d7e655c0f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1010,7 +1010,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; } - req = inet6_reqsk_alloc(&tcp6_request_sock_ops); + req = inet_reqsk_alloc(&tcp6_request_sock_ops); if (req == NULL) goto drop; -- cgit v1.2.3 From 16bea70aa7302b6f3bf3502d5a0efb4ea2ce4712 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 25 Jun 2014 17:09:53 +0300 Subject: tcp: add init_req method to tcp_request_sock_ops Move the specific IPv4/IPv6 intializations to a new method in tcp_request_sock_ops in preparation for unifying tcp_v4_conn_request and tcp_v6_conn_request. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 --- include/net/tcp.h | 2 ++ net/ipv4/tcp_ipv4.c | 29 ++++++++++++++++------------ net/ipv6/tcp_ipv6.c | 55 +++++++++++++++++++++++++++++++---------------------- 4 files changed, 51 insertions(+), 38 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a0513210798f..fa5258f322e7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -111,10 +111,7 @@ struct tcp_request_sock_ops; struct tcp_request_sock { struct inet_request_sock req; -#ifdef CONFIG_TCP_MD5SIG - /* Only used by TCP MD5 Signature so far. */ const struct tcp_request_sock_ops *af_specific; -#endif struct sock *listener; /* needed for TFO */ u32 rcv_isn; u32 snt_isn; diff --git a/include/net/tcp.h b/include/net/tcp.h index 39e47c4e4f19..7ad8ce296c3b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1613,6 +1613,8 @@ struct tcp_request_sock_ops { const struct request_sock *req, const struct sk_buff *skb); #endif + void (*init_req)(struct request_sock *req, struct sock *sk, + struct sk_buff *skb); }; int tcpv4_offload_init(void); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 145f6402c560..f86a86b30d20 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1237,6 +1237,17 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) #endif +static void tcp_v4_init_req(struct request_sock *req, struct sock *sk, + struct sk_buff *skb) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + ireq->ir_loc_addr = ip_hdr(skb)->daddr; + ireq->ir_rmt_addr = ip_hdr(skb)->saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; + ireq->opt = tcp_v4_save_options(skb); +} + struct request_sock_ops tcp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct tcp_request_sock), @@ -1247,26 +1258,26 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { .syn_ack_timeout = tcp_syn_ack_timeout, }; -#ifdef CONFIG_TCP_MD5SIG static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v4_reqsk_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, -}; #endif + .init_req = tcp_v4_init_req, +}; int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tmp_opt; struct request_sock *req; - struct inet_request_sock *ireq; struct tcp_sock *tp = tcp_sk(sk); struct dst_entry *dst = NULL; __be32 saddr = ip_hdr(skb)->saddr; - __be32 daddr = ip_hdr(skb)->daddr; __u32 isn = TCP_SKB_CB(skb)->when; bool want_cookie = false, fastopen; struct flowi4 fl4; struct tcp_fastopen_cookie foc = { .len = -1 }; + const struct tcp_request_sock_ops *af_ops; int err; /* Never answer to SYNs send to broadcast or multicast */ @@ -1298,9 +1309,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (!req) goto drop; -#ifdef CONFIG_TCP_MD5SIG - tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; -#endif + af_ops = tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; @@ -1313,11 +1322,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb, sk); - ireq = inet_rsk(req); - ireq->ir_loc_addr = daddr; - ireq->ir_rmt_addr = saddr; - ireq->no_srccheck = inet_sk(sk)->transparent; - ireq->opt = tcp_v4_save_options(skb); + af_ops->init_req(req, sk, skb); if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5e2d7e655c0f..87a360c3eba9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -720,6 +720,31 @@ static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) } #endif +static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, + struct sk_buff *skb) +{ + struct inet_request_sock *ireq = inet_rsk(req); + struct ipv6_pinfo *np = inet6_sk(sk); + + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + + ireq->ir_iif = sk->sk_bound_dev_if; + + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq->ir_iif = inet6_iif(skb); + + if (!TCP_SKB_CB(skb)->when && + (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || + np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || + np->rxopt.bits.rxohlim || np->repflow)) { + atomic_inc(&skb->users); + ireq->pktopts = skb; + } +} + struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), @@ -730,12 +755,13 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .syn_ack_timeout = tcp_syn_ack_timeout, }; -#ifdef CONFIG_TCP_MD5SIG static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v6_reqsk_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, -}; #endif + .init_req = tcp_v6_init_req, +}; static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, @@ -983,13 +1009,13 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_options_received tmp_opt; struct request_sock *req; struct inet_request_sock *ireq; - struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = NULL; struct tcp_fastopen_cookie foc = { .len = -1 }; bool want_cookie = false, fastopen; struct flowi6 fl6; + const struct tcp_request_sock_ops *af_ops; int err; if (skb->protocol == htons(ETH_P_IP)) @@ -1014,9 +1040,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (req == NULL) goto drop; -#ifdef CONFIG_TCP_MD5SIG - tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops; -#endif + af_ops = tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops; tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); @@ -1030,27 +1054,12 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb, sk); ireq = inet_rsk(req); - ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; - ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + af_ops->init_req(req, sk, skb); + if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, skb, sock_net(sk)); - ireq->ir_iif = sk->sk_bound_dev_if; - - /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && - ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq->ir_iif = inet6_iif(skb); - if (!isn) { - if (ipv6_opt_accepted(sk, skb) || - np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || - np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || - np->repflow) { - atomic_inc(&skb->users); - ireq->pktopts = skb; - } - if (want_cookie) { isn = cookie_v6_init_sequence(sk, skb, &req->mss); req->cookie_ts = tmp_opt.tstamp_ok; -- cgit v1.2.3 From fb7b37a7f3d6f7b7ba05ee526fee96810d5b92a8 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 25 Jun 2014 17:09:54 +0300 Subject: tcp: add init_cookie_seq method to tcp_request_sock_ops Move the specific IPv4/IPv6 cookie sequence initialization to a new method in tcp_request_sock_ops in preparation for unifying tcp_v4_conn_request and tcp_v6_conn_request. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/net/tcp.h | 34 ++++++++++++++++++++-------------- net/ipv4/tcp_ipv4.c | 5 ++++- net/ipv6/tcp_ipv6.c | 5 ++++- 3 files changed, 28 insertions(+), 16 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 7ad8ce296c3b..086d00ec6d8b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -495,13 +495,6 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, u16 *mssp); __u32 cookie_v4_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mss); -#else -static inline __u32 cookie_v4_init_sequence(struct sock *sk, - const struct sk_buff *skb, - __u16 *mss) -{ - return 0; -} #endif __u32 cookie_init_timestamp(struct request_sock *req); @@ -517,13 +510,6 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, const struct tcphdr *th, u16 *mssp); __u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mss); -#else -static inline __u32 cookie_v6_init_sequence(struct sock *sk, - struct sk_buff *skb, - __u16 *mss) -{ - return 0; -} #endif /* tcp_output.c */ @@ -1615,8 +1601,28 @@ struct tcp_request_sock_ops { #endif void (*init_req)(struct request_sock *req, struct sock *sk, struct sk_buff *skb); +#ifdef CONFIG_SYN_COOKIES + __u32 (*cookie_init_seq)(struct sock *sk, const struct sk_buff *skb, + __u16 *mss); +#endif }; +#ifdef CONFIG_SYN_COOKIES +static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, + struct sock *sk, struct sk_buff *skb, + __u16 *mss) +{ + return ops->cookie_init_seq(sk, skb, mss); +} +#else +static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, + struct sock *sk, struct sk_buff *skb, + __u16 *mss) +{ + return 0; +} +#endif + int tcpv4_offload_init(void); void tcp_v4_init(void); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f86a86b30d20..8c69e44c287b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1264,6 +1264,9 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .calc_md5_hash = tcp_v4_md5_hash_skb, #endif .init_req = tcp_v4_init_req, +#ifdef CONFIG_SYN_COOKIES + .cookie_init_seq = cookie_v4_init_sequence, +#endif }; int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) @@ -1331,7 +1334,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) TCP_ECN_create_request(req, skb, sock_net(sk)); if (want_cookie) { - isn = cookie_v4_init_sequence(sk, skb, &req->mss); + isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); req->cookie_ts = tmp_opt.tstamp_ok; } else if (!isn) { /* VJ's idea. We save last timestamp seen diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 87a360c3eba9..17710cffddaa 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -761,6 +761,9 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .calc_md5_hash = tcp_v6_md5_hash_skb, #endif .init_req = tcp_v6_init_req, +#ifdef CONFIG_SYN_COOKIES + .cookie_init_seq = cookie_v6_init_sequence, +#endif }; static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, @@ -1061,7 +1064,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!isn) { if (want_cookie) { - isn = cookie_v6_init_sequence(sk, skb, &req->mss); + isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); req->cookie_ts = tmp_opt.tstamp_ok; goto have_isn; } -- cgit v1.2.3 From d94e0417ad8d96d7d96b69335338ad942eaeecf1 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 25 Jun 2014 17:09:55 +0300 Subject: tcp: add route_req method to tcp_request_sock_ops Create wrappers with same signature for the IPv4/IPv6 request routing calls and use these wrappers (via route_req method from tcp_request_sock_ops) in tcp_v4_conn_request and tcp_v6_conn_request with the purpose of unifying the two functions in a later patch. We can later drop the wrapper functions and modify inet_csk_route_req and inet6_cks_route_req to use the same signature. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/net/tcp.h | 3 +++ net/ipv4/tcp_ipv4.c | 36 +++++++++++++++++++++++++++++------- net/ipv6/tcp_ipv6.c | 26 ++++++++++++++++++++------ 3 files changed, 52 insertions(+), 13 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 086d00ec6d8b..59fcc5934c79 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1605,6 +1605,9 @@ struct tcp_request_sock_ops { __u32 (*cookie_init_seq)(struct sock *sk, const struct sk_buff *skb, __u16 *mss); #endif + struct dst_entry *(*route_req)(struct sock *sk, struct flowi *fl, + const struct request_sock *req, + bool *strict); }; #ifdef CONFIG_SYN_COOKIES diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8c69e44c287b..54fbbd8b4fcd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1248,6 +1248,22 @@ static void tcp_v4_init_req(struct request_sock *req, struct sock *sk, ireq->opt = tcp_v4_save_options(skb); } +static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl, + const struct request_sock *req, + bool *strict) +{ + struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req); + + if (strict) { + if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr) + *strict = true; + else + *strict = false; + } + + return dst; +} + struct request_sock_ops tcp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct tcp_request_sock), @@ -1267,6 +1283,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v4_init_sequence, #endif + .route_req = tcp_v4_route_req, }; int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) @@ -1346,11 +1363,13 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * timewait bucket, so that all the necessary checks * are made in the function processing timewait state. */ - if (tmp_opt.saw_tstamp && - tcp_death_row.sysctl_tw_recycle && - (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && - fl4.daddr == saddr) { - if (!tcp_peer_is_proven(req, dst, true)) { + if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle) { + bool strict; + + dst = af_ops->route_req(sk, (struct flowi *)&fl4, req, + &strict); + if (dst && strict && + !tcp_peer_is_proven(req, dst, true)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); goto drop_and_release; } @@ -1374,8 +1393,11 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) isn = tcp_v4_init_sequence(skb); } - if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) - goto drop_and_free; + if (!dst) { + dst = af_ops->route_req(sk, (struct flowi *)&fl4, req, NULL); + if (!dst) + goto drop_and_free; + } tcp_rsk(req)->snt_isn = isn; tcp_openreq_init_rwin(req, sk, dst); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 17710cffddaa..d780d8808566 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -745,6 +745,16 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, } } +static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl, + const struct request_sock *req, + bool *strict) +{ + if (strict) + *strict = true; + return inet6_csk_route_req(sk, &fl->u.ip6, req); +} + + struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), @@ -764,6 +774,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v6_init_sequence, #endif + .route_req = tcp_v6_route_req, }; static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, @@ -1078,10 +1089,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) * timewait bucket, so that all the necessary checks * are made in the function processing timewait state. */ - if (tmp_opt.saw_tstamp && - tcp_death_row.sysctl_tw_recycle && - (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) { - if (!tcp_peer_is_proven(req, dst, true)) { + if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle) { + dst = af_ops->route_req(sk, (struct flowi *)&fl6, req, + NULL); + if (dst && !tcp_peer_is_proven(req, dst, true)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); goto drop_and_release; } @@ -1110,8 +1121,11 @@ have_isn: if (security_inet_conn_request(sk, skb, req)) goto drop_and_release; - if (!dst && (dst = inet6_csk_route_req(sk, &fl6, req)) == NULL) - goto drop_and_free; + if (!dst) { + dst = af_ops->route_req(sk, (struct flowi *)&fl6, req, NULL); + if (!dst) + goto drop_and_free; + } tcp_rsk(req)->snt_isn = isn; tcp_openreq_init_rwin(req, sk, dst); -- cgit v1.2.3 From 9403715977075c89b1dbcdd7713ab542807a04ac Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 25 Jun 2014 17:09:56 +0300 Subject: tcp: move around a few calls in tcp_v6_conn_request Make the tcp_v6_conn_request calls flow similar with that of tcp_v4_conn_request. Note that want_cookie can be true only if isn is zero and that is why we can move the if (want_cookie) block out of the if (!isn) block. Moving security_inet_conn_request() has a couple of side effects: missing inet_rsk(req)->ecn_ok update and the req->cookie_ts update. However, neither SELinux nor Smack security hooks seems to check them. This change should also avoid future different behaviour for IPv4 and IPv6 in the security hooks. Signed-off-by: Octavian Purdila Acked-by: Paul Moore Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d780d8808566..91b8a2e699f3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1070,16 +1070,16 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); af_ops->init_req(req, sk, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_release; + if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, skb, sock_net(sk)); - if (!isn) { - if (want_cookie) { - isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); - req->cookie_ts = tmp_opt.tstamp_ok; - goto have_isn; - } - + if (want_cookie) { + isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); + req->cookie_ts = tmp_opt.tstamp_ok; + } else if (!isn) { /* VJ's idea. We save last timestamp seen * from the destination in peer table, when entering * state TIME-WAIT, and check against it before @@ -1116,10 +1116,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) isn = tcp_v6_init_sequence(skb); } -have_isn: - - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_release; if (!dst) { dst = af_ops->route_req(sk, (struct flowi *)&fl6, req, NULL); -- cgit v1.2.3 From 936b8bdb53f90840e658904530f9db8d02ac804b Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 25 Jun 2014 17:09:57 +0300 Subject: tcp: add init_seq method to tcp_request_sock_ops More work in preparation of unifying tcp_v4_conn_request and tcp_v6_conn_request: indirect the init sequence calls via the tcp_request_sock_ops. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 5 +++-- net/ipv6/tcp_ipv6.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 59fcc5934c79..8cacf0d6ed4d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1608,6 +1608,7 @@ struct tcp_request_sock_ops { struct dst_entry *(*route_req)(struct sock *sk, struct flowi *fl, const struct request_sock *req, bool *strict); + __u32 (*init_seq)(const struct sk_buff *skb); }; #ifdef CONFIG_SYN_COOKIES diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 54fbbd8b4fcd..43478265006a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -99,7 +99,7 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb) +static __u32 tcp_v4_init_sequence(const struct sk_buff *skb) { return secure_tcp_sequence_number(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, @@ -1284,6 +1284,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .cookie_init_seq = cookie_v4_init_sequence, #endif .route_req = tcp_v4_route_req, + .init_seq = tcp_v4_init_sequence, }; int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) @@ -1391,7 +1392,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_release; } - isn = tcp_v4_init_sequence(skb); + isn = af_ops->init_seq(skb); } if (!dst) { dst = af_ops->route_req(sk, (struct flowi *)&fl4, req, NULL); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 91b8a2e699f3..2fd886fe8340 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -775,6 +775,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .cookie_init_seq = cookie_v6_init_sequence, #endif .route_req = tcp_v6_route_req, + .init_seq = tcp_v6_init_sequence, }; static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, @@ -1114,7 +1115,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_release; } - isn = tcp_v6_init_sequence(skb); + isn = af_ops->init_seq(skb); } if (!dst) { -- cgit v1.2.3 From d6274bd8d6ea84b7b54cc1c3fde6bcb6143b104f Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 25 Jun 2014 17:09:58 +0300 Subject: tcp: add send_synack method to tcp_request_sock_ops Create a new tcp_request_sock_ops method to unify the IPv4/IPv6 signature for tcp_v[46]_send_synack. This allows us to later unify tcp_v4_rtx_synack with tcp_v6_rtx_synack and tcp_v4_conn_request with tcp_v4_conn_request. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/net/tcp.h | 3 +++ net/ipv4/tcp_ipv4.c | 9 ++++++--- net/ipv6/tcp_ipv6.c | 14 ++++++++------ 3 files changed, 17 insertions(+), 9 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8cacf0d6ed4d..8c05c25018d5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1609,6 +1609,9 @@ struct tcp_request_sock_ops { const struct request_sock *req, bool *strict); __u32 (*init_seq)(const struct sk_buff *skb); + int (*send_synack)(struct sock *sk, struct dst_entry *dst, + struct flowi *fl, struct request_sock *req, + u16 queue_mapping, struct tcp_fastopen_cookie *foc); }; #ifdef CONFIG_SYN_COOKIES diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 43478265006a..b5945ac50876 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -814,6 +814,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, * socket. */ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, + struct flowi *fl, struct request_sock *req, u16 queue_mapping, struct tcp_fastopen_cookie *foc) @@ -846,7 +847,8 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) { - int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL); + const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; + int res = af_ops->send_synack(sk, NULL, NULL, req, 0, NULL); if (!res) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); @@ -1285,6 +1287,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { #endif .route_req = tcp_v4_route_req, .init_seq = tcp_v4_init_sequence, + .send_synack = tcp_v4_send_synack, }; int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) @@ -1404,8 +1407,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init_rwin(req, sk, dst); fastopen = !want_cookie && tcp_try_fastopen(sk, skb, req, &foc, dst); - err = tcp_v4_send_synack(sk, dst, req, - skb_get_queue_mapping(skb), &foc); + err = af_ops->send_synack(sk, dst, NULL, req, + skb_get_queue_mapping(skb), &foc); if (!fastopen) { if (err || want_cookie) goto drop_and_free; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2fd886fe8340..210b6105afed 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -470,13 +470,14 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, - struct flowi6 *fl6, + struct flowi *fl, struct request_sock *req, u16 queue_mapping, struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); + struct flowi6 *fl6 = &fl->u.ip6; struct sk_buff *skb; int err = -ENOMEM; @@ -507,10 +508,11 @@ done: static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) { - struct flowi6 fl6; + const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; + struct flowi fl; int res; - res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0, NULL); + res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL); if (!res) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); @@ -754,7 +756,6 @@ static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl, return inet6_csk_route_req(sk, &fl->u.ip6, req); } - struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), @@ -776,6 +777,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { #endif .route_req = tcp_v6_route_req, .init_seq = tcp_v6_init_sequence, + .send_synack = tcp_v6_send_synack, }; static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, @@ -1128,8 +1130,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init_rwin(req, sk, dst); fastopen = !want_cookie && tcp_try_fastopen(sk, skb, req, &foc, dst); - err = tcp_v6_send_synack(sk, dst, &fl6, req, - skb_get_queue_mapping(skb), &foc); + err = af_ops->send_synack(sk, dst, (struct flowi *)&fl6, req, + skb_get_queue_mapping(skb), &foc); if (!fastopen) { if (err || want_cookie) goto drop_and_free; -- cgit v1.2.3 From 5db92c994982ed826cf38f38d58bd09bc326aef6 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 25 Jun 2014 17:09:59 +0300 Subject: tcp: unify tcp_v4_rtx_synack and tcp_v6_rtx_synack Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_ipv4.c | 14 +------------- net/ipv4/tcp_output.c | 15 +++++++++++++++ net/ipv6/tcp_ipv6.c | 15 +-------------- 4 files changed, 19 insertions(+), 27 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8c05c25018d5..8e9c28dccb80 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1573,6 +1573,8 @@ int tcp4_proc_init(void); void tcp4_proc_exit(void); #endif +int tcp_rtx_synack(struct sock *sk, struct request_sock *req); + /* TCP af-specific functions */ struct tcp_sock_af_ops { #ifdef CONFIG_TCP_MD5SIG diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b5945ac50876..597dd9d75210 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -845,18 +845,6 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, return err; } -static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) -{ - const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; - int res = af_ops->send_synack(sk, NULL, NULL, req, 0, NULL); - - if (!res) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - } - return res; -} - /* * IPv4 request_sock destructor. */ @@ -1269,7 +1257,7 @@ static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl, struct request_sock_ops tcp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct tcp_request_sock), - .rtx_syn_ack = tcp_v4_rtx_synack, + .rtx_syn_ack = tcp_rtx_synack, .send_ack = tcp_v4_reqsk_send_ack, .destructor = tcp_v4_reqsk_destructor, .send_reset = tcp_v4_send_reset, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d92bce0ea24e..f8f2a944a1ce 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3299,3 +3299,18 @@ void tcp_send_probe0(struct sock *sk) TCP_RTO_MAX); } } + +int tcp_rtx_synack(struct sock *sk, struct request_sock *req) +{ + const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; + struct flowi fl; + int res; + + res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL); + if (!res) { + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + } + return res; +} +EXPORT_SYMBOL(tcp_rtx_synack); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 210b6105afed..41389bbb08c0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -506,19 +506,6 @@ done: return err; } -static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) -{ - const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; - struct flowi fl; - int res; - - res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL); - if (!res) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - } - return res; -} static void tcp_v6_reqsk_destructor(struct request_sock *req) { @@ -759,7 +746,7 @@ static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl, struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), - .rtx_syn_ack = tcp_v6_rtx_synack, + .rtx_syn_ack = tcp_rtx_synack, .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, .send_reset = tcp_v6_send_reset, -- cgit v1.2.3 From 2aec4a297b21f3690486bbf8f7d5d29281ba6a48 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 25 Jun 2014 17:10:00 +0300 Subject: tcp: add mss_clamp to tcp_request_sock_ops Add mss_clamp member to tcp_request_sock_ops so that we can later unify tcp_v4_conn_request and tcp_v6_conn_request. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 3 ++- net/ipv6/tcp_ipv6.c | 4 +++- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8e9c28dccb80..30fe98bc8957 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1592,6 +1592,7 @@ struct tcp_sock_af_ops { }; struct tcp_request_sock_ops { + u16 mss_clamp; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *(*md5_lookup) (struct sock *sk, struct request_sock *req); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 597dd9d75210..499d440539ad 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1265,6 +1265,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { }; static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { + .mss_clamp = TCP_MSS_DEFAULT, #ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v4_reqsk_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, @@ -1324,7 +1325,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) af_ops = tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; tcp_clear_options(&tmp_opt); - tmp_opt.mss_clamp = TCP_MSS_DEFAULT; + tmp_opt.mss_clamp = af_ops->mss_clamp; tmp_opt.user_mss = tp->rx_opt.user_mss; tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 41389bbb08c0..ad658332cf7d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -754,6 +754,8 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { }; static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { + .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - + sizeof(struct ipv6hdr), #ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v6_reqsk_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, @@ -1047,7 +1049,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) af_ops = tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops; tcp_clear_options(&tmp_opt); - tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); + tmp_opt.mss_clamp = af_ops->mss_clamp; tmp_opt.user_mss = tp->rx_opt.user_mss; tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); -- cgit v1.2.3 From 695da14eb0af21129187ed3810e329b21262e45f Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 25 Jun 2014 17:10:01 +0300 Subject: tcp: add queue_add_hash to tcp_request_sock_ops Add queue_add_hash member to tcp_request_sock_ops so that we can later unify tcp_v4_conn_request and tcp_v6_conn_request. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_ipv4.c | 3 ++- net/ipv6/tcp_ipv6.c | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 30fe98bc8957..cec6e2cf0610 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1615,6 +1615,8 @@ struct tcp_request_sock_ops { int (*send_synack)(struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, u16 queue_mapping, struct tcp_fastopen_cookie *foc); + void (*queue_hash_add)(struct sock *sk, struct request_sock *req, + const unsigned long timeout); }; #ifdef CONFIG_SYN_COOKIES diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 499d440539ad..845c39de97ab 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1277,6 +1277,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .route_req = tcp_v4_route_req, .init_seq = tcp_v4_init_sequence, .send_synack = tcp_v4_send_synack, + .queue_hash_add = inet_csk_reqsk_queue_hash_add, }; int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) @@ -1403,7 +1404,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; tcp_rsk(req)->listener = NULL; - inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); } return 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ad658332cf7d..8232bc7423c6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -767,6 +767,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .route_req = tcp_v6_route_req, .init_seq = tcp_v6_init_sequence, .send_synack = tcp_v6_send_synack, + .queue_hash_add = inet6_csk_reqsk_queue_hash_add, }; static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, @@ -1126,7 +1127,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; tcp_rsk(req)->listener = NULL; - inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); } return 0; -- cgit v1.2.3 From 1fb6f159fd21c640a28eb65fbd62ce8c9f6a777e Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 25 Jun 2014 17:10:02 +0300 Subject: tcp: add tcp_conn_request Create tcp_conn_request and remove most of the code from tcp_v4_conn_request and tcp_v6_conn_request. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/net/tcp.h | 3 ++ net/ipv4/tcp_input.c | 148 +++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 128 +------------------------------------------- net/ipv6/tcp_ipv6.c | 120 +---------------------------------------- 4 files changed, 155 insertions(+), 244 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index cec6e2cf0610..0d5389aecf18 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1574,6 +1574,9 @@ void tcp4_proc_exit(void); #endif int tcp_rtx_synack(struct sock *sk, struct request_sock *req); +int tcp_conn_request(struct request_sock_ops *rsk_ops, + const struct tcp_request_sock_ops *af_ops, + struct sock *sk, struct sk_buff *skb); /* TCP af-specific functions */ struct tcp_sock_af_ops { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b5c23756965a..97e48d60c4e8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5877,3 +5877,151 @@ discard: return 0; } EXPORT_SYMBOL(tcp_rcv_state_process); + +static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + if (family == AF_INET) + LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"), + &ireq->ir_rmt_addr, port); + else + LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI6/%u\n"), + &ireq->ir_v6_rmt_addr, port); +} + +int tcp_conn_request(struct request_sock_ops *rsk_ops, + const struct tcp_request_sock_ops *af_ops, + struct sock *sk, struct sk_buff *skb) +{ + struct tcp_options_received tmp_opt; + struct request_sock *req; + struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = NULL; + __u32 isn = TCP_SKB_CB(skb)->when; + bool want_cookie = false, fastopen; + struct flowi fl; + struct tcp_fastopen_cookie foc = { .len = -1 }; + int err; + + + /* TW buckets are converted to open requests without + * limitations, they conserve resources and peer is + * evidently real one. + */ + if ((sysctl_tcp_syncookies == 2 || + inet_csk_reqsk_queue_is_full(sk)) && !isn) { + want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name); + if (!want_cookie) + goto drop; + } + + + /* Accept backlog is full. If we have already queued enough + * of warm entries in syn queue, drop request. It is better than + * clogging syn queue with openreqs with exponentially increasing + * timeout. + */ + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + goto drop; + } + + req = inet_reqsk_alloc(rsk_ops); + if (!req) + goto drop; + + tcp_rsk(req)->af_specific = af_ops; + + tcp_clear_options(&tmp_opt); + tmp_opt.mss_clamp = af_ops->mss_clamp; + tmp_opt.user_mss = tp->rx_opt.user_mss; + tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); + + if (want_cookie && !tmp_opt.saw_tstamp) + tcp_clear_options(&tmp_opt); + + tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; + tcp_openreq_init(req, &tmp_opt, skb, sk); + + af_ops->init_req(req, sk, skb); + + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + + if (!want_cookie || tmp_opt.tstamp_ok) + TCP_ECN_create_request(req, skb, sock_net(sk)); + + if (want_cookie) { + isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); + req->cookie_ts = tmp_opt.tstamp_ok; + } else if (!isn) { + /* VJ's idea. We save last timestamp seen + * from the destination in peer table, when entering + * state TIME-WAIT, and check against it before + * accepting new connection request. + * + * If "isn" is not zero, this request hit alive + * timewait bucket, so that all the necessary checks + * are made in the function processing timewait state. + */ + if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle) { + bool strict; + + dst = af_ops->route_req(sk, &fl, req, &strict); + if (dst && strict && + !tcp_peer_is_proven(req, dst, true)) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); + goto drop_and_release; + } + } + /* Kill the following clause, if you dislike this way. */ + else if (!sysctl_tcp_syncookies && + (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (sysctl_max_syn_backlog >> 2)) && + !tcp_peer_is_proven(req, dst, false)) { + /* Without syncookies last quarter of + * backlog is filled with destinations, + * proven to be alive. + * It means that we continue to communicate + * to destinations, already remembered + * to the moment of synflood. + */ + pr_drop_req(req, ntohs(tcp_hdr(skb)->source), + rsk_ops->family); + goto drop_and_release; + } + + isn = af_ops->init_seq(skb); + } + if (!dst) { + dst = af_ops->route_req(sk, &fl, req, NULL); + if (!dst) + goto drop_and_free; + } + + tcp_rsk(req)->snt_isn = isn; + tcp_openreq_init_rwin(req, sk, dst); + fastopen = !want_cookie && + tcp_try_fastopen(sk, skb, req, &foc, dst); + err = af_ops->send_synack(sk, dst, &fl, req, + skb_get_queue_mapping(skb), &foc); + if (!fastopen) { + if (err || want_cookie) + goto drop_and_free; + + tcp_rsk(req)->listener = NULL; + af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + } + + return 0; + +drop_and_release: + dst_release(dst); +drop_and_free: + reqsk_free(req); +drop: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + return 0; +} +EXPORT_SYMBOL(tcp_conn_request); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 845c39de97ab..5dfebd2f2e38 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1282,137 +1282,13 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_options_received tmp_opt; - struct request_sock *req; - struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = NULL; - __be32 saddr = ip_hdr(skb)->saddr; - __u32 isn = TCP_SKB_CB(skb)->when; - bool want_cookie = false, fastopen; - struct flowi4 fl4; - struct tcp_fastopen_cookie foc = { .len = -1 }; - const struct tcp_request_sock_ops *af_ops; - int err; - /* Never answer to SYNs send to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) goto drop; - /* TW buckets are converted to open requests without - * limitations, they conserve resources and peer is - * evidently real one. - */ - if ((sysctl_tcp_syncookies == 2 || - inet_csk_reqsk_queue_is_full(sk)) && !isn) { - want_cookie = tcp_syn_flood_action(sk, skb, "TCP"); - if (!want_cookie) - goto drop; - } - - /* Accept backlog is full. If we have already queued enough - * of warm entries in syn queue, drop request. It is better than - * clogging syn queue with openreqs with exponentially increasing - * timeout. - */ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); - goto drop; - } - - req = inet_reqsk_alloc(&tcp_request_sock_ops); - if (!req) - goto drop; - - af_ops = tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; - - tcp_clear_options(&tmp_opt); - tmp_opt.mss_clamp = af_ops->mss_clamp; - tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); - - if (want_cookie && !tmp_opt.saw_tstamp) - tcp_clear_options(&tmp_opt); - - tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; - tcp_openreq_init(req, &tmp_opt, skb, sk); - - af_ops->init_req(req, sk, skb); - - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; + return tcp_conn_request(&tcp_request_sock_ops, + &tcp_request_sock_ipv4_ops, sk, skb); - if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb, sock_net(sk)); - - if (want_cookie) { - isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); - req->cookie_ts = tmp_opt.tstamp_ok; - } else if (!isn) { - /* VJ's idea. We save last timestamp seen - * from the destination in peer table, when entering - * state TIME-WAIT, and check against it before - * accepting new connection request. - * - * If "isn" is not zero, this request hit alive - * timewait bucket, so that all the necessary checks - * are made in the function processing timewait state. - */ - if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle) { - bool strict; - - dst = af_ops->route_req(sk, (struct flowi *)&fl4, req, - &strict); - if (dst && strict && - !tcp_peer_is_proven(req, dst, true)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); - goto drop_and_release; - } - } - /* Kill the following clause, if you dislike this way. */ - else if (!sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (sysctl_max_syn_backlog >> 2)) && - !tcp_peer_is_proven(req, dst, false)) { - /* Without syncookies last quarter of - * backlog is filled with destinations, - * proven to be alive. - * It means that we continue to communicate - * to destinations, already remembered - * to the moment of synflood. - */ - LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"), - &saddr, ntohs(tcp_hdr(skb)->source)); - goto drop_and_release; - } - - isn = af_ops->init_seq(skb); - } - if (!dst) { - dst = af_ops->route_req(sk, (struct flowi *)&fl4, req, NULL); - if (!dst) - goto drop_and_free; - } - - tcp_rsk(req)->snt_isn = isn; - tcp_openreq_init_rwin(req, sk, dst); - fastopen = !want_cookie && - tcp_try_fastopen(sk, skb, req, &foc, dst); - err = af_ops->send_synack(sk, dst, NULL, req, - skb_get_queue_mapping(skb), &foc); - if (!fastopen) { - if (err || want_cookie) - goto drop_and_free; - - tcp_rsk(req)->listener = NULL; - af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - } - - return 0; - -drop_and_release: - dst_release(dst); -drop_and_free: - reqsk_free(req); drop: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8232bc7423c6..bc24ee21339a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1008,133 +1008,17 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) return sk; } -/* FIXME: this is substantially similar to the ipv4 code. - * Can some kind of merge be done? -- erics - */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_options_received tmp_opt; - struct request_sock *req; - struct inet_request_sock *ireq; - struct tcp_sock *tp = tcp_sk(sk); - __u32 isn = TCP_SKB_CB(skb)->when; - struct dst_entry *dst = NULL; - struct tcp_fastopen_cookie foc = { .len = -1 }; - bool want_cookie = false, fastopen; - struct flowi6 fl6; - const struct tcp_request_sock_ops *af_ops; - int err; - if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); if (!ipv6_unicast_destination(skb)) goto drop; - if ((sysctl_tcp_syncookies == 2 || - inet_csk_reqsk_queue_is_full(sk)) && !isn) { - want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6"); - if (!want_cookie) - goto drop; - } + return tcp_conn_request(&tcp6_request_sock_ops, + &tcp_request_sock_ipv6_ops, sk, skb); - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); - goto drop; - } - - req = inet_reqsk_alloc(&tcp6_request_sock_ops); - if (req == NULL) - goto drop; - - af_ops = tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops; - - tcp_clear_options(&tmp_opt); - tmp_opt.mss_clamp = af_ops->mss_clamp; - tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); - - if (want_cookie && !tmp_opt.saw_tstamp) - tcp_clear_options(&tmp_opt); - - tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; - tcp_openreq_init(req, &tmp_opt, skb, sk); - - ireq = inet_rsk(req); - af_ops->init_req(req, sk, skb); - - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_release; - - if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb, sock_net(sk)); - - if (want_cookie) { - isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); - req->cookie_ts = tmp_opt.tstamp_ok; - } else if (!isn) { - /* VJ's idea. We save last timestamp seen - * from the destination in peer table, when entering - * state TIME-WAIT, and check against it before - * accepting new connection request. - * - * If "isn" is not zero, this request hit alive - * timewait bucket, so that all the necessary checks - * are made in the function processing timewait state. - */ - if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle) { - dst = af_ops->route_req(sk, (struct flowi *)&fl6, req, - NULL); - if (dst && !tcp_peer_is_proven(req, dst, true)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); - goto drop_and_release; - } - } - /* Kill the following clause, if you dislike this way. */ - else if (!sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (sysctl_max_syn_backlog >> 2)) && - !tcp_peer_is_proven(req, dst, false)) { - /* Without syncookies last quarter of - * backlog is filled with destinations, - * proven to be alive. - * It means that we continue to communicate - * to destinations, already remembered - * to the moment of synflood. - */ - LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n", - &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source)); - goto drop_and_release; - } - - isn = af_ops->init_seq(skb); - } - - if (!dst) { - dst = af_ops->route_req(sk, (struct flowi *)&fl6, req, NULL); - if (!dst) - goto drop_and_free; - } - - tcp_rsk(req)->snt_isn = isn; - tcp_openreq_init_rwin(req, sk, dst); - fastopen = !want_cookie && - tcp_try_fastopen(sk, skb, req, &foc, dst); - err = af_ops->send_synack(sk, dst, (struct flowi *)&fl6, req, - skb_get_queue_mapping(skb), &foc); - if (!fastopen) { - if (err || want_cookie) - goto drop_and_free; - - tcp_rsk(req)->listener = NULL; - af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - } - return 0; - -drop_and_release: - dst_release(dst); -drop_and_free: - reqsk_free(req); drop: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return 0; /* don't send reset */ -- cgit v1.2.3 From 86c6a2c75ab97fe31844985169e26aea335432f9 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 30 Jun 2014 15:09:49 -0400 Subject: tcp: switch snt_synack back to measuring transmit time of first SYNACK Always store in snt_synack the time at which the server received the first client SYN and attempted to send the first SYNACK. Recent commit aa27fc501 ("tcp: tcp_v[46]_conn_request: fix snt_synack initialization") resolved an inconsistency between IPv4 and IPv6 in the initialization of snt_synack. This commit brings back the idea from 843f4a55e (tcp: use tcp_v4_send_synack on first SYN-ACK), which was going for the original behavior of snt_synack from the commit where it was added in 9ad7c049f0f79 ("tcp: RFC2988bis + taking RTT sample from 3WHS for the passive open side") in v3.1. In addition to being simpler (and probably a tiny bit faster), unconditionally storing the time of the first SYNACK attempt has been useful because it allows calculating a performance metric quantifying how long it took to establish a passive TCP connection. Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Cc: Octavian Purdila Cc: Jerry Chu Acked-by: Octavian Purdila Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- net/ipv4/tcp_ipv4.c | 2 -- net/ipv6/tcp_ipv6.c | 2 -- 3 files changed, 1 insertion(+), 5 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 0d5389aecf18..c9a75dbba0c7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1093,7 +1093,7 @@ static inline void tcp_openreq_init(struct request_sock *req, req->cookie_ts = 0; tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; - tcp_rsk(req)->snt_synack = 0; + tcp_rsk(req)->snt_synack = tcp_time_stamp; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; ireq->tstamp_ok = rx_opt->tstamp_ok; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5dfebd2f2e38..52d0f6a1ec2c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -838,8 +838,6 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, ireq->ir_rmt_addr, ireq->opt); err = net_xmit_eval(err); - if (!tcp_rsk(req)->snt_synack && !err) - tcp_rsk(req)->snt_synack = tcp_time_stamp; } return err; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bc24ee21339a..a97c95585da8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -498,8 +498,6 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, skb_set_queue_mapping(skb, queue_mapping); err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); err = net_xmit_eval(err); - if (!tcp_rsk(req)->snt_synack && !err) - tcp_rsk(req)->snt_synack = tcp_time_stamp; } done: -- cgit v1.2.3 From b73c3d0e4f0e1961e15bec18720e48aabebe2109 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 1 Jul 2014 21:32:17 -0700 Subject: net: Save TX flow hash in sock and set in skbuf on xmit For a connected socket we can precompute the flow hash for setting in skb->hash on output. This is a performance advantage over calculating the skb->hash for every packet on the connection. The computation is done using the common hash algorithm to be consistent with computations done for packets of the connection in other states where thers is no socket (e.g. time-wait, syn-recv, syn-cookies). This patch adds sk_txhash to the sock structure. inet_set_txhash and ip6_set_txhash functions are added which are called from points in TCP and UDP where socket moves to established state. skb_set_hash_from_sk is a function which sets skb->hash from the sock txhash value. This is called in UDP and TCP transmit path when transmitting within the context of a socket. Tested: ran super_netperf with 200 TCP_RR streams over a vxlan interface (in this case skb_get_hash called on every TX packet to create a UDP source port). Before fix: 95.02% CPU utilization 154/256/505 90/95/99% latencies 1.13042e+06 tps Time in functions: 0.28% skb_flow_dissect 0.21% __skb_get_hash After fix: 94.95% CPU utilization 156/254/485 90/95/99% latencies 1.15447e+06 Neither __skb_get_hash nor skb_flow_dissect appear in perf Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/ip.h | 14 ++++++++++++++ include/net/ipv6.h | 15 +++++++++++++++ include/net/sock.h | 11 +++++++++++ net/ipv4/datagram.c | 1 + net/ipv4/tcp_ipv4.c | 3 +++ net/ipv4/tcp_output.c | 1 + net/ipv6/datagram.c | 1 + net/ipv6/tcp_ipv6.c | 4 ++++ 8 files changed, 50 insertions(+) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/include/net/ip.h b/include/net/ip.h index 0e795df05ec9..2e8f055989c3 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -31,6 +31,7 @@ #include #include #include +#include struct sock; @@ -353,6 +354,19 @@ static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto) skb->len, proto, 0); } +static inline void inet_set_txhash(struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + struct flow_keys keys; + + keys.src = inet->inet_saddr; + keys.dst = inet->inet_daddr; + keys.port16[0] = inet->inet_sport; + keys.port16[1] = inet->inet_dport; + + sk->sk_txhash = flow_hash_from_keys(&keys); +} + /* * Map a multicast IP onto multicast MAC for type ethernet. */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 574337fe72dd..2aa86e1135a1 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #define SIN6_LEN_RFC2133 24 @@ -684,6 +685,20 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, return hlimit; } +static inline void ip6_set_txhash(struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct flow_keys keys; + + keys.src = (__force __be32)ipv6_addr_hash(&np->saddr); + keys.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); + keys.port16[0] = inet->inet_sport; + keys.port16[1] = inet->inet_dport; + + sk->sk_txhash = flow_hash_from_keys(&keys); +} + /* * Header manipulation */ diff --git a/include/net/sock.h b/include/net/sock.h index 8d4c9473e7d7..cb84b2f1ad8f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -273,6 +273,7 @@ struct cg_proto; * @sk_rcvtimeo: %SO_RCVTIMEO setting * @sk_sndtimeo: %SO_SNDTIMEO setting * @sk_rxhash: flow hash received from netif layer + * @sk_txhash: computed flow hash for use on transmit * @sk_filter: socket filtering instructions * @sk_protinfo: private area, net family specific, when not using slab * @sk_timer: sock cleanup timer @@ -347,6 +348,7 @@ struct sock { #ifdef CONFIG_RPS __u32 sk_rxhash; #endif + __u32 sk_txhash; #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sk_napi_id; unsigned int sk_ll_usec; @@ -1980,6 +1982,14 @@ static inline void sock_poll_wait(struct file *filp, } } +static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk) +{ + if (sk->sk_txhash) { + skb->l4_hash = 1; + skb->hash = sk->sk_txhash; + } +} + /* * Queue a received datagram if it will fit. Stream and sequenced * protocols can't normally use this as they need to fit buffers in @@ -1994,6 +2004,7 @@ static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) skb_orphan(skb); skb->sk = sk; skb->destructor = sock_wfree; + skb_set_hash_from_sk(skb, sk); /* * We used to take a refcount on sk, but following operation * is enough to guarantee sk_free() wont free this sock until diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index a3095fdefbed..90c0e8386116 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -76,6 +76,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr = fl4->daddr; inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; + inet_set_txhash(sk); inet->inet_id = jiffies; sk_dst_set(sk, &rt->dst); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 52d0f6a1ec2c..1edc739b9da5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -208,6 +208,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_dport = usin->sin_port; inet->inet_daddr = daddr; + inet_set_txhash(sk); + inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; @@ -1334,6 +1336,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->rcv_tos = ip_hdr(skb)->tos; inet_csk(newsk)->icsk_ext_hdr_len = 0; + inet_set_txhash(newsk); if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; newinet->inet_id = newtp->write_seq ^ jiffies; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f8f2a944a1ce..bcee13c4627c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -916,6 +916,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_orphan(skb); skb->sk = sk; skb->destructor = tcp_wfree; + skb_set_hash_from_sk(skb, sk); atomic_add(skb->truesize, &sk->sk_wmem_alloc); /* Build TCP header and checksum it. */ diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index c3bf2d2e519e..2753319524f1 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -199,6 +199,7 @@ ipv4_connected: NULL); sk->sk_state = TCP_ESTABLISHED; + ip6_set_txhash(sk); out: fl6_sock_release(flowlabel); return err; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a97c95585da8..22055b098428 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -198,6 +198,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; + ip6_set_txhash(sk); + /* * TCP over IPv4 */ @@ -1132,6 +1134,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; newsk->sk_bound_dev_if = ireq->ir_iif; + ip6_set_txhash(newsk); + /* Now IPv6 options... First: no IPv4 options. -- cgit v1.2.3 From 9ea88a153001ffeb3d8810917e8eea62ca9b6f25 Mon Sep 17 00:00:00 2001 From: Dmitry Popov Date: Thu, 7 Aug 2014 02:38:22 +0400 Subject: tcp: md5: check md5 signature without socket lock Since a8afca032 (tcp: md5: protects md5sig_info with RCU) tcp_md5_do_lookup doesn't require socket lock, rcu_read_lock is enough. Therefore socket lock is no longer required for tcp_v{4,6}_inbound_md5_hash too, so we can move these calls (wrapped with rcu_read_{,un}lock) before bh_lock_sock: from tcp_v{4,6}_do_rcv to tcp_v{4,6}_rcv. Signed-off-by: Dmitry Popov Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 36 +++++++++++++++++++++++++----------- net/ipv6/tcp_ipv6.c | 25 +++++++++++++++++++------ 2 files changed, 44 insertions(+), 17 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 992a1f926009..dceff5fe8e66 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1167,7 +1167,8 @@ clear_hash_noput: } EXPORT_SYMBOL(tcp_v4_md5_hash_skb); -static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) +static bool __tcp_v4_inbound_md5_hash(struct sock *sk, + const struct sk_buff *skb) { /* * This gets called for each TCP segment that arrives @@ -1220,6 +1221,17 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) return false; } +static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) +{ + bool ret; + + rcu_read_lock(); + ret = __tcp_v4_inbound_md5_hash(sk, skb); + rcu_read_unlock(); + + return ret; +} + #endif static void tcp_v4_init_req(struct request_sock *req, struct sock *sk, @@ -1432,16 +1444,6 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) { struct sock *rsk; -#ifdef CONFIG_TCP_MD5SIG - /* - * We really want to reject the packet as early as possible - * if: - * o We're expecting an MD5'd packet and this is no MD5 tcp option - * o There is an MD5 option and we're not expecting one - */ - if (tcp_v4_inbound_md5_hash(sk, skb)) - goto discard; -#endif if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ struct dst_entry *dst = sk->sk_rx_dst; @@ -1644,6 +1646,18 @@ process: if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; + +#ifdef CONFIG_TCP_MD5SIG + /* + * We really want to reject the packet as early as possible + * if: + * o We're expecting an MD5'd packet and this is no MD5 tcp option + * o There is an MD5 option and we're not expecting one + */ + if (tcp_v4_inbound_md5_hash(sk, skb)) + goto discard_and_relse; +#endif + nf_reset(skb); if (sk_filter(sk, skb)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 22055b098428..f2ce95502392 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -667,7 +667,8 @@ clear_hash_noput: return 1; } -static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) +static int __tcp_v6_inbound_md5_hash(struct sock *sk, + const struct sk_buff *skb) { const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; @@ -707,6 +708,18 @@ static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) } return 0; } + +static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) +{ + int ret; + + rcu_read_lock(); + ret = __tcp_v6_inbound_md5_hash(sk, skb); + rcu_read_unlock(); + + return ret; +} + #endif static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, @@ -1247,11 +1260,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); -#ifdef CONFIG_TCP_MD5SIG - if (tcp_v6_inbound_md5_hash(sk, skb)) - goto discard; -#endif - if (sk_filter(sk, skb)) goto discard; @@ -1424,6 +1432,11 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; +#ifdef CONFIG_TCP_MD5SIG + if (tcp_v6_inbound_md5_hash(sk, skb)) + goto discard_and_relse; +#endif + if (sk_filter(sk, skb)) goto discard_and_relse; -- cgit v1.2.3 From 4fab9071950c2021d846e18351e0f46a1cffd67b Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 14 Aug 2014 12:40:05 -0400 Subject: tcp: fix tcp_release_cb() to dispatch via address family for mtu_reduced() Make sure we use the correct address-family-specific function for handling MTU reductions from within tcp_release_cb(). Previously AF_INET6 sockets were incorrectly always using the IPv6 code path when sometimes they were handling IPv4 traffic and thus had an IPv4 dst. Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Diagnosed-by: Willem de Bruijn Fixes: 563d34d057862 ("tcp: dont drop MTU reduction indications") Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 1 + include/net/sock.h | 1 - include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 5 +++-- net/ipv4/tcp_output.c | 2 +- net/ipv6/tcp_ipv6.c | 3 ++- 6 files changed, 8 insertions(+), 5 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 7a4313887568..5fbe6568c3cf 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -62,6 +62,7 @@ struct inet_connection_sock_af_ops { void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); int (*bind_conflict)(const struct sock *sk, const struct inet_bind_bucket *tb, bool relax); + void (*mtu_reduced)(struct sock *sk); }; /** inet_connection_sock - INET connection oriented sock diff --git a/include/net/sock.h b/include/net/sock.h index 38805fa02e48..7f2ab72f321a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -987,7 +987,6 @@ struct proto { struct sk_buff *skb); void (*release_cb)(struct sock *sk); - void (*mtu_reduced)(struct sock *sk); /* Keeping track of sk's, looking them up, and port selection methods. */ void (*hash)(struct sock *sk); diff --git a/include/net/tcp.h b/include/net/tcp.h index 36f55254573f..e337e05035be 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -448,6 +448,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); */ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); +void tcp_v4_mtu_reduced(struct sock *sk); int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index dceff5fe8e66..cd17f009aede 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -271,7 +271,7 @@ EXPORT_SYMBOL(tcp_v4_connect); * It can be called through tcp_release_cb() if socket was owned by user * at the time tcp_v4_err() was called to handle ICMP message. */ -static void tcp_v4_mtu_reduced(struct sock *sk) +void tcp_v4_mtu_reduced(struct sock *sk) { struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); @@ -302,6 +302,7 @@ static void tcp_v4_mtu_reduced(struct sock *sk) tcp_simple_retransmit(sk); } /* else let the usual retransmit timer handle it */ } +EXPORT_SYMBOL(tcp_v4_mtu_reduced); static void do_redirect(struct sk_buff *skb, struct sock *sk) { @@ -1787,6 +1788,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .compat_setsockopt = compat_ip_setsockopt, .compat_getsockopt = compat_ip_getsockopt, #endif + .mtu_reduced = tcp_v4_mtu_reduced, }; EXPORT_SYMBOL(ipv4_specific); @@ -2406,7 +2408,6 @@ struct proto tcp_prot = { .sendpage = tcp_sendpage, .backlog_rcv = tcp_v4_do_rcv, .release_cb = tcp_release_cb, - .mtu_reduced = tcp_v4_mtu_reduced, .hash = inet_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ff3f0f75cc6c..5a7c41fbc6d3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -800,7 +800,7 @@ void tcp_release_cb(struct sock *sk) __sock_put(sk); } if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { - sk->sk_prot->mtu_reduced(sk); + inet_csk(sk)->icsk_af_ops->mtu_reduced(sk); __sock_put(sk); } } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f2ce95502392..29964c3d363c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1595,6 +1595,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = { .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, #endif + .mtu_reduced = tcp_v6_mtu_reduced, }; #ifdef CONFIG_TCP_MD5SIG @@ -1625,6 +1626,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, #endif + .mtu_reduced = tcp_v4_mtu_reduced, }; #ifdef CONFIG_TCP_MD5SIG @@ -1864,7 +1866,6 @@ struct proto tcpv6_prot = { .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, .release_cb = tcp_release_cb, - .mtu_reduced = tcp_v6_mtu_reduced, .hash = tcp_v6_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, -- cgit v1.2.3