diff options
-rw-r--r-- | include/net/inet_connection_sock.h | 3 | ||||
-rw-r--r-- | include/net/ip6_fib.h | 10 | ||||
-rw-r--r-- | include/net/tcp.h | 22 | ||||
-rw-r--r-- | include/uapi/linux/rtnetlink.h | 2 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 15 | ||||
-rw-r--r-- | net/decnet/dn_fib.c | 3 | ||||
-rw-r--r-- | net/decnet/dn_table.c | 4 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 14 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 121 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 30 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 21 | ||||
-rw-r--r-- | net/ipv6/ip6_fib.c | 68 | ||||
-rw-r--r-- | net/ipv6/route.c | 72 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 2 |
15 files changed, 304 insertions, 85 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 848e85cb5c61..5976bdecf58b 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -98,7 +98,8 @@ struct inet_connection_sock { const struct tcp_congestion_ops *icsk_ca_ops; const struct inet_connection_sock_af_ops *icsk_af_ops; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); - __u8 icsk_ca_state; + __u8 icsk_ca_state:7, + icsk_ca_dst_locked:1; __u8 icsk_retransmits; __u8 icsk_pending; __u8 icsk_backoff; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 8eea35d32a75..20e80fa7bbdd 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -74,6 +74,11 @@ struct fib6_node { #define FIB6_SUBTREE(fn) ((fn)->subtree) #endif +struct mx6_config { + const u32 *mx; + DECLARE_BITMAP(mx_valid, RTAX_MAX); +}; + /* * routing information * @@ -291,9 +296,8 @@ struct fib6_node *fib6_locate(struct fib6_node *root, void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), void *arg); -int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, - struct nlattr *mx, int mx_len); - +int fib6_add(struct fib6_node *root, struct rt6_info *rt, + struct nl_info *info, struct mx6_config *mxc); int fib6_del(struct rt6_info *rt, struct nl_info *info); void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info); diff --git a/include/net/tcp.h b/include/net/tcp.h index f50f29faf76f..b8fdc6bab3f3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -448,6 +448,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb); +void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst); struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst); @@ -636,6 +637,11 @@ static inline u32 tcp_rto_min_us(struct sock *sk) return jiffies_to_usecs(tcp_rto_min(sk)); } +static inline bool tcp_ca_dst_locked(const struct dst_entry *dst) +{ + return dst_metric_locked(dst, RTAX_CC_ALGO); +} + /* Compute the actual receive window we are currently advertising. * Rcv_nxt can be after the window if our peer push more data * than the offered window. @@ -787,6 +793,8 @@ enum tcp_ca_ack_event_flags { #define TCP_CA_MAX 128 #define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX) +#define TCP_CA_UNSPEC 0 + /* Algorithm can be set on socket without CAP_NET_ADMIN privileges */ #define TCP_CONG_NON_RESTRICTED 0x1 /* Requires ECN/ECT set on all packets */ @@ -794,7 +802,8 @@ enum tcp_ca_ack_event_flags { struct tcp_congestion_ops { struct list_head list; - unsigned long flags; + u32 key; + u32 flags; /* initialize private data (optional) */ void (*init)(struct sock *sk); @@ -841,6 +850,17 @@ u32 tcp_reno_ssthresh(struct sock *sk); void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); extern struct tcp_congestion_ops tcp_reno; +struct tcp_congestion_ops *tcp_ca_find_key(u32 key); +u32 tcp_ca_get_key_by_name(const char *name); +#ifdef CONFIG_INET +char *tcp_ca_get_name_by_key(u32 key, char *buffer); +#else +static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer) +{ + return NULL; +} +#endif + static inline bool tcp_ca_needs_ecn(const struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 9c9b8b4480cd..d81f22d5b390 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -389,6 +389,8 @@ enum { #define RTAX_INITRWND RTAX_INITRWND RTAX_QUICKACK, #define RTAX_QUICKACK RTAX_QUICKACK + RTAX_CC_ALGO, +#define RTAX_CC_ALGO RTAX_CC_ALGO __RTAX_MAX }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index da983d4bac02..6a6cdade1676 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -50,6 +50,7 @@ #include <net/arp.h> #include <net/route.h> #include <net/udp.h> +#include <net/tcp.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <net/fib_rules.h> @@ -669,9 +670,19 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) for (i = 0; i < RTAX_MAX; i++) { if (metrics[i]) { + if (i == RTAX_CC_ALGO - 1) { + char tmp[TCP_CA_NAME_MAX], *name; + + name = tcp_ca_get_name_by_key(metrics[i], tmp); + if (!name) + continue; + if (nla_put_string(skb, i + 1, name)) + goto nla_put_failure; + } else { + if (nla_put_u32(skb, i + 1, metrics[i])) + goto nla_put_failure; + } valid++; - if (nla_put_u32(skb, i+1, metrics[i])) - goto nla_put_failure; } } diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index d332aefb0846..df4803437888 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -298,7 +298,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *att int type = nla_type(attr); if (type) { - if (type > RTAX_MAX || nla_len(attr) < 4) + if (type > RTAX_MAX || type == RTAX_CC_ALGO || + nla_len(attr) < 4) goto err_inval; fi->fib_metrics[type-1] = nla_get_u32(attr); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 86e3807052e9..3f19fcbf126d 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -29,6 +29,7 @@ #include <linux/route.h> /* RTF_xxx */ #include <net/neighbour.h> #include <net/netlink.h> +#include <net/tcp.h> #include <net/dst.h> #include <net/flow.h> #include <net/fib_rules.h> @@ -273,7 +274,8 @@ static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi) size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(2) /* RTA_DST */ - + nla_total_size(4); /* RTA_PRIORITY */ + + nla_total_size(4) /* RTA_PRIORITY */ + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ /* space for nested metrics */ payload += nla_total_size((RTAX_MAX * nla_total_size(4))); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f99f41bd15b8..d2b7b5521b1b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -360,7 +360,8 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(4) /* RTA_DST */ + nla_total_size(4) /* RTA_PRIORITY */ - + nla_total_size(4); /* RTA_PREFSRC */ + + nla_total_size(4) /* RTA_PREFSRC */ + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ /* space for nested metrics */ payload += nla_total_size((RTAX_MAX * nla_total_size(4))); @@ -859,7 +860,16 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (type > RTAX_MAX) goto err_inval; - val = nla_get_u32(nla); + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(tmp); + if (val == TCP_CA_UNSPEC) + goto err_inval; + } else { + val = nla_get_u32(nla); + } if (type == RTAX_ADVMSS && val > 65535 - 40) val = 65535 - 40; if (type == RTAX_MTU && val > 65535 - 15) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 27ead0dd16bc..63c29dba68a8 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -13,6 +13,7 @@ #include <linux/types.h> #include <linux/list.h> #include <linux/gfp.h> +#include <linux/jhash.h> #include <net/tcp.h> static DEFINE_SPINLOCK(tcp_cong_list_lock); @@ -31,6 +32,34 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name) return NULL; } +/* Must be called with rcu lock held */ +static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name) +{ + const struct tcp_congestion_ops *ca = tcp_ca_find(name); +#ifdef CONFIG_MODULES + if (!ca && capable(CAP_NET_ADMIN)) { + rcu_read_unlock(); + request_module("tcp_%s", name); + rcu_read_lock(); + ca = tcp_ca_find(name); + } +#endif + return ca; +} + +/* Simple linear search, not much in here. */ +struct tcp_congestion_ops *tcp_ca_find_key(u32 key) +{ + struct tcp_congestion_ops *e; + + list_for_each_entry_rcu(e, &tcp_cong_list, list) { + if (e->key == key) + return e; + } + + return NULL; +} + /* * Attach new congestion control algorithm to the list * of available options. @@ -45,9 +74,12 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) return -EINVAL; } + ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); + spin_lock(&tcp_cong_list_lock); - if (tcp_ca_find(ca->name)) { - pr_notice("%s already registered\n", ca->name); + if (ca->key == TCP_CA_UNSPEC || tcp_ca_find_key(ca->key)) { + pr_notice("%s already registered or non-unique key\n", + ca->name); ret = -EEXIST; } else { list_add_tail_rcu(&ca->list, &tcp_cong_list); @@ -70,9 +102,50 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) spin_lock(&tcp_cong_list_lock); list_del_rcu(&ca->list); spin_unlock(&tcp_cong_list_lock); + + /* Wait for outstanding readers to complete before the + * module gets removed entirely. + * + * A try_module_get() should fail by now as our module is + * in "going" state since no refs are held anymore and + * module_exit() handler being called. + */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); +u32 tcp_ca_get_key_by_name(const char *name) +{ + const struct tcp_congestion_ops *ca; + u32 key; + + might_sleep(); + + rcu_read_lock(); + ca = __tcp_ca_find_autoload(name); + key = ca ? ca->key : TCP_CA_UNSPEC; + rcu_read_unlock(); + + return key; +} +EXPORT_SYMBOL_GPL(tcp_ca_get_key_by_name); + +char *tcp_ca_get_name_by_key(u32 key, char *buffer) +{ + const struct tcp_congestion_ops *ca; + char *ret = NULL; + + rcu_read_lock(); + ca = tcp_ca_find_key(key); + if (ca) + ret = strncpy(buffer, ca->name, + TCP_CA_NAME_MAX); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key); + /* Assign choice of congestion control. */ void tcp_assign_congestion_control(struct sock *sk) { @@ -107,6 +180,18 @@ void tcp_init_congestion_control(struct sock *sk) icsk->icsk_ca_ops->init(sk); } +static void tcp_reinit_congestion_control(struct sock *sk, + const struct tcp_congestion_ops *ca) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + tcp_cleanup_congestion_control(sk); + icsk->icsk_ca_ops = ca; + + if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) + icsk->icsk_ca_ops->init(sk); +} + /* Manage refcounts on socket close. */ void tcp_cleanup_congestion_control(struct sock *sk) { @@ -241,42 +326,26 @@ out: int tcp_set_congestion_control(struct sock *sk, const char *name) { struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_congestion_ops *ca; + const struct tcp_congestion_ops *ca; int err = 0; - rcu_read_lock(); - ca = tcp_ca_find(name); + if (icsk->icsk_ca_dst_locked) + return -EPERM; - /* no change asking for existing value */ + rcu_read_lock(); + ca = __tcp_ca_find_autoload(name); + /* No change asking for existing value */ if (ca == icsk->icsk_ca_ops) goto out; - -#ifdef CONFIG_MODULES - /* not found attempt to autoload module */ - if (!ca && capable(CAP_NET_ADMIN)) { - rcu_read_unlock(); - request_module("tcp_%s", name); - rcu_read_lock(); - ca = tcp_ca_find(name); - } -#endif if (!ca) err = -ENOENT; - else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) err = -EPERM; - else if (!try_module_get(ca->owner)) err = -EBUSY; - - else { - tcp_cleanup_congestion_control(sk); - icsk->icsk_ca_ops = ca; - - if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) - icsk->icsk_ca_ops->init(sk); - } + else + tcp_reinit_congestion_control(sk, ca); out: rcu_read_unlock(); return err; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a3f72d7fc06c..ad3e65bdd368 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1340,6 +1340,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } sk_setup_caps(newsk, dst); + tcp_ca_openreq_child(newsk, dst); + tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 63d2680b65db..bc9216dc9de1 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -399,6 +399,32 @@ static void tcp_ecn_openreq_child(struct tcp_sock *tp, tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; } +void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); + bool ca_got_dst = false; + + if (ca_key != TCP_CA_UNSPEC) { + const struct tcp_congestion_ops *ca; + + rcu_read_lock(); + ca = tcp_ca_find_key(ca_key); + if (likely(ca && try_module_get(ca->owner))) { + icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); + icsk->icsk_ca_ops = ca; + ca_got_dst = true; + } + rcu_read_unlock(); + } + + if (!ca_got_dst && !try_module_get(icsk->icsk_ca_ops->owner)) + tcp_assign_congestion_control(sk); + + tcp_set_ca_state(sk, TCP_CA_Open); +} +EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); + /* This is not only more efficient than what we used to do, it eliminates * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM * @@ -451,10 +477,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; - if (!try_module_get(newicsk->icsk_ca_ops->owner)) - tcp_assign_congestion_control(newsk); - - tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); __skb_queue_head_init(&newtp->out_of_order_queue); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7f18262e2326..dc30cb563e4f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2939,6 +2939,25 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, } EXPORT_SYMBOL(tcp_make_synack); +static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_congestion_ops *ca; + u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); + + if (ca_key == TCP_CA_UNSPEC) + return; + + rcu_read_lock(); + ca = tcp_ca_find_key(ca_key); + if (likely(ca && try_module_get(ca->owner))) { + module_put(icsk->icsk_ca_ops->owner); + icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); + icsk->icsk_ca_ops = ca; + } + rcu_read_unlock(); +} + /* Do all connect socket setups that can be done AF independent. */ static void tcp_connect_init(struct sock *sk) { @@ -2964,6 +2983,8 @@ static void tcp_connect_init(struct sock *sk) tcp_mtup_init(sk); tcp_sync_mss(sk, dst_mtu(dst)); + tcp_ca_dst_init(sk, dst); + if (!tp->window_clamp) tp->window_clamp = dst_metric(dst, RTAX_WINDOW); tp->advmss = dst_metric_advmss(dst); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index b2d1838897c9..03c520a4ebeb 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -630,32 +630,35 @@ static bool rt6_qualify_for_ecmp(struct rt6_info *rt) RTF_GATEWAY; } -static int fib6_commit_metrics(struct dst_entry *dst, - struct nlattr *mx, int mx_len) +static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc) { - struct nlattr *nla; - int remaining; - u32 *mp; + int i; - if (dst->flags & DST_HOST) { - mp = dst_metrics_write_ptr(dst); - } else { - mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); - if (!mp) - return -ENOMEM; - dst_init_metrics(dst, mp, 0); + for (i = 0; i < RTAX_MAX; i++) { + if (test_bit(i, mxc->mx_valid)) + mp[i] = mxc->mx[i]; } +} - nla_for_each_attr(nla, mx, mx_len, remaining) { - int type = nla_type(nla); +static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc) +{ + if (!mxc->mx) + return 0; - if (type) { - if (type > RTAX_MAX) - return -EINVAL; + if (dst->flags & DST_HOST) { + u32 *mp = dst_metrics_write_ptr(dst); - mp[type - 1] = nla_get_u32(nla); - } + if (unlikely(!mp)) + return -ENOMEM; + + fib6_copy_metrics(mp, mxc); + } else { + dst_init_metrics(dst, mxc->mx, false); + + /* We've stolen mx now. */ + mxc->mx = NULL; } + return 0; } @@ -664,7 +667,7 @@ static int fib6_commit_metrics(struct dst_entry *dst, */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info, struct nlattr *mx, int mx_len) + struct nl_info *info, struct mx6_config *mxc) { struct rt6_info *iter = NULL; struct rt6_info **ins; @@ -773,11 +776,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, pr_warn("NLM_F_CREATE should be set when creating new route\n"); add: - if (mx) { - err = fib6_commit_metrics(&rt->dst, mx, mx_len); - if (err) - return err; - } + err = fib6_commit_metrics(&rt->dst, mxc); + if (err) + return err; + rt->dst.rt6_next = iter; *ins = rt; rt->rt6i_node = fn; @@ -797,11 +799,11 @@ add: pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); return -ENOENT; } - if (mx) { - err = fib6_commit_metrics(&rt->dst, mx, mx_len); - if (err) - return err; - } + + err = fib6_commit_metrics(&rt->dst, mxc); + if (err) + return err; + *ins = rt; rt->rt6i_node = fn; rt->dst.rt6_next = iter->dst.rt6_next; @@ -838,8 +840,8 @@ void fib6_force_start_gc(struct net *net) * with source addr info in sub-trees */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, - struct nlattr *mx, int mx_len) +int fib6_add(struct fib6_node *root, struct rt6_info *rt, + struct nl_info *info, struct mx6_config *mxc) { struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; @@ -934,7 +936,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, } #endif - err = fib6_add_rt2node(fn, rt, info, mx, mx_len); + err = fib6_add_rt2node(fn, rt, info, mxc); if (!err) { fib6_start_gc(info->nl_net, rt); if (!(rt->rt6i_flags & RTF_CACHE)) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c91083156edb..34dcbb59df75 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -853,14 +853,14 @@ EXPORT_SYMBOL(rt6_lookup); */ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, - struct nlattr *mx, int mx_len) + struct mx6_config *mxc) { int err; struct fib6_table *table; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info, mx, mx_len); + err = fib6_add(&table->tb6_root, rt, info, mxc); write_unlock_bh(&table->tb6_lock); return err; @@ -868,10 +868,10 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, int ip6_ins_rt(struct rt6_info *rt) { - struct nl_info info = { - .nl_net = dev_net(rt->dst.dev), - }; - return __ip6_ins_rt(rt, &info, NULL, 0); + struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; + struct mx6_config mxc = { .mx = NULL, }; + + return __ip6_ins_rt(rt, &info, &mxc); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, @@ -1470,9 +1470,51 @@ out: return entries > rt_max_size; } -/* - * - */ +static int ip6_convert_metrics(struct mx6_config *mxc, + const struct fib6_config *cfg) +{ + struct nlattr *nla; + int remaining; + u32 *mp; + + if (cfg->fc_mx == NULL) + return 0; + + mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (unlikely(!mp)) + return -ENOMEM; + + nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { + int type = nla_type(nla); + + if (type) { + u32 val; + + if (unlikely(type > RTAX_MAX)) + goto err; + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(tmp); + if (val == TCP_CA_UNSPEC) + goto err; + } else { + val = nla_get_u32(nla); + } + + mp[type - 1] = val; + __set_bit(type - 1, mxc->mx_valid); + } + } + + mxc->mx = mp; + + return 0; + err: + kfree(mp); + return -EINVAL; +} int ip6_route_add(struct fib6_config *cfg) { @@ -1482,6 +1524,7 @@ int ip6_route_add(struct fib6_config *cfg) struct net_device *dev = NULL; struct inet6_dev *idev = NULL; struct fib6_table *table; + struct mx6_config mxc = { .mx = NULL, }; int addr_type; if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) @@ -1677,8 +1720,14 @@ install_route: cfg->fc_nlinfo.nl_net = dev_net(dev); - return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len); + err = ip6_convert_metrics(&mxc, cfg); + if (err) + goto out; + + err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc); + kfree(mxc.mx); + return err; out: if (dev) dev_put(dev); @@ -2534,7 +2583,8 @@ static inline size_t rt6_nlmsg_size(void) + nla_total_size(4) /* RTA_OIF */ + nla_total_size(4) /* RTA_PRIORITY */ + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ - + nla_total_size(sizeof(struct rta_cacheinfo)); + + nla_total_size(sizeof(struct rta_cacheinfo)) + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ } static int rt6_fill_node(struct net *net, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9c0b54e87b47..5d46832c6f72 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1199,6 +1199,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + newnp->opt->opt_flen); + tcp_ca_openreq_child(newsk, dst); + tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && |