summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/net/inet_connection_sock.h3
-rw-r--r--include/net/ip6_fib.h10
-rw-r--r--include/net/tcp.h22
-rw-r--r--include/uapi/linux/rtnetlink.h2
-rw-r--r--net/core/rtnetlink.c15
-rw-r--r--net/decnet/dn_fib.c3
-rw-r--r--net/decnet/dn_table.c4
-rw-r--r--net/ipv4/fib_semantics.c14
-rw-r--r--net/ipv4/tcp_cong.c121
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_minisocks.c30
-rw-r--r--net/ipv4/tcp_output.c21
-rw-r--r--net/ipv6/ip6_fib.c68
-rw-r--r--net/ipv6/route.c72
-rw-r--r--net/ipv6/tcp_ipv6.c2
15 files changed, 304 insertions, 85 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 848e85cb5c61..5976bdecf58b 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -98,7 +98,8 @@ struct inet_connection_sock {
const struct tcp_congestion_ops *icsk_ca_ops;
const struct inet_connection_sock_af_ops *icsk_af_ops;
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
- __u8 icsk_ca_state;
+ __u8 icsk_ca_state:7,
+ icsk_ca_dst_locked:1;
__u8 icsk_retransmits;
__u8 icsk_pending;
__u8 icsk_backoff;
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 8eea35d32a75..20e80fa7bbdd 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -74,6 +74,11 @@ struct fib6_node {
#define FIB6_SUBTREE(fn) ((fn)->subtree)
#endif
+struct mx6_config {
+ const u32 *mx;
+ DECLARE_BITMAP(mx_valid, RTAX_MAX);
+};
+
/*
* routing information
*
@@ -291,9 +296,8 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
void *arg);
-int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
- struct nlattr *mx, int mx_len);
-
+int fib6_add(struct fib6_node *root, struct rt6_info *rt,
+ struct nl_info *info, struct mx6_config *mxc);
int fib6_del(struct rt6_info *rt, struct nl_info *info);
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f50f29faf76f..b8fdc6bab3f3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -448,6 +448,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_create_openreq_child(struct sock *sk,
struct request_sock *req,
struct sk_buff *skb);
+void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst);
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst);
@@ -636,6 +637,11 @@ static inline u32 tcp_rto_min_us(struct sock *sk)
return jiffies_to_usecs(tcp_rto_min(sk));
}
+static inline bool tcp_ca_dst_locked(const struct dst_entry *dst)
+{
+ return dst_metric_locked(dst, RTAX_CC_ALGO);
+}
+
/* Compute the actual receive window we are currently advertising.
* Rcv_nxt can be after the window if our peer push more data
* than the offered window.
@@ -787,6 +793,8 @@ enum tcp_ca_ack_event_flags {
#define TCP_CA_MAX 128
#define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX)
+#define TCP_CA_UNSPEC 0
+
/* Algorithm can be set on socket without CAP_NET_ADMIN privileges */
#define TCP_CONG_NON_RESTRICTED 0x1
/* Requires ECN/ECT set on all packets */
@@ -794,7 +802,8 @@ enum tcp_ca_ack_event_flags {
struct tcp_congestion_ops {
struct list_head list;
- unsigned long flags;
+ u32 key;
+ u32 flags;
/* initialize private data (optional) */
void (*init)(struct sock *sk);
@@ -841,6 +850,17 @@ u32 tcp_reno_ssthresh(struct sock *sk);
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
extern struct tcp_congestion_ops tcp_reno;
+struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
+u32 tcp_ca_get_key_by_name(const char *name);
+#ifdef CONFIG_INET
+char *tcp_ca_get_name_by_key(u32 key, char *buffer);
+#else
+static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer)
+{
+ return NULL;
+}
+#endif
+
static inline bool tcp_ca_needs_ecn(const struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 9c9b8b4480cd..d81f22d5b390 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -389,6 +389,8 @@ enum {
#define RTAX_INITRWND RTAX_INITRWND
RTAX_QUICKACK,
#define RTAX_QUICKACK RTAX_QUICKACK
+ RTAX_CC_ALGO,
+#define RTAX_CC_ALGO RTAX_CC_ALGO
__RTAX_MAX
};
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index da983d4bac02..6a6cdade1676 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -50,6 +50,7 @@
#include <net/arp.h>
#include <net/route.h>
#include <net/udp.h>
+#include <net/tcp.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/fib_rules.h>
@@ -669,9 +670,19 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
for (i = 0; i < RTAX_MAX; i++) {
if (metrics[i]) {
+ if (i == RTAX_CC_ALGO - 1) {
+ char tmp[TCP_CA_NAME_MAX], *name;
+
+ name = tcp_ca_get_name_by_key(metrics[i], tmp);
+ if (!name)
+ continue;
+ if (nla_put_string(skb, i + 1, name))
+ goto nla_put_failure;
+ } else {
+ if (nla_put_u32(skb, i + 1, metrics[i]))
+ goto nla_put_failure;
+ }
valid++;
- if (nla_put_u32(skb, i+1, metrics[i]))
- goto nla_put_failure;
}
}
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index d332aefb0846..df4803437888 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -298,7 +298,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *att
int type = nla_type(attr);
if (type) {
- if (type > RTAX_MAX || nla_len(attr) < 4)
+ if (type > RTAX_MAX || type == RTAX_CC_ALGO ||
+ nla_len(attr) < 4)
goto err_inval;
fi->fib_metrics[type-1] = nla_get_u32(attr);
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 86e3807052e9..3f19fcbf126d 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -29,6 +29,7 @@
#include <linux/route.h> /* RTF_xxx */
#include <net/neighbour.h>
#include <net/netlink.h>
+#include <net/tcp.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/fib_rules.h>
@@ -273,7 +274,8 @@ static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi)
size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
+ nla_total_size(4) /* RTA_TABLE */
+ nla_total_size(2) /* RTA_DST */
- + nla_total_size(4); /* RTA_PRIORITY */
+ + nla_total_size(4) /* RTA_PRIORITY */
+ + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
/* space for nested metrics */
payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f99f41bd15b8..d2b7b5521b1b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -360,7 +360,8 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
+ nla_total_size(4) /* RTA_TABLE */
+ nla_total_size(4) /* RTA_DST */
+ nla_total_size(4) /* RTA_PRIORITY */
- + nla_total_size(4); /* RTA_PREFSRC */
+ + nla_total_size(4) /* RTA_PREFSRC */
+ + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
/* space for nested metrics */
payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
@@ -859,7 +860,16 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
if (type > RTAX_MAX)
goto err_inval;
- val = nla_get_u32(nla);
+ if (type == RTAX_CC_ALGO) {
+ char tmp[TCP_CA_NAME_MAX];
+
+ nla_strlcpy(tmp, nla, sizeof(tmp));
+ val = tcp_ca_get_key_by_name(tmp);
+ if (val == TCP_CA_UNSPEC)
+ goto err_inval;
+ } else {
+ val = nla_get_u32(nla);
+ }
if (type == RTAX_ADVMSS && val > 65535 - 40)
val = 65535 - 40;
if (type == RTAX_MTU && val > 65535 - 15)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 27ead0dd16bc..63c29dba68a8 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -13,6 +13,7 @@
#include <linux/types.h>
#include <linux/list.h>
#include <linux/gfp.h>
+#include <linux/jhash.h>
#include <net/tcp.h>
static DEFINE_SPINLOCK(tcp_cong_list_lock);
@@ -31,6 +32,34 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name)
return NULL;
}
+/* Must be called with rcu lock held */
+static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name)
+{
+ const struct tcp_congestion_ops *ca = tcp_ca_find(name);
+#ifdef CONFIG_MODULES
+ if (!ca && capable(CAP_NET_ADMIN)) {
+ rcu_read_unlock();
+ request_module("tcp_%s", name);
+ rcu_read_lock();
+ ca = tcp_ca_find(name);
+ }
+#endif
+ return ca;
+}
+
+/* Simple linear search, not much in here. */
+struct tcp_congestion_ops *tcp_ca_find_key(u32 key)
+{
+ struct tcp_congestion_ops *e;
+
+ list_for_each_entry_rcu(e, &tcp_cong_list, list) {
+ if (e->key == key)
+ return e;
+ }
+
+ return NULL;
+}
+
/*
* Attach new congestion control algorithm to the list
* of available options.
@@ -45,9 +74,12 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
return -EINVAL;
}
+ ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
+
spin_lock(&tcp_cong_list_lock);
- if (tcp_ca_find(ca->name)) {
- pr_notice("%s already registered\n", ca->name);
+ if (ca->key == TCP_CA_UNSPEC || tcp_ca_find_key(ca->key)) {
+ pr_notice("%s already registered or non-unique key\n",
+ ca->name);
ret = -EEXIST;
} else {
list_add_tail_rcu(&ca->list, &tcp_cong_list);
@@ -70,9 +102,50 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
spin_lock(&tcp_cong_list_lock);
list_del_rcu(&ca->list);
spin_unlock(&tcp_cong_list_lock);
+
+ /* Wait for outstanding readers to complete before the
+ * module gets removed entirely.
+ *
+ * A try_module_get() should fail by now as our module is
+ * in "going" state since no refs are held anymore and
+ * module_exit() handler being called.
+ */
+ synchronize_rcu();
}
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
+u32 tcp_ca_get_key_by_name(const char *name)
+{
+ const struct tcp_congestion_ops *ca;
+ u32 key;
+
+ might_sleep();
+
+ rcu_read_lock();
+ ca = __tcp_ca_find_autoload(name);
+ key = ca ? ca->key : TCP_CA_UNSPEC;
+ rcu_read_unlock();
+
+ return key;
+}
+EXPORT_SYMBOL_GPL(tcp_ca_get_key_by_name);
+
+char *tcp_ca_get_name_by_key(u32 key, char *buffer)
+{
+ const struct tcp_congestion_ops *ca;
+ char *ret = NULL;
+
+ rcu_read_lock();
+ ca = tcp_ca_find_key(key);
+ if (ca)
+ ret = strncpy(buffer, ca->name,
+ TCP_CA_NAME_MAX);
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key);
+
/* Assign choice of congestion control. */
void tcp_assign_congestion_control(struct sock *sk)
{
@@ -107,6 +180,18 @@ void tcp_init_congestion_control(struct sock *sk)
icsk->icsk_ca_ops->init(sk);
}
+static void tcp_reinit_congestion_control(struct sock *sk,
+ const struct tcp_congestion_ops *ca)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ tcp_cleanup_congestion_control(sk);
+ icsk->icsk_ca_ops = ca;
+
+ if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
+ icsk->icsk_ca_ops->init(sk);
+}
+
/* Manage refcounts on socket close. */
void tcp_cleanup_congestion_control(struct sock *sk)
{
@@ -241,42 +326,26 @@ out:
int tcp_set_congestion_control(struct sock *sk, const char *name)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- struct tcp_congestion_ops *ca;
+ const struct tcp_congestion_ops *ca;
int err = 0;
- rcu_read_lock();
- ca = tcp_ca_find(name);
+ if (icsk->icsk_ca_dst_locked)
+ return -EPERM;
- /* no change asking for existing value */
+ rcu_read_lock();
+ ca = __tcp_ca_find_autoload(name);
+ /* No change asking for existing value */
if (ca == icsk->icsk_ca_ops)
goto out;
-
-#ifdef CONFIG_MODULES
- /* not found attempt to autoload module */
- if (!ca && capable(CAP_NET_ADMIN)) {
- rcu_read_unlock();
- request_module("tcp_%s", name);
- rcu_read_lock();
- ca = tcp_ca_find(name);
- }
-#endif
if (!ca)
err = -ENOENT;
-
else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)))
err = -EPERM;
-
else if (!try_module_get(ca->owner))
err = -EBUSY;
-
- else {
- tcp_cleanup_congestion_control(sk);
- icsk->icsk_ca_ops = ca;
-
- if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
- icsk->icsk_ca_ops->init(sk);
- }
+ else
+ tcp_reinit_congestion_control(sk, ca);
out:
rcu_read_unlock();
return err;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a3f72d7fc06c..ad3e65bdd368 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1340,6 +1340,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
}
sk_setup_caps(newsk, dst);
+ tcp_ca_openreq_child(newsk, dst);
+
tcp_sync_mss(newsk, dst_mtu(dst));
newtp->advmss = dst_metric_advmss(dst);
if (tcp_sk(sk)->rx_opt.user_mss &&
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 63d2680b65db..bc9216dc9de1 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -399,6 +399,32 @@ static void tcp_ecn_openreq_child(struct tcp_sock *tp,
tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0;
}
+void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
+ bool ca_got_dst = false;
+
+ if (ca_key != TCP_CA_UNSPEC) {
+ const struct tcp_congestion_ops *ca;
+
+ rcu_read_lock();
+ ca = tcp_ca_find_key(ca_key);
+ if (likely(ca && try_module_get(ca->owner))) {
+ icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
+ icsk->icsk_ca_ops = ca;
+ ca_got_dst = true;
+ }
+ rcu_read_unlock();
+ }
+
+ if (!ca_got_dst && !try_module_get(icsk->icsk_ca_ops->owner))
+ tcp_assign_congestion_control(sk);
+
+ tcp_set_ca_state(sk, TCP_CA_Open);
+}
+EXPORT_SYMBOL_GPL(tcp_ca_openreq_child);
+
/* This is not only more efficient than what we used to do, it eliminates
* a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
*
@@ -451,10 +477,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->snd_cwnd = TCP_INIT_CWND;
newtp->snd_cwnd_cnt = 0;
- if (!try_module_get(newicsk->icsk_ca_ops->owner))
- tcp_assign_congestion_control(newsk);
-
- tcp_set_ca_state(newsk, TCP_CA_Open);
tcp_init_xmit_timers(newsk);
__skb_queue_head_init(&newtp->out_of_order_queue);
newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7f18262e2326..dc30cb563e4f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2939,6 +2939,25 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
}
EXPORT_SYMBOL(tcp_make_synack);
+static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct tcp_congestion_ops *ca;
+ u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
+
+ if (ca_key == TCP_CA_UNSPEC)
+ return;
+
+ rcu_read_lock();
+ ca = tcp_ca_find_key(ca_key);
+ if (likely(ca && try_module_get(ca->owner))) {
+ module_put(icsk->icsk_ca_ops->owner);
+ icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
+ icsk->icsk_ca_ops = ca;
+ }
+ rcu_read_unlock();
+}
+
/* Do all connect socket setups that can be done AF independent. */
static void tcp_connect_init(struct sock *sk)
{
@@ -2964,6 +2983,8 @@ static void tcp_connect_init(struct sock *sk)
tcp_mtup_init(sk);
tcp_sync_mss(sk, dst_mtu(dst));
+ tcp_ca_dst_init(sk, dst);
+
if (!tp->window_clamp)
tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
tp->advmss = dst_metric_advmss(dst);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index b2d1838897c9..03c520a4ebeb 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -630,32 +630,35 @@ static bool rt6_qualify_for_ecmp(struct rt6_info *rt)
RTF_GATEWAY;
}
-static int fib6_commit_metrics(struct dst_entry *dst,
- struct nlattr *mx, int mx_len)
+static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc)
{
- struct nlattr *nla;
- int remaining;
- u32 *mp;
+ int i;
- if (dst->flags & DST_HOST) {
- mp = dst_metrics_write_ptr(dst);
- } else {
- mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
- if (!mp)
- return -ENOMEM;
- dst_init_metrics(dst, mp, 0);
+ for (i = 0; i < RTAX_MAX; i++) {
+ if (test_bit(i, mxc->mx_valid))
+ mp[i] = mxc->mx[i];
}
+}
- nla_for_each_attr(nla, mx, mx_len, remaining) {
- int type = nla_type(nla);
+static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc)
+{
+ if (!mxc->mx)
+ return 0;
- if (type) {
- if (type > RTAX_MAX)
- return -EINVAL;
+ if (dst->flags & DST_HOST) {
+ u32 *mp = dst_metrics_write_ptr(dst);
- mp[type - 1] = nla_get_u32(nla);
- }
+ if (unlikely(!mp))
+ return -ENOMEM;
+
+ fib6_copy_metrics(mp, mxc);
+ } else {
+ dst_init_metrics(dst, mxc->mx, false);
+
+ /* We've stolen mx now. */
+ mxc->mx = NULL;
}
+
return 0;
}
@@ -664,7 +667,7 @@ static int fib6_commit_metrics(struct dst_entry *dst,
*/
static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
- struct nl_info *info, struct nlattr *mx, int mx_len)
+ struct nl_info *info, struct mx6_config *mxc)
{
struct rt6_info *iter = NULL;
struct rt6_info **ins;
@@ -773,11 +776,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
pr_warn("NLM_F_CREATE should be set when creating new route\n");
add:
- if (mx) {
- err = fib6_commit_metrics(&rt->dst, mx, mx_len);
- if (err)
- return err;
- }
+ err = fib6_commit_metrics(&rt->dst, mxc);
+ if (err)
+ return err;
+
rt->dst.rt6_next = iter;
*ins = rt;
rt->rt6i_node = fn;
@@ -797,11 +799,11 @@ add:
pr_warn("NLM_F_REPLACE set, but no existing node found!\n");
return -ENOENT;
}
- if (mx) {
- err = fib6_commit_metrics(&rt->dst, mx, mx_len);
- if (err)
- return err;
- }
+
+ err = fib6_commit_metrics(&rt->dst, mxc);
+ if (err)
+ return err;
+
*ins = rt;
rt->rt6i_node = fn;
rt->dst.rt6_next = iter->dst.rt6_next;
@@ -838,8 +840,8 @@ void fib6_force_start_gc(struct net *net)
* with source addr info in sub-trees
*/
-int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
- struct nlattr *mx, int mx_len)
+int fib6_add(struct fib6_node *root, struct rt6_info *rt,
+ struct nl_info *info, struct mx6_config *mxc)
{
struct fib6_node *fn, *pn = NULL;
int err = -ENOMEM;
@@ -934,7 +936,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
}
#endif
- err = fib6_add_rt2node(fn, rt, info, mx, mx_len);
+ err = fib6_add_rt2node(fn, rt, info, mxc);
if (!err) {
fib6_start_gc(info->nl_net, rt);
if (!(rt->rt6i_flags & RTF_CACHE))
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c91083156edb..34dcbb59df75 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -853,14 +853,14 @@ EXPORT_SYMBOL(rt6_lookup);
*/
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
- struct nlattr *mx, int mx_len)
+ struct mx6_config *mxc)
{
int err;
struct fib6_table *table;
table = rt->rt6i_table;
write_lock_bh(&table->tb6_lock);
- err = fib6_add(&table->tb6_root, rt, info, mx, mx_len);
+ err = fib6_add(&table->tb6_root, rt, info, mxc);
write_unlock_bh(&table->tb6_lock);
return err;
@@ -868,10 +868,10 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
int ip6_ins_rt(struct rt6_info *rt)
{
- struct nl_info info = {
- .nl_net = dev_net(rt->dst.dev),
- };
- return __ip6_ins_rt(rt, &info, NULL, 0);
+ struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
+ struct mx6_config mxc = { .mx = NULL, };
+
+ return __ip6_ins_rt(rt, &info, &mxc);
}
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
@@ -1470,9 +1470,51 @@ out:
return entries > rt_max_size;
}
-/*
- *
- */
+static int ip6_convert_metrics(struct mx6_config *mxc,
+ const struct fib6_config *cfg)
+{
+ struct nlattr *nla;
+ int remaining;
+ u32 *mp;
+
+ if (cfg->fc_mx == NULL)
+ return 0;
+
+ mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+ if (unlikely(!mp))
+ return -ENOMEM;
+
+ nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+ int type = nla_type(nla);
+
+ if (type) {
+ u32 val;
+
+ if (unlikely(type > RTAX_MAX))
+ goto err;
+ if (type == RTAX_CC_ALGO) {
+ char tmp[TCP_CA_NAME_MAX];
+
+ nla_strlcpy(tmp, nla, sizeof(tmp));
+ val = tcp_ca_get_key_by_name(tmp);
+ if (val == TCP_CA_UNSPEC)
+ goto err;
+ } else {
+ val = nla_get_u32(nla);
+ }
+
+ mp[type - 1] = val;
+ __set_bit(type - 1, mxc->mx_valid);
+ }
+ }
+
+ mxc->mx = mp;
+
+ return 0;
+ err:
+ kfree(mp);
+ return -EINVAL;
+}
int ip6_route_add(struct fib6_config *cfg)
{
@@ -1482,6 +1524,7 @@ int ip6_route_add(struct fib6_config *cfg)
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
struct fib6_table *table;
+ struct mx6_config mxc = { .mx = NULL, };
int addr_type;
if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
@@ -1677,8 +1720,14 @@ install_route:
cfg->fc_nlinfo.nl_net = dev_net(dev);
- return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len);
+ err = ip6_convert_metrics(&mxc, cfg);
+ if (err)
+ goto out;
+
+ err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
+ kfree(mxc.mx);
+ return err;
out:
if (dev)
dev_put(dev);
@@ -2534,7 +2583,8 @@ static inline size_t rt6_nlmsg_size(void)
+ nla_total_size(4) /* RTA_OIF */
+ nla_total_size(4) /* RTA_PRIORITY */
+ RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
- + nla_total_size(sizeof(struct rta_cacheinfo));
+ + nla_total_size(sizeof(struct rta_cacheinfo))
+ + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
}
static int rt6_fill_node(struct net *net,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9c0b54e87b47..5d46832c6f72 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1199,6 +1199,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
newnp->opt->opt_flen);
+ tcp_ca_openreq_child(newsk, dst);
+
tcp_sync_mss(newsk, dst_mtu(dst));
newtp->advmss = dst_metric_advmss(dst);
if (tcp_sk(sk)->rx_opt.user_mss &&