summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-09-29 20:41:10 -0700
committerDavid S. Miller <davem@davemloft.net>2015-09-29 20:41:10 -0700
commitf82ff596c7a8917b21100117646c719017ce8fe4 (patch)
treec5106779c21e734397a7bedf8ddf7f9002d478cd
parente6934f3ec00b04234acb24a1a2c28af59763d3b5 (diff)
parent9478d12d33ad12d29c5343ae7346b51bc1f4c5a9 (diff)
Merge branch 'L3_master_device'
David Ahern says: ==================== net: L3 master device The VRF device is essentially a Layer 3 master device used to associate netdevices with a specific routing table and to influence FIB lookups via 'ip rules' and controlling the oif/iif used for the lookup. This series generalizes the VRF into L3 master device, l3mdev. Similar to switchdev it has a Kconfig option and separate set of operations in net_device allowing it to be completely compiled out if not wanted. The l3mdev methods rely on the 'master' aspect and use of netdev_master_upper_dev_get_rcu to retrieve the master device from a given netdevice if it is enslaved to an L3_MASTER. The VRF device is converted to use the l3mdev operations. At the end the vrf_ptr is no longer and removed, as are all direct references to VRF. The end result is a much simpler implementation for VRF. Thanks to Nikolay for suggestions (eg., use of the master linkage which is the key to making this work) and to Roopa, Andy and Shrijeet for early reviews. v3 - added license header to l3mdev.c - export symbols in l3mdev.c for use with GPL modules - removed netdevice header from l3mdev.h (not needed) and fixed typo in comment v2 - rebased to top of net-next - addressed Niks comments (checking master, removing extra lines, and flipping the order of patches 1 and 2) ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--MAINTAINERS8
-rw-r--r--drivers/net/Kconfig1
-rw-r--r--drivers/net/vrf.c81
-rw-r--r--include/linux/netdevice.h36
-rw-r--r--include/net/l3mdev.h149
-rw-r--r--include/net/route.h3
-rw-r--r--include/net/vrf.h178
-rw-r--r--net/Kconfig1
-rw-r--r--net/Makefile3
-rw-r--r--net/ipv4/af_inet.c4
-rw-r--r--net/ipv4/fib_frontend.c12
-rw-r--r--net/ipv4/icmp.c8
-rw-r--r--net/ipv4/ip_fragment.c6
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/route.c15
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv4/xfrm4_policy.c8
-rw-r--r--net/ipv6/xfrm6_policy.c8
-rw-r--r--net/l3mdev/Kconfig10
-rw-r--r--net/l3mdev/Makefile5
-rw-r--r--net/l3mdev/l3mdev.c92
21 files changed, 356 insertions, 276 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index bcd263de4827..fa43fa2f30e4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6095,6 +6095,13 @@ F: Documentation/auxdisplay/ks0108
F: drivers/auxdisplay/ks0108.c
F: include/linux/ks0108.h
+L3MDEV
+M: David Ahern <dsa@cumulusnetworks.com>
+L: netdev@vger.kernel.org
+S: Maintained
+F: net/l3mdev
+F: include/net/l3mdev.h
+
LAPB module
L: linux-x25@vger.kernel.org
S: Orphan
@@ -11266,7 +11273,6 @@ M: Shrijeet Mukherjee <shm@cumulusnetworks.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/vrf.c
-F: include/net/vrf.h
F: Documentation/networking/vrf.txt
VT1211 HARDWARE MONITOR DRIVER
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index d18eb607bee6..b9ebd0d18a52 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -299,6 +299,7 @@ config NLMON
config NET_VRF
tristate "Virtual Routing and Forwarding (Lite)"
depends on IP_MULTIPLE_TABLES && IPV6_MULTIPLE_TABLES
+ depends on NET_L3_MASTER_DEV
---help---
This option enables the support for mapping interfaces into VRF's. The
support enables VRF devices.
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 4ecb3a3e516a..64f2ab663ffe 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -34,7 +34,7 @@
#include <net/rtnetlink.h>
#include <net/route.h>
#include <net/addrconf.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
#define DRV_NAME "vrf"
#define DRV_VERSION "1.0"
@@ -44,6 +44,21 @@
#define vrf_master_get_rcu(dev) \
((struct net_device *)rcu_dereference(dev->rx_handler_data))
+struct slave {
+ struct list_head list;
+ struct net_device *dev;
+};
+
+struct slave_queue {
+ struct list_head all_slaves;
+};
+
+struct net_vrf {
+ struct slave_queue queue;
+ struct rtable *rth;
+ u32 tb_id;
+};
+
struct pcpu_dstats {
u64 tx_pkts;
u64 tx_bytes;
@@ -395,18 +410,15 @@ static void __vrf_insert_slave(struct slave_queue *queue, struct slave *slave)
static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
{
- struct net_vrf_dev *vrf_ptr = kmalloc(sizeof(*vrf_ptr), GFP_KERNEL);
struct slave *slave = kzalloc(sizeof(*slave), GFP_KERNEL);
struct net_vrf *vrf = netdev_priv(dev);
struct slave_queue *queue = &vrf->queue;
int ret = -ENOMEM;
- if (!slave || !vrf_ptr)
+ if (!slave)
goto out_fail;
slave->dev = port_dev;
- vrf_ptr->ifindex = dev->ifindex;
- vrf_ptr->tb_id = vrf->tb_id;
/* register the packet handler for slave ports */
ret = netdev_rx_handler_register(port_dev, vrf_handle_frame, dev);
@@ -423,7 +435,6 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
port_dev->flags |= IFF_SLAVE;
__vrf_insert_slave(queue, slave);
- rcu_assign_pointer(port_dev->vrf_ptr, vrf_ptr);
cycle_netdev(port_dev);
return 0;
@@ -431,14 +442,13 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
out_unregister:
netdev_rx_handler_unregister(port_dev);
out_fail:
- kfree(vrf_ptr);
kfree(slave);
return ret;
}
static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
{
- if (netif_is_vrf(port_dev) || vrf_is_slave(port_dev))
+ if (netif_is_l3_master(port_dev) || vrf_is_slave(port_dev))
return -EINVAL;
return do_vrf_add_slave(dev, port_dev);
@@ -447,21 +457,15 @@ static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
/* inverse of do_vrf_add_slave */
static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev)
{
- struct net_vrf_dev *vrf_ptr = rtnl_dereference(port_dev->vrf_ptr);
struct net_vrf *vrf = netdev_priv(dev);
struct slave_queue *queue = &vrf->queue;
struct slave *slave;
- RCU_INIT_POINTER(port_dev->vrf_ptr, NULL);
-
netdev_upper_dev_unlink(port_dev, dev);
port_dev->flags &= ~IFF_SLAVE;
netdev_rx_handler_unregister(port_dev);
- /* after netdev_rx_handler_unregister for synchronize_rcu */
- kfree(vrf_ptr);
-
cycle_netdev(port_dev);
slave = __vrf_find_slave_dev(queue, port_dev);
@@ -529,6 +533,33 @@ static const struct net_device_ops vrf_netdev_ops = {
.ndo_del_slave = vrf_del_slave,
};
+static u32 vrf_fib_table(const struct net_device *dev)
+{
+ struct net_vrf *vrf = netdev_priv(dev);
+
+ return vrf->tb_id;
+}
+
+static struct rtable *vrf_get_rtable(const struct net_device *dev,
+ const struct flowi4 *fl4)
+{
+ struct rtable *rth = NULL;
+
+ if (!(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) {
+ struct net_vrf *vrf = netdev_priv(dev);
+
+ rth = vrf->rth;
+ atomic_inc(&rth->dst.__refcnt);
+ }
+
+ return rth;
+}
+
+static const struct l3mdev_ops vrf_l3mdev_ops = {
+ .l3mdev_fib_table = vrf_fib_table,
+ .l3mdev_get_rtable = vrf_get_rtable,
+};
+
static void vrf_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
{
@@ -546,6 +577,7 @@ static void vrf_setup(struct net_device *dev)
/* Initialize the device structure. */
dev->netdev_ops = &vrf_netdev_ops;
+ dev->l3mdev_ops = &vrf_l3mdev_ops;
dev->ethtool_ops = &vrf_ethtool_ops;
dev->destructor = free_netdev;
@@ -572,10 +604,6 @@ static int vrf_validate(struct nlattr *tb[], struct nlattr *data[])
static void vrf_dellink(struct net_device *dev, struct list_head *head)
{
- struct net_vrf_dev *vrf_ptr = rtnl_dereference(dev->vrf_ptr);
-
- RCU_INIT_POINTER(dev->vrf_ptr, NULL);
- kfree_rcu(vrf_ptr, rcu);
unregister_netdevice_queue(dev, head);
}
@@ -583,7 +611,6 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct net_vrf *vrf = netdev_priv(dev);
- struct net_vrf_dev *vrf_ptr;
int err;
if (!data || !data[IFLA_VRF_TABLE])
@@ -591,26 +618,15 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]);
- dev->priv_flags |= IFF_VRF_MASTER;
-
- err = -ENOMEM;
- vrf_ptr = kmalloc(sizeof(*dev->vrf_ptr), GFP_KERNEL);
- if (!vrf_ptr)
- goto out_fail;
-
- vrf_ptr->ifindex = dev->ifindex;
- vrf_ptr->tb_id = vrf->tb_id;
+ dev->priv_flags |= IFF_L3MDEV_MASTER;
err = register_netdevice(dev);
if (err < 0)
goto out_fail;
- rcu_assign_pointer(dev->vrf_ptr, vrf_ptr);
-
return 0;
out_fail:
- kfree(vrf_ptr);
free_netdev(dev);
return err;
}
@@ -654,10 +670,9 @@ static int vrf_device_event(struct notifier_block *unused,
/* only care about unregister events to drop slave references */
if (event == NETDEV_UNREGISTER) {
- struct net_vrf_dev *vrf_ptr = rtnl_dereference(dev->vrf_ptr);
struct net_device *vrf_dev;
- if (!vrf_ptr || netif_is_vrf(dev))
+ if (netif_is_l3_master(dev))
goto out;
vrf_dev = netdev_master_upper_dev_get(dev);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d2ffeafc9998..b9450784ae06 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1258,7 +1258,7 @@ struct net_device_ops {
* @IFF_LIVE_ADDR_CHANGE: device supports hardware address
* change when it's running
* @IFF_MACVLAN: Macvlan device
- * @IFF_VRF_MASTER: device is a VRF master
+ * @IFF_L3MDEV_MASTER: device is an L3 master device
* @IFF_NO_QUEUE: device can run without qdisc attached
* @IFF_OPENVSWITCH: device is a Open vSwitch master
*/
@@ -1283,7 +1283,7 @@ enum netdev_priv_flags {
IFF_XMIT_DST_RELEASE_PERM = 1<<17,
IFF_IPVLAN_MASTER = 1<<18,
IFF_IPVLAN_SLAVE = 1<<19,
- IFF_VRF_MASTER = 1<<20,
+ IFF_L3MDEV_MASTER = 1<<20,
IFF_NO_QUEUE = 1<<21,
IFF_OPENVSWITCH = 1<<22,
};
@@ -1308,7 +1308,7 @@ enum netdev_priv_flags {
#define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM
#define IFF_IPVLAN_MASTER IFF_IPVLAN_MASTER
#define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE
-#define IFF_VRF_MASTER IFF_VRF_MASTER
+#define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER
#define IFF_NO_QUEUE IFF_NO_QUEUE
#define IFF_OPENVSWITCH IFF_OPENVSWITCH
@@ -1427,7 +1427,6 @@ enum netdev_priv_flags {
* @dn_ptr: DECnet specific data
* @ip6_ptr: IPv6 specific data
* @ax25_ptr: AX.25 specific data
- * @vrf_ptr: VRF specific data
* @ieee80211_ptr: IEEE 802.11 specific data, assign before registering
*
* @last_rx: Time of last Rx
@@ -1587,6 +1586,9 @@ struct net_device {
#ifdef CONFIG_NET_SWITCHDEV
const struct switchdev_ops *switchdev_ops;
#endif
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ const struct l3mdev_ops *l3mdev_ops;
+#endif
const struct header_ops *header_ops;
@@ -1646,7 +1648,6 @@ struct net_device {
struct dn_dev __rcu *dn_ptr;
struct inet6_dev __rcu *ip6_ptr;
void *ax25_ptr;
- struct net_vrf_dev __rcu *vrf_ptr;
struct wireless_dev *ieee80211_ptr;
struct wpan_dev *ieee802154_ptr;
#if IS_ENABLED(CONFIG_MPLS_ROUTING)
@@ -3824,9 +3825,9 @@ static inline bool netif_supports_nofcs(struct net_device *dev)
return dev->priv_flags & IFF_SUPP_NOFCS;
}
-static inline bool netif_is_vrf(const struct net_device *dev)
+static inline bool netif_is_l3_master(const struct net_device *dev)
{
- return dev->priv_flags & IFF_VRF_MASTER;
+ return dev->priv_flags & IFF_L3MDEV_MASTER;
}
static inline bool netif_is_bridge_master(const struct net_device *dev)
@@ -3839,27 +3840,6 @@ static inline bool netif_is_ovs_master(const struct net_device *dev)
return dev->priv_flags & IFF_OPENVSWITCH;
}
-static inline bool netif_index_is_vrf(struct net *net, int ifindex)
-{
- bool rc = false;
-
-#if IS_ENABLED(CONFIG_NET_VRF)
- struct net_device *dev;
-
- if (ifindex == 0)
- return false;
-
- rcu_read_lock();
-
- dev = dev_get_by_index_rcu(net, ifindex);
- if (dev)
- rc = netif_is_vrf(dev);
-
- rcu_read_unlock();
-#endif
- return rc;
-}
-
/* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
static inline void netif_keep_dst(struct net_device *dev)
{
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
new file mode 100644
index 000000000000..87cee05a0a17
--- /dev/null
+++ b/include/net/l3mdev.h
@@ -0,0 +1,149 @@
+/*
+ * include/net/l3mdev.h - L3 master device API
+ * Copyright (c) 2015 Cumulus Networks
+ * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef _NET_L3MDEV_H_
+#define _NET_L3MDEV_H_
+
+/**
+ * struct l3mdev_ops - l3mdev operations
+ *
+ * @l3mdev_fib_table: Get FIB table id to use for lookups
+ *
+ * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device
+ */
+
+struct l3mdev_ops {
+ u32 (*l3mdev_fib_table)(const struct net_device *dev);
+ struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev,
+ const struct flowi4 *fl4);
+};
+
+#ifdef CONFIG_NET_L3_MASTER_DEV
+
+int l3mdev_master_ifindex_rcu(struct net_device *dev);
+static inline int l3mdev_master_ifindex(struct net_device *dev)
+{
+ int ifindex;
+
+ rcu_read_lock();
+ ifindex = l3mdev_master_ifindex_rcu(dev);
+ rcu_read_unlock();
+
+ return ifindex;
+}
+
+/* get index of an interface to use for FIB lookups. For devices
+ * enslaved to an L3 master device FIB lookups are based on the
+ * master index
+ */
+static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
+{
+ return l3mdev_master_ifindex_rcu(dev) ? : dev->ifindex;
+}
+
+static inline int l3mdev_fib_oif(struct net_device *dev)
+{
+ int oif;
+
+ rcu_read_lock();
+ oif = l3mdev_fib_oif_rcu(dev);
+ rcu_read_unlock();
+
+ return oif;
+}
+
+u32 l3mdev_fib_table_rcu(const struct net_device *dev);
+u32 l3mdev_fib_table_by_index(struct net *net, int ifindex);
+static inline u32 l3mdev_fib_table(const struct net_device *dev)
+{
+ u32 tb_id;
+
+ rcu_read_lock();
+ tb_id = l3mdev_fib_table_rcu(dev);
+ rcu_read_unlock();
+
+ return tb_id;
+}
+
+static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
+ const struct flowi4 *fl4)
+{
+ if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rtable)
+ return dev->l3mdev_ops->l3mdev_get_rtable(dev, fl4);
+
+ return NULL;
+}
+
+static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
+{
+ struct net_device *dev;
+ bool rc = false;
+
+ if (ifindex == 0)
+ return false;
+
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, ifindex);
+ if (dev)
+ rc = netif_is_l3_master(dev);
+
+ rcu_read_unlock();
+
+ return rc;
+}
+
+#else
+
+static inline int l3mdev_master_ifindex_rcu(struct net_device *dev)
+{
+ return 0;
+}
+static inline int l3mdev_master_ifindex(struct net_device *dev)
+{
+ return 0;
+}
+
+static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
+{
+ return dev ? dev->ifindex : 0;
+}
+static inline int l3mdev_fib_oif(struct net_device *dev)
+{
+ return dev ? dev->ifindex : 0;
+}
+
+static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev)
+{
+ return 0;
+}
+static inline u32 l3mdev_fib_table(const struct net_device *dev)
+{
+ return 0;
+}
+static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
+{
+ return 0;
+}
+
+static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
+ const struct flowi4 *fl4)
+{
+ return NULL;
+}
+
+static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
+{
+ return false;
+}
+
+#endif
+
+#endif /* _NET_L3MDEV_H_ */
diff --git a/include/net/route.h b/include/net/route.h
index d1bd90bb3187..e211dc167db1 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -28,6 +28,7 @@
#include <net/inetpeer.h>
#include <net/flow.h>
#include <net/inet_sock.h>
+#include <net/l3mdev.h>
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
@@ -256,7 +257,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32
if (inet_sk(sk)->transparent)
flow_flags |= FLOWI_FLAG_ANYSRC;
- if (netif_index_is_vrf(sock_net(sk), oif))
+ if (netif_index_is_l3_master(sock_net(sk), oif))
flow_flags |= FLOWI_FLAG_VRFSRC | FLOWI_FLAG_SKIP_NH_OIF;
flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
diff --git a/include/net/vrf.h b/include/net/vrf.h
deleted file mode 100644
index 593e6094ddd4..000000000000
--- a/include/net/vrf.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * include/net/net_vrf.h - adds vrf dev structure definitions
- * Copyright (c) 2015 Cumulus Networks
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef __LINUX_NET_VRF_H
-#define __LINUX_NET_VRF_H
-
-struct net_vrf_dev {
- struct rcu_head rcu;
- int ifindex; /* ifindex of master dev */
- u32 tb_id; /* table id for VRF */
-};
-
-struct slave {
- struct list_head list;
- struct net_device *dev;
-};
-
-struct slave_queue {
- struct list_head all_slaves;
-};
-
-struct net_vrf {
- struct slave_queue queue;
- struct rtable *rth;
- u32 tb_id;
-};
-
-
-#if IS_ENABLED(CONFIG_NET_VRF)
-/* called with rcu_read_lock() */
-static inline int vrf_master_ifindex_rcu(const struct net_device *dev)
-{
- struct net_vrf_dev *vrf_ptr;
- int ifindex = 0;
-
- if (!dev)
- return 0;
-
- if (netif_is_vrf(dev)) {
- ifindex = dev->ifindex;
- } else {
- vrf_ptr = rcu_dereference(dev->vrf_ptr);
- if (vrf_ptr)
- ifindex = vrf_ptr->ifindex;
- }
-
- return ifindex;
-}
-
-static inline int vrf_master_ifindex(const struct net_device *dev)
-{
- int ifindex;
-
- rcu_read_lock();
- ifindex = vrf_master_ifindex_rcu(dev);
- rcu_read_unlock();
-
- return ifindex;
-}
-
-/* called with rcu_read_lock */
-static inline u32 vrf_dev_table_rcu(const struct net_device *dev)
-{
- u32 tb_id = 0;
-
- if (dev) {
- struct net_vrf_dev *vrf_ptr;
-
- vrf_ptr = rcu_dereference(dev->vrf_ptr);
- if (vrf_ptr)
- tb_id = vrf_ptr->tb_id;
- }
- return tb_id;
-}
-
-static inline u32 vrf_dev_table(const struct net_device *dev)
-{
- u32 tb_id;
-
- rcu_read_lock();
- tb_id = vrf_dev_table_rcu(dev);
- rcu_read_unlock();
-
- return tb_id;
-}
-
-static inline u32 vrf_dev_table_ifindex(struct net *net, int ifindex)
-{
- struct net_device *dev;
- u32 tb_id = 0;
-
- if (!ifindex)
- return 0;
-
- rcu_read_lock();
-
- dev = dev_get_by_index_rcu(net, ifindex);
- if (dev)
- tb_id = vrf_dev_table_rcu(dev);
-
- rcu_read_unlock();
-
- return tb_id;
-}
-
-/* called with rtnl */
-static inline u32 vrf_dev_table_rtnl(const struct net_device *dev)
-{
- u32 tb_id = 0;
-
- if (dev) {
- struct net_vrf_dev *vrf_ptr;
-
- vrf_ptr = rtnl_dereference(dev->vrf_ptr);
- if (vrf_ptr)
- tb_id = vrf_ptr->tb_id;
- }
- return tb_id;
-}
-
-/* caller has already checked netif_is_vrf(dev) */
-static inline struct rtable *vrf_dev_get_rth(const struct net_device *dev)
-{
- struct rtable *rth = ERR_PTR(-ENETUNREACH);
- struct net_vrf *vrf = netdev_priv(dev);
-
- if (vrf) {
- rth = vrf->rth;
- atomic_inc(&rth->dst.__refcnt);
- }
- return rth;
-}
-
-#else
-static inline int vrf_master_ifindex_rcu(const struct net_device *dev)
-{
- return 0;
-}
-
-static inline int vrf_master_ifindex(const struct net_device *dev)
-{
- return 0;
-}
-
-static inline u32 vrf_dev_table_rcu(const struct net_device *dev)
-{
- return 0;
-}
-
-static inline u32 vrf_dev_table(const struct net_device *dev)
-{
- return 0;
-}
-
-static inline u32 vrf_dev_table_ifindex(struct net *net, int ifindex)
-{
- return 0;
-}
-
-static inline u32 vrf_dev_table_rtnl(const struct net_device *dev)
-{
- return 0;
-}
-
-static inline struct rtable *vrf_dev_get_rth(const struct net_device *dev)
-{
- return ERR_PTR(-ENETUNREACH);
-}
-#endif
-
-#endif /* __LINUX_NET_VRF_H */
diff --git a/net/Kconfig b/net/Kconfig
index 7021c1bf44d6..127da94ae25e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -232,6 +232,7 @@ source "net/netlink/Kconfig"
source "net/mpls/Kconfig"
source "net/hsr/Kconfig"
source "net/switchdev/Kconfig"
+source "net/l3mdev/Kconfig"
config RPS
bool
diff --git a/net/Makefile b/net/Makefile
index 3995613e5510..a5d04098dfce 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -74,3 +74,6 @@ obj-$(CONFIG_HSR) += hsr/
ifneq ($(CONFIG_NET_SWITCHDEV),)
obj-y += switchdev/
endif
+ifneq ($(CONFIG_NET_L3_MASTER_DEV),)
+obj-y += l3mdev/
+endif
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 3af85eecbe11..11c4ca13ec3b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -119,7 +119,7 @@
#ifdef CONFIG_IP_MROUTE
#include <linux/mroute.h>
#endif
-#include <net/vrf.h>
+#include <net/l3mdev.h>
/* The inetsw table contains everything that inet_create needs to
@@ -446,7 +446,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
}
- tb_id = vrf_dev_table_ifindex(net, sk->sk_bound_dev_if) ? : tb_id;
+ tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
/* Not specified by any standard per-se, however it breaks too
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 6fcbd215cdbc..fac172370276 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -45,7 +45,7 @@
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
#include <net/xfrm.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
#include <trace/events/fib.h>
#ifndef CONFIG_IP_MULTIPLE_TABLES
@@ -255,7 +255,7 @@ EXPORT_SYMBOL(inet_addr_type);
unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
__be32 addr)
{
- u32 rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL;
+ u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
return __inet_dev_addr_type(net, dev, addr, rt_table);
}
@@ -268,7 +268,7 @@ unsigned int inet_addr_type_dev_table(struct net *net,
const struct net_device *dev,
__be32 addr)
{
- u32 rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL;
+ u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
return __inet_dev_addr_type(net, NULL, addr, rt_table);
}
@@ -332,7 +332,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
bool dev_match;
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = vrf_master_ifindex_rcu(dev);
+ fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev);
if (!fl4.flowi4_iif)
fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
fl4.daddr = src;
@@ -366,7 +366,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (nh->nh_dev == dev) {
dev_match = true;
break;
- } else if (vrf_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
+ } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
dev_match = true;
break;
}
@@ -803,7 +803,7 @@ out:
static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
{
struct net *net = dev_net(ifa->ifa_dev->dev);
- u32 tb_id = vrf_dev_table_rtnl(ifa->ifa_dev->dev);
+ u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev);
struct fib_table *tb;
struct fib_config cfg = {
.fc_protocol = RTPROT_KERNEL,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e5eb8ac4089d..6b96dee2800b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -96,7 +96,7 @@
#include <net/xfrm.h>
#include <net/inet_common.h>
#include <net/ip_fib.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
/*
* Build xmit assembly blocks
@@ -309,7 +309,7 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
rc = false;
if (icmp_global_allow()) {
- int vif = vrf_master_ifindex(dst->dev);
+ int vif = l3mdev_master_ifindex(dst->dev);
struct inet_peer *peer;
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
@@ -427,7 +427,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
fl4.flowi4_mark = mark;
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
fl4.flowi4_proto = IPPROTO_ICMP;
- fl4.flowi4_oif = vrf_master_ifindex(skb->dev);
+ fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
@@ -461,7 +461,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
- fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev);
+ fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev);
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
rt = __ip_route_output_key(net, fl4);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fa7f15305f9a..9772b789adf3 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -48,7 +48,7 @@
#include <linux/inet.h>
#include <linux/netfilter_ipv4.h>
#include <net/inet_ecn.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
* code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
@@ -78,7 +78,7 @@ struct ipq {
u8 ecn; /* RFC3168 support */
u16 max_df_size; /* largest frag with DF set seen */
int iif;
- int vif; /* VRF device index */
+ int vif; /* L3 master device index */
unsigned int rid;
struct inet_peer *peer;
};
@@ -657,7 +657,7 @@ out_fail:
int ip_defrag(struct sk_buff *skb, u32 user)
{
struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
- int vif = vrf_master_ifindex_rcu(dev);
+ int vif = l3mdev_master_ifindex_rcu(dev);
struct net *net = dev_net(dev);
struct ipq *qp;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 06d2c87ed505..aff6766922e8 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1571,7 +1571,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
}
oif = arg->bound_dev_if;
- if (!oif && netif_index_is_vrf(net, skb->skb_iif))
+ if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
oif = skb->skb_iif;
flowi4_init_output(&fl4, oif,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8c84a6664b30..1441de1550e6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -112,7 +112,7 @@
#endif
#include <net/secure_seq.h>
#include <net/ip_tunnels.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
#define RT_FL_TOS(oldflp4) \
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
@@ -847,7 +847,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
return;
}
log_martians = IN_DEV_LOG_MARTIANS(in_dev);
- vif = vrf_master_ifindex_rcu(rt->dst.dev);
+ vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
rcu_read_unlock();
net = dev_net(rt->dst.dev);
@@ -941,7 +941,7 @@ static int ip_error(struct sk_buff *skb)
}
peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
- vrf_master_ifindex(skb->dev), 1);
+ l3mdev_master_ifindex(skb->dev), 1);
send = true;
if (peer) {
@@ -1739,7 +1739,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
* Now we are ready to route packet.
*/
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = vrf_master_ifindex_rcu(dev) ? : dev->ifindex;
+ fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev);
fl4.flowi4_mark = skb->mark;
fl4.flowi4_tos = tos;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
@@ -2124,11 +2124,10 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
fl4->saddr = inet_select_addr(dev_out, 0,
RT_SCOPE_HOST);
}
- if (netif_is_vrf(dev_out) &&
- !(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) {
- rth = vrf_dev_get_rth(dev_out);
+
+ rth = l3mdev_get_rtable(dev_out, fl4);
+ if (rth)
goto out;
- }
}
if (!fl4->daddr) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f7d1d5e19e95..156ba75b6000 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1021,7 +1021,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
* device lookup source address from VRF table. This mimics
* behavior of ip_route_connect{_init}.
*/
- if (netif_index_is_vrf(net, ipc.oif)) {
+ if (netif_index_is_l3_master(net, ipc.oif)) {
flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE, sk->sk_protocol,
(flow_flags | FLOWI_FLAG_VRFSRC |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 0304d1680ca2..f2606b9056bb 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -15,7 +15,7 @@
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/ip.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
@@ -111,10 +111,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
struct flowi4 *fl4 = &fl->u.ip4;
int oif = 0;
- if (skb_dst(skb)) {
- oif = vrf_master_ifindex(skb_dst(skb)->dev) ?
- : skb_dst(skb)->dev->ifindex;
- }
+ if (skb_dst(skb))
+ oif = l3mdev_fib_oif(skb_dst(skb)->dev);
memset(fl4, 0, sizeof(struct flowi4));
fl4->flowi4_mark = skb->mark;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 30caa289c5db..69cee4e0d728 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -20,7 +20,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/mip6.h>
#endif
@@ -132,10 +132,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
nexthdr = nh[nhoff];
- if (skb_dst(skb)) {
- oif = vrf_master_ifindex(skb_dst(skb)->dev) ?
- : skb_dst(skb)->dev->ifindex;
- }
+ if (skb_dst(skb))
+ oif = l3mdev_fib_oif(skb_dst(skb)->dev);
memset(fl6, 0, sizeof(struct flowi6));
fl6->flowi6_mark = skb->mark;
diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig
new file mode 100644
index 000000000000..5d47325037bc
--- /dev/null
+++ b/net/l3mdev/Kconfig
@@ -0,0 +1,10 @@
+#
+# Configuration for L3 master device support
+#
+
+config NET_L3_MASTER_DEV
+ bool "L3 Master device support"
+ depends on INET || IPV6
+ ---help---
+ This module provides glue between core networking code and device
+ drivers to support L3 master devices like VRF.
diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile
new file mode 100644
index 000000000000..84a53a6f609a
--- /dev/null
+++ b/net/l3mdev/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the L3 device API
+#
+
+obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
new file mode 100644
index 000000000000..ddf75ad41713
--- /dev/null
+++ b/net/l3mdev/l3mdev.c
@@ -0,0 +1,92 @@
+/*
+ * net/l3mdev/l3mdev.c - L3 master device implementation
+ * Copyright (c) 2015 Cumulus Networks
+ * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/netdevice.h>
+#include <net/l3mdev.h>
+
+/**
+ * l3mdev_master_ifindex - get index of L3 master device
+ * @dev: targeted interface
+ */
+
+int l3mdev_master_ifindex_rcu(struct net_device *dev)
+{
+ int ifindex = 0;
+
+ if (!dev)
+ return 0;
+
+ if (netif_is_l3_master(dev)) {
+ ifindex = dev->ifindex;
+ } else if (dev->flags & IFF_SLAVE) {
+ struct net_device *master;
+
+ master = netdev_master_upper_dev_get_rcu(dev);
+ if (master && netif_is_l3_master(master))
+ ifindex = master->ifindex;
+ }
+
+ return ifindex;
+}
+EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu);
+
+/**
+ * l3mdev_fib_table - get FIB table id associated with an L3
+ * master interface
+ * @dev: targeted interface
+ */
+
+u32 l3mdev_fib_table_rcu(const struct net_device *dev)
+{
+ u32 tb_id = 0;
+
+ if (!dev)
+ return 0;
+
+ if (netif_is_l3_master(dev)) {
+ if (dev->l3mdev_ops->l3mdev_fib_table)
+ tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev);
+ } else if (dev->flags & IFF_SLAVE) {
+ /* Users of netdev_master_upper_dev_get_rcu need non-const,
+ * but current inet_*type functions take a const
+ */
+ struct net_device *_dev = (struct net_device *) dev;
+ const struct net_device *master;
+
+ master = netdev_master_upper_dev_get_rcu(_dev);
+ if (master && netif_is_l3_master(master) &&
+ master->l3mdev_ops->l3mdev_fib_table)
+ tb_id = master->l3mdev_ops->l3mdev_fib_table(master);
+ }
+
+ return tb_id;
+}
+EXPORT_SYMBOL_GPL(l3mdev_fib_table_rcu);
+
+u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
+{
+ struct net_device *dev;
+ u32 tb_id = 0;
+
+ if (!ifindex)
+ return 0;
+
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, ifindex);
+ if (dev)
+ tb_id = l3mdev_fib_table_rcu(dev);
+
+ rcu_read_unlock();
+
+ return tb_id;
+}
+EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);