summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/cirrus,cs89x0.txt13
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_ethtool.c41
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_main.c715
-rw-r--r--drivers/net/ethernet/cavium/liquidio/liquidio_common.h260
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_device.c70
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_device.h1
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_droq.c169
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_droq.h21
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_iq.h62
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_main.h23
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_network.h229
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_nic.c35
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_nic.h154
-rw-r--r--drivers/net/ethernet/cavium/liquidio/request_manager.c96
-rw-r--r--drivers/net/ethernet/cavium/liquidio/response_manager.c6
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c82
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c8
-rw-r--r--drivers/net/ethernet/cirrus/cs89x0.c12
-rw-r--r--drivers/net/phy/mdio-mux.c4
-rw-r--r--drivers/net/tun.c14
-rw-r--r--drivers/net/vrf.c169
-rw-r--r--include/linux/mlx5/mlx5_ifc.h275
-rw-r--r--include/linux/netdevice.h8
-rw-r--r--include/linux/ptr_ring.h393
-rw-r--r--include/linux/rtnetlink.h5
-rw-r--r--include/linux/skb_array.h169
-rw-r--r--include/net/6lowpan.h16
-rw-r--r--include/net/act_api.h4
-rw-r--r--include/net/addrconf.h10
-rw-r--r--include/net/ip6_route.h2
-rw-r--r--include/net/ip_tunnels.h1
-rw-r--r--include/net/l3mdev.h6
-rw-r--r--include/net/ndisc.h248
-rw-r--r--include/net/sch_generic.h16
-rw-r--r--include/uapi/linux/if_tunnel.h1
-rw-r--r--net/6lowpan/6lowpan_i.h4
-rw-r--r--net/6lowpan/Makefile2
-rw-r--r--net/6lowpan/core.c50
-rw-r--r--net/6lowpan/debugfs.c39
-rw-r--r--net/6lowpan/iphc.c167
-rw-r--r--net/6lowpan/ndisc.c234
-rw-r--r--net/core/rtnetlink.c22
-rw-r--r--net/ieee802154/6lowpan/core.c12
-rw-r--r--net/ieee802154/6lowpan/tx.c113
-rw-r--r--net/ipv4/ip_gre.c42
-rw-r--r--net/ipv4/ip_tunnel.c2
-rw-r--r--net/ipv6/addrconf.c220
-rw-r--r--net/ipv6/icmp.c2
-rw-r--r--net/ipv6/ila/ila_common.c10
-rw-r--r--net/ipv6/ndisc.c123
-rw-r--r--net/ipv6/route.c13
-rw-r--r--net/iucv/af_iucv.c223
-rw-r--r--net/l3mdev/l3mdev.c2
-rw-r--r--net/sched/act_api.c8
-rw-r--r--net/sched/act_ife.c3
-rw-r--r--net/sched/act_ipt.c3
-rw-r--r--net/sched/act_mirred.c3
-rw-r--r--net/sched/act_simple.c3
-rw-r--r--net/sched/act_skbedit.c3
-rw-r--r--net/sched/act_vlan.c4
-rw-r--r--net/sched/sch_choke.c8
-rw-r--r--net/sched/sch_codel.c2
-rw-r--r--net/sched/sch_fq.c19
-rw-r--r--net/sched/sch_fq_codel.c17
-rw-r--r--net/sched/sch_generic.c2
-rw-r--r--net/sched/sch_hhf.c4
-rw-r--r--net/sched/sch_htb.c4
-rw-r--r--net/sched/sch_netem.c4
-rw-r--r--net/sched/sch_pie.c2
-rw-r--r--net/sched/sch_sfq.c2
-rw-r--r--net/tipc/Makefile2
-rw-r--r--net/tipc/addr.h1
-rw-r--r--net/tipc/bearer.c8
-rw-r--r--net/tipc/bearer.h2
-rw-r--r--net/tipc/core.c1
-rw-r--r--net/tipc/core.h15
-rw-r--r--net/tipc/link.c49
-rw-r--r--net/tipc/monitor.c651
-rw-r--r--net/tipc/monitor.h73
-rw-r--r--net/tipc/node.c26
-rw-r--r--tools/virtio/ringtest/Makefile5
-rw-r--r--tools/virtio/ringtest/ptr_ring.c192
82 files changed, 4563 insertions, 1171 deletions
diff --git a/Documentation/devicetree/bindings/net/cirrus,cs89x0.txt b/Documentation/devicetree/bindings/net/cirrus,cs89x0.txt
new file mode 100644
index 000000000000..c070076bacb9
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/cirrus,cs89x0.txt
@@ -0,0 +1,13 @@
+* Cirrus Logic CS8900/CS8920 Network Controller
+
+Required properties:
+- compatible : Should be "cirrus,cs8900" or "cirrus,cs8920".
+- reg : Address and length of the IO space.
+- interrupts : Should contain the controller interrupt line.
+
+Examples:
+ eth0: eth@10000000 {
+ compatible = "cirrus,cs8900";
+ reg = <0x10000000 0x400>;
+ interrupts = <10>;
+ };
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index 245c063ed4db..4523c8662ed2 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -127,7 +127,7 @@ static int lio_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
dev_err(&oct->pci_dev->dev, "Unknown link interface reported\n");
}
- if (linfo->link.s.status) {
+ if (linfo->link.s.link_up) {
ethtool_cmd_speed_set(ecmd, linfo->link.s.speed);
ecmd->duplex = linfo->link.s.duplex;
} else {
@@ -222,23 +222,20 @@ static int octnet_gpio_access(struct net_device *netdev, int addr, int val)
struct lio *lio = GET_LIO(netdev);
struct octeon_device *oct = lio->oct_dev;
struct octnic_ctrl_pkt nctrl;
- struct octnic_ctrl_params nparams;
int ret = 0;
memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
nctrl.ncmd.u64 = 0;
nctrl.ncmd.s.cmd = OCTNET_CMD_GPIO_ACCESS;
- nctrl.ncmd.s.param1 = lio->linfo.ifidx;
- nctrl.ncmd.s.param2 = addr;
- nctrl.ncmd.s.param3 = val;
+ nctrl.ncmd.s.param1 = addr;
+ nctrl.ncmd.s.param2 = val;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
nctrl.wait_time = 100;
nctrl.netpndev = (u64)netdev;
nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
- nparams.resp_order = OCTEON_RESP_ORDERED;
-
- ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl, nparams);
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
if (ret < 0) {
dev_err(&oct->pci_dev->dev, "Failed to configure gpio value\n");
return -EINVAL;
@@ -303,9 +300,10 @@ octnet_mdio45_access(struct lio *lio, int op, int loc, int *value)
mdio_cmd->mdio_addr = loc;
if (op)
mdio_cmd->value1 = *value;
- mdio_cmd->value2 = lio->linfo.ifidx;
octeon_swap_8B_data((u64 *)mdio_cmd, sizeof(struct oct_mdio_cmd) / 8);
+ sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
octeon_prepare_soft_command(oct_dev, sc, OPCODE_NIC, OPCODE_NIC_MDIO45,
0, 0, 0);
@@ -317,7 +315,7 @@ octnet_mdio45_access(struct lio *lio, int op, int loc, int *value)
retval = octeon_send_soft_command(oct_dev, sc);
- if (retval) {
+ if (retval == IQ_SEND_FAILED) {
dev_err(&oct_dev->pci_dev->dev,
"octnet_mdio45_access instruction failed status: %x\n",
retval);
@@ -503,10 +501,10 @@ static void lio_set_msglevel(struct net_device *netdev, u32 msglvl)
if ((msglvl ^ lio->msg_enable) & NETIF_MSG_HW) {
if (msglvl & NETIF_MSG_HW)
liquidio_set_feature(netdev,
- OCTNET_CMD_VERBOSE_ENABLE);
+ OCTNET_CMD_VERBOSE_ENABLE, 0);
else
liquidio_set_feature(netdev,
- OCTNET_CMD_VERBOSE_DISABLE);
+ OCTNET_CMD_VERBOSE_DISABLE, 0);
}
lio->msg_enable = msglvl;
@@ -653,7 +651,7 @@ static int lio_get_intr_coalesce(struct net_device *netdev,
intrmod_cfg->intrmod_mincnt_trigger;
}
- iq = oct->instr_queue[lio->linfo.txpciq[0]];
+ iq = oct->instr_queue[lio->linfo.txpciq[0].s.q_no];
intr_coal->tx_max_coalesced_frames = iq->fill_threshold;
break;
@@ -722,7 +720,7 @@ static int octnet_set_intrmod_cfg(void *oct, struct oct_intrmod_cfg *intr_cfg)
sc->wait_time = 1000;
retval = octeon_send_soft_command(oct_dev, sc);
- if (retval) {
+ if (retval == IQ_SEND_FAILED) {
octeon_free_soft_command(oct_dev, sc);
return -EINVAL;
}
@@ -859,7 +857,7 @@ static int lio_set_intr_coalesce(struct net_device *netdev,
if ((intr_coal->tx_max_coalesced_frames >= CN6XXX_DB_MIN) &&
(intr_coal->tx_max_coalesced_frames <= CN6XXX_DB_MAX)) {
for (j = 0; j < lio->linfo.num_txpciq; j++) {
- q_no = lio->linfo.txpciq[j];
+ q_no = lio->linfo.txpciq[j].s.q_no;
oct->instr_queue[q_no]->fill_threshold =
intr_coal->tx_max_coalesced_frames;
}
@@ -950,7 +948,6 @@ static int lio_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
struct octeon_device *oct = lio->oct_dev;
struct oct_link_info *linfo;
struct octnic_ctrl_pkt nctrl;
- struct octnic_ctrl_params nparams;
int ret = 0;
/* get the link info */
@@ -978,9 +975,9 @@ static int lio_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
nctrl.ncmd.u64 = 0;
nctrl.ncmd.s.cmd = OCTNET_CMD_SET_SETTINGS;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
nctrl.wait_time = 1000;
nctrl.netpndev = (u64)netdev;
- nctrl.ncmd.s.param1 = lio->linfo.ifidx;
nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
/* Passing the parameters sent by ethtool like Speed, Autoneg & Duplex
@@ -990,19 +987,17 @@ static int lio_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
/* Autoneg ON */
nctrl.ncmd.s.more = OCTNIC_NCMD_PHY_ON |
OCTNIC_NCMD_AUTONEG_ON;
- nctrl.ncmd.s.param2 = ecmd->advertising;
+ nctrl.ncmd.s.param1 = ecmd->advertising;
} else {
/* Autoneg OFF */
nctrl.ncmd.s.more = OCTNIC_NCMD_PHY_ON;
- nctrl.ncmd.s.param3 = ecmd->duplex;
+ nctrl.ncmd.s.param2 = ecmd->duplex;
- nctrl.ncmd.s.param2 = ecmd->speed;
+ nctrl.ncmd.s.param1 = ecmd->speed;
}
- nparams.resp_order = OCTEON_RESP_ORDERED;
-
- ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl, nparams);
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
if (ret < 0) {
dev_err(&oct->pci_dev->dev, "Failed to set settings\n");
return -1;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 655d89e8814f..d0ab97c15f4a 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -84,6 +84,8 @@ static int conf_type;
module_param(conf_type, int, 0);
MODULE_PARM_DESC(conf_type, "select octeon configuration 0 default 1 ovs");
+static int ptp_enable = 1;
+
/* Bit mask values for lio->ifstate */
#define LIO_IFSTATE_DROQ_OPS 0x01
#define LIO_IFSTATE_REGISTERED 0x02
@@ -166,6 +168,8 @@ struct octnic_gather {
* received from the IP layer.
*/
struct octeon_sg_entry *sg;
+
+ u64 sg_dma_ptr;
};
/** This structure is used by NIC driver to store information required
@@ -682,7 +686,8 @@ static inline void txqs_wake(struct net_device *netdev)
int i;
for (i = 0; i < netdev->num_tx_queues; i++)
- netif_wake_subqueue(netdev, i);
+ if (__netif_subqueue_stopped(netdev, i))
+ netif_wake_subqueue(netdev, i);
} else {
netif_wake_queue(netdev);
}
@@ -705,7 +710,7 @@ static void start_txq(struct net_device *netdev)
{
struct lio *lio = GET_LIO(netdev);
- if (lio->linfo.link.s.status) {
+ if (lio->linfo.link.s.link_up) {
txqs_start(netdev);
return;
}
@@ -752,11 +757,14 @@ static inline int check_txq_status(struct lio *lio)
/* check each sub-queue state */
for (q = 0; q < numqs; q++) {
- iq = lio->linfo.txpciq[q & (lio->linfo.num_txpciq - 1)];
+ iq = lio->linfo.txpciq[q %
+ (lio->linfo.num_txpciq)].s.q_no;
if (octnet_iq_is_full(lio->oct_dev, iq))
continue;
- wake_q(lio->netdev, q);
- ret_val++;
+ if (__netif_subqueue_stopped(lio->netdev, q)) {
+ wake_q(lio->netdev, q);
+ ret_val++;
+ }
}
} else {
if (octnet_iq_is_full(lio->oct_dev, lio->txq))
@@ -787,64 +795,116 @@ static inline struct list_head *list_delete_head(struct list_head *root)
}
/**
- * \brief Delete gather list
+ * \brief Delete gather lists
* @param lio per-network private data
*/
-static void delete_glist(struct lio *lio)
+static void delete_glists(struct lio *lio)
{
struct octnic_gather *g;
+ int i;
- do {
- g = (struct octnic_gather *)
- list_delete_head(&lio->glist);
- if (g) {
- if (g->sg)
- kfree((void *)((unsigned long)g->sg -
- g->adjust));
- kfree(g);
- }
- } while (g);
+ if (!lio->glist)
+ return;
+
+ for (i = 0; i < lio->linfo.num_txpciq; i++) {
+ do {
+ g = (struct octnic_gather *)
+ list_delete_head(&lio->glist[i]);
+ if (g) {
+ if (g->sg) {
+ dma_unmap_single(&lio->oct_dev->
+ pci_dev->dev,
+ g->sg_dma_ptr,
+ g->sg_size,
+ DMA_TO_DEVICE);
+ kfree((void *)((unsigned long)g->sg -
+ g->adjust));
+ }
+ kfree(g);
+ }
+ } while (g);
+ }
+
+ kfree((void *)lio->glist);
}
/**
- * \brief Setup gather list
+ * \brief Setup gather lists
* @param lio per-network private data
*/
-static int setup_glist(struct lio *lio)
+static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
{
- int i;
+ int i, j;
struct octnic_gather *g;
- INIT_LIST_HEAD(&lio->glist);
+ lio->glist_lock = kcalloc(num_iqs, sizeof(*lio->glist_lock),
+ GFP_KERNEL);
+ if (!lio->glist_lock)
+ return 1;
- for (i = 0; i < lio->tx_qsize; i++) {
- g = kzalloc(sizeof(*g), GFP_KERNEL);
- if (!g)
- break;
+ lio->glist = kcalloc(num_iqs, sizeof(*lio->glist),
+ GFP_KERNEL);
+ if (!lio->glist) {
+ kfree((void *)lio->glist_lock);
+ return 1;
+ }
- g->sg_size =
- ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
+ for (i = 0; i < num_iqs; i++) {
+ int numa_node = cpu_to_node(i % num_online_cpus());
- g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
- if (!g->sg) {
- kfree(g);
- break;
+ spin_lock_init(&lio->glist_lock[i]);
+
+ INIT_LIST_HEAD(&lio->glist[i]);
+
+ for (j = 0; j < lio->tx_qsize; j++) {
+ g = kzalloc_node(sizeof(*g), GFP_KERNEL,
+ numa_node);
+ if (!g)
+ g = kzalloc(sizeof(*g), GFP_KERNEL);
+ if (!g)
+ break;
+
+ g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) *
+ OCT_SG_ENTRY_SIZE);
+
+ g->sg = kmalloc_node(g->sg_size + 8,
+ GFP_KERNEL, numa_node);
+ if (!g->sg)
+ g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
+ if (!g->sg) {
+ kfree(g);
+ break;
+ }
+
+ /* The gather component should be aligned on 64-bit
+ * boundary
+ */
+ if (((unsigned long)g->sg) & 7) {
+ g->adjust = 8 - (((unsigned long)g->sg) & 7);
+ g->sg = (struct octeon_sg_entry *)
+ ((unsigned long)g->sg + g->adjust);
+ }
+ g->sg_dma_ptr = dma_map_single(&oct->pci_dev->dev,
+ g->sg, g->sg_size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&oct->pci_dev->dev,
+ g->sg_dma_ptr)) {
+ kfree((void *)((unsigned long)g->sg -
+ g->adjust));
+ kfree(g);
+ break;
+ }
+
+ list_add_tail(&g->list, &lio->glist[i]);
}
- /* The gather component should be aligned on 64-bit boundary */
- if (((unsigned long)g->sg) & 7) {
- g->adjust = 8 - (((unsigned long)g->sg) & 7);
- g->sg = (struct octeon_sg_entry *)
- ((unsigned long)g->sg + g->adjust);
+ if (j != lio->tx_qsize) {
+ delete_glists(lio);
+ return 1;
}
- list_add_tail(&g->list, &lio->glist);
}
- if (i == lio->tx_qsize)
- return 0;
-
- delete_glist(lio);
- return 1;
+ return 0;
}
/**
@@ -858,7 +918,7 @@ static void print_link_info(struct net_device *netdev)
if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED) {
struct oct_link_info *linfo = &lio->linfo;
- if (linfo->link.s.status) {
+ if (linfo->link.s.link_up) {
netif_info(lio, link, lio->netdev, "%d Mbps %s Duplex UP\n",
linfo->link.s.speed,
(linfo->link.s.duplex) ? "Full" : "Half");
@@ -880,13 +940,15 @@ static inline void update_link_status(struct net_device *netdev,
union oct_link_status *ls)
{
struct lio *lio = GET_LIO(netdev);
+ int changed = (lio->linfo.link.u64 != ls->u64);
- if ((lio->intf_open) && (lio->linfo.link.u64 != ls->u64)) {
- lio->linfo.link.u64 = ls->u64;
+ lio->linfo.link.u64 = ls->u64;
+ if ((lio->intf_open) && (changed)) {
print_link_info(netdev);
+ lio->link_changes++;
- if (lio->linfo.link.s.status) {
+ if (lio->linfo.link.s.link_up) {
netif_carrier_on(netdev);
/* start_txq(netdev); */
txqs_wake(netdev);
@@ -1159,18 +1221,15 @@ static void octeon_destroy_resources(struct octeon_device *oct)
static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
{
struct octnic_ctrl_pkt nctrl;
- struct octnic_ctrl_params nparams;
memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
nctrl.ncmd.s.cmd = OCTNET_CMD_RX_CTL;
- nctrl.ncmd.s.param1 = lio->linfo.ifidx;
- nctrl.ncmd.s.param2 = start_stop;
+ nctrl.ncmd.s.param1 = start_stop;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
nctrl.netpndev = (u64)lio->netdev;
- nparams.resp_order = OCTEON_RESP_NORESPONSE;
-
- if (octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl, nparams) < 0)
+ if (octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl) < 0)
netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n");
}
@@ -1205,10 +1264,12 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED)
unregister_netdev(netdev);
- delete_glist(lio);
+ delete_glists(lio);
free_netdev(netdev);
+ oct->props[ifidx].gmxport = -1;
+
oct->props[ifidx].netdev = NULL;
}
@@ -1230,7 +1291,8 @@ static int liquidio_stop_nic_module(struct octeon_device *oct)
for (i = 0; i < oct->ifcount; i++) {
lio = GET_LIO(oct->props[i].netdev);
for (j = 0; j < lio->linfo.num_rxpciq; j++)
- octeon_unregister_droq_ops(oct, lio->linfo.rxpciq[j]);
+ octeon_unregister_droq_ops(oct,
+ lio->linfo.rxpciq[j].s.q_no);
}
for (i = 0; i < oct->ifcount; i++)
@@ -1326,6 +1388,16 @@ static int octeon_pci_os_setup(struct octeon_device *oct)
return 0;
}
+static inline int skb_iq(struct lio *lio, struct sk_buff *skb)
+{
+ int q = 0;
+
+ if (netif_is_multiqueue(lio->netdev))
+ q = skb->queue_mapping % lio->linfo.num_txpciq;
+
+ return q;
+}
+
/**
* \brief Check Tx queue state for a given network buffer
* @param lio per-network private data
@@ -1337,14 +1409,17 @@ static inline int check_txq_state(struct lio *lio, struct sk_buff *skb)
if (netif_is_multiqueue(lio->netdev)) {
q = skb->queue_mapping;
- iq = lio->linfo.txpciq[(q & (lio->linfo.num_txpciq - 1))];
+ iq = lio->linfo.txpciq[(q % (lio->linfo.num_txpciq))].s.q_no;
} else {
iq = lio->txq;
+ q = iq;
}
if (octnet_iq_is_full(lio->oct_dev, iq))
return 0;
- wake_q(lio->netdev, q);
+
+ if (__netif_subqueue_stopped(lio->netdev, q))
+ wake_q(lio->netdev, q);
return 1;
}
@@ -1367,7 +1442,7 @@ static void free_netbuf(void *buf)
check_txq_state(lio, skb);
- recv_buffer_free((struct sk_buff *)skb);
+ tx_buffer_free(skb);
}
/**
@@ -1380,7 +1455,7 @@ static void free_netsgbuf(void *buf)
struct sk_buff *skb;
struct lio *lio;
struct octnic_gather *g;
- int i, frags;
+ int i, frags, iq;
finfo = (struct octnet_buf_free_info *)buf;
skb = finfo->skb;
@@ -1402,17 +1477,17 @@ static void free_netsgbuf(void *buf)
i++;
}
- dma_unmap_single(&lio->oct_dev->pci_dev->dev,
- finfo->dptr, g->sg_size,
- DMA_TO_DEVICE);
+ dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev,
+ g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE);
- spin_lock(&lio->lock);
- list_add_tail(&g->list, &lio->glist);
- spin_unlock(&lio->lock);
+ iq = skb_iq(lio, skb);
+ spin_lock(&lio->glist_lock[iq]);
+ list_add_tail(&g->list, &lio->glist[iq]);
+ spin_unlock(&lio->glist_lock[iq]);
check_txq_state(lio, skb); /* mq support: sub-queue state check */
- recv_buffer_free((struct sk_buff *)skb);
+ tx_buffer_free(skb);
}
/**
@@ -1426,7 +1501,7 @@ static void free_netsgbuf_with_resp(void *buf)
struct sk_buff *skb;
struct lio *lio;
struct octnic_gather *g;
- int i, frags;
+ int i, frags, iq;
sc = (struct octeon_soft_command *)buf;
skb = (struct sk_buff *)sc->callback_arg;
@@ -1450,13 +1525,14 @@ static void free_netsgbuf_with_resp(void *buf)
i++;
}
- dma_unmap_single(&lio->oct_dev->pci_dev->dev,
- finfo->dptr, g->sg_size,
- DMA_TO_DEVICE);
+ dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev,
+ g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE);
- spin_lock(&lio->lock);
- list_add_tail(&g->list, &lio->glist);
- spin_unlock(&lio->lock);
+ iq = skb_iq(lio, skb);
+
+ spin_lock(&lio->glist_lock[iq]);
+ list_add_tail(&g->list, &lio->glist[iq]);
+ spin_unlock(&lio->glist_lock[iq]);
/* Don't free the skb yet */
@@ -1743,14 +1819,13 @@ static void if_cfg_callback(struct octeon_device *oct,
static u16 select_q(struct net_device *dev, struct sk_buff *skb,
void *accel_priv, select_queue_fallback_t fallback)
{
- int qindex;
+ u32 qindex = 0;
struct lio *lio;
lio = GET_LIO(dev);
- /* select queue on chosen queue_mapping or core */
- qindex = skb_rx_queue_recorded(skb) ?
- skb_get_rx_queue(skb) : smp_processor_id();
- return (u16)(qindex & (lio->linfo.num_txpciq - 1));
+ qindex = skb_tx_hash(dev, skb);
+
+ return (u16)(qindex % (lio->linfo.num_txpciq));
}
/** Routine to push packets arriving on Octeon interface upto network layer.
@@ -1759,26 +1834,27 @@ static u16 select_q(struct net_device *dev, struct sk_buff *skb,
* @param len - size of total data received.
* @param rh - Control header associated with the packet
* @param param - additional control data with the packet
+ * @param arg - farg registered in droq_ops
*/
static void
liquidio_push_packet(u32 octeon_id,
void *skbuff,
u32 len,
union octeon_rh *rh,
- void *param)
+ void *param,
+ void *arg)
{
struct napi_struct *napi = param;
- struct octeon_device *oct = lio_get_device(octeon_id);
struct sk_buff *skb = (struct sk_buff *)skbuff;
struct skb_shared_hwtstamps *shhwtstamps;
u64 ns;
- struct net_device *netdev =
- (struct net_device *)oct->props[rh->r_dh.link].netdev;
+ struct net_device *netdev = (struct net_device *)arg;
struct octeon_droq *droq = container_of(param, struct octeon_droq,
napi);
if (netdev) {
int packet_was_received;
struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
/* Do not proceed if the interface is not in RUNNING state. */
if (!ifstate_check(lio, LIO_IFSTATE_RUNNING)) {
@@ -1789,21 +1865,54 @@ liquidio_push_packet(u32 octeon_id,
skb->dev = netdev;
- if (rh->r_dh.has_hwtstamp) {
- /* timestamp is included from the hardware at the
- * beginning of the packet.
- */
- if (ifstate_check(lio,
- LIO_IFSTATE_RX_TIMESTAMP_ENABLED)) {
- /* Nanoseconds are in the first 64-bits
- * of the packet.
+ skb_record_rx_queue(skb, droq->q_no);
+ if (likely(len > MIN_SKB_SIZE)) {
+ struct octeon_skb_page_info *pg_info;
+ unsigned char *va;
+
+ pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+ if (pg_info->page) {
+ /* For Paged allocation use the frags */
+ va = page_address(pg_info->page) +
+ pg_info->page_offset;
+ memcpy(skb->data, va, MIN_SKB_SIZE);
+ skb_put(skb, MIN_SKB_SIZE);
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ pg_info->page,
+ pg_info->page_offset +
+ MIN_SKB_SIZE,
+ len - MIN_SKB_SIZE,
+ LIO_RXBUFFER_SZ);
+ }
+ } else {
+ struct octeon_skb_page_info *pg_info =
+ ((struct octeon_skb_page_info *)(skb->cb));
+ skb_copy_to_linear_data(skb, page_address(pg_info->page)
+ + pg_info->page_offset, len);
+ skb_put(skb, len);
+ put_page(pg_info->page);
+ }
+
+ if (((oct->chip_id == OCTEON_CN66XX) ||
+ (oct->chip_id == OCTEON_CN68XX)) &&
+ ptp_enable) {
+ if (rh->r_dh.has_hwtstamp) {
+ /* timestamp is included from the hardware at
+ * the beginning of the packet.
*/
- memcpy(&ns, (skb->data), sizeof(ns));
- shhwtstamps = skb_hwtstamps(skb);
- shhwtstamps->hwtstamp =
- ns_to_ktime(ns + lio->ptp_adjust);
+ if (ifstate_check
+ (lio, LIO_IFSTATE_RX_TIMESTAMP_ENABLED)) {
+ /* Nanoseconds are in the first 64-bits
+ * of the packet.
+ */
+ memcpy(&ns, (skb->data), sizeof(ns));
+ shhwtstamps = skb_hwtstamps(skb);
+ shhwtstamps->hwtstamp =
+ ns_to_ktime(ns +
+ lio->ptp_adjust);
+ }
+ skb_pull(skb, sizeof(ns));
}
- skb_pull(skb, sizeof(ns));
}
skb->protocol = eth_type_trans(skb, skb->dev);
@@ -1935,10 +2044,10 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget)
* are for ingress packets.
*/
static inline int setup_io_queues(struct octeon_device *octeon_dev,
- struct net_device *net_device)
+ int ifidx)
{
- static int first_time = 1;
- static struct octeon_droq_ops droq_ops;
+ struct octeon_droq_ops droq_ops;
+ struct net_device *netdev;
static int cpu_id;
static int cpu_id_modulus;
struct octeon_droq *droq;
@@ -1947,23 +2056,26 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev,
struct lio *lio;
int num_tx_descs;
- lio = GET_LIO(net_device);
- if (first_time) {
- first_time = 0;
- memset(&droq_ops, 0, sizeof(struct octeon_droq_ops));
+ netdev = octeon_dev->props[ifidx].netdev;
- droq_ops.fptr = liquidio_push_packet;
+ lio = GET_LIO(netdev);
- droq_ops.poll_mode = 1;
- droq_ops.napi_fn = liquidio_napi_drv_callback;
- cpu_id = 0;
- cpu_id_modulus = num_present_cpus();
- }
+ memset(&droq_ops, 0, sizeof(struct octeon_droq_ops));
+
+ droq_ops.fptr = liquidio_push_packet;
+ droq_ops.farg = (void *)netdev;
+
+ droq_ops.poll_mode = 1;
+ droq_ops.napi_fn = liquidio_napi_drv_callback;
+ cpu_id = 0;
+ cpu_id_modulus = num_present_cpus();
/* set up DROQs. */
for (q = 0; q < lio->linfo.num_rxpciq; q++) {
- q_no = lio->linfo.rxpciq[q];
-
+ q_no = lio->linfo.rxpciq[q].s.q_no;
+ dev_dbg(&octeon_dev->pci_dev->dev,
+ "setup_io_queues index:%d linfo.rxpciq.s.q_no:%d\n",
+ q, q_no);
retval = octeon_setup_droq(octeon_dev, q_no,
CFG_GET_NUM_RX_DESCS_NIC_IF
(octeon_get_conf(octeon_dev),
@@ -1980,7 +2092,11 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev,
droq = octeon_dev->droq[q_no];
napi = &droq->napi;
- netif_napi_add(net_device, napi, liquidio_napi_poll, 64);
+ dev_dbg(&octeon_dev->pci_dev->dev,
+ "netif_napi_add netdev:%llx oct:%llx\n",
+ (u64)netdev,
+ (u64)octeon_dev);
+ netif_napi_add(netdev, napi, liquidio_napi_poll, 64);
/* designate a CPU for this droq */
droq->cpu_id = cpu_id;
@@ -1996,9 +2112,9 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev,
num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(octeon_get_conf
(octeon_dev),
lio->ifidx);
- retval = octeon_setup_iq(octeon_dev, lio->linfo.txpciq[q],
- num_tx_descs,
- netdev_get_tx_queue(net_device, q));
+ retval = octeon_setup_iq(octeon_dev, ifidx, q,
+ lio->linfo.txpciq[q], num_tx_descs,
+ netdev_get_tx_queue(netdev, q));
if (retval) {
dev_err(&octeon_dev->pci_dev->dev,
" %s : Runtime IQ(TxQ) creation failed.\n",
@@ -2096,7 +2212,8 @@ static int liquidio_stop(struct net_device *netdev)
netif_info(lio, ifdown, lio->netdev, "Stopping interface!\n");
/* Inform that netif carrier is down */
lio->intf_open = 0;
- lio->linfo.link.s.status = 0;
+ lio->linfo.link.s.link_up = 0;
+ lio->link_changes++;
netif_carrier_off(netdev);
@@ -2235,7 +2352,6 @@ static void liquidio_set_mcast_list(struct net_device *netdev)
struct lio *lio = GET_LIO(netdev);
struct octeon_device *oct = lio->oct_dev;
struct octnic_ctrl_pkt nctrl;
- struct octnic_ctrl_params nparams;
struct netdev_hw_addr *ha;
u64 *mc;
int ret, i;
@@ -2246,10 +2362,10 @@ static void liquidio_set_mcast_list(struct net_device *netdev)
/* Create a ctrl pkt command to be sent to core app. */
nctrl.ncmd.u64 = 0;
nctrl.ncmd.s.cmd = OCTNET_CMD_SET_MULTI_LIST;
- nctrl.ncmd.s.param1 = lio->linfo.ifidx;
- nctrl.ncmd.s.param2 = get_new_flags(netdev);
- nctrl.ncmd.s.param3 = mc_count;
+ nctrl.ncmd.s.param1 = get_new_flags(netdev);
+ nctrl.ncmd.s.param2 = mc_count;
nctrl.ncmd.s.more = mc_count;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
nctrl.netpndev = (u64)netdev;
nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
@@ -2270,9 +2386,7 @@ static void liquidio_set_mcast_list(struct net_device *netdev)
*/
nctrl.wait_time = 0;
- nparams.resp_order = OCTEON_RESP_NORESPONSE;
-
- ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl, nparams);
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
if (ret < 0) {
dev_err(&oct->pci_dev->dev, "DEVFLAGS change failed in core (ret: 0x%x)\n",
ret);
@@ -2290,19 +2404,17 @@ static int liquidio_set_mac(struct net_device *netdev, void *p)
struct octeon_device *oct = lio->oct_dev;
struct sockaddr *addr = (struct sockaddr *)p;
struct octnic_ctrl_pkt nctrl;
- struct octnic_ctrl_params nparams;
- if ((!is_valid_ether_addr(addr->sa_data)) ||
- (ifstate_check(lio, LIO_IFSTATE_RUNNING)))
+ if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
nctrl.ncmd.u64 = 0;
nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MACADDR;
- nctrl.ncmd.s.param1 = lio->linfo.ifidx;
- nctrl.ncmd.s.param2 = 0;
+ nctrl.ncmd.s.param1 = 0;
nctrl.ncmd.s.more = 1;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
nctrl.netpndev = (u64)netdev;
nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
nctrl.wait_time = 100;
@@ -2311,9 +2423,7 @@ static int liquidio_set_mac(struct net_device *netdev, void *p)
/* The MAC Address is presented in network byte order. */
memcpy((u8 *)&nctrl.udd[0] + 2, addr->sa_data, ETH_ALEN);
- nparams.resp_order = OCTEON_RESP_ORDERED;
-
- ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl, nparams);
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
if (ret < 0) {
dev_err(&oct->pci_dev->dev, "MAC Address change failed\n");
return -ENOMEM;
@@ -2341,7 +2451,7 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
oct = lio->oct_dev;
for (i = 0; i < lio->linfo.num_txpciq; i++) {
- iq_no = lio->linfo.txpciq[i];
+ iq_no = lio->linfo.txpciq[i].s.q_no;
iq_stats = &oct->instr_queue[iq_no]->stats;
pkts += iq_stats->tx_done;
drop += iq_stats->tx_dropped;
@@ -2357,7 +2467,7 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
bytes = 0;
for (i = 0; i < lio->linfo.num_rxpciq; i++) {
- oq_no = lio->linfo.rxpciq[i];
+ oq_no = lio->linfo.rxpciq[i].s.q_no;
oq_stats = &oct->droq[oq_no]->stats;
pkts += oq_stats->rx_pkts_received;
drop += (oq_stats->rx_dropped +
@@ -2383,7 +2493,6 @@ static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
struct lio *lio = GET_LIO(netdev);
struct octeon_device *oct = lio->oct_dev;
struct octnic_ctrl_pkt nctrl;
- struct octnic_ctrl_params nparams;
int max_frm_size = new_mtu + OCTNET_FRM_HEADER_SIZE;
int ret = 0;
@@ -2403,15 +2512,13 @@ static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
nctrl.ncmd.u64 = 0;
nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MTU;
- nctrl.ncmd.s.param1 = lio->linfo.ifidx;
- nctrl.ncmd.s.param2 = new_mtu;
+ nctrl.ncmd.s.param1 = new_mtu;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
nctrl.wait_time = 100;
nctrl.netpndev = (u64)netdev;
nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
- nparams.resp_order = OCTEON_RESP_ORDERED;
-
- ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl, nparams);
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
if (ret < 0) {
dev_err(&oct->pci_dev->dev, "Failed to set MTU\n");
return -1;
@@ -2536,7 +2643,7 @@ static void handle_timestamp(struct octeon_device *oct,
}
octeon_free_soft_command(oct, sc);
- recv_buffer_free(skb);
+ tx_buffer_free(skb);
}
/* \brief Send a data packet that will be timestamped
@@ -2551,10 +2658,9 @@ static inline int send_nic_timestamp_pkt(struct octeon_device *oct,
{
int retval;
struct octeon_soft_command *sc;
- struct octeon_instr_ih *ih;
- struct octeon_instr_rdp *rdp;
struct lio *lio;
int ring_doorbell;
+ u32 len;
lio = finfo->lio;
@@ -2576,14 +2682,13 @@ static inline int send_nic_timestamp_pkt(struct octeon_device *oct,
sc->callback_arg = finfo->skb;
sc->iq_no = ndata->q_no;
- ih = (struct octeon_instr_ih *)&sc->cmd.ih;
- rdp = (struct octeon_instr_rdp *)&sc->cmd.rdp;
+ len = (u32)((struct octeon_instr_ih2 *)(&sc->cmd.cmd2.ih2))->dlengsz;
ring_doorbell = !xmit_more;
retval = octeon_send_command(oct, sc->iq_no, ring_doorbell, &sc->cmd,
- sc, ih->dlengsz, ndata->reqtype);
+ sc, len, ndata->reqtype);
- if (retval) {
+ if (retval == IQ_SEND_FAILED) {
dev_err(&oct->pci_dev->dev, "timestamp data packet failed status: %x\n",
retval);
octeon_free_soft_command(oct, sc);
@@ -2594,68 +2699,6 @@ static inline int send_nic_timestamp_pkt(struct octeon_device *oct,
return retval;
}
-static inline int is_ipv4(struct sk_buff *skb)
-{
- return (skb->protocol == htons(ETH_P_IP)) &&
- (ip_hdr(skb)->version == 4);
-}
-
-static inline int is_vlan(struct sk_buff *skb)
-{
- return skb->protocol == htons(ETH_P_8021Q);
-}
-
-static inline int is_ip_fragmented(struct sk_buff *skb)
-{
- /* The Don't fragment and Reserved flag fields are ignored.
- * IP is fragmented if
- * - the More fragments bit is set (indicating this IP is a fragment
- * with more to follow; the current offset could be 0 ).
- * - ths offset field is non-zero.
- */
- return (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) ? 1 : 0;
-}
-
-static inline int is_ipv6(struct sk_buff *skb)
-{
- return (skb->protocol == htons(ETH_P_IPV6)) &&
- (ipv6_hdr(skb)->version == 6);
-}
-
-static inline int is_with_extn_hdr(struct sk_buff *skb)
-{
- return (ipv6_hdr(skb)->nexthdr != IPPROTO_TCP) &&
- (ipv6_hdr(skb)->nexthdr != IPPROTO_UDP);
-}
-
-static inline int is_tcpudp(struct sk_buff *skb)
-{
- return (ip_hdr(skb)->protocol == IPPROTO_TCP) ||
- (ip_hdr(skb)->protocol == IPPROTO_UDP);
-}
-
-static inline u32 get_ipv4_5tuple_tag(struct sk_buff *skb)
-{
- u32 tag;
- struct iphdr *iphdr = ip_hdr(skb);
-
- tag = crc32(0, &iphdr->protocol, 1);
- tag = crc32(tag, (u8 *)&iphdr->saddr, 8);
- tag = crc32(tag, skb_transport_header(skb), 4);
- return tag;
-}
-
-static inline u32 get_ipv6_5tuple_tag(struct sk_buff *skb)
-{
- u32 tag;
- struct ipv6hdr *ipv6hdr = ipv6_hdr(skb);
-
- tag = crc32(0, &ipv6hdr->nexthdr, 1);
- tag = crc32(tag, (u8 *)&ipv6hdr->saddr, 32);
- tag = crc32(tag, skb_transport_header(skb), 4);
- return tag;
-}
-
/** \brief Transmit networks packets to the Octeon interface
* @param skbuff skbuff struct to be passed to network layer.
* @param netdev pointer to network device
@@ -2670,18 +2713,22 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
struct octnic_data_pkt ndata;
struct octeon_device *oct;
struct oct_iq_stats *stats;
- int cpu = 0, status = 0;
+ struct octeon_instr_irh *irh;
+ union tx_info *tx_info;
+ int status = 0;
int q_idx = 0, iq_no = 0;
- int xmit_more;
+ int xmit_more, j;
+ u64 dptr = 0;
u32 tag = 0;
lio = GET_LIO(netdev);
oct = lio->oct_dev;
if (netif_is_multiqueue(netdev)) {
- cpu = skb->queue_mapping;
- q_idx = (cpu & (lio->linfo.num_txpciq - 1));
- iq_no = lio->linfo.txpciq[q_idx];
+ q_idx = skb->queue_mapping;
+ q_idx = (q_idx % (lio->linfo.num_txpciq));
+ tag = q_idx;
+ iq_no = lio->linfo.txpciq[q_idx].s.q_no;
} else {
iq_no = lio->txq;
}
@@ -2692,11 +2739,11 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
* transmitted.
*/
if (!(atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING) ||
- (!lio->linfo.link.s.status) ||
+ (!lio->linfo.link.s.link_up) ||
(skb->len <= 0)) {
netif_info(lio, tx_err, lio->netdev,
"Transmit failed link_status : %d\n",
- lio->linfo.link.s.status);
+ lio->linfo.link.s.link_up);
goto lio_xmit_failed;
}
@@ -2739,53 +2786,11 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
ndata.datasize = skb->len;
cmdsetup.u64 = 0;
- cmdsetup.s.ifidx = lio->linfo.ifidx;
-
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- if (is_ipv4(skb) && !is_ip_fragmented(skb) && is_tcpudp(skb)) {
- tag = get_ipv4_5tuple_tag(skb);
-
- cmdsetup.s.cksum_offset = sizeof(struct ethhdr) + 1;
-
- if (ip_hdr(skb)->ihl > 5)
- cmdsetup.s.ipv4opts_ipv6exthdr =
- OCT_PKT_PARAM_IPV4OPTS;
-
- } else if (is_ipv6(skb)) {
- tag = get_ipv6_5tuple_tag(skb);
-
- cmdsetup.s.cksum_offset = sizeof(struct ethhdr) + 1;
-
- if (is_with_extn_hdr(skb))
- cmdsetup.s.ipv4opts_ipv6exthdr =
- OCT_PKT_PARAM_IPV6EXTHDR;
+ cmdsetup.s.iq_no = iq_no;
- } else if (is_vlan(skb)) {
- if (vlan_eth_hdr(skb)->h_vlan_encapsulated_proto
- == htons(ETH_P_IP) &&
- !is_ip_fragmented(skb) && is_tcpudp(skb)) {
- tag = get_ipv4_5tuple_tag(skb);
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ cmdsetup.s.transport_csum = 1;
- cmdsetup.s.cksum_offset =
- sizeof(struct vlan_ethhdr) + 1;
-
- if (ip_hdr(skb)->ihl > 5)
- cmdsetup.s.ipv4opts_ipv6exthdr =
- OCT_PKT_PARAM_IPV4OPTS;
-
- } else if (vlan_eth_hdr(skb)->h_vlan_encapsulated_proto
- == htons(ETH_P_IPV6)) {
- tag = get_ipv6_5tuple_tag(skb);
-
- cmdsetup.s.cksum_offset =
- sizeof(struct vlan_ethhdr) + 1;
-
- if (is_with_extn_hdr(skb))
- cmdsetup.s.ipv4opts_ipv6exthdr =
- OCT_PKT_PARAM_IPV6EXTHDR;
- }
- }
- }
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
cmdsetup.s.timestamp = 1;
@@ -2793,20 +2798,20 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
if (skb_shinfo(skb)->nr_frags == 0) {
cmdsetup.s.u.datasize = skb->len;
- octnet_prepare_pci_cmd(&ndata.cmd, &cmdsetup, tag);
+ octnet_prepare_pci_cmd(oct, &ndata.cmd, &cmdsetup, tag);
/* Offload checksum calculation for TCP/UDP packets */
- ndata.cmd.dptr = dma_map_single(&oct->pci_dev->dev,
- skb->data,
- skb->len,
- DMA_TO_DEVICE);
- if (dma_mapping_error(&oct->pci_dev->dev, ndata.cmd.dptr)) {
+ dptr = dma_map_single(&oct->pci_dev->dev,
+ skb->data,
+ skb->len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
dev_err(&oct->pci_dev->dev, "%s DMA mapping error 1\n",
__func__);
return NETDEV_TX_BUSY;
}
- finfo->dptr = ndata.cmd.dptr;
-
+ ndata.cmd.cmd2.dptr = dptr;
+ finfo->dptr = dptr;
ndata.reqtype = REQTYPE_NORESP_NET;
} else {
@@ -2814,9 +2819,10 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
struct skb_frag_struct *frag;
struct octnic_gather *g;
- spin_lock(&lio->lock);
- g = (struct octnic_gather *)list_delete_head(&lio->glist);
- spin_unlock(&lio->lock);
+ spin_lock(&lio->glist_lock[q_idx]);
+ g = (struct octnic_gather *)
+ list_delete_head(&lio->glist[q_idx]);
+ spin_unlock(&lio->glist_lock[q_idx]);
if (!g) {
netif_info(lio, tx_err, lio->netdev,
@@ -2826,7 +2832,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
cmdsetup.s.gather = 1;
cmdsetup.s.u.gatherptrs = (skb_shinfo(skb)->nr_frags + 1);
- octnet_prepare_pci_cmd(&ndata.cmd, &cmdsetup, tag);
+ octnet_prepare_pci_cmd(oct, &ndata.cmd, &cmdsetup, tag);
memset(g->sg, 0, g->sg_size);
@@ -2853,34 +2859,43 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
frag->size,
DMA_TO_DEVICE);
+ if (dma_mapping_error(&oct->pci_dev->dev,
+ g->sg[i >> 2].ptr[i & 3])) {
+ dma_unmap_single(&oct->pci_dev->dev,
+ g->sg[0].ptr[0],
+ skb->len - skb->data_len,
+ DMA_TO_DEVICE);
+ for (j = 1; j < i; j++) {
+ frag = &skb_shinfo(skb)->frags[j - 1];
+ dma_unmap_page(&oct->pci_dev->dev,
+ g->sg[j >> 2].ptr[j & 3],
+ frag->size,
+ DMA_TO_DEVICE);
+ }
+ dev_err(&oct->pci_dev->dev, "%s DMA mapping error 3\n",
+ __func__);
+ return NETDEV_TX_BUSY;
+ }
+
add_sg_size(&g->sg[(i >> 2)], frag->size, (i & 3));
i++;
}
- ndata.cmd.dptr = dma_map_single(&oct->pci_dev->dev,
- g->sg, g->sg_size,
- DMA_TO_DEVICE);
- if (dma_mapping_error(&oct->pci_dev->dev, ndata.cmd.dptr)) {
- dev_err(&oct->pci_dev->dev, "%s DMA mapping error 3\n",
- __func__);
- dma_unmap_single(&oct->pci_dev->dev, g->sg[0].ptr[0],
- skb->len - skb->data_len,
- DMA_TO_DEVICE);
- return NETDEV_TX_BUSY;
- }
+ dma_sync_single_for_device(&oct->pci_dev->dev, g->sg_dma_ptr,
+ g->sg_size, DMA_TO_DEVICE);
+ dptr = g->sg_dma_ptr;
- finfo->dptr = ndata.cmd.dptr;
+ ndata.cmd.cmd2.dptr = dptr;
+ finfo->dptr = dptr;
finfo->g = g;
ndata.reqtype = REQTYPE_NORESP_NET_SG;
}
- if (skb_shinfo(skb)->gso_size) {
- struct octeon_instr_irh *irh =
- (struct octeon_instr_irh *)&ndata.cmd.irh;
- union tx_info *tx_info = (union tx_info *)&ndata.cmd.ossp[0];
+ irh = (struct octeon_instr_irh *)&ndata.cmd.cmd2.irh;
+ tx_info = (union tx_info *)&ndata.cmd.cmd2.ossp[0];
- irh->len = 1; /* to indicate that ossp[0] contains tx_info */
+ if (skb_shinfo(skb)->gso_size) {
tx_info->s.gso_size = skb_shinfo(skb)->gso_size;
tx_info->s.gso_segs = skb_shinfo(skb)->gso_segs;
}
@@ -2910,9 +2925,10 @@ lio_xmit_failed:
stats->tx_dropped++;
netif_info(lio, tx_err, lio->netdev, "IQ%d Transmit dropped:%llu\n",
iq_no, stats->tx_dropped);
- dma_unmap_single(&oct->pci_dev->dev, ndata.cmd.dptr,
- ndata.datasize, DMA_TO_DEVICE);
- recv_buffer_free(skb);
+ if (dptr)
+ dma_unmap_single(&oct->pci_dev->dev, dptr,
+ ndata.datasize, DMA_TO_DEVICE);
+ tx_buffer_free(skb);
return NETDEV_TX_OK;
}
@@ -2932,27 +2948,24 @@ static void liquidio_tx_timeout(struct net_device *netdev)
txqs_wake(netdev);
}
-int liquidio_set_feature(struct net_device *netdev, int cmd)
+int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1)
{
struct lio *lio = GET_LIO(netdev);
struct octeon_device *oct = lio->oct_dev;
struct octnic_ctrl_pkt nctrl;
- struct octnic_ctrl_params nparams;
int ret = 0;
memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
nctrl.ncmd.u64 = 0;
nctrl.ncmd.s.cmd = cmd;
- nctrl.ncmd.s.param1 = lio->linfo.ifidx;
- nctrl.ncmd.s.param2 = OCTNIC_LROIPV4 | OCTNIC_LROIPV6;
+ nctrl.ncmd.s.param1 = param1;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
nctrl.wait_time = 100;
nctrl.netpndev = (u64)netdev;
nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
- nparams.resp_order = OCTEON_RESP_NORESPONSE;
-
- ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl, nparams);
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
if (ret < 0) {
dev_err(&oct->pci_dev->dev, "Feature change failed in core (ret: 0x%x)\n",
ret);
@@ -3008,10 +3021,12 @@ static int liquidio_set_features(struct net_device *netdev,
return 0;
if ((features & NETIF_F_LRO) && (lio->dev_capability & NETIF_F_LRO))
- liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE);
+ liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE,
+ OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
else if (!(features & NETIF_F_LRO) &&
(lio->dev_capability & NETIF_F_LRO))
- liquidio_set_feature(netdev, OCTNET_CMD_LRO_DISABLE);
+ liquidio_set_feature(netdev, OCTNET_CMD_LRO_DISABLE,
+ OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
return 0;
}
@@ -3082,24 +3097,27 @@ static int lio_nic_info(struct octeon_recv_info *recv_info, void *buf)
{
struct octeon_device *oct = (struct octeon_device *)buf;
struct octeon_recv_pkt *recv_pkt = recv_info->recv_pkt;
- int ifidx = 0;
+ int gmxport = 0;
union oct_link_status *ls;
int i;
- if ((recv_pkt->buffer_size[0] != sizeof(*ls)) ||
- (recv_pkt->rh.r_nic_info.ifidx > oct->ifcount)) {
+ if (recv_pkt->buffer_size[0] != sizeof(*ls)) {
dev_err(&oct->pci_dev->dev, "Malformed NIC_INFO, len=%d, ifidx=%d\n",
recv_pkt->buffer_size[0],
- recv_pkt->rh.r_nic_info.ifidx);
+ recv_pkt->rh.r_nic_info.gmxport);
goto nic_info_err;
}
- ifidx = recv_pkt->rh.r_nic_info.ifidx;
+ gmxport = recv_pkt->rh.r_nic_info.gmxport;
ls = (union oct_link_status *)get_rbd(recv_pkt->buffer_ptr[0]);
octeon_swap_8B_data((u64 *)ls, (sizeof(union oct_link_status)) >> 3);
-
- update_link_status(oct->props[ifidx].netdev, ls);
+ for (i = 0; i < oct->ifcount; i++) {
+ if (oct->props[i].gmxport == gmxport) {
+ update_link_status(oct->props[i].netdev, ls);
+ break;
+ }
+ }
nic_info_err:
for (i = 0; i < recv_pkt->buffer_count; i++)
@@ -3125,13 +3143,13 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
struct liquidio_if_cfg_context *ctx;
struct liquidio_if_cfg_resp *resp;
struct octdev_props *props;
- int retval, num_iqueues, num_oqueues, q_no;
- u64 q_mask;
+ int retval, num_iqueues, num_oqueues;
int num_cpus = num_online_cpus();
union oct_nic_if_cfg if_cfg;
unsigned int base_queue;
unsigned int gmx_port_id;
u32 resp_size, ctx_size;
+ u32 ifidx_or_pfnum;
/* This is to handle link status changes */
octeon_register_dispatch_fn(octeon_dev, OPCODE_NIC,
@@ -3167,13 +3185,14 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
CFG_GET_BASE_QUE_NIC_IF(octeon_get_conf(octeon_dev), i);
gmx_port_id =
CFG_GET_GMXID_NIC_IF(octeon_get_conf(octeon_dev), i);
+ ifidx_or_pfnum = i;
if (num_iqueues > num_cpus)
num_iqueues = num_cpus;
if (num_oqueues > num_cpus)
num_oqueues = num_cpus;
dev_dbg(&octeon_dev->pci_dev->dev,
"requesting config for interface %d, iqs %d, oqs %d\n",
- i, num_iqueues, num_oqueues);
+ ifidx_or_pfnum, num_iqueues, num_oqueues);
ACCESS_ONCE(ctx->cond) = 0;
ctx->octeon_id = lio_get_device_id(octeon_dev);
init_waitqueue_head(&ctx->wc);
@@ -3183,8 +3202,11 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
if_cfg.s.num_oqueues = num_oqueues;
if_cfg.s.base_queue = base_queue;
if_cfg.s.gmx_port_id = gmx_port_id;
+
+ sc->iq_no = 0;
+
octeon_prepare_soft_command(octeon_dev, sc, OPCODE_NIC,
- OPCODE_NIC_IF_CFG, i,
+ OPCODE_NIC_IF_CFG, 0,
if_cfg.u64, 0);
sc->callback = if_cfg_callback;
@@ -3192,7 +3214,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
sc->wait_time = 1000;
retval = octeon_send_soft_command(octeon_dev, sc);
- if (retval) {
+ if (retval == IQ_SEND_FAILED) {
dev_err(&octeon_dev->pci_dev->dev,
"iq/oq config failed status: %x\n",
retval);
@@ -3234,8 +3256,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
goto setup_nic_dev_fail;
}
- props = &octeon_dev->props[i];
- props->netdev = netdev;
+ SET_NETDEV_DEV(netdev, &octeon_dev->pci_dev->dev);
if (num_iqueues > 1)
lionetdevops.ndo_select_queue = select_q;
@@ -3249,23 +3270,21 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
memset(lio, 0, sizeof(struct lio));
- lio->linfo.ifidx = resp->cfg_info.ifidx;
- lio->ifidx = resp->cfg_info.ifidx;
+ lio->ifidx = ifidx_or_pfnum;
+
+ props = &octeon_dev->props[i];
+ props->gmxport = resp->cfg_info.linfo.gmxport;
+ props->netdev = netdev;
lio->linfo.num_rxpciq = num_oqueues;
lio->linfo.num_txpciq = num_iqueues;
- q_mask = resp->cfg_info.oqmask;
- /* q_mask is 0-based and already verified mask is nonzero */
for (j = 0; j < num_oqueues; j++) {
- q_no = __ffs64(q_mask);
- q_mask &= (~(1UL << q_no));
- lio->linfo.rxpciq[j] = q_no;
+ lio->linfo.rxpciq[j].u64 =
+ resp->cfg_info.linfo.rxpciq[j].u64;
}
- q_mask = resp->cfg_info.iqmask;
for (j = 0; j < num_iqueues; j++) {
- q_no = __ffs64(q_mask);
- q_mask &= (~(1UL << q_no));
- lio->linfo.txpciq[j] = q_no;
+ lio->linfo.txpciq[j].u64 =
+ resp->cfg_info.linfo.txpciq[j].u64;
}
lio->linfo.hw_addr = resp->cfg_info.linfo.hw_addr;
lio->linfo.gmxport = resp->cfg_info.linfo.gmxport;
@@ -3274,13 +3293,15 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
lio->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
lio->dev_capability = NETIF_F_HIGHDMA
- | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
- | NETIF_F_SG | NETIF_F_RXCSUM
- | NETIF_F_TSO | NETIF_F_TSO6
- | NETIF_F_LRO;
+ | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
+ | NETIF_F_SG | NETIF_F_RXCSUM
+ | NETIF_F_GRO
+ | NETIF_F_TSO | NETIF_F_TSO6
+ | NETIF_F_LRO;
netif_set_gso_max_size(netdev, OCTNIC_GSO_MAX_SIZE);
- netdev->features = lio->dev_capability;
+ netdev->features = (lio->dev_capability & ~NETIF_F_LRO);
+
netdev->vlan_features = lio->dev_capability;
netdev->hw_features = lio->dev_capability;
@@ -3291,7 +3312,6 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
lio->oct_dev = octeon_dev;
lio->octprops = props;
lio->netdev = netdev;
- spin_lock_init(&lio->lock);
dev_dbg(&octeon_dev->pci_dev->dev,
"if%d gmx: %d hw_addr: 0x%llx\n", i,
@@ -3306,23 +3326,22 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
ether_addr_copy(netdev->dev_addr, mac);
- if (setup_io_queues(octeon_dev, netdev)) {
+ /* By default all interfaces on a single Octeon uses the same
+ * tx and rx queues
+ */
+ lio->txq = lio->linfo.txpciq[0].s.q_no;
+ lio->rxq = lio->linfo.rxpciq[0].s.q_no;
+ if (setup_io_queues(octeon_dev, i)) {
dev_err(&octeon_dev->pci_dev->dev, "I/O queues creation failed\n");
goto setup_nic_dev_fail;
}
ifstate_set(lio, LIO_IFSTATE_DROQ_OPS);
- /* By default all interfaces on a single Octeon uses the same
- * tx and rx queues
- */
- lio->txq = lio->linfo.txpciq[0];
- lio->rxq = lio->linfo.rxpciq[0];
-
lio->tx_qsize = octeon_get_tx_qsize(octeon_dev, lio->txq);
lio->rx_qsize = octeon_get_rx_qsize(octeon_dev, lio->rxq);
- if (setup_glist(lio)) {
+ if (setup_glists(octeon_dev, lio, num_iqueues)) {
dev_err(&octeon_dev->pci_dev->dev,
"Gather list allocation failed\n");
goto setup_nic_dev_fail;
@@ -3331,10 +3350,13 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
/* Register ethtool support */
liquidio_set_ethtool_ops(netdev);
- liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE);
+ if (netdev->features & NETIF_F_LRO)
+ liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE,
+ OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
if ((debug != -1) && (debug & NETIF_MSG_HW))
- liquidio_set_feature(netdev, OCTNET_CMD_VERBOSE_ENABLE);
+ liquidio_set_feature(netdev, OCTNET_CMD_VERBOSE_ENABLE,
+ 0);
/* Register the network device with the OS */
if (register_netdev(netdev)) {
@@ -3346,13 +3368,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
"Setup NIC ifidx:%d mac:%02x%02x%02x%02x%02x%02x\n",
i, mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
netif_carrier_off(netdev);
-
- if (lio->linfo.link.s.status) {
- netif_carrier_on(netdev);
- start_txq(netdev);
- } else {
- netif_carrier_off(netdev);
- }
+ lio->link_changes++;
ifstate_set(lio, LIO_IFSTATE_REGISTERED);
@@ -3386,7 +3402,7 @@ setup_nic_dev_fail:
static int liquidio_init_nic_module(struct octeon_device *oct)
{
struct oct_intrmod_cfg *intrmod_cfg;
- int retval = 0;
+ int i, retval = 0;
int num_nic_ports = CFG_GET_NUM_NIC_PORTS(octeon_get_conf(oct));
dev_dbg(&oct->pci_dev->dev, "Initializing network interfaces\n");
@@ -3400,6 +3416,9 @@ static int liquidio_init_nic_module(struct octeon_device *oct)
memset(oct->props, 0,
sizeof(struct octdev_props) * num_nic_ports);
+ for (i = 0; i < MAX_OCTEON_LINKS; i++)
+ oct->props[i].gmxport = -1;
+
retval = setup_nic_devices(oct);
if (retval) {
dev_err(&oct->pci_dev->dev, "Setup NIC devices failed\n");
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 0ac347ccc8ba..2179691efebc 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -174,9 +174,11 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
/*------------------------- End Scatter/Gather ---------------------------*/
#define OCTNET_FRM_PTP_HEADER_SIZE 8
-#define OCTNET_FRM_HEADER_SIZE 30 /* PTP timestamp + VLAN + Ethernet */
-#define OCTNET_MIN_FRM_SIZE (64 + OCTNET_FRM_PTP_HEADER_SIZE)
+#define OCTNET_FRM_HEADER_SIZE 22 /* VLAN + Ethernet */
+
+#define OCTNET_MIN_FRM_SIZE 64
+
#define OCTNET_MAX_FRM_SIZE (16000 + OCTNET_FRM_HEADER_SIZE)
#define OCTNET_DEFAULT_FRM_SIZE (1500 + OCTNET_FRM_HEADER_SIZE)
@@ -258,19 +260,19 @@ union octnet_cmd {
u64 more:6; /* How many udd words follow the command */
- u64 param1:29;
+ u64 reserved:29;
- u64 param2:16;
+ u64 param1:16;
- u64 param3:8;
+ u64 param2:8;
#else
- u64 param3:8;
+ u64 param2:8;
- u64 param2:16;
+ u64 param1:16;
- u64 param1:29;
+ u64 reserved:29;
u64 more:6;
@@ -283,8 +285,140 @@ union octnet_cmd {
#define OCTNET_CMD_SIZE (sizeof(union octnet_cmd))
+/* Instruction Header (DPI - CN23xx) - for OCTEON-III models */
+struct octeon_instr_ih3 {
+#ifdef __BIG_ENDIAN_BITFIELD
+
+ /** Reserved3 */
+ u64 reserved3:1;
+
+ /** Gather indicator 1=gather*/
+ u64 gather:1;
+
+ /** Data length OR no. of entries in gather list */
+ u64 dlengsz:14;
+
+ /** Front Data size */
+ u64 fsz:6;
+
+ /** Reserved2 */
+ u64 reserved2:4;
+
+ /** PKI port kind - PKIND */
+ u64 pkind:6;
+
+ /** Reserved1 */
+ u64 reserved1:32;
+
+#else
+ /** Reserved1 */
+ u64 reserved1:32;
+
+ /** PKI port kind - PKIND */
+ u64 pkind:6;
+
+ /** Reserved2 */
+ u64 reserved2:4;
+
+ /** Front Data size */
+ u64 fsz:6;
+
+ /** Data length OR no. of entries in gather list */
+ u64 dlengsz:14;
+
+ /** Gather indicator 1=gather*/
+ u64 gather:1;
+
+ /** Reserved3 */
+ u64 reserved3:1;
+
+#endif
+};
+
+/* Optional PKI Instruction Header(PKI IH) - for OCTEON CN23XX models */
+/** BIG ENDIAN format. */
+struct octeon_instr_pki_ih3 {
+#ifdef __BIG_ENDIAN_BITFIELD
+
+ /** Wider bit */
+ u64 w:1;
+
+ /** Raw mode indicator 1 = RAW */
+ u64 raw:1;
+
+ /** Use Tag */
+ u64 utag:1;
+
+ /** Use QPG */
+ u64 uqpg:1;
+
+ /** Reserved2 */
+ u64 reserved2:1;
+
+ /** Parse Mode */
+ u64 pm:3;
+
+ /** Skip Length */
+ u64 sl:8;
+
+ /** Use Tag Type */
+ u64 utt:1;
+
+ /** Tag type */
+ u64 tagtype:2;
+
+ /** Reserved1 */
+ u64 reserved1:2;
+
+ /** QPG Value */
+ u64 qpg:11;
+
+ /** Tag Value */
+ u64 tag:32;
+
+#else
+
+ /** Tag Value */
+ u64 tag:32;
+
+ /** QPG Value */
+ u64 qpg:11;
+
+ /** Reserved1 */
+ u64 reserved1:2;
+
+ /** Tag type */
+ u64 tagtype:2;
+
+ /** Use Tag Type */
+ u64 utt:1;
+
+ /** Skip Length */
+ u64 sl:8;
+
+ /** Parse Mode */
+ u64 pm:3;
+
+ /** Reserved2 */
+ u64 reserved2:1;
+
+ /** Use QPG */
+ u64 uqpg:1;
+
+ /** Use Tag */
+ u64 utag:1;
+
+ /** Raw mode indicator 1 = RAW */
+ u64 raw:1;
+
+ /** Wider bit */
+ u64 w:1;
+#endif
+
+};
+
/** Instruction Header */
-struct octeon_instr_ih {
+struct octeon_instr_ih2 {
#ifdef __BIG_ENDIAN_BITFIELD
/** Raw mode indicator 1 = RAW */
u64 raw:1;
@@ -412,10 +546,9 @@ union octeon_rh {
u64 opcode:4;
u64 subcode:8;
u64 len:3; /** additional 64-bit words */
- u64 rid:13;
- u64 reserved:4;
+ u64 reserved:8;
u64 extra:25;
- u64 ifidx:7;
+ u64 gmxport:16;
} r_nic_info;
#else
u64 u64;
@@ -448,10 +581,9 @@ union octeon_rh {
u64 opcode:4;
} r_core_drv_init;
struct {
- u64 ifidx:7;
+ u64 gmxport:16;
u64 extra:25;
- u64 reserved:4;
- u64 rid:13;
+ u64 reserved:8;
u64 len:3; /** additional 64-bit words */
u64 subcode:8;
u64 opcode:4;
@@ -461,30 +593,25 @@ union octeon_rh {
#define OCT_RH_SIZE (sizeof(union octeon_rh))
-#define OCT_PKT_PARAM_IPV4OPTS 1
-#define OCT_PKT_PARAM_IPV6EXTHDR 2
-
union octnic_packet_params {
u32 u32;
struct {
#ifdef __BIG_ENDIAN_BITFIELD
- u32 reserved:6;
+ u32 reserved:24;
+ u32 ip_csum:1; /* Perform IP header checksum(s) */
+ /* Perform Outer transport header checksum */
+ u32 transport_csum:1;
+ /* Find tunnel, and perform transport csum. */
u32 tnl_csum:1;
- u32 ip_csum:1;
- u32 ipv4opts_ipv6exthdr:2;
- u32 ipsec_ops:4;
- u32 tsflag:1;
- u32 csoffset:9;
- u32 ifidx:8;
+ u32 tsflag:1; /* Timestamp this packet */
+ u32 ipsec_ops:4; /* IPsec operation */
#else
- u32 ifidx:8;
- u32 csoffset:9;
- u32 tsflag:1;
u32 ipsec_ops:4;
- u32 ipv4opts_ipv6exthdr:2;
- u32 ip_csum:1;
+ u32 tsflag:1;
u32 tnl_csum:1;
- u32 reserved:6;
+ u32 transport_csum:1;
+ u32 ip_csum:1;
+ u32 reserved:24;
#endif
} s;
};
@@ -496,53 +623,90 @@ union oct_link_status {
struct {
#ifdef __BIG_ENDIAN_BITFIELD
u64 duplex:8;
- u64 status:8;
u64 mtu:16;
u64 speed:16;
+ u64 link_up:1;
u64 autoneg:1;
u64 interface:4;
u64 pause:1;
- u64 reserved:10;
+ u64 reserved:17;
#else
- u64 reserved:10;
+ u64 reserved:17;
u64 pause:1;
u64 interface:4;
u64 autoneg:1;
+ u64 link_up:1;
u64 speed:16;
u64 mtu:16;
- u64 status:8;
u64 duplex:8;
#endif
} s;
};
+/** The txpciq info passed to host from the firmware */
+
+union oct_txpciq {
+ u64 u64;
+
+ struct {
+#ifdef __BIG_ENDIAN_BITFIELD
+ u64 q_no:8;
+ u64 port:8;
+ u64 pkind:6;
+ u64 use_qpg:1;
+ u64 qpg:11;
+ u64 reserved:30;
+#else
+ u64 reserved:30;
+ u64 qpg:11;
+ u64 use_qpg:1;
+ u64 pkind:6;
+ u64 port:8;
+ u64 q_no:8;
+#endif
+ } s;
+};
+
+/** The rxpciq info passed to host from the firmware */
+
+union oct_rxpciq {
+ u64 u64;
+
+ struct {
+#ifdef __BIG_ENDIAN_BITFIELD
+ u64 q_no:8;
+ u64 reserved:56;
+#else
+ u64 reserved:56;
+ u64 q_no:8;
+#endif
+ } s;
+};
+
/** Information for a OCTEON ethernet interface shared between core & host. */
struct oct_link_info {
union oct_link_status link;
u64 hw_addr;
#ifdef __BIG_ENDIAN_BITFIELD
- u16 gmxport;
- u8 rsvd[3];
- u8 num_txpciq;
- u8 num_rxpciq;
- u8 ifidx;
+ u64 gmxport:16;
+ u64 rsvd:32;
+ u64 num_txpciq:8;
+ u64 num_rxpciq:8;
#else
- u8 ifidx;
- u8 num_rxpciq;
- u8 num_txpciq;
- u8 rsvd[3];
- u16 gmxport;
+ u64 num_rxpciq:8;
+ u64 num_txpciq:8;
+ u64 rsvd:32;
+ u64 gmxport:16;
#endif
- u8 txpciq[MAX_IOQS_PER_NICIF];
- u8 rxpciq[MAX_IOQS_PER_NICIF];
+ union oct_txpciq txpciq[MAX_IOQS_PER_NICIF];
+ union oct_rxpciq rxpciq[MAX_IOQS_PER_NICIF];
};
#define OCT_LINK_INFO_SIZE (sizeof(struct oct_link_info))
struct liquidio_if_cfg_info {
- u64 ifidx;
u64 iqmask; /** mask for IQs enabled for the port */
u64 oqmask; /** mask for OQs enabled for the port */
struct oct_link_info linfo; /** initial link information */
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index 8e23e3fad662..32900093527a 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -741,49 +741,59 @@ struct octeon_device *octeon_allocate_device(u32 pci_id,
return oct;
}
+/* this function is only for setting up the first queue */
int octeon_setup_instr_queues(struct octeon_device *oct)
{
- u32 i, num_iqs = 0;
+ u32 num_iqs = 0;
u32 num_descs = 0;
+ u32 iq_no = 0;
+ union oct_txpciq txpciq;
+ int numa_node = cpu_to_node(iq_no % num_online_cpus());
+ num_iqs = 1;
/* this causes queue 0 to be default queue */
- if (OCTEON_CN6XXX(oct)) {
- num_iqs = 1;
+ if (OCTEON_CN6XXX(oct))
num_descs =
CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn6xxx, conf));
- }
oct->num_iqs = 0;
- for (i = 0; i < num_iqs; i++) {
- oct->instr_queue[i] =
+ oct->instr_queue[0] = vmalloc_node(sizeof(*oct->instr_queue[0]),
+ numa_node);
+ if (!oct->instr_queue[0])
+ oct->instr_queue[0] =
vmalloc(sizeof(struct octeon_instr_queue));
- if (!oct->instr_queue[i])
- return 1;
-
- memset(oct->instr_queue[i], 0,
- sizeof(struct octeon_instr_queue));
-
- oct->instr_queue[i]->app_ctx = (void *)(size_t)i;
- if (octeon_init_instr_queue(oct, i, num_descs))
- return 1;
-
- oct->num_iqs++;
+ if (!oct->instr_queue[0])
+ return 1;
+ memset(oct->instr_queue[0], 0, sizeof(struct octeon_instr_queue));
+ oct->instr_queue[0]->q_index = 0;
+ oct->instr_queue[0]->app_ctx = (void *)(size_t)0;
+ oct->instr_queue[0]->ifidx = 0;
+ txpciq.u64 = 0;
+ txpciq.s.q_no = iq_no;
+ txpciq.s.use_qpg = 0;
+ txpciq.s.qpg = 0;
+ if (octeon_init_instr_queue(oct, txpciq, num_descs)) {
+ /* prevent memory leak */
+ vfree(oct->instr_queue[0]);
+ return 1;
}
+ oct->num_iqs++;
return 0;
}
int octeon_setup_output_queues(struct octeon_device *oct)
{
- u32 i, num_oqs = 0;
+ u32 num_oqs = 0;
u32 num_descs = 0;
u32 desc_size = 0;
+ u32 oq_no = 0;
+ int numa_node = cpu_to_node(oq_no % num_online_cpus());
+ num_oqs = 1;
/* this causes queue 0 to be default queue */
if (OCTEON_CN6XXX(oct)) {
- /* CFG_GET_OQ_MAX_BASE_Q(CHIP_FIELD(oct, cn6xxx, conf)); */
- num_oqs = 1;
num_descs =
CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn6xxx, conf));
desc_size =
@@ -791,19 +801,15 @@ int octeon_setup_output_queues(struct octeon_device *oct)
}
oct->num_oqs = 0;
+ oct->droq[0] = vmalloc_node(sizeof(*oct->droq[0]), numa_node);
+ if (!oct->droq[0])
+ oct->droq[0] = vmalloc(sizeof(*oct->droq[0]));
+ if (!oct->droq[0])
+ return 1;
- for (i = 0; i < num_oqs; i++) {
- oct->droq[i] = vmalloc(sizeof(*oct->droq[i]));
- if (!oct->droq[i])
- return 1;
-
- memset(oct->droq[i], 0, sizeof(struct octeon_droq));
-
- if (octeon_init_droq(oct, i, num_descs, desc_size, NULL))
- return 1;
-
- oct->num_oqs++;
- }
+ if (octeon_init_droq(oct, oq_no, num_descs, desc_size, NULL))
+ return 1;
+ oct->num_oqs++;
return 0;
}
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index 36e1f85df8c4..0950b94f8805 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -267,6 +267,7 @@ struct octdev_props {
/* Each interface in the Octeon device has a network
* device pointer (used for OS specific calls).
*/
+ int gmxport;
struct net_device *netdev;
};
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 174072b3740b..59a529353f6d 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -151,22 +151,26 @@ octeon_droq_destroy_ring_buffers(struct octeon_device *oct,
struct octeon_droq *droq)
{
u32 i;
+ struct octeon_skb_page_info *pg_info;
for (i = 0; i < droq->max_count; i++) {
- if (droq->recv_buf_list[i].buffer) {
- if (droq->desc_ring) {
- lio_unmap_ring_info(oct->pci_dev,
- (u64)droq->
- desc_ring[i].info_ptr,
- OCT_DROQ_INFO_SIZE);
- lio_unmap_ring(oct->pci_dev,
- (u64)droq->desc_ring[i].
- buffer_ptr,
- droq->buffer_size);
- }
- recv_buffer_free(droq->recv_buf_list[i].buffer);
- droq->recv_buf_list[i].buffer = NULL;
- }
+ pg_info = &droq->recv_buf_list[i].pg_info;
+
+ if (pg_info->dma)
+ lio_unmap_ring(oct->pci_dev,
+ (u64)pg_info->dma);
+ pg_info->dma = 0;
+
+ if (pg_info->page)
+ recv_buffer_destroy(droq->recv_buf_list[i].buffer,
+ pg_info);
+
+ if (droq->desc_ring && droq->desc_ring[i].info_ptr)
+ lio_unmap_ring_info(oct->pci_dev,
+ (u64)droq->
+ desc_ring[i].info_ptr,
+ OCT_DROQ_INFO_SIZE);
+ droq->recv_buf_list[i].buffer = NULL;
}
octeon_droq_reset_indices(droq);
@@ -181,11 +185,12 @@ octeon_droq_setup_ring_buffers(struct octeon_device *oct,
struct octeon_droq_desc *desc_ring = droq->desc_ring;
for (i = 0; i < droq->max_count; i++) {
- buf = recv_buffer_alloc(oct, droq->q_no, droq->buffer_size);
+ buf = recv_buffer_alloc(oct, &droq->recv_buf_list[i].pg_info);
if (!buf) {
dev_err(&oct->pci_dev->dev, "%s buffer alloc failed\n",
__func__);
+ droq->stats.rx_alloc_failure++;
return -ENOMEM;
}
@@ -197,9 +202,7 @@ octeon_droq_setup_ring_buffers(struct octeon_device *oct,
/* map ring buffers into memory */
desc_ring[i].info_ptr = lio_map_ring_info(droq, i);
desc_ring[i].buffer_ptr =
- lio_map_ring(oct->pci_dev,
- droq->recv_buf_list[i].buffer,
- droq->buffer_size);
+ lio_map_ring(droq->recv_buf_list[i].buffer);
}
octeon_droq_reset_indices(droq);
@@ -242,6 +245,8 @@ int octeon_init_droq(struct octeon_device *oct,
struct octeon_droq *droq;
u32 desc_ring_size = 0, c_num_descs = 0, c_buf_size = 0;
u32 c_pkts_per_intr = 0, c_refill_threshold = 0;
+ int orig_node = dev_to_node(&oct->pci_dev->dev);
+ int numa_node = cpu_to_node(q_no % num_online_cpus());
dev_dbg(&oct->pci_dev->dev, "%s[%d]\n", __func__, q_no);
@@ -261,15 +266,23 @@ int octeon_init_droq(struct octeon_device *oct,
struct octeon_config *conf6x = CHIP_FIELD(oct, cn6xxx, conf);
c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf6x);
- c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf6x);
+ c_refill_threshold =
+ (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf6x);
+ } else {
+ return 1;
}
droq->max_count = c_num_descs;
droq->buffer_size = c_buf_size;
desc_ring_size = droq->max_count * OCT_DROQ_DESC_SIZE;
+ set_dev_node(&oct->pci_dev->dev, numa_node);
droq->desc_ring = lio_dma_alloc(oct, desc_ring_size,
(dma_addr_t *)&droq->desc_ring_dma);
+ set_dev_node(&oct->pci_dev->dev, orig_node);
+ if (!droq->desc_ring)
+ droq->desc_ring = lio_dma_alloc(oct, desc_ring_size,
+ (dma_addr_t *)&droq->desc_ring_dma);
if (!droq->desc_ring) {
dev_err(&oct->pci_dev->dev,
@@ -283,12 +296,11 @@ int octeon_init_droq(struct octeon_device *oct,
droq->max_count);
droq->info_list =
- cnnic_alloc_aligned_dma(oct->pci_dev,
- (droq->max_count * OCT_DROQ_INFO_SIZE),
- &droq->info_alloc_size,
- &droq->info_base_addr,
- &droq->info_list_dma);
-
+ cnnic_numa_alloc_aligned_dma((droq->max_count *
+ OCT_DROQ_INFO_SIZE),
+ &droq->info_alloc_size,
+ &droq->info_base_addr,
+ numa_node);
if (!droq->info_list) {
dev_err(&oct->pci_dev->dev, "Cannot allocate memory for info list.\n");
lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE),
@@ -297,7 +309,12 @@ int octeon_init_droq(struct octeon_device *oct,
}
droq->recv_buf_list = (struct octeon_recv_buffer *)
- vmalloc(droq->max_count *
+ vmalloc_node(droq->max_count *
+ OCT_DROQ_RECVBUF_SIZE,
+ numa_node);
+ if (!droq->recv_buf_list)
+ droq->recv_buf_list = (struct octeon_recv_buffer *)
+ vmalloc(droq->max_count *
OCT_DROQ_RECVBUF_SIZE);
if (!droq->recv_buf_list) {
dev_err(&oct->pci_dev->dev, "Output queue recv buf list alloc failed\n");
@@ -358,6 +375,7 @@ static inline struct octeon_recv_info *octeon_create_recv_info(
struct octeon_recv_pkt *recv_pkt;
struct octeon_recv_info *recv_info;
u32 i, bytes_left;
+ struct octeon_skb_page_info *pg_info;
info = &droq->info_list[idx];
@@ -375,9 +393,14 @@ static inline struct octeon_recv_info *octeon_create_recv_info(
bytes_left = (u32)info->length;
while (buf_cnt) {
- lio_unmap_ring(octeon_dev->pci_dev,
- (u64)droq->desc_ring[idx].buffer_ptr,
- droq->buffer_size);
+ {
+ pg_info = &droq->recv_buf_list[idx].pg_info;
+
+ lio_unmap_ring(octeon_dev->pci_dev,
+ (u64)pg_info->dma);
+ pg_info->page = NULL;
+ pg_info->dma = 0;
+ }
recv_pkt->buffer_size[i] =
(bytes_left >=
@@ -449,6 +472,7 @@ octeon_droq_refill(struct octeon_device *octeon_dev, struct octeon_droq *droq)
void *buf = NULL;
u8 *data;
u32 desc_refilled = 0;
+ struct octeon_skb_page_info *pg_info;
desc_ring = droq->desc_ring;
@@ -458,13 +482,22 @@ octeon_droq_refill(struct octeon_device *octeon_dev, struct octeon_droq *droq)
* the buffer, else allocate.
*/
if (!droq->recv_buf_list[droq->refill_idx].buffer) {
- buf = recv_buffer_alloc(octeon_dev, droq->q_no,
- droq->buffer_size);
+ pg_info =
+ &droq->recv_buf_list[droq->refill_idx].pg_info;
+ /* Either recycle the existing pages or go for
+ * new page alloc
+ */
+ if (pg_info->page)
+ buf = recv_buffer_reuse(octeon_dev, pg_info);
+ else
+ buf = recv_buffer_alloc(octeon_dev, pg_info);
/* If a buffer could not be allocated, no point in
* continuing
*/
- if (!buf)
+ if (!buf) {
+ droq->stats.rx_alloc_failure++;
break;
+ }
droq->recv_buf_list[droq->refill_idx].buffer =
buf;
data = get_rbd(buf);
@@ -476,11 +509,8 @@ octeon_droq_refill(struct octeon_device *octeon_dev, struct octeon_droq *droq)
droq->recv_buf_list[droq->refill_idx].data = data;
desc_ring[droq->refill_idx].buffer_ptr =
- lio_map_ring(octeon_dev->pci_dev,
- droq->recv_buf_list[droq->
- refill_idx].buffer,
- droq->buffer_size);
-
+ lio_map_ring(droq->recv_buf_list[droq->
+ refill_idx].buffer);
/* Reset any previous values in the length field. */
droq->info_list[droq->refill_idx].length = 0;
@@ -586,6 +616,8 @@ octeon_droq_fast_process_packets(struct octeon_device *oct,
for (pkt = 0; pkt < pkt_count; pkt++) {
u32 pkt_len = 0;
struct sk_buff *nicbuf = NULL;
+ struct octeon_skb_page_info *pg_info;
+ void *buf;
info = &droq->info_list[droq->read_idx];
octeon_swap_8B_data((u64 *)info, 2);
@@ -605,7 +637,6 @@ octeon_droq_fast_process_packets(struct octeon_device *oct,
rh = &info->rh;
total_len += (u32)info->length;
-
if (OPCODE_SLOW_PATH(rh)) {
u32 buf_cnt;
@@ -614,50 +645,44 @@ octeon_droq_fast_process_packets(struct octeon_device *oct,
droq->refill_count += buf_cnt;
} else {
if (info->length <= droq->buffer_size) {
- lio_unmap_ring(oct->pci_dev,
- (u64)droq->desc_ring[
- droq->read_idx].buffer_ptr,
- droq->buffer_size);
pkt_len = (u32)info->length;
nicbuf = droq->recv_buf_list[
droq->read_idx].buffer;
+ pg_info = &droq->recv_buf_list[
+ droq->read_idx].pg_info;
+ if (recv_buffer_recycle(oct, pg_info))
+ pg_info->page = NULL;
droq->recv_buf_list[droq->read_idx].buffer =
NULL;
INCR_INDEX_BY1(droq->read_idx, droq->max_count);
- skb_put(nicbuf, pkt_len);
droq->refill_count++;
} else {
- nicbuf = octeon_fast_packet_alloc(oct, droq,
- droq->q_no,
- (u32)
+ nicbuf = octeon_fast_packet_alloc((u32)
info->length);
pkt_len = 0;
/* nicbuf allocation can fail. We'll handle it
* inside the loop.
*/
while (pkt_len < info->length) {
- int cpy_len;
+ int cpy_len, idx = droq->read_idx;
- cpy_len = ((pkt_len +
- droq->buffer_size) >
- info->length) ?
+ cpy_len = ((pkt_len + droq->buffer_size)
+ > info->length) ?
((u32)info->length - pkt_len) :
droq->buffer_size;
if (nicbuf) {
- lio_unmap_ring(oct->pci_dev,
- (u64)
- droq->desc_ring
- [droq->read_idx].
- buffer_ptr,
- droq->
- buffer_size);
octeon_fast_packet_next(droq,
nicbuf,
cpy_len,
- droq->
- read_idx
- );
+ idx);
+ buf = droq->recv_buf_list[idx].
+ buffer;
+ recv_buffer_fast_free(buf);
+ droq->recv_buf_list[idx].buffer
+ = NULL;
+ } else {
+ droq->stats.rx_alloc_failure++;
}
pkt_len += cpy_len;
@@ -668,12 +693,14 @@ octeon_droq_fast_process_packets(struct octeon_device *oct,
}
if (nicbuf) {
- if (droq->ops.fptr)
+ if (droq->ops.fptr) {
droq->ops.fptr(oct->octeon_id,
- nicbuf, pkt_len,
- rh, &droq->napi);
- else
+ nicbuf, pkt_len,
+ rh, &droq->napi,
+ droq->ops.farg);
+ } else {
recv_buffer_free(nicbuf);
+ }
}
}
@@ -681,16 +708,16 @@ octeon_droq_fast_process_packets(struct octeon_device *oct,
int desc_refilled = octeon_droq_refill(oct, droq);
/* Flush the droq descriptor data to memory to be sure
- * that when we update the credits the data in memory
- * is accurate.
- */
+ * that when we update the credits the data in memory
+ * is accurate.
+ */
wmb();
writel((desc_refilled), droq->pkts_credit_reg);
/* make sure mmio write completes */
mmiowb();
}
- } /* for ( each packet )... */
+ } /* for (each packet)... */
/* Increment refill_count by the number of buffers processed. */
droq->stats.pkts_received += pkt;
@@ -937,6 +964,7 @@ int octeon_unregister_droq_ops(struct octeon_device *oct, u32 q_no)
spin_lock_irqsave(&droq->lock, flags);
droq->ops.fptr = NULL;
+ droq->ops.farg = NULL;
droq->ops.drop_on_max = 0;
spin_unlock_irqrestore(&droq->lock, flags);
@@ -949,6 +977,7 @@ int octeon_create_droq(struct octeon_device *oct,
u32 desc_size, void *app_ctx)
{
struct octeon_droq *droq;
+ int numa_node = cpu_to_node(q_no % num_online_cpus());
if (oct->droq[q_no]) {
dev_dbg(&oct->pci_dev->dev, "Droq already in use. Cannot create droq %d again\n",
@@ -957,7 +986,9 @@ int octeon_create_droq(struct octeon_device *oct,
}
/* Allocate the DS for the new droq. */
- droq = vmalloc(sizeof(*droq));
+ droq = vmalloc_node(sizeof(*droq), numa_node);
+ if (!droq)
+ droq = vmalloc(sizeof(*droq));
if (!droq)
goto create_droq_fail;
memset(droq, 0, sizeof(struct octeon_droq));
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
index 7940ccee12d9..1ca9c4f05702 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
@@ -65,6 +65,17 @@ struct octeon_droq_info {
#define OCT_DROQ_INFO_SIZE (sizeof(struct octeon_droq_info))
+struct octeon_skb_page_info {
+ /* DMA address for the page */
+ dma_addr_t dma;
+
+ /* Page for the rx dma **/
+ struct page *page;
+
+ /** which offset into page */
+ unsigned int page_offset;
+};
+
/** Pointer to data buffer.
* Driver keeps a pointer to the data buffer that it made available to
* the Octeon device. Since the descriptor ring keeps physical (bus)
@@ -77,6 +88,9 @@ struct octeon_recv_buffer {
/** Data in the packet buffer. */
u8 *data;
+
+ /** pg_info **/
+ struct octeon_skb_page_info pg_info;
};
#define OCT_DROQ_RECVBUF_SIZE (sizeof(struct octeon_recv_buffer))
@@ -106,6 +120,10 @@ struct oct_droq_stats {
/** Num of Packets dropped due to receive path failures. */
u64 rx_dropped;
+
+ /** Num of failures of recv_buffer_alloc() */
+ u64 rx_alloc_failure;
+
};
#define POLL_EVENT_INTR_ARRIVED 1
@@ -213,7 +231,8 @@ struct octeon_droq_ops {
* data in the buffer. The receive header gives the port
* number to the caller. Function pointer is set by caller.
*/
- void (*fptr)(u32, void *, u32, union octeon_rh *, void *);
+ void (*fptr)(u32, void *, u32, union octeon_rh *, void *, void *);
+ void *farg;
/* This function will be called by the driver for all NAPI related
* events. The first param is the octeon id. The second param is the
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
index 592fe49b589d..513f8a068179 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
@@ -75,14 +75,16 @@ struct oct_iq_stats {
* a Octeon device has one such structure to represent it.
*/
struct octeon_instr_queue {
+ struct octeon_device *oct_dev;
+
/** A spinlock to protect access to the input ring. */
spinlock_t lock;
/** Flag that indicates if the queue uses 64 byte commands. */
u32 iqcmd_64B:1;
- /** Queue Number. */
- u32 iq_no:5;
+ /** Queue info. */
+ union oct_txpciq txpciq;
u32 rsvd:17;
@@ -147,6 +149,13 @@ struct octeon_instr_queue {
/** Application context */
void *app_ctx;
+
+ /* network stack queue index */
+ int q_index;
+
+ /*os ifidx associated with this queue */
+ int ifidx;
+
};
/*---------------------- INSTRUCTION FORMAT ----------------------------*/
@@ -176,12 +185,12 @@ struct octeon_instr_32B {
/** 64-byte instruction format.
* Format of instruction for a 64-byte mode input queue.
*/
-struct octeon_instr_64B {
+struct octeon_instr2_64B {
/** Pointer where the input data is available. */
u64 dptr;
/** Instruction Header. */
- u64 ih;
+ u64 ih2;
/** Input Request Header. */
u64 irh;
@@ -198,10 +207,40 @@ struct octeon_instr_64B {
u64 rptr;
u64 reserved;
+};
+
+struct octeon_instr3_64B {
+ /** Pointer where the input data is available. */
+ u64 dptr;
+
+ /** Instruction Header. */
+ u64 ih3;
+
+ /** Instruction Header. */
+ u64 pki_ih3;
+
+ /** Input Request Header. */
+ u64 irh;
+
+ /** opcode/subcode specific parameters */
+ u64 ossp[2];
+
+ /** Return Data Parameters */
+ u64 rdp;
+
+ /** Pointer where the response for a RAW mode packet will be written
+ * by Octeon.
+ */
+ u64 rptr;
};
-#define OCT_64B_INSTR_SIZE (sizeof(struct octeon_instr_64B))
+union octeon_instr_64B {
+ struct octeon_instr2_64B cmd2;
+ struct octeon_instr3_64B cmd3;
+};
+
+#define OCT_64B_INSTR_SIZE (sizeof(union octeon_instr_64B))
/** The size of each buffer in soft command buffer pool
*/
@@ -214,7 +253,8 @@ struct octeon_soft_command {
u32 size;
/** Command and return status */
- struct octeon_instr_64B cmd;
+ union octeon_instr_64B cmd;
+
#define COMPLETION_WORD_INIT 0xffffffffffffffffULL
u64 *status_word;
@@ -268,14 +308,15 @@ void octeon_free_soft_command(struct octeon_device *oct,
/**
* octeon_init_instr_queue()
* @param octeon_dev - pointer to the octeon device structure.
- * @param iq_no - queue to be initialized (0 <= q_no <= 3).
+ * @param txpciq - queue to be initialized (0 <= q_no <= 3).
*
* Called at driver init time for each input queue. iq_conf has the
* configuration parameters for the queue.
*
* @return Success: 0 Failure: 1
*/
-int octeon_init_instr_queue(struct octeon_device *octeon_dev, u32 iq_no,
+int octeon_init_instr_queue(struct octeon_device *octeon_dev,
+ union oct_txpciq txpciq,
u32 num_descs);
/**
@@ -313,7 +354,8 @@ void octeon_prepare_soft_command(struct octeon_device *oct,
int octeon_send_soft_command(struct octeon_device *oct,
struct octeon_soft_command *sc);
-int octeon_setup_iq(struct octeon_device *oct, u32 iq_no,
- u32 num_descs, void *app_ctx);
+int octeon_setup_iq(struct octeon_device *oct, int ifidx,
+ int q_index, union oct_txpciq iq_no, u32 num_descs,
+ void *app_ctx);
#endif /* __OCTEON_IQ_H__ */
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
index cbd081981180..0ff3efc67b84 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
@@ -126,22 +126,27 @@ static inline int octeon_map_pci_barx(struct octeon_device *oct,
}
static inline void *
-cnnic_alloc_aligned_dma(struct pci_dev *pci_dev,
- u32 size,
- u32 *alloc_size,
- size_t *orig_ptr,
- size_t *dma_addr __attribute__((unused)))
+cnnic_numa_alloc_aligned_dma(u32 size,
+ u32 *alloc_size,
+ size_t *orig_ptr,
+ int numa_node)
{
int retries = 0;
void *ptr = NULL;
#define OCTEON_MAX_ALLOC_RETRIES 1
do {
- ptr =
- (void *)__get_free_pages(GFP_KERNEL,
- get_order(size));
+ struct page *page = NULL;
+
+ page = alloc_pages_node(numa_node,
+ GFP_KERNEL,
+ get_order(size));
+ if (!page)
+ page = alloc_pages(GFP_KERNEL,
+ get_order(size));
+ ptr = (void *)page_address(page);
if ((unsigned long)ptr & 0x07) {
- free_pages((unsigned long)ptr, get_order(size));
+ __free_pages(page, get_order(size));
ptr = NULL;
/* Increment the size required if the first
* attempt failed.
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index b3abe5818fd3..9c14484bfca0 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -48,11 +48,11 @@ struct lio {
*/
int rxq;
- /** Guards the glist */
- spinlock_t lock;
+ /** Guards each glist */
+ spinlock_t *glist_lock;
- /** Linked list of gather components */
- struct list_head glist;
+ /** Array of gather component linked lists */
+ struct list_head *glist;
/** Pointer to the NIC properties for the Octeon device this network
* interface is associated with.
@@ -67,6 +67,9 @@ struct lio {
/** Link information sent by the core application for this interface. */
struct oct_link_info linfo;
+ /** counter of link changes */
+ u64 link_changes;
+
/** Size of Tx queue for this octeon device. */
u32 tx_qsize;
@@ -111,8 +114,9 @@ struct lio {
* \brief Enable or disable feature
* @param netdev pointer to network device
* @param cmd Command that just requires acknowledgment
+ * @param param1 Parameter to command
*/
-int liquidio_set_feature(struct net_device *netdev, int cmd);
+int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1);
/**
* \brief Link control command completion callback
@@ -131,14 +135,30 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr);
*/
void liquidio_set_ethtool_ops(struct net_device *netdev);
-static inline void
-*recv_buffer_alloc(struct octeon_device *oct __attribute__((unused)),
- u32 q_no __attribute__((unused)), u32 size)
-{
#define SKB_ADJ_MASK 0x3F
#define SKB_ADJ (SKB_ADJ_MASK + 1)
- struct sk_buff *skb = dev_alloc_skb(size + SKB_ADJ);
+#define MIN_SKB_SIZE 256 /* 8 bytes and more - 8 bytes for PTP */
+#define LIO_RXBUFFER_SZ 2048
+
+static inline void
+*recv_buffer_alloc(struct octeon_device *oct,
+ struct octeon_skb_page_info *pg_info)
+{
+ struct page *page;
+ struct sk_buff *skb;
+ struct octeon_skb_page_info *skb_pg_info;
+
+ page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+ if (unlikely(!page))
+ return NULL;
+
+ skb = dev_alloc_skb(MIN_SKB_SIZE + SKB_ADJ);
+ if (unlikely(!skb)) {
+ __free_page(page);
+ pg_info->page = NULL;
+ return NULL;
+ }
if ((unsigned long)skb->data & SKB_ADJ_MASK) {
u32 r = SKB_ADJ - ((unsigned long)skb->data & SKB_ADJ_MASK);
@@ -146,11 +166,151 @@ static inline void
skb_reserve(skb, r);
}
+ skb_pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+ /* Get DMA info */
+ pg_info->dma = dma_map_page(&oct->pci_dev->dev, page, 0,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+
+ /* Mapping failed!! */
+ if (dma_mapping_error(&oct->pci_dev->dev, pg_info->dma)) {
+ __free_page(page);
+ dev_kfree_skb_any((struct sk_buff *)skb);
+ pg_info->page = NULL;
+ return NULL;
+ }
+
+ pg_info->page = page;
+ pg_info->page_offset = 0;
+ skb_pg_info->page = page;
+ skb_pg_info->page_offset = 0;
+ skb_pg_info->dma = pg_info->dma;
+
return (void *)skb;
}
+static inline void
+*recv_buffer_fast_alloc(u32 size)
+{
+ struct sk_buff *skb;
+ struct octeon_skb_page_info *skb_pg_info;
+
+ skb = dev_alloc_skb(size + SKB_ADJ);
+ if (unlikely(!skb))
+ return NULL;
+
+ if ((unsigned long)skb->data & SKB_ADJ_MASK) {
+ u32 r = SKB_ADJ - ((unsigned long)skb->data & SKB_ADJ_MASK);
+
+ skb_reserve(skb, r);
+ }
+
+ skb_pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+ skb_pg_info->page = NULL;
+ skb_pg_info->page_offset = 0;
+ skb_pg_info->dma = 0;
+
+ return skb;
+}
+
+static inline int
+recv_buffer_recycle(struct octeon_device *oct, void *buf)
+{
+ struct octeon_skb_page_info *pg_info = buf;
+
+ if (!pg_info->page) {
+ dev_err(&oct->pci_dev->dev, "%s: pg_info->page NULL\n",
+ __func__);
+ return -ENOMEM;
+ }
+
+ if (unlikely(page_count(pg_info->page) != 1) ||
+ unlikely(page_to_nid(pg_info->page) != numa_node_id())) {
+ dma_unmap_page(&oct->pci_dev->dev,
+ pg_info->dma, (PAGE_SIZE << 0),
+ DMA_FROM_DEVICE);
+ pg_info->dma = 0;
+ pg_info->page = NULL;
+ pg_info->page_offset = 0;
+ return -ENOMEM;
+ }
+
+ /* Flip to other half of the buffer */
+ if (pg_info->page_offset == 0)
+ pg_info->page_offset = LIO_RXBUFFER_SZ;
+ else
+ pg_info->page_offset = 0;
+ page_ref_inc(pg_info->page);
+
+ return 0;
+}
+
+static inline void
+*recv_buffer_reuse(struct octeon_device *oct, void *buf)
+{
+ struct octeon_skb_page_info *pg_info = buf, *skb_pg_info;
+ struct sk_buff *skb;
+
+ skb = dev_alloc_skb(MIN_SKB_SIZE + SKB_ADJ);
+ if (unlikely(!skb)) {
+ dma_unmap_page(&oct->pci_dev->dev,
+ pg_info->dma, (PAGE_SIZE << 0),
+ DMA_FROM_DEVICE);
+ return NULL;
+ }
+
+ if ((unsigned long)skb->data & SKB_ADJ_MASK) {
+ u32 r = SKB_ADJ - ((unsigned long)skb->data & SKB_ADJ_MASK);
+
+ skb_reserve(skb, r);
+ }
+
+ skb_pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+ skb_pg_info->page = pg_info->page;
+ skb_pg_info->page_offset = pg_info->page_offset;
+ skb_pg_info->dma = pg_info->dma;
+
+ return skb;
+}
+
+static inline void
+recv_buffer_destroy(void *buffer, struct octeon_skb_page_info *pg_info)
+{
+ struct sk_buff *skb = (struct sk_buff *)buffer;
+
+ put_page(pg_info->page);
+ pg_info->dma = 0;
+ pg_info->page = NULL;
+ pg_info->page_offset = 0;
+
+ if (skb)
+ dev_kfree_skb_any(skb);
+}
+
static inline void recv_buffer_free(void *buffer)
{
+ struct sk_buff *skb = (struct sk_buff *)buffer;
+ struct octeon_skb_page_info *pg_info;
+
+ pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+
+ if (pg_info->page) {
+ put_page(pg_info->page);
+ pg_info->dma = 0;
+ pg_info->page = NULL;
+ pg_info->page_offset = 0;
+ }
+
+ dev_kfree_skb_any((struct sk_buff *)buffer);
+}
+
+static inline void
+recv_buffer_fast_free(void *buffer)
+{
+ dev_kfree_skb_any((struct sk_buff *)buffer);
+}
+
+static inline void tx_buffer_free(void *buffer)
+{
dev_kfree_skb_any((struct sk_buff *)buffer);
}
@@ -159,7 +319,17 @@ static inline void recv_buffer_free(void *buffer)
#define lio_dma_free(oct, size, virt_addr, dma_addr) \
dma_free_coherent(&oct->pci_dev->dev, size, virt_addr, dma_addr)
-#define get_rbd(ptr) (((struct sk_buff *)(ptr))->data)
+static inline
+void *get_rbd(struct sk_buff *skb)
+{
+ struct octeon_skb_page_info *pg_info;
+ unsigned char *va;
+
+ pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+ va = page_address(pg_info->page) + pg_info->page_offset;
+
+ return va;
+}
static inline u64
lio_map_ring_info(struct octeon_droq *droq, u32 i)
@@ -183,33 +353,44 @@ lio_unmap_ring_info(struct pci_dev *pci_dev,
}
static inline u64
-lio_map_ring(struct pci_dev *pci_dev,
- void *buf, u32 size)
+lio_map_ring(void *buf)
{
dma_addr_t dma_addr;
- dma_addr = dma_map_single(&pci_dev->dev, get_rbd(buf), size,
- DMA_FROM_DEVICE);
+ struct sk_buff *skb = (struct sk_buff *)buf;
+ struct octeon_skb_page_info *pg_info;
- BUG_ON(dma_mapping_error(&pci_dev->dev, dma_addr));
+ pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+ if (!pg_info->page) {
+ pr_err("%s: pg_info->page NULL\n", __func__);
+ WARN_ON(1);
+ }
+
+ /* Get DMA info */
+ dma_addr = pg_info->dma;
+ if (!pg_info->dma) {
+ pr_err("%s: ERROR it should be already available\n",
+ __func__);
+ WARN_ON(1);
+ }
+ dma_addr += pg_info->page_offset;
return (u64)dma_addr;
}
static inline void
lio_unmap_ring(struct pci_dev *pci_dev,
- u64 buf_ptr, u32 size)
+ u64 buf_ptr)
+
{
- dma_unmap_single(&pci_dev->dev,
- buf_ptr, size,
- DMA_FROM_DEVICE);
+ dma_unmap_page(&pci_dev->dev,
+ buf_ptr, (PAGE_SIZE << 0),
+ DMA_FROM_DEVICE);
}
-static inline void *octeon_fast_packet_alloc(struct octeon_device *oct,
- struct octeon_droq *droq,
- u32 q_no, u32 size)
+static inline void *octeon_fast_packet_alloc(u32 size)
{
- return recv_buffer_alloc(oct, q_no, size);
+ return recv_buffer_fast_alloc(size);
}
static inline void octeon_fast_packet_next(struct octeon_droq *droq,
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
index 1a0191549cb3..7843b8a05dcf 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
@@ -44,11 +44,11 @@
void *
octeon_alloc_soft_command_resp(struct octeon_device *oct,
- struct octeon_instr_64B *cmd,
- size_t rdatasize)
+ union octeon_instr_64B *cmd,
+ u32 rdatasize)
{
struct octeon_soft_command *sc;
- struct octeon_instr_ih *ih;
+ struct octeon_instr_ih2 *ih2;
struct octeon_instr_irh *irh;
struct octeon_instr_rdp *rdp;
@@ -59,24 +59,25 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct,
return NULL;
/* Copy existing command structure into the soft command */
- memcpy(&sc->cmd, cmd, sizeof(struct octeon_instr_64B));
+ memcpy(&sc->cmd, cmd, sizeof(union octeon_instr_64B));
/* Add in the response related fields. Opcode and Param are already
* there.
*/
- ih = (struct octeon_instr_ih *)&sc->cmd.ih;
- ih->fsz = 40; /* irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */
+ ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+ rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
+ irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
+ ih2->fsz = 40; /* irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */
- irh = (struct octeon_instr_irh *)&sc->cmd.irh;
irh->rflag = 1; /* a response is required */
- irh->len = 4; /* means four 64-bit words immediately follow irh */
- rdp = (struct octeon_instr_rdp *)&sc->cmd.rdp;
rdp->pcie_port = oct->pcie_port;
rdp->rlen = rdatasize;
*sc->status_word = COMPLETION_WORD_INIT;
+ sc->cmd.cmd2.rptr = sc->dmarptr;
+
sc->wait_time = 1000;
sc->timeout = jiffies + sc->wait_time;
@@ -119,12 +120,11 @@ static void octnet_link_ctrl_callback(struct octeon_device *oct,
static inline struct octeon_soft_command
*octnic_alloc_ctrl_pkt_sc(struct octeon_device *oct,
- struct octnic_ctrl_pkt *nctrl,
- struct octnic_ctrl_params nparams)
+ struct octnic_ctrl_pkt *nctrl)
{
struct octeon_soft_command *sc = NULL;
u8 *data;
- size_t rdatasize;
+ u32 rdatasize;
u32 uddsize = 0, datasize = 0;
uddsize = (u32)(nctrl->ncmd.s.more * 8);
@@ -143,7 +143,7 @@ static inline struct octeon_soft_command
data = (u8 *)sc->virtdptr;
- memcpy(data, &nctrl->ncmd, OCTNET_CMD_SIZE);
+ memcpy(data, &nctrl->ncmd, OCTNET_CMD_SIZE);
octeon_swap_8B_data((u64 *)data, (OCTNET_CMD_SIZE >> 3));
@@ -152,6 +152,8 @@ static inline struct octeon_soft_command
memcpy(data + OCTNET_CMD_SIZE, nctrl->udd, uddsize);
}
+ sc->iq_no = (u32)nctrl->iq_no;
+
octeon_prepare_soft_command(oct, sc, OPCODE_NIC, OPCODE_NIC_CMD,
0, 0, 0);
@@ -164,13 +166,12 @@ static inline struct octeon_soft_command
int
octnet_send_nic_ctrl_pkt(struct octeon_device *oct,
- struct octnic_ctrl_pkt *nctrl,
- struct octnic_ctrl_params nparams)
+ struct octnic_ctrl_pkt *nctrl)
{
int retval;
struct octeon_soft_command *sc = NULL;
- sc = octnic_alloc_ctrl_pkt_sc(oct, nctrl, nparams);
+ sc = octnic_alloc_ctrl_pkt_sc(oct, nctrl);
if (!sc) {
dev_err(&oct->pci_dev->dev, "%s soft command alloc failed\n",
__func__);
@@ -178,7 +179,7 @@ octnet_send_nic_ctrl_pkt(struct octeon_device *oct,
}
retval = octeon_send_soft_command(oct, sc);
- if (retval) {
+ if (retval == IQ_SEND_FAILED) {
octeon_free_soft_command(oct, sc);
dev_err(&oct->pci_dev->dev, "%s soft command send failed status: %x\n",
__func__, retval);
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
index 0238857c8105..b71a2bbe4bee 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
@@ -52,6 +52,9 @@ struct octnic_ctrl_pkt {
/** Additional data that may be needed by some commands. */
u64 udd[MAX_NCTRL_UDD];
+ /** Input queue to use to send this command. */
+ u64 iq_no;
+
/** Time to wait for Octeon software to respond to this control command.
* If wait_time is 0, OSI assumes no response is expected.
*/
@@ -82,7 +85,7 @@ struct octnic_data_pkt {
u32 datasize;
/** Command to be passed to the Octeon device software. */
- struct octeon_instr_64B cmd;
+ union octeon_instr_64B cmd;
/** Input queue to use to send this command. */
u32 q_no;
@@ -94,15 +97,14 @@ struct octnic_data_pkt {
*/
union octnic_cmd_setup {
struct {
- u32 ifidx:8;
- u32 cksum_offset:7;
+ u32 iq_no:8;
u32 gather:1;
u32 timestamp:1;
- u32 ipv4opts_ipv6exthdr:2;
u32 ip_csum:1;
+ u32 transport_csum:1;
u32 tnl_csum:1;
+ u32 rsvd:19;
- u32 rsvd:11;
union {
u32 datasize;
u32 gatherptrs;
@@ -113,79 +115,146 @@ union octnic_cmd_setup {
};
-struct octnic_ctrl_params {
- u32 resp_order;
-};
-
static inline int octnet_iq_is_full(struct octeon_device *oct, u32 q_no)
{
return ((u32)atomic_read(&oct->instr_queue[q_no]->instr_pending)
>= (oct->instr_queue[q_no]->max_count - 2));
}
-/** Utility function to prepare a 64B NIC instruction based on a setup command
- * @param cmd - pointer to instruction to be filled in.
- * @param setup - pointer to the setup structure
- * @param q_no - which queue for back pressure
- *
- * Assumes the cmd instruction is pre-allocated, but no fields are filled in.
- */
static inline void
-octnet_prepare_pci_cmd(struct octeon_instr_64B *cmd,
- union octnic_cmd_setup *setup, u32 tag)
+octnet_prepare_pci_cmd_o2(struct octeon_device *oct,
+ union octeon_instr_64B *cmd,
+ union octnic_cmd_setup *setup, u32 tag)
{
- struct octeon_instr_ih *ih;
+ struct octeon_instr_ih2 *ih2;
struct octeon_instr_irh *irh;
union octnic_packet_params packet_params;
+ int port;
- memset(cmd, 0, sizeof(struct octeon_instr_64B));
+ memset(cmd, 0, sizeof(union octeon_instr_64B));
- ih = (struct octeon_instr_ih *)&cmd->ih;
+ ih2 = (struct octeon_instr_ih2 *)&cmd->cmd2.ih2;
/* assume that rflag is cleared so therefore front data will only have
- * irh and ossp[1] and ossp[2] for a total of 24 bytes
+ * irh and ossp[0], ossp[1] for a total of 32 bytes
*/
- ih->fsz = 24;
+ ih2->fsz = 24;
+
+ ih2->tagtype = ORDERED_TAG;
+ ih2->grp = DEFAULT_POW_GRP;
- ih->tagtype = ORDERED_TAG;
- ih->grp = DEFAULT_POW_GRP;
+ port = (int)oct->instr_queue[setup->s.iq_no]->txpciq.s.port;
if (tag)
- ih->tag = tag;
+ ih2->tag = tag;
else
- ih->tag = LIO_DATA(setup->s.ifidx);
+ ih2->tag = LIO_DATA(port);
- ih->raw = 1;
- ih->qos = (setup->s.ifidx & 3) + 4; /* map qos based on interface */
+ ih2->raw = 1;
+ ih2->qos = (port & 3) + 4; /* map qos based on interface */
if (!setup->s.gather) {
- ih->dlengsz = setup->s.u.datasize;
+ ih2->dlengsz = setup->s.u.datasize;
} else {
- ih->gather = 1;
- ih->dlengsz = setup->s.u.gatherptrs;
+ ih2->gather = 1;
+ ih2->dlengsz = setup->s.u.gatherptrs;
}
- irh = (struct octeon_instr_irh *)&cmd->irh;
+ irh = (struct octeon_instr_irh *)&cmd->cmd2.irh;
irh->opcode = OPCODE_NIC;
irh->subcode = OPCODE_NIC_NW_DATA;
packet_params.u32 = 0;
- if (setup->s.cksum_offset) {
- packet_params.s.csoffset = setup->s.cksum_offset;
- packet_params.s.ipv4opts_ipv6exthdr =
- setup->s.ipv4opts_ipv6exthdr;
+ packet_params.s.ip_csum = setup->s.ip_csum;
+ packet_params.s.transport_csum = setup->s.transport_csum;
+ packet_params.s.tnl_csum = setup->s.tnl_csum;
+ packet_params.s.tsflag = setup->s.timestamp;
+
+ irh->ossp = packet_params.u32;
+}
+
+static inline void
+octnet_prepare_pci_cmd_o3(struct octeon_device *oct,
+ union octeon_instr_64B *cmd,
+ union octnic_cmd_setup *setup, u32 tag)
+{
+ struct octeon_instr_irh *irh;
+ struct octeon_instr_ih3 *ih3;
+ struct octeon_instr_pki_ih3 *pki_ih3;
+ union octnic_packet_params packet_params;
+ int port;
+
+ memset(cmd, 0, sizeof(union octeon_instr_64B));
+
+ ih3 = (struct octeon_instr_ih3 *)&cmd->cmd3.ih3;
+ pki_ih3 = (struct octeon_instr_pki_ih3 *)&cmd->cmd3.pki_ih3;
+
+ /* assume that rflag is cleared so therefore front data will only have
+ * irh and ossp[1] and ossp[2] for a total of 24 bytes
+ */
+ ih3->pkind = oct->instr_queue[setup->s.iq_no]->txpciq.s.pkind;
+ /*PKI IH*/
+ ih3->fsz = 24 + 8;
+
+ if (!setup->s.gather) {
+ ih3->dlengsz = setup->s.u.datasize;
+ } else {
+ ih3->gather = 1;
+ ih3->dlengsz = setup->s.u.gatherptrs;
}
+ pki_ih3->w = 1;
+ pki_ih3->raw = 1;
+ pki_ih3->utag = 1;
+ pki_ih3->utt = 1;
+ pki_ih3->uqpg = oct->instr_queue[setup->s.iq_no]->txpciq.s.use_qpg;
+
+ port = (int)oct->instr_queue[setup->s.iq_no]->txpciq.s.port;
+
+ if (tag)
+ pki_ih3->tag = tag;
+ else
+ pki_ih3->tag = LIO_DATA(port);
+
+ pki_ih3->tagtype = ORDERED_TAG;
+ pki_ih3->qpg = oct->instr_queue[setup->s.iq_no]->txpciq.s.qpg;
+ pki_ih3->pm = 0x7; /*0x7 - meant for Parse nothing, uninterpreted*/
+ pki_ih3->sl = 8; /* sl will be sizeof(pki_ih3)*/
+
+ irh = (struct octeon_instr_irh *)&cmd->cmd3.irh;
+
+ irh->opcode = OPCODE_NIC;
+ irh->subcode = OPCODE_NIC_NW_DATA;
+
+ packet_params.u32 = 0;
+
packet_params.s.ip_csum = setup->s.ip_csum;
+ packet_params.s.transport_csum = setup->s.transport_csum;
packet_params.s.tnl_csum = setup->s.tnl_csum;
- packet_params.s.ifidx = setup->s.ifidx;
packet_params.s.tsflag = setup->s.timestamp;
irh->ossp = packet_params.u32;
}
+/** Utility function to prepare a 64B NIC instruction based on a setup command
+ * @param cmd - pointer to instruction to be filled in.
+ * @param setup - pointer to the setup structure
+ * @param q_no - which queue for back pressure
+ *
+ * Assumes the cmd instruction is pre-allocated, but no fields are filled in.
+ */
+static inline void
+octnet_prepare_pci_cmd(struct octeon_device *oct, union octeon_instr_64B *cmd,
+ union octnic_cmd_setup *setup, u32 tag)
+{
+ if (OCTEON_CN6XXX(oct))
+ octnet_prepare_pci_cmd_o2(oct, cmd, setup, tag);
+ else
+ octnet_prepare_pci_cmd_o3(oct, cmd, setup, tag);
+}
+
/** Allocate and a soft command with space for a response immediately following
* the commnad.
* @param oct - octeon device pointer
@@ -198,8 +267,8 @@ octnet_prepare_pci_cmd(struct octeon_instr_64B *cmd,
*/
void *
octeon_alloc_soft_command_resp(struct octeon_device *oct,
- struct octeon_instr_64B *cmd,
- size_t rdatasize);
+ union octeon_instr_64B *cmd,
+ u32 rdatasize);
/** Send a NIC data packet to the device
* @param oct - octeon device pointer
@@ -214,14 +283,11 @@ int octnet_send_nic_data_pkt(struct octeon_device *oct,
/** Send a NIC control packet to the device
* @param oct - octeon device pointer
* @param nctrl - control structure with command, timout, and callback info
- * @param nparams - response control structure
- *
* @returns IQ_FAILED if it failed to add to the input queue. IQ_STOP if it the
* queue should be stopped, and IQ_SEND_OK if it sent okay.
*/
int
octnet_send_nic_ctrl_pkt(struct octeon_device *oct,
- struct octnic_ctrl_pkt *nctrl,
- struct octnic_ctrl_params nparams);
+ struct octnic_ctrl_pkt *nctrl);
#endif
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index 931391574048..8649677b2411 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -69,12 +69,16 @@ static inline int IQ_INSTR_MODE_64B(struct octeon_device *oct, int iq_no)
/* Return 0 on success, 1 on failure */
int octeon_init_instr_queue(struct octeon_device *oct,
- u32 iq_no, u32 num_descs)
+ union oct_txpciq txpciq,
+ u32 num_descs)
{
struct octeon_instr_queue *iq;
struct octeon_iq_config *conf = NULL;
+ u32 iq_no = (u32)txpciq.s.q_no;
u32 q_size;
struct cavium_wq *db_wq;
+ int orig_node = dev_to_node(&oct->pci_dev->dev);
+ int numa_node = cpu_to_node(iq_no % num_online_cpus());
if (OCTEON_CN6XXX(oct))
conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn6xxx, conf)));
@@ -95,9 +99,15 @@ int octeon_init_instr_queue(struct octeon_device *oct,
q_size = (u32)conf->instr_type * num_descs;
iq = oct->instr_queue[iq_no];
+ iq->oct_dev = oct;
+ set_dev_node(&oct->pci_dev->dev, numa_node);
iq->base_addr = lio_dma_alloc(oct, q_size,
(dma_addr_t *)&iq->base_addr_dma);
+ set_dev_node(&oct->pci_dev->dev, orig_node);
+ if (!iq->base_addr)
+ iq->base_addr = lio_dma_alloc(oct, q_size,
+ (dma_addr_t *)&iq->base_addr_dma);
if (!iq->base_addr) {
dev_err(&oct->pci_dev->dev, "Cannot allocate memory for instr queue %d\n",
iq_no);
@@ -109,7 +119,11 @@ int octeon_init_instr_queue(struct octeon_device *oct,
/* Initialize a list to holds requests that have been posted to Octeon
* but has yet to be fetched by octeon
*/
- iq->request_list = vmalloc(sizeof(*iq->request_list) * num_descs);
+ iq->request_list = vmalloc_node((sizeof(*iq->request_list) * num_descs),
+ numa_node);
+ if (!iq->request_list)
+ iq->request_list = vmalloc(sizeof(*iq->request_list) *
+ num_descs);
if (!iq->request_list) {
lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
dev_err(&oct->pci_dev->dev, "Alloc failed for IQ[%d] nr free list\n",
@@ -122,7 +136,7 @@ int octeon_init_instr_queue(struct octeon_device *oct,
dev_dbg(&oct->pci_dev->dev, "IQ[%d]: base: %p basedma: %llx count: %d\n",
iq_no, iq->base_addr, iq->base_addr_dma, iq->max_count);
- iq->iq_no = iq_no;
+ iq->txpciq.u64 = txpciq.u64;
iq->fill_threshold = (u32)conf->db_min;
iq->fill_cnt = 0;
iq->host_write_index = 0;
@@ -189,26 +203,38 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
/* Return 0 on success, 1 on failure */
int octeon_setup_iq(struct octeon_device *oct,
- u32 iq_no,
+ int ifidx,
+ int q_index,
+ union oct_txpciq txpciq,
u32 num_descs,
void *app_ctx)
{
+ u32 iq_no = (u32)txpciq.s.q_no;
+ int numa_node = cpu_to_node(iq_no % num_online_cpus());
+
if (oct->instr_queue[iq_no]) {
dev_dbg(&oct->pci_dev->dev, "IQ is in use. Cannot create the IQ: %d again\n",
iq_no);
+ oct->instr_queue[iq_no]->txpciq.u64 = txpciq.u64;
oct->instr_queue[iq_no]->app_ctx = app_ctx;
return 0;
}
oct->instr_queue[iq_no] =
- vmalloc(sizeof(struct octeon_instr_queue));
+ vmalloc_node(sizeof(struct octeon_instr_queue), numa_node);
+ if (!oct->instr_queue[iq_no])
+ oct->instr_queue[iq_no] =
+ vmalloc(sizeof(struct octeon_instr_queue));
if (!oct->instr_queue[iq_no])
return 1;
memset(oct->instr_queue[iq_no], 0,
sizeof(struct octeon_instr_queue));
+ oct->instr_queue[iq_no]->q_index = q_index;
oct->instr_queue[iq_no]->app_ctx = app_ctx;
- if (octeon_init_instr_queue(oct, iq_no, num_descs)) {
+ oct->instr_queue[iq_no]->ifidx = ifidx;
+
+ if (octeon_init_instr_queue(oct, txpciq, num_descs)) {
vfree(oct->instr_queue[iq_no]);
oct->instr_queue[iq_no] = NULL;
return 1;
@@ -395,7 +421,7 @@ lio_process_iq_request_list(struct octeon_device *oct,
case REQTYPE_SOFT_COMMAND:
sc = buf;
- irh = (struct octeon_instr_irh *)&sc->cmd.irh;
+ irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
if (irh->rflag) {
/* We're expecting a response from Octeon.
* It's up to lio_process_ordered_list() to
@@ -558,7 +584,7 @@ octeon_prepare_soft_command(struct octeon_device *oct,
u64 ossp1)
{
struct octeon_config *oct_cfg;
- struct octeon_instr_ih *ih;
+ struct octeon_instr_ih2 *ih2;
struct octeon_instr_irh *irh;
struct octeon_instr_rdp *rdp;
@@ -567,73 +593,69 @@ octeon_prepare_soft_command(struct octeon_device *oct,
oct_cfg = octeon_get_conf(oct);
- ih = (struct octeon_instr_ih *)&sc->cmd.ih;
- ih->tagtype = ATOMIC_TAG;
- ih->tag = LIO_CONTROL;
- ih->raw = 1;
- ih->grp = CFG_GET_CTRL_Q_GRP(oct_cfg);
+ ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+ ih2->tagtype = ATOMIC_TAG;
+ ih2->tag = LIO_CONTROL;
+ ih2->raw = 1;
+ ih2->grp = CFG_GET_CTRL_Q_GRP(oct_cfg);
if (sc->datasize) {
- ih->dlengsz = sc->datasize;
- ih->rs = 1;
+ ih2->dlengsz = sc->datasize;
+ ih2->rs = 1;
}
- irh = (struct octeon_instr_irh *)&sc->cmd.irh;
+ irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
irh->opcode = opcode;
irh->subcode = subcode;
/* opcode/subcode specific parameters (ossp) */
irh->ossp = irh_ossp;
- sc->cmd.ossp[0] = ossp0;
- sc->cmd.ossp[1] = ossp1;
+ sc->cmd.cmd2.ossp[0] = ossp0;
+ sc->cmd.cmd2.ossp[1] = ossp1;
if (sc->rdatasize) {
- rdp = (struct octeon_instr_rdp *)&sc->cmd.rdp;
+ rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
rdp->pcie_port = oct->pcie_port;
rdp->rlen = sc->rdatasize;
irh->rflag = 1;
- irh->len = 4;
- ih->fsz = 40; /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
+ ih2->fsz = 40; /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
} else {
irh->rflag = 0;
- irh->len = 2;
- ih->fsz = 24; /* irh + ossp[0] + ossp[1] = 24 bytes */
+ ih2->fsz = 24; /* irh + ossp[0] + ossp[1] = 24 bytes */
}
-
- while (!(oct->io_qmask.iq & (1 << sc->iq_no)))
- sc->iq_no++;
}
int octeon_send_soft_command(struct octeon_device *oct,
struct octeon_soft_command *sc)
{
- struct octeon_instr_ih *ih;
+ struct octeon_instr_ih2 *ih2;
struct octeon_instr_irh *irh;
struct octeon_instr_rdp *rdp;
+ u32 len;
- ih = (struct octeon_instr_ih *)&sc->cmd.ih;
- if (ih->dlengsz) {
- BUG_ON(!sc->dmadptr);
- sc->cmd.dptr = sc->dmadptr;
+ ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+ if (ih2->dlengsz) {
+ WARN_ON(!sc->dmadptr);
+ sc->cmd.cmd2.dptr = sc->dmadptr;
}
-
- irh = (struct octeon_instr_irh *)&sc->cmd.irh;
+ irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
if (irh->rflag) {
BUG_ON(!sc->dmarptr);
BUG_ON(!sc->status_word);
*sc->status_word = COMPLETION_WORD_INIT;
- rdp = (struct octeon_instr_rdp *)&sc->cmd.rdp;
+ rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
- sc->cmd.rptr = sc->dmarptr;
+ sc->cmd.cmd2.rptr = sc->dmarptr;
}
+ len = (u32)ih2->dlengsz;
if (sc->wait_time)
sc->timeout = jiffies + sc->wait_time;
- return octeon_send_command(oct, sc->iq_no, 1, &sc->cmd, sc,
- (u32)ih->dlengsz, REQTYPE_SOFT_COMMAND);
+ return (octeon_send_command(oct, sc->iq_no, 1, &sc->cmd, sc,
+ len, REQTYPE_SOFT_COMMAND));
}
int octeon_setup_sc_buffer_pool(struct octeon_device *oct)
diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c
index 6287a7c72b9e..e2e9103e6ebd 100644
--- a/drivers/net/ethernet/cavium/liquidio/response_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c
@@ -85,6 +85,7 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev,
u32 status;
u64 status64;
struct octeon_instr_rdp *rdp;
+ u64 rptr;
ordered_sc_list = &octeon_dev->response_list[OCTEON_ORDERED_SC_LIST];
@@ -102,7 +103,8 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev,
sc = (struct octeon_soft_command *)ordered_sc_list->
head.next;
- rdp = (struct octeon_instr_rdp *)&sc->cmd.rdp;
+ rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
+ rptr = sc->cmd.cmd2.rptr;
status = OCTEON_REQUEST_PENDING;
@@ -110,7 +112,7 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev,
* to where rptr is pointing to
*/
dma_sync_single_for_cpu(&octeon_dev->pci_dev->dev,
- sc->cmd.rptr, rdp->rlen,
+ rptr, rdp->rlen,
DMA_FROM_DEVICE);
status64 = *sc->status_word;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 477db477b133..c45de49dc963 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -64,6 +64,7 @@
#include <net/bonding.h>
#include <net/addrconf.h>
#include <asm/uaccess.h>
+#include <linux/crash_dump.h>
#include "cxgb4.h"
#include "t4_regs.h"
@@ -206,7 +207,7 @@ static int rx_dma_offset = 2;
static unsigned int num_vf[NUM_OF_PF_WITH_SRIOV];
module_param_array(num_vf, uint, NULL, 0644);
-MODULE_PARM_DESC(num_vf, "number of VFs for each of PFs 0-3");
+MODULE_PARM_DESC(num_vf, "number of VFs for each of PFs 0-3, deprecated parameter - please use the pci sysfs interface.");
#endif
/* TX Queue select used to determine what algorithm to use for selecting TX
@@ -460,11 +461,8 @@ static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
struct port_info *pi = netdev_priv(dev);
struct adapter *adapter = pi->adapter;
- if (!(dev->flags & IFF_PROMISC)) {
- __dev_uc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync);
- if (!(dev->flags & IFF_ALLMULTI))
- __dev_mc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync);
- }
+ __dev_uc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync);
+ __dev_mc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync);
return t4_set_rxmode(adapter, adapter->mbox, pi->viid, mtu,
(dev->flags & IFF_PROMISC) ? 1 : 0,
@@ -3735,7 +3733,8 @@ static int adap_init0(struct adapter *adap)
return ret;
/* Contact FW, advertising Master capability */
- ret = t4_fw_hello(adap, adap->mbox, adap->mbox, MASTER_MAY, &state);
+ ret = t4_fw_hello(adap, adap->mbox, adap->mbox,
+ is_kdump_kernel() ? MASTER_MUST : MASTER_MAY, &state);
if (ret < 0) {
dev_err(adap->pdev_dev, "could not connect to FW, error %d\n",
ret);
@@ -4366,6 +4365,11 @@ static void cfg_queues(struct adapter *adap)
if (q10g > netif_get_num_default_rss_queues())
q10g = netif_get_num_default_rss_queues();
+ /* Reduce memory usage in kdump environment, disable all offload.
+ */
+ if (is_kdump_kernel())
+ adap->params.offload = 0;
+
for_each_port(adap, i) {
struct port_info *pi = adap2pinfo(adap, i);
@@ -4829,6 +4833,60 @@ static int get_chip_type(struct pci_dev *pdev, u32 pl_rev)
return -EINVAL;
}
+#ifdef CONFIG_PCI_IOV
+static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
+{
+ int err = 0;
+ int current_vfs = pci_num_vf(pdev);
+ u32 pcie_fw;
+ void __iomem *regs;
+
+ regs = pci_ioremap_bar(pdev, 0);
+ if (!regs) {
+ dev_err(&pdev->dev, "cannot map device registers\n");
+ return -ENOMEM;
+ }
+
+ pcie_fw = readl(regs + PCIE_FW_A);
+ iounmap(regs);
+ /* Check if cxgb4 is the MASTER and fw is initialized */
+ if (!(pcie_fw & PCIE_FW_INIT_F) ||
+ !(pcie_fw & PCIE_FW_MASTER_VLD_F) ||
+ PCIE_FW_MASTER_G(pcie_fw) != 4) {
+ dev_warn(&pdev->dev,
+ "cxgb4 driver needs to be MASTER to support SRIOV\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* If any of the VF's is already assigned to Guest OS, then
+ * SRIOV for the same cannot be modified
+ */
+ if (current_vfs && pci_vfs_assigned(pdev)) {
+ dev_err(&pdev->dev,
+ "Cannot modify SR-IOV while VFs are assigned\n");
+ num_vfs = current_vfs;
+ return num_vfs;
+ }
+
+ /* Disable SRIOV when zero is passed.
+ * One needs to disable SRIOV before modifying it, else
+ * stack throws the below warning:
+ * " 'n' VFs already enabled. Disable before enabling 'm' VFs."
+ */
+ if (!num_vfs) {
+ pci_disable_sriov(pdev);
+ return num_vfs;
+ }
+
+ if (num_vfs != current_vfs) {
+ err = pci_enable_sriov(pdev, num_vfs);
+ if (err)
+ return err;
+ }
+ return num_vfs;
+}
+#endif
+
static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int func, i, err, s_qpp, qpp, num_seg;
@@ -5162,11 +5220,16 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
sriov:
#ifdef CONFIG_PCI_IOV
- if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0)
+ if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0) {
+ dev_warn(&pdev->dev,
+ "Enabling SR-IOV VFs using the num_vf module "
+ "parameter is deprecated - please use the pci sysfs "
+ "interface instead.\n");
if (pci_enable_sriov(pdev, num_vf[func]) == 0)
dev_info(&pdev->dev,
"instantiated %u virtual functions\n",
num_vf[func]);
+ }
#endif
return 0;
@@ -5259,6 +5322,9 @@ static struct pci_driver cxgb4_driver = {
.probe = init_one,
.remove = remove_one,
.shutdown = remove_one,
+#ifdef CONFIG_PCI_IOV
+ .sriov_configure = cxgb4_iov_configure,
+#endif
.err_handler = &cxgb4_eeh,
};
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 04fc6f6d1e25..8d9b2cb74aa2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -937,12 +937,8 @@ static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
{
struct port_info *pi = netdev_priv(dev);
- if (!(dev->flags & IFF_PROMISC)) {
- __dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
- if (!(dev->flags & IFF_ALLMULTI))
- __dev_mc_sync(dev, cxgb4vf_mac_sync,
- cxgb4vf_mac_unsync);
- }
+ __dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
+ __dev_mc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
return t4vf_set_rxmode(pi->adapter, pi->viid, -1,
(dev->flags & IFF_PROMISC) != 0,
(dev->flags & IFF_ALLMULTI) != 0,
diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index 60383040d6c6..c363b58552e9 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -53,6 +53,8 @@
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/kernel.h>
#include <linux/types.h>
@@ -1895,9 +1897,17 @@ static int cs89x0_platform_remove(struct platform_device *pdev)
return 0;
}
+static const struct __maybe_unused of_device_id cs89x0_match[] = {
+ { .compatible = "cirrus,cs8900", },
+ { .compatible = "cirrus,cs8920", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, cs89x0_match);
+
static struct platform_driver cs89x0_driver = {
.driver = {
- .name = DRV_NAME,
+ .name = DRV_NAME,
+ .of_match_table = of_match_ptr(cs89x0_match),
},
.remove = cs89x0_platform_remove,
};
diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c
index dbd4ecc205dc..963838d4fac1 100644
--- a/drivers/net/phy/mdio-mux.c
+++ b/drivers/net/phy/mdio-mux.c
@@ -115,6 +115,7 @@ int mdio_mux_init(struct device *dev,
goto err_parent_bus;
}
} else {
+ parent_bus_node = NULL;
parent_bus = mux_bus;
}
@@ -184,8 +185,7 @@ int mdio_mux_init(struct device *dev,
put_device(&pb->mii_bus->dev);
err_parent_bus:
- if (!mux_bus)
- of_node_put(parent_bus_node);
+ of_node_put(parent_bus_node);
return ret_val;
}
EXPORT_SYMBOL_GPL(mdio_mux_init);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 8cc6bf4f5ba3..4884802e0af1 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1254,6 +1254,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
return -EFAULT;
}
+ err = virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun));
+ if (err) {
+ this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
switch (tun->flags & TUN_TYPE_MASK) {
case IFF_TUN:
if (tun->flags & IFF_NO_PI) {
@@ -1280,13 +1287,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
break;
}
- err = virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun));
- if (err) {
- this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
- kfree_skb(skb);
- return -EINVAL;
- }
-
/* copy skb_ubuf_info for callback when skb has no error */
if (zerocopy) {
skb_shinfo(skb)->destructor_arg = msg_control;
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index b4d746943bc5..32173aa9208e 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -378,23 +378,37 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb)
}
/* holding rtnl */
-static void vrf_rt6_release(struct net_vrf *vrf)
+static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
struct rt6_info *rt6 = rtnl_dereference(vrf->rt6);
struct rt6_info *rt6_local = rtnl_dereference(vrf->rt6_local);
+ struct net *net = dev_net(dev);
+ struct dst_entry *dst;
RCU_INIT_POINTER(vrf->rt6, NULL);
RCU_INIT_POINTER(vrf->rt6_local, NULL);
synchronize_rcu();
- if (rt6)
- dst_release(&rt6->dst);
+ /* move dev in dst's to loopback so this VRF device can be deleted
+ * - based on dst_ifdown
+ */
+ if (rt6) {
+ dst = &rt6->dst;
+ dev_put(dst->dev);
+ dst->dev = net->loopback_dev;
+ dev_hold(dst->dev);
+ dst_release(dst);
+ }
if (rt6_local) {
if (rt6_local->rt6i_idev)
in6_dev_put(rt6_local->rt6i_idev);
- dst_release(&rt6_local->dst);
+ dst = &rt6_local->dst;
+ dev_put(dst->dev);
+ dst->dev = net->loopback_dev;
+ dev_hold(dst->dev);
+ dst_release(dst);
}
}
@@ -449,7 +463,7 @@ out:
return rc;
}
#else
-static void vrf_rt6_release(struct net_vrf *vrf)
+static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
}
@@ -518,20 +532,35 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
}
/* holding rtnl */
-static void vrf_rtable_release(struct net_vrf *vrf)
+static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
{
struct rtable *rth = rtnl_dereference(vrf->rth);
struct rtable *rth_local = rtnl_dereference(vrf->rth_local);
+ struct net *net = dev_net(dev);
+ struct dst_entry *dst;
RCU_INIT_POINTER(vrf->rth, NULL);
RCU_INIT_POINTER(vrf->rth_local, NULL);
synchronize_rcu();
- if (rth)
- dst_release(&rth->dst);
+ /* move dev in dst's to loopback so this VRF device can be deleted
+ * - based on dst_ifdown
+ */
+ if (rth) {
+ dst = &rth->dst;
+ dev_put(dst->dev);
+ dst->dev = net->loopback_dev;
+ dev_hold(dst->dev);
+ dst_release(dst);
+ }
- if (rth_local)
- dst_release(&rth_local->dst);
+ if (rth_local) {
+ dst = &rth_local->dst;
+ dev_put(dst->dev);
+ dst->dev = net->loopback_dev;
+ dev_hold(dst->dev);
+ dst_release(dst);
+ }
}
static int vrf_rtable_create(struct net_device *dev)
@@ -633,8 +662,8 @@ static void vrf_dev_uninit(struct net_device *dev)
struct net_device *port_dev;
struct list_head *iter;
- vrf_rtable_release(vrf);
- vrf_rt6_release(vrf);
+ vrf_rtable_release(dev, vrf);
+ vrf_rt6_release(dev, vrf);
netdev_for_each_lower_dev(dev, port_dev, iter)
vrf_del_slave(dev, port_dev);
@@ -669,7 +698,7 @@ static int vrf_dev_init(struct net_device *dev)
return 0;
out_rth:
- vrf_rtable_release(vrf);
+ vrf_rtable_release(dev, vrf);
out_stats:
free_percpu(dev->dstats);
dev->dstats = NULL;
@@ -785,9 +814,63 @@ out:
return rc;
}
+static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
+ const struct net_device *dev,
+ struct flowi6 *fl6,
+ int ifindex,
+ int flags)
+{
+ struct net_vrf *vrf = netdev_priv(dev);
+ struct fib6_table *table = NULL;
+ struct rt6_info *rt6;
+
+ rcu_read_lock();
+
+ /* fib6_table does not have a refcnt and can not be freed */
+ rt6 = rcu_dereference(vrf->rt6);
+ if (likely(rt6))
+ table = rt6->rt6i_table;
+
+ rcu_read_unlock();
+
+ if (!table)
+ return NULL;
+
+ return ip6_pol_route(net, table, ifindex, fl6, flags);
+}
+
+static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
+ int ifindex)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct flowi6 fl6 = {
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .flowlabel = ip6_flowinfo(iph),
+ .flowi6_mark = skb->mark,
+ .flowi6_proto = iph->nexthdr,
+ .flowi6_iif = ifindex,
+ };
+ struct net *net = dev_net(vrf_dev);
+ struct rt6_info *rt6;
+
+ rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex,
+ RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE);
+ if (unlikely(!rt6))
+ return;
+
+ if (unlikely(&rt6->dst == &net->ipv6.ip6_null_entry->dst))
+ return;
+
+ skb_dst_set(skb, &rt6->dst);
+}
+
static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
struct sk_buff *skb)
{
+ int orig_iif = skb->skb_iif;
+ bool need_strict;
+
/* loopback traffic; do not push through packet taps again.
* Reset pkt_type for upper layers to process skb
*/
@@ -798,8 +881,11 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
goto out;
}
- /* if packet is NDISC keep the ingress interface */
- if (!ipv6_ndisc_frame(skb)) {
+ /* if packet is NDISC or addressed to multicast or link-local
+ * then keep the ingress interface
+ */
+ need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
+ if (!ipv6_ndisc_frame(skb) && !need_strict) {
skb->dev = vrf_dev;
skb->skb_iif = vrf_dev->ifindex;
@@ -810,6 +896,9 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
}
+ if (need_strict)
+ vrf_ip6_input_dst(skb, vrf_dev, orig_iif);
+
out:
return skb;
}
@@ -861,13 +950,37 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
- const struct flowi6 *fl6)
+ struct flowi6 *fl6)
{
+ bool need_strict = rt6_need_strict(&fl6->daddr);
+ struct net_vrf *vrf = netdev_priv(dev);
+ struct net *net = dev_net(dev);
struct dst_entry *dst = NULL;
+ struct rt6_info *rt;
- if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) {
- struct net_vrf *vrf = netdev_priv(dev);
- struct rt6_info *rt;
+ /* send to link-local or multicast address */
+ if (need_strict) {
+ int flags = RT6_LOOKUP_F_IFACE;
+
+ /* VRF device does not have a link-local address and
+ * sending packets to link-local or mcast addresses over
+ * a VRF device does not make sense
+ */
+ if (fl6->flowi6_oif == dev->ifindex) {
+ struct dst_entry *dst = &net->ipv6.ip6_null_entry->dst;
+
+ dst_hold(dst);
+ return dst;
+ }
+
+ if (!ipv6_addr_any(&fl6->saddr))
+ flags |= RT6_LOOKUP_F_HAS_SADDR;
+
+ rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
+ if (rt)
+ dst = &rt->dst;
+
+ } else if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) {
rcu_read_lock();
@@ -880,6 +993,10 @@ static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
rcu_read_unlock();
}
+ /* make sure oif is set to VRF device for lookup */
+ if (!need_strict)
+ fl6->flowi6_oif = dev->ifindex;
+
return dst;
}
#endif
@@ -1011,6 +1128,20 @@ static void vrf_setup(struct net_device *dev)
/* don't allow vrf devices to change network namespaces. */
dev->features |= NETIF_F_NETNS_LOCAL;
+
+ /* does not make sense for a VLAN to be added to a vrf device */
+ dev->features |= NETIF_F_VLAN_CHALLENGED;
+
+ /* enable offload features */
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->features |= NETIF_F_RXCSUM | NETIF_F_HW_CSUM;
+ dev->features |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA;
+
+ dev->hw_features = dev->features;
+ dev->hw_enc_features = dev->features;
+
+ /* default to no qdisc; user can add if desired */
+ dev->priv_flags |= IFF_NO_QUEUE;
}
static int vrf_validate(struct nlattr *tb[], struct nlattr *data[])
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index e955a2859009..152421cc6f44 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -123,6 +123,10 @@ enum {
MLX5_CMD_OP_DRAIN_DCT = 0x712,
MLX5_CMD_OP_QUERY_DCT = 0x713,
MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION = 0x714,
+ MLX5_CMD_OP_CREATE_XRQ = 0x717,
+ MLX5_CMD_OP_DESTROY_XRQ = 0x718,
+ MLX5_CMD_OP_QUERY_XRQ = 0x719,
+ MLX5_CMD_OP_ARM_XRQ = 0x71a,
MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750,
MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751,
MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752,
@@ -139,6 +143,8 @@ enum {
MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771,
MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772,
MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773,
+ MLX5_CMD_OP_SET_RATE_LIMIT = 0x780,
+ MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781,
MLX5_CMD_OP_ALLOC_PD = 0x800,
MLX5_CMD_OP_DEALLOC_PD = 0x801,
MLX5_CMD_OP_ALLOC_UAR = 0x802,
@@ -362,7 +368,8 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
};
struct mlx5_ifc_fte_match_set_misc_bits {
- u8 reserved_at_0[0x20];
+ u8 reserved_at_0[0x8];
+ u8 source_sqn[0x18];
u8 reserved_at_20[0x10];
u8 source_port[0x10];
@@ -508,6 +515,17 @@ struct mlx5_ifc_e_switch_cap_bits {
u8 reserved_at_20[0x7e0];
};
+struct mlx5_ifc_qos_cap_bits {
+ u8 packet_pacing[0x1];
+ u8 reserved_0[0x1f];
+ u8 reserved_1[0x20];
+ u8 packet_pacing_max_rate[0x20];
+ u8 packet_pacing_min_rate[0x20];
+ u8 reserved_2[0x10];
+ u8 packet_pacing_rate_table_size[0x10];
+ u8 reserved_3[0x760];
+};
+
struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
u8 csum_cap[0x1];
u8 vlan_cap[0x1];
@@ -747,7 +765,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 out_of_seq_cnt[0x1];
u8 vport_counters[0x1];
- u8 reserved_at_182[0x4];
+ u8 retransmission_q_counters[0x1];
+ u8 reserved_at_183[0x3];
u8 max_qp_cnt[0xa];
u8 pkey_table_size[0x10];
@@ -774,7 +793,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 log_max_msg[0x5];
u8 reserved_at_1c8[0x4];
u8 max_tc[0x4];
- u8 reserved_at_1d0[0x6];
+ u8 reserved_at_1d0[0x1];
+ u8 dcbx[0x1];
+ u8 reserved_at_1d2[0x4];
u8 rol_s[0x1];
u8 rol_g[0x1];
u8 reserved_at_1d8[0x1];
@@ -806,7 +827,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 tph[0x1];
u8 rf[0x1];
u8 dct[0x1];
- u8 reserved_at_21b[0x1];
+ u8 qos[0x1];
u8 eth_net_offloads[0x1];
u8 roce[0x1];
u8 atomic[0x1];
@@ -932,7 +953,15 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 cqe_compression_timeout[0x10];
u8 cqe_compression_max_num[0x10];
- u8 reserved_at_5e0[0x220];
+ u8 reserved_at_5e0[0x10];
+ u8 tag_matching[0x1];
+ u8 rndv_offload_rc[0x1];
+ u8 rndv_offload_dc[0x1];
+ u8 log_tag_matching_list_sz[0x5];
+ u8 reserved_at_5e8[0x3];
+ u8 log_max_xrq[0x5];
+
+ u8 reserved_at_5f0[0x200];
};
enum mlx5_flow_destination_type {
@@ -1970,7 +1999,7 @@ struct mlx5_ifc_qpc_bits {
u8 reserved_at_560[0x5];
u8 rq_type[0x3];
- u8 srqn_rmpn[0x18];
+ u8 srqn_rmpn_xrqn[0x18];
u8 reserved_at_580[0x8];
u8 rmsn[0x18];
@@ -2021,6 +2050,7 @@ union mlx5_ifc_hca_cap_union_bits {
struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap;
struct mlx5_ifc_e_switch_cap_bits e_switch_cap;
struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap;
+ struct mlx5_ifc_qos_cap_bits qos_cap;
u8 reserved_at_0[0x8000];
};
@@ -2247,8 +2277,9 @@ struct mlx5_ifc_sqc_bits {
u8 reserved_at_40[0x8];
u8 cqn[0x18];
- u8 reserved_at_60[0xa0];
+ u8 reserved_at_60[0x90];
+ u8 packet_pacing_rate_limit_index[0x10];
u8 tis_lst_sz[0x10];
u8 reserved_at_110[0x10];
@@ -2596,7 +2627,7 @@ struct mlx5_ifc_dctc_bits {
u8 reserved_at_98[0x8];
u8 reserved_at_a0[0x8];
- u8 srqn[0x18];
+ u8 srqn_xrqn[0x18];
u8 reserved_at_c0[0x8];
u8 pd[0x18];
@@ -2648,6 +2679,7 @@ enum {
enum {
MLX5_CQ_PERIOD_MODE_START_FROM_EQE = 0x0,
MLX5_CQ_PERIOD_MODE_START_FROM_CQE = 0x1,
+ MLX5_CQ_PERIOD_NUM_MODES
};
struct mlx5_ifc_cqc_bits {
@@ -2725,6 +2757,54 @@ struct mlx5_ifc_query_adapter_param_block_bits {
u8 vsd_contd_psid[16][0x8];
};
+enum {
+ MLX5_XRQC_STATE_GOOD = 0x0,
+ MLX5_XRQC_STATE_ERROR = 0x1,
+};
+
+enum {
+ MLX5_XRQC_TOPOLOGY_NO_SPECIAL_TOPOLOGY = 0x0,
+ MLX5_XRQC_TOPOLOGY_TAG_MATCHING = 0x1,
+};
+
+enum {
+ MLX5_XRQC_OFFLOAD_RNDV = 0x1,
+};
+
+struct mlx5_ifc_tag_matching_topology_context_bits {
+ u8 log_matching_list_sz[0x4];
+ u8 reserved_at_4[0xc];
+ u8 append_next_index[0x10];
+
+ u8 sw_phase_cnt[0x10];
+ u8 hw_phase_cnt[0x10];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_xrqc_bits {
+ u8 state[0x4];
+ u8 rlkey[0x1];
+ u8 reserved_at_5[0xf];
+ u8 topology[0x4];
+ u8 reserved_at_18[0x4];
+ u8 offload[0x4];
+
+ u8 reserved_at_20[0x8];
+ u8 user_index[0x18];
+
+ u8 reserved_at_40[0x8];
+ u8 cqn[0x18];
+
+ u8 reserved_at_60[0xa0];
+
+ struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context;
+
+ u8 reserved_at_180[0x180];
+
+ struct mlx5_ifc_wq_bits wq;
+};
+
union mlx5_ifc_modify_field_select_resize_field_select_auto_bits {
struct mlx5_ifc_modify_field_select_bits modify_field_select;
struct mlx5_ifc_resize_field_select_bits resize_field_select;
@@ -3147,6 +3227,30 @@ struct mlx5_ifc_rst2init_qp_in_bits {
u8 reserved_at_800[0x80];
};
+struct mlx5_ifc_query_xrq_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+
+ struct mlx5_ifc_xrqc_bits xrq_context;
+};
+
+struct mlx5_ifc_query_xrq_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x8];
+ u8 xrqn[0x18];
+
+ u8 reserved_at_60[0x20];
+};
+
struct mlx5_ifc_query_xrc_srq_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -3550,7 +3654,27 @@ struct mlx5_ifc_query_q_counter_out_bits {
u8 out_of_sequence[0x20];
- u8 reserved_at_1e0[0x620];
+ u8 reserved_at_1e0[0x20];
+
+ u8 duplicate_request[0x20];
+
+ u8 reserved_at_220[0x20];
+
+ u8 rnr_nak_retry_err[0x20];
+
+ u8 reserved_at_260[0x20];
+
+ u8 packet_seq_err[0x20];
+
+ u8 reserved_at_2a0[0x20];
+
+ u8 implied_nak_seq_err[0x20];
+
+ u8 reserved_at_2e0[0x20];
+
+ u8 local_ack_timeout_err[0x20];
+
+ u8 reserved_at_320[0x4e0];
};
struct mlx5_ifc_query_q_counter_in_bits {
@@ -5004,6 +5128,28 @@ struct mlx5_ifc_detach_from_mcg_in_bits {
u8 multicast_gid[16][0x8];
};
+struct mlx5_ifc_destroy_xrq_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_destroy_xrq_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x8];
+ u8 xrqn[0x18];
+
+ u8 reserved_at_60[0x20];
+};
+
struct mlx5_ifc_destroy_xrc_srq_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -5589,6 +5735,30 @@ struct mlx5_ifc_dealloc_flow_counter_in_bits {
u8 reserved_at_60[0x20];
};
+struct mlx5_ifc_create_xrq_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x8];
+ u8 xrqn[0x18];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_create_xrq_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+
+ struct mlx5_ifc_xrqc_bits xrq_context;
+};
+
struct mlx5_ifc_create_xrc_srq_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -6130,6 +6300,29 @@ struct mlx5_ifc_attach_to_mcg_in_bits {
u8 multicast_gid[16][0x8];
};
+struct mlx5_ifc_arm_xrq_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_arm_xrq_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x8];
+ u8 xrqn[0x18];
+
+ u8 reserved_at_60[0x10];
+ u8 lwm[0x10];
+};
+
struct mlx5_ifc_arm_xrc_srq_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -6167,7 +6360,8 @@ struct mlx5_ifc_arm_rq_out_bits {
};
enum {
- MLX5_ARM_RQ_IN_OP_MOD_SRQ_ = 0x1,
+ MLX5_ARM_RQ_IN_OP_MOD_SRQ = 0x1,
+ MLX5_ARM_RQ_IN_OP_MOD_XRQ = 0x2,
};
struct mlx5_ifc_arm_rq_in_bits {
@@ -6360,6 +6554,30 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits {
u8 vxlan_udp_port[0x10];
};
+struct mlx5_ifc_set_rate_limit_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_set_rate_limit_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x10];
+ u8 rate_limit_index[0x10];
+
+ u8 reserved_at_60[0x20];
+
+ u8 rate_limit[0x20];
+};
+
struct mlx5_ifc_access_register_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -6484,12 +6702,15 @@ struct mlx5_ifc_pude_reg_bits {
};
struct mlx5_ifc_ptys_reg_bits {
- u8 reserved_at_0[0x8];
+ u8 an_disable_cap[0x1];
+ u8 an_disable_admin[0x1];
+ u8 reserved_at_2[0x6];
u8 local_port[0x8];
u8 reserved_at_10[0xd];
u8 proto_mask[0x3];
- u8 reserved_at_20[0x40];
+ u8 an_status[0x4];
+ u8 reserved_at_24[0x3c];
u8 eth_proto_capability[0x20];
@@ -7450,4 +7671,34 @@ struct mlx5_ifc_mcia_reg_bits {
u8 dword_11[0x20];
};
+struct mlx5_ifc_dcbx_param_bits {
+ u8 dcbx_cee_cap[0x1];
+ u8 dcbx_ieee_cap[0x1];
+ u8 dcbx_standby_cap[0x1];
+ u8 reserved_at_0[0x5];
+ u8 port_number[0x8];
+ u8 reserved_at_10[0xa];
+ u8 max_application_table_size[6];
+ u8 reserved_at_20[0x15];
+ u8 version_oper[0x3];
+ u8 reserved_at_38[5];
+ u8 version_admin[0x3];
+ u8 willing_admin[0x1];
+ u8 reserved_at_41[0x3];
+ u8 pfc_cap_oper[0x4];
+ u8 reserved_at_48[0x4];
+ u8 pfc_cap_admin[0x4];
+ u8 reserved_at_50[0x4];
+ u8 num_of_tc_oper[0x4];
+ u8 reserved_at_58[0x4];
+ u8 num_of_tc_admin[0x4];
+ u8 remote_willing[0x1];
+ u8 reserved_at_61[3];
+ u8 remote_pfc_cap[4];
+ u8 reserved_at_68[0x14];
+ u8 remote_num_of_tc[0x4];
+ u8 reserved_at_80[0x18];
+ u8 error[0x8];
+ u8 reserved_at_a0[0x160];
+};
#endif /* MLX5_IFC_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d101e4d904ba..890158e99159 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1456,6 +1456,8 @@ enum netdev_priv_flags {
* @netdev_ops: Includes several pointers to callbacks,
* if one wants to override the ndo_*() functions
* @ethtool_ops: Management operations
+ * @ndisc_ops: Includes callbacks for different IPv6 neighbour
+ * discovery handling. Necessary for e.g. 6LoWPAN.
* @header_ops: Includes callbacks for creating,parsing,caching,etc
* of Layer 2 headers.
*
@@ -1483,8 +1485,7 @@ enum netdev_priv_flags {
* @perm_addr: Permanent hw address
* @addr_assign_type: Hw address assignment type
* @addr_len: Hardware address length
- * @neigh_priv_len; Used in neigh_alloc(),
- * initialized only in atm/clip.c
+ * @neigh_priv_len: Used in neigh_alloc()
* @dev_id: Used to differentiate devices that share
* the same link layer address
* @dev_port: Used to differentiate devices that share
@@ -1673,6 +1674,9 @@ struct net_device {
#ifdef CONFIG_NET_L3_MASTER_DEV
const struct l3mdev_ops *l3mdev_ops;
#endif
+#if IS_ENABLED(CONFIG_IPV6)
+ const struct ndisc_ops *ndisc_ops;
+#endif
const struct header_ops *header_ops;
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
new file mode 100644
index 000000000000..562a65e8bcc0
--- /dev/null
+++ b/include/linux/ptr_ring.h
@@ -0,0 +1,393 @@
+/*
+ * Definitions for the 'struct ptr_ring' datastructure.
+ *
+ * Author:
+ * Michael S. Tsirkin <mst@redhat.com>
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This is a limited-size FIFO maintaining pointers in FIFO order, with
+ * one CPU producing entries and another consuming entries from a FIFO.
+ *
+ * This implementation tries to minimize cache-contention when there is a
+ * single producer and a single consumer CPU.
+ */
+
+#ifndef _LINUX_PTR_RING_H
+#define _LINUX_PTR_RING_H 1
+
+#ifdef __KERNEL__
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/cache.h>
+#include <linux/slab.h>
+#include <asm/errno.h>
+#endif
+
+struct ptr_ring {
+ int producer ____cacheline_aligned_in_smp;
+ spinlock_t producer_lock;
+ int consumer ____cacheline_aligned_in_smp;
+ spinlock_t consumer_lock;
+ /* Shared consumer/producer data */
+ /* Read-only by both the producer and the consumer */
+ int size ____cacheline_aligned_in_smp; /* max entries in queue */
+ void **queue;
+};
+
+/* Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax(). If ring is ever resized, callers must hold
+ * producer_lock - see e.g. ptr_ring_full. Otherwise, if callers don't hold
+ * producer_lock, the next call to __ptr_ring_produce may fail.
+ */
+static inline bool __ptr_ring_full(struct ptr_ring *r)
+{
+ return r->queue[r->producer];
+}
+
+static inline bool ptr_ring_full(struct ptr_ring *r)
+{
+ bool ret;
+
+ spin_lock(&r->producer_lock);
+ ret = __ptr_ring_full(r);
+ spin_unlock(&r->producer_lock);
+
+ return ret;
+}
+
+static inline bool ptr_ring_full_irq(struct ptr_ring *r)
+{
+ bool ret;
+
+ spin_lock_irq(&r->producer_lock);
+ ret = __ptr_ring_full(r);
+ spin_unlock_irq(&r->producer_lock);
+
+ return ret;
+}
+
+static inline bool ptr_ring_full_any(struct ptr_ring *r)
+{
+ unsigned long flags;
+ bool ret;
+
+ spin_lock_irqsave(&r->producer_lock, flags);
+ ret = __ptr_ring_full(r);
+ spin_unlock_irqrestore(&r->producer_lock, flags);
+
+ return ret;
+}
+
+static inline bool ptr_ring_full_bh(struct ptr_ring *r)
+{
+ bool ret;
+
+ spin_lock_bh(&r->producer_lock);
+ ret = __ptr_ring_full(r);
+ spin_unlock_bh(&r->producer_lock);
+
+ return ret;
+}
+
+/* Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax(). Callers must hold producer_lock.
+ */
+static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
+{
+ if (r->queue[r->producer])
+ return -ENOSPC;
+
+ r->queue[r->producer++] = ptr;
+ if (unlikely(r->producer >= r->size))
+ r->producer = 0;
+ return 0;
+}
+
+static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
+{
+ int ret;
+
+ spin_lock(&r->producer_lock);
+ ret = __ptr_ring_produce(r, ptr);
+ spin_unlock(&r->producer_lock);
+
+ return ret;
+}
+
+static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr)
+{
+ int ret;
+
+ spin_lock_irq(&r->producer_lock);
+ ret = __ptr_ring_produce(r, ptr);
+ spin_unlock_irq(&r->producer_lock);
+
+ return ret;
+}
+
+static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&r->producer_lock, flags);
+ ret = __ptr_ring_produce(r, ptr);
+ spin_unlock_irqrestore(&r->producer_lock, flags);
+
+ return ret;
+}
+
+static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
+{
+ int ret;
+
+ spin_lock_bh(&r->producer_lock);
+ ret = __ptr_ring_produce(r, ptr);
+ spin_unlock_bh(&r->producer_lock);
+
+ return ret;
+}
+
+/* Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax(). Callers must take consumer_lock
+ * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL.
+ * If ring is never resized, and if the pointer is merely
+ * tested, there's no need to take the lock - see e.g. __ptr_ring_empty.
+ */
+static inline void *__ptr_ring_peek(struct ptr_ring *r)
+{
+ return r->queue[r->consumer];
+}
+
+/* Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax(). Callers must take consumer_lock
+ * if the ring is ever resized - see e.g. ptr_ring_empty.
+ */
+static inline bool __ptr_ring_empty(struct ptr_ring *r)
+{
+ return !__ptr_ring_peek(r);
+}
+
+static inline bool ptr_ring_empty(struct ptr_ring *r)
+{
+ bool ret;
+
+ spin_lock(&r->consumer_lock);
+ ret = __ptr_ring_empty(r);
+ spin_unlock(&r->consumer_lock);
+
+ return ret;
+}
+
+static inline bool ptr_ring_empty_irq(struct ptr_ring *r)
+{
+ bool ret;
+
+ spin_lock_irq(&r->consumer_lock);
+ ret = __ptr_ring_empty(r);
+ spin_unlock_irq(&r->consumer_lock);
+
+ return ret;
+}
+
+static inline bool ptr_ring_empty_any(struct ptr_ring *r)
+{
+ unsigned long flags;
+ bool ret;
+
+ spin_lock_irqsave(&r->consumer_lock, flags);
+ ret = __ptr_ring_empty(r);
+ spin_unlock_irqrestore(&r->consumer_lock, flags);
+
+ return ret;
+}
+
+static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
+{
+ bool ret;
+
+ spin_lock_bh(&r->consumer_lock);
+ ret = __ptr_ring_empty(r);
+ spin_unlock_bh(&r->consumer_lock);
+
+ return ret;
+}
+
+/* Must only be called after __ptr_ring_peek returned !NULL */
+static inline void __ptr_ring_discard_one(struct ptr_ring *r)
+{
+ r->queue[r->consumer++] = NULL;
+ if (unlikely(r->consumer >= r->size))
+ r->consumer = 0;
+}
+
+static inline void *__ptr_ring_consume(struct ptr_ring *r)
+{
+ void *ptr;
+
+ ptr = __ptr_ring_peek(r);
+ if (ptr)
+ __ptr_ring_discard_one(r);
+
+ return ptr;
+}
+
+static inline void *ptr_ring_consume(struct ptr_ring *r)
+{
+ void *ptr;
+
+ spin_lock(&r->consumer_lock);
+ ptr = __ptr_ring_consume(r);
+ spin_unlock(&r->consumer_lock);
+
+ return ptr;
+}
+
+static inline void *ptr_ring_consume_irq(struct ptr_ring *r)
+{
+ void *ptr;
+
+ spin_lock_irq(&r->consumer_lock);
+ ptr = __ptr_ring_consume(r);
+ spin_unlock_irq(&r->consumer_lock);
+
+ return ptr;
+}
+
+static inline void *ptr_ring_consume_any(struct ptr_ring *r)
+{
+ unsigned long flags;
+ void *ptr;
+
+ spin_lock_irqsave(&r->consumer_lock, flags);
+ ptr = __ptr_ring_consume(r);
+ spin_unlock_irqrestore(&r->consumer_lock, flags);
+
+ return ptr;
+}
+
+static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
+{
+ void *ptr;
+
+ spin_lock_bh(&r->consumer_lock);
+ ptr = __ptr_ring_consume(r);
+ spin_unlock_bh(&r->consumer_lock);
+
+ return ptr;
+}
+
+/* Cast to structure type and call a function without discarding from FIFO.
+ * Function must return a value.
+ * Callers must take consumer_lock.
+ */
+#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r)))
+
+#define PTR_RING_PEEK_CALL(r, f) ({ \
+ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
+ \
+ spin_lock(&(r)->consumer_lock); \
+ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
+ spin_unlock(&(r)->consumer_lock); \
+ __PTR_RING_PEEK_CALL_v; \
+})
+
+#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \
+ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
+ \
+ spin_lock_irq(&(r)->consumer_lock); \
+ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
+ spin_unlock_irq(&(r)->consumer_lock); \
+ __PTR_RING_PEEK_CALL_v; \
+})
+
+#define PTR_RING_PEEK_CALL_BH(r, f) ({ \
+ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
+ \
+ spin_lock_bh(&(r)->consumer_lock); \
+ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
+ spin_unlock_bh(&(r)->consumer_lock); \
+ __PTR_RING_PEEK_CALL_v; \
+})
+
+#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \
+ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
+ unsigned long __PTR_RING_PEEK_CALL_f;\
+ \
+ spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
+ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
+ spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
+ __PTR_RING_PEEK_CALL_v; \
+})
+
+static inline void **__ptr_ring_init_queue_alloc(int size, gfp_t gfp)
+{
+ return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp);
+}
+
+static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
+{
+ r->queue = __ptr_ring_init_queue_alloc(size, gfp);
+ if (!r->queue)
+ return -ENOMEM;
+
+ r->size = size;
+ r->producer = r->consumer = 0;
+ spin_lock_init(&r->producer_lock);
+ spin_lock_init(&r->consumer_lock);
+
+ return 0;
+}
+
+static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
+ void (*destroy)(void *))
+{
+ unsigned long flags;
+ int producer = 0;
+ void **queue = __ptr_ring_init_queue_alloc(size, gfp);
+ void **old;
+ void *ptr;
+
+ if (!queue)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&(r)->producer_lock, flags);
+
+ while ((ptr = ptr_ring_consume(r)))
+ if (producer < size)
+ queue[producer++] = ptr;
+ else if (destroy)
+ destroy(ptr);
+
+ r->size = size;
+ r->producer = producer;
+ r->consumer = 0;
+ old = r->queue;
+ r->queue = queue;
+
+ spin_unlock_irqrestore(&(r)->producer_lock, flags);
+
+ kfree(old);
+
+ return 0;
+}
+
+static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
+{
+ void *ptr;
+
+ if (destroy)
+ while ((ptr = ptr_ring_consume(r)))
+ destroy(ptr);
+ kfree(r->queue);
+}
+
+#endif /* _LINUX_PTR_RING_H */
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index c006cc900c44..2daece8979f7 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -89,8 +89,9 @@ void net_inc_egress_queue(void);
void net_dec_egress_queue(void);
#endif
-extern void rtnetlink_init(void);
-extern void __rtnl_unlock(void);
+void rtnetlink_init(void);
+void __rtnl_unlock(void);
+void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail);
#define ASSERT_RTNL() do { \
if (unlikely(!rtnl_is_locked())) { \
diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h
new file mode 100644
index 000000000000..678bfbf78ac4
--- /dev/null
+++ b/include/linux/skb_array.h
@@ -0,0 +1,169 @@
+/*
+ * Definitions for the 'struct skb_array' datastructure.
+ *
+ * Author:
+ * Michael S. Tsirkin <mst@redhat.com>
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * Limited-size FIFO of skbs. Can be used more or less whenever
+ * sk_buff_head can be used, except you need to know the queue size in
+ * advance.
+ * Implemented as a type-safe wrapper around ptr_ring.
+ */
+
+#ifndef _LINUX_SKB_ARRAY_H
+#define _LINUX_SKB_ARRAY_H 1
+
+#ifdef __KERNEL__
+#include <linux/ptr_ring.h>
+#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
+#endif
+
+struct skb_array {
+ struct ptr_ring ring;
+};
+
+/* Might be slightly faster than skb_array_full below, but callers invoking
+ * this in a loop must use a compiler barrier, for example cpu_relax().
+ */
+static inline bool __skb_array_full(struct skb_array *a)
+{
+ return __ptr_ring_full(&a->ring);
+}
+
+static inline bool skb_array_full(struct skb_array *a)
+{
+ return ptr_ring_full(&a->ring);
+}
+
+static inline int skb_array_produce(struct skb_array *a, struct sk_buff *skb)
+{
+ return ptr_ring_produce(&a->ring, skb);
+}
+
+static inline int skb_array_produce_irq(struct skb_array *a, struct sk_buff *skb)
+{
+ return ptr_ring_produce_irq(&a->ring, skb);
+}
+
+static inline int skb_array_produce_bh(struct skb_array *a, struct sk_buff *skb)
+{
+ return ptr_ring_produce_bh(&a->ring, skb);
+}
+
+static inline int skb_array_produce_any(struct skb_array *a, struct sk_buff *skb)
+{
+ return ptr_ring_produce_any(&a->ring, skb);
+}
+
+/* Might be slightly faster than skb_array_empty below, but only safe if the
+ * array is never resized. Also, callers invoking this in a loop must take care
+ * to use a compiler barrier, for example cpu_relax().
+ */
+static inline bool __skb_array_empty(struct skb_array *a)
+{
+ return !__ptr_ring_peek(&a->ring);
+}
+
+static inline bool skb_array_empty(struct skb_array *a)
+{
+ return ptr_ring_empty(&a->ring);
+}
+
+static inline bool skb_array_empty_bh(struct skb_array *a)
+{
+ return ptr_ring_empty_bh(&a->ring);
+}
+
+static inline bool skb_array_empty_irq(struct skb_array *a)
+{
+ return ptr_ring_empty_irq(&a->ring);
+}
+
+static inline bool skb_array_empty_any(struct skb_array *a)
+{
+ return ptr_ring_empty_any(&a->ring);
+}
+
+static inline struct sk_buff *skb_array_consume(struct skb_array *a)
+{
+ return ptr_ring_consume(&a->ring);
+}
+
+static inline struct sk_buff *skb_array_consume_irq(struct skb_array *a)
+{
+ return ptr_ring_consume_irq(&a->ring);
+}
+
+static inline struct sk_buff *skb_array_consume_any(struct skb_array *a)
+{
+ return ptr_ring_consume_any(&a->ring);
+}
+
+static inline struct sk_buff *skb_array_consume_bh(struct skb_array *a)
+{
+ return ptr_ring_consume_bh(&a->ring);
+}
+
+static inline int __skb_array_len_with_tag(struct sk_buff *skb)
+{
+ if (likely(skb)) {
+ int len = skb->len;
+
+ if (skb_vlan_tag_present(skb))
+ len += VLAN_HLEN;
+
+ return len;
+ } else {
+ return 0;
+ }
+}
+
+static inline int skb_array_peek_len(struct skb_array *a)
+{
+ return PTR_RING_PEEK_CALL(&a->ring, __skb_array_len_with_tag);
+}
+
+static inline int skb_array_peek_len_irq(struct skb_array *a)
+{
+ return PTR_RING_PEEK_CALL_IRQ(&a->ring, __skb_array_len_with_tag);
+}
+
+static inline int skb_array_peek_len_bh(struct skb_array *a)
+{
+ return PTR_RING_PEEK_CALL_BH(&a->ring, __skb_array_len_with_tag);
+}
+
+static inline int skb_array_peek_len_any(struct skb_array *a)
+{
+ return PTR_RING_PEEK_CALL_ANY(&a->ring, __skb_array_len_with_tag);
+}
+
+static inline int skb_array_init(struct skb_array *a, int size, gfp_t gfp)
+{
+ return ptr_ring_init(&a->ring, size, gfp);
+}
+
+void __skb_array_destroy_skb(void *ptr)
+{
+ kfree_skb(ptr);
+}
+
+int skb_array_resize(struct skb_array *a, int size, gfp_t gfp)
+{
+ return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb);
+}
+
+static inline void skb_array_cleanup(struct skb_array *a)
+{
+ ptr_ring_cleanup(&a->ring, __skb_array_destroy_skb);
+}
+
+#endif /* _LINUX_SKB_ARRAY_H */
diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h
index da84cf920b78..5ab4c9901ccc 100644
--- a/include/net/6lowpan.h
+++ b/include/net/6lowpan.h
@@ -141,6 +141,16 @@ struct lowpan_dev {
u8 priv[0] __aligned(sizeof(void *));
};
+struct lowpan_802154_neigh {
+ __le16 short_addr;
+};
+
+static inline
+struct lowpan_802154_neigh *lowpan_802154_neigh(void *neigh_priv)
+{
+ return neigh_priv;
+}
+
static inline
struct lowpan_dev *lowpan_dev(const struct net_device *dev)
{
@@ -244,6 +254,12 @@ static inline bool lowpan_fetch_skb(struct sk_buff *skb, void *data,
return false;
}
+static inline bool lowpan_802154_is_valid_src_short_addr(__le16 addr)
+{
+ /* First bit of addr is multicast, reserved or 802.15.4 specific */
+ return !(addr & cpu_to_le16(0x8000));
+}
+
static inline void lowpan_push_hc_data(u8 **hc_ptr, const void *data,
const size_t len)
{
diff --git a/include/net/act_api.h b/include/net/act_api.h
index db218a12efb5..fb82b5b5d9e7 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -155,8 +155,8 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
struct tc_action *a);
int tcf_hash_search(struct tc_action_net *tn, struct tc_action *a, u32 index);
u32 tcf_hash_new_index(struct tc_action_net *tn);
-int tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a,
- int bind);
+bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a,
+ int bind);
int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
struct tc_action *a, int size, int bind, bool cpustats);
void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 730d856683e5..9826d3a9464c 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -94,6 +94,16 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
+void addrconf_add_linklocal(struct inet6_dev *idev,
+ const struct in6_addr *addr, u32 flags);
+
+int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
+ const struct prefix_info *pinfo,
+ struct inet6_dev *in6_dev,
+ const struct in6_addr *addr, int addr_type,
+ u32 addr_flags, bool sllao, bool tokenized,
+ __u32 valid_lft, u32 prefered_lft);
+
static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
{
if (dev->addr_len != ETH_ALEN)
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 54c779416eec..f55bf3d294aa 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -76,6 +76,8 @@ static inline struct dst_entry *ip6_route_output(struct net *net,
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
int flags);
+struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
+ int ifindex, struct flowi6 *fl6, int flags);
int ip6_route_init(void);
void ip6_route_cleanup(void);
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index dbf444428437..9222678426a1 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -132,6 +132,7 @@ struct ip_tunnel {
int ip_tnl_net_id;
struct gro_cells gro_cells;
bool collect_md;
+ bool ignore_df;
};
#define TUNNEL_CSUM __cpu_to_be16(0x01)
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 34f33eb96a5e..f8a416ec674c 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -38,7 +38,7 @@ struct l3mdev_ops {
/* IPv6 ops */
struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev,
- const struct flowi6 *fl6);
+ struct flowi6 *fl6);
};
#ifdef CONFIG_NET_L3_MASTER_DEV
@@ -139,7 +139,7 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4);
-struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6);
+struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6);
static inline
struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto)
@@ -225,7 +225,7 @@ static inline int l3mdev_get_saddr(struct net *net, int ifindex,
}
static inline
-struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6)
+struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6)
{
return NULL;
}
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 2d8edaad29cb..be1fe2283254 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -35,6 +35,7 @@ enum {
ND_OPT_ROUTE_INFO = 24, /* RFC4191 */
ND_OPT_RDNSS = 25, /* RFC5006 */
ND_OPT_DNSSL = 31, /* RFC6106 */
+ ND_OPT_6CO = 34, /* RFC6775 */
__ND_OPT_MAX
};
@@ -53,11 +54,21 @@ enum {
#include <net/neighbour.h>
+/* Set to 3 to get tracing... */
+#define ND_DEBUG 1
+
+#define ND_PRINTK(val, level, fmt, ...) \
+do { \
+ if (val <= ND_DEBUG) \
+ net_##level##_ratelimited(fmt, ##__VA_ARGS__); \
+} while (0)
+
struct ctl_table;
struct inet6_dev;
struct net_device;
struct net_proto_family;
struct sk_buff;
+struct prefix_info;
extern struct neigh_table nd_tbl;
@@ -99,20 +110,201 @@ struct ndisc_options {
#endif
struct nd_opt_hdr *nd_useropts;
struct nd_opt_hdr *nd_useropts_end;
+#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
+ struct nd_opt_hdr *nd_802154_opt_array[ND_OPT_TARGET_LL_ADDR + 1];
+#endif
};
-#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
-#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR]
-#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO]
-#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END]
-#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR]
-#define nd_opts_mtu nd_opt_array[ND_OPT_MTU]
+#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
+#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR]
+#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO]
+#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END]
+#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR]
+#define nd_opts_mtu nd_opt_array[ND_OPT_MTU]
+#define nd_802154_opts_src_lladdr nd_802154_opt_array[ND_OPT_SOURCE_LL_ADDR]
+#define nd_802154_opts_tgt_lladdr nd_802154_opt_array[ND_OPT_TARGET_LL_ADDR]
#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
-struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
+struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
+ u8 *opt, int opt_len,
struct ndisc_options *ndopts);
+void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data,
+ int data_len, int pad);
+
+#define NDISC_OPS_REDIRECT_DATA_SPACE 2
+
+/*
+ * This structure defines the hooks for IPv6 neighbour discovery.
+ * The following hooks can be defined; unless noted otherwise, they are
+ * optional and can be filled with a null pointer.
+ *
+ * int (*is_useropt)(u8 nd_opt_type):
+ * This function is called when IPv6 decide RA userspace options. if
+ * this function returns 1 then the option given by nd_opt_type will
+ * be handled as userspace option additional to the IPv6 options.
+ *
+ * int (*parse_options)(const struct net_device *dev,
+ * struct nd_opt_hdr *nd_opt,
+ * struct ndisc_options *ndopts):
+ * This function is called while parsing ndisc ops and put each position
+ * as pointer into ndopts. If this function return unequal 0, then this
+ * function took care about the ndisc option, if 0 then the IPv6 ndisc
+ * option parser will take care about that option.
+ *
+ * void (*update)(const struct net_device *dev, struct neighbour *n,
+ * u32 flags, u8 icmp6_type,
+ * const struct ndisc_options *ndopts):
+ * This function is called when IPv6 ndisc updates the neighbour cache
+ * entry. Additional options which can be updated may be previously
+ * parsed by parse_opts callback and accessible over ndopts parameter.
+ *
+ * int (*opt_addr_space)(const struct net_device *dev, u8 icmp6_type,
+ * struct neighbour *neigh, u8 *ha_buf,
+ * u8 **ha):
+ * This function is called when the necessary option space will be
+ * calculated before allocating a skb. The parameters neigh, ha_buf
+ * abd ha are available on NDISC_REDIRECT messages only.
+ *
+ * void (*fill_addr_option)(const struct net_device *dev,
+ * struct sk_buff *skb, u8 icmp6_type,
+ * const u8 *ha):
+ * This function is called when the skb will finally fill the option
+ * fields inside skb. NOTE: this callback should fill the option
+ * fields to the skb which are previously indicated by opt_space
+ * parameter. That means the decision to add such option should
+ * not lost between these two callbacks, e.g. protected by interface
+ * up state.
+ *
+ * void (*prefix_rcv_add_addr)(struct net *net, struct net_device *dev,
+ * const struct prefix_info *pinfo,
+ * struct inet6_dev *in6_dev,
+ * struct in6_addr *addr,
+ * int addr_type, u32 addr_flags,
+ * bool sllao, bool tokenized,
+ * __u32 valid_lft, u32 prefered_lft,
+ * bool dev_addr_generated):
+ * This function is called when a RA messages is received with valid
+ * PIO option fields and an IPv6 address will be added to the interface
+ * for autoconfiguration. The parameter dev_addr_generated reports about
+ * if the address was based on dev->dev_addr or not. This can be used
+ * to add a second address if link-layer operates with two link layer
+ * addresses. E.g. 802.15.4 6LoWPAN.
+ */
+struct ndisc_ops {
+ int (*is_useropt)(u8 nd_opt_type);
+ int (*parse_options)(const struct net_device *dev,
+ struct nd_opt_hdr *nd_opt,
+ struct ndisc_options *ndopts);
+ void (*update)(const struct net_device *dev, struct neighbour *n,
+ u32 flags, u8 icmp6_type,
+ const struct ndisc_options *ndopts);
+ int (*opt_addr_space)(const struct net_device *dev, u8 icmp6_type,
+ struct neighbour *neigh, u8 *ha_buf,
+ u8 **ha);
+ void (*fill_addr_option)(const struct net_device *dev,
+ struct sk_buff *skb, u8 icmp6_type,
+ const u8 *ha);
+ void (*prefix_rcv_add_addr)(struct net *net, struct net_device *dev,
+ const struct prefix_info *pinfo,
+ struct inet6_dev *in6_dev,
+ struct in6_addr *addr,
+ int addr_type, u32 addr_flags,
+ bool sllao, bool tokenized,
+ __u32 valid_lft, u32 prefered_lft,
+ bool dev_addr_generated);
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int ndisc_ops_is_useropt(const struct net_device *dev,
+ u8 nd_opt_type)
+{
+ if (dev->ndisc_ops && dev->ndisc_ops->is_useropt)
+ return dev->ndisc_ops->is_useropt(nd_opt_type);
+ else
+ return 0;
+}
+
+static inline int ndisc_ops_parse_options(const struct net_device *dev,
+ struct nd_opt_hdr *nd_opt,
+ struct ndisc_options *ndopts)
+{
+ if (dev->ndisc_ops && dev->ndisc_ops->parse_options)
+ return dev->ndisc_ops->parse_options(dev, nd_opt, ndopts);
+ else
+ return 0;
+}
+
+static inline void ndisc_ops_update(const struct net_device *dev,
+ struct neighbour *n, u32 flags,
+ u8 icmp6_type,
+ const struct ndisc_options *ndopts)
+{
+ if (dev->ndisc_ops && dev->ndisc_ops->update)
+ dev->ndisc_ops->update(dev, n, flags, icmp6_type, ndopts);
+}
+
+static inline int ndisc_ops_opt_addr_space(const struct net_device *dev,
+ u8 icmp6_type)
+{
+ if (dev->ndisc_ops && dev->ndisc_ops->opt_addr_space &&
+ icmp6_type != NDISC_REDIRECT)
+ return dev->ndisc_ops->opt_addr_space(dev, icmp6_type, NULL,
+ NULL, NULL);
+ else
+ return 0;
+}
+
+static inline int ndisc_ops_redirect_opt_addr_space(const struct net_device *dev,
+ struct neighbour *neigh,
+ u8 *ha_buf, u8 **ha)
+{
+ if (dev->ndisc_ops && dev->ndisc_ops->opt_addr_space)
+ return dev->ndisc_ops->opt_addr_space(dev, NDISC_REDIRECT,
+ neigh, ha_buf, ha);
+ else
+ return 0;
+}
+
+static inline void ndisc_ops_fill_addr_option(const struct net_device *dev,
+ struct sk_buff *skb,
+ u8 icmp6_type)
+{
+ if (dev->ndisc_ops && dev->ndisc_ops->fill_addr_option &&
+ icmp6_type != NDISC_REDIRECT)
+ dev->ndisc_ops->fill_addr_option(dev, skb, icmp6_type, NULL);
+}
+
+static inline void ndisc_ops_fill_redirect_addr_option(const struct net_device *dev,
+ struct sk_buff *skb,
+ const u8 *ha)
+{
+ if (dev->ndisc_ops && dev->ndisc_ops->fill_addr_option)
+ dev->ndisc_ops->fill_addr_option(dev, skb, NDISC_REDIRECT, ha);
+}
+
+static inline void ndisc_ops_prefix_rcv_add_addr(struct net *net,
+ struct net_device *dev,
+ const struct prefix_info *pinfo,
+ struct inet6_dev *in6_dev,
+ struct in6_addr *addr,
+ int addr_type, u32 addr_flags,
+ bool sllao, bool tokenized,
+ __u32 valid_lft,
+ u32 prefered_lft,
+ bool dev_addr_generated)
+{
+ if (dev->ndisc_ops && dev->ndisc_ops->prefix_rcv_add_addr)
+ dev->ndisc_ops->prefix_rcv_add_addr(net, dev, pinfo, in6_dev,
+ addr, addr_type,
+ addr_flags, sllao,
+ tokenized, valid_lft,
+ prefered_lft,
+ dev_addr_generated);
+}
+#endif
+
/*
* Return the padding between the option length and the start of the
* link addr. Currently only IP-over-InfiniBand needs this, although
@@ -127,23 +319,48 @@ static inline int ndisc_addr_option_pad(unsigned short type)
}
}
-static inline int ndisc_opt_addr_space(struct net_device *dev)
+static inline int __ndisc_opt_addr_space(unsigned char addr_len, int pad)
{
- return NDISC_OPT_SPACE(dev->addr_len +
- ndisc_addr_option_pad(dev->type));
+ return NDISC_OPT_SPACE(addr_len + pad);
}
-static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
- struct net_device *dev)
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int ndisc_opt_addr_space(struct net_device *dev, u8 icmp6_type)
+{
+ return __ndisc_opt_addr_space(dev->addr_len,
+ ndisc_addr_option_pad(dev->type)) +
+ ndisc_ops_opt_addr_space(dev, icmp6_type);
+}
+
+static inline int ndisc_redirect_opt_addr_space(struct net_device *dev,
+ struct neighbour *neigh,
+ u8 *ops_data_buf,
+ u8 **ops_data)
+{
+ return __ndisc_opt_addr_space(dev->addr_len,
+ ndisc_addr_option_pad(dev->type)) +
+ ndisc_ops_redirect_opt_addr_space(dev, neigh, ops_data_buf,
+ ops_data);
+}
+#endif
+
+static inline u8 *__ndisc_opt_addr_data(struct nd_opt_hdr *p,
+ unsigned char addr_len, int prepad)
{
u8 *lladdr = (u8 *)(p + 1);
int lladdrlen = p->nd_opt_len << 3;
- int prepad = ndisc_addr_option_pad(dev->type);
- if (lladdrlen != ndisc_opt_addr_space(dev))
+ if (lladdrlen != __ndisc_opt_addr_space(addr_len, prepad))
return NULL;
return lladdr + prepad;
}
+static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
+ struct net_device *dev)
+{
+ return __ndisc_opt_addr_data(p, dev->addr_len,
+ ndisc_addr_option_pad(dev->type));
+}
+
static inline u32 ndisc_hashfn(const void *pkey, const struct net_device *dev, __u32 *hash_rnd)
{
const u32 *p32 = pkey;
@@ -194,6 +411,9 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target);
int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev,
int dir);
+void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
+ const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type,
+ struct ndisc_options *ndopts);
/*
* IGMP
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 9a0d177884c6..4f7cee8344c4 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -683,19 +683,21 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
return skb;
}
-static inline void __qdisc_reset_queue(struct Qdisc *sch,
- struct sk_buff_head *list)
+static inline void __qdisc_reset_queue(struct sk_buff_head *list)
{
/*
* We do not know the backlog in bytes of this list, it
* is up to the caller to correct it
*/
- __skb_queue_purge(list);
+ if (!skb_queue_empty(list)) {
+ rtnl_kfree_skbs(list->next, list->prev);
+ __skb_queue_head_init(list);
+ }
}
static inline void qdisc_reset_queue(struct Qdisc *sch)
{
- __qdisc_reset_queue(sch, &sch->q);
+ __qdisc_reset_queue(&sch->q);
sch->qstats.backlog = 0;
}
@@ -716,6 +718,12 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
return old;
}
+static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
+{
+ rtnl_kfree_skbs(skb, skb);
+ qdisc_qstats_drop(sch);
+}
+
static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
{
kfree_skb(skb);
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index af4de90ba27d..1046f5515174 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -113,6 +113,7 @@ enum {
IFLA_GRE_ENCAP_SPORT,
IFLA_GRE_ENCAP_DPORT,
IFLA_GRE_COLLECT_METADATA,
+ IFLA_GRE_IGNORE_DF,
__IFLA_GRE_MAX,
};
diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h
index 97ecc27aeca6..a67caee11929 100644
--- a/net/6lowpan/6lowpan_i.h
+++ b/net/6lowpan/6lowpan_i.h
@@ -12,6 +12,10 @@ static inline bool lowpan_is_ll(const struct net_device *dev,
return lowpan_dev(dev)->lltype == lltype;
}
+extern const struct ndisc_ops lowpan_ndisc_ops;
+
+int addrconf_ifid_802154_6lowpan(u8 *eui, struct net_device *dev);
+
#ifdef CONFIG_6LOWPAN_DEBUGFS
int lowpan_dev_debugfs_init(struct net_device *dev);
void lowpan_dev_debugfs_exit(struct net_device *dev);
diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile
index e44f3bf2dd42..12d131ab2324 100644
--- a/net/6lowpan/Makefile
+++ b/net/6lowpan/Makefile
@@ -1,6 +1,6 @@
obj-$(CONFIG_6LOWPAN) += 6lowpan.o
-6lowpan-y := core.o iphc.o nhc.o
+6lowpan-y := core.o iphc.o nhc.o ndisc.o
6lowpan-$(CONFIG_6LOWPAN_DEBUGFS) += debugfs.o
#rfc6282 nhcs
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index 7a240b3eaed1..5945f7e19c67 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <net/6lowpan.h>
+#include <net/addrconf.h>
#include "6lowpan_i.h"
@@ -33,6 +34,8 @@ int lowpan_register_netdevice(struct net_device *dev,
for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++)
lowpan_dev(dev)->ctx.table[i].id = i;
+ dev->ndisc_ops = &lowpan_ndisc_ops;
+
ret = register_netdevice(dev);
if (ret < 0)
return ret;
@@ -72,16 +75,61 @@ void lowpan_unregister_netdev(struct net_device *dev)
}
EXPORT_SYMBOL(lowpan_unregister_netdev);
+int addrconf_ifid_802154_6lowpan(u8 *eui, struct net_device *dev)
+{
+ struct wpan_dev *wpan_dev = lowpan_802154_dev(dev)->wdev->ieee802154_ptr;
+
+ /* Set short_addr autoconfiguration if short_addr is present only */
+ if (!lowpan_802154_is_valid_src_short_addr(wpan_dev->short_addr))
+ return -1;
+
+ /* For either address format, all zero addresses MUST NOT be used */
+ if (wpan_dev->pan_id == cpu_to_le16(0x0000) &&
+ wpan_dev->short_addr == cpu_to_le16(0x0000))
+ return -1;
+
+ /* Alternatively, if no PAN ID is known, 16 zero bits may be used */
+ if (wpan_dev->pan_id == cpu_to_le16(IEEE802154_PAN_ID_BROADCAST))
+ memset(eui, 0, 2);
+ else
+ ieee802154_le16_to_be16(eui, &wpan_dev->pan_id);
+
+ /* The "Universal/Local" (U/L) bit shall be set to zero */
+ eui[0] &= ~2;
+ eui[2] = 0;
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ eui[5] = 0;
+ ieee802154_le16_to_be16(&eui[6], &wpan_dev->short_addr);
+ return 0;
+}
+
static int lowpan_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct inet6_dev *idev;
+ struct in6_addr addr;
int i;
if (dev->type != ARPHRD_6LOWPAN)
return NOTIFY_DONE;
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ return NOTIFY_DONE;
+
switch (event) {
+ case NETDEV_UP:
+ case NETDEV_CHANGE:
+ /* (802.15.4 6LoWPAN short address slaac handling */
+ if (lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154) &&
+ addrconf_ifid_802154_6lowpan(addr.s6_addr + 8, dev) == 0) {
+ __ipv6_addr_set_half(&addr.s6_addr32[0],
+ htonl(0xFE800000), 0);
+ addrconf_add_linklocal(idev, &addr, 0);
+ }
+ break;
case NETDEV_DOWN:
for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++)
clear_bit(LOWPAN_IPHC_CTX_FLAG_ACTIVE,
@@ -112,8 +160,6 @@ static int __init lowpan_module_init(void)
return ret;
}
- request_module_nowait("ipv6");
-
request_module_nowait("nhc_dest");
request_module_nowait("nhc_fragment");
request_module_nowait("nhc_hop");
diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c
index acbaa3db493b..24915e0bb9ea 100644
--- a/net/6lowpan/debugfs.c
+++ b/net/6lowpan/debugfs.c
@@ -245,6 +245,41 @@ static const struct file_operations lowpan_context_fops = {
.release = single_release,
};
+static int lowpan_short_addr_get(void *data, u64 *val)
+{
+ struct wpan_dev *wdev = data;
+
+ rtnl_lock();
+ *val = le16_to_cpu(wdev->short_addr);
+ rtnl_unlock();
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(lowpan_short_addr_fops, lowpan_short_addr_get,
+ NULL, "0x%04llx\n");
+
+static int lowpan_dev_debugfs_802154_init(const struct net_device *dev,
+ struct lowpan_dev *ldev)
+{
+ struct dentry *dentry, *root;
+
+ if (!lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154))
+ return 0;
+
+ root = debugfs_create_dir("ieee802154", ldev->iface_debugfs);
+ if (!root)
+ return -EINVAL;
+
+ dentry = debugfs_create_file("short_addr", 0444, root,
+ lowpan_802154_dev(dev)->wdev->ieee802154_ptr,
+ &lowpan_short_addr_fops);
+ if (!dentry)
+ return -EINVAL;
+
+ return 0;
+}
+
int lowpan_dev_debugfs_init(struct net_device *dev)
{
struct lowpan_dev *ldev = lowpan_dev(dev);
@@ -272,6 +307,10 @@ int lowpan_dev_debugfs_init(struct net_device *dev)
goto remove_root;
}
+ ret = lowpan_dev_debugfs_802154_init(dev, ldev);
+ if (ret < 0)
+ goto remove_root;
+
return 0;
remove_root:
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index 8501dd532fe1..79f1fa22509a 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -761,22 +761,75 @@ static const u8 lowpan_iphc_dam_to_sam_value[] = {
[LOWPAN_IPHC_DAM_11] = LOWPAN_IPHC_SAM_11,
};
-static u8 lowpan_compress_ctx_addr(u8 **hc_ptr, const struct in6_addr *ipaddr,
+static inline bool
+lowpan_iphc_compress_ctx_802154_lladdr(const struct in6_addr *ipaddr,
+ const struct lowpan_iphc_ctx *ctx,
+ const void *lladdr)
+{
+ const struct ieee802154_addr *addr = lladdr;
+ unsigned char extended_addr[EUI64_ADDR_LEN];
+ bool lladdr_compress = false;
+ struct in6_addr tmp = {};
+
+ switch (addr->mode) {
+ case IEEE802154_ADDR_LONG:
+ ieee802154_le64_to_be64(&extended_addr, &addr->extended_addr);
+ /* check for SAM/DAM = 11 */
+ memcpy(&tmp.s6_addr[8], &extended_addr, EUI64_ADDR_LEN);
+ /* second bit-flip (Universe/Local) is done according RFC2464 */
+ tmp.s6_addr[8] ^= 0x02;
+ /* context information are always used */
+ ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
+ if (ipv6_addr_equal(&tmp, ipaddr))
+ lladdr_compress = true;
+ break;
+ case IEEE802154_ADDR_SHORT:
+ tmp.s6_addr[11] = 0xFF;
+ tmp.s6_addr[12] = 0xFE;
+ ieee802154_le16_to_be16(&tmp.s6_addr16[7],
+ &addr->short_addr);
+ /* context information are always used */
+ ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
+ if (ipv6_addr_equal(&tmp, ipaddr))
+ lladdr_compress = true;
+ break;
+ default:
+ /* should never handled and filtered by 802154 6lowpan */
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return lladdr_compress;
+}
+
+static u8 lowpan_compress_ctx_addr(u8 **hc_ptr, const struct net_device *dev,
+ const struct in6_addr *ipaddr,
const struct lowpan_iphc_ctx *ctx,
const unsigned char *lladdr, bool sam)
{
struct in6_addr tmp = {};
u8 dam;
- /* check for SAM/DAM = 11 */
- memcpy(&tmp.s6_addr[8], lladdr, 8);
- /* second bit-flip (Universe/Local) is done according RFC2464 */
- tmp.s6_addr[8] ^= 0x02;
- /* context information are always used */
- ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
- if (ipv6_addr_equal(&tmp, ipaddr)) {
- dam = LOWPAN_IPHC_DAM_11;
- goto out;
+ switch (lowpan_dev(dev)->lltype) {
+ case LOWPAN_LLTYPE_IEEE802154:
+ if (lowpan_iphc_compress_ctx_802154_lladdr(ipaddr, ctx,
+ lladdr)) {
+ dam = LOWPAN_IPHC_DAM_11;
+ goto out;
+ }
+ break;
+ default:
+ /* check for SAM/DAM = 11 */
+ memcpy(&tmp.s6_addr[8], lladdr, EUI64_ADDR_LEN);
+ /* second bit-flip (Universe/Local) is done according RFC2464 */
+ tmp.s6_addr[8] ^= 0x02;
+ /* context information are always used */
+ ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
+ if (ipv6_addr_equal(&tmp, ipaddr)) {
+ dam = LOWPAN_IPHC_DAM_11;
+ goto out;
+ }
+ break;
}
memset(&tmp, 0, sizeof(tmp));
@@ -813,28 +866,85 @@ out:
return dam;
}
-static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct in6_addr *ipaddr,
+static inline bool
+lowpan_iphc_compress_802154_lladdr(const struct in6_addr *ipaddr,
+ const void *lladdr)
+{
+ const struct ieee802154_addr *addr = lladdr;
+ unsigned char extended_addr[EUI64_ADDR_LEN];
+ bool lladdr_compress = false;
+ struct in6_addr tmp = {};
+
+ switch (addr->mode) {
+ case IEEE802154_ADDR_LONG:
+ ieee802154_le64_to_be64(&extended_addr, &addr->extended_addr);
+ if (is_addr_mac_addr_based(ipaddr, extended_addr))
+ lladdr_compress = true;
+ break;
+ case IEEE802154_ADDR_SHORT:
+ /* fe:80::ff:fe00:XXXX
+ * \__/
+ * short_addr
+ *
+ * Universe/Local bit is zero.
+ */
+ tmp.s6_addr[0] = 0xFE;
+ tmp.s6_addr[1] = 0x80;
+ tmp.s6_addr[11] = 0xFF;
+ tmp.s6_addr[12] = 0xFE;
+ ieee802154_le16_to_be16(&tmp.s6_addr16[7],
+ &addr->short_addr);
+ if (ipv6_addr_equal(&tmp, ipaddr))
+ lladdr_compress = true;
+ break;
+ default:
+ /* should never handled and filtered by 802154 6lowpan */
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return lladdr_compress;
+}
+
+static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct net_device *dev,
+ const struct in6_addr *ipaddr,
const unsigned char *lladdr, bool sam)
{
- u8 dam = LOWPAN_IPHC_DAM_00;
+ u8 dam = LOWPAN_IPHC_DAM_01;
- if (is_addr_mac_addr_based(ipaddr, lladdr)) {
- dam = LOWPAN_IPHC_DAM_11; /* 0-bits */
- pr_debug("address compression 0 bits\n");
- } else if (lowpan_is_iid_16_bit_compressable(ipaddr)) {
+ switch (lowpan_dev(dev)->lltype) {
+ case LOWPAN_LLTYPE_IEEE802154:
+ if (lowpan_iphc_compress_802154_lladdr(ipaddr, lladdr)) {
+ dam = LOWPAN_IPHC_DAM_11; /* 0-bits */
+ pr_debug("address compression 0 bits\n");
+ goto out;
+ }
+ break;
+ default:
+ if (is_addr_mac_addr_based(ipaddr, lladdr)) {
+ dam = LOWPAN_IPHC_DAM_11; /* 0-bits */
+ pr_debug("address compression 0 bits\n");
+ goto out;
+ }
+ break;
+ }
+
+ if (lowpan_is_iid_16_bit_compressable(ipaddr)) {
/* compress IID to 16 bits xxxx::XXXX */
lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[7], 2);
dam = LOWPAN_IPHC_DAM_10; /* 16-bits */
raw_dump_inline(NULL, "Compressed ipv6 addr is (16 bits)",
*hc_ptr - 2, 2);
- } else {
- /* do not compress IID => xxxx::IID */
- lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[4], 8);
- dam = LOWPAN_IPHC_DAM_01; /* 64-bits */
- raw_dump_inline(NULL, "Compressed ipv6 addr is (64 bits)",
- *hc_ptr - 8, 8);
+ goto out;
}
+ /* do not compress IID => xxxx::IID */
+ lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[4], 8);
+ raw_dump_inline(NULL, "Compressed ipv6 addr is (64 bits)",
+ *hc_ptr - 8, 8);
+
+out:
+
if (sam)
return lowpan_iphc_dam_to_sam_value[dam];
else
@@ -1013,9 +1123,6 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
iphc0 = LOWPAN_DISPATCH_IPHC;
iphc1 = 0;
- raw_dump_inline(__func__, "saddr", saddr, EUI64_ADDR_LEN);
- raw_dump_inline(__func__, "daddr", daddr, EUI64_ADDR_LEN);
-
raw_dump_table(__func__, "sending raw skb network uncompressed packet",
skb->data, skb->len);
@@ -1088,14 +1195,15 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
iphc1 |= LOWPAN_IPHC_SAC;
} else {
if (sci) {
- iphc1 |= lowpan_compress_ctx_addr(&hc_ptr, &hdr->saddr,
+ iphc1 |= lowpan_compress_ctx_addr(&hc_ptr, dev,
+ &hdr->saddr,
&sci_entry, saddr,
true);
iphc1 |= LOWPAN_IPHC_SAC;
} else {
if (ipv6_saddr_type & IPV6_ADDR_LINKLOCAL &&
lowpan_is_linklocal_zero_padded(hdr->saddr)) {
- iphc1 |= lowpan_compress_addr_64(&hc_ptr,
+ iphc1 |= lowpan_compress_addr_64(&hc_ptr, dev,
&hdr->saddr,
saddr, true);
pr_debug("source address unicast link-local %pI6c iphc1 0x%02x\n",
@@ -1123,14 +1231,15 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
}
} else {
if (dci) {
- iphc1 |= lowpan_compress_ctx_addr(&hc_ptr, &hdr->daddr,
+ iphc1 |= lowpan_compress_ctx_addr(&hc_ptr, dev,
+ &hdr->daddr,
&dci_entry, daddr,
false);
iphc1 |= LOWPAN_IPHC_DAC;
} else {
if (ipv6_daddr_type & IPV6_ADDR_LINKLOCAL &&
lowpan_is_linklocal_zero_padded(hdr->daddr)) {
- iphc1 |= lowpan_compress_addr_64(&hc_ptr,
+ iphc1 |= lowpan_compress_addr_64(&hc_ptr, dev,
&hdr->daddr,
daddr, false);
pr_debug("dest address unicast link-local %pI6c iphc1 0x%02x\n",
diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c
new file mode 100644
index 000000000000..ae1d4199aa4c
--- /dev/null
+++ b/net/6lowpan/ndisc.c
@@ -0,0 +1,234 @@
+/* This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ * (C) 2016 Pengutronix, Alexander Aring <aar@pengutronix.de>
+ */
+
+#include <net/6lowpan.h>
+#include <net/addrconf.h>
+#include <net/ndisc.h>
+
+#include "6lowpan_i.h"
+
+static int lowpan_ndisc_is_useropt(u8 nd_opt_type)
+{
+ return nd_opt_type == ND_OPT_6CO;
+}
+
+#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
+#define NDISC_802154_SHORT_ADDR_LENGTH 1
+static int lowpan_ndisc_parse_802154_options(const struct net_device *dev,
+ struct nd_opt_hdr *nd_opt,
+ struct ndisc_options *ndopts)
+{
+ switch (nd_opt->nd_opt_len) {
+ case NDISC_802154_SHORT_ADDR_LENGTH:
+ if (ndopts->nd_802154_opt_array[nd_opt->nd_opt_type])
+ ND_PRINTK(2, warn,
+ "%s: duplicated short addr ND6 option found: type=%d\n",
+ __func__, nd_opt->nd_opt_type);
+ else
+ ndopts->nd_802154_opt_array[nd_opt->nd_opt_type] = nd_opt;
+ return 1;
+ default:
+ /* all others will be handled by ndisc IPv6 option parsing */
+ return 0;
+ }
+}
+
+static int lowpan_ndisc_parse_options(const struct net_device *dev,
+ struct nd_opt_hdr *nd_opt,
+ struct ndisc_options *ndopts)
+{
+ switch (nd_opt->nd_opt_type) {
+ case ND_OPT_SOURCE_LL_ADDR:
+ case ND_OPT_TARGET_LL_ADDR:
+ return lowpan_ndisc_parse_802154_options(dev, nd_opt, ndopts);
+ default:
+ return 0;
+ }
+}
+
+static void lowpan_ndisc_802154_update(struct neighbour *n, u32 flags,
+ u8 icmp6_type,
+ const struct ndisc_options *ndopts)
+{
+ struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n));
+ u8 *lladdr_short = NULL;
+
+ switch (icmp6_type) {
+ case NDISC_ROUTER_SOLICITATION:
+ case NDISC_ROUTER_ADVERTISEMENT:
+ case NDISC_NEIGHBOUR_SOLICITATION:
+ if (ndopts->nd_802154_opts_src_lladdr) {
+ lladdr_short = __ndisc_opt_addr_data(ndopts->nd_802154_opts_src_lladdr,
+ IEEE802154_SHORT_ADDR_LEN, 0);
+ if (!lladdr_short) {
+ ND_PRINTK(2, warn,
+ "NA: invalid short link-layer address length\n");
+ return;
+ }
+ }
+ break;
+ case NDISC_REDIRECT:
+ case NDISC_NEIGHBOUR_ADVERTISEMENT:
+ if (ndopts->nd_802154_opts_tgt_lladdr) {
+ lladdr_short = __ndisc_opt_addr_data(ndopts->nd_802154_opts_tgt_lladdr,
+ IEEE802154_SHORT_ADDR_LEN, 0);
+ if (!lladdr_short) {
+ ND_PRINTK(2, warn,
+ "NA: invalid short link-layer address length\n");
+ return;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ write_lock_bh(&n->lock);
+ if (lladdr_short)
+ ieee802154_be16_to_le16(&neigh->short_addr, lladdr_short);
+ else
+ neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
+ write_unlock_bh(&n->lock);
+}
+
+static void lowpan_ndisc_update(const struct net_device *dev,
+ struct neighbour *n, u32 flags, u8 icmp6_type,
+ const struct ndisc_options *ndopts)
+{
+ if (!lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154))
+ return;
+
+ /* react on overrides only. TODO check if this is really right. */
+ if (flags & NEIGH_UPDATE_F_OVERRIDE)
+ lowpan_ndisc_802154_update(n, flags, icmp6_type, ndopts);
+}
+
+static int lowpan_ndisc_opt_addr_space(const struct net_device *dev,
+ u8 icmp6_type, struct neighbour *neigh,
+ u8 *ha_buf, u8 **ha)
+{
+ struct lowpan_802154_neigh *n;
+ struct wpan_dev *wpan_dev;
+ int addr_space = 0;
+
+ if (!lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154))
+ return 0;
+
+ switch (icmp6_type) {
+ case NDISC_REDIRECT:
+ n = lowpan_802154_neigh(neighbour_priv(neigh));
+
+ read_lock_bh(&neigh->lock);
+ if (lowpan_802154_is_valid_src_short_addr(n->short_addr)) {
+ memcpy(ha_buf, &n->short_addr,
+ IEEE802154_SHORT_ADDR_LEN);
+ read_unlock_bh(&neigh->lock);
+ addr_space += __ndisc_opt_addr_space(IEEE802154_SHORT_ADDR_LEN, 0);
+ *ha = ha_buf;
+ }
+ read_unlock_bh(&neigh->lock);
+ break;
+ case NDISC_NEIGHBOUR_ADVERTISEMENT:
+ case NDISC_NEIGHBOUR_SOLICITATION:
+ case NDISC_ROUTER_SOLICITATION:
+ wpan_dev = lowpan_802154_dev(dev)->wdev->ieee802154_ptr;
+
+ if (lowpan_802154_is_valid_src_short_addr(wpan_dev->short_addr))
+ addr_space = __ndisc_opt_addr_space(IEEE802154_SHORT_ADDR_LEN, 0);
+ break;
+ default:
+ break;
+ }
+
+ return addr_space;
+}
+
+static void lowpan_ndisc_fill_addr_option(const struct net_device *dev,
+ struct sk_buff *skb, u8 icmp6_type,
+ const u8 *ha)
+{
+ struct wpan_dev *wpan_dev;
+ __be16 short_addr;
+ u8 opt_type;
+
+ if (!lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154))
+ return;
+
+ switch (icmp6_type) {
+ case NDISC_REDIRECT:
+ if (ha) {
+ ieee802154_le16_to_be16(&short_addr, ha);
+ __ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
+ &short_addr,
+ IEEE802154_SHORT_ADDR_LEN, 0);
+ }
+ return;
+ case NDISC_NEIGHBOUR_ADVERTISEMENT:
+ opt_type = ND_OPT_TARGET_LL_ADDR;
+ break;
+ case NDISC_ROUTER_SOLICITATION:
+ case NDISC_NEIGHBOUR_SOLICITATION:
+ opt_type = ND_OPT_SOURCE_LL_ADDR;
+ break;
+ default:
+ return;
+ }
+
+ wpan_dev = lowpan_802154_dev(dev)->wdev->ieee802154_ptr;
+
+ if (lowpan_802154_is_valid_src_short_addr(wpan_dev->short_addr)) {
+ ieee802154_le16_to_be16(&short_addr,
+ &wpan_dev->short_addr);
+ __ndisc_fill_addr_option(skb, opt_type, &short_addr,
+ IEEE802154_SHORT_ADDR_LEN, 0);
+ }
+}
+
+static void lowpan_ndisc_prefix_rcv_add_addr(struct net *net,
+ struct net_device *dev,
+ const struct prefix_info *pinfo,
+ struct inet6_dev *in6_dev,
+ struct in6_addr *addr,
+ int addr_type, u32 addr_flags,
+ bool sllao, bool tokenized,
+ __u32 valid_lft,
+ u32 prefered_lft,
+ bool dev_addr_generated)
+{
+ int err;
+
+ /* generates short based address for RA PIO's */
+ if (lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154) && dev_addr_generated &&
+ !addrconf_ifid_802154_6lowpan(addr->s6_addr + 8, dev)) {
+ err = addrconf_prefix_rcv_add_addr(net, dev, pinfo, in6_dev,
+ addr, addr_type, addr_flags,
+ sllao, tokenized, valid_lft,
+ prefered_lft);
+ if (err)
+ ND_PRINTK(2, warn,
+ "RA: could not add a short address based address for prefix: %pI6c\n",
+ &pinfo->prefix);
+ }
+}
+#endif
+
+const struct ndisc_ops lowpan_ndisc_ops = {
+ .is_useropt = lowpan_ndisc_is_useropt,
+#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
+ .parse_options = lowpan_ndisc_parse_options,
+ .update = lowpan_ndisc_update,
+ .opt_addr_space = lowpan_ndisc_opt_addr_space,
+ .fill_addr_option = lowpan_ndisc_fill_addr_option,
+ .prefix_rcv_add_addr = lowpan_ndisc_prefix_rcv_add_addr,
+#endif
+};
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d69c4644f8f2..eb49ca24274a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -71,9 +71,31 @@ void rtnl_lock(void)
}
EXPORT_SYMBOL(rtnl_lock);
+static struct sk_buff *defer_kfree_skb_list;
+void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail)
+{
+ if (head && tail) {
+ tail->next = defer_kfree_skb_list;
+ defer_kfree_skb_list = head;
+ }
+}
+EXPORT_SYMBOL(rtnl_kfree_skbs);
+
void __rtnl_unlock(void)
{
+ struct sk_buff *head = defer_kfree_skb_list;
+
+ defer_kfree_skb_list = NULL;
+
mutex_unlock(&rtnl_mutex);
+
+ while (head) {
+ struct sk_buff *next = head->next;
+
+ kfree_skb(head);
+ cond_resched();
+ head = next;
+ }
}
void rtnl_unlock(void)
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 4e2b30894224..8c004a0c8d64 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -81,11 +81,21 @@ static int lowpan_stop(struct net_device *dev)
return 0;
}
+static int lowpan_neigh_construct(struct neighbour *n)
+{
+ struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n));
+
+ /* default no short_addr is available for a neighbour */
+ neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
+ return 0;
+}
+
static const struct net_device_ops lowpan_netdev_ops = {
.ndo_init = lowpan_dev_init,
.ndo_start_xmit = lowpan_xmit,
.ndo_open = lowpan_open,
.ndo_stop = lowpan_stop,
+ .ndo_neigh_construct = lowpan_neigh_construct,
};
static void lowpan_setup(struct net_device *ldev)
@@ -150,6 +160,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev,
wdev->needed_headroom;
ldev->needed_tailroom = wdev->needed_tailroom;
+ ldev->neigh_priv_len = sizeof(struct lowpan_802154_neigh);
+
ret = lowpan_register_netdevice(ldev, LOWPAN_LLTYPE_IEEE802154);
if (ret < 0) {
dev_put(wdev);
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index e459afd16bb3..dbb476d7d38f 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -9,6 +9,7 @@
*/
#include <net/6lowpan.h>
+#include <net/ndisc.h>
#include <net/ieee802154_netdev.h>
#include <net/mac802154.h>
@@ -17,19 +18,9 @@
#define LOWPAN_FRAG1_HEAD_SIZE 0x4
#define LOWPAN_FRAGN_HEAD_SIZE 0x5
-/* don't save pan id, it's intra pan */
-struct lowpan_addr {
- u8 mode;
- union {
- /* IPv6 needs big endian here */
- __be64 extended_addr;
- __be16 short_addr;
- } u;
-};
-
struct lowpan_addr_info {
- struct lowpan_addr daddr;
- struct lowpan_addr saddr;
+ struct ieee802154_addr daddr;
+ struct ieee802154_addr saddr;
};
static inline struct
@@ -48,12 +39,14 @@ lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb)
* RAW/DGRAM sockets.
*/
int lowpan_header_create(struct sk_buff *skb, struct net_device *ldev,
- unsigned short type, const void *_daddr,
- const void *_saddr, unsigned int len)
+ unsigned short type, const void *daddr,
+ const void *saddr, unsigned int len)
{
- const u8 *saddr = _saddr;
- const u8 *daddr = _daddr;
- struct lowpan_addr_info *info;
+ struct wpan_dev *wpan_dev = lowpan_802154_dev(ldev)->wdev->ieee802154_ptr;
+ struct lowpan_addr_info *info = lowpan_skb_priv(skb);
+ struct lowpan_802154_neigh *llneigh = NULL;
+ const struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct neighbour *n;
/* TODO:
* if this package isn't ipv6 one, where should it be routed?
@@ -61,21 +54,50 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *ldev,
if (type != ETH_P_IPV6)
return 0;
- if (!saddr)
- saddr = ldev->dev_addr;
+ /* intra-pan communication */
+ info->saddr.pan_id = wpan_dev->pan_id;
+ info->daddr.pan_id = info->saddr.pan_id;
- raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8);
- raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8);
+ if (!memcmp(daddr, ldev->broadcast, EUI64_ADDR_LEN)) {
+ info->daddr.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
+ info->daddr.mode = IEEE802154_ADDR_SHORT;
+ } else {
+ __le16 short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
+
+ n = neigh_lookup(&nd_tbl, &hdr->daddr, ldev);
+ if (n) {
+ llneigh = lowpan_802154_neigh(neighbour_priv(n));
+ read_lock_bh(&n->lock);
+ short_addr = llneigh->short_addr;
+ read_unlock_bh(&n->lock);
+ }
- info = lowpan_skb_priv(skb);
+ if (llneigh &&
+ lowpan_802154_is_valid_src_short_addr(short_addr)) {
+ info->daddr.short_addr = short_addr;
+ info->daddr.mode = IEEE802154_ADDR_SHORT;
+ } else {
+ info->daddr.mode = IEEE802154_ADDR_LONG;
+ ieee802154_be64_to_le64(&info->daddr.extended_addr,
+ daddr);
+ }
- /* TODO: Currently we only support extended_addr */
- info->daddr.mode = IEEE802154_ADDR_LONG;
- memcpy(&info->daddr.u.extended_addr, daddr,
- sizeof(info->daddr.u.extended_addr));
- info->saddr.mode = IEEE802154_ADDR_LONG;
- memcpy(&info->saddr.u.extended_addr, saddr,
- sizeof(info->daddr.u.extended_addr));
+ if (n)
+ neigh_release(n);
+ }
+
+ if (!saddr) {
+ if (lowpan_802154_is_valid_src_short_addr(wpan_dev->short_addr)) {
+ info->saddr.mode = IEEE802154_ADDR_SHORT;
+ info->saddr.short_addr = wpan_dev->short_addr;
+ } else {
+ info->saddr.mode = IEEE802154_ADDR_LONG;
+ info->saddr.extended_addr = wpan_dev->extended_addr;
+ }
+ } else {
+ info->saddr.mode = IEEE802154_ADDR_LONG;
+ ieee802154_be64_to_le64(&info->saddr.extended_addr, saddr);
+ }
return 0;
}
@@ -209,47 +231,26 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *ldev,
u16 *dgram_size, u16 *dgram_offset)
{
struct wpan_dev *wpan_dev = lowpan_802154_dev(ldev)->wdev->ieee802154_ptr;
- struct ieee802154_addr sa, da;
struct ieee802154_mac_cb *cb = mac_cb_init(skb);
struct lowpan_addr_info info;
- void *daddr, *saddr;
memcpy(&info, lowpan_skb_priv(skb), sizeof(info));
- /* TODO: Currently we only support extended_addr */
- daddr = &info.daddr.u.extended_addr;
- saddr = &info.saddr.u.extended_addr;
-
*dgram_size = skb->len;
- lowpan_header_compress(skb, ldev, daddr, saddr);
+ lowpan_header_compress(skb, ldev, &info.daddr, &info.saddr);
/* dgram_offset = (saved bytes after compression) + lowpan header len */
*dgram_offset = (*dgram_size - skb->len) + skb_network_header_len(skb);
cb->type = IEEE802154_FC_TYPE_DATA;
- /* prepare wpan address data */
- sa.mode = IEEE802154_ADDR_LONG;
- sa.pan_id = wpan_dev->pan_id;
- sa.extended_addr = ieee802154_devaddr_from_raw(saddr);
-
- /* intra-PAN communications */
- da.pan_id = sa.pan_id;
-
- /* if the destination address is the broadcast address, use the
- * corresponding short address
- */
- if (!memcmp(daddr, ldev->broadcast, EUI64_ADDR_LEN)) {
- da.mode = IEEE802154_ADDR_SHORT;
- da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
+ if (info.daddr.mode == IEEE802154_ADDR_SHORT &&
+ ieee802154_is_broadcast_short_addr(info.daddr.short_addr))
cb->ackreq = false;
- } else {
- da.mode = IEEE802154_ADDR_LONG;
- da.extended_addr = ieee802154_devaddr_from_raw(daddr);
+ else
cb->ackreq = wpan_dev->ackreq;
- }
- return wpan_dev_hard_header(skb, lowpan_802154_dev(ldev)->wdev, &da,
- &sa, 0);
+ return wpan_dev_hard_header(skb, lowpan_802154_dev(ldev)->wdev,
+ &info.daddr, &info.saddr, 0);
}
netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 4d2025f7ec57..0f8ca3fca00a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -841,17 +841,19 @@ out:
return ipgre_tunnel_validate(tb, data);
}
-static void ipgre_netlink_parms(struct net_device *dev,
+static int ipgre_netlink_parms(struct net_device *dev,
struct nlattr *data[],
struct nlattr *tb[],
struct ip_tunnel_parm *parms)
{
+ struct ip_tunnel *t = netdev_priv(dev);
+
memset(parms, 0, sizeof(*parms));
parms->iph.protocol = IPPROTO_GRE;
if (!data)
- return;
+ return 0;
if (data[IFLA_GRE_LINK])
parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
@@ -880,16 +882,26 @@ static void ipgre_netlink_parms(struct net_device *dev,
if (data[IFLA_GRE_TOS])
parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
- if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
+ if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
+ if (t->ignore_df)
+ return -EINVAL;
parms->iph.frag_off = htons(IP_DF);
+ }
if (data[IFLA_GRE_COLLECT_METADATA]) {
- struct ip_tunnel *t = netdev_priv(dev);
-
t->collect_md = true;
if (dev->type == ARPHRD_IPGRE)
dev->type = ARPHRD_NONE;
}
+
+ if (data[IFLA_GRE_IGNORE_DF]) {
+ if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
+ && (parms->iph.frag_off & htons(IP_DF)))
+ return -EINVAL;
+ t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
+ }
+
+ return 0;
}
/* This function returns true when ENCAP attributes are present in the nl msg */
@@ -960,16 +972,19 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
{
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ int err;
if (ipgre_netlink_encap_parms(data, &ipencap)) {
struct ip_tunnel *t = netdev_priv(dev);
- int err = ip_tunnel_encap_setup(t, &ipencap);
+ err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
return err;
}
- ipgre_netlink_parms(dev, data, tb, &p);
+ err = ipgre_netlink_parms(dev, data, tb, &p);
+ if (err < 0)
+ return err;
return ip_tunnel_newlink(dev, tb, &p);
}
@@ -978,16 +993,19 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
{
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ int err;
if (ipgre_netlink_encap_parms(data, &ipencap)) {
struct ip_tunnel *t = netdev_priv(dev);
- int err = ip_tunnel_encap_setup(t, &ipencap);
+ err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
return err;
}
- ipgre_netlink_parms(dev, data, tb, &p);
+ err = ipgre_netlink_parms(dev, data, tb, &p);
+ if (err < 0)
+ return err;
return ip_tunnel_changelink(dev, tb, &p);
}
@@ -1024,6 +1042,8 @@ static size_t ipgre_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_GRE_COLLECT_METADATA */
nla_total_size(0) +
+ /* IFLA_GRE_IGNORE_DF */
+ nla_total_size(1) +
0;
}
@@ -1057,6 +1077,9 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
t->encap.flags))
goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
+ goto nla_put_failure;
+
if (t->collect_md) {
if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
goto nla_put_failure;
@@ -1084,6 +1107,7 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
[IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
+ [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index d8f5e0a269f5..95649ebd2874 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -682,7 +682,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
df = tnl_params->frag_off;
- if (skb->protocol == htons(ETH_P_IP))
+ if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
df |= (inner_iph->frag_off&htons(IP_DF));
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 47f837a58e0a..6c8fc3f96b11 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2254,7 +2254,7 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
return ERR_PTR(-EACCES);
/* Add default multicast route */
- if (!(dev->flags & IFF_LOOPBACK))
+ if (!(dev->flags & IFF_LOOPBACK) && !netif_is_l3_master(dev))
addrconf_add_mroute(dev);
return idev;
@@ -2333,12 +2333,109 @@ static bool is_addr_mode_generate_stable(struct inet6_dev *idev)
idev->addr_gen_mode == IN6_ADDR_GEN_MODE_RANDOM;
}
+int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
+ const struct prefix_info *pinfo,
+ struct inet6_dev *in6_dev,
+ const struct in6_addr *addr, int addr_type,
+ u32 addr_flags, bool sllao, bool tokenized,
+ __u32 valid_lft, u32 prefered_lft)
+{
+ struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1);
+ int create = 0, update_lft = 0;
+
+ if (!ifp && valid_lft) {
+ int max_addresses = in6_dev->cnf.max_addresses;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (in6_dev->cnf.optimistic_dad &&
+ !net->ipv6.devconf_all->forwarding && sllao)
+ addr_flags |= IFA_F_OPTIMISTIC;
+#endif
+
+ /* Do not allow to create too much of autoconfigured
+ * addresses; this would be too easy way to crash kernel.
+ */
+ if (!max_addresses ||
+ ipv6_count_addresses(in6_dev) < max_addresses)
+ ifp = ipv6_add_addr(in6_dev, addr, NULL,
+ pinfo->prefix_len,
+ addr_type&IPV6_ADDR_SCOPE_MASK,
+ addr_flags, valid_lft,
+ prefered_lft);
+
+ if (IS_ERR_OR_NULL(ifp))
+ return -1;
+
+ update_lft = 0;
+ create = 1;
+ spin_lock_bh(&ifp->lock);
+ ifp->flags |= IFA_F_MANAGETEMPADDR;
+ ifp->cstamp = jiffies;
+ ifp->tokenized = tokenized;
+ spin_unlock_bh(&ifp->lock);
+ addrconf_dad_start(ifp);
+ }
+
+ if (ifp) {
+ u32 flags;
+ unsigned long now;
+ u32 stored_lft;
+
+ /* update lifetime (RFC2462 5.5.3 e) */
+ spin_lock_bh(&ifp->lock);
+ now = jiffies;
+ if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
+ stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
+ else
+ stored_lft = 0;
+ if (!update_lft && !create && stored_lft) {
+ const u32 minimum_lft = min_t(u32,
+ stored_lft, MIN_VALID_LIFETIME);
+ valid_lft = max(valid_lft, minimum_lft);
+
+ /* RFC4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of the
+ * corresponding address is always reset to
+ * the Preferred Lifetime in the received
+ * Prefix Information option, regardless of
+ * whether the valid lifetime is also reset or
+ * ignored."
+ *
+ * So we should always update prefered_lft here.
+ */
+ update_lft = 1;
+ }
+
+ if (update_lft) {
+ ifp->valid_lft = valid_lft;
+ ifp->prefered_lft = prefered_lft;
+ ifp->tstamp = now;
+ flags = ifp->flags;
+ ifp->flags &= ~IFA_F_DEPRECATED;
+ spin_unlock_bh(&ifp->lock);
+
+ if (!(flags&IFA_F_TENTATIVE))
+ ipv6_ifa_notify(0, ifp);
+ } else
+ spin_unlock_bh(&ifp->lock);
+
+ manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft,
+ create, now);
+
+ in6_ifa_put(ifp);
+ addrconf_verify();
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr);
+
void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
{
struct prefix_info *pinfo;
__u32 valid_lft;
__u32 prefered_lft;
- int addr_type;
+ int addr_type, err;
u32 addr_flags = 0;
struct inet6_dev *in6_dev;
struct net *net = dev_net(dev);
@@ -2432,10 +2529,8 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
/* Try to figure out our local address for this prefix */
if (pinfo->autoconf && in6_dev->cnf.autoconf) {
- struct inet6_ifaddr *ifp;
struct in6_addr addr;
- int create = 0, update_lft = 0;
- bool tokenized = false;
+ bool tokenized = false, dev_addr_generated = false;
if (pinfo->prefix_len == 64) {
memcpy(&addr, &pinfo->prefix, 8);
@@ -2453,106 +2548,36 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
goto ok;
} else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
- in6_dev_put(in6_dev);
- return;
+ goto put;
+ } else {
+ dev_addr_generated = true;
}
goto ok;
}
net_dbg_ratelimited("IPv6 addrconf: prefix with wrong length %d\n",
pinfo->prefix_len);
- in6_dev_put(in6_dev);
- return;
+ goto put;
ok:
+ err = addrconf_prefix_rcv_add_addr(net, dev, pinfo, in6_dev,
+ &addr, addr_type,
+ addr_flags, sllao,
+ tokenized, valid_lft,
+ prefered_lft);
+ if (err)
+ goto put;
- ifp = ipv6_get_ifaddr(net, &addr, dev, 1);
-
- if (!ifp && valid_lft) {
- int max_addresses = in6_dev->cnf.max_addresses;
-
-#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- if (in6_dev->cnf.optimistic_dad &&
- !net->ipv6.devconf_all->forwarding && sllao)
- addr_flags |= IFA_F_OPTIMISTIC;
-#endif
-
- /* Do not allow to create too much of autoconfigured
- * addresses; this would be too easy way to crash kernel.
- */
- if (!max_addresses ||
- ipv6_count_addresses(in6_dev) < max_addresses)
- ifp = ipv6_add_addr(in6_dev, &addr, NULL,
- pinfo->prefix_len,
- addr_type&IPV6_ADDR_SCOPE_MASK,
- addr_flags, valid_lft,
- prefered_lft);
-
- if (IS_ERR_OR_NULL(ifp)) {
- in6_dev_put(in6_dev);
- return;
- }
-
- update_lft = 0;
- create = 1;
- spin_lock_bh(&ifp->lock);
- ifp->flags |= IFA_F_MANAGETEMPADDR;
- ifp->cstamp = jiffies;
- ifp->tokenized = tokenized;
- spin_unlock_bh(&ifp->lock);
- addrconf_dad_start(ifp);
- }
-
- if (ifp) {
- u32 flags;
- unsigned long now;
- u32 stored_lft;
-
- /* update lifetime (RFC2462 5.5.3 e) */
- spin_lock_bh(&ifp->lock);
- now = jiffies;
- if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
- stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
- else
- stored_lft = 0;
- if (!update_lft && !create && stored_lft) {
- const u32 minimum_lft = min_t(u32,
- stored_lft, MIN_VALID_LIFETIME);
- valid_lft = max(valid_lft, minimum_lft);
-
- /* RFC4862 Section 5.5.3e:
- * "Note that the preferred lifetime of the
- * corresponding address is always reset to
- * the Preferred Lifetime in the received
- * Prefix Information option, regardless of
- * whether the valid lifetime is also reset or
- * ignored."
- *
- * So we should always update prefered_lft here.
- */
- update_lft = 1;
- }
-
- if (update_lft) {
- ifp->valid_lft = valid_lft;
- ifp->prefered_lft = prefered_lft;
- ifp->tstamp = now;
- flags = ifp->flags;
- ifp->flags &= ~IFA_F_DEPRECATED;
- spin_unlock_bh(&ifp->lock);
-
- if (!(flags&IFA_F_TENTATIVE))
- ipv6_ifa_notify(0, ifp);
- } else
- spin_unlock_bh(&ifp->lock);
-
- manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft,
- create, now);
-
- in6_ifa_put(ifp);
- addrconf_verify();
- }
+ /* Ignore error case here because previous prefix add addr was
+ * successful which will be notified.
+ */
+ ndisc_ops_prefix_rcv_add_addr(net, dev, pinfo, in6_dev, &addr,
+ addr_type, addr_flags, sllao,
+ tokenized, valid_lft,
+ prefered_lft,
+ dev_addr_generated);
}
inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
+put:
in6_dev_put(in6_dev);
}
@@ -2947,8 +2972,8 @@ static void init_loopback(struct net_device *dev)
}
}
-static void addrconf_add_linklocal(struct inet6_dev *idev,
- const struct in6_addr *addr, u32 flags)
+void addrconf_add_linklocal(struct inet6_dev *idev,
+ const struct in6_addr *addr, u32 flags)
{
struct inet6_ifaddr *ifp;
u32 addr_flags = flags | IFA_F_PERMANENT;
@@ -2967,6 +2992,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev,
in6_ifa_put(ifp);
}
}
+EXPORT_SYMBOL_GPL(addrconf_add_linklocal);
static bool ipv6_reserved_interfaceid(struct in6_addr address)
{
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 40454bfb534e..e32a72fb9982 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -587,7 +587,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl6.daddr = ipv6_hdr(skb)->saddr;
if (saddr)
fl6.saddr = *saddr;
- fl6.flowi6_oif = l3mdev_fib_oif(skb->dev);
+ fl6.flowi6_oif = skb->dev->ifindex;
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
fl6.flowi6_mark = mark;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index b3d00be484d4..ec9efbcdad35 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -34,12 +34,12 @@ static void ila_csum_do_neutral(struct ila_addr *iaddr,
if (p->locator_match.v64) {
diff = p->csum_diff;
} else {
- diff = compute_csum_diff8((__be32 *)iaddr,
- (__be32 *)&p->locator);
+ diff = compute_csum_diff8((__be32 *)&p->locator,
+ (__be32 *)iaddr);
}
fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ?
- ~CSUM_NEUTRAL_FLAG : CSUM_NEUTRAL_FLAG);
+ CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG);
diff = csum_add(diff, fval);
@@ -140,8 +140,8 @@ void ila_init_saved_csum(struct ila_params *p)
return;
p->csum_diff = compute_csum_diff8(
- (__be32 *)&p->locator_match,
- (__be32 *)&p->locator);
+ (__be32 *)&p->locator,
+ (__be32 *)&p->locator_match);
}
static int __init ila_init(void)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index c245895a3d41..fe65cdc28a45 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -73,15 +73,6 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
-/* Set to 3 to get tracing... */
-#define ND_DEBUG 1
-
-#define ND_PRINTK(val, level, fmt, ...) \
-do { \
- if (val <= ND_DEBUG) \
- net_##level##_ratelimited(fmt, ##__VA_ARGS__); \
-} while (0)
-
static u32 ndisc_hash(const void *pkey,
const struct net_device *dev,
__u32 *hash_rnd);
@@ -150,11 +141,10 @@ struct neigh_table nd_tbl = {
};
EXPORT_SYMBOL_GPL(nd_tbl);
-static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data)
+void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data,
+ int data_len, int pad)
{
- int pad = ndisc_addr_option_pad(skb->dev->type);
- int data_len = skb->dev->addr_len;
- int space = ndisc_opt_addr_space(skb->dev);
+ int space = __ndisc_opt_addr_space(data_len, pad);
u8 *opt = skb_put(skb, space);
opt[0] = type;
@@ -171,6 +161,23 @@ static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data)
if (space > 0)
memset(opt, 0, space);
}
+EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option);
+
+static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type,
+ void *data, u8 icmp6_type)
+{
+ __ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len,
+ ndisc_addr_option_pad(skb->dev->type));
+ ndisc_ops_fill_addr_option(skb->dev, skb, icmp6_type);
+}
+
+static inline void ndisc_fill_redirect_addr_option(struct sk_buff *skb,
+ void *ha,
+ const u8 *ops_data)
+{
+ ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha, NDISC_REDIRECT);
+ ndisc_ops_fill_redirect_addr_option(skb->dev, skb, ops_data);
+}
static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
struct nd_opt_hdr *end)
@@ -185,24 +192,28 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
return cur <= end && cur->nd_opt_type == type ? cur : NULL;
}
-static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
+static inline int ndisc_is_useropt(const struct net_device *dev,
+ struct nd_opt_hdr *opt)
{
return opt->nd_opt_type == ND_OPT_RDNSS ||
- opt->nd_opt_type == ND_OPT_DNSSL;
+ opt->nd_opt_type == ND_OPT_DNSSL ||
+ ndisc_ops_is_useropt(dev, opt->nd_opt_type);
}
-static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
+static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev,
+ struct nd_opt_hdr *cur,
struct nd_opt_hdr *end)
{
if (!cur || !end || cur >= end)
return NULL;
do {
cur = ((void *)cur) + (cur->nd_opt_len << 3);
- } while (cur < end && !ndisc_is_useropt(cur));
- return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
+ } while (cur < end && !ndisc_is_useropt(dev, cur));
+ return cur <= end && ndisc_is_useropt(dev, cur) ? cur : NULL;
}
-struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
+struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
+ u8 *opt, int opt_len,
struct ndisc_options *ndopts)
{
struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
@@ -217,6 +228,8 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
l = nd_opt->nd_opt_len << 3;
if (opt_len < l || l == 0)
return NULL;
+ if (ndisc_ops_parse_options(dev, nd_opt, ndopts))
+ goto next_opt;
switch (nd_opt->nd_opt_type) {
case ND_OPT_SOURCE_LL_ADDR:
case ND_OPT_TARGET_LL_ADDR:
@@ -243,7 +256,7 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
break;
#endif
default:
- if (ndisc_is_useropt(nd_opt)) {
+ if (ndisc_is_useropt(dev, nd_opt)) {
ndopts->nd_useropts_end = nd_opt;
if (!ndopts->nd_useropts)
ndopts->nd_useropts = nd_opt;
@@ -260,6 +273,7 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
nd_opt->nd_opt_len);
}
}
+next_opt:
opt_len -= l;
nd_opt = ((void *)nd_opt) + l;
}
@@ -509,7 +523,8 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
if (!dev->addr_len)
inc_opt = 0;
if (inc_opt)
- optlen += ndisc_opt_addr_space(dev);
+ optlen += ndisc_opt_addr_space(dev,
+ NDISC_NEIGHBOUR_ADVERTISEMENT);
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
@@ -528,8 +543,8 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
if (inc_opt)
ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
- dev->dev_addr);
-
+ dev->dev_addr,
+ NDISC_NEIGHBOUR_ADVERTISEMENT);
ndisc_send_skb(skb, daddr, src_addr);
}
@@ -574,7 +589,8 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
if (ipv6_addr_any(saddr))
inc_opt = false;
if (inc_opt)
- optlen += ndisc_opt_addr_space(dev);
+ optlen += ndisc_opt_addr_space(dev,
+ NDISC_NEIGHBOUR_SOLICITATION);
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
@@ -590,7 +606,8 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
if (inc_opt)
ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
- dev->dev_addr);
+ dev->dev_addr,
+ NDISC_NEIGHBOUR_SOLICITATION);
ndisc_send_skb(skb, daddr, saddr);
}
@@ -626,7 +643,7 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
}
#endif
if (send_sllao)
- optlen += ndisc_opt_addr_space(dev);
+ optlen += ndisc_opt_addr_space(dev, NDISC_ROUTER_SOLICITATION);
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
@@ -641,7 +658,8 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
if (send_sllao)
ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
- dev->dev_addr);
+ dev->dev_addr,
+ NDISC_ROUTER_SOLICITATION);
ndisc_send_skb(skb, daddr, saddr);
}
@@ -702,6 +720,15 @@ static int pndisc_is_router(const void *pkey,
return ret;
}
+void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
+ const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type,
+ struct ndisc_options *ndopts)
+{
+ neigh_update(neigh, lladdr, new, flags);
+ /* report ndisc ops about neighbour update */
+ ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts);
+}
+
static void ndisc_recv_ns(struct sk_buff *skb)
{
struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
@@ -738,7 +765,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
return;
}
- if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
+ if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, warn, "NS: invalid ND options\n");
return;
}
@@ -856,9 +883,10 @@ have_ifp:
neigh = __neigh_lookup(&nd_tbl, saddr, dev,
!inc || lladdr || !dev->addr_len);
if (neigh)
- neigh_update(neigh, lladdr, NUD_STALE,
+ ndisc_update(dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
- NEIGH_UPDATE_F_OVERRIDE);
+ NEIGH_UPDATE_F_OVERRIDE,
+ NDISC_NEIGHBOUR_SOLICITATION, &ndopts);
if (neigh || !dev->header_ops) {
ndisc_send_na(dev, saddr, &msg->target, !!is_router,
true, (ifp != NULL && inc), inc);
@@ -911,7 +939,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
idev->cnf.drop_unsolicited_na)
return;
- if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
+ if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, warn, "NS: invalid ND option\n");
return;
}
@@ -967,12 +995,13 @@ static void ndisc_recv_na(struct sk_buff *skb)
goto out;
}
- neigh_update(neigh, lladdr,
+ ndisc_update(dev, neigh, lladdr,
msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
(msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
- (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0));
+ (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0),
+ NDISC_NEIGHBOUR_ADVERTISEMENT, &ndopts);
if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
/*
@@ -1017,7 +1046,7 @@ static void ndisc_recv_rs(struct sk_buff *skb)
goto out;
/* Parse ND options */
- if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
+ if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, notice, "NS: invalid ND option, ignored\n");
goto out;
}
@@ -1031,10 +1060,11 @@ static void ndisc_recv_rs(struct sk_buff *skb)
neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
if (neigh) {
- neigh_update(neigh, lladdr, NUD_STALE,
+ ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE|
- NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER,
+ NDISC_ROUTER_SOLICITATION, &ndopts);
neigh_release(neigh);
}
out:
@@ -1135,7 +1165,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
- if (!ndisc_parse_options(opt, optlen, &ndopts)) {
+ if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts)) {
ND_PRINTK(2, warn, "RA: invalid ND options\n");
return;
}
@@ -1329,11 +1359,12 @@ skip_linkparms:
goto out;
}
}
- neigh_update(neigh, lladdr, NUD_STALE,
+ ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
- NEIGH_UPDATE_F_ISROUTER);
+ NEIGH_UPDATE_F_ISROUTER,
+ NDISC_ROUTER_ADVERTISEMENT, &ndopts);
}
if (!ipv6_accept_ra(in6_dev)) {
@@ -1421,7 +1452,8 @@ skip_routeinfo:
struct nd_opt_hdr *p;
for (p = ndopts.nd_useropts;
p;
- p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
+ p = ndisc_next_useropt(skb->dev, p,
+ ndopts.nd_useropts_end)) {
ndisc_ra_useropt(skb, p);
}
}
@@ -1459,7 +1491,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
return;
}
- if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts))
+ if (!ndisc_parse_options(skb->dev, msg->opt, ndoptlen, &ndopts))
return;
if (!ndopts.nd_opts_rh) {
@@ -1504,7 +1536,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
struct dst_entry *dst;
struct flowi6 fl6;
int rd_len;
- u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+ u8 ha_buf[MAX_ADDR_LEN], *ha = NULL,
+ ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
int oif = l3mdev_fib_oif(dev);
bool ret;
@@ -1563,7 +1596,9 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
memcpy(ha_buf, neigh->ha, dev->addr_len);
read_unlock_bh(&neigh->lock);
ha = ha_buf;
- optlen += ndisc_opt_addr_space(dev);
+ optlen += ndisc_redirect_opt_addr_space(dev, neigh,
+ ops_data_buf,
+ &ops_data);
} else
read_unlock_bh(&neigh->lock);
@@ -1594,7 +1629,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
*/
if (ha)
- ndisc_fill_addr_option(buff, ND_OPT_TARGET_LL_ADDR, ha);
+ ndisc_fill_redirect_addr_option(buff, ha, ops_data);
/*
* build redirect option and copy skb over to the new packet.
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c6ae6f9b5fe3..9e1516785dac 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1042,8 +1042,8 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
return pcpu_rt;
}
-static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
- struct flowi6 *fl6, int flags)
+struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
+ int oif, struct flowi6 *fl6, int flags)
{
struct fib6_node *fn, *saved_fn;
struct rt6_info *rt;
@@ -1139,6 +1139,7 @@ redo_rt6_select:
}
}
+EXPORT_SYMBOL_GPL(ip6_pol_route);
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
struct flowi6 *fl6, int flags)
@@ -2200,7 +2201,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
* first-hop router for the specified ICMP Destination Address.
*/
- if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
+ if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
return;
}
@@ -2235,12 +2236,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
* We have finally decided to accept it.
*/
- neigh_update(neigh, lladdr, NUD_STALE,
+ ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE|
(on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
- NEIGH_UPDATE_F_ISROUTER))
- );
+ NEIGH_UPDATE_F_ISROUTER)),
+ NDISC_REDIRECT, &ndopts);
nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
if (!nrt)
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index fc3598a922b0..37d674e6f8a9 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1033,6 +1033,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
{
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
+ size_t headroom, linear;
struct sk_buff *skb;
struct iucv_message txmsg = {0};
struct cmsghdr *cmsg;
@@ -1110,20 +1111,31 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
* this is fine for SOCK_SEQPACKET (unless we want to support
* segmented records using the MSG_EOR flag), but
* for SOCK_STREAM we might want to improve it in future */
- if (iucv->transport == AF_IUCV_TRANS_HIPER)
- skb = sock_alloc_send_skb(sk,
- len + sizeof(struct af_iucv_trans_hdr) + ETH_HLEN,
- noblock, &err);
- else
- skb = sock_alloc_send_skb(sk, len, noblock, &err);
+ headroom = (iucv->transport == AF_IUCV_TRANS_HIPER)
+ ? sizeof(struct af_iucv_trans_hdr) + ETH_HLEN : 0;
+ if (headroom + len < PAGE_SIZE) {
+ linear = len;
+ } else {
+ /* In nonlinear "classic" iucv skb,
+ * reserve space for iucv_array
+ */
+ if (iucv->transport != AF_IUCV_TRANS_HIPER)
+ headroom += sizeof(struct iucv_array) *
+ (MAX_SKB_FRAGS + 1);
+ linear = PAGE_SIZE - headroom;
+ }
+ skb = sock_alloc_send_pskb(sk, headroom + linear, len - linear,
+ noblock, &err, 0);
if (!skb)
goto out;
- if (iucv->transport == AF_IUCV_TRANS_HIPER)
- skb_reserve(skb, sizeof(struct af_iucv_trans_hdr) + ETH_HLEN);
- if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
- err = -EFAULT;
+ if (headroom)
+ skb_reserve(skb, headroom);
+ skb_put(skb, linear);
+ skb->len = len;
+ skb->data_len = len - linear;
+ err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
+ if (err)
goto fail;
- }
/* wait if outstanding messages for iucv path has reached */
timeo = sock_sndtimeo(sk, noblock);
@@ -1148,49 +1160,67 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
atomic_dec(&iucv->msg_sent);
goto fail;
}
- goto release;
- }
- skb_queue_tail(&iucv->send_skb_q, skb);
-
- if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags)
- && skb->len <= 7) {
- err = iucv_send_iprm(iucv->path, &txmsg, skb);
+ } else { /* Classic VM IUCV transport */
+ skb_queue_tail(&iucv->send_skb_q, skb);
+
+ if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) &&
+ skb->len <= 7) {
+ err = iucv_send_iprm(iucv->path, &txmsg, skb);
+
+ /* on success: there is no message_complete callback */
+ /* for an IPRMDATA msg; remove skb from send queue */
+ if (err == 0) {
+ skb_unlink(skb, &iucv->send_skb_q);
+ kfree_skb(skb);
+ }
- /* on success: there is no message_complete callback
- * for an IPRMDATA msg; remove skb from send queue */
- if (err == 0) {
- skb_unlink(skb, &iucv->send_skb_q);
- kfree_skb(skb);
+ /* this error should never happen since the */
+ /* IUCV_IPRMDATA path flag is set... sever path */
+ if (err == 0x15) {
+ pr_iucv->path_sever(iucv->path, NULL);
+ skb_unlink(skb, &iucv->send_skb_q);
+ err = -EPIPE;
+ goto fail;
+ }
+ } else if (skb_is_nonlinear(skb)) {
+ struct iucv_array *iba = (struct iucv_array *)skb->head;
+ int i;
+
+ /* skip iucv_array lying in the headroom */
+ iba[0].address = (u32)(addr_t)skb->data;
+ iba[0].length = (u32)skb_headlen(skb);
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ iba[i + 1].address =
+ (u32)(addr_t)skb_frag_address(frag);
+ iba[i + 1].length = (u32)skb_frag_size(frag);
+ }
+ err = pr_iucv->message_send(iucv->path, &txmsg,
+ IUCV_IPBUFLST, 0,
+ (void *)iba, skb->len);
+ } else { /* non-IPRM Linear skb */
+ err = pr_iucv->message_send(iucv->path, &txmsg,
+ 0, 0, (void *)skb->data, skb->len);
}
-
- /* this error should never happen since the
- * IUCV_IPRMDATA path flag is set... sever path */
- if (err == 0x15) {
- pr_iucv->path_sever(iucv->path, NULL);
+ if (err) {
+ if (err == 3) {
+ user_id[8] = 0;
+ memcpy(user_id, iucv->dst_user_id, 8);
+ appl_id[8] = 0;
+ memcpy(appl_id, iucv->dst_name, 8);
+ pr_err(
+ "Application %s on z/VM guest %s exceeds message limit\n",
+ appl_id, user_id);
+ err = -EAGAIN;
+ } else {
+ err = -EPIPE;
+ }
skb_unlink(skb, &iucv->send_skb_q);
- err = -EPIPE;
goto fail;
}
- } else
- err = pr_iucv->message_send(iucv->path, &txmsg, 0, 0,
- (void *) skb->data, skb->len);
- if (err) {
- if (err == 3) {
- user_id[8] = 0;
- memcpy(user_id, iucv->dst_user_id, 8);
- appl_id[8] = 0;
- memcpy(appl_id, iucv->dst_name, 8);
- pr_err("Application %s on z/VM guest %s"
- " exceeds message limit\n",
- appl_id, user_id);
- err = -EAGAIN;
- } else
- err = -EPIPE;
- skb_unlink(skb, &iucv->send_skb_q);
- goto fail;
}
-release:
release_sock(sk);
return len;
@@ -1201,42 +1231,32 @@ out:
return err;
}
-/* iucv_fragment_skb() - Fragment a single IUCV message into multiple skb's
- *
- * Locking: must be called with message_q.lock held
- */
-static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len)
+static struct sk_buff *alloc_iucv_recv_skb(unsigned long len)
{
- int dataleft, size, copied = 0;
- struct sk_buff *nskb;
-
- dataleft = len;
- while (dataleft) {
- if (dataleft >= sk->sk_rcvbuf / 4)
- size = sk->sk_rcvbuf / 4;
- else
- size = dataleft;
-
- nskb = alloc_skb(size, GFP_ATOMIC | GFP_DMA);
- if (!nskb)
- return -ENOMEM;
-
- /* copy target class to control buffer of new skb */
- IUCV_SKB_CB(nskb)->class = IUCV_SKB_CB(skb)->class;
-
- /* copy data fragment */
- memcpy(nskb->data, skb->data + copied, size);
- copied += size;
- dataleft -= size;
-
- skb_reset_transport_header(nskb);
- skb_reset_network_header(nskb);
- nskb->len = size;
+ size_t headroom, linear;
+ struct sk_buff *skb;
+ int err;
- skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, nskb);
+ if (len < PAGE_SIZE) {
+ headroom = 0;
+ linear = len;
+ } else {
+ headroom = sizeof(struct iucv_array) * (MAX_SKB_FRAGS + 1);
+ linear = PAGE_SIZE - headroom;
+ }
+ skb = alloc_skb_with_frags(headroom + linear, len - linear,
+ 0, &err, GFP_ATOMIC | GFP_DMA);
+ WARN_ONCE(!skb,
+ "alloc of recv iucv skb len=%lu failed with errcode=%d\n",
+ len, err);
+ if (skb) {
+ if (headroom)
+ skb_reserve(skb, headroom);
+ skb_put(skb, linear);
+ skb->len = len;
+ skb->data_len = len - linear;
}
-
- return 0;
+ return skb;
}
/* iucv_process_message() - Receive a single outstanding IUCV message
@@ -1263,31 +1283,32 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
skb->len = 0;
}
} else {
- rc = pr_iucv->message_receive(path, msg,
+ if (skb_is_nonlinear(skb)) {
+ struct iucv_array *iba = (struct iucv_array *)skb->head;
+ int i;
+
+ iba[0].address = (u32)(addr_t)skb->data;
+ iba[0].length = (u32)skb_headlen(skb);
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ iba[i + 1].address =
+ (u32)(addr_t)skb_frag_address(frag);
+ iba[i + 1].length = (u32)skb_frag_size(frag);
+ }
+ rc = pr_iucv->message_receive(path, msg,
+ IUCV_IPBUFLST,
+ (void *)iba, len, NULL);
+ } else {
+ rc = pr_iucv->message_receive(path, msg,
msg->flags & IUCV_IPRMDATA,
skb->data, len, NULL);
+ }
if (rc) {
kfree_skb(skb);
return;
}
- /* we need to fragment iucv messages for SOCK_STREAM only;
- * for SOCK_SEQPACKET, it is only relevant if we support
- * record segmentation using MSG_EOR (see also recvmsg()) */
- if (sk->sk_type == SOCK_STREAM &&
- skb->truesize >= sk->sk_rcvbuf / 4) {
- rc = iucv_fragment_skb(sk, skb, len);
- kfree_skb(skb);
- skb = NULL;
- if (rc) {
- pr_iucv->path_sever(path, NULL);
- return;
- }
- skb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q);
- } else {
- skb_reset_transport_header(skb);
- skb_reset_network_header(skb);
- skb->len = len;
- }
+ WARN_ON_ONCE(skb->len != len);
}
IUCV_SKB_CB(skb)->offset = 0;
@@ -1306,7 +1327,7 @@ static void iucv_process_message_q(struct sock *sk)
struct sock_msg_q *p, *n;
list_for_each_entry_safe(p, n, &iucv->message_q.list, list) {
- skb = alloc_skb(iucv_msg_length(&p->msg), GFP_ATOMIC | GFP_DMA);
+ skb = alloc_iucv_recv_skb(iucv_msg_length(&p->msg));
if (!skb)
break;
iucv_process_message(sk, skb, p->path, &p->msg);
@@ -1801,7 +1822,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
if (len > sk->sk_rcvbuf)
goto save_message;
- skb = alloc_skb(iucv_msg_length(msg), GFP_ATOMIC | GFP_DMA);
+ skb = alloc_iucv_recv_skb(iucv_msg_length(msg));
if (!skb)
goto save_message;
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 7da97809a7e8..d90e4ef09e85 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -108,7 +108,7 @@ EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
*/
struct dst_entry *l3mdev_get_rt6_dst(struct net *net,
- const struct flowi6 *fl6)
+ struct flowi6 *fl6)
{
struct dst_entry *dst = NULL;
struct net_device *dev;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index b6db56ec8117..f8c61d2a7963 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -224,8 +224,8 @@ int tcf_hash_search(struct tc_action_net *tn, struct tc_action *a, u32 index)
}
EXPORT_SYMBOL(tcf_hash_search);
-int tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a,
- int bind)
+bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a,
+ int bind)
{
struct tcf_hashinfo *hinfo = tn->hinfo;
struct tcf_common *p = NULL;
@@ -235,9 +235,9 @@ int tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a,
p->tcfc_refcnt++;
a->priv = p;
a->hinfo = hinfo;
- return 1;
+ return true;
}
- return 0;
+ return false;
}
EXPORT_SYMBOL(tcf_hash_check);
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 02f5a8ba95d7..b7fa96926c90 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -423,7 +423,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
u16 ife_type = 0;
u8 *daddr = NULL;
u8 *saddr = NULL;
- int ret = 0, exists = 0;
+ bool exists = false;
+ int ret = 0;
int err;
err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 8998a3594e86..6148e323ed93 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -97,7 +97,8 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla,
struct tcf_ipt *ipt;
struct xt_entry_target *td, *t;
char *tname;
- int ret = 0, err, exists = 0;
+ bool exists = false;
+ int ret = 0, err;
u32 hook = 0;
u32 index = 0;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 787751a7981a..5b135d357e1e 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -62,7 +62,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct tc_mirred *parm;
struct tcf_mirred *m;
struct net_device *dev;
- int ret, ok_push = 0, exists = 0;
+ int ret, ok_push = 0;
+ bool exists = false;
if (nla == NULL)
return -EINVAL;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index be5fbb51cfed..318328d34d12 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -86,8 +86,9 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_DEF_MAX + 1];
struct tc_defact *parm;
struct tcf_defact *d;
+ bool exists = false;
+ int ret = 0, err;
char *defdata;
- int ret = 0, err, exists = 0;
if (nla == NULL)
return -EINVAL;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 7e2bc3c2b6da..53d1486cddf7 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -69,7 +69,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct tcf_skbedit *d;
u32 flags = 0, *priority = NULL, *mark = NULL;
u16 *queue_mapping = NULL;
- int ret = 0, err, exists = 0;
+ bool exists = false;
+ int ret = 0, err;
if (nla == NULL)
return -EINVAL;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index b075d50e0fc3..db9b7ed570ba 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -77,8 +77,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
int action;
__be16 push_vid = 0;
__be16 push_proto = 0;
- int ret = 0, exists = 0;
- int err;
+ bool exists = false;
+ int ret = 0, err;
if (!nla)
return -EINVAL;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 04e0b0583e00..789b69ee9e51 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -375,11 +375,11 @@ static void choke_reset(struct Qdisc *sch)
q->head = (q->head + 1) & q->tab_mask;
if (!skb)
continue;
- qdisc_qstats_backlog_dec(sch, skb);
- --sch->q.qlen;
- qdisc_drop(skb, sch);
+ rtnl_qdisc_drop(skb, sch);
}
+ sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
q->head = q->tail = 0;
red_restart(&q->vars);
@@ -455,7 +455,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
--sch->q.qlen;
- qdisc_drop(skb, sch);
+ rtnl_qdisc_drop(skb, sch);
}
qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
q->head = 0;
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index dddf3bb65a32..c5bc424e3b3c 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -174,7 +174,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
- qdisc_drop(skb, sch);
+ rtnl_qdisc_drop(skb, sch);
}
qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index f49c81e91acd..6eb06674f778 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -514,17 +514,25 @@ out:
return skb;
}
+static void fq_flow_purge(struct fq_flow *flow)
+{
+ rtnl_kfree_skbs(flow->head, flow->tail);
+ flow->head = NULL;
+ flow->qlen = 0;
+}
+
static void fq_reset(struct Qdisc *sch)
{
struct fq_sched_data *q = qdisc_priv(sch);
struct rb_root *root;
- struct sk_buff *skb;
struct rb_node *p;
struct fq_flow *f;
unsigned int idx;
- while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL)
- kfree_skb(skb);
+ sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
+
+ fq_flow_purge(&q->internal);
if (!q->fq_root)
return;
@@ -535,8 +543,7 @@ static void fq_reset(struct Qdisc *sch)
f = container_of(p, struct fq_flow, fq_node);
rb_erase(p, root);
- while ((skb = fq_dequeue_head(sch, f)) != NULL)
- kfree_skb(skb);
+ fq_flow_purge(f);
kmem_cache_free(fq_flow_cachep, f);
}
@@ -737,7 +744,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
if (!skb)
break;
drop_len += qdisc_pkt_len(skb);
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
drop_count++;
}
qdisc_tree_reduce_backlog(sch, drop_count, drop_len);
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index a302e8ef5498..2dc0a849515a 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -336,6 +336,12 @@ begin:
return skb;
}
+static void fq_codel_flow_purge(struct fq_codel_flow *flow)
+{
+ rtnl_kfree_skbs(flow->head, flow->tail);
+ flow->head = NULL;
+}
+
static void fq_codel_reset(struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
@@ -346,18 +352,13 @@ static void fq_codel_reset(struct Qdisc *sch)
for (i = 0; i < q->flows_cnt; i++) {
struct fq_codel_flow *flow = q->flows + i;
- while (flow->head) {
- struct sk_buff *skb = dequeue_head(flow);
-
- qdisc_qstats_backlog_dec(sch, skb);
- kfree_skb(skb);
- }
-
+ fq_codel_flow_purge(flow);
INIT_LIST_HEAD(&flow->flowchain);
codel_vars_init(&flow->cvars);
}
memset(q->backlogs, 0, q->flows_cnt * sizeof(u32));
sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
q->memory_usage = 0;
}
@@ -433,7 +434,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
struct sk_buff *skb = fq_codel_dequeue(sch);
q->cstats.drop_len += qdisc_pkt_len(skb);
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
q->cstats.drop_count++;
}
qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 0c9cb516f2e3..773b632e1e33 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -493,7 +493,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc)
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
- __qdisc_reset_queue(qdisc, band2list(priv, prio));
+ __qdisc_reset_queue(band2list(priv, prio));
priv->bitmap = 0;
qdisc->qstats.backlog = 0;
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index c51791848a38..c44593b8e65a 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -464,7 +464,7 @@ static void hhf_reset(struct Qdisc *sch)
struct sk_buff *skb;
while ((skb = hhf_dequeue(sch)) != NULL)
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
}
static void *hhf_zalloc(size_t sz)
@@ -574,7 +574,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = hhf_dequeue(sch);
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
}
qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
prev_backlog - sch->qstats.backlog);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 07dcd2933f01..a454605ab5cb 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -957,7 +957,7 @@ static void htb_reset(struct Qdisc *sch)
}
}
qdisc_watchdog_cancel(&q->watchdog);
- __skb_queue_purge(&q->direct_queue);
+ __qdisc_reset_queue(&q->direct_queue);
sch->q.qlen = 0;
sch->qstats.backlog = 0;
memset(q->hlevel, 0, sizeof(q->hlevel));
@@ -1231,7 +1231,7 @@ static void htb_destroy(struct Qdisc *sch)
htb_destroy_class(sch, cl);
}
qdisc_class_hash_destroy(&q->clhash);
- __skb_queue_purge(&q->direct_queue);
+ __qdisc_reset_queue(&q->direct_queue);
}
static int htb_delete(struct Qdisc *sch, unsigned long arg)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 876df13c745a..e271967439bf 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -368,9 +368,7 @@ static void tfifo_reset(struct Qdisc *sch)
struct sk_buff *skb = netem_rb_to_skb(p);
rb_erase(p, &q->t_root);
- skb->next = NULL;
- skb->prev = NULL;
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
}
}
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 71ae3b9629f9..912a46a5d02e 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -234,7 +234,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
- qdisc_drop(skb, sch);
+ rtnl_qdisc_drop(skb, sch);
}
qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index a2e0b855d1c8..57d118b41cad 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -520,7 +520,7 @@ sfq_reset(struct Qdisc *sch)
struct sk_buff *skb;
while ((skb = sfq_dequeue(sch)) != NULL)
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
}
/*
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 57e460be4692..31b9f9c52974 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_TIPC) := tipc.o
tipc-y += addr.o bcast.o bearer.o \
core.o link.o discover.o msg.o \
- name_distr.o subscr.o name_table.o net.o \
+ name_distr.o subscr.o monitor.o name_table.o net.o \
netlink.o netlink_compat.o node.o socket.o eth_media.o \
server.o socket.o
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 93f7c983be33..64f4004a6fac 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -73,4 +73,5 @@ int tipc_addr_node_valid(u32 addr);
int tipc_in_scope(u32 domain, u32 addr);
int tipc_addr_scope(u32 domain);
char *tipc_addr_string_fill(char *string, u32 addr);
+
#endif
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 6f11c62bc8f9..9a70e1d744d2 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -1,7 +1,7 @@
/*
* net/tipc/bearer.c: TIPC bearer code
*
- * Copyright (c) 1996-2006, 2013-2014, Ericsson AB
+ * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
* Copyright (c) 2004-2006, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -39,6 +39,7 @@
#include "bearer.h"
#include "link.h"
#include "discover.h"
+#include "monitor.h"
#include "bcast.h"
#include "netlink.h"
@@ -313,6 +314,10 @@ restart:
rcu_assign_pointer(tn->bearer_list[bearer_id], b);
if (skb)
tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
+
+ if (tipc_mon_create(net, bearer_id))
+ return -ENOMEM;
+
pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
name,
tipc_addr_string_fill(addr_string, disc_domain), priority);
@@ -348,6 +353,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b)
tipc_disc_delete(b->link_req);
RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL);
kfree_rcu(b, rcu);
+ tipc_mon_delete(net, bearer_id);
}
int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index f686e41b5abb..0d337c7b6fad 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -1,7 +1,7 @@
/*
* net/tipc/bearer.h: Include file for TIPC bearer code
*
- * Copyright (c) 1996-2006, 2013-2014, Ericsson AB
+ * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
*
diff --git a/net/tipc/core.c b/net/tipc/core.c
index fe1b062c4f18..236b043a4156 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -57,6 +57,7 @@ static int __net_init tipc_init_net(struct net *net)
tn->net_id = 4711;
tn->own_addr = 0;
+ tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
get_random_bytes(&tn->random, sizeof(int));
INIT_LIST_HEAD(&tn->node_list);
spin_lock_init(&tn->node_list_lock);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index eff58dc53aa1..a1845fb27d80 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -66,11 +66,13 @@ struct tipc_bc_base;
struct tipc_link;
struct tipc_name_table;
struct tipc_server;
+struct tipc_monitor;
#define TIPC_MOD_VER "2.0.0"
-#define NODE_HTABLE_SIZE 512
-#define MAX_BEARERS 3
+#define NODE_HTABLE_SIZE 512
+#define MAX_BEARERS 3
+#define TIPC_DEF_MON_THRESHOLD 32
extern int tipc_net_id __read_mostly;
extern int sysctl_tipc_rmem[3] __read_mostly;
@@ -88,6 +90,10 @@ struct tipc_net {
u32 num_nodes;
u32 num_links;
+ /* Neighbor monitoring list */
+ struct tipc_monitor *monitors[MAX_BEARERS];
+ int mon_threshold;
+
/* Bearer list */
struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1];
@@ -126,6 +132,11 @@ static inline struct list_head *tipc_nodes(struct net *net)
return &tipc_net(net)->node_list;
}
+static inline unsigned int tipc_hashfn(u32 addr)
+{
+ return addr & (NODE_HTABLE_SIZE - 1);
+}
+
static inline u16 mod(u16 x)
{
return x & 0xffffu;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index a904ccd5a93a..03f8bdf70d8f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -42,6 +42,7 @@
#include "name_distr.h"
#include "discover.h"
#include "netlink.h"
+#include "monitor.h"
#include <linux/pkt_sched.h>
@@ -95,6 +96,7 @@ struct tipc_stats {
* @pmsg: convenience pointer to "proto_msg" field
* @priority: current link priority
* @net_plane: current link network plane ('A' through 'H')
+ * @mon_state: cookie with information needed by link monitor
* @backlog_limit: backlog queue congestion thresholds (indexed by importance)
* @exp_msg_count: # of tunnelled messages expected during link changeover
* @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset
@@ -138,6 +140,7 @@ struct tipc_link {
char if_name[TIPC_MAX_IF_NAME];
u32 priority;
char net_plane;
+ struct tipc_mon_state mon_state;
u16 rst_cnt;
/* Failover/synch */
@@ -708,18 +711,25 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
bool setup = false;
u16 bc_snt = l->bc_sndlink->snd_nxt - 1;
u16 bc_acked = l->bc_rcvlink->acked;
-
- link_profile_stats(l);
+ struct tipc_mon_state *mstate = &l->mon_state;
switch (l->state) {
case LINK_ESTABLISHED:
case LINK_SYNCHING:
- if (l->silent_intv_cnt > l->abort_limit)
- return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
mtyp = STATE_MSG;
+ link_profile_stats(l);
+ tipc_mon_get_state(l->net, l->addr, mstate, l->bearer_id);
+ if (mstate->reset || (l->silent_intv_cnt > l->abort_limit))
+ return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
state = bc_acked != bc_snt;
- probe = l->silent_intv_cnt;
- l->silent_intv_cnt++;
+ state |= l->bc_rcvlink->rcv_unacked;
+ state |= l->rcv_unacked;
+ state |= !skb_queue_empty(&l->transmq);
+ state |= !skb_queue_empty(&l->deferdq);
+ probe = mstate->probing;
+ probe |= l->silent_intv_cnt;
+ if (probe || mstate->monitoring)
+ l->silent_intv_cnt++;
break;
case LINK_RESET:
setup = l->rst_cnt++ <= 4;
@@ -830,6 +840,7 @@ void tipc_link_reset(struct tipc_link *l)
l->stats.recv_info = 0;
l->stale_count = 0;
l->bc_peer_is_up = false;
+ memset(&l->mon_state, 0, sizeof(l->mon_state));
tipc_link_reset_stats(l);
}
@@ -1238,6 +1249,9 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
struct tipc_msg *hdr;
struct sk_buff_head *dfq = &l->deferdq;
bool node_up = link_is_up(l->bc_rcvlink);
+ struct tipc_mon_state *mstate = &l->mon_state;
+ int dlen = 0;
+ void *data;
/* Don't send protocol message during reset or link failover */
if (tipc_link_is_blocked(l))
@@ -1250,12 +1264,13 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt;
skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE,
- TIPC_MAX_IF_NAME, l->addr,
+ tipc_max_domain_size, l->addr,
tipc_own_addr(l->net), 0, 0, 0);
if (!skb)
return;
hdr = buf_msg(skb);
+ data = msg_data(hdr);
msg_set_session(hdr, l->session);
msg_set_bearer_id(hdr, l->bearer_id);
msg_set_net_plane(hdr, l->net_plane);
@@ -1271,14 +1286,18 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
if (mtyp == STATE_MSG) {
msg_set_seq_gap(hdr, rcvgap);
- msg_set_size(hdr, INT_H_SIZE);
msg_set_probe(hdr, probe);
+ tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id);
+ msg_set_size(hdr, INT_H_SIZE + dlen);
+ skb_trim(skb, INT_H_SIZE + dlen);
l->stats.sent_states++;
l->rcv_unacked = 0;
} else {
/* RESET_MSG or ACTIVATE_MSG */
msg_set_max_pkt(hdr, l->advertised_mtu);
- strcpy(msg_data(hdr), l->if_name);
+ strcpy(data, l->if_name);
+ msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME);
+ skb_trim(skb, INT_H_SIZE + TIPC_MAX_IF_NAME);
}
if (probe)
l->stats.sent_probes++;
@@ -1371,7 +1390,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
u16 peers_tol = msg_link_tolerance(hdr);
u16 peers_prio = msg_linkprio(hdr);
u16 rcv_nxt = l->rcv_nxt;
+ u16 dlen = msg_data_sz(hdr);
int mtyp = msg_type(hdr);
+ void *data;
char *if_name;
int rc = 0;
@@ -1381,6 +1402,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (tipc_own_addr(l->net) > msg_prevnode(hdr))
l->net_plane = msg_net_plane(hdr);
+ skb_linearize(skb);
+ hdr = buf_msg(skb);
+ data = msg_data(hdr);
+
switch (mtyp) {
case RESET_MSG:
@@ -1391,8 +1416,6 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
/* fall thru' */
case ACTIVATE_MSG:
- skb_linearize(skb);
- hdr = buf_msg(skb);
/* Complete own link name with peer's interface name */
if_name = strrchr(l->name, ':') + 1;
@@ -1400,7 +1423,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
break;
if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME)
break;
- strncpy(if_name, msg_data(hdr), TIPC_MAX_IF_NAME);
+ strncpy(if_name, data, TIPC_MAX_IF_NAME);
/* Update own tolerance if peer indicates a non-zero value */
if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
@@ -1448,6 +1471,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
rc = TIPC_LINK_UP_EVT;
break;
}
+ tipc_mon_rcv(l->net, data, dlen, l->addr,
+ &l->mon_state, l->bearer_id);
/* Send NACK if peer has sent pkts we haven't received yet */
if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
new file mode 100644
index 000000000000..87d4efedd09f
--- /dev/null
+++ b/net/tipc/monitor.c
@@ -0,0 +1,651 @@
+/*
+ * net/tipc/monitor.c
+ *
+ * Copyright (c) 2016, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "addr.h"
+#include "monitor.h"
+
+#define MAX_MON_DOMAIN 64
+#define MON_TIMEOUT 120000
+#define MAX_PEER_DOWN_EVENTS 4
+
+/* struct tipc_mon_domain: domain record to be transferred between peers
+ * @len: actual size of domain record
+ * @gen: current generation of sender's domain
+ * @ack_gen: most recent generation of self's domain acked by peer
+ * @member_cnt: number of domain member nodes described in this record
+ * @up_map: bit map indicating which of the members the sender considers up
+ * @members: identity of the domain members
+ */
+struct tipc_mon_domain {
+ u16 len;
+ u16 gen;
+ u16 ack_gen;
+ u16 member_cnt;
+ u64 up_map;
+ u32 members[MAX_MON_DOMAIN];
+};
+
+/* struct tipc_peer: state of a peer node and its domain
+ * @addr: tipc node identity of peer
+ * @head_map: shows which other nodes currently consider peer 'up'
+ * @domain: most recent domain record from peer
+ * @hash: position in hashed lookup list
+ * @list: position in linked list, in circular ascending order by 'addr'
+ * @applied: number of reported domain members applied on this monitor list
+ * @is_up: peer is up as seen from this node
+ * @is_head: peer is assigned domain head as seen from this node
+ * @is_local: peer is in local domain and should be continuously monitored
+ * @down_cnt: - numbers of other peers which have reported this on lost
+ */
+struct tipc_peer {
+ u32 addr;
+ struct tipc_mon_domain *domain;
+ struct hlist_node hash;
+ struct list_head list;
+ u8 applied;
+ u8 down_cnt;
+ bool is_up;
+ bool is_head;
+ bool is_local;
+};
+
+struct tipc_monitor {
+ struct hlist_head peers[NODE_HTABLE_SIZE];
+ int peer_cnt;
+ struct tipc_peer *self;
+ rwlock_t lock;
+ struct tipc_mon_domain cache;
+ u16 list_gen;
+ u16 dom_gen;
+ struct net *net;
+ struct timer_list timer;
+ unsigned long timer_intv;
+};
+
+static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id)
+{
+ return tipc_net(net)->monitors[bearer_id];
+}
+
+const int tipc_max_domain_size = sizeof(struct tipc_mon_domain);
+
+/* dom_rec_len(): actual length of domain record for transport
+ */
+static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt)
+{
+ return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32));
+}
+
+/* dom_size() : calculate size of own domain based on number of peers
+ */
+static int dom_size(int peers)
+{
+ int i = 0;
+
+ while ((i * i) < peers)
+ i++;
+ return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN;
+}
+
+static void map_set(u64 *up_map, int i, unsigned int v)
+{
+ *up_map &= ~(1 << i);
+ *up_map |= (v << i);
+}
+
+static int map_get(u64 up_map, int i)
+{
+ return (up_map & (1 << i)) >> i;
+}
+
+static struct tipc_peer *peer_prev(struct tipc_peer *peer)
+{
+ return list_last_entry(&peer->list, struct tipc_peer, list);
+}
+
+static struct tipc_peer *peer_nxt(struct tipc_peer *peer)
+{
+ return list_first_entry(&peer->list, struct tipc_peer, list);
+}
+
+static struct tipc_peer *peer_head(struct tipc_peer *peer)
+{
+ while (!peer->is_head)
+ peer = peer_prev(peer);
+ return peer;
+}
+
+static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr)
+{
+ struct tipc_peer *peer;
+ unsigned int thash = tipc_hashfn(addr);
+
+ hlist_for_each_entry(peer, &mon->peers[thash], hash) {
+ if (peer->addr == addr)
+ return peer;
+ }
+ return NULL;
+}
+
+static struct tipc_peer *get_self(struct net *net, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+
+ return mon->self;
+}
+
+static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon)
+{
+ struct tipc_net *tn = tipc_net(net);
+
+ return mon->peer_cnt > tn->mon_threshold;
+}
+
+/* mon_identify_lost_members() : - identify amd mark potentially lost members
+ */
+static void mon_identify_lost_members(struct tipc_peer *peer,
+ struct tipc_mon_domain *dom_bef,
+ int applied_bef)
+{
+ struct tipc_peer *member = peer;
+ struct tipc_mon_domain *dom_aft = peer->domain;
+ int applied_aft = peer->applied;
+ int i;
+
+ for (i = 0; i < applied_bef; i++) {
+ member = peer_nxt(member);
+
+ /* Do nothing if self or peer already see member as down */
+ if (!member->is_up || !map_get(dom_bef->up_map, i))
+ continue;
+
+ /* Loss of local node must be detected by active probing */
+ if (member->is_local)
+ continue;
+
+ /* Start probing if member was removed from applied domain */
+ if (!applied_aft || (applied_aft < i)) {
+ member->down_cnt = 1;
+ continue;
+ }
+
+ /* Member loss is confirmed if it is still in applied domain */
+ if (!map_get(dom_aft->up_map, i))
+ member->down_cnt++;
+ }
+}
+
+/* mon_apply_domain() : match a peer's domain record against monitor list
+ */
+static void mon_apply_domain(struct tipc_monitor *mon,
+ struct tipc_peer *peer)
+{
+ struct tipc_mon_domain *dom = peer->domain;
+ struct tipc_peer *member;
+ u32 addr;
+ int i;
+
+ if (!dom || !peer->is_up)
+ return;
+
+ /* Scan across domain members and match against monitor list */
+ peer->applied = 0;
+ member = peer_nxt(peer);
+ for (i = 0; i < dom->member_cnt; i++) {
+ addr = dom->members[i];
+ if (addr != member->addr)
+ return;
+ peer->applied++;
+ member = peer_nxt(member);
+ }
+}
+
+/* mon_update_local_domain() : update after peer addition/removal/up/down
+ */
+static void mon_update_local_domain(struct tipc_monitor *mon)
+{
+ struct tipc_peer *self = mon->self;
+ struct tipc_mon_domain *cache = &mon->cache;
+ struct tipc_mon_domain *dom = self->domain;
+ struct tipc_peer *peer = self;
+ u64 prev_up_map = dom->up_map;
+ u16 member_cnt, i;
+ bool diff;
+
+ /* Update local domain size based on current size of cluster */
+ member_cnt = dom_size(mon->peer_cnt) - 1;
+ self->applied = member_cnt;
+
+ /* Update native and cached outgoing local domain records */
+ dom->len = dom_rec_len(dom, member_cnt);
+ diff = dom->member_cnt != member_cnt;
+ dom->member_cnt = member_cnt;
+ for (i = 0; i < member_cnt; i++) {
+ peer = peer_nxt(peer);
+ diff |= dom->members[i] != peer->addr;
+ dom->members[i] = peer->addr;
+ map_set(&dom->up_map, i, peer->is_up);
+ cache->members[i] = htonl(peer->addr);
+ }
+ diff |= dom->up_map != prev_up_map;
+ if (!diff)
+ return;
+ dom->gen = ++mon->dom_gen;
+ cache->len = htons(dom->len);
+ cache->gen = htons(dom->gen);
+ cache->member_cnt = htons(member_cnt);
+ cache->up_map = cpu_to_be64(dom->up_map);
+ mon_apply_domain(mon, self);
+}
+
+/* mon_update_neighbors() : update preceding neighbors of added/removed peer
+ */
+static void mon_update_neighbors(struct tipc_monitor *mon,
+ struct tipc_peer *peer)
+{
+ int dz, i;
+
+ dz = dom_size(mon->peer_cnt);
+ for (i = 0; i < dz; i++) {
+ mon_apply_domain(mon, peer);
+ peer = peer_prev(peer);
+ }
+}
+
+/* mon_assign_roles() : reassign peer roles after a network change
+ * The monitor list is consistent at this stage; i.e., each peer is monitoring
+ * a set of domain members as matched between domain record and the monitor list
+ */
+static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head)
+{
+ struct tipc_peer *peer = peer_nxt(head);
+ struct tipc_peer *self = mon->self;
+ int i = 0;
+
+ for (; peer != self; peer = peer_nxt(peer)) {
+ peer->is_local = false;
+
+ /* Update domain member */
+ if (i++ < head->applied) {
+ peer->is_head = false;
+ if (head == self)
+ peer->is_local = true;
+ continue;
+ }
+ /* Assign next domain head */
+ if (!peer->is_up)
+ continue;
+ if (peer->is_head)
+ break;
+ head = peer;
+ head->is_head = true;
+ i = 0;
+ }
+ mon->list_gen++;
+}
+
+void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *peer, *prev, *head;
+
+ write_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (!peer)
+ goto exit;
+ prev = peer_prev(peer);
+ list_del(&peer->list);
+ hlist_del(&peer->hash);
+ kfree(peer->domain);
+ kfree(peer);
+ mon->peer_cnt--;
+ head = peer_head(prev);
+ if (head == self)
+ mon_update_local_domain(mon);
+ mon_update_neighbors(mon, prev);
+
+ /* Revert to full-mesh monitoring if we reach threshold */
+ if (!tipc_mon_is_active(net, mon)) {
+ list_for_each_entry(peer, &self->list, list) {
+ kfree(peer->domain);
+ peer->domain = NULL;
+ peer->applied = 0;
+ }
+ }
+ mon_assign_roles(mon, head);
+exit:
+ write_unlock_bh(&mon->lock);
+}
+
+static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr,
+ struct tipc_peer **peer)
+{
+ struct tipc_peer *self = mon->self;
+ struct tipc_peer *cur, *prev, *p;
+
+ p = kzalloc(sizeof(*p), GFP_ATOMIC);
+ *peer = p;
+ if (!p)
+ return false;
+ p->addr = addr;
+
+ /* Add new peer to lookup list */
+ INIT_LIST_HEAD(&p->list);
+ hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]);
+
+ /* Sort new peer into iterator list, in ascending circular order */
+ prev = self;
+ list_for_each_entry(cur, &self->list, list) {
+ if ((addr > prev->addr) && (addr < cur->addr))
+ break;
+ if (((addr < cur->addr) || (addr > prev->addr)) &&
+ (prev->addr > cur->addr))
+ break;
+ prev = cur;
+ }
+ list_add_tail(&p->list, &cur->list);
+ mon->peer_cnt++;
+ mon_update_neighbors(mon, p);
+ return true;
+}
+
+void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *peer, *head;
+
+ write_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (!peer && !tipc_mon_add_peer(mon, addr, &peer))
+ goto exit;
+ peer->is_up = true;
+ head = peer_head(peer);
+ if (head == self)
+ mon_update_local_domain(mon);
+ mon_assign_roles(mon, head);
+exit:
+ write_unlock_bh(&mon->lock);
+}
+
+void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *peer, *head;
+ struct tipc_mon_domain *dom;
+ int applied;
+
+ write_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (!peer) {
+ pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id);
+ goto exit;
+ }
+ applied = peer->applied;
+ peer->applied = 0;
+ dom = peer->domain;
+ peer->domain = NULL;
+ if (peer->is_head)
+ mon_identify_lost_members(peer, dom, applied);
+ kfree(dom);
+ peer->is_up = false;
+ peer->is_head = false;
+ peer->is_local = false;
+ peer->down_cnt = 0;
+ head = peer_head(peer);
+ if (head == self)
+ mon_update_local_domain(mon);
+ mon_assign_roles(mon, head);
+exit:
+ write_unlock_bh(&mon->lock);
+}
+
+/* tipc_mon_rcv - process monitor domain event message
+ */
+void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
+ struct tipc_mon_state *state, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_mon_domain *arrv_dom = data;
+ struct tipc_mon_domain dom_bef;
+ struct tipc_mon_domain *dom;
+ struct tipc_peer *peer;
+ u16 new_member_cnt = ntohs(arrv_dom->member_cnt);
+ int new_dlen = dom_rec_len(arrv_dom, new_member_cnt);
+ u16 new_gen = ntohs(arrv_dom->gen);
+ u16 acked_gen = ntohs(arrv_dom->ack_gen);
+ bool probing = state->probing;
+ int i, applied_bef;
+
+ state->probing = false;
+ if (!dlen)
+ return;
+
+ /* Sanity check received domain record */
+ if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) {
+ pr_warn_ratelimited("Received illegal domain record\n");
+ return;
+ }
+
+ /* Synch generation numbers with peer if link just came up */
+ if (!state->synched) {
+ state->peer_gen = new_gen - 1;
+ state->acked_gen = acked_gen;
+ state->synched = true;
+ }
+
+ if (more(acked_gen, state->acked_gen))
+ state->acked_gen = acked_gen;
+
+ /* Drop duplicate unless we are waiting for a probe response */
+ if (!more(new_gen, state->peer_gen) && !probing)
+ return;
+
+ write_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (!peer || !peer->is_up)
+ goto exit;
+
+ /* Peer is confirmed, stop any ongoing probing */
+ peer->down_cnt = 0;
+
+ /* Task is done for duplicate record */
+ if (!more(new_gen, state->peer_gen))
+ goto exit;
+
+ state->peer_gen = new_gen;
+
+ /* Cache current domain record for later use */
+ dom_bef.member_cnt = 0;
+ dom = peer->domain;
+ if (dom)
+ memcpy(&dom_bef, dom, dom->len);
+
+ /* Transform and store received domain record */
+ if (!dom || (dom->len < new_dlen)) {
+ kfree(dom);
+ dom = kmalloc(new_dlen, GFP_ATOMIC);
+ peer->domain = dom;
+ if (!dom)
+ goto exit;
+ }
+ dom->len = new_dlen;
+ dom->gen = new_gen;
+ dom->member_cnt = new_member_cnt;
+ dom->up_map = be64_to_cpu(arrv_dom->up_map);
+ for (i = 0; i < new_member_cnt; i++)
+ dom->members[i] = ntohl(arrv_dom->members[i]);
+
+ /* Update peers affected by this domain record */
+ applied_bef = peer->applied;
+ mon_apply_domain(mon, peer);
+ mon_identify_lost_members(peer, &dom_bef, applied_bef);
+ mon_assign_roles(mon, peer_head(peer));
+exit:
+ write_unlock_bh(&mon->lock);
+}
+
+void tipc_mon_prep(struct net *net, void *data, int *dlen,
+ struct tipc_mon_state *state, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_mon_domain *dom = data;
+ u16 gen = mon->dom_gen;
+ u16 len;
+
+ if (!tipc_mon_is_active(net, mon))
+ return;
+
+ /* Send only a dummy record with ack if peer has acked our last sent */
+ if (likely(state->acked_gen == gen)) {
+ len = dom_rec_len(dom, 0);
+ *dlen = len;
+ dom->len = htons(len);
+ dom->gen = htons(gen);
+ dom->ack_gen = htons(state->peer_gen);
+ dom->member_cnt = 0;
+ return;
+ }
+ /* Send the full record */
+ read_lock_bh(&mon->lock);
+ len = ntohs(mon->cache.len);
+ *dlen = len;
+ memcpy(data, &mon->cache, len);
+ read_unlock_bh(&mon->lock);
+ dom->ack_gen = htons(state->peer_gen);
+}
+
+void tipc_mon_get_state(struct net *net, u32 addr,
+ struct tipc_mon_state *state,
+ int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *peer;
+
+ /* Used cached state if table has not changed */
+ if (!state->probing &&
+ (state->list_gen == mon->list_gen) &&
+ (state->acked_gen == mon->dom_gen))
+ return;
+
+ read_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (peer) {
+ state->probing = state->acked_gen != mon->dom_gen;
+ state->probing |= peer->down_cnt;
+ state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS;
+ state->monitoring = peer->is_local;
+ state->monitoring |= peer->is_head;
+ state->list_gen = mon->list_gen;
+ }
+ read_unlock_bh(&mon->lock);
+}
+
+static void mon_timeout(unsigned long m)
+{
+ struct tipc_monitor *mon = (void *)m;
+ struct tipc_peer *self;
+ int best_member_cnt = dom_size(mon->peer_cnt) - 1;
+
+ write_lock_bh(&mon->lock);
+ self = mon->self;
+ if (self && (best_member_cnt != self->applied)) {
+ mon_update_local_domain(mon);
+ mon_assign_roles(mon, self);
+ }
+ write_unlock_bh(&mon->lock);
+ mod_timer(&mon->timer, jiffies + mon->timer_intv);
+}
+
+int tipc_mon_create(struct net *net, int bearer_id)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_monitor *mon;
+ struct tipc_peer *self;
+ struct tipc_mon_domain *dom;
+
+ if (tn->monitors[bearer_id])
+ return 0;
+
+ mon = kzalloc(sizeof(*mon), GFP_ATOMIC);
+ self = kzalloc(sizeof(*self), GFP_ATOMIC);
+ dom = kzalloc(sizeof(*dom), GFP_ATOMIC);
+ if (!mon || !self || !dom) {
+ kfree(mon);
+ kfree(self);
+ kfree(dom);
+ return -ENOMEM;
+ }
+ tn->monitors[bearer_id] = mon;
+ rwlock_init(&mon->lock);
+ mon->net = net;
+ mon->peer_cnt = 1;
+ mon->self = self;
+ self->domain = dom;
+ self->addr = tipc_own_addr(net);
+ self->is_up = true;
+ self->is_head = true;
+ INIT_LIST_HEAD(&self->list);
+ setup_timer(&mon->timer, mon_timeout, (unsigned long)mon);
+ mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff));
+ mod_timer(&mon->timer, jiffies + mon->timer_intv);
+ return 0;
+}
+
+void tipc_mon_delete(struct net *net, int bearer_id)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *peer, *tmp;
+
+ write_lock_bh(&mon->lock);
+ tn->monitors[bearer_id] = NULL;
+ list_for_each_entry_safe(peer, tmp, &self->list, list) {
+ list_del(&peer->list);
+ hlist_del(&peer->hash);
+ kfree(peer->domain);
+ kfree(peer);
+ }
+ mon->self = NULL;
+ write_unlock_bh(&mon->lock);
+ del_timer_sync(&mon->timer);
+ kfree(self->domain);
+ kfree(self);
+ kfree(mon);
+}
diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h
new file mode 100644
index 000000000000..598459cbed5d
--- /dev/null
+++ b/net/tipc/monitor.h
@@ -0,0 +1,73 @@
+/*
+ * net/tipc/monitor.h
+ *
+ * Copyright (c) 2015, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_MONITOR_H
+#define _TIPC_MONITOR_H
+
+/* struct tipc_mon_state: link instance's cache of monitor list and domain state
+ * @list_gen: current generation of this node's monitor list
+ * @gen: current generation of this node's local domain
+ * @peer_gen: most recent domain generation received from peer
+ * @acked_gen: most recent generation of self's domain acked by peer
+ * @monitoring: this peer endpoint should continuously monitored
+ * @probing: peer endpoint should be temporarily probed for potential loss
+ * @synched: domain record's generation has been synched with peer after reset
+ */
+struct tipc_mon_state {
+ u16 list_gen;
+ u16 peer_gen;
+ u16 acked_gen;
+ bool monitoring :1;
+ bool probing :1;
+ bool reset :1;
+ bool synched :1;
+};
+
+int tipc_mon_create(struct net *net, int bearer_id);
+void tipc_mon_delete(struct net *net, int bearer_id);
+
+void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id);
+void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id);
+void tipc_mon_prep(struct net *net, void *data, int *dlen,
+ struct tipc_mon_state *state, int bearer_id);
+void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
+ struct tipc_mon_state *state, int bearer_id);
+void tipc_mon_get_state(struct net *net, u32 addr,
+ struct tipc_mon_state *state,
+ int bearer_id);
+void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id);
+
+extern const int tipc_max_domain_size;
+#endif
diff --git a/net/tipc/node.c b/net/tipc/node.c
index d6a490f991a4..a3fc0a3f4077 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -40,6 +40,7 @@
#include "name_distr.h"
#include "socket.h"
#include "bcast.h"
+#include "monitor.h"
#include "discover.h"
#include "netlink.h"
@@ -205,17 +206,6 @@ u16 tipc_node_get_capabilities(struct net *net, u32 addr)
return caps;
}
-/*
- * A trivial power-of-two bitmask technique is used for speed, since this
- * operation is done for every incoming TIPC packet. The number of hash table
- * entries has been chosen so that no hash chain exceeds 8 nodes and will
- * usually be much smaller (typically only a single node).
- */
-static unsigned int tipc_hashfn(u32 addr)
-{
- return addr & (NODE_HTABLE_SIZE - 1);
-}
-
static void tipc_node_kref_release(struct kref *kref)
{
struct tipc_node *n = container_of(kref, struct tipc_node, kref);
@@ -279,6 +269,7 @@ static void tipc_node_write_unlock(struct tipc_node *n)
u32 addr = 0;
u32 flags = n->action_flags;
u32 link_id = 0;
+ u32 bearer_id;
struct list_head *publ_list;
if (likely(!flags)) {
@@ -288,6 +279,7 @@ static void tipc_node_write_unlock(struct tipc_node *n)
addr = n->addr;
link_id = n->link_id;
+ bearer_id = link_id & 0xffff;
publ_list = &n->publ_list;
n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
@@ -301,13 +293,16 @@ static void tipc_node_write_unlock(struct tipc_node *n)
if (flags & TIPC_NOTIFY_NODE_UP)
tipc_named_node_up(net, addr);
- if (flags & TIPC_NOTIFY_LINK_UP)
+ if (flags & TIPC_NOTIFY_LINK_UP) {
+ tipc_mon_peer_up(net, addr, bearer_id);
tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr,
TIPC_NODE_SCOPE, link_id, addr);
-
- if (flags & TIPC_NOTIFY_LINK_DOWN)
+ }
+ if (flags & TIPC_NOTIFY_LINK_DOWN) {
+ tipc_mon_peer_down(net, addr, bearer_id);
tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
link_id, addr);
+ }
}
struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
@@ -691,6 +686,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
struct tipc_link *l = le->link;
struct tipc_media_addr *maddr;
struct sk_buff_head xmitq;
+ int old_bearer_id = bearer_id;
if (!l)
return;
@@ -710,6 +706,8 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
tipc_link_fsm_evt(l, LINK_RESET_EVT);
}
tipc_node_write_unlock(n);
+ if (delete)
+ tipc_mon_remove_peer(n->net, n->addr, old_bearer_id);
tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
tipc_sk_rcv(n->net, &le->inputq);
}
diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
index 6ba745529833..50e086c6a7b6 100644
--- a/tools/virtio/ringtest/Makefile
+++ b/tools/virtio/ringtest/Makefile
@@ -1,6 +1,6 @@
all:
-all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder
+all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder ptr_ring
CFLAGS += -Wall
CFLAGS += -pthread -O2 -ggdb
@@ -8,6 +8,7 @@ LDFLAGS += -pthread -O2 -ggdb
main.o: main.c main.h
ring.o: ring.c main.h
+ptr_ring.o: ptr_ring.c main.h ../../../include/linux/ptr_ring.h
virtio_ring_0_9.o: virtio_ring_0_9.c main.h
virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
virtio_ring_inorder.o: virtio_ring_inorder.c virtio_ring_0_9.c main.h
@@ -15,11 +16,13 @@ ring: ring.o main.o
virtio_ring_0_9: virtio_ring_0_9.o main.o
virtio_ring_poll: virtio_ring_poll.o main.o
virtio_ring_inorder: virtio_ring_inorder.o main.o
+ptr_ring: ptr_ring.o main.o
clean:
-rm main.o
-rm ring.o ring
-rm virtio_ring_0_9.o virtio_ring_0_9
-rm virtio_ring_poll.o virtio_ring_poll
-rm virtio_ring_inorder.o virtio_ring_inorder
+ -rm ptr_ring.o ptr_ring
.PHONY: all clean
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
new file mode 100644
index 000000000000..74abd746ae91
--- /dev/null
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -0,0 +1,192 @@
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <malloc.h>
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+
+#define SMP_CACHE_BYTES 64
+#define cache_line_size() SMP_CACHE_BYTES
+#define ____cacheline_aligned_in_smp __attribute__ ((aligned (SMP_CACHE_BYTES)))
+#define unlikely(x) (__builtin_expect(!!(x), 0))
+#define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))
+typedef pthread_spinlock_t spinlock_t;
+
+typedef int gfp_t;
+static void *kzalloc(unsigned size, gfp_t gfp)
+{
+ void *p = memalign(64, size);
+ if (!p)
+ return p;
+ memset(p, 0, size);
+
+ return p;
+}
+
+static void kfree(void *p)
+{
+ if (p)
+ free(p);
+}
+
+static void spin_lock_init(spinlock_t *lock)
+{
+ int r = pthread_spin_init(lock, 0);
+ assert(!r);
+}
+
+static void spin_lock(spinlock_t *lock)
+{
+ int ret = pthread_spin_lock(lock);
+ assert(!ret);
+}
+
+static void spin_unlock(spinlock_t *lock)
+{
+ int ret = pthread_spin_unlock(lock);
+ assert(!ret);
+}
+
+static void spin_lock_bh(spinlock_t *lock)
+{
+ spin_lock(lock);
+}
+
+static void spin_unlock_bh(spinlock_t *lock)
+{
+ spin_unlock(lock);
+}
+
+static void spin_lock_irq(spinlock_t *lock)
+{
+ spin_lock(lock);
+}
+
+static void spin_unlock_irq(spinlock_t *lock)
+{
+ spin_unlock(lock);
+}
+
+static void spin_lock_irqsave(spinlock_t *lock, unsigned long f)
+{
+ spin_lock(lock);
+}
+
+static void spin_unlock_irqrestore(spinlock_t *lock, unsigned long f)
+{
+ spin_unlock(lock);
+}
+
+#include "../../../include/linux/ptr_ring.h"
+
+static unsigned long long headcnt, tailcnt;
+static struct ptr_ring array ____cacheline_aligned_in_smp;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+ int ret = ptr_ring_init(&array, ring_size, 0);
+ assert(!ret);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+ int ret;
+
+ ret = __ptr_ring_produce(&array, buf);
+ if (ret >= 0) {
+ ret = 0;
+ headcnt++;
+ }
+
+ return ret;
+}
+
+/*
+ * ptr_ring API provides no way for producer to find out whether a given
+ * buffer was consumed. Our tests merely require that a successful get_buf
+ * implies that add_inbuf succeed in the past, and that add_inbuf will succeed,
+ * fake it accordingly.
+ */
+void *get_buf(unsigned *lenp, void **bufp)
+{
+ void *datap;
+
+ if (tailcnt == headcnt || __ptr_ring_full(&array))
+ datap = NULL;
+ else {
+ datap = "Buffer\n";
+ ++tailcnt;
+ }
+
+ return datap;
+}
+
+void poll_used(void)
+{
+ void *b;
+
+ do {
+ if (tailcnt == headcnt || __ptr_ring_full(&array)) {
+ b = NULL;
+ barrier();
+ } else {
+ b = "Buffer\n";
+ }
+ } while (!b);
+}
+
+void disable_call()
+{
+ assert(0);
+}
+
+bool enable_call()
+{
+ assert(0);
+}
+
+void kick_available(void)
+{
+ assert(0);
+}
+
+/* host side */
+void disable_kick()
+{
+ assert(0);
+}
+
+bool enable_kick()
+{
+ assert(0);
+}
+
+void poll_avail(void)
+{
+ void *b;
+
+ do {
+ barrier();
+ b = __ptr_ring_peek(&array);
+ } while (!b);
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+ void *ptr;
+
+ ptr = __ptr_ring_consume(&array);
+
+ return ptr;
+}
+
+void call_used(void)
+{
+ assert(0);
+}