diff options
-rw-r--r-- | drivers/net/atl1c/atl1c_main.c | 4 | ||||
-rw-r--r-- | drivers/net/atl1e/atl1e_main.c | 4 | ||||
-rw-r--r-- | drivers/net/qlcnic/qlcnic.h | 5 | ||||
-rw-r--r-- | drivers/net/qlcnic/qlcnic_main.c | 15 | ||||
-rw-r--r-- | drivers/net/r6040.c | 115 | ||||
-rw-r--r-- | include/linux/ethtool.h | 5 | ||||
-rw-r--r-- | include/linux/pkt_sched.h | 39 | ||||
-rw-r--r-- | net/core/dev.c | 12 | ||||
-rw-r--r-- | net/core/ethtool.c | 75 | ||||
-rw-r--r-- | net/key/af_key.c | 2 | ||||
-rw-r--r-- | net/sched/Kconfig | 11 | ||||
-rw-r--r-- | net/sched/Makefile | 1 | ||||
-rw-r--r-- | net/sched/em_meta.c | 2 | ||||
-rw-r--r-- | net/sched/sch_mqprio.c | 6 | ||||
-rw-r--r-- | net/sched/sch_sfb.c | 709 | ||||
-rw-r--r-- | net/socket.c | 8 |
16 files changed, 935 insertions, 78 deletions
diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c index e60595f0247c..7d9d5067a65c 100644 --- a/drivers/net/atl1c/atl1c_main.c +++ b/drivers/net/atl1c/atl1c_main.c @@ -1102,10 +1102,10 @@ static void atl1c_configure_tx(struct atl1c_adapter *adapter) AT_READ_REG(hw, REG_DEVICE_CTRL, &dev_ctrl_data); max_pay_load = (dev_ctrl_data >> DEVICE_CTRL_MAX_PAYLOAD_SHIFT) & DEVICE_CTRL_MAX_PAYLOAD_MASK; - hw->dmaw_block = min(max_pay_load, hw->dmaw_block); + hw->dmaw_block = min_t(u32, max_pay_load, hw->dmaw_block); max_pay_load = (dev_ctrl_data >> DEVICE_CTRL_MAX_RREQ_SZ_SHIFT) & DEVICE_CTRL_MAX_RREQ_SZ_MASK; - hw->dmar_block = min(max_pay_load, hw->dmar_block); + hw->dmar_block = min_t(u32, max_pay_load, hw->dmar_block); txq_ctrl_data = (hw->tpd_burst & TXQ_NUM_TPD_BURST_MASK) << TXQ_NUM_TPD_BURST_SHIFT; diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c index bf7500ccd73f..21f501184023 100644 --- a/drivers/net/atl1e/atl1e_main.c +++ b/drivers/net/atl1e/atl1e_main.c @@ -932,11 +932,11 @@ static inline void atl1e_configure_tx(struct atl1e_adapter *adapter) max_pay_load = ((dev_ctrl_data >> DEVICE_CTRL_MAX_PAYLOAD_SHIFT)) & DEVICE_CTRL_MAX_PAYLOAD_MASK; - hw->dmaw_block = min(max_pay_load, hw->dmaw_block); + hw->dmaw_block = min_t(u32, max_pay_load, hw->dmaw_block); max_pay_load = ((dev_ctrl_data >> DEVICE_CTRL_MAX_RREQ_SZ_SHIFT)) & DEVICE_CTRL_MAX_RREQ_SZ_MASK; - hw->dmar_block = min(max_pay_load, hw->dmar_block); + hw->dmar_block = min_t(u32, max_pay_load, hw->dmar_block); if (hw->nic_type != athr_l2e_revB) AT_WRITE_REGW(hw, REG_TXQ_CTRL + 2, diff --git a/drivers/net/qlcnic/qlcnic.h b/drivers/net/qlcnic/qlcnic.h index 44e316fd67b8..dc44564ef6f9 100644 --- a/drivers/net/qlcnic/qlcnic.h +++ b/drivers/net/qlcnic/qlcnic.h @@ -867,7 +867,6 @@ struct qlcnic_nic_intr_coalesce { #define LINKEVENT_LINKSPEED_MBPS 0 #define LINKEVENT_LINKSPEED_ENCODED 1 -#define AUTO_FW_RESET_ENABLED 0x01 /* firmware response header: * 63:58 - message type * 57:56 - owner @@ -1133,14 +1132,10 @@ struct qlcnic_eswitch { #define MAX_BW 100 /* % of link speed */ #define MAX_VLAN_ID 4095 #define MIN_VLAN_ID 2 -#define MAX_TX_QUEUES 1 -#define MAX_RX_QUEUES 4 #define DEFAULT_MAC_LEARN 1 #define IS_VALID_VLAN(vlan) (vlan >= MIN_VLAN_ID && vlan < MAX_VLAN_ID) #define IS_VALID_BW(bw) (bw <= MAX_BW) -#define IS_VALID_TX_QUEUES(que) (que > 0 && que <= MAX_TX_QUEUES) -#define IS_VALID_RX_QUEUES(que) (que > 0 && que <= MAX_RX_QUEUES) struct qlcnic_pci_func_cfg { u16 func_type; diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c index 37c04b4fade3..cd88c7e1bfa9 100644 --- a/drivers/net/qlcnic/qlcnic_main.c +++ b/drivers/net/qlcnic/qlcnic_main.c @@ -42,7 +42,7 @@ static int use_msi_x = 1; module_param(use_msi_x, int, 0444); MODULE_PARM_DESC(use_msi_x, "MSI-X interrupt (0=disabled, 1=enabled"); -static int auto_fw_reset = AUTO_FW_RESET_ENABLED; +static int auto_fw_reset = 1; module_param(auto_fw_reset, int, 0644); MODULE_PARM_DESC(auto_fw_reset, "Auto firmware reset (0=disabled, 1=enabled"); @@ -2959,8 +2959,7 @@ qlcnic_check_health(struct qlcnic_adapter *adapter) if (adapter->need_fw_reset) goto detach; - if (adapter->reset_context && - auto_fw_reset == AUTO_FW_RESET_ENABLED) { + if (adapter->reset_context && auto_fw_reset) { qlcnic_reset_hw_context(adapter); adapter->netdev->trans_start = jiffies; } @@ -2973,7 +2972,7 @@ qlcnic_check_health(struct qlcnic_adapter *adapter) qlcnic_dev_request_reset(adapter); - if ((auto_fw_reset == AUTO_FW_RESET_ENABLED)) + if (auto_fw_reset) clear_bit(__QLCNIC_FW_ATTACHED, &adapter->state); dev_info(&netdev->dev, "firmware hang detected\n"); @@ -2982,7 +2981,7 @@ detach: adapter->dev_state = (state == QLCNIC_DEV_NEED_QUISCENT) ? state : QLCNIC_DEV_NEED_RESET; - if ((auto_fw_reset == AUTO_FW_RESET_ENABLED) && + if (auto_fw_reset && !test_and_set_bit(__QLCNIC_RESETTING, &adapter->state)) { qlcnic_schedule_work(adapter, qlcnic_detach_work, 0); @@ -3654,10 +3653,8 @@ validate_npar_config(struct qlcnic_adapter *adapter, if (adapter->npars[pci_func].type != QLCNIC_TYPE_NIC) return QL_STATUS_INVALID_PARAM; - if (!IS_VALID_BW(np_cfg[i].min_bw) - || !IS_VALID_BW(np_cfg[i].max_bw) - || !IS_VALID_RX_QUEUES(np_cfg[i].max_rx_queues) - || !IS_VALID_TX_QUEUES(np_cfg[i].max_tx_queues)) + if (!IS_VALID_BW(np_cfg[i].min_bw) || + !IS_VALID_BW(np_cfg[i].max_bw)) return QL_STATUS_INVALID_PARAM; } return 0; diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c index 27e6f6d43cac..e3ebd90ae651 100644 --- a/drivers/net/r6040.c +++ b/drivers/net/r6040.c @@ -49,8 +49,8 @@ #include <asm/processor.h> #define DRV_NAME "r6040" -#define DRV_VERSION "0.26" -#define DRV_RELDATE "30May2010" +#define DRV_VERSION "0.27" +#define DRV_RELDATE "23Feb2011" /* PHY CHIP Address */ #define PHY1_ADDR 1 /* For MAC1 */ @@ -69,6 +69,8 @@ /* MAC registers */ #define MCR0 0x00 /* Control register 0 */ +#define MCR0_PROMISC 0x0020 /* Promiscuous mode */ +#define MCR0_HASH_EN 0x0100 /* Enable multicast hash table function */ #define MCR1 0x04 /* Control register 1 */ #define MAC_RST 0x0001 /* Reset the MAC */ #define MBCR 0x08 /* Bus control */ @@ -851,77 +853,92 @@ static void r6040_multicast_list(struct net_device *dev) { struct r6040_private *lp = netdev_priv(dev); void __iomem *ioaddr = lp->base; - u16 *adrp; - u16 reg; unsigned long flags; struct netdev_hw_addr *ha; int i; + u16 *adrp; + u16 hash_table[4] = { 0 }; + + spin_lock_irqsave(&lp->lock, flags); - /* MAC Address */ + /* Keep our MAC Address */ adrp = (u16 *)dev->dev_addr; iowrite16(adrp[0], ioaddr + MID_0L); iowrite16(adrp[1], ioaddr + MID_0M); iowrite16(adrp[2], ioaddr + MID_0H); - /* Promiscous Mode */ - spin_lock_irqsave(&lp->lock, flags); - /* Clear AMCP & PROM bits */ - reg = ioread16(ioaddr) & ~0x0120; - if (dev->flags & IFF_PROMISC) { - reg |= 0x0020; - lp->mcr0 |= 0x0020; - } - /* Too many multicast addresses - * accept all traffic */ - else if ((netdev_mc_count(dev) > MCAST_MAX) || - (dev->flags & IFF_ALLMULTI)) - reg |= 0x0020; + lp->mcr0 = ioread16(ioaddr + MCR0) & ~(MCR0_PROMISC | MCR0_HASH_EN); - iowrite16(reg, ioaddr); - spin_unlock_irqrestore(&lp->lock, flags); + /* Promiscuous mode */ + if (dev->flags & IFF_PROMISC) + lp->mcr0 |= MCR0_PROMISC; - /* Build the hash table */ - if (netdev_mc_count(dev) > MCAST_MAX) { - u16 hash_table[4]; - u32 crc; + /* Enable multicast hash table function to + * receive all multicast packets. */ + else if (dev->flags & IFF_ALLMULTI) { + lp->mcr0 |= MCR0_HASH_EN; - for (i = 0; i < 4; i++) - hash_table[i] = 0; + for (i = 0; i < MCAST_MAX ; i++) { + iowrite16(0, ioaddr + MID_1L + 8 * i); + iowrite16(0, ioaddr + MID_1M + 8 * i); + iowrite16(0, ioaddr + MID_1H + 8 * i); + } + for (i = 0; i < 4; i++) + hash_table[i] = 0xffff; + } + /* Use internal multicast address registers if the number of + * multicast addresses is not greater than MCAST_MAX. */ + else if (netdev_mc_count(dev) <= MCAST_MAX) { + i = 0; netdev_for_each_mc_addr(ha, dev) { - char *addrs = ha->addr; + u16 *adrp = (u16 *) ha->addr; + iowrite16(adrp[0], ioaddr + MID_1L + 8 * i); + iowrite16(adrp[1], ioaddr + MID_1M + 8 * i); + iowrite16(adrp[2], ioaddr + MID_1H + 8 * i); + i++; + } + while (i < MCAST_MAX) { + iowrite16(0, ioaddr + MID_1L + 8 * i); + iowrite16(0, ioaddr + MID_1M + 8 * i); + iowrite16(0, ioaddr + MID_1H + 8 * i); + i++; + } + } + /* Otherwise, Enable multicast hash table function. */ + else { + u32 crc; - if (!(*addrs & 1)) - continue; + lp->mcr0 |= MCR0_HASH_EN; + + for (i = 0; i < MCAST_MAX ; i++) { + iowrite16(0, ioaddr + MID_1L + 8 * i); + iowrite16(0, ioaddr + MID_1M + 8 * i); + iowrite16(0, ioaddr + MID_1H + 8 * i); + } - crc = ether_crc_le(6, addrs); + /* Build multicast hash table */ + netdev_for_each_mc_addr(ha, dev) { + u8 *addrs = ha->addr; + + crc = ether_crc(ETH_ALEN, addrs); crc >>= 26; - hash_table[crc >> 4] |= 1 << (15 - (crc & 0xf)); + hash_table[crc >> 4] |= 1 << (crc & 0xf); } - /* Fill the MAC hash tables with their values */ + } + + iowrite16(lp->mcr0, ioaddr + MCR0); + + /* Fill the MAC hash tables with their values */ + if (lp->mcr0 && MCR0_HASH_EN) { iowrite16(hash_table[0], ioaddr + MAR0); iowrite16(hash_table[1], ioaddr + MAR1); iowrite16(hash_table[2], ioaddr + MAR2); iowrite16(hash_table[3], ioaddr + MAR3); } - /* Multicast Address 1~4 case */ - i = 0; - netdev_for_each_mc_addr(ha, dev) { - if (i >= MCAST_MAX) - break; - adrp = (u16 *) ha->addr; - iowrite16(adrp[0], ioaddr + MID_1L + 8 * i); - iowrite16(adrp[1], ioaddr + MID_1M + 8 * i); - iowrite16(adrp[2], ioaddr + MID_1H + 8 * i); - i++; - } - while (i < MCAST_MAX) { - iowrite16(0xffff, ioaddr + MID_1L + 8 * i); - iowrite16(0xffff, ioaddr + MID_1M + 8 * i); - iowrite16(0xffff, ioaddr + MID_1H + 8 * i); - i++; - } + + spin_unlock_irqrestore(&lp->lock, flags); } static void netdev_get_drvinfo(struct net_device *dev, diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 54d776c2c1b5..aac3e2eeb4fd 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -591,6 +591,9 @@ struct ethtool_sfeatures { * Probably there are other device-specific constraints on some features * in the set. When %ETHTOOL_F_UNSUPPORTED is set, .valid is considered * here as though ignored bits were cleared. + * %ETHTOOL_F_COMPAT - some or all changes requested were made by calling + * compatibility functions. Requested offload state cannot be properly + * managed by kernel. * * Meaning of bits in the masks are obtained by %ETHTOOL_GSSET_INFO (number of * bits in the arrays - always multiple of 32) and %ETHTOOL_GSTRINGS commands @@ -600,10 +603,12 @@ struct ethtool_sfeatures { enum ethtool_sfeatures_retval_bits { ETHTOOL_F_UNSUPPORTED__BIT, ETHTOOL_F_WISH__BIT, + ETHTOOL_F_COMPAT__BIT, }; #define ETHTOOL_F_UNSUPPORTED (1 << ETHTOOL_F_UNSUPPORTED__BIT) #define ETHTOOL_F_WISH (1 << ETHTOOL_F_WISH__BIT) +#define ETHTOOL_F_COMPAT (1 << ETHTOOL_F_COMPAT__BIT) #ifdef __KERNEL__ diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index d4bb6f58c90c..5afee2b238bd 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -522,4 +522,43 @@ struct tc_mqprio_qopt { __u16 offset[TC_QOPT_MAX_QUEUE]; }; +/* SFB */ + +enum { + TCA_SFB_UNSPEC, + TCA_SFB_PARMS, + __TCA_SFB_MAX, +}; + +#define TCA_SFB_MAX (__TCA_SFB_MAX - 1) + +/* + * Note: increment, decrement are Q0.16 fixed-point values. + */ +struct tc_sfb_qopt { + __u32 rehash_interval; /* delay between hash move, in ms */ + __u32 warmup_time; /* double buffering warmup time in ms (warmup_time < rehash_interval) */ + __u32 max; /* max len of qlen_min */ + __u32 bin_size; /* maximum queue length per bin */ + __u32 increment; /* probability increment, (d1 in Blue) */ + __u32 decrement; /* probability decrement, (d2 in Blue) */ + __u32 limit; /* max SFB queue length */ + __u32 penalty_rate; /* inelastic flows are rate limited to 'rate' pps */ + __u32 penalty_burst; +}; + +struct tc_sfb_xstats { + __u32 earlydrop; + __u32 penaltydrop; + __u32 bucketdrop; + __u32 queuedrop; + __u32 childdrop; /* drops in child qdisc */ + __u32 marked; + __u32 maxqlen; + __u32 maxprob; + __u32 avgprob; +}; + +#define SFB_MAX_PROB 0xFFFF + #endif diff --git a/net/core/dev.c b/net/core/dev.c index 578415c1ef75..69a3c0817d6f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5476,14 +5476,14 @@ int register_netdevice(struct net_device *dev) * software offloads (GSO and GRO). */ dev->hw_features |= NETIF_F_SOFT_FEATURES; - dev->wanted_features = (dev->features & dev->hw_features) - | NETIF_F_SOFT_FEATURES; + dev->features |= NETIF_F_SOFT_FEATURES; + dev->wanted_features = dev->features & dev->hw_features; /* Avoid warning from netdev_fix_features() for GSO without SG */ - if (!(dev->wanted_features & NETIF_F_SG)) + if (!(dev->wanted_features & NETIF_F_SG)) { dev->wanted_features &= ~NETIF_F_GSO; - - netdev_update_features(dev); + dev->features &= ~NETIF_F_GSO; + } /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features @@ -5501,6 +5501,8 @@ int register_netdevice(struct net_device *dev) goto err_uninit; dev->reg_state = NETREG_REGISTERED; + netdev_update_features(dev); + /* * Default initial state at registry is that the * device is present. diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 66cdc76770ce..c1a71bb738da 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -168,6 +168,76 @@ EXPORT_SYMBOL(ethtool_ntuple_flush); #define ETHTOOL_DEV_FEATURE_WORDS 1 +static void ethtool_get_features_compat(struct net_device *dev, + struct ethtool_get_features_block *features) +{ + if (!dev->ethtool_ops) + return; + + /* getting RX checksum */ + if (dev->ethtool_ops->get_rx_csum) + if (dev->ethtool_ops->get_rx_csum(dev)) + features[0].active |= NETIF_F_RXCSUM; + + /* mark legacy-changeable features */ + if (dev->ethtool_ops->set_sg) + features[0].available |= NETIF_F_SG; + if (dev->ethtool_ops->set_tx_csum) + features[0].available |= NETIF_F_ALL_CSUM; + if (dev->ethtool_ops->set_tso) + features[0].available |= NETIF_F_ALL_TSO; + if (dev->ethtool_ops->set_rx_csum) + features[0].available |= NETIF_F_RXCSUM; + if (dev->ethtool_ops->set_flags) + features[0].available |= flags_dup_features; +} + +static int ethtool_set_feature_compat(struct net_device *dev, + int (*legacy_set)(struct net_device *, u32), + struct ethtool_set_features_block *features, u32 mask) +{ + u32 do_set; + + if (!legacy_set) + return 0; + + if (!(features[0].valid & mask)) + return 0; + + features[0].valid &= ~mask; + + do_set = !!(features[0].requested & mask); + + if (legacy_set(dev, do_set) < 0) + netdev_info(dev, + "Legacy feature change (%s) failed for 0x%08x\n", + do_set ? "set" : "clear", mask); + + return 1; +} + +static int ethtool_set_features_compat(struct net_device *dev, + struct ethtool_set_features_block *features) +{ + int compat; + + if (!dev->ethtool_ops) + return 0; + + compat = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg, + features, NETIF_F_SG); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum, + features, NETIF_F_ALL_CSUM); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso, + features, NETIF_F_ALL_TSO); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum, + features, NETIF_F_RXCSUM); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_flags, + features, flags_dup_features); + + return compat; +} + static int ethtool_get_features(struct net_device *dev, void __user *useraddr) { struct ethtool_gfeatures cmd = { @@ -185,6 +255,8 @@ static int ethtool_get_features(struct net_device *dev, void __user *useraddr) u32 __user *sizeaddr; u32 copy_size; + ethtool_get_features_compat(dev, features); + sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size); if (get_user(copy_size, sizeaddr)) return -EFAULT; @@ -220,6 +292,9 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr) if (features[0].valid & ~NETIF_F_ETHTOOL_BITS) return -EINVAL; + if (ethtool_set_features_compat(dev, features)) + ret |= ETHTOOL_F_COMPAT; + if (features[0].valid & ~dev->hw_features) { features[0].valid &= dev->hw_features; ret |= ETHTOOL_F_UNSUPPORTED; diff --git a/net/key/af_key.c b/net/key/af_key.c index d87c22df6f1e..60fd2f13f427 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3655,6 +3655,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v) } static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) + __acquires(rcu) { struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); @@ -3672,6 +3673,7 @@ static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos) } static void pfkey_seq_stop(struct seq_file *f, void *v) + __releases(rcu) { rcu_read_unlock(); } diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 8c19b6e3201e..a7a5583d4f68 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -126,6 +126,17 @@ config NET_SCH_RED To compile this code as a module, choose M here: the module will be called sch_red. +config NET_SCH_SFB + tristate "Stochastic Fair Blue (SFB)" + ---help--- + Say Y here if you want to use the Stochastic Fair Blue (SFB) + packet scheduling algorithm. + + See the top of <file:net/sched/sch_sfb.c> for more details. + + To compile this code as a module, choose M here: the + module will be called sch_sfb. + config NET_SCH_SFQ tristate "Stochastic Fairness Queueing (SFQ)" ---help--- diff --git a/net/sched/Makefile b/net/sched/Makefile index 06c6cdfd1948..2e77b8dba22e 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_NET_SCH_RED) += sch_red.o obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o +obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index a889d099320f..e5e174782677 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -401,7 +401,7 @@ META_COLLECTOR(int_sk_sndbuf) META_COLLECTOR(int_sk_alloc) { SKIP_NONLOCAL(skb); - dst->value = skb->sk->sk_allocation; + dst->value = (__force int) skb->sk->sk_allocation; } META_COLLECTOR(int_sk_route_caps) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index ace37f9f1cd0..ea17cbed29ef 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -311,7 +311,9 @@ static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, } static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, - struct gnet_dump *d) + struct gnet_dump *d) + __releases(d->lock) + __acquires(d->lock) { struct net_device *dev = qdisc_dev(sch); @@ -389,7 +391,7 @@ static const struct Qdisc_class_ops mqprio_class_ops = { .dump_stats = mqprio_dump_class_stats, }; -struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { +static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { .cl_ops = &mqprio_class_ops, .id = "mqprio", .priv_size = sizeof(struct mqprio_sched), diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c new file mode 100644 index 000000000000..0a833d0c1f61 --- /dev/null +++ b/net/sched/sch_sfb.c @@ -0,0 +1,709 @@ +/* + * net/sched/sch_sfb.c Stochastic Fair Blue + * + * Copyright (c) 2008-2011 Juliusz Chroboczek <jch@pps.jussieu.fr> + * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * W. Feng, D. Kandlur, D. Saha, K. Shin. Blue: + * A New Class of Active Queue Management Algorithms. + * U. Michigan CSE-TR-387-99, April 1999. + * + * http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf + * + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <linux/random.h> +#include <linux/jhash.h> +#include <net/ip.h> +#include <net/pkt_sched.h> +#include <net/inet_ecn.h> + +/* + * SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level) + * This implementation uses L = 8 and N = 16 + * This permits us to split one 32bit hash (provided per packet by rxhash or + * external classifier) into 8 subhashes of 4 bits. + */ +#define SFB_BUCKET_SHIFT 4 +#define SFB_NUMBUCKETS (1 << SFB_BUCKET_SHIFT) /* N bins per Level */ +#define SFB_BUCKET_MASK (SFB_NUMBUCKETS - 1) +#define SFB_LEVELS (32 / SFB_BUCKET_SHIFT) /* L */ + +/* SFB algo uses a virtual queue, named "bin" */ +struct sfb_bucket { + u16 qlen; /* length of virtual queue */ + u16 p_mark; /* marking probability */ +}; + +/* We use a double buffering right before hash change + * (Section 4.4 of SFB reference : moving hash functions) + */ +struct sfb_bins { + u32 perturbation; /* jhash perturbation */ + struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS]; +}; + +struct sfb_sched_data { + struct Qdisc *qdisc; + struct tcf_proto *filter_list; + unsigned long rehash_interval; + unsigned long warmup_time; /* double buffering warmup time in jiffies */ + u32 max; + u32 bin_size; /* maximum queue length per bin */ + u32 increment; /* d1 */ + u32 decrement; /* d2 */ + u32 limit; /* HARD maximal queue length */ + u32 penalty_rate; + u32 penalty_burst; + u32 tokens_avail; + unsigned long rehash_time; + unsigned long token_time; + + u8 slot; /* current active bins (0 or 1) */ + bool double_buffering; + struct sfb_bins bins[2]; + + struct { + u32 earlydrop; + u32 penaltydrop; + u32 bucketdrop; + u32 queuedrop; + u32 childdrop; /* drops in child qdisc */ + u32 marked; /* ECN mark */ + } stats; +}; + +/* + * Each queued skb might be hashed on one or two bins + * We store in skb_cb the two hash values. + * (A zero value means double buffering was not used) + */ +struct sfb_skb_cb { + u32 hashes[2]; +}; + +static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(skb->cb) < + sizeof(struct qdisc_skb_cb) + sizeof(struct sfb_skb_cb)); + return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data; +} + +/* + * If using 'internal' SFB flow classifier, hash comes from skb rxhash + * If using external classifier, hash comes from the classid. + */ +static u32 sfb_hash(const struct sk_buff *skb, u32 slot) +{ + return sfb_skb_cb(skb)->hashes[slot]; +} + +/* Probabilities are coded as Q0.16 fixed-point values, + * with 0xFFFF representing 65535/65536 (almost 1.0) + * Addition and subtraction are saturating in [0, 65535] + */ +static u32 prob_plus(u32 p1, u32 p2) +{ + u32 res = p1 + p2; + + return min_t(u32, res, SFB_MAX_PROB); +} + +static u32 prob_minus(u32 p1, u32 p2) +{ + return p1 > p2 ? p1 - p2 : 0; +} + +static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q) +{ + int i; + struct sfb_bucket *b = &q->bins[slot].bins[0][0]; + + for (i = 0; i < SFB_LEVELS; i++) { + u32 hash = sfbhash & SFB_BUCKET_MASK; + + sfbhash >>= SFB_BUCKET_SHIFT; + if (b[hash].qlen < 0xFFFF) + b[hash].qlen++; + b += SFB_NUMBUCKETS; /* next level */ + } +} + +static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) +{ + u32 sfbhash; + + sfbhash = sfb_hash(skb, 0); + if (sfbhash) + increment_one_qlen(sfbhash, 0, q); + + sfbhash = sfb_hash(skb, 1); + if (sfbhash) + increment_one_qlen(sfbhash, 1, q); +} + +static void decrement_one_qlen(u32 sfbhash, u32 slot, + struct sfb_sched_data *q) +{ + int i; + struct sfb_bucket *b = &q->bins[slot].bins[0][0]; + + for (i = 0; i < SFB_LEVELS; i++) { + u32 hash = sfbhash & SFB_BUCKET_MASK; + + sfbhash >>= SFB_BUCKET_SHIFT; + if (b[hash].qlen > 0) + b[hash].qlen--; + b += SFB_NUMBUCKETS; /* next level */ + } +} + +static void decrement_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) +{ + u32 sfbhash; + + sfbhash = sfb_hash(skb, 0); + if (sfbhash) + decrement_one_qlen(sfbhash, 0, q); + + sfbhash = sfb_hash(skb, 1); + if (sfbhash) + decrement_one_qlen(sfbhash, 1, q); +} + +static void decrement_prob(struct sfb_bucket *b, struct sfb_sched_data *q) +{ + b->p_mark = prob_minus(b->p_mark, q->decrement); +} + +static void increment_prob(struct sfb_bucket *b, struct sfb_sched_data *q) +{ + b->p_mark = prob_plus(b->p_mark, q->increment); +} + +static void sfb_zero_all_buckets(struct sfb_sched_data *q) +{ + memset(&q->bins, 0, sizeof(q->bins)); +} + +/* + * compute max qlen, max p_mark, and avg p_mark + */ +static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_data *q) +{ + int i; + u32 qlen = 0, prob = 0, totalpm = 0; + const struct sfb_bucket *b = &q->bins[q->slot].bins[0][0]; + + for (i = 0; i < SFB_LEVELS * SFB_NUMBUCKETS; i++) { + if (qlen < b->qlen) + qlen = b->qlen; + totalpm += b->p_mark; + if (prob < b->p_mark) + prob = b->p_mark; + b++; + } + *prob_r = prob; + *avgpm_r = totalpm / (SFB_LEVELS * SFB_NUMBUCKETS); + return qlen; +} + + +static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q) +{ + q->bins[slot].perturbation = net_random(); +} + +static void sfb_swap_slot(struct sfb_sched_data *q) +{ + sfb_init_perturbation(q->slot, q); + q->slot ^= 1; + q->double_buffering = false; +} + +/* Non elastic flows are allowed to use part of the bandwidth, expressed + * in "penalty_rate" packets per second, with "penalty_burst" burst + */ +static bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q) +{ + if (q->penalty_rate == 0 || q->penalty_burst == 0) + return true; + + if (q->tokens_avail < 1) { + unsigned long age = min(10UL * HZ, jiffies - q->token_time); + + q->tokens_avail = (age * q->penalty_rate) / HZ; + if (q->tokens_avail > q->penalty_burst) + q->tokens_avail = q->penalty_burst; + q->token_time = jiffies; + if (q->tokens_avail < 1) + return true; + } + + q->tokens_avail--; + return false; +} + +static bool sfb_classify(struct sk_buff *skb, struct sfb_sched_data *q, + int *qerr, u32 *salt) +{ + struct tcf_result res; + int result; + + result = tc_classify(skb, q->filter_list, &res); + if (result >= 0) { +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return false; + } +#endif + *salt = TC_H_MIN(res.classid); + return true; + } + return false; +} + +static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + + struct sfb_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; + int i; + u32 p_min = ~0; + u32 minqlen = ~0; + u32 r, slot, salt, sfbhash; + int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + + if (q->rehash_interval > 0) { + unsigned long limit = q->rehash_time + q->rehash_interval; + + if (unlikely(time_after(jiffies, limit))) { + sfb_swap_slot(q); + q->rehash_time = jiffies; + } else if (unlikely(!q->double_buffering && q->warmup_time > 0 && + time_after(jiffies, limit - q->warmup_time))) { + q->double_buffering = true; + } + } + + if (q->filter_list) { + /* If using external classifiers, get result and record it. */ + if (!sfb_classify(skb, q, &ret, &salt)) + goto other_drop; + } else { + salt = skb_get_rxhash(skb); + } + + slot = q->slot; + + sfbhash = jhash_1word(salt, q->bins[slot].perturbation); + if (!sfbhash) + sfbhash = 1; + sfb_skb_cb(skb)->hashes[slot] = sfbhash; + + for (i = 0; i < SFB_LEVELS; i++) { + u32 hash = sfbhash & SFB_BUCKET_MASK; + struct sfb_bucket *b = &q->bins[slot].bins[i][hash]; + + sfbhash >>= SFB_BUCKET_SHIFT; + if (b->qlen == 0) + decrement_prob(b, q); + else if (b->qlen >= q->bin_size) + increment_prob(b, q); + if (minqlen > b->qlen) + minqlen = b->qlen; + if (p_min > b->p_mark) + p_min = b->p_mark; + } + + slot ^= 1; + sfb_skb_cb(skb)->hashes[slot] = 0; + + if (unlikely(minqlen >= q->max || sch->q.qlen >= q->limit)) { + sch->qstats.overlimits++; + if (minqlen >= q->max) + q->stats.bucketdrop++; + else + q->stats.queuedrop++; + goto drop; + } + + if (unlikely(p_min >= SFB_MAX_PROB)) { + /* Inelastic flow */ + if (q->double_buffering) { + sfbhash = jhash_1word(salt, q->bins[slot].perturbation); + if (!sfbhash) + sfbhash = 1; + sfb_skb_cb(skb)->hashes[slot] = sfbhash; + + for (i = 0; i < SFB_LEVELS; i++) { + u32 hash = sfbhash & SFB_BUCKET_MASK; + struct sfb_bucket *b = &q->bins[slot].bins[i][hash]; + + sfbhash >>= SFB_BUCKET_SHIFT; + if (b->qlen == 0) + decrement_prob(b, q); + else if (b->qlen >= q->bin_size) + increment_prob(b, q); + } + } + if (sfb_rate_limit(skb, q)) { + sch->qstats.overlimits++; + q->stats.penaltydrop++; + goto drop; + } + goto enqueue; + } + + r = net_random() & SFB_MAX_PROB; + + if (unlikely(r < p_min)) { + if (unlikely(p_min > SFB_MAX_PROB / 2)) { + /* If we're marking that many packets, then either + * this flow is unresponsive, or we're badly congested. + * In either case, we want to start dropping packets. + */ + if (r < (p_min - SFB_MAX_PROB / 2) * 2) { + q->stats.earlydrop++; + goto drop; + } + } + if (INET_ECN_set_ce(skb)) { + q->stats.marked++; + } else { + q->stats.earlydrop++; + goto drop; + } + } + +enqueue: + ret = qdisc_enqueue(skb, child); + if (likely(ret == NET_XMIT_SUCCESS)) { + sch->q.qlen++; + increment_qlen(skb, q); + } else if (net_xmit_drop_count(ret)) { + q->stats.childdrop++; + sch->qstats.drops++; + } + return ret; + +drop: + qdisc_drop(skb, sch); + return NET_XMIT_CN; +other_drop: + if (ret & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return ret; +} + +static struct sk_buff *sfb_dequeue(struct Qdisc *sch) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; + struct sk_buff *skb; + + skb = child->dequeue(q->qdisc); + + if (skb) { + qdisc_bstats_update(sch, skb); + sch->q.qlen--; + decrement_qlen(skb, q); + } + + return skb; +} + +static struct sk_buff *sfb_peek(struct Qdisc *sch) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; + + return child->ops->peek(child); +} + +/* No sfb_drop -- impossible since the child doesn't return the dropped skb. */ + +static void sfb_reset(struct Qdisc *sch) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + qdisc_reset(q->qdisc); + sch->q.qlen = 0; + q->slot = 0; + q->double_buffering = false; + sfb_zero_all_buckets(q); + sfb_init_perturbation(0, q); +} + +static void sfb_destroy(struct Qdisc *sch) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + tcf_destroy_chain(&q->filter_list); + qdisc_destroy(q->qdisc); +} + +static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = { + [TCA_SFB_PARMS] = { .len = sizeof(struct tc_sfb_qopt) }, +}; + +static const struct tc_sfb_qopt sfb_default_ops = { + .rehash_interval = 600 * MSEC_PER_SEC, + .warmup_time = 60 * MSEC_PER_SEC, + .limit = 0, + .max = 25, + .bin_size = 20, + .increment = (SFB_MAX_PROB + 500) / 1000, /* 0.1 % */ + .decrement = (SFB_MAX_PROB + 3000) / 6000, + .penalty_rate = 10, + .penalty_burst = 20, +}; + +static int sfb_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + struct Qdisc *child; + struct nlattr *tb[TCA_SFB_MAX + 1]; + const struct tc_sfb_qopt *ctl = &sfb_default_ops; + u32 limit; + int err; + + if (opt) { + err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy); + if (err < 0) + return -EINVAL; + + if (tb[TCA_SFB_PARMS] == NULL) + return -EINVAL; + + ctl = nla_data(tb[TCA_SFB_PARMS]); + } + + limit = ctl->limit; + if (limit == 0) + limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1); + + child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit); + if (IS_ERR(child)) + return PTR_ERR(child); + + sch_tree_lock(sch); + + qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); + qdisc_destroy(q->qdisc); + q->qdisc = child; + + q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval); + q->warmup_time = msecs_to_jiffies(ctl->warmup_time); + q->rehash_time = jiffies; + q->limit = limit; + q->increment = ctl->increment; + q->decrement = ctl->decrement; + q->max = ctl->max; + q->bin_size = ctl->bin_size; + q->penalty_rate = ctl->penalty_rate; + q->penalty_burst = ctl->penalty_burst; + q->tokens_avail = ctl->penalty_burst; + q->token_time = jiffies; + + q->slot = 0; + q->double_buffering = false; + sfb_zero_all_buckets(q); + sfb_init_perturbation(0, q); + sfb_init_perturbation(1, q); + + sch_tree_unlock(sch); + + return 0; +} + +static int sfb_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + q->qdisc = &noop_qdisc; + return sfb_change(sch, opt); +} + +static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + struct nlattr *opts; + struct tc_sfb_qopt opt = { + .rehash_interval = jiffies_to_msecs(q->rehash_interval), + .warmup_time = jiffies_to_msecs(q->warmup_time), + .limit = q->limit, + .max = q->max, + .bin_size = q->bin_size, + .increment = q->increment, + .decrement = q->decrement, + .penalty_rate = q->penalty_rate, + .penalty_burst = q->penalty_burst, + }; + + sch->qstats.backlog = q->qdisc->qstats.backlog; + opts = nla_nest_start(skb, TCA_OPTIONS); + NLA_PUT(skb, TCA_SFB_PARMS, sizeof(opt), &opt); + return nla_nest_end(skb, opts); + +nla_put_failure: + nla_nest_cancel(skb, opts); + return -EMSGSIZE; +} + +static int sfb_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + struct tc_sfb_xstats st = { + .earlydrop = q->stats.earlydrop, + .penaltydrop = q->stats.penaltydrop, + .bucketdrop = q->stats.bucketdrop, + .queuedrop = q->stats.queuedrop, + .childdrop = q->stats.childdrop, + .marked = q->stats.marked, + }; + + st.maxqlen = sfb_compute_qlen(&st.maxprob, &st.avgprob, q); + + return gnet_stats_copy_app(d, &st, sizeof(st)); +} + +static int sfb_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + return -ENOSYS; +} + +static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, + struct Qdisc **old) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + if (new == NULL) + new = &noop_qdisc; + + sch_tree_lock(sch); + *old = q->qdisc; + q->qdisc = new; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); + qdisc_reset(*old); + sch_tree_unlock(sch); + return 0; +} + +static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + return q->qdisc; +} + +static unsigned long sfb_get(struct Qdisc *sch, u32 classid) +{ + return 1; +} + +static void sfb_put(struct Qdisc *sch, unsigned long arg) +{ +} + +static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + struct nlattr **tca, unsigned long *arg) +{ + return -ENOSYS; +} + +static int sfb_delete(struct Qdisc *sch, unsigned long cl) +{ + return -ENOSYS; +} + +static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker) +{ + if (!walker->stop) { + if (walker->count >= walker->skip) + if (walker->fn(sch, 1, walker) < 0) { + walker->stop = 1; + return; + } + walker->count++; + } +} + +static struct tcf_proto **sfb_find_tcf(struct Qdisc *sch, unsigned long cl) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + if (cl) + return NULL; + return &q->filter_list; +} + +static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + return 0; +} + + +static const struct Qdisc_class_ops sfb_class_ops = { + .graft = sfb_graft, + .leaf = sfb_leaf, + .get = sfb_get, + .put = sfb_put, + .change = sfb_change_class, + .delete = sfb_delete, + .walk = sfb_walk, + .tcf_chain = sfb_find_tcf, + .bind_tcf = sfb_bind, + .unbind_tcf = sfb_put, + .dump = sfb_dump_class, +}; + +static struct Qdisc_ops sfb_qdisc_ops __read_mostly = { + .id = "sfb", + .priv_size = sizeof(struct sfb_sched_data), + .cl_ops = &sfb_class_ops, + .enqueue = sfb_enqueue, + .dequeue = sfb_dequeue, + .peek = sfb_peek, + .init = sfb_init, + .reset = sfb_reset, + .destroy = sfb_destroy, + .change = sfb_change, + .dump = sfb_dump, + .dump_stats = sfb_dump_stats, + .owner = THIS_MODULE, +}; + +static int __init sfb_module_init(void) +{ + return register_qdisc(&sfb_qdisc_ops); +} + +static void __exit sfb_module_exit(void) +{ + unregister_qdisc(&sfb_qdisc_ops); +} + +module_init(sfb_module_init) +module_exit(sfb_module_exit) + +MODULE_DESCRIPTION("Stochastic Fair Blue queue discipline"); +MODULE_AUTHOR("Juliusz Chroboczek"); +MODULE_AUTHOR("Eric Dumazet"); +MODULE_LICENSE("GPL"); diff --git a/net/socket.c b/net/socket.c index 9fa1e3b4366e..937d0fcf74bc 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2648,7 +2648,8 @@ static int bond_ioctl(struct net *net, unsigned int cmd, old_fs = get_fs(); set_fs(KERNEL_DS); - err = dev_ioctl(net, cmd, &kifr); + err = dev_ioctl(net, cmd, + (struct ifreq __user __force *) &kifr); set_fs(old_fs); return err; @@ -2757,7 +2758,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, old_fs = get_fs(); set_fs(KERNEL_DS); - err = dev_ioctl(net, cmd, (void __user *)&ifr); + err = dev_ioctl(net, cmd, (void __user __force *)&ifr); set_fs(old_fs); if (cmd == SIOCGIFMAP && !err) { @@ -2862,7 +2863,8 @@ static int routing_ioctl(struct net *net, struct socket *sock, ret |= __get_user(rtdev, &(ur4->rt_dev)); if (rtdev) { ret |= copy_from_user(devname, compat_ptr(rtdev), 15); - r4.rt_dev = devname; devname[15] = 0; + r4.rt_dev = (char __user __force *)devname; + devname[15] = 0; } else r4.rt_dev = NULL; |