diff options
85 files changed, 2639 insertions, 874 deletions
diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt new file mode 100644 index 000000000000..70da5086153d --- /dev/null +++ b/Documentation/networking/nf_conntrack-sysctl.txt @@ -0,0 +1,176 @@ +/proc/sys/net/netfilter/nf_conntrack_* Variables: + +nf_conntrack_acct - BOOLEAN + 0 - disabled (default) + not 0 - enabled + + Enable connection tracking flow accounting. 64-bit byte and packet + counters per flow are added. + +nf_conntrack_buckets - INTEGER (read-only) + Size of hash table. If not specified as parameter during module + loading, the default size is calculated by dividing total memory + by 16384 to determine the number of buckets but the hash table will + never have fewer than 32 or more than 16384 buckets. + +nf_conntrack_checksum - BOOLEAN + 0 - disabled + not 0 - enabled (default) + + Verify checksum of incoming packets. Packets with bad checksums are + in INVALID state. If this is enabled, such packets will not be + considered for connection tracking. + +nf_conntrack_count - INTEGER (read-only) + Number of currently allocated flow entries. + +nf_conntrack_events - BOOLEAN + 0 - disabled + not 0 - enabled (default) + + If this option is enabled, the connection tracking code will + provide userspace with connection tracking events via ctnetlink. + +nf_conntrack_events_retry_timeout - INTEGER (seconds) + default 15 + + This option is only relevant when "reliable connection tracking + events" are used. Normally, ctnetlink is "lossy", that is, + events are normally dropped when userspace listeners can't keep up. + + Userspace can request "reliable event mode". When this mode is + active, the conntrack will only be destroyed after the event was + delivered. If event delivery fails, the kernel periodically + re-tries to send the event to userspace. + + This is the maximum interval the kernel should use when re-trying + to deliver the destroy event. + + A higher number means there will be fewer delivery retries and it + will take longer for a backlog to be processed. + +nf_conntrack_expect_max - INTEGER + Maximum size of expectation table. Default value is + nf_conntrack_buckets / 256. Minimum is 1. + +nf_conntrack_frag6_high_thresh - INTEGER + default 262144 + + Maximum memory used to reassemble IPv6 fragments. When + nf_conntrack_frag6_high_thresh bytes of memory is allocated for this + purpose, the fragment handler will toss packets until + nf_conntrack_frag6_low_thresh is reached. + +nf_conntrack_frag6_low_thresh - INTEGER + default 196608 + + See nf_conntrack_frag6_low_thresh + +nf_conntrack_frag6_timeout - INTEGER (seconds) + default 60 + + Time to keep an IPv6 fragment in memory. + +nf_conntrack_generic_timeout - INTEGER (seconds) + default 600 + + Default for generic timeout. This refers to layer 4 unknown/unsupported + protocols. + +nf_conntrack_helper - BOOLEAN + 0 - disabled + not 0 - enabled (default) + + Enable automatic conntrack helper assignment. + +nf_conntrack_icmp_timeout - INTEGER (seconds) + default 30 + + Default for ICMP timeout. + +nf_conntrack_icmpv6_timeout - INTEGER (seconds) + default 30 + + Default for ICMP6 timeout. + +nf_conntrack_log_invalid - INTEGER + 0 - disable (default) + 1 - log ICMP packets + 6 - log TCP packets + 17 - log UDP packets + 33 - log DCCP packets + 41 - log ICMPv6 packets + 136 - log UDPLITE packets + 255 - log packets of any protocol + + Log invalid packets of a type specified by value. + +nf_conntrack_max - INTEGER + Size of connection tracking table. Default value is + nf_conntrack_buckets value * 4. + +nf_conntrack_tcp_be_liberal - BOOLEAN + 0 - disabled (default) + not 0 - enabled + + Be conservative in what you do, be liberal in what you accept from others. + If it's non-zero, we mark only out of window RST segments as INVALID. + +nf_conntrack_tcp_loose - BOOLEAN + 0 - disabled + not 0 - enabled (default) + + If it is set to zero, we disable picking up already established + connections. + +nf_conntrack_tcp_max_retrans - INTEGER + default 3 + + Maximum number of packets that can be retransmitted without + received an (acceptable) ACK from the destination. If this number + is reached, a shorter timer will be started. + +nf_conntrack_tcp_timeout_close - INTEGER (seconds) + default 10 + +nf_conntrack_tcp_timeout_close_wait - INTEGER (seconds) + default 60 + +nf_conntrack_tcp_timeout_established - INTEGER (seconds) + default 432000 (5 days) + +nf_conntrack_tcp_timeout_fin_wait - INTEGER (seconds) + default 120 + +nf_conntrack_tcp_timeout_last_ack - INTEGER (seconds) + default 30 + +nf_conntrack_tcp_timeout_max_retrans - INTEGER (seconds) + default 300 + +nf_conntrack_tcp_timeout_syn_recv - INTEGER (seconds) + default 60 + +nf_conntrack_tcp_timeout_syn_sent - INTEGER (seconds) + default 120 + +nf_conntrack_tcp_timeout_time_wait - INTEGER (seconds) + default 120 + +nf_conntrack_tcp_timeout_unacknowledged - INTEGER (seconds) + default 300 + +nf_conntrack_timestamp - BOOLEAN + 0 - disabled (default) + not 0 - enabled + + Enable connection tracking flow timestamping. + +nf_conntrack_udp_timeout - INTEGER (seconds) + default 30 + +nf_conntrack_udp_timeout_stream2 - INTEGER (seconds) + default 180 + + This extended timeout will be used in case there is an UDP stream + detected. diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 7e2723637b35..3e68bfbda6bc 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -29,7 +29,6 @@ socket to transmit pending data. */ #define SO_OOBINLINE 0x0100 /* Receive out-of-band data in-band. */ #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ -#endif #define SO_TYPE 0x1008 /* Compatible name for SO_STYLE. */ #define SO_STYLE SO_TYPE /* Synonym */ diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 21353f0fef63..3d5f6d463757 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -74,6 +74,7 @@ config E1000E tristate "Intel(R) PRO/1000 PCI-Express Gigabit Ethernet support" depends on PCI && (!SPARC32 || BROKEN) select CRC32 + select PTP_1588_CLOCK ---help--- This driver supports the PCI-Express Intel(R) PRO/1000 gigabit ethernet family of adapters. For PCI or PCI-X e1000 adapters, diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c index e73c2c355993..ae96facee8fe 100644 --- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c +++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. + Copyright(c) 1999 - 2013 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -111,11 +111,10 @@ static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw); static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw); static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw); static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex); -static s32 e1000_cfg_on_link_up_80003es2lan(struct e1000_hw *hw); -static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, - u16 *data); -static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, - u16 data); +static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, + u16 *data); +static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, + u16 data); static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw); /** @@ -696,7 +695,7 @@ static s32 e1000_phy_force_speed_duplex_80003es2lan(struct e1000_hw *hw) static s32 e1000_get_cable_length_80003es2lan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; - s32 ret_val = 0; + s32 ret_val; u16 phy_data, index; ret_val = e1e_rphy(hw, GG82563_PHY_DSP_DISTANCE, &phy_data); @@ -774,6 +773,9 @@ static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw) ctrl = er32(CTRL); ret_val = e1000_acquire_phy_80003es2lan(hw); + if (ret_val) + return ret_val; + e_dbg("Issuing a global reset to MAC\n"); ew32(CTRL, ctrl | E1000_CTRL_RST); e1000_release_phy_80003es2lan(hw); @@ -833,6 +835,8 @@ static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw) /* Setup link and flow control */ ret_val = mac->ops.setup_link(hw); + if (ret_val) + return ret_val; /* Disable IBIST slave mode (far-end loopback) */ e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, @@ -1272,7 +1276,7 @@ static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, u16 *data) { u32 kmrnctrlsta; - s32 ret_val = 0; + s32 ret_val; ret_val = e1000_acquire_mac_csr_80003es2lan(hw); if (ret_val) @@ -1307,7 +1311,7 @@ static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, u16 data) { u32 kmrnctrlsta; - s32 ret_val = 0; + s32 ret_val; ret_val = e1000_acquire_mac_csr_80003es2lan(hw); if (ret_val) @@ -1331,7 +1335,7 @@ static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, **/ static s32 e1000_read_mac_addr_80003es2lan(struct e1000_hw *hw) { - s32 ret_val = 0; + s32 ret_val; /* If there's an alternate MAC address place it in RAR0 * so that it will override the Si installed default perm diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 587890d2d55e..0f55d79afe39 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. + Copyright(c) 1999 - 2013 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -67,9 +67,7 @@ static s32 e1000_write_nvm_eewr_82571(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw); static void e1000_initialize_hw_bits_82571(struct e1000_hw *hw); -static s32 e1000_setup_link_82571(struct e1000_hw *hw); static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw); -static void e1000_clear_vfta_82571(struct e1000_hw *hw); static bool e1000_check_mng_mode_82574(struct e1000_hw *hw); static s32 e1000_led_on_82574(struct e1000_hw *hw); static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw); @@ -556,16 +554,14 @@ static s32 e1000_get_hw_semaphore_82573(struct e1000_hw *hw) s32 i = 0; extcnf_ctrl = er32(EXTCNF_CTRL); - extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; do { + extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; ew32(EXTCNF_CTRL, extcnf_ctrl); extcnf_ctrl = er32(EXTCNF_CTRL); if (extcnf_ctrl & E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP) break; - extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; - usleep_range(2000, 4000); i++; } while (i < MDIO_OWNERSHIP_TIMEOUT); @@ -937,6 +933,8 @@ static s32 e1000_set_d0_lplu_state_82571(struct e1000_hw *hw, bool active) /* When LPLU is enabled, we should disable SmartSpeed */ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, &data); + if (ret_val) + return ret_val; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) @@ -1399,7 +1397,7 @@ bool e1000_check_phy_82574(struct e1000_hw *hw) { u16 status_1kbt = 0; u16 receive_errors = 0; - s32 ret_val = 0; + s32 ret_val; /* Read PHY Receive Error counter first, if its is max - all F's then * read the Base1000T status register If both are max then PHY is hung. @@ -1544,7 +1542,7 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw) ctrl = er32(CTRL); status = er32(STATUS); - rxcw = er32(RXCW); + er32(RXCW); /* SYNCH bit and IV bit are sticky */ udelay(10); rxcw = er32(RXCW); @@ -1799,6 +1797,8 @@ static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw) if (ret_val) return ret_val; ret_val = e1000e_update_nvm_checksum(hw); + if (ret_val) + return ret_val; } } @@ -1812,7 +1812,7 @@ static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw) static s32 e1000_read_mac_addr_82571(struct e1000_hw *hw) { if (hw->mac.type == e1000_82571) { - s32 ret_val = 0; + s32 ret_val; /* If there's an alternate MAC address place it in RAR0 * so that it will override the Si installed default perm diff --git a/drivers/net/ethernet/intel/e1000e/Makefile b/drivers/net/ethernet/intel/e1000e/Makefile index 591b71324505..c2dcfcc10857 100644 --- a/drivers/net/ethernet/intel/e1000e/Makefile +++ b/drivers/net/ethernet/intel/e1000e/Makefile @@ -1,7 +1,7 @@ ################################################################################ # # Intel PRO/1000 Linux driver -# Copyright(c) 1999 - 2012 Intel Corporation. +# Copyright(c) 1999 - 2013 Intel Corporation. # # This program is free software; you can redistribute it and/or modify it # under the terms and conditions of the GNU General Public License, @@ -34,5 +34,5 @@ obj-$(CONFIG_E1000E) += e1000e.o e1000e-objs := 82571.o ich8lan.o 80003es2lan.o \ mac.o manage.o nvm.o phy.o \ - param.o ethtool.o netdev.o + param.o ethtool.o netdev.o ptp.o diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index 36f9fad19a76..3b997194ac3b 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. + Copyright(c) 1999 - 2013 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -233,6 +233,7 @@ #define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ #define E1000_CTRL_LANPHYPC_OVERRIDE 0x00010000 /* SW control of LANPHYPC */ #define E1000_CTRL_LANPHYPC_VALUE 0x00020000 /* SW value of LANPHYPC */ +#define E1000_CTRL_MEHE 0x00080000 /* Memory Error Handling Enable */ #define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ #define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ #define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */ @@ -394,6 +395,12 @@ #define E1000_PBS_16K E1000_PBA_16K +/* Uncorrectable/correctable ECC Error counts and enable bits */ +#define E1000_PBECCSTS_CORR_ERR_CNT_MASK 0x000000FF +#define E1000_PBECCSTS_UNCORR_ERR_CNT_MASK 0x0000FF00 +#define E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT 8 +#define E1000_PBECCSTS_ECC_ENABLE 0x00010000 + #define IFS_MAX 80 #define IFS_MIN 40 #define IFS_RATIO 4 @@ -413,6 +420,7 @@ #define E1000_ICR_RXSEQ 0x00000008 /* Rx sequence error */ #define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */ #define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */ +#define E1000_ICR_ECCER 0x00400000 /* Uncorrectable ECC Error */ #define E1000_ICR_INT_ASSERTED 0x80000000 /* If this bit asserted, the driver should claim the interrupt */ #define E1000_ICR_RXQ0 0x00100000 /* Rx Queue 0 Interrupt */ #define E1000_ICR_RXQ1 0x00200000 /* Rx Queue 1 Interrupt */ @@ -448,6 +456,7 @@ #define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ #define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */ #define E1000_IMS_RXT0 E1000_ICR_RXT0 /* Rx timer intr */ +#define E1000_IMS_ECCER E1000_ICR_ECCER /* Uncorrectable ECC Error */ #define E1000_IMS_RXQ0 E1000_ICR_RXQ0 /* Rx Queue 0 Interrupt */ #define E1000_IMS_RXQ1 E1000_ICR_RXQ1 /* Rx Queue 1 Interrupt */ #define E1000_IMS_TXQ0 E1000_ICR_TXQ0 /* Tx Queue 0 Interrupt */ @@ -544,9 +553,20 @@ #define E1000_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */ #define E1000_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */ +#define E1000_TSYNCRXCTL_TYPE_L2_V2 0x00 +#define E1000_TSYNCRXCTL_TYPE_L4_V1 0x02 +#define E1000_TSYNCRXCTL_TYPE_L2_L4_V2 0x04 +#define E1000_TSYNCRXCTL_TYPE_ALL 0x08 +#define E1000_TSYNCRXCTL_TYPE_EVENT_V2 0x0A #define E1000_TSYNCRXCTL_ENABLED 0x00000010 /* enable Rx timestamping */ #define E1000_TSYNCRXCTL_SYSCFI 0x00000020 /* Sys clock frequency */ +#define E1000_RXMTRL_PTP_V1_SYNC_MESSAGE 0x00000000 +#define E1000_RXMTRL_PTP_V1_DELAY_REQ_MESSAGE 0x00010000 + +#define E1000_RXMTRL_PTP_V2_SYNC_MESSAGE 0x00000000 +#define E1000_RXMTRL_PTP_V2_DELAY_REQ_MESSAGE 0x01000000 + #define E1000_TIMINCA_INCPERIOD_SHIFT 24 #define E1000_TIMINCA_INCVALUE_MASK 0x00FFFFFF diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index dea9e5552966..e6b94835291e 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. + Copyright(c) 1999 - 2013 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -43,7 +43,8 @@ #include <linux/if_vlan.h> #include <linux/clocksource.h> #include <linux/net_tstamp.h> - +#include <linux/ptp_clock_kernel.h> +#include <linux/ptp_classify.h> #include "hw.h" struct e1000_info; @@ -311,6 +312,8 @@ struct e1000_adapter { struct napi_struct napi; + unsigned int uncorr_errors; /* uncorrectable ECC errors */ + unsigned int corr_errors; /* correctable ECC errors */ unsigned int restart_queue; u32 txd_cmd; @@ -369,7 +372,7 @@ struct e1000_adapter { /* structs defined in e1000_hw.h */ struct e1000_hw hw; - spinlock_t stats64_lock; + spinlock_t stats64_lock; /* protects statistics counters */ struct e1000_hw_stats stats; struct e1000_phy_info phy_info; struct e1000_phy_stats phy_stats; @@ -413,6 +416,8 @@ struct e1000_adapter { spinlock_t systim_lock; /* protects SYSTIML/H regsters */ struct cyclecounter cc; struct timecounter tc; + struct ptp_clock *ptp_clock; + struct ptp_clock_info ptp_clock_info; }; struct e1000_info { @@ -427,6 +432,8 @@ struct e1000_info { const struct e1000_nvm_operations *nvm_ops; }; +s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca); + /* The system time is maintained by a 64-bit counter comprised of the 32-bit * SYSTIMH and SYSTIML registers. How the counter increments (and therefore * its resolution) is based on the contents of the TIMINCA register - it @@ -556,8 +563,6 @@ extern void e1000e_write_itr(struct e1000_adapter *adapter, u32 itr); extern unsigned int copybreak; -extern char *e1000e_get_hw_dev_name(struct e1000_hw *hw); - extern const struct e1000_info e1000_82571_info; extern const struct e1000_info e1000_82572_info; extern const struct e1000_info e1000_82573_info; @@ -704,6 +709,8 @@ extern s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw); extern s32 e1000_check_polarity_igp(struct e1000_hw *hw); extern bool e1000_check_phy_82574(struct e1000_hw *hw); extern s32 e1000_read_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 *data); +extern void e1000e_ptp_init(struct e1000_adapter *adapter); +extern void e1000e_ptp_remove(struct e1000_adapter *adapter); static inline s32 e1000_phy_hw_reset(struct e1000_hw *hw) { diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index f268cbcb751d..6ab949d0b39b 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. + Copyright(c) 1999 - 2013 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -109,6 +109,8 @@ static const struct e1000_stats e1000_gstrings_stats[] = { E1000_STAT("rx_dma_failed", rx_dma_failed), E1000_STAT("tx_dma_failed", tx_dma_failed), E1000_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared), + E1000_STAT("uncorr_ecc_errors", uncorr_errors), + E1000_STAT("corr_ecc_errors", corr_errors), }; #define E1000_GLOBAL_STATS_LEN ARRAY_SIZE(e1000_gstrings_stats) @@ -760,8 +762,9 @@ static bool reg_pattern_test(struct e1000_adapter *adapter, u64 *data, (test[pat] & write)); val = E1000_READ_REG_ARRAY(&adapter->hw, reg, offset); if (val != (test[pat] & write & mask)) { - e_err("pattern test reg %04X failed: got 0x%08X expected 0x%08X\n", - reg + offset, val, (test[pat] & write & mask)); + e_err("pattern test failed (reg 0x%05X): got 0x%08X expected 0x%08X\n", + reg + (offset << 2), val, + (test[pat] & write & mask)); *data = reg; return 1; } @@ -776,7 +779,7 @@ static bool reg_set_and_check(struct e1000_adapter *adapter, u64 *data, __ew32(&adapter->hw, reg, write & mask); val = __er32(&adapter->hw, reg); if ((write & mask) != (val & mask)) { - e_err("set/check reg %04X test failed: got 0x%08X expected 0x%08X\n", + e_err("set/check test failed (reg 0x%05X): got 0x%08X expected 0x%08X\n", reg, (val & mask), (write & mask)); *data = reg; return 1; @@ -884,12 +887,20 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data) E1000_FWSM_WLOCK_MAC_SHIFT; for (i = 0; i < mac->rar_entry_count; i++) { - /* Cannot test write-protected SHRAL[n] registers */ - if ((wlock_mac == 1) || (wlock_mac && (i > wlock_mac))) - continue; + if (mac->type == e1000_pch_lpt) { + /* Cannot test write-protected SHRAL[n] registers */ + if ((wlock_mac == 1) || (wlock_mac && (i > wlock_mac))) + continue; + + /* SHRAH[9] different than the others */ + if (i == 10) + mask |= (1 << 30); + else + mask &= ~(1 << 30); + } - REG_PATTERN_TEST_ARRAY(E1000_RA, ((i << 1) + 1), - mask, 0xFFFFFFFF); + REG_PATTERN_TEST_ARRAY(E1000_RA, ((i << 1) + 1), mask, + 0xFFFFFFFF); } for (i = 0; i < mac->mta_reg_count; i++) @@ -1394,7 +1405,7 @@ static int e1000_set_82571_fiber_loopback(struct e1000_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 ctrl = er32(CTRL); - int link = 0; + int link; /* special requirements for 82571/82572 fiber adapters */ @@ -2200,8 +2211,20 @@ static int e1000e_get_ts_info(struct net_device *netdev, info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); info->rx_filters = ((1 << HWTSTAMP_FILTER_NONE) | + (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) | + (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ) | (1 << HWTSTAMP_FILTER_ALL)); + if (adapter->ptp_clock) + info->phc_index = ptp_clock_index(adapter->ptp_clock); + return 0; } diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index 8e7e80345a60..a10de4d122cc 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. + Copyright(c) 1999 - 2013 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -79,6 +79,7 @@ enum e1e_registers { #define E1000_POEMB E1000_PHY_CTRL /* PHY OEM Bits */ E1000_PBA = 0x01000, /* Packet Buffer Allocation - RW */ E1000_PBS = 0x01008, /* Packet Buffer Size */ + E1000_PBECCSTS = 0x0100C, /* Packet Buffer ECC Status - RW */ E1000_EEMNGCTL = 0x01010, /* MNG EEprom Control */ E1000_EEWR = 0x0102C, /* EEPROM Write Register - RW */ E1000_FLOP = 0x0103C, /* FLASH Opcode Register */ @@ -251,6 +252,8 @@ enum e1e_registers { E1000_TSYNCRXCTL = 0x0B620, /* Rx Time Sync Control register - RW */ E1000_RXSTMPL = 0x0B624, /* Rx timestamp Low - RO */ E1000_RXSTMPH = 0x0B628, /* Rx timestamp High - RO */ + E1000_RXMTRL = 0x0B634, /* Timesync Rx EtherType and Msg Type - RW */ + E1000_RXUDP = 0x0B638, /* Timesync Rx UDP Port - RW */ }; #define E1000_MAX_PHY_ADDR 4 @@ -539,16 +542,6 @@ enum e1000_serdes_link_state { e1000_serdes_link_forced_up }; -/* Receive Descriptor */ -struct e1000_rx_desc { - __le64 buffer_addr; /* Address of the descriptor's data buffer */ - __le16 length; /* Length of data DMAed into data buffer */ - __le16 csum; /* Packet checksum */ - u8 status; /* Descriptor status */ - u8 errors; /* Descriptor Errors */ - __le16 special; -}; - /* Receive Descriptor - Extended */ union e1000_rx_desc_extended { struct { diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 50935ef48171..87676b652edc 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. + Copyright(c) 1999 - 2013 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -237,7 +237,6 @@ union ich8_flash_protected_range { u32 regval; }; -static s32 e1000_setup_link_ich8lan(struct e1000_hw *hw); static void e1000_clear_hw_cntrs_ich8lan(struct e1000_hw *hw); static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw); static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank); @@ -249,9 +248,7 @@ static s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, u32 offset, u16 *data); static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, u8 size, u16 *data); -static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw); static s32 e1000_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw); -static s32 e1000_get_cfg_done_ich8lan(struct e1000_hw *hw); static s32 e1000_cleanup_led_ich8lan(struct e1000_hw *hw); static s32 e1000_led_on_ich8lan(struct e1000_hw *hw); static s32 e1000_led_off_ich8lan(struct e1000_hw *hw); @@ -263,7 +260,7 @@ static s32 e1000_led_off_pchlan(struct e1000_hw *hw); static s32 e1000_set_lplu_state_pchlan(struct e1000_hw *hw, bool active); static void e1000_power_down_phy_copper_ich8lan(struct e1000_hw *hw); static void e1000_lan_init_done_ich8lan(struct e1000_hw *hw); -static s32 e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link); +static s32 e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link); static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw); static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw); static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw); @@ -363,10 +360,15 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) s32 ret_val; u16 phy_reg; + /* Gate automatic PHY configuration by hardware on managed and + * non-managed 82579 and newer adapters. + */ + e1000_gate_hw_phy_config_ich8lan(hw, true); + ret_val = hw->phy.ops.acquire(hw); if (ret_val) { e_dbg("Failed to initialize PHY flow\n"); - return ret_val; + goto out; } /* The MAC-PHY interconnect may be in SMBus mode. If the PHY is @@ -387,13 +389,6 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) /* fall-through */ case e1000_pch2lan: - /* Gate automatic PHY configuration by hardware on - * non-managed 82579 - */ - if ((hw->mac.type == e1000_pch2lan) && - !(fwsm & E1000_ICH_FWSM_FW_VALID)) - e1000_gate_hw_phy_config_ich8lan(hw, true); - if (e1000_phy_is_accessible_pchlan(hw)) { if (hw->mac.type == e1000_pch_lpt) { /* Unforce SMBus mode in PHY */ @@ -428,6 +423,15 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) mac_reg |= E1000_FEXTNVM3_PHY_CFG_COUNTER_50MSEC; ew32(FEXTNVM3, mac_reg); + if (hw->mac.type == e1000_pch_lpt) { + /* Toggling LANPHYPC brings the PHY out of SMBus mode + * So ensure that the MAC is also out of SMBus mode + */ + mac_reg = er32(CTRL_EXT); + mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; + ew32(CTRL_EXT, mac_reg); + } + /* Toggle LANPHYPC Value bit */ mac_reg = er32(CTRL); mac_reg |= E1000_CTRL_LANPHYPC_OVERRIDE; @@ -461,6 +465,7 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) */ ret_val = e1000e_phy_hw_reset_generic(hw); +out: /* Ungate automatic PHY configuration on non-managed 82579 */ if ((hw->mac.type == e1000_pch2lan) && !(fwsm & E1000_ICH_FWSM_FW_VALID)) { @@ -480,7 +485,7 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; - s32 ret_val = 0; + s32 ret_val; phy->addr = 1; phy->reset_delay_us = 100; @@ -763,13 +768,6 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) if (mac->type == e1000_ich8lan) e1000e_set_kmrn_lock_loss_workaround_ich8lan(hw, true); - /* Gate automatic PHY configuration by hardware on managed - * 82579 and i217 - */ - if ((mac->type == e1000_pch2lan || mac->type == e1000_pch_lpt) && - (er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) - e1000_gate_hw_phy_config_ich8lan(hw, true); - return 0; } @@ -785,7 +783,7 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) static s32 __e1000_access_emi_reg_locked(struct e1000_hw *hw, u16 address, u16 *data, bool read) { - s32 ret_val = 0; + s32 ret_val; ret_val = e1e_wphy_locked(hw, I82579_EMI_ADDR, address); if (ret_val) @@ -1389,7 +1387,7 @@ static s32 e1000_write_smbus_addr(struct e1000_hw *hw) u32 strap = er32(STRAP); u32 freq = (strap & E1000_STRAP_SMT_FREQ_MASK) >> E1000_STRAP_SMT_FREQ_SHIFT; - s32 ret_val = 0; + s32 ret_val; strap &= E1000_STRAP_SMBUS_ADDRESS_MASK; @@ -1625,7 +1623,7 @@ release: **/ s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable) { - s32 ret_val = 0; + s32 ret_val; u32 ctrl_reg = 0; u32 ctrl_ext = 0; u32 reg = 0; @@ -2286,7 +2284,7 @@ static s32 e1000_phy_hw_reset_ich8lan(struct e1000_hw *hw) **/ static s32 e1000_set_lplu_state_pchlan(struct e1000_hw *hw, bool active) { - s32 ret_val = 0; + s32 ret_val; u16 oem_reg; ret_val = e1e_rphy(hw, HV_OEM_BITS, &oem_reg); @@ -2344,6 +2342,8 @@ static s32 e1000_set_d0_lplu_state_ich8lan(struct e1000_hw *hw, bool active) /* When LPLU is enabled, we should disable SmartSpeed */ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, &data); + if (ret_val) + return ret_val; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) @@ -3704,6 +3704,17 @@ static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw) if (hw->mac.type == e1000_ich8lan) reg |= (E1000_RFCTL_IPV6_EX_DIS | E1000_RFCTL_NEW_IPV6_EXT_DIS); ew32(RFCTL, reg); + + /* Enable ECC on Lynxpoint */ + if (hw->mac.type == e1000_pch_lpt) { + reg = er32(PBECCSTS); + reg |= E1000_PBECCSTS_ECC_ENABLE; + ew32(PBECCSTS, reg); + + reg = er32(CTRL); + reg |= E1000_CTRL_MEHE; + ew32(CTRL, reg); + } } /** @@ -4044,8 +4055,7 @@ void e1000e_gig_downshift_workaround_ich8lan(struct e1000_hw *hw) if (ret_val) return; reg_data &= ~E1000_KMRNCTRLSTA_DIAG_NELPBK; - ret_val = e1000e_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, - reg_data); + e1000e_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, reg_data); } /** @@ -4630,7 +4640,7 @@ const struct e1000_info e1000_pch_lpt_info = { .flags2 = FLAG2_HAS_PHY_STATS | FLAG2_HAS_EEE, .pba = 26, - .max_hw_frame_size = DEFAULT_JUMBO, + .max_hw_frame_size = 9018, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index 0fa4c0656e09..0709f49f0335 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. + Copyright(c) 1999 - 2013 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -165,7 +165,7 @@ void e1000e_init_rx_addrs(struct e1000_hw *hw, u16 rar_count) s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw) { u32 i; - s32 ret_val = 0; + s32 ret_val; u16 offset, nvm_alt_mac_addr_offset, nvm_data; u8 alt_mac_addr[ETH_ALEN]; diff --git a/drivers/net/ethernet/intel/e1000e/manage.c b/drivers/net/ethernet/intel/e1000e/manage.c index 6dc47beb3adc..4dae0dbda837 100644 --- a/drivers/net/ethernet/intel/e1000e/manage.c +++ b/drivers/net/ethernet/intel/e1000e/manage.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. + Copyright(c) 1999 - 2013 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index c15b7e438a44..e386e95102f4 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. + Copyright(c) 1999 - 2013 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -1780,6 +1780,23 @@ static irqreturn_t e1000_intr_msi(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + /* Reset on uncorrectable ECC error */ + if ((icr & E1000_ICR_ECCER) && (hw->mac.type == e1000_pch_lpt)) { + u32 pbeccsts = er32(PBECCSTS); + + adapter->corr_errors += + pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK; + adapter->uncorr_errors += + (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >> + E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT; + + /* Do the reset outside of interrupt context */ + schedule_work(&adapter->reset_task); + + /* return immediately since reset is imminent */ + return IRQ_HANDLED; + } + if (napi_schedule_prep(&adapter->napi)) { adapter->total_tx_bytes = 0; adapter->total_tx_packets = 0; @@ -1843,6 +1860,23 @@ static irqreturn_t e1000_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + /* Reset on uncorrectable ECC error */ + if ((icr & E1000_ICR_ECCER) && (hw->mac.type == e1000_pch_lpt)) { + u32 pbeccsts = er32(PBECCSTS); + + adapter->corr_errors += + pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK; + adapter->uncorr_errors += + (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >> + E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT; + + /* Do the reset outside of interrupt context */ + schedule_work(&adapter->reset_task); + + /* return immediately since reset is imminent */ + return IRQ_HANDLED; + } + if (napi_schedule_prep(&adapter->napi)) { adapter->total_tx_bytes = 0; adapter->total_tx_packets = 0; @@ -2206,6 +2240,8 @@ static void e1000_irq_enable(struct e1000_adapter *adapter) if (adapter->msix_entries) { ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574); ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC); + } else if (hw->mac.type == e1000_pch_lpt) { + ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER); } else { ew32(IMS, IMS_ENABLE_MASK); } @@ -3413,7 +3449,7 @@ static void e1000e_setup_rss_hash(struct e1000_adapter *adapter) * Get attributes for incrementing the System Time Register SYSTIML/H at * the default base frequency, and set the cyclecounter shift value. **/ -static s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) +s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) { struct e1000_hw *hw = &adapter->hw; u32 incvalue, incperiod, shift; @@ -3485,6 +3521,10 @@ static int e1000e_config_hwtstamp(struct e1000_adapter *adapter) struct hwtstamp_config *config = &adapter->hwtstamp_config; u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; + u32 rxmtrl = 0; + u16 rxudp = 0; + bool is_l4 = false; + bool is_l2 = false; u32 regval; s32 ret_val; @@ -3509,7 +3549,69 @@ static int e1000e_config_hwtstamp(struct e1000_adapter *adapter) case HWTSTAMP_FILTER_NONE: tsync_rx_ctl = 0; break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; + rxmtrl = E1000_RXMTRL_PTP_V1_SYNC_MESSAGE; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; + rxmtrl = E1000_RXMTRL_PTP_V1_DELAY_REQ_MESSAGE; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + /* Also time stamps V2 L2 Path Delay Request/Response */ + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_V2; + rxmtrl = E1000_RXMTRL_PTP_V2_SYNC_MESSAGE; + is_l2 = true; + break; + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + /* Also time stamps V2 L2 Path Delay Request/Response. */ + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_V2; + rxmtrl = E1000_RXMTRL_PTP_V2_DELAY_REQ_MESSAGE; + is_l2 = true; + break; + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + /* Hardware cannot filter just V2 L4 Sync messages; + * fall-through to V2 (both L2 and L4) Sync. + */ + case HWTSTAMP_FILTER_PTP_V2_SYNC: + /* Also time stamps V2 Path Delay Request/Response. */ + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; + rxmtrl = E1000_RXMTRL_PTP_V2_SYNC_MESSAGE; + is_l2 = true; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + /* Hardware cannot filter just V2 L4 Delay Request messages; + * fall-through to V2 (both L2 and L4) Delay Request. + */ + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + /* Also time stamps V2 Path Delay Request/Response. */ + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; + rxmtrl = E1000_RXMTRL_PTP_V2_DELAY_REQ_MESSAGE; + is_l2 = true; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + /* Hardware cannot filter just V2 L4 or L2 Event messages; + * fall-through to all V2 (both L2 and L4) Events. + */ + case HWTSTAMP_FILTER_PTP_V2_EVENT: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2; + config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + is_l2 = true; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + /* For V1, the hardware can only filter Sync messages or + * Delay Request messages but not both so fall-through to + * time stamp all packets. + */ case HWTSTAMP_FILTER_ALL: + is_l2 = true; + is_l4 = true; tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; config->rx_filter = HWTSTAMP_FILTER_ALL; break; @@ -3541,9 +3643,25 @@ static int e1000e_config_hwtstamp(struct e1000_adapter *adapter) return -EAGAIN; } + /* L2: define ethertype filter for time stamped packets */ + if (is_l2) + rxmtrl |= ETH_P_1588; + + /* define which PTP packets get time stamped */ + ew32(RXMTRL, rxmtrl); + + /* Filter by destination port */ + if (is_l4) { + rxudp = PTP_EV_PORT; + cpu_to_be16s(&rxudp); + } + ew32(RXUDP, rxudp); + + e1e_flush(); + /* Clear TSYNCRXCTL_VALID & TSYNCTXCTL_VALID bit */ - regval = er32(RXSTMPH); - regval = er32(TXSTMPH); + er32(RXSTMPH); + er32(TXSTMPH); /* Get and set the System Time Register SYSTIM base frequency */ ret_val = e1000e_get_base_timinca(adapter, ®val); @@ -3724,14 +3842,17 @@ void e1000e_reset(struct e1000_adapter *adapter) break; case e1000_pch2lan: case e1000_pch_lpt: - fc->high_water = 0x05C20; - fc->low_water = 0x05048; - fc->pause_time = 0x0650; fc->refresh_time = 0x0400; - if (adapter->netdev->mtu > ETH_DATA_LEN) { - pba = 14; - ew32(PBA, pba); + + if (adapter->netdev->mtu <= ETH_DATA_LEN) { + fc->high_water = 0x05C20; + fc->low_water = 0x05048; + fc->pause_time = 0x0650; + break; } + + fc->high_water = ((pba << 10) * 9 / 10) & E1000_FCRTH_RTH; + fc->low_water = ((pba << 10) * 8 / 10) & E1000_FCRTL_RTL; break; } @@ -4534,6 +4655,16 @@ static void e1000e_update_stats(struct e1000_adapter *adapter) adapter->stats.mgptc += er32(MGTPTC); adapter->stats.mgprc += er32(MGTPRC); adapter->stats.mgpdc += er32(MGTPDC); + + /* Correctable ECC Errors */ + if (hw->mac.type == e1000_pch_lpt) { + u32 pbeccsts = er32(PBECCSTS); + adapter->corr_errors += + pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK; + adapter->uncorr_errors += + (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >> + E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT; + } } /** @@ -5662,6 +5793,24 @@ static int e1000e_hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr) config = adapter->hwtstamp_config; + switch (config.rx_filter) { + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + /* With V2 type filters which specify a Sync or Delay Request, + * Path Delay Request/Response messages are also time stamped + * by hardware so notify the caller the requested packets plus + * some others are time stamped. + */ + config.rx_filter = HWTSTAMP_FILTER_SOME; + break; + default: + break; + } + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; } @@ -5685,7 +5834,7 @@ static int e1000_init_phy_wakeup(struct e1000_adapter *adapter, u32 wufc) struct e1000_hw *hw = &adapter->hw; u32 i, mac_reg; u16 phy_reg, wuc_enable; - int retval = 0; + int retval; /* copy MAC RARs to PHY RARs */ e1000_copy_rx_addrs_to_phy_ich8lan(hw); @@ -6669,6 +6818,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* carrier off reporting is important to ethtool even BEFORE open */ netif_carrier_off(netdev); + /* init PTP hardware clock */ + e1000e_ptp_init(adapter); + e1000_print_device_info(adapter); if (pci_dev_run_wake(pdev)) @@ -6717,6 +6869,8 @@ static void e1000_remove(struct pci_dev *pdev) struct e1000_adapter *adapter = netdev_priv(netdev); bool down = test_bit(__E1000_DOWN, &adapter->state); + e1000e_ptp_remove(adapter); + /* The timers may be rescheduled, so explicitly disable them * from being rescheduled. */ @@ -6891,7 +7045,7 @@ static int __init e1000_init_module(void) int ret; pr_info("Intel(R) PRO/1000 Network Driver - %s\n", e1000e_driver_version); - pr_info("Copyright(c) 1999 - 2012 Intel Corporation.\n"); + pr_info("Copyright(c) 1999 - 2013 Intel Corporation.\n"); ret = pci_register_driver(&e1000_driver); return ret; diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c index 1e7882c7d6d4..84fecc268162 100644 --- a/drivers/net/ethernet/intel/e1000e/nvm.c +++ b/drivers/net/ethernet/intel/e1000e/nvm.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. + Copyright(c) 1999 - 2013 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c index b29a8a5116a8..19c40d6f53cc 100644 --- a/drivers/net/ethernet/intel/e1000e/param.c +++ b/drivers/net/ethernet/intel/e1000e/param.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. + Copyright(c) 1999 - 2013 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index 28b38ff37e84..44fb432f3c10 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. + Copyright(c) 1999 - 2013 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -32,12 +32,11 @@ static s32 e1000_get_phy_cfg_done(struct e1000_hw *hw); static s32 e1000_phy_force_speed_duplex(struct e1000_hw *hw); static s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active); static s32 e1000_wait_autoneg(struct e1000_hw *hw); -static u32 e1000_get_phy_addr_for_bm_page(u32 page, u32 reg); static s32 e1000_access_phy_wakeup_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data, bool read, bool page_set); static u32 e1000_get_phy_addr_for_hv_page(u32 page); static s32 e1000_access_phy_debug_regs_hv(struct e1000_hw *hw, u32 offset, - u16 *data, bool read); + u16 *data, bool read); /* Cable length tables */ static const u16 e1000_m88_cable_length_table[] = { @@ -2672,7 +2671,7 @@ s32 e1000_enable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg) **/ s32 e1000_disable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg) { - s32 ret_val = 0; + s32 ret_val; /* Select Port Control Registers page */ ret_val = e1000_set_page_igp(hw, (BM_PORT_CTRL_PAGE << IGP_PAGE_SHIFT)); @@ -3104,8 +3103,8 @@ static s32 e1000_access_phy_debug_regs_hv(struct e1000_hw *hw, u32 offset, u16 *data, bool read) { s32 ret_val; - u32 addr_reg = 0; - u32 data_reg = 0; + u32 addr_reg; + u32 data_reg; /* This takes care of the difference with desktop vs mobile phy */ addr_reg = (hw->phy.type == e1000_phy_82578) ? @@ -3333,7 +3332,7 @@ s32 e1000_get_cable_length_82577(struct e1000_hw *hw) I82577_DSTATUS_CABLE_LENGTH_SHIFT; if (length == E1000_CABLE_LENGTH_UNDEFINED) - ret_val = -E1000_ERR_PHY; + return -E1000_ERR_PHY; phy->cable_length = length; diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c new file mode 100644 index 000000000000..6b8df6587858 --- /dev/null +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -0,0 +1,276 @@ +/******************************************************************************* + + Intel PRO/1000 Linux driver + Copyright(c) 1999 - 2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + Linux NICS <linux.nics@intel.com> + e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* PTP 1588 Hardware Clock (PHC) + * Derived from PTP Hardware Clock driver for Intel 82576 and 82580 (igb) + * Copyright (C) 2011 Richard Cochran <richardcochran@gmail.com> + */ + +#include "e1000.h" + +/** + * e1000e_phc_adjfreq - adjust the frequency of the hardware clock + * @ptp: ptp clock structure + * @delta: Desired frequency change in parts per billion + * + * Adjust the frequency of the PHC cycle counter by the indicated delta from + * the base frequency. + **/ +static int e1000e_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta) +{ + struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter, + ptp_clock_info); + struct e1000_hw *hw = &adapter->hw; + bool neg_adj = false; + u64 adjustment; + u32 timinca, incvalue; + s32 ret_val; + + if ((delta > ptp->max_adj) || (delta <= -1000000000)) + return -EINVAL; + + if (delta < 0) { + neg_adj = true; + delta = -delta; + } + + /* Get the System Time Register SYSTIM base frequency */ + ret_val = e1000e_get_base_timinca(adapter, &timinca); + if (ret_val) + return ret_val; + + incvalue = timinca & E1000_TIMINCA_INCVALUE_MASK; + + adjustment = incvalue; + adjustment *= delta; + adjustment = div_u64(adjustment, 1000000000); + + incvalue = neg_adj ? (incvalue - adjustment) : (incvalue + adjustment); + + timinca &= ~E1000_TIMINCA_INCVALUE_MASK; + timinca |= incvalue; + + ew32(TIMINCA, timinca); + + return 0; +} + +/** + * e1000e_phc_adjtime - Shift the time of the hardware clock + * @ptp: ptp clock structure + * @delta: Desired change in nanoseconds + * + * Adjust the timer by resetting the timecounter structure. + **/ +static int e1000e_phc_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter, + ptp_clock_info); + unsigned long flags; + s64 now; + + spin_lock_irqsave(&adapter->systim_lock, flags); + now = timecounter_read(&adapter->tc); + now += delta; + timecounter_init(&adapter->tc, &adapter->cc, now); + spin_unlock_irqrestore(&adapter->systim_lock, flags); + + return 0; +} + +/** + * e1000e_phc_gettime - Reads the current time from the hardware clock + * @ptp: ptp clock structure + * @ts: timespec structure to hold the current time value + * + * Read the timecounter and return the correct value in ns after converting + * it into a struct timespec. + **/ +static int e1000e_phc_gettime(struct ptp_clock_info *ptp, struct timespec *ts) +{ + struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter, + ptp_clock_info); + unsigned long flags; + u32 remainder; + u64 ns; + + spin_lock_irqsave(&adapter->systim_lock, flags); + ns = timecounter_read(&adapter->tc); + spin_unlock_irqrestore(&adapter->systim_lock, flags); + + ts->tv_sec = div_u64_rem(ns, NSEC_PER_SEC, &remainder); + ts->tv_nsec = remainder; + + return 0; +} + +/** + * e1000e_phc_settime - Set the current time on the hardware clock + * @ptp: ptp clock structure + * @ts: timespec containing the new time for the cycle counter + * + * Reset the timecounter to use a new base value instead of the kernel + * wall timer value. + **/ +static int e1000e_phc_settime(struct ptp_clock_info *ptp, + const struct timespec *ts) +{ + struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter, + ptp_clock_info); + unsigned long flags; + u64 ns; + + ns = ts->tv_sec * NSEC_PER_SEC; + ns += ts->tv_nsec; + + /* reset the timecounter */ + spin_lock_irqsave(&adapter->systim_lock, flags); + timecounter_init(&adapter->tc, &adapter->cc, ns); + spin_unlock_irqrestore(&adapter->systim_lock, flags); + + return 0; +} + +/** + * e1000e_phc_enable - enable or disable an ancillary feature + * @ptp: ptp clock structure + * @request: Desired resource to enable or disable + * @on: Caller passes one to enable or zero to disable + * + * Enable (or disable) ancillary features of the PHC subsystem. + * Currently, no ancillary features are supported. + **/ +static int e1000e_phc_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *request, int on) +{ + return -EOPNOTSUPP; +} + +static void e1000e_systim_overflow_work(struct work_struct *work) +{ + struct e1000_adapter *adapter = container_of(work, struct e1000_adapter, + systim_overflow_work.work); + struct e1000_hw *hw = &adapter->hw; + struct timespec ts; + + adapter->ptp_clock_info.gettime(&adapter->ptp_clock_info, &ts); + + e_dbg("SYSTIM overflow check at %ld.%09lu\n", ts.tv_sec, ts.tv_nsec); + + schedule_delayed_work(&adapter->systim_overflow_work, + E1000_SYSTIM_OVERFLOW_PERIOD); +} + +static const struct ptp_clock_info e1000e_ptp_clock_info = { + .owner = THIS_MODULE, + .n_alarm = 0, + .n_ext_ts = 0, + .n_per_out = 0, + .pps = 0, + .adjfreq = e1000e_phc_adjfreq, + .adjtime = e1000e_phc_adjtime, + .gettime = e1000e_phc_gettime, + .settime = e1000e_phc_settime, + .enable = e1000e_phc_enable, +}; + +/** + * e1000e_ptp_init - initialize PTP for devices which support it + * @adapter: board private structure + * + * This function performs the required steps for enabling PTP support. + * If PTP support has already been loaded it simply calls the cyclecounter + * init routine and exits. + **/ +void e1000e_ptp_init(struct e1000_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + + adapter->ptp_clock = NULL; + + if (!(adapter->flags & FLAG_HAS_HW_TIMESTAMP)) + return; + + adapter->ptp_clock_info = e1000e_ptp_clock_info; + + snprintf(adapter->ptp_clock_info.name, + sizeof(adapter->ptp_clock_info.name), "%pm", + adapter->netdev->perm_addr); + + switch (hw->mac.type) { + case e1000_pch2lan: + case e1000_pch_lpt: + if ((hw->mac.type != e1000_pch_lpt) || + (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) { + adapter->ptp_clock_info.max_adj = 24000000 - 1; + break; + } + /* fall-through */ + case e1000_82574: + case e1000_82583: + adapter->ptp_clock_info.max_adj = 600000000 - 1; + break; + default: + break; + } + + INIT_DELAYED_WORK(&adapter->systim_overflow_work, + e1000e_systim_overflow_work); + + schedule_delayed_work(&adapter->systim_overflow_work, + E1000_SYSTIM_OVERFLOW_PERIOD); + + adapter->ptp_clock = ptp_clock_register(&adapter->ptp_clock_info, + &adapter->pdev->dev); + if (IS_ERR(adapter->ptp_clock)) { + adapter->ptp_clock = NULL; + e_err("ptp_clock_register failed\n"); + } else { + e_info("registered PHC clock\n"); + } +} + +/** + * e1000e_ptp_remove - disable PTP device and stop the overflow check + * @adapter: board private structure + * + * Stop the PTP support, and cancel the delayed work. + **/ +void e1000e_ptp_remove(struct e1000_adapter *adapter) +{ + if (!(adapter->flags & FLAG_HAS_HW_TIMESTAMP)) + return; + + cancel_delayed_work_sync(&adapter->systim_overflow_work); + + if (adapter->ptp_clock) { + ptp_clock_unregister(adapter->ptp_clock); + adapter->ptp_clock = NULL; + e_info("removed PHC\n"); + } +} diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile index f3a632bf8d96..687c83d1bdab 100644 --- a/drivers/net/ethernet/intel/ixgbe/Makefile +++ b/drivers/net/ethernet/intel/ixgbe/Makefile @@ -32,7 +32,7 @@ obj-$(CONFIG_IXGBE) += ixgbe.o -ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o ixgbe_debugfs.o\ +ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \ ixgbe_82599.o ixgbe_82598.o ixgbe_phy.o ixgbe_sriov.o \ ixgbe_mbx.o ixgbe_x540.o ixgbe_lib.o ixgbe_ptp.o @@ -40,4 +40,5 @@ ixgbe-$(CONFIG_IXGBE_DCB) += ixgbe_dcb.o ixgbe_dcb_82598.o \ ixgbe_dcb_82599.o ixgbe_dcb_nl.o ixgbe-$(CONFIG_IXGBE_HWMON) += ixgbe_sysfs.o +ixgbe-$(CONFIG_DEBUG_FS) += ixgbe_debugfs.o ixgbe-$(CONFIG_FCOE:m=y) += ixgbe_fcoe.o diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index f94c085a9c0b..8371ae4265fe 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -35,6 +35,7 @@ #include <linux/cpumask.h> #include <linux/aer.h> #include <linux/if_vlan.h> +#include <linux/jiffies.h> #include <linux/clocksource.h> #include <linux/net_tstamp.h> @@ -231,6 +232,7 @@ struct ixgbe_ring { struct ixgbe_tx_buffer *tx_buffer_info; struct ixgbe_rx_buffer *rx_buffer_info; }; + unsigned long last_rx_timestamp; unsigned long state; u8 __iomem *tail; dma_addr_t dma; /* phys. address of descriptor ring */ @@ -580,11 +582,14 @@ struct ixgbe_adapter { struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; + struct work_struct ptp_tx_work; + struct sk_buff *ptp_tx_skb; + unsigned long ptp_tx_start; unsigned long last_overflow_check; + unsigned long last_rx_ptp_check; spinlock_t tmreg_lock; struct cyclecounter cc; struct timecounter tc; - int rx_hwtstamp_filter; u32 base_incval; /* SR-IOV */ @@ -749,15 +754,32 @@ static inline struct netdev_queue *txring_txq(const struct ixgbe_ring *ring) extern void ixgbe_ptp_init(struct ixgbe_adapter *adapter); extern void ixgbe_ptp_stop(struct ixgbe_adapter *adapter); extern void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter); -extern void ixgbe_ptp_tx_hwtstamp(struct ixgbe_q_vector *q_vector, - struct sk_buff *skb); -extern void ixgbe_ptp_rx_hwtstamp(struct ixgbe_q_vector *q_vector, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb); +extern void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter); +extern void __ixgbe_ptp_rx_hwtstamp(struct ixgbe_q_vector *q_vector, + struct sk_buff *skb); +static inline void ixgbe_ptp_rx_hwtstamp(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + if (unlikely(!ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS))) + return; + + __ixgbe_ptp_rx_hwtstamp(rx_ring->q_vector, skb); + + /* + * Update the last_rx_timestamp timer in order to enable watchdog check + * for error case of latched timestamp on a dropped packet. + */ + rx_ring->last_rx_timestamp = jiffies; +} + extern int ixgbe_ptp_hwtstamp_ioctl(struct ixgbe_adapter *adapter, struct ifreq *ifr, int cmd); extern void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter); extern void ixgbe_ptp_reset(struct ixgbe_adapter *adapter); extern void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr); +#ifdef CONFIG_PCI_IOV +void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter); +#endif #endif /* _IXGBE_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c index 50aa546b8c7a..3504686d3af5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c @@ -24,9 +24,6 @@ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 *******************************************************************************/ - -#ifdef CONFIG_DEBUG_FS - #include <linux/debugfs.h> #include <linux/module.h> @@ -277,5 +274,3 @@ void ixgbe_dbg_exit(void) { debugfs_remove_recursive(ixgbe_dbg_root); } - -#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 326858424345..1513b1052ee2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1837,19 +1837,11 @@ static void ixgbe_diag_test(struct net_device *netdev, struct ethtool_test *eth_test, u64 *data) { struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; bool if_running = netif_running(netdev); set_bit(__IXGBE_TESTING, &adapter->state); if (eth_test->flags == ETH_TEST_FL_OFFLINE) { - /* Offline tests */ - - e_info(hw, "offline testing starting\n"); - - /* Link test performed before hardware reset so autoneg doesn't - * interfere with test result */ - if (ixgbe_link_test(adapter, &data[4])) - eth_test->flags |= ETH_TEST_FL_FAILED; - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { int i; for (i = 0; i < adapter->num_vfs; i++) { @@ -1870,12 +1862,24 @@ static void ixgbe_diag_test(struct net_device *netdev, } } + /* Offline tests */ + e_info(hw, "offline testing starting\n"); + if (if_running) /* indicate we're in test mode */ dev_close(netdev); - else - ixgbe_reset(adapter); + /* bringing adapter down disables SFP+ optics */ + if (hw->mac.ops.enable_tx_laser) + hw->mac.ops.enable_tx_laser(hw); + + /* Link test performed before hardware reset so autoneg doesn't + * interfere with test result + */ + if (ixgbe_link_test(adapter, &data[4])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + ixgbe_reset(adapter); e_info(hw, "register testing starting\n"); if (ixgbe_reg_test(adapter, &data[0])) eth_test->flags |= ETH_TEST_FL_FAILED; @@ -1908,16 +1912,22 @@ static void ixgbe_diag_test(struct net_device *netdev, skip_loopback: ixgbe_reset(adapter); + /* clear testing bit and return adapter to previous state */ clear_bit(__IXGBE_TESTING, &adapter->state); if (if_running) dev_open(netdev); } else { e_info(hw, "online testing starting\n"); + + /* if adapter is down, SFP+ optics will be disabled */ + if (!if_running && hw->mac.ops.enable_tx_laser) + hw->mac.ops.enable_tx_laser(hw); + /* Online tests */ if (ixgbe_link_test(adapter, &data[4])) eth_test->flags |= ETH_TEST_FL_FAILED; - /* Online tests aren't run; pass by default */ + /* Offline tests aren't run; pass by default */ data[0] = 0; data[1] = 0; data[2] = 0; @@ -1925,6 +1935,10 @@ skip_loopback: clear_bit(__IXGBE_TESTING, &adapter->state); } + + /* if adapter was down, ensure SFP+ optics are disabled again */ + if (!if_running && hw->mac.ops.disable_tx_laser) + hw->mac.ops.disable_tx_laser(hw); skip_ol_tests: msleep_interruptible(4 * 1000); } @@ -2695,6 +2709,14 @@ static int ixgbe_get_ts_info(struct net_device *dev, (1 << HWTSTAMP_FILTER_NONE) | (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT) | + (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) | (1 << HWTSTAMP_FILTER_PTP_V2_EVENT); break; default: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e7109de2204a..5989b3fa9fdc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -803,6 +803,7 @@ static void ixgbe_tx_timeout_reset(struct ixgbe_adapter *adapter) /* Do the reset outside of interrupt context */ if (!test_bit(__IXGBE_DOWN, &adapter->state)) { adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED; + e_warn(drv, "initiating reset due to tx timeout\n"); ixgbe_service_event_schedule(adapter); } } @@ -850,9 +851,6 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, total_bytes += tx_buffer->bytecount; total_packets += tx_buffer->gso_segs; - if (unlikely(tx_buffer->tx_flags & IXGBE_TX_FLAGS_TSTAMP)) - ixgbe_ptp_tx_hwtstamp(q_vector, tx_buffer->skb); - /* free the skb */ dev_kfree_skb_any(tx_buffer->skb); @@ -1441,7 +1439,7 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, ixgbe_rx_checksum(rx_ring, rx_desc, skb); - ixgbe_ptp_rx_hwtstamp(rx_ring->q_vector, rx_desc, skb); + ixgbe_ptp_rx_hwtstamp(rx_ring, rx_desc, skb); if ((dev->features & NETIF_F_HW_VLAN_RX) && ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { @@ -5534,6 +5532,8 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter) break; } + adapter->last_rx_ptp_check = jiffies; + if (adapter->flags2 & IXGBE_FLAG2_PTP_ENABLED) ixgbe_ptp_start_cyclecounter(adapter); @@ -5614,6 +5614,7 @@ static void ixgbe_watchdog_flush_tx(struct ixgbe_adapter *adapter) * to get done, so reset controller to flush Tx. * (Do the reset outside of interrupt context). */ + e_warn(drv, "initiating reset to clear Tx work after link loss\n"); adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED; } } @@ -5878,7 +5879,6 @@ static void ixgbe_service_task(struct work_struct *work) struct ixgbe_adapter *adapter = container_of(work, struct ixgbe_adapter, service_task); - ixgbe_reset_subtask(adapter); ixgbe_sfp_detection_subtask(adapter); ixgbe_sfp_link_config_subtask(adapter); @@ -5886,7 +5886,11 @@ static void ixgbe_service_task(struct work_struct *work) ixgbe_watchdog_subtask(adapter); ixgbe_fdir_reinit_subtask(adapter); ixgbe_check_hang_subtask(adapter); - ixgbe_ptp_overflow_check(adapter); + + if (adapter->flags2 & IXGBE_FLAG2_PTP_ENABLED) { + ixgbe_ptp_overflow_check(adapter); + ixgbe_ptp_rx_hang(adapter); + } ixgbe_service_event_complete(adapter); } @@ -6432,6 +6436,11 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IXGBE_TX_FLAGS_TSTAMP; + + /* schedule check for Tx timestamp */ + adapter->ptp_tx_skb = skb_get(skb); + adapter->ptp_tx_start = jiffies; + schedule_work(&adapter->ptp_tx_work); } #ifdef CONFIG_PCI_IOV @@ -6827,6 +6836,26 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) } #endif /* CONFIG_IXGBE_DCB */ +#ifdef CONFIG_PCI_IOV +void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + rtnl_lock(); +#ifdef CONFIG_IXGBE_DCB + ixgbe_setup_tc(netdev, netdev_get_num_tc(netdev)); +#else + if (netif_running(netdev)) + ixgbe_close(netdev); + ixgbe_clear_interrupt_scheme(adapter); + ixgbe_init_interrupt_scheme(adapter); + if (netif_running(netdev)) + ixgbe_open(netdev); +#endif + rtnl_unlock(); +} + +#endif void ixgbe_do_reset(struct net_device *netdev) { struct ixgbe_adapter *adapter = netdev_priv(netdev); @@ -7353,7 +7382,15 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } #ifdef CONFIG_PCI_IOV - ixgbe_enable_sriov(adapter, ii); + /* SR-IOV not supported on the 82598 */ + if (adapter->hw.mac.type == ixgbe_mac_82598EB) + goto skip_sriov; + /* Mailbox */ + ixgbe_init_mbx_params_pf(hw); + memcpy(&hw->mbx.ops, ii->mbx_ops, sizeof(hw->mbx.ops)); + ixgbe_enable_sriov(adapter); + pci_sriov_set_totalvfs(pdev, 63); +skip_sriov: #endif netdev->features = NETIF_F_SG | @@ -7609,8 +7646,14 @@ static void ixgbe_remove(struct pci_dev *pdev) if (netdev->reg_state == NETREG_REGISTERED) unregister_netdev(netdev); - ixgbe_disable_sriov(adapter); - +#ifdef CONFIG_PCI_IOV + /* + * Only disable SR-IOV on unload if the user specified the now + * deprecated max_vfs module parameter. + */ + if (max_vfs) + ixgbe_disable_sriov(adapter); +#endif ixgbe_clear_interrupt_scheme(adapter); ixgbe_release_hw_control(adapter); @@ -7824,6 +7867,7 @@ static struct pci_driver ixgbe_driver = { .resume = ixgbe_resume, #endif .shutdown = ixgbe_shutdown, + .sriov_configure = ixgbe_pci_sriov_configure, .err_handler = &ixgbe_err_handler }; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 1a751c9d09c4..53d204759711 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -96,15 +96,12 @@ #define IXGBE_MAX_TIMEADJ_VALUE 0x7FFFFFFFFFFFFFFFULL #define IXGBE_OVERFLOW_PERIOD (HZ * 30) +#define IXGBE_PTP_TX_TIMEOUT (HZ * 15) #ifndef NSECS_PER_SEC #define NSECS_PER_SEC 1000000000ULL #endif -static struct sock_filter ptp_filter[] = { - PTP_FILTER -}; - /** * ixgbe_ptp_setup_sdp * @hw: the hardware private structure @@ -405,149 +402,145 @@ void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr) } } - /** - * ixgbe_ptp_overflow_check - delayed work to detect SYSTIME overflow - * @work: structure containing information about this work task + * ixgbe_ptp_overflow_check - watchdog task to detect SYSTIME overflow + * @adapter: private adapter struct * - * this work function is scheduled to continue reading the timecounter + * this watchdog task periodically reads the timecounter * in order to prevent missing when the system time registers wrap - * around. This needs to be run approximately twice a minute when no - * PTP activity is occurring. + * around. This needs to be run approximately twice a minute. */ void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter) { - unsigned long elapsed_jiffies = adapter->last_overflow_check - jiffies; + bool timeout = time_is_before_jiffies(adapter->last_overflow_check + + IXGBE_OVERFLOW_PERIOD); struct timespec ts; - if ((adapter->flags2 & IXGBE_FLAG2_PTP_ENABLED) && - (elapsed_jiffies >= IXGBE_OVERFLOW_PERIOD)) { + if (timeout) { ixgbe_ptp_gettime(&adapter->ptp_caps, &ts); adapter->last_overflow_check = jiffies; } } /** - * ixgbe_ptp_match - determine if this skb matches a ptp packet - * @skb: pointer to the skb - * @hwtstamp: pointer to the hwtstamp_config to check - * - * Determine whether the skb should have been timestamped, assuming the - * hwtstamp was set via the hwtstamp ioctl. Returns non-zero when the packet - * should have a timestamp waiting in the registers, and 0 otherwise. + * ixgbe_ptp_rx_hang - detect error case when Rx timestamp registers latched + * @adapter: private network adapter structure * - * V1 packets have to check the version type to determine whether they are - * correct. However, we can't directly access the data because it might be - * fragmented in the SKB, in paged memory. In order to work around this, we - * use skb_copy_bits which will properly copy the data whether it is in the - * paged memory fragments or not. We have to copy the IP header as well as the - * message type. + * this watchdog task is scheduled to detect error case where hardware has + * dropped an Rx packet that was timestamped when the ring is full. The + * particular error is rare but leaves the device in a state unable to timestamp + * any future packets. */ -static int ixgbe_ptp_match(struct sk_buff *skb, int rx_filter) +void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter) { - struct iphdr iph; - u8 msgtype; - unsigned int type, offset; - - if (rx_filter == HWTSTAMP_FILTER_NONE) - return 0; - - type = sk_run_filter(skb, ptp_filter); - - if (likely(rx_filter == HWTSTAMP_FILTER_PTP_V2_EVENT)) - return type & PTP_CLASS_V2; + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_ring *rx_ring; + u32 tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL); + unsigned long rx_event; + int n; - /* For the remaining cases actually check message type */ - switch (type) { - case PTP_CLASS_V1_IPV4: - skb_copy_bits(skb, OFF_IHL, &iph, sizeof(iph)); - offset = ETH_HLEN + (iph.ihl << 2) + UDP_HLEN + OFF_PTP_CONTROL; - break; - case PTP_CLASS_V1_IPV6: - offset = OFF_PTP6 + OFF_PTP_CONTROL; - break; - default: - /* other cases invalid or handled above */ - return 0; + /* if we don't have a valid timestamp in the registers, just update the + * timeout counter and exit + */ + if (!(tsyncrxctl & IXGBE_TSYNCRXCTL_VALID)) { + adapter->last_rx_ptp_check = jiffies; + return; } - /* Make sure our buffer is long enough */ - if (skb->len < offset) - return 0; + /* determine the most recent watchdog or rx_timestamp event */ + rx_event = adapter->last_rx_ptp_check; + for (n = 0; n < adapter->num_rx_queues; n++) { + rx_ring = adapter->rx_ring[n]; + if (time_after(rx_ring->last_rx_timestamp, rx_event)) + rx_event = rx_ring->last_rx_timestamp; + } - skb_copy_bits(skb, offset, &msgtype, sizeof(msgtype)); + /* only need to read the high RXSTMP register to clear the lock */ + if (time_is_before_jiffies(rx_event + 5*HZ)) { + IXGBE_READ_REG(hw, IXGBE_RXSTMPH); + adapter->last_rx_ptp_check = jiffies; - switch (rx_filter) { - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - return (msgtype == IXGBE_RXMTRL_V1_SYNC_MSG); - break; - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - return (msgtype == IXGBE_RXMTRL_V1_DELAY_REQ_MSG); - break; - default: - return 0; + e_warn(drv, "clearing RX Timestamp hang"); } } /** * ixgbe_ptp_tx_hwtstamp - utility function which checks for TX time stamp - * @q_vector: structure containing interrupt and ring information - * @skb: particular skb to send timestamp with + * @adapter: the private adapter struct * * if the timestamp is valid, we convert it into the timecounter ns * value, then store that result into the shhwtstamps structure which * is passed up the network stack */ -void ixgbe_ptp_tx_hwtstamp(struct ixgbe_q_vector *q_vector, - struct sk_buff *skb) +static void ixgbe_ptp_tx_hwtstamp(struct ixgbe_adapter *adapter) { - struct ixgbe_adapter *adapter; - struct ixgbe_hw *hw; + struct ixgbe_hw *hw = &adapter->hw; struct skb_shared_hwtstamps shhwtstamps; u64 regval = 0, ns; - u32 tsynctxctl; unsigned long flags; - /* we cannot process timestamps on a ring without a q_vector */ - if (!q_vector || !q_vector->adapter) - return; - - adapter = q_vector->adapter; - hw = &adapter->hw; - - tsynctxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL); regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPL); regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPH) << 32; - /* - * if TX timestamp is not valid, exit after clearing the - * timestamp registers - */ - if (!(tsynctxctl & IXGBE_TSYNCTXCTL_VALID)) - return; - spin_lock_irqsave(&adapter->tmreg_lock, flags); ns = timecounter_cyc2time(&adapter->tc, regval); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); memset(&shhwtstamps, 0, sizeof(shhwtstamps)); shhwtstamps.hwtstamp = ns_to_ktime(ns); - skb_tstamp_tx(skb, &shhwtstamps); + skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); + + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; +} + +/** + * ixgbe_ptp_tx_hwtstamp_work + * @work: pointer to the work struct + * + * This work item polls TSYNCTXCTL valid bit to determine when a Tx hardware + * timestamp has been taken for the current skb. It is necesary, because the + * descriptor's "done" bit does not correlate with the timestamp event. + */ +static void ixgbe_ptp_tx_hwtstamp_work(struct work_struct *work) +{ + struct ixgbe_adapter *adapter = container_of(work, struct ixgbe_adapter, + ptp_tx_work); + struct ixgbe_hw *hw = &adapter->hw; + bool timeout = time_is_before_jiffies(adapter->ptp_tx_start + + IXGBE_PTP_TX_TIMEOUT); + u32 tsynctxctl; + + /* we have to have a valid skb */ + if (!adapter->ptp_tx_skb) + return; + + if (timeout) { + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + e_warn(drv, "clearing Tx Timestamp hang"); + return; + } + + tsynctxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL); + if (tsynctxctl & IXGBE_TSYNCTXCTL_VALID) + ixgbe_ptp_tx_hwtstamp(adapter); + else + /* reschedule to keep checking if it's not available yet */ + schedule_work(&adapter->ptp_tx_work); } /** - * ixgbe_ptp_rx_hwtstamp - utility function which checks for RX time stamp + * __ixgbe_ptp_rx_hwtstamp - utility function which checks for RX time stamp * @q_vector: structure containing interrupt and ring information - * @rx_desc: the rx descriptor * @skb: particular skb to send timestamp with * * if the timestamp is valid, we convert it into the timecounter ns * value, then store that result into the shhwtstamps structure which * is passed up the network stack */ -void ixgbe_ptp_rx_hwtstamp(struct ixgbe_q_vector *q_vector, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) +void __ixgbe_ptp_rx_hwtstamp(struct ixgbe_q_vector *q_vector, + struct sk_buff *skb) { struct ixgbe_adapter *adapter; struct ixgbe_hw *hw; @@ -563,37 +556,17 @@ void ixgbe_ptp_rx_hwtstamp(struct ixgbe_q_vector *q_vector, adapter = q_vector->adapter; hw = &adapter->hw; - if (likely(!ixgbe_ptp_match(skb, adapter->rx_hwtstamp_filter))) - return; - + /* + * Read the tsyncrxctl register afterwards in order to prevent taking an + * I/O hit on every packet. + */ tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL); - - /* Check if we have a valid timestamp and make sure the skb should - * have been timestamped */ if (!(tsyncrxctl & IXGBE_TSYNCRXCTL_VALID)) return; - /* - * Always read the registers, in order to clear a possible fault - * because of stagnant RX timestamp values for a packet that never - * reached the queue. - */ regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPL); regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPH) << 32; - /* - * If the timestamp bit is set in the packet's descriptor, we know the - * timestamp belongs to this packet. No other packet can be - * timestamped until the registers for timestamping have been read. - * Therefor only one packet with this bit can be in the queue at a - * time, and the rx timestamp values that were in the registers belong - * to this packet. - * - * If nothing went wrong, then it should have a skb_shared_tx that we - * can turn into a skb_shared_hwtstamps. - */ - if (unlikely(!ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS))) - return; spin_lock_irqsave(&adapter->tmreg_lock, flags); ns = timecounter_cyc2time(&adapter->tc, regval); @@ -660,11 +633,11 @@ int ixgbe_ptp_hwtstamp_ioctl(struct ixgbe_adapter *adapter, break; case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_mtrl = IXGBE_RXMTRL_V1_SYNC_MSG; + tsync_rx_mtrl |= IXGBE_RXMTRL_V1_SYNC_MSG; break; case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_mtrl = IXGBE_RXMTRL_V1_DELAY_REQ_MSG; + tsync_rx_mtrl |= IXGBE_RXMTRL_V1_DELAY_REQ_MSG; break; case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: @@ -698,9 +671,6 @@ int ixgbe_ptp_hwtstamp_ioctl(struct ixgbe_adapter *adapter, return 0; } - /* Store filter value for later use */ - adapter->rx_hwtstamp_filter = config.rx_filter; - /* define ethertype filter for timestamping L2 packets */ if (is_l2) IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_1588), @@ -902,11 +872,8 @@ void ixgbe_ptp_init(struct ixgbe_adapter *adapter) return; } - /* initialize the ptp filter */ - if (ptp_filter_init(ptp_filter, ARRAY_SIZE(ptp_filter))) - e_dev_warn("ptp_filter_init failed\n"); - spin_lock_init(&adapter->tmreg_lock); + INIT_WORK(&adapter->ptp_tx_work, ixgbe_ptp_tx_hwtstamp_work); adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, &adapter->pdev->dev); @@ -938,6 +905,12 @@ void ixgbe_ptp_stop(struct ixgbe_adapter *adapter) ixgbe_ptp_setup_sdp(adapter); + cancel_work_sync(&adapter->ptp_tx_work); + if (adapter->ptp_tx_skb) { + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + } + if (adapter->ptp_clock) { ptp_clock_unregister(adapter->ptp_clock); adapter->ptp_clock = NULL; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 647734b73202..ee3507f0ea53 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -44,50 +44,11 @@ #include "ixgbe_sriov.h" #ifdef CONFIG_PCI_IOV -void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, - const struct ixgbe_info *ii) +static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; int num_vf_macvlans, i; struct vf_macvlans *mv_list; - int pre_existing_vfs = 0; - - pre_existing_vfs = pci_num_vf(adapter->pdev); - if (!pre_existing_vfs && !adapter->num_vfs) - return; - - /* If there are pre-existing VFs then we have to force - * use of that many because they were not deleted the last - * time someone removed the PF driver. That would have - * been because they were allocated to guest VMs and can't - * be removed. Go ahead and just re-enable the old amount. - * If the user wants to change the number of VFs they can - * use ethtool while making sure no VFs are allocated to - * guest VMs... i.e. the right way. - */ - if (pre_existing_vfs) { - adapter->num_vfs = pre_existing_vfs; - dev_warn(&adapter->pdev->dev, "Virtual Functions already " - "enabled for this device - Please reload all " - "VF drivers to avoid spoofed packet errors\n"); - } else { - int err; - /* - * The 82599 supports up to 64 VFs per physical function - * but this implementation limits allocation to 63 so that - * basic networking resources are still available to the - * physical function. If the user requests greater thn - * 63 VFs then it is an error - reset to default of zero. - */ - adapter->num_vfs = min_t(unsigned int, adapter->num_vfs, 63); - - err = pci_enable_sriov(adapter->pdev, adapter->num_vfs); - if (err) { - e_err(probe, "Failed to enable PCI sriov: %d\n", err); - adapter->num_vfs = 0; - return; - } - } adapter->flags |= IXGBE_FLAG_SRIOV_ENABLED; e_info(probe, "SR-IOV enabled with %d VFs\n", adapter->num_vfs); @@ -128,12 +89,6 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, kcalloc(adapter->num_vfs, sizeof(struct vf_data_storage), GFP_KERNEL); if (adapter->vfinfo) { - /* Now that we're sure SR-IOV is enabled - * and memory allocated set up the mailbox parameters - */ - ixgbe_init_mbx_params_pf(hw); - memcpy(&hw->mbx.ops, ii->mbx_ops, sizeof(hw->mbx.ops)); - /* limit trafffic classes based on VFs enabled */ if ((adapter->hw.mac.type == ixgbe_mac_82599EB) && (adapter->num_vfs < 16)) { @@ -157,10 +112,62 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, /* enable spoof checking for all VFs */ for (i = 0; i < adapter->num_vfs; i++) adapter->vfinfo[i].spoofchk_enabled = true; + return 0; + } + + return -ENOMEM; +} + +/* Note this function is called when the user wants to enable SR-IOV + * VFs using the now deprecated module parameter + */ +void ixgbe_enable_sriov(struct ixgbe_adapter *adapter) +{ + int pre_existing_vfs = 0; + + pre_existing_vfs = pci_num_vf(adapter->pdev); + if (!pre_existing_vfs && !adapter->num_vfs) return; + + if (!pre_existing_vfs) + dev_warn(&adapter->pdev->dev, + "Enabling SR-IOV VFs using the module parameter is deprecated - please use the pci sysfs interface.\n"); + + /* If there are pre-existing VFs then we have to force + * use of that many - over ride any module parameter value. + * This may result from the user unloading the PF driver + * while VFs were assigned to guest VMs or because the VFs + * have been created via the new PCI SR-IOV sysfs interface. + */ + if (pre_existing_vfs) { + adapter->num_vfs = pre_existing_vfs; + dev_warn(&adapter->pdev->dev, + "Virtual Functions already enabled for this device - Please reload all VF drivers to avoid spoofed packet errors\n"); + } else { + int err; + /* + * The 82599 supports up to 64 VFs per physical function + * but this implementation limits allocation to 63 so that + * basic networking resources are still available to the + * physical function. If the user requests greater thn + * 63 VFs then it is an error - reset to default of zero. + */ + adapter->num_vfs = min_t(unsigned int, adapter->num_vfs, 63); + + err = pci_enable_sriov(adapter->pdev, adapter->num_vfs); + if (err) { + e_err(probe, "Failed to enable PCI sriov: %d\n", err); + adapter->num_vfs = 0; + return; + } } - /* Oh oh */ + if (!__ixgbe_enable_sriov(adapter)) + return; + + /* If we have gotten to this point then there is no memory available + * to manage the VF devices - print message and bail. + */ e_err(probe, "Unable to allocate memory for VF Data Storage - " "SRIOV disabled\n"); ixgbe_disable_sriov(adapter); @@ -200,11 +207,12 @@ static bool ixgbe_vfs_are_assigned(struct ixgbe_adapter *adapter) } #endif /* #ifdef CONFIG_PCI_IOV */ -void ixgbe_disable_sriov(struct ixgbe_adapter *adapter) +int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 gpie; u32 vmdctl; + int rss; /* set num VFs to 0 to prevent access to vfinfo */ adapter->num_vfs = 0; @@ -219,7 +227,7 @@ void ixgbe_disable_sriov(struct ixgbe_adapter *adapter) /* if SR-IOV is already disabled then there is nothing to do */ if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) - return; + return 0; #ifdef CONFIG_PCI_IOV /* @@ -229,7 +237,7 @@ void ixgbe_disable_sriov(struct ixgbe_adapter *adapter) */ if (ixgbe_vfs_are_assigned(adapter)) { e_dev_warn("Unloading driver while VFs are assigned - VFs will not be deallocated\n"); - return; + return -EPERM; } /* disable iov and allow time for transactions to clear */ pci_disable_sriov(adapter->pdev); @@ -252,10 +260,94 @@ void ixgbe_disable_sriov(struct ixgbe_adapter *adapter) adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED; adapter->ring_feature[RING_F_VMDQ].offset = 0; + rss = min_t(int, IXGBE_MAX_RSS_INDICES, num_online_cpus()); + adapter->ring_feature[RING_F_RSS].limit = rss; + /* take a breather then clean up driver data */ msleep(100); adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED; + return 0; +} + +static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs) +{ +#ifdef CONFIG_PCI_IOV + struct ixgbe_adapter *adapter = pci_get_drvdata(dev); + int err = 0; + int i; + int pre_existing_vfs = pci_num_vf(dev); + + if (pre_existing_vfs && pre_existing_vfs != num_vfs) + err = ixgbe_disable_sriov(adapter); + else if (pre_existing_vfs && pre_existing_vfs == num_vfs) + goto out; + + if (err) + goto err_out; + + /* While the SR-IOV capability structure reports total VFs to be + * 64 we limit the actual number that can be allocated to 63 so + * that some transmit/receive resources can be reserved to the + * PF. The PCI bus driver already checks for other values out of + * range. + */ + if (num_vfs > 63) { + err = -EPERM; + goto err_out; + } + + adapter->num_vfs = num_vfs; + + err = __ixgbe_enable_sriov(adapter); + if (err) + goto err_out; + + for (i = 0; i < adapter->num_vfs; i++) + ixgbe_vf_configuration(dev, (i | 0x10000000)); + + err = pci_enable_sriov(dev, num_vfs); + if (err) { + e_dev_warn("Failed to enable PCI sriov: %d\n", err); + goto err_out; + } + ixgbe_sriov_reinit(adapter); + +out: + return num_vfs; + +err_out: + return err; +#endif + return 0; +} + +static int ixgbe_pci_sriov_disable(struct pci_dev *dev) +{ + struct ixgbe_adapter *adapter = pci_get_drvdata(dev); + int err; + u32 current_flags = adapter->flags; + + err = ixgbe_disable_sriov(adapter); + + /* Only reinit if no error and state changed */ + if (!err && current_flags != adapter->flags) { + /* ixgbe_disable_sriov() doesn't clear VMDQ flag */ + adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED; +#ifdef CONFIG_PCI_IOV + ixgbe_sriov_reinit(adapter); +#endif + } + + return err; +} + +int ixgbe_pci_sriov_configure(struct pci_dev *dev, int num_vfs) +{ + if (num_vfs == 0) + return ixgbe_pci_sriov_disable(dev); + else + return ixgbe_pci_sriov_enable(dev, num_vfs); } static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h index 21bc1dd1d33e..008f9cea68d1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h @@ -41,11 +41,11 @@ int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); int ixgbe_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi); void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter); -void ixgbe_disable_sriov(struct ixgbe_adapter *adapter); +int ixgbe_disable_sriov(struct ixgbe_adapter *adapter); #ifdef CONFIG_PCI_IOV -void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, - const struct ixgbe_info *ii); +void ixgbe_enable_sriov(struct ixgbe_adapter *adapter); #endif +int ixgbe_pci_sriov_configure(struct pci_dev *dev, int num_vfs); static inline void ixgbe_set_vmvir(struct ixgbe_adapter *adapter, u16 vid, u16 qos, u32 vf) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 551e31dc25c5..c3db6cd69b68 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2245,10 +2245,23 @@ static void ixgbevf_watchdog_task(struct work_struct *work) if (link_up) { if (!netif_carrier_ok(netdev)) { + char *link_speed_string; + switch (link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + link_speed_string = "10 Gbps"; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + link_speed_string = "1 Gbps"; + break; + case IXGBE_LINK_SPEED_100_FULL: + link_speed_string = "100 Mbps"; + break; + default: + link_speed_string = "unknown speed"; + break; + } dev_info(&adapter->pdev->dev, - "NIC Link is Up, %u Gbps\n", - (link_speed == IXGBE_LINK_SPEED_10GB_FULL) ? - 10 : 1); + "NIC Link is Up, %s\n", link_speed_string); netif_carrier_on(netdev); netif_tx_wake_all_queues(netdev); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index b4675138533a..9c42812d2f6b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -767,9 +767,9 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) /* Update multicast list - we cache all addresses so they won't * change while HW is updated holding the command semaphor */ - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); mlx4_en_cache_mclist(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); list_for_each_entry(mclist, &priv->mc_list, list) { mcast_addr = mlx4_en_mac_to_u64(mclist->addr); mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, @@ -977,12 +977,12 @@ static void mlx4_en_do_get_stats(struct work_struct *work) struct mlx4_en_dev *mdev = priv->mdev; int err; - err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0); - if (err) - en_dbg(HW, priv, "Could not update stats\n"); - mutex_lock(&mdev->state_lock); if (mdev->device_up) { + err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0); + if (err) + en_dbg(HW, priv, "Could not update stats\n"); + if (priv->port_up) mlx4_en_auto_moderation(priv); @@ -1167,15 +1167,6 @@ int mlx4_en_start_port(struct net_device *dev) /* Must redo promiscuous mode setup. */ priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC); - if (mdev->dev->caps.steering_mode == - MLX4_STEERING_MODE_DEVICE_MANAGED) { - mlx4_flow_steer_promisc_remove(mdev->dev, - priv->port, - MLX4_FS_PROMISC_UPLINK); - mlx4_flow_steer_promisc_remove(mdev->dev, - priv->port, - MLX4_FS_PROMISC_ALL_MULTI); - } /* Schedule multicast task to populate multicast list */ queue_work(mdev->workqueue, &priv->mcast_task); @@ -1227,6 +1218,32 @@ void mlx4_en_stop_port(struct net_device *dev) /* Set port as not active */ priv->port_up = false; + /* Promsicuous mode */ + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + priv->flags &= ~(MLX4_EN_FLAG_PROMISC | + MLX4_EN_FLAG_MC_PROMISC); + mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_PROMISC_UPLINK); + mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_PROMISC_ALL_MULTI); + } else if (priv->flags & MLX4_EN_FLAG_PROMISC) { + priv->flags &= ~MLX4_EN_FLAG_PROMISC; + + /* Disable promiscouos mode */ + mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn, + priv->port); + + /* Disable Multicast promisc */ + if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { + mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, + priv->port); + priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; + } + } + /* Detach All multicasts */ memset(&mc_list[10], 0xff, ETH_ALEN); mc_list[5] = priv->port; /* needed for B0 steering support */ @@ -1437,9 +1454,6 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) priv->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->rx_ring_num); if (!priv->dev->rx_cpu_rmap) goto err; - - INIT_LIST_HEAD(&priv->filters); - spin_lock_init(&priv->filters_lock); #endif return 0; @@ -1634,6 +1648,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, if (err) goto out; +#ifdef CONFIG_RFS_ACCEL + INIT_LIST_HEAD(&priv->filters); + spin_lock_init(&priv->filters_lock); +#endif + /* Allocate page for receive rings */ err = mlx4_alloc_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 16af338880c3..3c5ffd2f5c6f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -588,7 +588,21 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) netif_tx_stop_queue(ring->tx_queue); priv->port_stats.queue_stopped++; - return NETDEV_TX_BUSY; + /* If queue was emptied after the if, and before the + * stop_queue - need to wake the queue, or else it will remain + * stopped forever. + * Need a memory barrier to make sure ring->cons was not + * updated before queue was stopped. + */ + wmb(); + + if (unlikely(((int)(ring->prod - ring->cons)) <= + ring->size - HEADROOM - MAX_DESC_TXBBS)) { + netif_tx_wake_queue(ring->tx_queue); + priv->port_stats.wake_queue++; + } else { + return NETDEV_TX_BUSY; + } } /* Track current inflight packets for performance analysis */ diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index e1bafffbc3b1..983fd3d62bce 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2169,7 +2169,8 @@ slave_start: dev->num_slaves = MLX4_MAX_NUM_SLAVES; else { dev->num_slaves = 0; - if (mlx4_multi_func_init(dev)) { + err = mlx4_multi_func_init(dev); + if (err) { mlx4_err(dev, "Failed to init slave mfunc" " interface, aborting.\n"); goto err_cmd; @@ -2193,7 +2194,8 @@ slave_start: /* In master functions, the communication channel must be initialized * after obtaining its address from fw */ if (mlx4_is_master(dev)) { - if (mlx4_multi_func_init(dev)) { + err = mlx4_multi_func_init(dev); + if (err) { mlx4_err(dev, "Failed to init master mfunc" "interface, aborting.\n"); goto err_close; @@ -2210,6 +2212,7 @@ slave_start: mlx4_enable_msi_x(dev); if ((mlx4_is_mfunc(dev)) && !(dev->flags & MLX4_FLAG_MSI_X)) { + err = -ENOSYS; mlx4_err(dev, "INTx is not supported in multi-function mode." " aborting.\n"); goto err_free_eq; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 893cbe8dd8e2..f71aef58f84d 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -37,9 +37,9 @@ #include "qlcnic_83xx_hw.h" #define _QLCNIC_LINUX_MAJOR 5 -#define _QLCNIC_LINUX_MINOR 0 -#define _QLCNIC_LINUX_SUBVERSION 31 -#define QLCNIC_LINUX_VERSIONID "5.1.31" +#define _QLCNIC_LINUX_MINOR 1 +#define _QLCNIC_LINUX_SUBVERSION 32 +#define QLCNIC_LINUX_VERSIONID "5.1.32" #define QLCNIC_DRV_IDC_VER 0x01 #define QLCNIC_DRIVER_VERSION ((_QLCNIC_LINUX_MAJOR << 16) |\ (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION)) @@ -436,6 +436,7 @@ struct qlcnic_hardware_context { u16 act_pci_func; u32 capabilities; + u32 capabilities2; u32 temp; u32 int_vec_bit; u32 fw_hal_version; @@ -745,6 +746,11 @@ struct qlcnic_mac_list_s { uint8_t mac_addr[ETH_ALEN+2]; }; +/* MAC Learn */ +#define NO_MAC_LEARN 0 +#define DRV_MAC_LEARN 1 +#define FDB_MAC_LEARN 2 + #define QLCNIC_HOST_REQUEST 0x13 #define QLCNIC_REQUEST 0x14 @@ -798,6 +804,8 @@ struct qlcnic_mac_list_s { #define QLCNIC_FW_CAPABILITY_MORE_CAPS BIT_31 #define QLCNIC_FW_CAPABILITY_2_LRO_MAX_TCP_SEG BIT_2 +#define QLCNIC_FW_CAP2_HW_LRO_IPV6 BIT_3 +#define QLCNIC_FW_CAPABILITY_2_OCBB BIT_5 /* module types */ #define LINKEVENT_MODULE_NOT_PRESENT 1 @@ -978,7 +986,8 @@ struct qlcnic_adapter { u8 mac_addr[ETH_ALEN]; u64 dev_rst_time; - u8 mac_learn; + bool drv_mac_learn; + bool fdb_mac_learn; unsigned long vlans[BITS_TO_LONGS(VLAN_N_VID)]; u8 flash_mfg_id; struct qlcnic_npar_info *npars; @@ -1418,9 +1427,12 @@ void qlcnic_post_rx_buffers(struct qlcnic_adapter *adapter, struct qlcnic_host_rds_ring *rds_ring, u8 ring_id); int qlcnic_process_rcv_ring(struct qlcnic_host_sds_ring *sds_ring, int max); void qlcnic_set_multi(struct net_device *netdev); +int qlcnic_nic_add_mac(struct qlcnic_adapter *, const u8 *); +int qlcnic_nic_del_mac(struct qlcnic_adapter *, const u8 *); void qlcnic_free_mac_list(struct qlcnic_adapter *adapter); int qlcnic_fw_cmd_set_mtu(struct qlcnic_adapter *adapter, int mtu); +int qlcnic_fw_cmd_set_drv_version(struct qlcnic_adapter *); int qlcnic_change_mtu(struct net_device *netdev, int new_mtu); netdev_features_t qlcnic_fix_features(struct net_device *netdev, netdev_features_t features); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c index ee68fe35a27e..7372964d3a76 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c @@ -160,6 +160,37 @@ int qlcnic_82xx_issue_cmd(struct qlcnic_adapter *adapter, return cmd->rsp.arg[0]; } +int qlcnic_fw_cmd_set_drv_version(struct qlcnic_adapter *adapter) +{ + struct qlcnic_cmd_args cmd; + u32 arg1, arg2, arg3; + char drv_string[12]; + int err = 0; + + memset(drv_string, 0, sizeof(drv_string)); + snprintf(drv_string, sizeof(drv_string), "%d"".""%d"".""%d", + _QLCNIC_LINUX_MAJOR, _QLCNIC_LINUX_MINOR, + _QLCNIC_LINUX_SUBVERSION); + + qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_SET_DRV_VER); + memcpy(&arg1, drv_string, sizeof(u32)); + memcpy(&arg2, drv_string + 4, sizeof(u32)); + memcpy(&arg3, drv_string + 8, sizeof(u32)); + + cmd.req.arg[1] = arg1; + cmd.req.arg[2] = arg2; + cmd.req.arg[3] = arg3; + + err = qlcnic_issue_cmd(adapter, &cmd); + if (err) { + dev_info(&adapter->pdev->dev, + "Failed to set driver version in firmware\n"); + return -EIO; + } + + return 0; +} + int qlcnic_fw_cmd_set_mtu(struct qlcnic_adapter *adapter, int mtu) { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c index 6f5b5eb2c44a..6c6ecfc152b8 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c @@ -446,7 +446,29 @@ int qlcnic_82xx_sre_macaddr_change(struct qlcnic_adapter *adapter, u8 *addr, return qlcnic_send_cmd_descs(adapter, (struct cmd_desc_type0 *)&req, 1); } -static int qlcnic_nic_add_mac(struct qlcnic_adapter *adapter, const u8 *addr) +int qlcnic_nic_del_mac(struct qlcnic_adapter *adapter, const u8 *addr) +{ + struct list_head *head; + struct qlcnic_mac_list_s *cur; + int err = -EINVAL; + + /* Delete MAC from the existing list */ + list_for_each(head, &adapter->mac_list) { + cur = list_entry(head, struct qlcnic_mac_list_s, list); + if (memcmp(addr, cur->mac_addr, ETH_ALEN) == 0) { + err = qlcnic_sre_macaddr_change(adapter, cur->mac_addr, + 0, QLCNIC_MAC_DEL); + if (err) + return err; + list_del(&cur->list); + kfree(cur); + return err; + } + } + return err; +} + +int qlcnic_nic_add_mac(struct qlcnic_adapter *adapter, const u8 *addr) { struct list_head *head; struct qlcnic_mac_list_s *cur; @@ -510,11 +532,11 @@ void qlcnic_set_multi(struct net_device *netdev) } send_fw_cmd: - if (mode == VPORT_MISS_MODE_ACCEPT_ALL) { + if (mode == VPORT_MISS_MODE_ACCEPT_ALL && !adapter->fdb_mac_learn) { qlcnic_alloc_lb_filters_mem(adapter); - adapter->mac_learn = 1; + adapter->drv_mac_learn = true; } else { - adapter->mac_learn = 0; + adapter->drv_mac_learn = false; } qlcnic_nic_set_promisc(adapter, mode); @@ -687,6 +709,11 @@ void qlcnic_82xx_config_intr_coalesce(struct qlcnic_adapter *adapter) "Could not send interrupt coalescing parameters\n"); } +#define QLCNIC_ENABLE_IPV4_LRO 1 +#define QLCNIC_ENABLE_IPV6_LRO 2 +#define QLCNIC_NO_DEST_IPV4_CHECK (1 << 8) +#define QLCNIC_NO_DEST_IPV6_CHECK (2 << 8) + int qlcnic_82xx_config_hw_lro(struct qlcnic_adapter *adapter, int enable) { struct qlcnic_nic_req req; @@ -703,7 +730,15 @@ int qlcnic_82xx_config_hw_lro(struct qlcnic_adapter *adapter, int enable) word = QLCNIC_H2C_OPCODE_CONFIG_HW_LRO | ((u64)adapter->portnum << 16); req.req_hdr = cpu_to_le64(word); - req.words[0] = cpu_to_le64(enable); + word = 0; + if (enable) { + word = QLCNIC_ENABLE_IPV4_LRO | QLCNIC_NO_DEST_IPV4_CHECK; + if (adapter->ahw->capabilities2 & QLCNIC_FW_CAP2_HW_LRO_IPV6) + word |= QLCNIC_ENABLE_IPV6_LRO | + QLCNIC_NO_DEST_IPV6_CHECK; + } + + req.words[0] = cpu_to_le64(word); rv = qlcnic_send_cmd_descs(adapter, (struct cmd_desc_type0 *)&req, 1); if (rv != 0) @@ -743,7 +778,10 @@ int qlcnic_config_bridged_mode(struct qlcnic_adapter *adapter, u32 enable) } -#define RSS_HASHTYPE_IP_TCP 0x3 +#define QLCNIC_RSS_HASHTYPE_IP_TCP 0x3 +#define QLCNIC_ENABLE_TYPE_C_RSS BIT_10 +#define QLCNIC_RSS_FEATURE_FLAG (1ULL << 63) +#define QLCNIC_RSS_IND_TABLE_MASK 0x7ULL int qlcnic_82xx_config_rss(struct qlcnic_adapter *adapter, int enable) { @@ -770,13 +808,19 @@ int qlcnic_82xx_config_rss(struct qlcnic_adapter *adapter, int enable) * 7-6: hash_type_ipv6 * 8: enable * 9: use indirection table - * 47-10: reserved - * 63-48: indirection table mask + * 10: type-c rss + * 11: udp rss + * 47-12: reserved + * 62-48: indirection table mask + * 63: feature flag */ - word = ((u64)(RSS_HASHTYPE_IP_TCP & 0x3) << 4) | - ((u64)(RSS_HASHTYPE_IP_TCP & 0x3) << 6) | + word = ((u64)(QLCNIC_RSS_HASHTYPE_IP_TCP & 0x3) << 4) | + ((u64)(QLCNIC_RSS_HASHTYPE_IP_TCP & 0x3) << 6) | ((u64)(enable & 0x1) << 8) | - ((0x7ULL) << 48); + ((u64)QLCNIC_RSS_IND_TABLE_MASK << 48) | + (u64)QLCNIC_ENABLE_TYPE_C_RSS | + (u64)QLCNIC_RSS_FEATURE_FLAG; + req.words[0] = cpu_to_le64(word); for (i = 0; i < 5; i++) req.words[i+1] = cpu_to_le64(key[i]); @@ -1358,7 +1402,7 @@ int qlcnic_82xx_config_led(struct qlcnic_adapter *adapter, u32 state, u32 rate) word = QLCNIC_H2C_OPCODE_CONFIG_LED | ((u64)adapter->portnum << 16); req.req_hdr = cpu_to_le64(word); - req.words[0] = cpu_to_le64((u64)rate << 32); + req.words[0] = cpu_to_le64(((u64)rate << 32) | adapter->portnum); req.words[1] = cpu_to_le64(state); rv = qlcnic_send_cmd_descs(adapter, (struct cmd_desc_type0 *)&req, 1); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 383ecd20d9b5..fdf34836ef41 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -521,7 +521,7 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if (unlikely(qlcnic_tx_pkt(adapter, first_desc, skb))) goto unwind_buff; - if (adapter->mac_learn) + if (adapter->drv_mac_learn) qlcnic_send_filter(adapter, first_desc, skb); adapter->stats.txbytes += skb->len; @@ -973,6 +973,7 @@ qlcnic_process_lro(struct qlcnic_adapter *adapter, struct sk_buff *skb; struct qlcnic_host_rds_ring *rds_ring; struct iphdr *iph; + struct ipv6hdr *ipv6h; struct tcphdr *th; bool push, timestamp; int index, l2_hdr_offset, l4_hdr_offset; @@ -1016,12 +1017,21 @@ qlcnic_process_lro(struct qlcnic_adapter *adapter, } skb->protocol = eth_type_trans(skb, netdev); - iph = (struct iphdr *)skb->data; - th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); - length = (iph->ihl << 2) + (th->doff << 2) + lro_length; - iph->tot_len = htons(length); - iph->check = 0; - iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + + if (htons(skb->protocol) == ETH_P_IPV6) { + ipv6h = (struct ipv6hdr *)skb->data; + th = (struct tcphdr *)(skb->data + sizeof(struct ipv6hdr)); + length = (th->doff << 2) + lro_length; + ipv6h->payload_len = htons(length); + } else { + iph = (struct iphdr *)skb->data; + th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); + length = (iph->ihl << 2) + (th->doff << 2) + lro_length; + iph->tot_len = htons(length); + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + } + th->psh = push; th->seq = htonl(seq_number); length = skb->len; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index fb7ac8ecd45a..e6b363a7664f 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -32,7 +32,8 @@ static const char qlcnic_driver_string[] = "QLogic 1/10 GbE " static int qlcnic_mac_learn; module_param(qlcnic_mac_learn, int, 0444); -MODULE_PARM_DESC(qlcnic_mac_learn, "Mac Filter (0=disabled, 1=enabled)"); +MODULE_PARM_DESC(qlcnic_mac_learn, + "Mac Filter (0=learning is disabled, 1=Driver learning is enabled, 2=FDB learning is enabled)"); int qlcnic_use_msi = 1; MODULE_PARM_DESC(use_msi, "MSI interrupt (0=disabled, 1=enabled"); @@ -246,6 +247,77 @@ static int qlcnic_set_mac(struct net_device *netdev, void *p) return 0; } +static int qlcnic_fdb_del(struct ndmsg *ndm, struct net_device *netdev, + const unsigned char *addr) +{ + struct qlcnic_adapter *adapter = netdev_priv(netdev); + int err = -EOPNOTSUPP; + + if (!adapter->fdb_mac_learn) { + pr_info("%s: Driver mac learn is enabled, FDB operation not allowed\n", + __func__); + return err; + } + + if (adapter->flags & QLCNIC_ESWITCH_ENABLED) { + if (is_unicast_ether_addr(addr)) + err = qlcnic_nic_del_mac(adapter, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_del(netdev, addr); + else + err = -EINVAL; + } + return err; +} + +static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *netdev, + const unsigned char *addr, u16 flags) +{ + struct qlcnic_adapter *adapter = netdev_priv(netdev); + int err = 0; + + if (!adapter->fdb_mac_learn) { + pr_info("%s: Driver mac learn is enabled, FDB operation not allowed\n", + __func__); + return -EOPNOTSUPP; + } + + if (!(adapter->flags & QLCNIC_ESWITCH_ENABLED)) { + pr_info("%s: FDB e-switch is not enabled\n", __func__); + return -EOPNOTSUPP; + } + + if (ether_addr_equal(addr, adapter->mac_addr)) + return err; + + if (is_unicast_ether_addr(addr)) + err = qlcnic_nic_add_mac(adapter, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_add_excl(netdev, addr); + else + err = -EINVAL; + + return err; +} + +static int qlcnic_fdb_dump(struct sk_buff *skb, struct netlink_callback *ncb, + struct net_device *netdev, int idx) +{ + struct qlcnic_adapter *adapter = netdev_priv(netdev); + + if (!adapter->fdb_mac_learn) { + pr_info("%s: Driver mac learn is enabled, FDB operation not allowed\n", + __func__); + return -EOPNOTSUPP; + } + + if (adapter->flags & QLCNIC_ESWITCH_ENABLED) + idx = ndo_dflt_fdb_dump(skb, ncb, netdev, idx); + + return idx; +} + static void qlcnic_82xx_cancel_idc_work(struct qlcnic_adapter *adapter) { while (test_and_set_bit(__QLCNIC_RESETTING, &adapter->state)) @@ -268,6 +340,9 @@ static const struct net_device_ops qlcnic_netdev_ops = { .ndo_tx_timeout = qlcnic_tx_timeout, .ndo_vlan_rx_add_vid = qlcnic_vlan_rx_add, .ndo_vlan_rx_kill_vid = qlcnic_vlan_rx_del, + .ndo_fdb_add = qlcnic_fdb_add, + .ndo_fdb_del = qlcnic_fdb_del, + .ndo_fdb_dump = qlcnic_fdb_dump, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = qlcnic_poll_controller, #endif @@ -395,8 +470,9 @@ int qlcnic_enable_msix(struct qlcnic_adapter *adapter, u32 num_msix) return err; } -static void qlcnic_enable_msi_legacy(struct qlcnic_adapter *adapter) +static int qlcnic_enable_msi_legacy(struct qlcnic_adapter *adapter) { + int err = 0; u32 offset, mask_reg; const struct qlcnic_legacy_intr_set *legacy_intrp; struct qlcnic_hardware_context *ahw = adapter->ahw; @@ -409,8 +485,10 @@ static void qlcnic_enable_msi_legacy(struct qlcnic_adapter *adapter) offset); dev_info(&pdev->dev, "using msi interrupts\n"); adapter->msix_entries[0].vector = pdev->irq; - return; + return err; } + if (qlcnic_use_msi || qlcnic_use_msi_x) + return -EOPNOTSUPP; legacy_intrp = &legacy_intr[adapter->ahw->pci_func]; adapter->ahw->int_vec_bit = legacy_intrp->int_vec_bit; @@ -422,11 +500,12 @@ static void qlcnic_enable_msi_legacy(struct qlcnic_adapter *adapter) adapter->crb_int_state_reg = qlcnic_get_ioaddr(ahw, ISR_INT_STATE_REG); dev_info(&pdev->dev, "using legacy interrupts\n"); adapter->msix_entries[0].vector = pdev->irq; + return err; } int qlcnic_82xx_setup_intr(struct qlcnic_adapter *adapter, u8 num_intr) { - int num_msix, err; + int num_msix, err = 0; if (!num_intr) num_intr = QLCNIC_DEF_NUM_STS_DESC_RINGS; @@ -441,8 +520,11 @@ int qlcnic_82xx_setup_intr(struct qlcnic_adapter *adapter, u8 num_intr) if (err == -ENOMEM || !err) return err; - qlcnic_enable_msi_legacy(adapter); - return 0; + err = qlcnic_enable_msi_legacy(adapter); + if (!err) + return err; + + return -EIO; } void qlcnic_teardown_intr(struct qlcnic_adapter *adapter) @@ -781,6 +863,12 @@ qlcnic_initialize_nic(struct qlcnic_adapter *adapter) adapter->ahw->max_tx_ques = nic_info.max_tx_ques; adapter->ahw->max_rx_ques = nic_info.max_rx_ques; adapter->ahw->capabilities = nic_info.capabilities; + + if (adapter->ahw->capabilities & QLCNIC_FW_CAPABILITY_MORE_CAPS) { + u32 temp; + temp = QLCRD32(adapter, CRB_FW_CAPABILITIES_2); + adapter->ahw->capabilities2 = temp; + } adapter->ahw->max_mac_filters = nic_info.max_mac_filters; adapter->ahw->max_mtu = nic_info.max_mtu; @@ -1724,6 +1812,7 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct qlcnic_adapter *adapter = NULL; struct qlcnic_hardware_context *ahw; int err, pci_using_dac = -1; + u32 capab2; char board_name[QLCNIC_MAX_BOARD_NAME_LEN]; err = pci_enable_device(pdev); @@ -1788,7 +1877,10 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->dev_rst_time = jiffies; adapter->ahw->revision_id = pdev->revision; - adapter->mac_learn = qlcnic_mac_learn; + if (qlcnic_mac_learn == FDB_MAC_LEARN) + adapter->fdb_mac_learn = true; + else if (qlcnic_mac_learn == DRV_MAC_LEARN) + adapter->drv_mac_learn = true; adapter->max_drv_tx_rings = 1; rwlock_init(&adapter->ahw->crb_lock); @@ -1836,8 +1928,10 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) board_name, adapter->ahw->revision_id); } err = qlcnic_setup_intr(adapter, 0); - if (err) + if (err) { + dev_err(&pdev->dev, "Failed to setup interrupt\n"); goto err_out_disable_msi; + } if (qlcnic_83xx_check(adapter)) { err = qlcnic_83xx_setup_mbx_intr(adapter); @@ -1849,6 +1943,14 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_out_disable_mbx_intr; + if (qlcnic_82xx_check(adapter)) { + if (ahw->capabilities & QLCNIC_FW_CAPABILITY_MORE_CAPS) { + capab2 = QLCRD32(adapter, CRB_FW_CAPABILITIES_2); + if (capab2 & QLCNIC_FW_CAPABILITY_2_OCBB) + qlcnic_fw_cmd_set_drv_version(adapter); + } + } + pci_set_drvdata(pdev, adapter); if (qlcnic_82xx_check(adapter)) @@ -1869,7 +1971,7 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (qlcnic_get_act_pci_func(adapter)) goto err_out_disable_mbx_intr; - if (adapter->mac_learn) + if (adapter->drv_mac_learn) qlcnic_alloc_lb_filters_mem(adapter); qlcnic_add_sysfs(adapter); @@ -2118,7 +2220,7 @@ void qlcnic_alloc_lb_filters_mem(struct qlcnic_adapter *adapter) } head = kcalloc(adapter->fhash.fbucket_size, - sizeof(struct hlist_head), GFP_KERNEL); + sizeof(struct hlist_head), GFP_ATOMIC); if (!head) return; @@ -2961,6 +3063,12 @@ static int qlcnic_attach_func(struct pci_dev *pdev) adapter->msix_entries = NULL; err = qlcnic_setup_intr(adapter, 0); + if (err) { + kfree(adapter->msix_entries); + netdev_err(netdev, "failed to setup interrupt\n"); + return err; + } + if (qlcnic_83xx_check(adapter)) { err = qlcnic_83xx_setup_mbx_intr(adapter); if (err) { @@ -3116,9 +3224,11 @@ int qlcnic_set_max_rss(struct qlcnic_adapter *adapter, u8 data, size_t len) qlcnic_detach(adapter); qlcnic_teardown_intr(adapter); err = qlcnic_setup_intr(adapter, data); - if (err) - dev_err(&adapter->pdev->dev, - "failed setting max_rss; rss disabled\n"); + if (err) { + kfree(adapter->msix_entries); + netdev_err(netdev, "failed to setup interrupt\n"); + return err; + } if (qlcnic_83xx_check(adapter)) { err = qlcnic_83xx_setup_mbx_intr(adapter); diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 387bdd02945d..ba7f571a2b1c 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -4,12 +4,15 @@ #include <net/netfilter/nf_conntrack_expect.h> +#include <linux/types.h> + #define SIP_PORT 5060 #define SIP_TIMEOUT 3600 struct nf_ct_sip_master { unsigned int register_cseq; unsigned int invite_cseq; + __be16 forced_dport; }; enum sip_expectation_classes { diff --git a/include/net/dn_route.h b/include/net/dn_route.h index 4f7d6a182381..2e9d317c82dc 100644 --- a/include/net/dn_route.h +++ b/include/net/dn_route.h @@ -16,7 +16,7 @@ *******************************************************************************/ extern struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri); -extern int dn_route_output_sock(struct dst_entry **pprt, struct flowidn *, struct sock *sk, int flags); +extern int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *, struct sock *sk, int flags); extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb); extern void dn_rt_cache_flush(int delay); diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index e5062c955ea6..734d9b5f577a 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -73,8 +73,8 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de int i; gcells->gro_cells_mask = roundup_pow_of_two(netif_get_num_default_rss_queues()) - 1; - gcells->cells = kcalloc(sizeof(struct gro_cell), - gcells->gro_cells_mask + 1, + gcells->cells = kcalloc(gcells->gro_cells_mask + 1, + sizeof(struct gro_cell), GFP_KERNEL); if (!gcells->cells) return -ENOMEM; diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h index 463ae8e16696..2bdb7a15fe06 100644 --- a/include/net/netfilter/nf_conntrack_acct.h +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -57,7 +57,9 @@ static inline void nf_ct_set_acct(struct net *net, bool enable) net->ct.sysctl_acct = enable; } -extern int nf_conntrack_acct_init(struct net *net); -extern void nf_conntrack_acct_fini(struct net *net); +extern int nf_conntrack_acct_pernet_init(struct net *net); +extern void nf_conntrack_acct_pernet_fini(struct net *net); +extern int nf_conntrack_acct_init(void); +extern void nf_conntrack_acct_fini(void); #endif /* _NF_CONNTRACK_ACCT_H */ diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index e98aeb3da033..930275fa2ea6 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -25,12 +25,19 @@ extern unsigned int nf_conntrack_in(struct net *net, unsigned int hooknum, struct sk_buff *skb); -extern int nf_conntrack_init(struct net *net); -extern void nf_conntrack_cleanup(struct net *net); +extern int nf_conntrack_init_net(struct net *net); +extern void nf_conntrack_cleanup_net(struct net *net); -extern int nf_conntrack_proto_init(struct net *net); -extern void nf_conntrack_proto_fini(struct net *net); +extern int nf_conntrack_proto_pernet_init(struct net *net); +extern void nf_conntrack_proto_pernet_fini(struct net *net); +extern int nf_conntrack_proto_init(void); +extern void nf_conntrack_proto_fini(void); + +extern int nf_conntrack_init_start(void); +extern void nf_conntrack_cleanup_start(void); + +extern void nf_conntrack_init_end(void); extern void nf_conntrack_cleanup_end(void); extern bool diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 5654d292efd4..092dc651689f 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -207,9 +207,11 @@ nf_ct_expect_event(enum ip_conntrack_expect_events event, nf_ct_expect_event_report(event, exp, 0, 0); } -extern int nf_conntrack_ecache_init(struct net *net); -extern void nf_conntrack_ecache_fini(struct net *net); +extern int nf_conntrack_ecache_pernet_init(struct net *net); +extern void nf_conntrack_ecache_pernet_fini(struct net *net); +extern int nf_conntrack_ecache_init(void); +extern void nf_conntrack_ecache_fini(void); #else /* CONFIG_NF_CONNTRACK_EVENTS */ static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, @@ -232,12 +234,21 @@ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e, u32 portid, int report) {} -static inline int nf_conntrack_ecache_init(struct net *net) +static inline int nf_conntrack_ecache_pernet_init(struct net *net) { return 0; } -static inline void nf_conntrack_ecache_fini(struct net *net) +static inline void nf_conntrack_ecache_pernet_fini(struct net *net) +{ +} + +static inline int nf_conntrack_ecache_init(void) +{ + return 0; +} + +static inline void nf_conntrack_ecache_fini(void) { } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index cc13f377a705..cbbae7621e22 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -69,8 +69,11 @@ struct nf_conntrack_expect_policy { #define NF_CT_EXPECT_CLASS_DEFAULT 0 -int nf_conntrack_expect_init(struct net *net); -void nf_conntrack_expect_fini(struct net *net); +int nf_conntrack_expect_pernet_init(struct net *net); +void nf_conntrack_expect_pernet_fini(struct net *net); + +int nf_conntrack_expect_init(void); +void nf_conntrack_expect_fini(void); struct nf_conntrack_expect * __nf_ct_expect_find(struct net *net, u16 zone, diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 8b4d1fc29096..977bc8a46444 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -23,6 +23,9 @@ enum nf_ct_ext_id { #ifdef CONFIG_NF_CONNTRACK_TIMEOUT NF_CT_EXT_TIMEOUT, #endif +#ifdef CONFIG_NF_CONNTRACK_LABELS + NF_CT_EXT_LABELS, +#endif NF_CT_EXT_NUM, }; @@ -33,6 +36,7 @@ enum nf_ct_ext_id { #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout +#define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 9aad956d1008..ce27edf57570 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -82,8 +82,11 @@ static inline void *nfct_help_data(const struct nf_conn *ct) return (void *)help->data; } -extern int nf_conntrack_helper_init(struct net *net); -extern void nf_conntrack_helper_fini(struct net *net); +extern int nf_conntrack_helper_pernet_init(struct net *net); +extern void nf_conntrack_helper_pernet_fini(struct net *net); + +extern int nf_conntrack_helper_init(void); +extern void nf_conntrack_helper_fini(void); extern int nf_conntrack_broadcast_help(struct sk_buff *skb, unsigned int protoff, diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 6f7c13f4ac03..3bb89eac3fa1 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -76,11 +76,16 @@ struct nf_conntrack_l3proto { extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX]; -/* Protocol registration. */ -extern int nf_conntrack_l3proto_register(struct net *net, +/* Protocol pernet registration. */ +extern int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto); -extern void nf_conntrack_l3proto_unregister(struct net *net, +extern void nf_ct_l3proto_pernet_unregister(struct net *net, struct nf_conntrack_l3proto *proto); + +/* Protocol global registration. */ +extern int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto); +extern void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto); + extern struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto); extern void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p); diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index c3be4aef6bf7..914d8d900798 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -121,12 +121,16 @@ extern struct nf_conntrack_l4proto * nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto); extern void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p); -/* Protocol registration. */ -extern int nf_conntrack_l4proto_register(struct net *net, +/* Protocol pernet registration. */ +extern int nf_ct_l4proto_pernet_register(struct net *net, struct nf_conntrack_l4proto *proto); -extern void nf_conntrack_l4proto_unregister(struct net *net, +extern void nf_ct_l4proto_pernet_unregister(struct net *net, struct nf_conntrack_l4proto *proto); +/* Protocol global registration. */ +extern int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto); +extern void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto); + static inline void nf_ct_kfree_compat_sysctl_table(struct nf_proto_net *pn) { #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h new file mode 100644 index 000000000000..c985695283b3 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -0,0 +1,58 @@ +#include <linux/types.h> +#include <net/net_namespace.h> +#include <linux/netfilter/nf_conntrack_common.h> +#include <linux/netfilter/nf_conntrack_tuple_common.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_extend.h> + +#include <uapi/linux/netfilter/xt_connlabel.h> + +struct nf_conn_labels { + u8 words; + unsigned long bits[]; +}; + +static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + return nf_ct_ext_find(ct, NF_CT_EXT_LABELS); +#else + return NULL; +#endif +} + +static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + struct nf_conn_labels *cl_ext; + struct net *net = nf_ct_net(ct); + u8 words; + + words = ACCESS_ONCE(net->ct.label_words); + if (words == 0 || WARN_ON_ONCE(words > 8)) + return NULL; + + cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, + words * sizeof(long), GFP_ATOMIC); + if (cl_ext != NULL) + cl_ext->words = words; + + return cl_ext; +#else + return NULL; +#endif +} + +bool nf_connlabel_match(const struct nf_conn *ct, u16 bit); +int nf_connlabel_set(struct nf_conn *ct, u16 bit); + +int nf_connlabels_replace(struct nf_conn *ct, + const u32 *data, const u32 *mask, unsigned int words); + +#ifdef CONFIG_NF_CONNTRACK_LABELS +int nf_conntrack_labels_init(void); +void nf_conntrack_labels_fini(void); +#else +static inline int nf_conntrack_labels_init(void) { return 0; } +static inline void nf_conntrack_labels_fini(void) {} +#endif diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index e41e472d08f2..d23aceb16d94 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -76,15 +76,15 @@ nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct, } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -extern int nf_conntrack_timeout_init(struct net *net); -extern void nf_conntrack_timeout_fini(struct net *net); +extern int nf_conntrack_timeout_init(void); +extern void nf_conntrack_timeout_fini(void); #else -static inline int nf_conntrack_timeout_init(struct net *net) +static inline int nf_conntrack_timeout_init(void) { return 0; } -static inline void nf_conntrack_timeout_fini(struct net *net) +static inline void nf_conntrack_timeout_fini(void) { return; } diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h index fc9c82b1f06b..b00461413efd 100644 --- a/include/net/netfilter/nf_conntrack_timestamp.h +++ b/include/net/netfilter/nf_conntrack_timestamp.h @@ -48,15 +48,28 @@ static inline void nf_ct_set_tstamp(struct net *net, bool enable) } #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP -extern int nf_conntrack_tstamp_init(struct net *net); -extern void nf_conntrack_tstamp_fini(struct net *net); +extern int nf_conntrack_tstamp_pernet_init(struct net *net); +extern void nf_conntrack_tstamp_pernet_fini(struct net *net); + +extern int nf_conntrack_tstamp_init(void); +extern void nf_conntrack_tstamp_fini(void); #else -static inline int nf_conntrack_tstamp_init(struct net *net) +static inline int nf_conntrack_tstamp_pernet_init(struct net *net) +{ + return 0; +} + +static inline void nf_conntrack_tstamp_pernet_fini(struct net *net) +{ + return; +} + +static inline int nf_conntrack_tstamp_init(void) { return 0; } -static inline void nf_conntrack_tstamp_fini(struct net *net) +static inline void nf_conntrack_tstamp_fini(void) { return; } diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 923cb20051ed..c9c0c538b68b 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -84,6 +84,10 @@ struct netns_ct { int sysctl_auto_assign_helper; bool auto_assign_helper_warned; struct nf_ip_net nf_ct_proto; +#if defined(CONFIG_NF_CONNTRACK_LABELS) + unsigned int labels_used; + u8 label_words; +#endif #ifdef CONFIG_NF_NAT_NEEDED struct hlist_head *nat_bysource; unsigned int nat_htable_size; diff --git a/include/net/sock.h b/include/net/sock.h index 581dc6bd7dc6..a340ab46b41c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -340,7 +340,7 @@ struct sock { #endif unsigned long sk_flags; struct dst_entry *sk_rx_dst; - struct dst_entry *sk_dst_cache; + struct dst_entry __rcu *sk_dst_cache; spinlock_t sk_dst_lock; atomic_t sk_wmem_alloc; atomic_t sk_omem_alloc; diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 08f555fef13f..41115776d76f 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -35,9 +35,11 @@ header-y += xt_TCPOPTSTRIP.h header-y += xt_TEE.h header-y += xt_TPROXY.h header-y += xt_addrtype.h +header-y += xt_bpf.h header-y += xt_cluster.h header-y += xt_comment.h header-y += xt_connbytes.h +header-y += xt_connlabel.h header-y += xt_connlimit.h header-y += xt_connmark.h header-y += xt_conntrack.h diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 1644cdd8be91..d69483fb3825 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -101,6 +101,7 @@ enum ip_conntrack_events { IPCT_MARK, /* new mark has been set */ IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ IPCT_SECMARK, /* new security mark has been set */ + IPCT_LABEL, /* new connlabel has been set */ }; enum ip_conntrack_expect_events { diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 86e930cf3dfb..08fabc6c93f3 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -49,6 +49,8 @@ enum ctattr_type { CTA_SECCTX, CTA_TIMESTAMP, CTA_MARK_MASK, + CTA_LABELS, + CTA_LABELS_MASK, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h new file mode 100644 index 000000000000..5dda450eb55b --- /dev/null +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -0,0 +1,17 @@ +#ifndef _XT_BPF_H +#define _XT_BPF_H + +#include <linux/filter.h> +#include <linux/types.h> + +#define XT_BPF_MAX_NUM_INSTR 64 + +struct xt_bpf_info { + __u16 bpf_program_num_elem; + struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; + + /* only used in the kernel */ + struct sk_filter *filter __attribute__((aligned(8))); +}; + +#endif /*_XT_BPF_H */ diff --git a/include/uapi/linux/netfilter/xt_connlabel.h b/include/uapi/linux/netfilter/xt_connlabel.h new file mode 100644 index 000000000000..c4bc9ee9b330 --- /dev/null +++ b/include/uapi/linux/netfilter/xt_connlabel.h @@ -0,0 +1,12 @@ +#include <linux/types.h> + +#define XT_CONNLABEL_MAXBIT 127 +enum xt_connlabel_mtopts { + XT_CONNLABEL_OP_INVERT = 1 << 0, + XT_CONNLABEL_OP_SET = 1 << 1, +}; + +struct xt_connlabel_mtinfo { + __u16 bit; + __u16 options; +}; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 307c322d53bb..64d9843f9e04 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -909,6 +909,7 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, struct dn_scp *scp = DN_SK(sk); int err = -EISCONN; struct flowidn fld; + struct dst_entry *dst; if (sock->state == SS_CONNECTED) goto out; @@ -955,10 +956,11 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, fld.flowidn_proto = DNPROTO_NSP; if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, flags) < 0) goto out; - sk->sk_route_caps = sk->sk_dst_cache->dev->features; + dst = __sk_dst_get(sk); + sk->sk_route_caps = dst->dev->features; sock->state = SS_CONNECTING; scp->state = DN_CI; - scp->segsize_loc = dst_metric_advmss(sk->sk_dst_cache); + scp->segsize_loc = dst_metric_advmss(dst); dn_nsp_send_conninit(sk, NSP_CI); err = -EINPROGRESS; diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 8a96047c7c94..1aaa51ebbda6 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -598,7 +598,7 @@ void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg, if (reason == 0) reason = le16_to_cpu(scp->discdata_out.opt_status); - dn_nsp_do_disc(sk, msgflg, reason, gfp, sk->sk_dst_cache, ddl, + dn_nsp_do_disc(sk, msgflg, reason, gfp, __sk_dst_get(sk), ddl, scp->discdata_out.opt_data, scp->addrrem, scp->addrloc); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index b57419cc41a4..1550028fcd8e 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1282,7 +1282,7 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int return err; } -int dn_route_output_sock(struct dst_entry **pprt, struct flowidn *fl, struct sock *sk, int flags) +int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *fl, struct sock *sk, int flags) { int err; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8bb623d357ad..11cb4979a465 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -204,7 +204,8 @@ tb_found: ret = 1; if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) || - (sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && + (tb->fastreuseport > 0 && + sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && smallest_size != -1 && --attempts >= 0) { spin_unlock(&head->lock); goto again; @@ -227,19 +228,15 @@ tb_not_found: if (sk->sk_reuseport) { tb->fastreuseport = 1; tb->fastuid = uid; - } else { + } else tb->fastreuseport = 0; - tb->fastuid = 0; - } } else { if (tb->fastreuse && (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) tb->fastreuse = 0; if (tb->fastreuseport && - (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))) { + (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))) tb->fastreuseport = 0; - tb->fastuid = 0; - } } success: if (!inet_csk(sk)->icsk_bind_hash) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index fcdd0c2406e6..48990ada0e1e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -420,54 +420,43 @@ static int ipv4_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_tcp4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_tcp4 :protocol register failed\n"); + pr_err("nf_conntrack_tcp4: pernet registration failed\n"); goto out_tcp; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udp4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_udp4 :protocol register failed\n"); + pr_err("nf_conntrack_udp4: pernet registration failed\n"); goto out_udp; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_icmp); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp); if (ret < 0) { - pr_err("nf_conntrack_l4proto_icmp4 :protocol register failed\n"); + pr_err("nf_conntrack_icmp4: pernet registration failed\n"); goto out_icmp; } - ret = nf_conntrack_l3proto_register(net, - &nf_conntrack_l3proto_ipv4); + ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4); if (ret < 0) { - pr_err("nf_conntrack_l3proto_ipv4 :protocol register failed\n"); + pr_err("nf_conntrack_ipv4: pernet registration failed\n"); goto out_ipv4; } return 0; out_ipv4: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmp); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp); out_icmp: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); out_udp: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4); out_tcp: return ret; } static void ipv4_net_exit(struct net *net) { - nf_conntrack_l3proto_unregister(net, - &nf_conntrack_l3proto_ipv4); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmp); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp4); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp4); + nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4); } static struct pernet_operations ipv4_net_ops = { @@ -500,16 +489,49 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) pr_err("nf_conntrack_ipv4: can't register hooks.\n"); goto cleanup_pernet; } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n"); + goto cleanup_hooks; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n"); + goto cleanup_tcp4; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n"); + goto cleanup_udp4; + } + + ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n"); + goto cleanup_icmpv4; + } + #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) ret = nf_conntrack_ipv4_compat_init(); if (ret < 0) - goto cleanup_hooks; + goto cleanup_proto; #endif return ret; #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + cleanup_proto: + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); +#endif + cleanup_icmpv4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); + cleanup_udp4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); + cleanup_tcp4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4); cleanup_hooks: nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); -#endif cleanup_pernet: unregister_pernet_subsys(&ipv4_net_ops); cleanup_sockopt: @@ -523,6 +545,10 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) nf_conntrack_ipv4_compat_fini(); #endif + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4); nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); unregister_pernet_subsys(&ipv4_net_ops); nf_unregister_sockopt(&so_getorigdst); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 137e245860ab..8a45bb20bedb 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -421,54 +421,43 @@ static int ipv6_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_tcp6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l4proto_tcp6: protocol register failed\n"); + pr_err("nf_conntrack_tcp6: pernet registration failed\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udp6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l4proto_udp6: protocol register failed\n"); + pr_err("nf_conntrack_udp6: pernet registration failed\n"); goto cleanup_tcp6; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_icmpv6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmpv6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l4proto_icmp6: protocol register failed\n"); + pr_err("nf_conntrack_icmp6: pernet registration failed\n"); goto cleanup_udp6; } - ret = nf_conntrack_l3proto_register(net, - &nf_conntrack_l3proto_ipv6); + ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l3proto_ipv6: protocol register failed\n"); + pr_err("nf_conntrack_ipv6: pernet registration failed.\n"); goto cleanup_icmpv6; } return 0; cleanup_icmpv6: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmpv6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); cleanup_udp6: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); cleanup_tcp6: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6); out: return ret; } static void ipv6_net_exit(struct net *net) { - nf_conntrack_l3proto_unregister(net, - &nf_conntrack_l3proto_ipv6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmpv6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp6); + nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6); } static struct pernet_operations ipv6_net_ops = { @@ -491,19 +480,52 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) ret = register_pernet_subsys(&ipv6_net_ops); if (ret < 0) - goto cleanup_pernet; + goto cleanup_sockopt; + ret = nf_register_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register pre-routing defrag " "hook.\n"); - goto cleanup_ipv6; + goto cleanup_pernet; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register tcp6 proto.\n"); + goto cleanup_hooks; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register udp6 proto.\n"); + goto cleanup_tcp6; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmpv6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register icmpv6 proto.\n"); + goto cleanup_udp6; + } + + ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n"); + goto cleanup_icmpv6; } return ret; - cleanup_ipv6: - unregister_pernet_subsys(&ipv6_net_ops); + cleanup_icmpv6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); + cleanup_udp6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); + cleanup_tcp6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); + cleanup_hooks: + nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); cleanup_pernet: + unregister_pernet_subsys(&ipv6_net_ops); + cleanup_sockopt: nf_unregister_sockopt(&so_getorigdst6); return ret; } @@ -511,6 +533,10 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) static void __exit nf_conntrack_l3proto_ipv6_fini(void) { synchronize_net(); + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); unregister_pernet_subsys(&ipv6_net_ops); nf_unregister_sockopt(&so_getorigdst6); diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 2bb2beb6a373..3c83a1e5ab03 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -214,8 +214,7 @@ irnet_get_discovery_log(irnet_socket * ap) * After reading : discoveries = NULL ; disco_index = Y ; disco_number = -1 */ static inline int -irnet_read_discovery_log(irnet_socket * ap, - char * event) +irnet_read_discovery_log(irnet_socket *ap, char *event, int buf_size) { int done_event = 0; @@ -237,12 +236,13 @@ irnet_read_discovery_log(irnet_socket * ap, if(ap->disco_index < ap->disco_number) { /* Write an event */ - sprintf(event, "Found %08x (%s) behind %08x {hints %02X-%02X}\n", - ap->discoveries[ap->disco_index].daddr, - ap->discoveries[ap->disco_index].info, - ap->discoveries[ap->disco_index].saddr, - ap->discoveries[ap->disco_index].hints[0], - ap->discoveries[ap->disco_index].hints[1]); + snprintf(event, buf_size, + "Found %08x (%s) behind %08x {hints %02X-%02X}\n", + ap->discoveries[ap->disco_index].daddr, + ap->discoveries[ap->disco_index].info, + ap->discoveries[ap->disco_index].saddr, + ap->discoveries[ap->disco_index].hints[0], + ap->discoveries[ap->disco_index].hints[1]); DEBUG(CTRL_INFO, "Writing discovery %d : %s\n", ap->disco_index, ap->discoveries[ap->disco_index].info); @@ -282,27 +282,24 @@ irnet_ctrl_read(irnet_socket * ap, size_t count) { DECLARE_WAITQUEUE(wait, current); - char event[64]; /* Max event is 61 char */ + char event[75]; ssize_t ret = 0; DENTER(CTRL_TRACE, "(ap=0x%p, count=%Zd)\n", ap, count); - /* Check if we can write an event out in one go */ - DABORT(count < sizeof(event), -EOVERFLOW, CTRL_ERROR, "Buffer to small.\n"); - #ifdef INITIAL_DISCOVERY /* Check if we have read the log */ - if(irnet_read_discovery_log(ap, event)) + if (irnet_read_discovery_log(ap, event, sizeof(event))) { - /* We have an event !!! Copy it to the user */ - if(copy_to_user(buf, event, strlen(event))) + count = min(strlen(event), count); + if (copy_to_user(buf, event, count)) { DERROR(CTRL_ERROR, "Invalid user space pointer.\n"); return -EFAULT; } DEXIT(CTRL_TRACE, "\n"); - return strlen(event); + return count; } #endif /* INITIAL_DISCOVERY */ @@ -339,79 +336,81 @@ irnet_ctrl_read(irnet_socket * ap, switch(irnet_events.log[ap->event_index].event) { case IRNET_DISCOVER: - sprintf(event, "Discovered %08x (%s) behind %08x {hints %02X-%02X}\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].saddr, - irnet_events.log[ap->event_index].hints.byte[0], - irnet_events.log[ap->event_index].hints.byte[1]); + snprintf(event, sizeof(event), + "Discovered %08x (%s) behind %08x {hints %02X-%02X}\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].saddr, + irnet_events.log[ap->event_index].hints.byte[0], + irnet_events.log[ap->event_index].hints.byte[1]); break; case IRNET_EXPIRE: - sprintf(event, "Expired %08x (%s) behind %08x {hints %02X-%02X}\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].saddr, - irnet_events.log[ap->event_index].hints.byte[0], - irnet_events.log[ap->event_index].hints.byte[1]); + snprintf(event, sizeof(event), + "Expired %08x (%s) behind %08x {hints %02X-%02X}\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].saddr, + irnet_events.log[ap->event_index].hints.byte[0], + irnet_events.log[ap->event_index].hints.byte[1]); break; case IRNET_CONNECT_TO: - sprintf(event, "Connected to %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "Connected to %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_CONNECT_FROM: - sprintf(event, "Connection from %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "Connection from %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_REQUEST_FROM: - sprintf(event, "Request from %08x (%s) behind %08x\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].saddr); + snprintf(event, sizeof(event), "Request from %08x (%s) behind %08x\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].saddr); break; case IRNET_NOANSWER_FROM: - sprintf(event, "No-answer from %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "No-answer from %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_BLOCKED_LINK: - sprintf(event, "Blocked link with %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "Blocked link with %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_DISCONNECT_FROM: - sprintf(event, "Disconnection from %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "Disconnection from %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_DISCONNECT_TO: - sprintf(event, "Disconnected to %08x (%s)\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name); + snprintf(event, sizeof(event), "Disconnected to %08x (%s)\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name); break; default: - sprintf(event, "Bug\n"); + snprintf(event, sizeof(event), "Bug\n"); } /* Increment our event index */ ap->event_index = (ap->event_index + 1) % IRNET_MAX_EVENTS; DEBUG(CTRL_INFO, "Event is :%s", event); - /* Copy it to the user */ - if(copy_to_user(buf, event, strlen(event))) + count = min(strlen(event), count); + if (copy_to_user(buf, event, count)) { DERROR(CTRL_ERROR, "Invalid user space pointer.\n"); return -EFAULT; } DEXIT(CTRL_TRACE, "\n"); - return strlen(event); + return count; } /*------------------------------------------------------------------*/ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 49e96df5fbc4..eb2c8ebf6d99 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -124,6 +124,12 @@ config NF_CONNTRACK_TIMESTAMP If unsure, say `N'. +config NF_CONNTRACK_LABELS + bool + help + This option enables support for assigning user-defined flag bits + to connection tracking entries. It selected by the connlabel match. + config NF_CT_PROTO_DCCP tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' depends on EXPERIMENTAL @@ -805,6 +811,15 @@ config NETFILTER_XT_MATCH_ADDRTYPE If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_MATCH_BPF + tristate '"bpf" match support' + depends on NETFILTER_ADVANCED + help + BPF matching applies a linux socket filter to each packet and + accepts those for which the filter returns non-zero. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK @@ -842,6 +857,18 @@ config NETFILTER_XT_MATCH_CONNBYTES If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_MATCH_CONNLABEL + tristate '"connlabel" match support' + select NF_CONNTRACK_LABELS + depends on NETFILTER_ADVANCED + ---help--- + This match allows you to test and assign userspace-defined labels names + to a connection. The kernel only stores bit values - mapping + names to bits is done by userspace. + + Unlike connmark, more than 32 flag bits may be assigned to a + connection simultaneously. + config NETFILTER_XT_MATCH_CONNLIMIT tristate '"connlimit" match support"' depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 32596978df1d..a1abf87d43bf 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -4,6 +4,7 @@ nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_exp nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o +nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -98,9 +99,11 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o # matches obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o +obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLABEL) += xt_connlabel.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 7df424e2d10c..2d3030ab5b61 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -106,36 +106,26 @@ static void nf_conntrack_acct_fini_sysctl(struct net *net) } #endif -int nf_conntrack_acct_init(struct net *net) +int nf_conntrack_acct_pernet_init(struct net *net) { - int ret; - net->ct.sysctl_acct = nf_ct_acct; + return nf_conntrack_acct_init_sysctl(net); +} - if (net_eq(net, &init_net)) { - ret = nf_ct_extend_register(&acct_extend); - if (ret < 0) { - printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); - goto out_extend_register; - } - } +void nf_conntrack_acct_pernet_fini(struct net *net) +{ + nf_conntrack_acct_fini_sysctl(net); +} - ret = nf_conntrack_acct_init_sysctl(net); +int nf_conntrack_acct_init(void) +{ + int ret = nf_ct_extend_register(&acct_extend); if (ret < 0) - goto out_sysctl; - - return 0; - -out_sysctl: - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&acct_extend); -out_extend_register: + pr_err("nf_conntrack_acct: Unable to register extension\n"); return ret; } -void nf_conntrack_acct_fini(struct net *net) +void nf_conntrack_acct_fini(void) { - nf_conntrack_acct_fini_sysctl(net); - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&acct_extend); + nf_ct_extend_unregister(&acct_extend); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e4a0c4fb3a7c..c8e001a9c45b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -45,6 +45,7 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_timestamp.h> #include <net/netfilter/nf_conntrack_timeout.h> +#include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> @@ -763,6 +764,7 @@ void nf_conntrack_free(struct nf_conn *ct) } EXPORT_SYMBOL_GPL(nf_conntrack_free); + /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * @@ -809,6 +811,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); + nf_ct_labels_ext_add(ct); ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, @@ -1331,18 +1334,42 @@ static int untrack_refs(void) return cnt; } -static void nf_conntrack_cleanup_init_net(void) +void nf_conntrack_cleanup_start(void) +{ + RCU_INIT_POINTER(ip_ct_attach, NULL); +} + +void nf_conntrack_cleanup_end(void) { + RCU_INIT_POINTER(nf_ct_destroy, NULL); while (untrack_refs() > 0) schedule(); #ifdef CONFIG_NF_CONNTRACK_ZONES nf_ct_extend_unregister(&nf_ct_zone_extend); #endif + nf_conntrack_proto_fini(); + nf_conntrack_labels_fini(); + nf_conntrack_helper_fini(); + nf_conntrack_timeout_fini(); + nf_conntrack_ecache_fini(); + nf_conntrack_tstamp_fini(); + nf_conntrack_acct_fini(); + nf_conntrack_expect_fini(); } -static void nf_conntrack_cleanup_net(struct net *net) +/* + * Mishearing the voices in his head, our hero wonders how he's + * supposed to kill the mall. + */ +void nf_conntrack_cleanup_net(struct net *net) { + /* + * This makes sure all current packets have passed through + * netfilter framework. Roll on, two-stage module + * delete... + */ + synchronize_net(); i_see_dead_people: nf_ct_iterate_cleanup(net, kill_all, NULL); nf_ct_release_dying_list(net); @@ -1352,38 +1379,17 @@ static void nf_conntrack_cleanup_net(struct net *net) } nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); - nf_conntrack_helper_fini(net); - nf_conntrack_timeout_fini(net); - nf_conntrack_ecache_fini(net); - nf_conntrack_tstamp_fini(net); - nf_conntrack_acct_fini(net); - nf_conntrack_expect_fini(net); + nf_conntrack_proto_pernet_fini(net); + nf_conntrack_helper_pernet_fini(net); + nf_conntrack_ecache_pernet_fini(net); + nf_conntrack_tstamp_pernet_fini(net); + nf_conntrack_acct_pernet_fini(net); + nf_conntrack_expect_pernet_fini(net); kmem_cache_destroy(net->ct.nf_conntrack_cachep); kfree(net->ct.slabname); free_percpu(net->ct.stat); } -/* Mishearing the voices in his head, our hero wonders how he's - supposed to kill the mall. */ -void nf_conntrack_cleanup(struct net *net) -{ - if (net_eq(net, &init_net)) - RCU_INIT_POINTER(ip_ct_attach, NULL); - - /* This makes sure all current packets have passed through - netfilter framework. Roll on, two-stage module - delete... */ - synchronize_net(); - nf_conntrack_proto_fini(net); - nf_conntrack_cleanup_net(net); -} - -void nf_conntrack_cleanup_end(void) -{ - RCU_INIT_POINTER(nf_ct_destroy, NULL); - nf_conntrack_cleanup_init_net(); -} - void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) { struct hlist_nulls_head *hash; @@ -1474,7 +1480,7 @@ void nf_ct_untracked_status_or(unsigned long bits) } EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); -static int nf_conntrack_init_init_net(void) +int nf_conntrack_init_start(void) { int max_factor = 8; int ret, cpu; @@ -1501,11 +1507,44 @@ static int nf_conntrack_init_init_net(void) printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n", NF_CONNTRACK_VERSION, nf_conntrack_htable_size, nf_conntrack_max); + + ret = nf_conntrack_expect_init(); + if (ret < 0) + goto err_expect; + + ret = nf_conntrack_acct_init(); + if (ret < 0) + goto err_acct; + + ret = nf_conntrack_tstamp_init(); + if (ret < 0) + goto err_tstamp; + + ret = nf_conntrack_ecache_init(); + if (ret < 0) + goto err_ecache; + + ret = nf_conntrack_timeout_init(); + if (ret < 0) + goto err_timeout; + + ret = nf_conntrack_helper_init(); + if (ret < 0) + goto err_helper; + + ret = nf_conntrack_labels_init(); + if (ret < 0) + goto err_labels; + #ifdef CONFIG_NF_CONNTRACK_ZONES ret = nf_ct_extend_register(&nf_ct_zone_extend); if (ret < 0) goto err_extend; #endif + ret = nf_conntrack_proto_init(); + if (ret < 0) + goto err_proto; + /* Set up fake conntrack: to never be deleted, not in any hashes */ for_each_possible_cpu(cpu) { struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); @@ -1516,12 +1555,38 @@ static int nf_conntrack_init_init_net(void) nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); return 0; +err_proto: #ifdef CONFIG_NF_CONNTRACK_ZONES + nf_ct_extend_unregister(&nf_ct_zone_extend); err_extend: #endif + nf_conntrack_labels_fini(); +err_labels: + nf_conntrack_helper_fini(); +err_helper: + nf_conntrack_timeout_fini(); +err_timeout: + nf_conntrack_ecache_fini(); +err_ecache: + nf_conntrack_tstamp_fini(); +err_tstamp: + nf_conntrack_acct_fini(); +err_acct: + nf_conntrack_expect_fini(); +err_expect: return ret; } +void nf_conntrack_init_end(void) +{ + /* For use by REJECT target */ + RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); + RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); + + /* Howto get NAT offsets */ + RCU_INIT_POINTER(nf_ct_nat_offset, NULL); +} + /* * We need to use special "null" values, not used in hash table */ @@ -1529,7 +1594,7 @@ err_extend: #define DYING_NULLS_VAL ((1<<30)+1) #define TEMPLATE_NULLS_VAL ((1<<30)+2) -static int nf_conntrack_init_net(struct net *net) +int nf_conntrack_init_net(struct net *net) { int ret; @@ -1565,35 +1630,36 @@ static int nf_conntrack_init_net(struct net *net) printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); goto err_hash; } - ret = nf_conntrack_expect_init(net); + ret = nf_conntrack_expect_pernet_init(net); if (ret < 0) goto err_expect; - ret = nf_conntrack_acct_init(net); + ret = nf_conntrack_acct_pernet_init(net); if (ret < 0) goto err_acct; - ret = nf_conntrack_tstamp_init(net); + ret = nf_conntrack_tstamp_pernet_init(net); if (ret < 0) goto err_tstamp; - ret = nf_conntrack_ecache_init(net); + ret = nf_conntrack_ecache_pernet_init(net); if (ret < 0) goto err_ecache; - ret = nf_conntrack_timeout_init(net); - if (ret < 0) - goto err_timeout; - ret = nf_conntrack_helper_init(net); + ret = nf_conntrack_helper_pernet_init(net); if (ret < 0) goto err_helper; + ret = nf_conntrack_proto_pernet_init(net); + if (ret < 0) + goto err_proto; return 0; + +err_proto: + nf_conntrack_helper_pernet_fini(net); err_helper: - nf_conntrack_timeout_fini(net); -err_timeout: - nf_conntrack_ecache_fini(net); + nf_conntrack_ecache_pernet_fini(net); err_ecache: - nf_conntrack_tstamp_fini(net); + nf_conntrack_tstamp_pernet_fini(net); err_tstamp: - nf_conntrack_acct_fini(net); + nf_conntrack_acct_pernet_fini(net); err_acct: - nf_conntrack_expect_fini(net); + nf_conntrack_expect_pernet_fini(net); err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); err_hash: @@ -1610,38 +1676,3 @@ s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq); EXPORT_SYMBOL_GPL(nf_ct_nat_offset); - -int nf_conntrack_init(struct net *net) -{ - int ret; - - if (net_eq(net, &init_net)) { - ret = nf_conntrack_init_init_net(); - if (ret < 0) - goto out_init_net; - } - ret = nf_conntrack_proto_init(net); - if (ret < 0) - goto out_proto; - ret = nf_conntrack_init_net(net); - if (ret < 0) - goto out_net; - - if (net_eq(net, &init_net)) { - /* For use by REJECT target */ - RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); - RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); - - /* Howto get NAT offsets */ - RCU_INIT_POINTER(nf_ct_nat_offset, NULL); - } - return 0; - -out_net: - nf_conntrack_proto_fini(net); -out_proto: - if (net_eq(net, &init_net)) - nf_conntrack_cleanup_init_net(); -out_init_net: - return ret; -} diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index faa978f1714b..b5d2eb8bf0d5 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -233,38 +233,27 @@ static void nf_conntrack_event_fini_sysctl(struct net *net) } #endif /* CONFIG_SYSCTL */ -int nf_conntrack_ecache_init(struct net *net) +int nf_conntrack_ecache_pernet_init(struct net *net) { - int ret; - net->ct.sysctl_events = nf_ct_events; net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout; + return nf_conntrack_event_init_sysctl(net); +} - if (net_eq(net, &init_net)) { - ret = nf_ct_extend_register(&event_extend); - if (ret < 0) { - printk(KERN_ERR "nf_ct_event: Unable to register " - "event extension.\n"); - goto out_extend_register; - } - } +void nf_conntrack_ecache_pernet_fini(struct net *net) +{ + nf_conntrack_event_fini_sysctl(net); +} - ret = nf_conntrack_event_init_sysctl(net); +int nf_conntrack_ecache_init(void) +{ + int ret = nf_ct_extend_register(&event_extend); if (ret < 0) - goto out_sysctl; - - return 0; - -out_sysctl: - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&event_extend); -out_extend_register: + pr_err("nf_ct_event: Unable to register event extension.\n"); return ret; } -void nf_conntrack_ecache_fini(struct net *net) +void nf_conntrack_ecache_fini(void) { - nf_conntrack_event_fini_sysctl(net); - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&event_extend); + nf_ct_extend_unregister(&event_extend); } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 527651a53a45..bdd341899ed3 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -587,53 +587,50 @@ static void exp_proc_remove(struct net *net) module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400); -int nf_conntrack_expect_init(struct net *net) +int nf_conntrack_expect_pernet_init(struct net *net) { int err = -ENOMEM; - if (net_eq(net, &init_net)) { - if (!nf_ct_expect_hsize) { - nf_ct_expect_hsize = net->ct.htable_size / 256; - if (!nf_ct_expect_hsize) - nf_ct_expect_hsize = 1; - } - nf_ct_expect_max = nf_ct_expect_hsize * 4; - } - net->ct.expect_count = 0; net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0); if (net->ct.expect_hash == NULL) goto err1; - if (net_eq(net, &init_net)) { - nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", - sizeof(struct nf_conntrack_expect), - 0, 0, NULL); - if (!nf_ct_expect_cachep) - goto err2; - } - err = exp_proc_init(net); if (err < 0) - goto err3; + goto err2; return 0; - -err3: - if (net_eq(net, &init_net)) - kmem_cache_destroy(nf_ct_expect_cachep); err2: nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); err1: return err; } -void nf_conntrack_expect_fini(struct net *net) +void nf_conntrack_expect_pernet_fini(struct net *net) { exp_proc_remove(net); - if (net_eq(net, &init_net)) { - rcu_barrier(); /* Wait for call_rcu() before destroy */ - kmem_cache_destroy(nf_ct_expect_cachep); - } nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); } + +int nf_conntrack_expect_init(void) +{ + if (!nf_ct_expect_hsize) { + nf_ct_expect_hsize = nf_conntrack_htable_size / 256; + if (!nf_ct_expect_hsize) + nf_ct_expect_hsize = 1; + } + nf_ct_expect_max = nf_ct_expect_hsize * 4; + nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", + sizeof(struct nf_conntrack_expect), + 0, 0, NULL); + if (!nf_ct_expect_cachep) + return -ENOMEM; + return 0; +} + +void nf_conntrack_expect_fini(void) +{ + rcu_barrier(); /* Wait for call_rcu() before destroy */ + kmem_cache_destroy(nf_ct_expect_cachep); +} diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 884f2b39319a..2f380f73c4c0 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -423,44 +423,41 @@ static struct nf_ct_ext_type helper_extend __read_mostly = { .id = NF_CT_EXT_HELPER, }; -int nf_conntrack_helper_init(struct net *net) +int nf_conntrack_helper_pernet_init(struct net *net) { - int err; - net->ct.auto_assign_helper_warned = false; net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper; + return nf_conntrack_helper_init_sysctl(net); +} - if (net_eq(net, &init_net)) { - nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ - nf_ct_helper_hash = - nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); - if (!nf_ct_helper_hash) - return -ENOMEM; +void nf_conntrack_helper_pernet_fini(struct net *net) +{ + nf_conntrack_helper_fini_sysctl(net); +} - err = nf_ct_extend_register(&helper_extend); - if (err < 0) - goto err1; +int nf_conntrack_helper_init(void) +{ + int ret; + nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ + nf_ct_helper_hash = + nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); + if (!nf_ct_helper_hash) + return -ENOMEM; + + ret = nf_ct_extend_register(&helper_extend); + if (ret < 0) { + pr_err("nf_ct_helper: Unable to register helper extension.\n"); + goto out_extend; } - err = nf_conntrack_helper_init_sysctl(net); - if (err < 0) - goto out_sysctl; - return 0; - -out_sysctl: - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&helper_extend); -err1: +out_extend: nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); - return err; + return ret; } -void nf_conntrack_helper_fini(struct net *net) +void nf_conntrack_helper_fini(void) { - nf_conntrack_helper_fini_sysctl(net); - if (net_eq(net, &init_net)) { - nf_ct_extend_unregister(&helper_extend); - nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); - } + nf_ct_extend_unregister(&helper_extend); + nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); } diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c new file mode 100644 index 000000000000..8fe2e99428b7 --- /dev/null +++ b/net/netfilter/nf_conntrack_labels.c @@ -0,0 +1,112 @@ +/* + * test/set flag bits stored in conntrack extension area. + * + * (C) 2013 Astaro GmbH & Co KG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ctype.h> +#include <linux/export.h> +#include <linux/jhash.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/slab.h> + +#include <net/netfilter/nf_conntrack_ecache.h> +#include <net/netfilter/nf_conntrack_labels.h> + +static unsigned int label_bits(const struct nf_conn_labels *l) +{ + unsigned int longs = l->words; + return longs * BITS_PER_LONG; +} + +bool nf_connlabel_match(const struct nf_conn *ct, u16 bit) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels) + return false; + + return bit < label_bits(labels) && test_bit(bit, labels->bits); +} +EXPORT_SYMBOL_GPL(nf_connlabel_match); + +int nf_connlabel_set(struct nf_conn *ct, u16 bit) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels || bit >= label_bits(labels)) + return -ENOSPC; + + if (test_bit(bit, labels->bits)) + return 0; + + if (test_and_set_bit(bit, labels->bits)) + nf_conntrack_event_cache(IPCT_LABEL, ct); + + return 0; +} +EXPORT_SYMBOL_GPL(nf_connlabel_set); + +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) +static void replace_u32(u32 *address, u32 mask, u32 new) +{ + u32 old, tmp; + + do { + old = *address; + tmp = (old & mask) ^ new; + } while (cmpxchg(address, old, tmp) != old); +} + +int nf_connlabels_replace(struct nf_conn *ct, + const u32 *data, + const u32 *mask, unsigned int words32) +{ + struct nf_conn_labels *labels; + unsigned int size, i; + u32 *dst; + + labels = nf_ct_labels_find(ct); + if (!labels) + return -ENOSPC; + + size = labels->words * sizeof(long); + if (size < (words32 * sizeof(u32))) + words32 = size / sizeof(u32); + + dst = (u32 *) labels->bits; + if (words32) { + for (i = 0; i < words32; i++) + replace_u32(&dst[i], mask ? ~mask[i] : 0, data[i]); + } + + size /= sizeof(u32); + for (i = words32; i < size; i++) /* pad */ + replace_u32(&dst[i], 0, 0); + + nf_conntrack_event_cache(IPCT_LABEL, ct); + return 0; +} +EXPORT_SYMBOL_GPL(nf_connlabels_replace); +#endif + +static struct nf_ct_ext_type labels_extend __read_mostly = { + .len = sizeof(struct nf_conn_labels), + .align = __alignof__(struct nf_conn_labels), + .id = NF_CT_EXT_LABELS, +}; + +int nf_conntrack_labels_init(void) +{ + return nf_ct_extend_register(&labels_extend); +} + +void nf_conntrack_labels_fini(void) +{ + nf_ct_extend_unregister(&labels_extend); +} diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 627b0e50b238..2334cc5d2b16 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -43,6 +43,7 @@ #include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_timestamp.h> +#include <net/netfilter/nf_conntrack_labels.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_l4proto.h> @@ -323,6 +324,40 @@ nla_put_failure: #define ctnetlink_dump_secctx(a, b) (0) #endif +#ifdef CONFIG_NF_CONNTRACK_LABELS +static int ctnetlink_label_size(const struct nf_conn *ct) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels) + return 0; + return nla_total_size(labels->words * sizeof(long)); +} + +static int +ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + unsigned int len, i; + + if (!labels) + return 0; + + len = labels->words * sizeof(long); + i = 0; + do { + if (labels->bits[i] != 0) + return nla_put(skb, CTA_LABELS, len, labels->bits); + i++; + } while (i < labels->words); + + return 0; +} +#else +#define ctnetlink_dump_labels(a, b) (0) +#define ctnetlink_label_size(a) (0) +#endif + #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple) static inline int @@ -463,6 +498,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, ctnetlink_dump_helpinfo(skb, ct) < 0 || ctnetlink_dump_mark(skb, ct) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || + ctnetlink_dump_labels(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || ctnetlink_dump_master(skb, ct) < 0 || @@ -561,6 +597,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif + ctnetlink_proto_size(ct) + + ctnetlink_label_size(ct) ; } @@ -662,6 +699,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) && ctnetlink_dump_secctx(skb, ct) < 0) goto nla_put_failure; #endif + if (events & (1 << IPCT_LABEL) && + ctnetlink_dump_labels(skb, ct) < 0) + goto nla_put_failure; if (events & (1 << IPCT_RELATED) && ctnetlink_dump_master(skb, ct) < 0) @@ -921,6 +961,7 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, return 0; } +#define __CTA_LABELS_MAX_LENGTH ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_TUPLE_ORIG] = { .type = NLA_NESTED }, [CTA_TUPLE_REPLY] = { .type = NLA_NESTED }, @@ -937,6 +978,10 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NLA_NESTED }, [CTA_ZONE] = { .type = NLA_U16 }, [CTA_MARK_MASK] = { .type = NLA_U32 }, + [CTA_LABELS] = { .type = NLA_BINARY, + .len = __CTA_LABELS_MAX_LENGTH }, + [CTA_LABELS_MASK] = { .type = NLA_BINARY, + .len = __CTA_LABELS_MAX_LENGTH }, }; static int @@ -1465,6 +1510,31 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct, #endif static int +ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[]) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + size_t len = nla_len(cda[CTA_LABELS]); + const void *mask = cda[CTA_LABELS_MASK]; + + if (len & (sizeof(u32)-1)) /* must be multiple of u32 */ + return -EINVAL; + + if (mask) { + if (nla_len(cda[CTA_LABELS_MASK]) == 0 || + nla_len(cda[CTA_LABELS_MASK]) != len) + return -EINVAL; + mask = nla_data(cda[CTA_LABELS_MASK]); + } + + len /= sizeof(u32); + + return nf_connlabels_replace(ct, nla_data(cda[CTA_LABELS]), mask, len); +#else + return -EOPNOTSUPP; +#endif +} + +static int ctnetlink_change_conntrack(struct nf_conn *ct, const struct nlattr * const cda[]) { @@ -1510,6 +1580,11 @@ ctnetlink_change_conntrack(struct nf_conn *ct, return err; } #endif + if (cda[CTA_LABELS]) { + err = ctnetlink_attach_labels(ct, cda); + if (err < 0) + return err; + } return 0; } @@ -1598,6 +1673,8 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); + nf_ct_labels_ext_add(ct); + /* we must add conntrack extensions before confirmation. */ ct->status |= IPS_CONFIRMED; @@ -1716,6 +1793,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, else events = IPCT_NEW; + if (cda[CTA_LABELS] && + ctnetlink_attach_labels(ct, cda) == 0) + events |= (1 << IPCT_LABEL); + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | @@ -1983,6 +2064,8 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif + if (ctnetlink_dump_labels(skb, ct) < 0) + goto nla_put_failure; rcu_read_unlock(); return 0; @@ -2011,6 +2094,11 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) if (err < 0) return err; } + if (cda[CTA_LABELS]) { + err = ctnetlink_attach_labels(ct, cda); + if (err < 0) + return err; + } #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 51e928db48c8..58ab4050830c 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -212,8 +212,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct net *net, #endif } -static int -nf_conntrack_l3proto_register_net(struct nf_conntrack_l3proto *proto) +int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) { int ret = 0; struct nf_conntrack_l3proto *old; @@ -242,8 +241,9 @@ out_unlock: return ret; } +EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); -int nf_conntrack_l3proto_register(struct net *net, +int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto) { int ret = 0; @@ -254,22 +254,11 @@ int nf_conntrack_l3proto_register(struct net *net, return ret; } - ret = nf_ct_l3proto_register_sysctl(net, proto); - if (ret < 0) - return ret; - - if (net == &init_net) { - ret = nf_conntrack_l3proto_register_net(proto); - if (ret < 0) - nf_ct_l3proto_unregister_sysctl(net, proto); - } - - return ret; + return nf_ct_l3proto_register_sysctl(net, proto); } -EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register); +EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); -static void -nf_conntrack_l3proto_unregister_net(struct nf_conntrack_l3proto *proto) +void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) { BUG_ON(proto->l3proto >= AF_MAX); @@ -283,19 +272,17 @@ nf_conntrack_l3proto_unregister_net(struct nf_conntrack_l3proto *proto) synchronize_rcu(); } +EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); -void nf_conntrack_l3proto_unregister(struct net *net, +void nf_ct_l3proto_pernet_unregister(struct net *net, struct nf_conntrack_l3proto *proto) { - if (net == &init_net) - nf_conntrack_l3proto_unregister_net(proto); - nf_ct_l3proto_unregister_sysctl(net, proto); /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l3proto, proto); } -EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); +EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister); static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, struct nf_conntrack_l4proto *l4proto) @@ -376,8 +363,7 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net, /* FIXME: Allow NULL functions and sub in pointers to generic for them. --RR */ -static int -nf_conntrack_l4proto_register_net(struct nf_conntrack_l4proto *l4proto) +int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto) { int ret = 0; @@ -431,8 +417,9 @@ out_unlock: mutex_unlock(&nf_ct_proto_mutex); return ret; } +EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); -int nf_conntrack_l4proto_register(struct net *net, +int nf_ct_l4proto_pernet_register(struct net *net, struct nf_conntrack_l4proto *l4proto) { int ret = 0; @@ -452,22 +439,13 @@ int nf_conntrack_l4proto_register(struct net *net, if (ret < 0) goto out; - if (net == &init_net) { - ret = nf_conntrack_l4proto_register_net(l4proto); - if (ret < 0) { - nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); - goto out; - } - } - pn->users++; out: return ret; } -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); -static void -nf_conntrack_l4proto_unregister_net(struct nf_conntrack_l4proto *l4proto) +void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) { BUG_ON(l4proto->l3proto >= PF_MAX); @@ -482,15 +460,13 @@ nf_conntrack_l4proto_unregister_net(struct nf_conntrack_l4proto *l4proto) synchronize_rcu(); } +EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); -void nf_conntrack_l4proto_unregister(struct net *net, +void nf_ct_l4proto_pernet_unregister(struct net *net, struct nf_conntrack_l4proto *l4proto) { struct nf_proto_net *pn = NULL; - if (net == &init_net) - nf_conntrack_l4proto_unregister_net(l4proto); - pn = nf_ct_l4proto_net(net, l4proto); if (pn == NULL) return; @@ -501,11 +477,10 @@ void nf_conntrack_l4proto_unregister(struct net *net, /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); } -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister); -int nf_conntrack_proto_init(struct net *net) +int nf_conntrack_proto_pernet_init(struct net *net) { - unsigned int i; int err; struct nf_proto_net *pn = nf_ct_l4proto_net(net, &nf_conntrack_l4proto_generic); @@ -520,19 +495,12 @@ int nf_conntrack_proto_init(struct net *net) if (err < 0) return err; - if (net == &init_net) { - for (i = 0; i < AF_MAX; i++) - rcu_assign_pointer(nf_ct_l3protos[i], - &nf_conntrack_l3proto_generic); - } - pn->users++; return 0; } -void nf_conntrack_proto_fini(struct net *net) +void nf_conntrack_proto_pernet_fini(struct net *net) { - unsigned int i; struct nf_proto_net *pn = nf_ct_l4proto_net(net, &nf_conntrack_l4proto_generic); @@ -540,9 +508,21 @@ void nf_conntrack_proto_fini(struct net *net) nf_ct_l4proto_unregister_sysctl(net, pn, &nf_conntrack_l4proto_generic); - if (net == &init_net) { - /* free l3proto protocol tables */ - for (i = 0; i < PF_MAX; i++) - kfree(nf_ct_protos[i]); - } +} + +int nf_conntrack_proto_init(void) +{ + unsigned int i; + for (i = 0; i < AF_MAX; i++) + rcu_assign_pointer(nf_ct_l3protos[i], + &nf_conntrack_l3proto_generic); + return 0; +} + +void nf_conntrack_proto_fini(void) +{ + unsigned int i; + /* free l3proto protocol tables */ + for (i = 0; i < PF_MAX; i++) + kfree(nf_ct_protos[i]); } diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index a8ae287bc7af..432f95780003 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -935,32 +935,27 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { static __net_init int dccp_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &dccp_proto4); + ret = nf_ct_l4proto_pernet_register(net, &dccp_proto4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_dccp4 :protocol register failed.\n"); + pr_err("nf_conntrack_dccp4: pernet registration failed.\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &dccp_proto6); + ret = nf_ct_l4proto_pernet_register(net, &dccp_proto6); if (ret < 0) { - pr_err("nf_conntrack_l4proto_dccp6 :protocol register failed.\n"); + pr_err("nf_conntrack_dccp6: pernet registration failed.\n"); goto cleanup_dccp4; } return 0; cleanup_dccp4: - nf_conntrack_l4proto_unregister(net, - &dccp_proto4); + nf_ct_l4proto_pernet_unregister(net, &dccp_proto4); out: return ret; } static __net_exit void dccp_net_exit(struct net *net) { - nf_conntrack_l4proto_unregister(net, - &dccp_proto6); - nf_conntrack_l4proto_unregister(net, - &dccp_proto4); + nf_ct_l4proto_pernet_unregister(net, &dccp_proto6); + nf_ct_l4proto_pernet_unregister(net, &dccp_proto4); } static struct pernet_operations dccp_net_ops = { @@ -972,11 +967,33 @@ static struct pernet_operations dccp_net_ops = { static int __init nf_conntrack_proto_dccp_init(void) { - return register_pernet_subsys(&dccp_net_ops); + int ret; + + ret = nf_ct_l4proto_register(&dccp_proto4); + if (ret < 0) + goto out_dccp4; + + ret = nf_ct_l4proto_register(&dccp_proto6); + if (ret < 0) + goto out_dccp6; + + ret = register_pernet_subsys(&dccp_net_ops); + if (ret < 0) + goto out_pernet; + + return 0; +out_pernet: + nf_ct_l4proto_unregister(&dccp_proto6); +out_dccp6: + nf_ct_l4proto_unregister(&dccp_proto4); +out_dccp4: + return ret; } static void __exit nf_conntrack_proto_dccp_fini(void) { + nf_ct_l4proto_unregister(&dccp_proto6); + nf_ct_l4proto_unregister(&dccp_proto4); unregister_pernet_subsys(&dccp_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index b09b7af7f6f8..bd7d01d9c7e7 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -397,15 +397,15 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { static int proto_gre_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, &nf_conntrack_l4proto_gre4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_gre4); if (ret < 0) - pr_err("nf_conntrack_l4proto_gre4 :protocol register failed.\n"); + pr_err("nf_conntrack_gre4: pernet registration failed.\n"); return ret; } static void proto_gre_net_exit(struct net *net) { - nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_gre4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_gre4); nf_ct_gre_keymap_flush(net); } @@ -418,11 +418,26 @@ static struct pernet_operations proto_gre_net_ops = { static int __init nf_ct_proto_gre_init(void) { - return register_pernet_subsys(&proto_gre_net_ops); + int ret; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); + if (ret < 0) + goto out_gre4; + + ret = register_pernet_subsys(&proto_gre_net_ops); + if (ret < 0) + goto out_pernet; + + return 0; +out_pernet: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); +out_gre4: + return ret; } static void __exit nf_ct_proto_gre_fini(void) { + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); unregister_pernet_subsys(&proto_gre_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c746d61f83ed..480f616d5936 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -853,33 +853,28 @@ static int sctp_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_sctp4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_sctp4 :protocol register failed.\n"); + pr_err("nf_conntrack_sctp4: pernet registration failed.\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_sctp6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp6); if (ret < 0) { - pr_err("nf_conntrack_l4proto_sctp6 :protocol register failed.\n"); + pr_err("nf_conntrack_sctp6: pernet registration failed.\n"); goto cleanup_sctp4; } return 0; cleanup_sctp4: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_sctp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4); out: return ret; } static void sctp_net_exit(struct net *net) { - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_sctp6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_sctp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4); } static struct pernet_operations sctp_net_ops = { @@ -891,11 +886,33 @@ static struct pernet_operations sctp_net_ops = { static int __init nf_conntrack_proto_sctp_init(void) { - return register_pernet_subsys(&sctp_net_ops); + int ret; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4); + if (ret < 0) + goto out_sctp4; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp6); + if (ret < 0) + goto out_sctp6; + + ret = register_pernet_subsys(&sctp_net_ops); + if (ret < 0) + goto out_pernet; + + return 0; +out_pernet: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6); +out_sctp6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); +out_sctp4: + return ret; } static void __exit nf_conntrack_proto_sctp_fini(void) { + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); unregister_pernet_subsys(&sctp_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 4b66df209286..157489581c31 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -336,30 +336,28 @@ static int udplite_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udplite4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_udplite4 :protocol register failed.\n"); + pr_err("nf_conntrack_udplite4: pernet registration failed.\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udplite6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite6); if (ret < 0) { - pr_err("nf_conntrack_l4proto_udplite4 :protocol register failed.\n"); + pr_err("nf_conntrack_udplite6: pernet registration failed.\n"); goto cleanup_udplite4; } return 0; cleanup_udplite4: - nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udplite4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4); out: return ret; } static void udplite_net_exit(struct net *net) { - nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udplite6); - nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udplite4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4); } static struct pernet_operations udplite_net_ops = { @@ -371,11 +369,33 @@ static struct pernet_operations udplite_net_ops = { static int __init nf_conntrack_proto_udplite_init(void) { - return register_pernet_subsys(&udplite_net_ops); + int ret; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4); + if (ret < 0) + goto out_udplite4; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite6); + if (ret < 0) + goto out_udplite6; + + ret = register_pernet_subsys(&udplite_net_ops); + if (ret < 0) + goto out_pernet; + + return 0; +out_pernet: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6); +out_udplite6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); +out_udplite4: + return ret; } static void __exit nf_conntrack_proto_udplite_exit(void) { + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); unregister_pernet_subsys(&udplite_net_ops); } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index df8f4f284481..72a67bbe3518 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1440,8 +1440,25 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned int matchoff, matchlen; unsigned int cseq, i; + union nf_inet_addr addr; + __be16 port; + + /* Many Cisco IP phones use a high source port for SIP requests, but + * listen for the response on port 5060. If we are the local + * router for one of these phones, save the port number from the + * Via: header so that nf_nat_sip can redirect the responses to + * the correct port. + */ + if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, + SIP_HDR_VIA_UDP, NULL, &matchoff, + &matchlen, &addr, &port) > 0 && + port != ct->tuplehash[dir].tuple.src.u.udp.port && + nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3)) + ct_sip_info->forced_dport = port; for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { const struct sip_handler *handler; diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c index 6e545e26289e..87b95a2c270c 100644 --- a/net/netfilter/nf_conntrack_snmp.c +++ b/net/netfilter/nf_conntrack_snmp.c @@ -16,6 +16,7 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> +#include <linux/netfilter/nf_conntrack_snmp.h> #define SNMP_PORT 161 diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e7185c684816..7936bf7f90ba 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -472,13 +472,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) { struct ctl_table *table; - if (net_eq(net, &init_net)) { - nf_ct_netfilter_header = - register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); - if (!nf_ct_netfilter_header) - goto out; - } - table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table), GFP_KERNEL); if (!table) @@ -502,10 +495,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) out_unregister_netfilter: kfree(table); out_kmemdup: - if (net_eq(net, &init_net)) - unregister_net_sysctl_table(nf_ct_netfilter_header); -out: - printk(KERN_ERR "nf_conntrack: can't register to sysctl.\n"); return -ENOMEM; } @@ -513,8 +502,6 @@ static void nf_conntrack_standalone_fini_sysctl(struct net *net) { struct ctl_table *table; - if (net_eq(net, &init_net)) - unregister_net_sysctl_table(nf_ct_netfilter_header); table = net->ct.sysctl_header->ctl_table_arg; unregister_net_sysctl_table(net->ct.sysctl_header); kfree(table); @@ -530,51 +517,85 @@ static void nf_conntrack_standalone_fini_sysctl(struct net *net) } #endif /* CONFIG_SYSCTL */ -static int nf_conntrack_net_init(struct net *net) +static int nf_conntrack_pernet_init(struct net *net) { int ret; - ret = nf_conntrack_init(net); + ret = nf_conntrack_init_net(net); if (ret < 0) goto out_init; + ret = nf_conntrack_standalone_init_proc(net); if (ret < 0) goto out_proc; + net->ct.sysctl_checksum = 1; net->ct.sysctl_log_invalid = 0; ret = nf_conntrack_standalone_init_sysctl(net); if (ret < 0) goto out_sysctl; + return 0; out_sysctl: nf_conntrack_standalone_fini_proc(net); out_proc: - nf_conntrack_cleanup(net); + nf_conntrack_cleanup_net(net); out_init: return ret; } -static void nf_conntrack_net_exit(struct net *net) +static void nf_conntrack_pernet_exit(struct net *net) { nf_conntrack_standalone_fini_sysctl(net); nf_conntrack_standalone_fini_proc(net); - nf_conntrack_cleanup(net); + nf_conntrack_cleanup_net(net); } static struct pernet_operations nf_conntrack_net_ops = { - .init = nf_conntrack_net_init, - .exit = nf_conntrack_net_exit, + .init = nf_conntrack_pernet_init, + .exit = nf_conntrack_pernet_exit, }; static int __init nf_conntrack_standalone_init(void) { - return register_pernet_subsys(&nf_conntrack_net_ops); + int ret = nf_conntrack_init_start(); + if (ret < 0) + goto out_start; + +#ifdef CONFIG_SYSCTL + nf_ct_netfilter_header = + register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); + if (!nf_ct_netfilter_header) { + pr_err("nf_conntrack: can't register to sysctl.\n"); + goto out_sysctl; + } +#endif + + ret = register_pernet_subsys(&nf_conntrack_net_ops); + if (ret < 0) + goto out_pernet; + + nf_conntrack_init_end(); + return 0; + +out_pernet: +#ifdef CONFIG_SYSCTL + unregister_net_sysctl_table(nf_ct_netfilter_header); +out_sysctl: +#endif + nf_conntrack_cleanup_end(); +out_start: + return ret; } static void __exit nf_conntrack_standalone_fini(void) { + nf_conntrack_cleanup_start(); unregister_pernet_subsys(&nf_conntrack_net_ops); +#ifdef CONFIG_SYSCTL + unregister_net_sysctl_table(nf_ct_netfilter_header); +#endif nf_conntrack_cleanup_end(); } diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index a878ce5b252c..93da609d9d29 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -37,24 +37,15 @@ static struct nf_ct_ext_type timeout_extend __read_mostly = { .id = NF_CT_EXT_TIMEOUT, }; -int nf_conntrack_timeout_init(struct net *net) +int nf_conntrack_timeout_init(void) { - int ret = 0; - - if (net_eq(net, &init_net)) { - ret = nf_ct_extend_register(&timeout_extend); - if (ret < 0) { - printk(KERN_ERR "nf_ct_timeout: Unable to register " - "timeout extension.\n"); - return ret; - } - } - - return 0; + int ret = nf_ct_extend_register(&timeout_extend); + if (ret < 0) + pr_err("nf_ct_timeout: Unable to register timeout extension.\n"); + return ret; } -void nf_conntrack_timeout_fini(struct net *net) +void nf_conntrack_timeout_fini(void) { - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&timeout_extend); + nf_ct_extend_unregister(&timeout_extend); } diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index 7ea8026f07c9..902fb0a6b38a 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -88,37 +88,28 @@ static void nf_conntrack_tstamp_fini_sysctl(struct net *net) } #endif -int nf_conntrack_tstamp_init(struct net *net) +int nf_conntrack_tstamp_pernet_init(struct net *net) { - int ret; - net->ct.sysctl_tstamp = nf_ct_tstamp; + return nf_conntrack_tstamp_init_sysctl(net); +} - if (net_eq(net, &init_net)) { - ret = nf_ct_extend_register(&tstamp_extend); - if (ret < 0) { - printk(KERN_ERR "nf_ct_tstamp: Unable to register " - "extension\n"); - goto out_extend_register; - } - } +void nf_conntrack_tstamp_pernet_fini(struct net *net) +{ + nf_conntrack_tstamp_fini_sysctl(net); + nf_ct_extend_unregister(&tstamp_extend); +} - ret = nf_conntrack_tstamp_init_sysctl(net); +int nf_conntrack_tstamp_init(void) +{ + int ret; + ret = nf_ct_extend_register(&tstamp_extend); if (ret < 0) - goto out_sysctl; - - return 0; - -out_sysctl: - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&tstamp_extend); -out_extend_register: + pr_err("nf_ct_tstamp: Unable to register extension\n"); return ret; } -void nf_conntrack_tstamp_fini(struct net *net) +void nf_conntrack_tstamp_fini(void) { - nf_conntrack_tstamp_fini_sysctl(net); - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&tstamp_extend); + nf_ct_extend_unregister(&tstamp_extend); } diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index 16303c752213..5951146e7688 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -95,6 +95,7 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")]; unsigned int buflen; union nf_inet_addr newaddr; @@ -107,7 +108,8 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff, } else if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, addr) && ct->tuplehash[dir].tuple.dst.u.udp.port == port) { newaddr = ct->tuplehash[!dir].tuple.src.u3; - newport = ct->tuplehash[!dir].tuple.src.u.udp.port; + newport = ct_sip_info->forced_dport ? : + ct->tuplehash[!dir].tuple.src.u.udp.port; } else return 1; @@ -144,6 +146,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); unsigned int coff, matchoff, matchlen; enum sip_header_types hdr; union nf_inet_addr addr; @@ -258,6 +261,21 @@ next: !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO)) return NF_DROP; + /* Mangle destination port for Cisco phones, then fix up checksums */ + if (dir == IP_CT_DIR_REPLY && ct_sip_info->forced_dport) { + struct udphdr *uh; + + if (!skb_make_writable(skb, skb->len)) + return NF_DROP; + + uh = (void *)skb->data + protoff; + uh->dest = ct_sip_info->forced_dport; + + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, protoff, + 0, 0, NULL, 0)) + return NF_DROP; + } + return NF_ACCEPT; } @@ -311,8 +329,10 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); union nf_inet_addr newaddr; u_int16_t port; + __be16 srcport; char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")]; unsigned int buflen; @@ -326,8 +346,9 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, /* If the signalling port matches the connection's source port in the * original direction, try to use the destination port in the opposite * direction. */ - if (exp->tuple.dst.u.udp.port == - ct->tuplehash[dir].tuple.src.u.udp.port) + srcport = ct_sip_info->forced_dport ? : + ct->tuplehash[dir].tuple.src.u.udp.port; + if (exp->tuple.dst.u.udp.port == srcport) port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port); else port = ntohs(exp->tuple.dst.u.udp.port); diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c new file mode 100644 index 000000000000..12d4da8e6c77 --- /dev/null +++ b/net/netfilter/xt_bpf.c @@ -0,0 +1,73 @@ +/* Xtables module to match packets using a BPF filter. + * Copyright 2013 Google Inc. + * Written by Willem de Bruijn <willemb@google.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/filter.h> + +#include <linux/netfilter/xt_bpf.h> +#include <linux/netfilter/x_tables.h> + +MODULE_AUTHOR("Willem de Bruijn <willemb@google.com>"); +MODULE_DESCRIPTION("Xtables: BPF filter match"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_bpf"); +MODULE_ALIAS("ip6t_bpf"); + +static int bpf_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_bpf_info *info = par->matchinfo; + struct sock_fprog program; + + program.len = info->bpf_program_num_elem; + program.filter = (struct sock_filter __user *) info->bpf_program; + if (sk_unattached_filter_create(&info->filter, &program)) { + pr_info("bpf: check failed: parse error\n"); + return -EINVAL; + } + + return 0; +} + +static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_bpf_info *info = par->matchinfo; + + return SK_RUN_FILTER(info->filter, skb); +} + +static void bpf_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_bpf_info *info = par->matchinfo; + sk_unattached_filter_destroy(info->filter); +} + +static struct xt_match bpf_mt_reg __read_mostly = { + .name = "bpf", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = bpf_mt_check, + .match = bpf_mt, + .destroy = bpf_mt_destroy, + .matchsize = sizeof(struct xt_bpf_info), + .me = THIS_MODULE, +}; + +static int __init bpf_mt_init(void) +{ + return xt_register_match(&bpf_mt_reg); +} + +static void __exit bpf_mt_exit(void) +{ + xt_unregister_match(&bpf_mt_reg); +} + +module_init(bpf_mt_init); +module_exit(bpf_mt_exit); diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c new file mode 100644 index 000000000000..9f8719df2001 --- /dev/null +++ b/net/netfilter/xt_connlabel.c @@ -0,0 +1,99 @@ +/* + * (C) 2013 Astaro GmbH & Co KG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_labels.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_DESCRIPTION("Xtables: add/match connection trackling labels"); +MODULE_ALIAS("ipt_connlabel"); +MODULE_ALIAS("ip6t_connlabel"); + +static bool +connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_connlabel_mtinfo *info = par->matchinfo; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + bool invert = info->options & XT_CONNLABEL_OP_INVERT; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL || nf_ct_is_untracked(ct)) + return invert; + + if (info->options & XT_CONNLABEL_OP_SET) + return (nf_connlabel_set(ct, info->bit) == 0) ^ invert; + + return nf_connlabel_match(ct, info->bit) ^ invert; +} + +static int connlabel_mt_check(const struct xt_mtchk_param *par) +{ + const int options = XT_CONNLABEL_OP_INVERT | + XT_CONNLABEL_OP_SET; + struct xt_connlabel_mtinfo *info = par->matchinfo; + int ret; + size_t words; + + if (info->bit > XT_CONNLABEL_MAXBIT) + return -ERANGE; + + if (info->options & ~options) { + pr_err("Unknown options in mask %x\n", info->options); + return -EINVAL; + } + + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) { + pr_info("cannot load conntrack support for proto=%u\n", + par->family); + return ret; + } + + par->net->ct.labels_used++; + words = BITS_TO_LONGS(info->bit+1); + if (words > par->net->ct.label_words) + par->net->ct.label_words = words; + + return ret; +} + +static void connlabel_mt_destroy(const struct xt_mtdtor_param *par) +{ + par->net->ct.labels_used--; + if (par->net->ct.labels_used == 0) + par->net->ct.label_words = 0; + nf_ct_l3proto_module_put(par->family); +} + +static struct xt_match connlabels_mt_reg __read_mostly = { + .name = "connlabel", + .family = NFPROTO_UNSPEC, + .checkentry = connlabel_mt_check, + .match = connlabel_mt, + .matchsize = sizeof(struct xt_connlabel_mtinfo), + .destroy = connlabel_mt_destroy, + .me = THIS_MODULE, +}; + +static int __init connlabel_mt_init(void) +{ + return xt_register_match(&connlabels_mt_reg); +} + +static void __exit connlabel_mt_exit(void) +{ + xt_unregister_match(&connlabels_mt_reg); +} + +module_init(connlabel_mt_init); +module_exit(connlabel_mt_exit); |