summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/dsa/b53.txt9
-rw-r--r--Documentation/devicetree/bindings/net/socfpga-dwmac.txt19
-rw-r--r--Documentation/networking/vrf.txt201
-rw-r--r--MAINTAINERS1
-rw-r--r--drivers/net/dsa/b53/b53_common.c12
-rw-r--r--drivers/net/dsa/b53/b53_priv.h1
-rw-r--r--drivers/net/dsa/b53/b53_srab.c47
-rw-r--r--drivers/net/ethernet/broadcom/Kconfig23
-rw-r--r--drivers/net/ethernet/broadcom/Makefile2
-rw-r--r--drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c266
-rw-r--r--drivers/net/ethernet/broadcom/bgmac-bcma.c315
-rw-r--r--drivers/net/ethernet/broadcom/bgmac-platform.c189
-rw-r--r--drivers/net/ethernet/broadcom/bgmac.c658
-rw-r--r--drivers/net/ethernet/broadcom/bgmac.h112
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c2
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c28
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_netdev.c8
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c8
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c18
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c2
-rw-r--r--drivers/net/ethernet/smsc/smc91x.c13
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Makefile3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c274
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h36
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c140
-rw-r--r--drivers/net/ethernet/synopsys/dwc_eth_qos.c2
-rw-r--r--drivers/net/geneve.c4
-rw-r--r--drivers/net/hyperv/netvsc.c88
-rw-r--r--drivers/net/tun.c3
-rw-r--r--drivers/net/usb/r8152.c27
-rw-r--r--drivers/net/vxlan.c4
-rw-r--r--include/linux/rbtree.h2
-rw-r--r--include/linux/rbtree_augmented.h13
-rw-r--r--include/linux/rcupdate.h8
-rw-r--r--include/net/dsa.h2
-rw-r--r--include/net/sctp/structs.h23
-rw-r--r--include/trace/events/devlink.h68
-rw-r--r--include/trace/events/napi.h13
-rw-r--r--include/uapi/linux/bpf.h7
-rw-r--r--include/uapi/linux/if_bridge.h7
-rw-r--r--include/uapi/linux/sctp.h42
-rw-r--r--kernel/bpf/inode.c4
-rw-r--r--kernel/trace/bpf_trace.c13
-rw-r--r--lib/rbtree.c26
-rw-r--r--net/bridge/br_forward.c7
-rw-r--r--net/bridge/br_input.c2
-rw-r--r--net/bridge/br_multicast.c48
-rw-r--r--net/bridge/br_private.h5
-rw-r--r--net/core/dev.c4
-rw-r--r--net/core/devlink.c4
-rw-r--r--net/core/drop_monitor.c3
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/dsa/dsa.c6
-rw-r--r--net/dsa/dsa2.c9
-rw-r--r--net/ipv4/af_inet.c5
-rw-r--r--net/ipv4/devinet.c12
-rw-r--r--net/ipv4/ipip.c137
-rw-r--r--net/ipv4/tunnel4.c72
-rw-r--r--net/ipv6/addrconf.c4
-rw-r--r--net/ipv6/ip6mr.c13
-rw-r--r--net/ipv6/sit.c93
-rw-r--r--net/mpls/af_mpls.c6
-rw-r--r--net/rxrpc/Makefile1
-rw-r--r--net/rxrpc/af_rxrpc.c24
-rw-r--r--net/rxrpc/ar-internal.h156
-rw-r--r--net/rxrpc/call_accept.c38
-rw-r--r--net/rxrpc/call_event.c14
-rw-r--r--net/rxrpc/call_object.c273
-rw-r--r--net/rxrpc/conn_client.c282
-rw-r--r--net/rxrpc/conn_event.c44
-rw-r--r--net/rxrpc/conn_object.c636
-rw-r--r--net/rxrpc/conn_service.c230
-rw-r--r--net/rxrpc/input.c71
-rw-r--r--net/rxrpc/insecure.c7
-rw-r--r--net/rxrpc/local_object.c19
-rw-r--r--net/rxrpc/peer_object.c2
-rw-r--r--net/rxrpc/proc.c23
-rw-r--r--net/rxrpc/rxkad.c191
-rw-r--r--net/rxrpc/utils.c37
-rw-r--r--net/sched/sch_hfsc.c10
-rw-r--r--net/sctp/associola.c1
-rw-r--r--net/sctp/chunk.c25
-rw-r--r--net/sctp/endpointola.c1
-rw-r--r--net/sctp/output.c20
-rw-r--r--net/sctp/outqueue.c99
-rw-r--r--net/sctp/sm_make_chunk.c27
-rw-r--r--net/sctp/socket.c240
-rwxr-xr-xtools/hv/bondvf.sh193
-rw-r--r--tools/perf/scripts/python/netdev-times.py11
90 files changed, 3939 insertions, 1917 deletions
diff --git a/Documentation/devicetree/bindings/net/dsa/b53.txt b/Documentation/devicetree/bindings/net/dsa/b53.txt
index ca752db14dff..d6c6e41648d4 100644
--- a/Documentation/devicetree/bindings/net/dsa/b53.txt
+++ b/Documentation/devicetree/bindings/net/dsa/b53.txt
@@ -20,6 +20,15 @@ Required properties:
"brcm,bcm53018-srab"
"brcm,bcm53019-srab" and the mandatory "brcm,bcm5301x-srab" string
+ For the BCM585xx/586XX/88312 SoCs with an integrated switch, must be one of:
+ "brcm,bcm58522-srab"
+ "brcm,bcm58523-srab"
+ "brcm,bcm58525-srab"
+ "brcm,bcm58622-srab"
+ "brcm,bcm58623-srab"
+ "brcm,bcm58625-srab"
+ "brcm,bcm88312-srab" and the mandatory "brcm,nsp-srab" string
+
For the BCM63xx/33xx SoCs with an integrated switch, must be one of:
"brcm,bcm3384-switch"
"brcm,bcm6328-switch"
diff --git a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt
index 72d82d684342..2e68a3cd8513 100644
--- a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt
@@ -17,9 +17,26 @@ Required properties:
Optional properties:
altr,emac-splitter: Should be the phandle to the emac splitter soft IP node if
DWMAC controller is connected emac splitter.
+phy-mode: The phy mode the ethernet operates in
+altr,gmii-to-sgmii-converter: phandle to the TSE SGMII converter
+
+This device node has additional phandle dependency, the sgmii converter:
+
+Required properties:
+ - compatible : Should be altr,gmii-to-sgmii-2.0
+ - reg-names : Should be "eth_tse_control_port"
Example:
+gmii_to_sgmii_converter: phy@0x100000240 {
+ compatible = "altr,gmii-to-sgmii-2.0";
+ reg = <0x00000001 0x00000240 0x00000008>,
+ <0x00000001 0x00000200 0x00000040>;
+ reg-names = "eth_tse_control_port";
+ clocks = <&sgmii_1_clk_0 &emac1 1 &sgmii_clk_125 &sgmii_clk_125>;
+ clock-names = "tse_pcs_ref_clk_clock_connection", "tse_rx_cdr_refclk";
+};
+
gmac0: ethernet@ff700000 {
compatible = "altr,socfpga-stmmac", "snps,dwmac-3.70a", "snps,dwmac";
altr,sysmgr-syscon = <&sysmgr 0x60 0>;
@@ -30,4 +47,6 @@ gmac0: ethernet@ff700000 {
mac-address = [00 00 00 00 00 00];/* Filled in by U-Boot */
clocks = <&emac_0_clk>;
clock-names = "stmmaceth";
+ phy-mode = "sgmii";
+ altr,gmii-to-sgmii-converter = <&gmii_to_sgmii_converter>;
};
diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt
index 5da679c573d2..11a2b99bdbb9 100644
--- a/Documentation/networking/vrf.txt
+++ b/Documentation/networking/vrf.txt
@@ -15,9 +15,9 @@ the use of higher priority ip rules (Policy Based Routing, PBR) to take
precedence over the VRF device rules directing specific traffic as desired.
In addition, VRF devices allow VRFs to be nested within namespaces. For
-example network namespaces provide separation of network interfaces at L1
-(Layer 1 separation), VLANs on the interfaces within a namespace provide
-L2 separation and then VRF devices provide L3 separation.
+example network namespaces provide separation of network interfaces at the
+device layer, VLANs on the interfaces within a namespace provide L2 separation
+and then VRF devices provide L3 separation.
Design
------
@@ -37,21 +37,22 @@ are then enslaved to a VRF device:
+------+ +------+
Packets received on an enslaved device and are switched to the VRF device
-using an rx_handler which gives the impression that packets flow through
-the VRF device. Similarly on egress routing rules are used to send packets
-to the VRF device driver before getting sent out the actual interface. This
-allows tcpdump on a VRF device to capture all packets into and out of the
-VRF as a whole.[1] Similarly, netfilter [2] and tc rules can be applied
-using the VRF device to specify rules that apply to the VRF domain as a whole.
+in the IPv4 and IPv6 processing stacks giving the impression that packets
+flow through the VRF device. Similarly on egress routing rules are used to
+send packets to the VRF device driver before getting sent out the actual
+interface. This allows tcpdump on a VRF device to capture all packets into
+and out of the VRF as a whole.[1] Similarly, netfilter[2] and tc rules can be
+applied using the VRF device to specify rules that apply to the VRF domain
+as a whole.
[1] Packets in the forwarded state do not flow through the device, so those
packets are not seen by tcpdump. Will revisit this limitation in a
future release.
-[2] Iptables on ingress is limited to NF_INET_PRE_ROUTING only with skb->dev
- set to real ingress device and egress is limited to NF_INET_POST_ROUTING.
- Will revisit this limitation in a future release.
-
+[2] Iptables on ingress supports PREROUTING with skb->dev set to the real
+ ingress device and both INPUT and PREROUTING rules with skb->dev set to
+ the VRF device. For egress, POSTROUTING and OUTPUT rules can be written
+ using either the VRF device or real egress device.
Setup
-----
@@ -59,23 +60,33 @@ Setup
e.g, ip link add vrf-blue type vrf table 10
ip link set dev vrf-blue up
-2. Rules are added that send lookups to the associated FIB table when the
- iif or oif is the VRF device. e.g.,
+2. An l3mdev FIB rule directs lookups to the table associated with the device.
+ A single l3mdev rule is sufficient for all VRFs. The VRF device adds the
+ l3mdev rule for IPv4 and IPv6 when the first device is created with a
+ default preference of 1000. Users may delete the rule if desired and add
+ with a different priority or install per-VRF rules.
+
+ Prior to the v4.8 kernel iif and oif rules are needed for each VRF device:
ip ru add oif vrf-blue table 10
ip ru add iif vrf-blue table 10
- Set the default route for the table (and hence default route for the VRF).
- e.g, ip route add table 10 prohibit default
+3. Set the default route for the table (and hence default route for the VRF).
+ ip route add table 10 unreachable default
-3. Enslave L3 interfaces to a VRF device.
- e.g, ip link set dev eth1 master vrf-blue
+4. Enslave L3 interfaces to a VRF device.
+ ip link set dev eth1 master vrf-blue
Local and connected routes for enslaved devices are automatically moved to
the table associated with VRF device. Any additional routes depending on
- the enslaved device will need to be reinserted following the enslavement.
+ the enslaved device are dropped and will need to be reinserted to the VRF
+ FIB table following the enslavement.
+
+ The IPv6 sysctl option keep_addr_on_down can be enabled to keep IPv6 global
+ addresses as VRF enslavement changes.
+ sysctl -w net.ipv6.conf.all.keep_addr_on_down=1
-4. Additional VRF routes are added to associated table.
- e.g., ip route add table 10 ...
+5. Additional VRF routes are added to associated table.
+ ip route add table 10 ...
Applications
@@ -87,39 +98,34 @@ VRF device:
or to specify the output device using cmsg and IP_PKTINFO.
+TCP services running in the default VRF context (i.e., not bound to any VRF
+device) can work across all VRF domains by enabling the tcp_l3mdev_accept
+sysctl option:
+ sysctl -w net.ipv4.tcp_l3mdev_accept=1
-Limitations
------------
-Index of original ingress interface is not available via cmsg. Will address
-soon.
+netfilter rules on the VRF device can be used to limit access to services
+running in the default VRF context as well.
+
+The default VRF does not have limited scope with respect to port bindings.
+That is, if a process does a wildcard bind to a port in the default VRF it
+owns the port across all VRF domains within the network namespace.
################################################################################
Using iproute2 for VRFs
=======================
-VRF devices do *not* have to start with 'vrf-'. That is a convention used here
-for emphasis of the device type, similar to use of 'br' in bridge names.
+iproute2 supports the vrf keyword as of v4.7. For backwards compatibility this
+section lists both commands where appropriate -- with the vrf keyword and the
+older form without it.
1. Create a VRF
To instantiate a VRF device and associate it with a table:
$ ip link add dev NAME type vrf table ID
- Remember to add the ip rules as well:
- $ ip ru add oif NAME table 10
- $ ip ru add iif NAME table 10
- $ ip -6 ru add oif NAME table 10
- $ ip -6 ru add iif NAME table 10
-
- Without the rules route lookups are not directed to the table.
-
- For example:
- $ ip link add dev vrf-blue type vrf table 10
- $ ip ru add pref 200 oif vrf-blue table 10
- $ ip ru add pref 200 iif vrf-blue table 10
- $ ip -6 ru add pref 200 oif vrf-blue table 10
- $ ip -6 ru add pref 200 iif vrf-blue table 10
-
+ As of v4.8 the kernel supports the l3mdev FIB rule where a single rule
+ covers all VRFs. The l3mdev rule is created for IPv4 and IPv6 on first
+ device create.
2. List VRFs
@@ -129,16 +135,16 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
For example:
$ ip -d link show type vrf
- 11: vrf-mgmt: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
+ 11: mgmt: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
link/ether 72:b3:ba:91:e2:24 brd ff:ff:ff:ff:ff:ff promiscuity 0
vrf table 1 addrgenmode eui64
- 12: vrf-red: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
+ 12: red: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
link/ether b6:6f:6e:f6:da:73 brd ff:ff:ff:ff:ff:ff promiscuity 0
vrf table 10 addrgenmode eui64
- 13: vrf-blue: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
+ 13: blue: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
link/ether 36:62:e8:7d:bb:8c brd ff:ff:ff:ff:ff:ff promiscuity 0
vrf table 66 addrgenmode eui64
- 14: vrf-green: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
+ 14: green: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
link/ether e6:28:b8:63:70:bb brd ff:ff:ff:ff:ff:ff promiscuity 0
vrf table 81 addrgenmode eui64
@@ -146,43 +152,44 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
Or in brief output:
$ ip -br link show type vrf
- vrf-mgmt UP 72:b3:ba:91:e2:24 <NOARP,MASTER,UP,LOWER_UP>
- vrf-red UP b6:6f:6e:f6:da:73 <NOARP,MASTER,UP,LOWER_UP>
- vrf-blue UP 36:62:e8:7d:bb:8c <NOARP,MASTER,UP,LOWER_UP>
- vrf-green UP e6:28:b8:63:70:bb <NOARP,MASTER,UP,LOWER_UP>
+ mgmt UP 72:b3:ba:91:e2:24 <NOARP,MASTER,UP,LOWER_UP>
+ red UP b6:6f:6e:f6:da:73 <NOARP,MASTER,UP,LOWER_UP>
+ blue UP 36:62:e8:7d:bb:8c <NOARP,MASTER,UP,LOWER_UP>
+ green UP e6:28:b8:63:70:bb <NOARP,MASTER,UP,LOWER_UP>
3. Assign a Network Interface to a VRF
Network interfaces are assigned to a VRF by enslaving the netdevice to a
VRF device:
- $ ip link set dev NAME master VRF-NAME
+ $ ip link set dev NAME master NAME
On enslavement connected and local routes are automatically moved to the
table associated with the VRF device.
For example:
- $ ip link set dev eth0 master vrf-mgmt
+ $ ip link set dev eth0 master mgmt
4. Show Devices Assigned to a VRF
To show devices that have been assigned to a specific VRF add the master
option to the ip command:
- $ ip link show master VRF-NAME
+ $ ip link show vrf NAME
+ $ ip link show master NAME
For example:
- $ ip link show master vrf-red
- 3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master vrf-red state UP mode DEFAULT group default qlen 1000
+ $ ip link show vrf red
+ 3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master red state UP mode DEFAULT group default qlen 1000
link/ether 02:00:00:00:02:02 brd ff:ff:ff:ff:ff:ff
- 4: eth2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master vrf-red state UP mode DEFAULT group default qlen 1000
+ 4: eth2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master red state UP mode DEFAULT group default qlen 1000
link/ether 02:00:00:00:02:03 brd ff:ff:ff:ff:ff:ff
- 7: eth5: <BROADCAST,MULTICAST> mtu 1500 qdisc noop master vrf-red state DOWN mode DEFAULT group default qlen 1000
+ 7: eth5: <BROADCAST,MULTICAST> mtu 1500 qdisc noop master red state DOWN mode DEFAULT group default qlen 1000
link/ether 02:00:00:00:02:06 brd ff:ff:ff:ff:ff:ff
Or using the brief output:
- $ ip -br link show master vrf-red
+ $ ip -br link show master red
eth1 UP 02:00:00:00:02:02 <BROADCAST,MULTICAST,UP,LOWER_UP>
eth2 UP 02:00:00:00:02:03 <BROADCAST,MULTICAST,UP,LOWER_UP>
eth5 DOWN 02:00:00:00:02:06 <BROADCAST,MULTICAST>
@@ -192,14 +199,15 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
To list neighbor entries associated with devices enslaved to a VRF device
add the master option to the ip command:
- $ ip [-6] neigh show master VRF-NAME
+ $ ip [-6] neigh show vrf NAME
+ $ ip [-6] neigh show master NAME
For example:
- $ ip neigh show master vrf-red
+ $ ip neigh show vrf red
10.2.1.254 dev eth1 lladdr a6:d9:c7:4f:06:23 REACHABLE
10.2.2.254 dev eth2 lladdr 5e:54:01:6a:ee:80 REACHABLE
- $ ip -6 neigh show master vrf-red
+ $ ip -6 neigh show vrf red
2002:1::64 dev eth1 lladdr a6:d9:c7:4f:06:23 REACHABLE
@@ -207,11 +215,12 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
To show addresses for interfaces associated with a VRF add the master
option to the ip command:
- $ ip addr show master VRF-NAME
+ $ ip addr show vrf NAME
+ $ ip addr show master NAME
For example:
- $ ip addr show master vrf-red
- 3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master vrf-red state UP group default qlen 1000
+ $ ip addr show vrf red
+ 3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master red state UP group default qlen 1000
link/ether 02:00:00:00:02:02 brd ff:ff:ff:ff:ff:ff
inet 10.2.1.2/24 brd 10.2.1.255 scope global eth1
valid_lft forever preferred_lft forever
@@ -219,7 +228,7 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
valid_lft forever preferred_lft forever
inet6 fe80::ff:fe00:202/64 scope link
valid_lft forever preferred_lft forever
- 4: eth2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master vrf-red state UP group default qlen 1000
+ 4: eth2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master red state UP group default qlen 1000
link/ether 02:00:00:00:02:03 brd ff:ff:ff:ff:ff:ff
inet 10.2.2.2/24 brd 10.2.2.255 scope global eth2
valid_lft forever preferred_lft forever
@@ -227,11 +236,11 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
valid_lft forever preferred_lft forever
inet6 fe80::ff:fe00:203/64 scope link
valid_lft forever preferred_lft forever
- 7: eth5: <BROADCAST,MULTICAST> mtu 1500 qdisc noop master vrf-red state DOWN group default qlen 1000
+ 7: eth5: <BROADCAST,MULTICAST> mtu 1500 qdisc noop master red state DOWN group default qlen 1000
link/ether 02:00:00:00:02:06 brd ff:ff:ff:ff:ff:ff
Or in brief format:
- $ ip -br addr show master vrf-red
+ $ ip -br addr show vrf red
eth1 UP 10.2.1.2/24 2002:1::2/120 fe80::ff:fe00:202/64
eth2 UP 10.2.2.2/24 2002:2::2/120 fe80::ff:fe00:203/64
eth5 DOWN
@@ -241,10 +250,11 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
To show routes for a VRF use the ip command to display the table associated
with the VRF device:
+ $ ip [-6] route show vrf NAME
$ ip [-6] route show table ID
For example:
- $ ip route show table vrf-red
+ $ ip route show vrf red
prohibit default
broadcast 10.2.1.0 dev eth1 proto kernel scope link src 10.2.1.2
10.2.1.0/24 dev eth1 proto kernel scope link src 10.2.1.2
@@ -255,7 +265,7 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
local 10.2.2.2 dev eth2 proto kernel scope host src 10.2.2.2
broadcast 10.2.2.255 dev eth2 proto kernel scope link src 10.2.2.2
- $ ip -6 route show table vrf-red
+ $ ip -6 route show vrf red
local 2002:1:: dev lo proto none metric 0 pref medium
local 2002:1::2 dev lo proto none metric 0 pref medium
2002:1::/120 dev eth1 proto kernel metric 256 pref medium
@@ -268,23 +278,24 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
local fe80::ff:fe00:203 dev lo proto none metric 0 pref medium
fe80::/64 dev eth1 proto kernel metric 256 pref medium
fe80::/64 dev eth2 proto kernel metric 256 pref medium
- ff00::/8 dev vrf-red metric 256 pref medium
+ ff00::/8 dev red metric 256 pref medium
ff00::/8 dev eth1 metric 256 pref medium
ff00::/8 dev eth2 metric 256 pref medium
8. Route Lookup for a VRF
- A test route lookup can be done for a VRF by adding the oif option to ip:
- $ ip [-6] route get oif VRF-NAME ADDRESS
+ A test route lookup can be done for a VRF:
+ $ ip [-6] route get vrf NAME ADDRESS
+ $ ip [-6] route get oif NAME ADDRESS
For example:
- $ ip route get 10.2.1.40 oif vrf-red
- 10.2.1.40 dev eth1 table vrf-red src 10.2.1.2
+ $ ip route get 10.2.1.40 vrf red
+ 10.2.1.40 dev eth1 table red src 10.2.1.2
cache
- $ ip -6 route get 2002:1::32 oif vrf-red
- 2002:1::32 from :: dev eth1 table vrf-red proto kernel src 2002:1::2 metric 256 pref medium
+ $ ip -6 route get 2002:1::32 vrf red
+ 2002:1::32 from :: dev eth1 table red proto kernel src 2002:1::2 metric 256 pref medium
9. Removing Network Interface from a VRF
@@ -303,46 +314,40 @@ for emphasis of the device type, similar to use of 'br' in bridge names.
Commands used in this example:
-cat >> /etc/iproute2/rt_tables <<EOF
-1 vrf-mgmt
-10 vrf-red
-66 vrf-blue
-81 vrf-green
+cat >> /etc/iproute2/rt_tables.d/vrf.conf <<EOF
+1 mgmt
+10 red
+66 blue
+81 green
EOF
function vrf_create
{
VRF=$1
TBID=$2
- # create VRF device
- ip link add vrf-${VRF} type vrf table ${TBID}
- # add rules that direct lookups to vrf table
- ip ru add pref 200 oif vrf-${VRF} table ${TBID}
- ip ru add pref 200 iif vrf-${VRF} table ${TBID}
- ip -6 ru add pref 200 oif vrf-${VRF} table ${TBID}
- ip -6 ru add pref 200 iif vrf-${VRF} table ${TBID}
+ # create VRF device
+ ip link add ${VRF} type vrf table ${TBID}
if [ "${VRF}" != "mgmt" ]; then
- ip route add table ${TBID} prohibit default
+ ip route add table ${TBID} unreachable default
fi
- ip link set dev vrf-${VRF} up
- ip link set dev vrf-${VRF} state up
+ ip link set dev ${VRF} up
}
vrf_create mgmt 1
-ip link set dev eth0 master vrf-mgmt
+ip link set dev eth0 master mgmt
vrf_create red 10
-ip link set dev eth1 master vrf-red
-ip link set dev eth2 master vrf-red
-ip link set dev eth5 master vrf-red
+ip link set dev eth1 master red
+ip link set dev eth2 master red
+ip link set dev eth5 master red
vrf_create blue 66
-ip link set dev eth3 master vrf-blue
+ip link set dev eth3 master blue
vrf_create green 81
-ip link set dev eth4 master vrf-green
+ip link set dev eth4 master green
Interface addresses from /etc/network/interfaces:
diff --git a/MAINTAINERS b/MAINTAINERS
index 6374be26dde3..06e84119c690 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9710,7 +9710,6 @@ F: Documentation/ABI/*/sysfs-driver-hid-roccat*
ROCKER DRIVER
M: Jiri Pirko <jiri@resnulli.us>
-M: Scott Feldman <sfeldma@gmail.com>
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/rocker/
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 444de66667b9..bda37d336736 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1581,6 +1581,18 @@ static const struct b53_chip_data b53_switch_chips[] = {
.jumbo_pm_reg = B53_JUMBO_PORT_MASK,
.jumbo_size_reg = B53_JUMBO_MAX_SIZE,
},
+ {
+ .chip_id = BCM58XX_DEVICE_ID,
+ .dev_name = "BCM585xx/586xx/88312",
+ .vlans = 4096,
+ .enabled_ports = 0x1ff,
+ .arl_entries = 4,
+ .cpu_port = B53_CPU_PORT_25,
+ .vta_regs = B53_VTA_REGS,
+ .duplex_reg = B53_DUPLEX_STAT_GE,
+ .jumbo_pm_reg = B53_JUMBO_PORT_MASK,
+ .jumbo_size_reg = B53_JUMBO_MAX_SIZE,
+ },
};
static int b53_switch_init(struct b53_device *dev)
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 5d8c602fb877..835a744f206e 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -59,6 +59,7 @@ enum {
BCM53012_DEVICE_ID = 0x53012,
BCM53018_DEVICE_ID = 0x53018,
BCM53019_DEVICE_ID = 0x53019,
+ BCM58XX_DEVICE_ID = 0x5800,
};
#define B53_N_PORTS 9
diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c
index 70fd47284535..3e2d4a5fcd5a 100644
--- a/drivers/net/dsa/b53/b53_srab.c
+++ b/drivers/net/dsa/b53/b53_srab.c
@@ -21,6 +21,7 @@
#include <linux/delay.h>
#include <linux/platform_device.h>
#include <linux/platform_data/b53.h>
+#include <linux/of.h>
#include "b53_priv.h"
@@ -356,12 +357,45 @@ static struct b53_io_ops b53_srab_ops = {
.write64 = b53_srab_write64,
};
+static const struct of_device_id b53_srab_of_match[] = {
+ { .compatible = "brcm,bcm53010-srab" },
+ { .compatible = "brcm,bcm53011-srab" },
+ { .compatible = "brcm,bcm53012-srab" },
+ { .compatible = "brcm,bcm53018-srab" },
+ { .compatible = "brcm,bcm53019-srab" },
+ { .compatible = "brcm,bcm5301x-srab" },
+ { .compatible = "brcm,bcm58522-srab", .data = (void *)BCM58XX_DEVICE_ID },
+ { .compatible = "brcm,bcm58525-srab", .data = (void *)BCM58XX_DEVICE_ID },
+ { .compatible = "brcm,bcm58535-srab", .data = (void *)BCM58XX_DEVICE_ID },
+ { .compatible = "brcm,bcm58622-srab", .data = (void *)BCM58XX_DEVICE_ID },
+ { .compatible = "brcm,bcm58623-srab", .data = (void *)BCM58XX_DEVICE_ID },
+ { .compatible = "brcm,bcm58625-srab", .data = (void *)BCM58XX_DEVICE_ID },
+ { .compatible = "brcm,bcm88312-srab", .data = (void *)BCM58XX_DEVICE_ID },
+ { .compatible = "brcm,nsp-srab", .data = (void *)BCM58XX_DEVICE_ID },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, b53_srab_of_match);
+
static int b53_srab_probe(struct platform_device *pdev)
{
+ struct b53_platform_data *pdata = pdev->dev.platform_data;
+ struct device_node *dn = pdev->dev.of_node;
+ const struct of_device_id *of_id = NULL;
struct b53_srab_priv *priv;
struct b53_device *dev;
struct resource *r;
+ if (dn)
+ of_id = of_match_node(b53_srab_of_match, dn);
+
+ if (of_id) {
+ pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+
+ pdata->chip_id = (u32)(unsigned long)of_id->data;
+ }
+
priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
@@ -375,6 +409,9 @@ static int b53_srab_probe(struct platform_device *pdev)
if (!dev)
return -ENOMEM;
+ if (pdata)
+ dev->pdata = pdata;
+
platform_set_drvdata(pdev, dev);
return b53_switch_register(dev);
@@ -390,16 +427,6 @@ static int b53_srab_remove(struct platform_device *pdev)
return 0;
}
-static const struct of_device_id b53_srab_of_match[] = {
- { .compatible = "brcm,bcm53010-srab" },
- { .compatible = "brcm,bcm53011-srab" },
- { .compatible = "brcm,bcm53012-srab" },
- { .compatible = "brcm,bcm53018-srab" },
- { .compatible = "brcm,bcm53019-srab" },
- { .compatible = "brcm,bcm5301x-srab" },
- { /* sentinel */ },
-};
-
static struct platform_driver b53_srab_driver = {
.probe = b53_srab_probe,
.remove = b53_srab_remove,
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index d74a92e1c27d..bd8c80c0b71c 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -140,10 +140,18 @@ config BNX2X_SRIOV
allows for virtual function acceleration in virtual environments.
config BGMAC
- tristate "BCMA bus GBit core support"
+ tristate
+ help
+ This enables the integrated ethernet controller support for many
+ Broadcom (mostly iProc) SoCs. An appropriate bus interface driver
+ needs to be enabled to select this.
+
+config BGMAC_BCMA
+ tristate "Broadcom iProc GBit BCMA support"
depends on BCMA && BCMA_HOST_SOC
depends on HAS_DMA
depends on BCM47XX || ARCH_BCM_5301X || COMPILE_TEST
+ select BGMAC
select PHYLIB
select FIXED_PHY
---help---
@@ -152,6 +160,19 @@ config BGMAC
In case of using this driver on BCM4706 it's also requires to enable
BCMA_DRIVER_GMAC_CMN to make it work.
+config BGMAC_PLATFORM
+ tristate "Broadcom iProc GBit platform support"
+ depends on HAS_DMA
+ depends on ARCH_BCM_IPROC || COMPILE_TEST
+ depends on OF
+ select BGMAC
+ select PHYLIB
+ select FIXED_PHY
+ default ARCH_BCM_IPROC
+ ---help---
+ Say Y here if you want to use the Broadcom iProc Gigabit Ethernet
+ controller through the generic platform interface.
+
config SYSTEMPORT
tristate "Broadcom SYSTEMPORT internal MAC support"
depends on OF
diff --git a/drivers/net/ethernet/broadcom/Makefile b/drivers/net/ethernet/broadcom/Makefile
index 00584d78b3e0..79f2372c66ec 100644
--- a/drivers/net/ethernet/broadcom/Makefile
+++ b/drivers/net/ethernet/broadcom/Makefile
@@ -11,5 +11,7 @@ obj-$(CONFIG_BNX2X) += bnx2x/
obj-$(CONFIG_SB1250_MAC) += sb1250-mac.o
obj-$(CONFIG_TIGON3) += tg3.o
obj-$(CONFIG_BGMAC) += bgmac.o
+obj-$(CONFIG_BGMAC_BCMA) += bgmac-bcma.o bgmac-bcma-mdio.o
+obj-$(CONFIG_BGMAC_PLATFORM) += bgmac-platform.o
obj-$(CONFIG_SYSTEMPORT) += bcmsysport.o
obj-$(CONFIG_BNXT) += bnxt/
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c b/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
new file mode 100644
index 000000000000..7c19c8e2bf91
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
@@ -0,0 +1,266 @@
+/*
+ * Driver for (BCM4706)? GBit MAC core on BCMA bus.
+ *
+ * Copyright (C) 2012 Rafał Miłecki <zajec5@gmail.com>
+ *
+ * Licensed under the GNU/GPL. See COPYING for details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bcma/bcma.h>
+#include <linux/brcmphy.h>
+#include "bgmac.h"
+
+/* Private data attached to the MII bus created by bcma_mdio_mii_register(). */
+struct bcma_mdio {
+ struct bcma_device *core; /* BCMA core the bus was registered for */
+ u8 phyaddr; /* PHY address handed to bcma_mdio_mii_register() */
+};
+
+/* Poll @reg on @core until (register & @mask) == @value.
+ *
+ * The register is sampled at most @timeout / 10 times, with a 10 us delay
+ * after every miss. Returns true as soon as the masked value matches;
+ * otherwise logs an error and returns false.
+ */
+static bool bcma_mdio_wait_value(struct bcma_device *core, u16 reg, u32 mask,
+				 u32 value, int timeout)
+{
+	int polls_left = timeout / 10;
+
+	while (polls_left-- > 0) {
+		if ((bcma_read32(core, reg) & mask) == value)
+			return true;
+		udelay(10);
+	}
+
+	dev_err(&core->dev, "Timeout waiting for reg 0x%X\n", reg);
+	return false;
+}
+
+/**************************************************
+ * PHY ops
+ **************************************************/
+
+/* Read register @reg of the PHY at address @phyaddr.
+ *
+ * On a BCMA_CORE_4706_MAC_GBIT core the access is issued through the bus's
+ * drv_gmac_cmn core using the BCMA_GMAC_CMN_* register offsets; on any other
+ * core the MAC core's own BGMAC_PHY_* registers are used.
+ *
+ * Returns the register content masked with BGMAC_PA_DATA_MASK, or 0xffff if
+ * the access did not complete in time.
+ */
+static u16 bcma_mdio_phy_read(struct bcma_mdio *bcma_mdio, u8 phyaddr, u8 reg)
+{
+ struct bcma_device *core;
+ u16 phy_access_addr;
+ u16 phy_ctl_addr;
+ u32 tmp;
+
+ /* The code below uses the BGMAC_* constants for both register blocks,
+  * so verify at build time that the GMAC CMN layout is identical.
+  */
+ BUILD_BUG_ON(BGMAC_PA_DATA_MASK != BCMA_GMAC_CMN_PA_DATA_MASK);
+ BUILD_BUG_ON(BGMAC_PA_ADDR_MASK != BCMA_GMAC_CMN_PA_ADDR_MASK);
+ BUILD_BUG_ON(BGMAC_PA_ADDR_SHIFT != BCMA_GMAC_CMN_PA_ADDR_SHIFT);
+ BUILD_BUG_ON(BGMAC_PA_REG_MASK != BCMA_GMAC_CMN_PA_REG_MASK);
+ BUILD_BUG_ON(BGMAC_PA_REG_SHIFT != BCMA_GMAC_CMN_PA_REG_SHIFT);
+ BUILD_BUG_ON(BGMAC_PA_WRITE != BCMA_GMAC_CMN_PA_WRITE);
+ BUILD_BUG_ON(BGMAC_PA_START != BCMA_GMAC_CMN_PA_START);
+ BUILD_BUG_ON(BGMAC_PC_EPA_MASK != BCMA_GMAC_CMN_PC_EPA_MASK);
+ BUILD_BUG_ON(BGMAC_PC_MCT_MASK != BCMA_GMAC_CMN_PC_MCT_MASK);
+ BUILD_BUG_ON(BGMAC_PC_MCT_SHIFT != BCMA_GMAC_CMN_PC_MCT_SHIFT);
+ BUILD_BUG_ON(BGMAC_PC_MTE != BCMA_GMAC_CMN_PC_MTE);
+
+ /* Pick the core and register offsets that carry the PHY access */
+ if (bcma_mdio->core->id.id == BCMA_CORE_4706_MAC_GBIT) {
+ core = bcma_mdio->core->bus->drv_gmac_cmn.core;
+ phy_access_addr = BCMA_GMAC_CMN_PHY_ACCESS;
+ phy_ctl_addr = BCMA_GMAC_CMN_PHY_CTL;
+ } else {
+ core = bcma_mdio->core;
+ phy_access_addr = BGMAC_PHY_ACCESS;
+ phy_ctl_addr = BGMAC_PHY_CNTL;
+ }
+
+ /* Replace the EPA field of the PHY control register with @phyaddr */
+ tmp = bcma_read32(core, phy_ctl_addr);
+ tmp &= ~BGMAC_PC_EPA_MASK;
+ tmp |= phyaddr;
+ bcma_write32(core, phy_ctl_addr, tmp);
+
+ /* Kick off the read: START set, WRITE left clear */
+ tmp = BGMAC_PA_START;
+ tmp |= phyaddr << BGMAC_PA_ADDR_SHIFT;
+ tmp |= reg << BGMAC_PA_REG_SHIFT;
+ bcma_write32(core, phy_access_addr, tmp);
+
+ /* Wait for the START bit to read back as 0 */
+ if (!bcma_mdio_wait_value(core, phy_access_addr, BGMAC_PA_START, 0,
+ 1000)) {
+ dev_err(&core->dev, "Reading PHY %d register 0x%X failed\n",
+ phyaddr, reg);
+ return 0xffff;
+ }
+
+ return bcma_read32(core, phy_access_addr) & BGMAC_PA_DATA_MASK;
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphywr
+ *
+ * Write @value to register @reg of the PHY at address @phyaddr.
+ *
+ * The core and register offsets are chosen exactly as in
+ * bcma_mdio_phy_read(). Returns 0 on success or -ETIMEDOUT if the START bit
+ * did not clear in time.
+ */
+static int bcma_mdio_phy_write(struct bcma_mdio *bcma_mdio, u8 phyaddr, u8 reg,
+ u16 value)
+{
+ struct bcma_device *core;
+ u16 phy_access_addr;
+ u16 phy_ctl_addr;
+ u32 tmp;
+
+ /* Pick the core and register offsets that carry the PHY access */
+ if (bcma_mdio->core->id.id == BCMA_CORE_4706_MAC_GBIT) {
+ core = bcma_mdio->core->bus->drv_gmac_cmn.core;
+ phy_access_addr = BCMA_GMAC_CMN_PHY_ACCESS;
+ phy_ctl_addr = BCMA_GMAC_CMN_PHY_CTL;
+ } else {
+ core = bcma_mdio->core;
+ phy_access_addr = BGMAC_PHY_ACCESS;
+ phy_ctl_addr = BGMAC_PHY_CNTL;
+ }
+
+ /* Replace the EPA field of the PHY control register with @phyaddr */
+ tmp = bcma_read32(core, phy_ctl_addr);
+ tmp &= ~BGMAC_PC_EPA_MASK;
+ tmp |= phyaddr;
+ bcma_write32(core, phy_ctl_addr, tmp);
+
+ /* Note: the interrupt status register is always accessed on the MAC
+  * core itself, not on the (possibly different) PHY access core.
+  * NOTE(review): presumably writing the bit acknowledges a pending MDIO
+  * interrupt (write-1-to-clear) - confirm against the hardware docs.
+  */
+ bcma_write32(bcma_mdio->core, BGMAC_INT_STATUS, BGMAC_IS_MDIO);
+ if (bcma_read32(bcma_mdio->core, BGMAC_INT_STATUS) & BGMAC_IS_MDIO)
+ dev_warn(&core->dev, "Error setting MDIO int\n");
+
+ /* Kick off the write: START and WRITE set, data in the low bits */
+ tmp = BGMAC_PA_START;
+ tmp |= BGMAC_PA_WRITE;
+ tmp |= phyaddr << BGMAC_PA_ADDR_SHIFT;
+ tmp |= reg << BGMAC_PA_REG_SHIFT;
+ tmp |= value;
+ bcma_write32(core, phy_access_addr, tmp);
+
+ /* Wait for the START bit to read back as 0 */
+ if (!bcma_mdio_wait_value(core, phy_access_addr, BGMAC_PA_START, 0,
+ 1000)) {
+ dev_err(&core->dev, "Writing to PHY %d register 0x%X failed\n",
+ phyaddr, reg);
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyinit
+ *
+ * Apply chip-specific PHY register writes, selected by the bus chip id and
+ * package. Chips that match neither condition are left untouched. Return
+ * values of the individual PHY writes are not checked.
+ *
+ * NOTE(review): the register numbers and values are magic constants taken
+ * as-is; their meaning is not visible here (see the URL above).
+ */
+static void bcma_mdio_phy_init(struct bcma_mdio *bcma_mdio)
+{
+ struct bcma_chipinfo *ci = &bcma_mdio->core->bus->chipinfo;
+ u8 i;
+
+ /* BCM5356: same write sequence for PHY addresses 0..4 */
+ if (ci->id == BCMA_CHIP_ID_BCM5356) {
+ for (i = 0; i < 5; i++) {
+ bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x008b);
+ bcma_mdio_phy_write(bcma_mdio, i, 0x15, 0x0100);
+ bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x000f);
+ bcma_mdio_phy_write(bcma_mdio, i, 0x12, 0x2aaa);
+ bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x000b);
+ }
+ }
+ /* BCM5357/BCM4749 (package != 10) and BCM53572 (package != 9) */
+ if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg != 10) ||
+ (ci->id == BCMA_CHIP_ID_BCM4749 && ci->pkg != 10) ||
+ (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg != 9)) {
+ struct bcma_drv_cc *cc = &bcma_mdio->core->bus->drv_cc;
+
+ /* ChipCommon chip control tweaks precede the PHY writes */
+ bcma_chipco_chipctl_maskset(cc, 2, ~0xc0000000, 0);
+ bcma_chipco_chipctl_maskset(cc, 4, ~0x80000000, 0);
+ for (i = 0; i < 5; i++) {
+ bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x000f);
+ bcma_mdio_phy_write(bcma_mdio, i, 0x16, 0x5284);
+ bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x000b);
+ bcma_mdio_phy_write(bcma_mdio, i, 0x17, 0x0010);
+ bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x000f);
+ bcma_mdio_phy_write(bcma_mdio, i, 0x16, 0x5296);
+ bcma_mdio_phy_write(bcma_mdio, i, 0x17, 0x1073);
+ bcma_mdio_phy_write(bcma_mdio, i, 0x17, 0x9073);
+ bcma_mdio_phy_write(bcma_mdio, i, 0x16, 0x52b6);
+ bcma_mdio_phy_write(bcma_mdio, i, 0x17, 0x9273);
+ bcma_mdio_phy_write(bcma_mdio, i, 0x1f, 0x000b);
+ }
+ }
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyreset
+ *
+ * mii_bus reset callback: reset the PHY at the stored address, then run the
+ * chip-specific PHY initialisation. Always returns 0; a reset bit that is
+ * still set after the delay is only logged.
+ */
+static int bcma_mdio_phy_reset(struct mii_bus *bus)
+{
+ struct bcma_mdio *bcma_mdio = bus->priv;
+ u8 phyaddr = bcma_mdio->phyaddr;
+
+ /* Nothing to reset when the address is the NOREGS marker */
+ if (bcma_mdio->phyaddr == BGMAC_PHY_NOREGS)
+ return 0;
+
+ /* Request the reset, give it 100 us, then check BMCR_RESET again */
+ bcma_mdio_phy_write(bcma_mdio, phyaddr, MII_BMCR, BMCR_RESET);
+ udelay(100);
+ if (bcma_mdio_phy_read(bcma_mdio, phyaddr, MII_BMCR) & BMCR_RESET)
+ dev_err(&bcma_mdio->core->dev, "PHY reset failed\n");
+ bcma_mdio_phy_init(bcma_mdio);
+
+ return 0;
+}
+
+/**************************************************
+ * MII
+ **************************************************/
+
+/* mii_bus read callback: read register @regnum of the PHY at @mii_id. */
+static int bcma_mdio_mii_read(struct mii_bus *bus, int mii_id, int regnum)
+{
+	struct bcma_mdio *bcma_mdio = bus->priv;
+
+	return bcma_mdio_phy_read(bcma_mdio, mii_id, regnum);
+}
+
+/* mii_bus write callback: write @value to register @regnum of PHY @mii_id. */
+static int bcma_mdio_mii_write(struct mii_bus *bus, int mii_id, int regnum,
+			       u16 value)
+{
+	struct bcma_mdio *bcma_mdio = bus->priv;
+
+	return bcma_mdio_phy_write(bcma_mdio, mii_id, regnum, value);
+}
+
+/**
+ * bcma_mdio_mii_register - allocate and register an MII bus for a BCMA core
+ * @core: BCMA device used for register access; becomes the bus parent
+ * @phyaddr: PHY address; the only address left unmasked in phy_mask
+ *
+ * Returns the registered bus on success or an ERR_PTR() on failure. The bus
+ * must be released with bcma_mdio_mii_unregister().
+ */
+struct mii_bus *bcma_mdio_mii_register(struct bcma_device *core, u8 phyaddr)
+{
+	struct bcma_mdio *bcma_mdio;
+	struct mii_bus *mii_bus;
+	int err;
+
+	bcma_mdio = kzalloc(sizeof(*bcma_mdio), GFP_KERNEL);
+	if (!bcma_mdio)
+		return ERR_PTR(-ENOMEM);
+
+	mii_bus = mdiobus_alloc();
+	if (!mii_bus) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	mii_bus->name = "bcma_mdio mii bus";
+	/* Bounded write: the id buffer is a small fixed-size array */
+	snprintf(mii_bus->id, sizeof(mii_bus->id), "%s-%d-%d", "bcma_mdio",
+		 core->bus->num, core->core_unit);
+	mii_bus->priv = bcma_mdio;
+	mii_bus->read = bcma_mdio_mii_read;
+	mii_bus->write = bcma_mdio_mii_write;
+	mii_bus->reset = bcma_mdio_phy_reset;
+	mii_bus->parent = &core->dev;
+	/* Unsigned shift so that address 31 does not overflow a signed int */
+	mii_bus->phy_mask = ~(1U << phyaddr);
+
+	/* Must be filled in before registration: the callbacks use it */
+	bcma_mdio->core = core;
+	bcma_mdio->phyaddr = phyaddr;
+
+	err = mdiobus_register(mii_bus);
+	if (err) {
+		dev_err(&core->dev, "Registration of mii bus failed\n");
+		goto err_free_bus;
+	}
+
+	return mii_bus;
+
+err_free_bus:
+	mdiobus_free(mii_bus);
+err:
+	kfree(bcma_mdio);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(bcma_mdio_mii_register);
+
+/**
+ * bcma_mdio_mii_unregister - tear down a bus from bcma_mdio_mii_register()
+ * @mii_bus: bus to unregister and free; NULL is accepted and ignored
+ */
+void bcma_mdio_mii_unregister(struct mii_bus *mii_bus)
+{
+	struct bcma_mdio *priv;
+
+	if (mii_bus) {
+		/* Fetch the private data before the bus itself is freed */
+		priv = mii_bus->priv;
+
+		mdiobus_unregister(mii_bus);
+		mdiobus_free(mii_bus);
+		kfree(priv);
+	}
+}
+EXPORT_SYMBOL_GPL(bcma_mdio_mii_unregister);
+
+MODULE_AUTHOR("Rafał Miłecki");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c
new file mode 100644
index 000000000000..9a9745c4047c
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -0,0 +1,315 @@
+/*
+ * Driver for (BCM4706)? GBit MAC core on BCMA bus.
+ *
+ * Copyright (C) 2012 Rafał Miłecki <zajec5@gmail.com>
+ *
+ * Licensed under the GNU/GPL. See COPYING for details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bcma/bcma.h>
+#include <linux/brcmphy.h>
+#include <linux/etherdevice.h>
+#include "bgmac.h"
+
+/* True when the bus chip id is BCM4707, BCM47094 or BCM53018. */
+static inline bool bgmac_is_bcm4707_family(struct bcma_device *core)
+{
+	const struct bcma_chipinfo *ci = &core->bus->chipinfo;
+
+	return ci->id == BCMA_CHIP_ID_BCM4707 ||
+	       ci->id == BCMA_CHIP_ID_BCM47094 ||
+	       ci->id == BCMA_CHIP_ID_BCM53018;
+}
+
+/**************************************************
+ * BCMA bus ops
+ **************************************************/
+
+/* bgmac read hook: 32-bit read at @offset of the BCMA core. */
+static u32 bcma_bgmac_read(struct bgmac *bgmac, u16 offset)
+{
+	struct bcma_device *core = bgmac->bcma.core;
+
+	return bcma_read32(core, offset);
+}
+
+/* bgmac write hook: 32-bit write of @value at @offset of the BCMA core. */
+static void bcma_bgmac_write(struct bgmac *bgmac, u16 offset, u32 value)
+{
+	struct bcma_device *core = bgmac->bcma.core;
+
+	bcma_write32(core, offset, value);
+}
+
+/* bgmac idm_read hook: read @offset through bcma_aread32(). */
+static u32 bcma_bgmac_idm_read(struct bgmac *bgmac, u16 offset)
+{
+	struct bcma_device *core = bgmac->bcma.core;
+
+	return bcma_aread32(core, offset);
+}
+
+/* bgmac idm_write hook: write @value at @offset through bcma_awrite32().
+ *
+ * A void function must not return an expression, so the call is a plain
+ * statement.
+ */
+static void bcma_bgmac_idm_write(struct bgmac *bgmac, u16 offset, u32 value)
+{
+	bcma_awrite32(bgmac->bcma.core, offset, value);
+}
+
+/* bgmac clk_enabled hook: report whether the BCMA core is enabled. */
+static bool bcma_bgmac_clk_enabled(struct bgmac *bgmac)
+{
+	struct bcma_device *core = bgmac->bcma.core;
+
+	return bcma_core_is_enabled(core);
+}
+
+/* bgmac clk_enable hook: enable the BCMA core with the given @flags. */
+static void bcma_bgmac_clk_enable(struct bgmac *bgmac, u32 flags)
+{
+	struct bcma_device *core = bgmac->bcma.core;
+
+	bcma_core_enable(core, flags);
+}
+
+/* bgmac cco_ctl_maskset hook: mask/set a chip control register through the
+ * ChipCommon driver of the bus the core sits on.
+ */
+static void bcma_bgmac_cco_ctl_maskset(struct bgmac *bgmac, u32 offset,
+				       u32 mask, u32 set)
+{
+	bcma_chipco_chipctl_maskset(&bgmac->bcma.core->bus->drv_cc, offset,
+				    mask, set);
+}
+
+/* bgmac get_bus_clock hook: return what bcma_pmu_get_bus_clock() reports
+ * for the ChipCommon driver of the bus the core sits on.
+ */
+static u32 bcma_bgmac_get_bus_clock(struct bgmac *bgmac)
+{
+	return bcma_pmu_get_bus_clock(&bgmac->bcma.core->bus->drv_cc);
+}
+
+/* bgmac cmn_maskset32 hook: mask/set a 32-bit register of the GMAC common
+ * core stored in bgmac->bcma.cmn.
+ */
+static void bcma_bgmac_cmn_maskset32(struct bgmac *bgmac, u16 offset, u32 mask,
+				     u32 set)
+{
+	struct bcma_device *cmn = bgmac->bcma.cmn;
+
+	bcma_maskset32(cmn, offset, mask, set);
+}
+
+/* BCMA cores handled by this driver: the BCM4706 GBit MAC and the generic
+ * GBit MAC, any revision and class. The empty entry terminates the table.
+ */
+static const struct bcma_device_id bgmac_bcma_tbl[] = {
+ BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_4706_MAC_GBIT,
+ BCMA_ANY_REV, BCMA_ANY_CLASS),
+ BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_MAC_GBIT, BCMA_ANY_REV,
+ BCMA_ANY_CLASS),
+ {},
+};
+MODULE_DEVICE_TABLE(bcma, bgmac_bcma_tbl);
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipattach
+ *
+ * BCMA probe callback: allocate the bgmac state, take MAC and PHY addresses
+ * from the SPROM entry matching the core unit, register the MII bus (except
+ * on the BCM4707 family), derive the feature flags from chip id/package/core
+ * revision, install the BCMA accessors and hand over to bgmac_enet_probe().
+ *
+ * Returns 0 on success or a negative errno; on failure everything set up
+ * here is released again.
+ */
+static int bgmac_probe(struct bcma_device *core)
+{
+	struct ssb_sprom *sprom = &core->bus->sprom;
+	struct mii_bus *mii_bus;
+	struct bgmac *bgmac;
+	u8 *mac;
+	int err;
+
+	bgmac = kzalloc(sizeof(*bgmac), GFP_KERNEL);
+	if (!bgmac)
+		return -ENOMEM;
+
+	bgmac->bcma.core = core;
+	bgmac->dev = &core->dev;
+	bgmac->dma_dev = core->dma_dev;
+	bgmac->irq = core->irq;
+
+	bcma_set_drvdata(core, bgmac);
+
+	switch (core->core_unit) {
+	case 0:
+		mac = sprom->et0mac;
+		break;
+	case 1:
+		mac = sprom->et1mac;
+		break;
+	case 2:
+		mac = sprom->et2mac;
+		break;
+	default:
+		dev_err(bgmac->dev, "Unsupported core_unit %d\n",
+			core->core_unit);
+		err = -ENOTSUPP;
+		goto err;
+	}
+
+	ether_addr_copy(bgmac->mac_addr, mac);
+
+	/* On BCM4706 we need common core to access PHY */
+	if (core->id.id == BCMA_CORE_4706_MAC_GBIT &&
+	    !core->bus->drv_gmac_cmn.core) {
+		dev_err(bgmac->dev, "GMAC CMN core not found (required for BCM4706)\n");
+		err = -ENODEV;
+		goto err;
+	}
+	bgmac->bcma.cmn = core->bus->drv_gmac_cmn.core;
+
+	/* core_unit was validated by the switch above, so no default here */
+	switch (core->core_unit) {
+	case 0:
+		bgmac->phyaddr = sprom->et0phyaddr;
+		break;
+	case 1:
+		bgmac->phyaddr = sprom->et1phyaddr;
+		break;
+	case 2:
+		bgmac->phyaddr = sprom->et2phyaddr;
+		break;
+	}
+	bgmac->phyaddr &= BGMAC_PHY_MASK;
+	if (bgmac->phyaddr == BGMAC_PHY_MASK) {
+		dev_err(bgmac->dev, "No PHY found\n");
+		err = -ENODEV;
+		goto err;
+	}
+	dev_info(bgmac->dev, "Found PHY addr: %d%s\n", bgmac->phyaddr,
+		 bgmac->phyaddr == BGMAC_PHY_NOREGS ? " (NOREGS)" : "");
+
+	if (!bgmac_is_bcm4707_family(core)) {
+		mii_bus = bcma_mdio_mii_register(core, bgmac->phyaddr);
+		/* Bail out only when registration actually failed; on
+		 * success keep the bus so it can be unregistered later.
+		 */
+		if (IS_ERR(mii_bus)) {
+			err = PTR_ERR(mii_bus);
+			goto err;
+		}
+
+		bgmac->mii_bus = mii_bus;
+	}
+
+	if (core->bus->hosttype == BCMA_HOSTTYPE_PCI) {
+		dev_err(bgmac->dev, "PCI setup not implemented\n");
+		err = -ENOTSUPP;
+		goto err1;
+	}
+
+	bgmac->has_robosw = !!(core->bus->sprom.boardflags_lo &
+			       BGMAC_BFL_ENETROBO);
+	if (bgmac->has_robosw)
+		dev_warn(bgmac->dev, "Support for Roboswitch not implemented\n");
+
+	if (core->bus->sprom.boardflags_lo & BGMAC_BFL_ENETADM)
+		dev_warn(bgmac->dev, "Support for ADMtek ethernet switch not implemented\n");
+
+	/* Feature Flags */
+	switch (core->bus->chipinfo.id) {
+	case BCMA_CHIP_ID_BCM5357:
+		bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+		bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+		bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1;
+		bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY;
+		if (core->bus->chipinfo.pkg == BCMA_PKG_ID_BCM47186) {
+			bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
+			bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII;
+		}
+		if (core->bus->chipinfo.pkg == BCMA_PKG_ID_BCM5358)
+			bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_EPHYRMII;
+		break;
+	case BCMA_CHIP_ID_BCM53572:
+		bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+		bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+		bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1;
+		bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY;
+		if (core->bus->chipinfo.pkg == BCMA_PKG_ID_BCM47188) {
+			bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII;
+			bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
+		}
+		break;
+	case BCMA_CHIP_ID_BCM4749:
+		bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+		bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+		bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1;
+		bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY;
+		if (core->bus->chipinfo.pkg == 10) {
+			bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII;
+			bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
+		}
+		break;
+	case BCMA_CHIP_ID_BCM4716:
+		bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+		/* fallthrough */
+	case BCMA_CHIP_ID_BCM47162:
+		bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL2;
+		bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+		break;
+	/* bcm4707_family */
+	case BCMA_CHIP_ID_BCM4707:
+	case BCMA_CHIP_ID_BCM47094:
+	case BCMA_CHIP_ID_BCM53018:
+		bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+		bgmac->feature_flags |= BGMAC_FEAT_NO_RESET;
+		bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500;
+		break;
+	default:
+		bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+		bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+	}
+
+	if (!bgmac_is_bcm4707_family(core) && core->id.rev > 2)
+		bgmac->feature_flags |= BGMAC_FEAT_MISC_PLL_REQ;
+
+	if (core->id.id == BCMA_CORE_4706_MAC_GBIT) {
+		bgmac->feature_flags |= BGMAC_FEAT_CMN_PHY_CTL;
+		bgmac->feature_flags |= BGMAC_FEAT_NO_CLR_MIB;
+	}
+
+	if (core->id.rev >= 4) {
+		bgmac->feature_flags |= BGMAC_FEAT_CMDCFG_SR_REV4;
+		bgmac->feature_flags |= BGMAC_FEAT_TX_MASK_SETUP;
+		bgmac->feature_flags |= BGMAC_FEAT_RX_MASK_SETUP;
+	}
+
+	/* Bus-specific accessors used by the bus-independent bgmac core */
+	bgmac->read = bcma_bgmac_read;
+	bgmac->write = bcma_bgmac_write;
+	bgmac->idm_read = bcma_bgmac_idm_read;
+	bgmac->idm_write = bcma_bgmac_idm_write;
+	bgmac->clk_enabled = bcma_bgmac_clk_enabled;
+	bgmac->clk_enable = bcma_bgmac_clk_enable;
+	bgmac->cco_ctl_maskset = bcma_bgmac_cco_ctl_maskset;
+	bgmac->get_bus_clock = bcma_bgmac_get_bus_clock;
+	bgmac->cmn_maskset32 = bcma_bgmac_cmn_maskset32;
+
+	err = bgmac_enet_probe(bgmac);
+	if (err)
+		goto err1;
+
+	return 0;
+
+err1:
+	/* mii_bus is NULL on the BCM4707 family; unregister accepts that */
+	bcma_mdio_mii_unregister(bgmac->mii_bus);
+err:
+	kfree(bgmac);
+	bcma_set_drvdata(core, NULL);
+
+	return err;
+}
+
+/* BCMA remove callback: release what bgmac_probe() set up for @core.
+ *
+ * NOTE(review): the MII bus is torn down before bgmac_enet_remove(), which
+ * is not the reverse of the probe order - confirm nothing in
+ * bgmac_enet_remove() still needs the bus.
+ */
+static void bgmac_remove(struct bcma_device *core)
+{
+ struct bgmac *bgmac = bcma_get_drvdata(core);
+
+ /* A NULL mii_bus (nothing registered) is ignored by the callee */
+ bcma_mdio_mii_unregister(bgmac->mii_bus);
+ bgmac_enet_remove(bgmac);
+ bcma_set_drvdata(core, NULL);
+ kfree(bgmac);
+}
+
+/* BCMA driver glue: binds the cores listed in bgmac_bcma_tbl to the
+ * probe/remove callbacks above.
+ */
+static struct bcma_driver bgmac_bcma_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = bgmac_bcma_tbl,
+ .probe = bgmac_probe,
+ .remove = bgmac_remove,
+};
+
+/* Module init: register the BCMA driver and log a banner on success.
+ * Returns 0 or the error from bcma_driver_register().
+ */
+static int __init bgmac_init(void)
+{
+	int err = bcma_driver_register(&bgmac_bcma_driver);
+
+	if (!err)
+		pr_info("Broadcom 47xx GBit MAC driver loaded\n");
+
+	return err;
+}
+
+/* Module exit: unregister the BCMA driver registered by bgmac_init(). */
+static void __exit bgmac_exit(void)
+{
+ bcma_driver_unregister(&bgmac_bcma_driver);
+}
+
+module_init(bgmac_init)
+module_exit(bgmac_exit)
+
+MODULE_AUTHOR("Rafał Miłecki");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c
new file mode 100644
index 000000000000..1a2d8418e7c1
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bcma/bcma.h>
+#include <linux/etherdevice.h>
+#include <linux/of_address.h>
+#include <linux/of_net.h>
+#include "bgmac.h"
+
+/* bgmac read hook: readl() at @offset from the mapped base registers. */
+static u32 platform_bgmac_read(struct bgmac *bgmac, u16 offset)
+{
+	void __iomem *reg = bgmac->plat.base + offset;
+
+	return readl(reg);
+}
+
+/* bgmac write hook: writel() @value at @offset of the mapped base registers. */
+static void platform_bgmac_write(struct bgmac *bgmac, u16 offset, u32 value)
+{
+	void __iomem *reg = bgmac->plat.base + offset;
+
+	writel(value, reg);
+}
+
+/* bgmac idm_read hook: readl() at @offset from the mapped IDM registers. */
+static u32 platform_bgmac_idm_read(struct bgmac *bgmac, u16 offset)
+{
+	void __iomem *reg = bgmac->plat.idm_base + offset;
+
+	return readl(reg);
+}
+
+/* bgmac idm_write hook: writel() @value at @offset of the mapped IDM
+ * registers.
+ *
+ * A void function must not return an expression, so the call is a plain
+ * statement.
+ */
+static void platform_bgmac_idm_write(struct bgmac *bgmac, u16 offset, u32 value)
+{
+	writel(value, bgmac->plat.idm_base + offset);
+}
+
+/* bgmac clk_enabled hook.
+ *
+ * The core counts as enabled when, in the IDM IOCTL register, the CLK bit
+ * is set and the FGC bit is clear, and the RESET bit of the IDM reset
+ * control register is clear. The reset register is only read when the
+ * IOCTL check passes.
+ */
+static bool platform_bgmac_clk_enabled(struct bgmac *bgmac)
+{
+	u32 ioctl = bgmac_idm_read(bgmac, BCMA_IOCTL);
+
+	if ((ioctl & (BCMA_IOCTL_CLK | BCMA_IOCTL_FGC)) != BCMA_IOCTL_CLK)
+		return false;
+
+	return !(bgmac_idm_read(bgmac, BCMA_RESET_CTL) & BCMA_RESET_CTL_RESET);
+}
+
+/* bgmac clk_enable hook: bring the core out of reset via the IDM registers.
+ *
+ * Sequence: set CLK and FGC (plus caller @flags) in IOCTL, clear the reset
+ * control register, then rewrite IOCTL with CLK and @flags only (FGC
+ * dropped). Every write is followed by a read of the same register.
+ * NOTE(review): the read-backs presumably flush the posted write before the
+ * delay - confirm.
+ */
+static void platform_bgmac_clk_enable(struct bgmac *bgmac, u32 flags)
+{
+ bgmac_idm_write(bgmac, BCMA_IOCTL,
+ (BCMA_IOCTL_CLK | BCMA_IOCTL_FGC | flags));
+ bgmac_idm_read(bgmac, BCMA_IOCTL);
+
+ /* Clear the reset control register */
+ bgmac_idm_write(bgmac, BCMA_RESET_CTL, 0);
+ bgmac_idm_read(bgmac, BCMA_RESET_CTL);
+ udelay(1);
+
+ /* Same IOCTL value as above, but without FGC */
+ bgmac_idm_write(bgmac, BCMA_IOCTL, (BCMA_IOCTL_CLK | flags));
+ bgmac_idm_read(bgmac, BCMA_IOCTL);
+ udelay(1);
+}
+
+/* bgmac cco_ctl_maskset hook for platform devices: a stub that only warns
+ * and ignores all of its arguments.
+ */
+static void platform_bgmac_cco_ctl_maskset(struct bgmac *bgmac, u32 offset,
+ u32 mask, u32 set)
+{
+ /* This shouldn't be encountered */
+ WARN_ON(1);
+}
+
+/* bgmac get_bus_clock hook for platform devices: a stub that warns and
+ * always returns 0.
+ */
+static u32 platform_bgmac_get_bus_clock(struct bgmac *bgmac)
+{
+ /* This shouldn't be encountered */
+ WARN_ON(1);
+
+ return 0;
+}
+
+/* bgmac cmn_maskset32 hook for platform devices: a stub that only warns
+ * and ignores all of its arguments.
+ */
+static void platform_bgmac_cmn_maskset32(struct bgmac *bgmac, u16 offset,
+ u32 mask, u32 set)
+{
+ /* This shouldn't be encountered */
+ WARN_ON(1);
+}
+
+/* Platform probe callback: allocate the bgmac state, set the feature flags,
+ * fetch MAC address, IRQ and the "amac_base"/"idm_base" register ranges
+ * from the platform device / device tree, install the platform accessors
+ * and hand over to bgmac_enet_probe().
+ *
+ * Returns 0 on success or a negative errno. All allocations and mappings
+ * made here are devm-managed, so the error paths simply return.
+ */
+static int bgmac_probe(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ struct bgmac *bgmac;
+ struct resource *regs;
+ const u8 *mac_addr;
+
+ bgmac = devm_kzalloc(&pdev->dev, sizeof(*bgmac), GFP_KERNEL);
+ if (!bgmac)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, bgmac);
+
+ /* Set the features of the 4707 family */
+ bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+ bgmac->feature_flags |= BGMAC_FEAT_NO_RESET;
+ bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500;
+ bgmac->feature_flags |= BGMAC_FEAT_CMDCFG_SR_REV4;
+ bgmac->feature_flags |= BGMAC_FEAT_TX_MASK_SETUP;
+ bgmac->feature_flags |= BGMAC_FEAT_RX_MASK_SETUP;
+
+ /* The platform device serves as both logging and DMA device */
+ bgmac->dev = &pdev->dev;
+ bgmac->dma_dev = &pdev->dev;
+
+ /* A missing MAC address is not fatal here: only warn and leave
+  * bgmac->mac_addr zeroed.
+  */
+ mac_addr = of_get_mac_address(np);
+ if (mac_addr)
+ ether_addr_copy(bgmac->mac_addr, mac_addr);
+ else
+ dev_warn(&pdev->dev, "MAC address not present in device tree\n");
+
+ bgmac->irq = platform_get_irq(pdev, 0);
+ if (bgmac->irq < 0) {
+ dev_err(&pdev->dev, "Unable to obtain IRQ\n");
+ return bgmac->irq;
+ }
+
+ /* Register range used by platform_bgmac_read()/write() */
+ regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "amac_base");
+ if (!regs) {
+ dev_err(&pdev->dev, "Unable to obtain base resource\n");
+ return -EINVAL;
+ }
+
+ bgmac->plat.base = devm_ioremap_resource(&pdev->dev, regs);
+ if (IS_ERR(bgmac->plat.base)) {
+ dev_err(&pdev->dev, "Unable to map base resource\n");
+ return PTR_ERR(bgmac->plat.base);
+ }
+
+ /* Register range used by platform_bgmac_idm_read()/idm_write() */
+ regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "idm_base");
+ if (!regs) {
+ dev_err(&pdev->dev, "Unable to obtain idm resource\n");
+ return -EINVAL;
+ }
+
+ bgmac->plat.idm_base = devm_ioremap_resource(&pdev->dev, regs);
+ if (IS_ERR(bgmac->plat.idm_base)) {
+ dev_err(&pdev->dev, "Unable to map idm resource\n");
+ return PTR_ERR(bgmac->plat.idm_base);
+ }
+
+ /* Bus-specific accessors used by the bus-independent bgmac core */
+ bgmac->read = platform_bgmac_read;
+ bgmac->write = platform_bgmac_write;
+ bgmac->idm_read = platform_bgmac_idm_read;
+ bgmac->idm_write = platform_bgmac_idm_write;
+ bgmac->clk_enabled = platform_bgmac_clk_enabled;
+ bgmac->clk_enable = platform_bgmac_clk_enable;
+ bgmac->cco_ctl_maskset = platform_bgmac_cco_ctl_maskset;
+ bgmac->get_bus_clock = platform_bgmac_get_bus_clock;
+ bgmac->cmn_maskset32 = platform_bgmac_cmn_maskset32;
+
+ return bgmac_enet_probe(bgmac);
+}
+
+/* Platform remove callback: undo bgmac_enet_probe(). The bgmac structure
+ * itself was allocated with devm_kzalloc() in probe and is not freed here.
+ * Always returns 0.
+ */
+static int bgmac_remove(struct platform_device *pdev)
+{
+ struct bgmac *bgmac = platform_get_drvdata(pdev);
+
+ bgmac_enet_remove(bgmac);
+
+ return 0;
+}
+
+/* Device tree compatible strings this driver binds to; the empty entry
+ * terminates the table.
+ */
+static const struct of_device_id bgmac_of_enet_match[] = {
+ {.compatible = "brcm,amac",},
+ {.compatible = "brcm,nsp-amac",},
+ {},
+};
+
+MODULE_DEVICE_TABLE(of, bgmac_of_enet_match);
+
+/* Platform driver glue: matches devices via bgmac_of_enet_match and routes
+ * them to the probe/remove callbacks above.
+ */
+static struct platform_driver bgmac_enet_driver = {
+ .driver = {
+ .name = "bgmac-enet",
+ .of_match_table = bgmac_of_enet_match,
+ },
+ .probe = bgmac_probe,
+ .remove = bgmac_remove,
+};
+
+module_platform_driver(bgmac_enet_driver);
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index b045dc072c40..13b072591332 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -6,51 +6,27 @@
* Licensed under the GNU/GPL. See COPYING for details.
*/
-#include "bgmac.h"
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/delay.h>
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bcma/bcma.h>
#include <linux/etherdevice.h>
-#include <linux/mii.h>
-#include <linux/phy.h>
-#include <linux/phy_fixed.h>
-#include <linux/interrupt.h>
-#include <linux/dma-mapping.h>
#include <linux/bcm47xx_nvram.h>
+#include "bgmac.h"
-static const struct bcma_device_id bgmac_bcma_tbl[] = {
- BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_4706_MAC_GBIT, BCMA_ANY_REV, BCMA_ANY_CLASS),
- BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_MAC_GBIT, BCMA_ANY_REV, BCMA_ANY_CLASS),
- {},
-};
-MODULE_DEVICE_TABLE(bcma, bgmac_bcma_tbl);
-
-static inline bool bgmac_is_bcm4707_family(struct bgmac *bgmac)
-{
- switch (bgmac->core->bus->chipinfo.id) {
- case BCMA_CHIP_ID_BCM4707:
- case BCMA_CHIP_ID_BCM47094:
- case BCMA_CHIP_ID_BCM53018:
- return true;
- default:
- return false;
- }
-}
-
-static bool bgmac_wait_value(struct bcma_device *core, u16 reg, u32 mask,
+static bool bgmac_wait_value(struct bgmac *bgmac, u16 reg, u32 mask,
u32 value, int timeout)
{
u32 val;
int i;
for (i = 0; i < timeout / 10; i++) {
- val = bcma_read32(core, reg);
+ val = bgmac_read(bgmac, reg);
if ((val & mask) == value)
return true;
udelay(10);
}
- pr_err("Timeout waiting for reg 0x%X\n", reg);
+ dev_err(bgmac->dev, "Timeout waiting for reg 0x%X\n", reg);
return false;
}
@@ -84,22 +60,22 @@ static void bgmac_dma_tx_reset(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
udelay(10);
}
if (i)
- bgmac_err(bgmac, "Timeout suspending DMA TX ring 0x%X (BGMAC_DMA_TX_STAT: 0x%08X)\n",
- ring->mmio_base, val);
+ dev_err(bgmac->dev, "Timeout suspending DMA TX ring 0x%X (BGMAC_DMA_TX_STAT: 0x%08X)\n",
+ ring->mmio_base, val);
/* Remove SUSPEND bit */
bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, 0);
- if (!bgmac_wait_value(bgmac->core,
+ if (!bgmac_wait_value(bgmac,
ring->mmio_base + BGMAC_DMA_TX_STATUS,
BGMAC_DMA_TX_STAT, BGMAC_DMA_TX_STAT_DISABLED,
10000)) {
- bgmac_warn(bgmac, "DMA TX ring 0x%X wasn't disabled on time, waiting additional 300us\n",
- ring->mmio_base);
+ dev_warn(bgmac->dev, "DMA TX ring 0x%X wasn't disabled on time, waiting additional 300us\n",
+ ring->mmio_base);
udelay(300);
val = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_STATUS);
if ((val & BGMAC_DMA_TX_STAT) != BGMAC_DMA_TX_STAT_DISABLED)
- bgmac_err(bgmac, "Reset of DMA TX ring 0x%X failed\n",
- ring->mmio_base);
+ dev_err(bgmac->dev, "Reset of DMA TX ring 0x%X failed\n",
+ ring->mmio_base);
}
}
@@ -109,7 +85,7 @@ static void bgmac_dma_tx_enable(struct bgmac *bgmac,
u32 ctl;
ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL);
- if (bgmac->core->id.rev >= 4) {
+ if (bgmac->feature_flags & BGMAC_FEAT_TX_MASK_SETUP) {
ctl &= ~BGMAC_DMA_TX_BL_MASK;
ctl |= BGMAC_DMA_TX_BL_128 << BGMAC_DMA_TX_BL_SHIFT;
@@ -152,7 +128,7 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
struct bgmac_dma_ring *ring,
struct sk_buff *skb)
{
- struct device *dma_dev = bgmac->core->dma_dev;
+ struct device *dma_dev = bgmac->dma_dev;
struct net_device *net_dev = bgmac->net_dev;
int index = ring->end % BGMAC_TX_RING_SLOTS;
struct bgmac_slot_info *slot = &ring->slots[index];
@@ -161,7 +137,7 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
int i;
if (skb->len > BGMAC_DESC_CTL1_LEN) {
- bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
+ netdev_err(bgmac->net_dev, "Too long skb (%d)\n", skb->len);
goto err_drop;
}
@@ -174,7 +150,7 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
* even when ring->end overflows
*/
if (ring->end - ring->start + nr_frags + 1 >= BGMAC_TX_RING_SLOTS) {
- bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
+ netdev_err(bgmac->net_dev, "TX ring is full, queue should be stopped!\n");
netif_stop_queue(net_dev);
return NETDEV_TX_BUSY;
}
@@ -241,8 +217,8 @@ err_dma:
}
err_dma_head:
- bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
- ring->mmio_base);
+ netdev_err(bgmac->net_dev, "Mapping error of skb on ring 0x%X\n",
+ ring->mmio_base);
err_drop:
dev_kfree_skb(skb);
@@ -254,7 +230,7 @@ err_drop:
/* Free transmitted packets */
static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
{
- struct device *dma_dev = bgmac->core->dma_dev;
+ struct device *dma_dev = bgmac->dma_dev;
int empty_slot;
bool freed = false;
unsigned bytes_compl = 0, pkts_compl = 0;
@@ -317,12 +293,12 @@ static void bgmac_dma_rx_reset(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
return;
bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL, 0);
- if (!bgmac_wait_value(bgmac->core,
+ if (!bgmac_wait_value(bgmac,
ring->mmio_base + BGMAC_DMA_RX_STATUS,
BGMAC_DMA_RX_STAT, BGMAC_DMA_RX_STAT_DISABLED,
10000))
- bgmac_err(bgmac, "Reset of ring 0x%X RX failed\n",
- ring->mmio_base);
+ dev_err(bgmac->dev, "Reset of ring 0x%X RX failed\n",
+ ring->mmio_base);
}
static void bgmac_dma_rx_enable(struct bgmac *bgmac,
@@ -331,7 +307,7 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
u32 ctl;
ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL);
- if (bgmac->core->id.rev >= 4) {
+ if (bgmac->feature_flags & BGMAC_FEAT_RX_MASK_SETUP) {
ctl &= ~BGMAC_DMA_RX_BL_MASK;
ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT;
@@ -352,7 +328,7 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac,
struct bgmac_slot_info *slot)
{
- struct device *dma_dev = bgmac->core->dma_dev;
+ struct device *dma_dev = bgmac->dma_dev;
dma_addr_t dma_addr;
struct bgmac_rx_header *rx;
void *buf;
@@ -371,7 +347,7 @@ static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac,
dma_addr = dma_map_single(dma_dev, buf + BGMAC_RX_BUF_OFFSET,
BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
if (dma_mapping_error(dma_dev, dma_addr)) {
- bgmac_err(bgmac, "DMA mapping error\n");
+ netdev_err(bgmac->net_dev, "DMA mapping error\n");
put_page(virt_to_head_page(buf));
return -ENOMEM;
}
@@ -441,7 +417,7 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
end_slot /= sizeof(struct bgmac_dma_desc);
while (ring->start != end_slot) {
- struct device *dma_dev = bgmac->core->dma_dev;
+ struct device *dma_dev = bgmac->dma_dev;
struct bgmac_slot_info *slot = &ring->slots[ring->start];
struct bgmac_rx_header *rx = slot->buf + BGMAC_RX_BUF_OFFSET;
struct sk_buff *skb;
@@ -466,16 +442,16 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
/* Check for poison and drop or pass the packet */
if (len == 0xdead && flags == 0xbeef) {
- bgmac_err(bgmac, "Found poisoned packet at slot %d, DMA issue!\n",
- ring->start);
+ netdev_err(bgmac->net_dev, "Found poisoned packet at slot %d, DMA issue!\n",
+ ring->start);
put_page(virt_to_head_page(buf));
bgmac->net_dev->stats.rx_errors++;
break;
}
if (len > BGMAC_RX_ALLOC_SIZE) {
- bgmac_err(bgmac, "Found oversized packet at slot %d, DMA issue!\n",
- ring->start);
+ netdev_err(bgmac->net_dev, "Found oversized packet at slot %d, DMA issue!\n",
+ ring->start);
put_page(virt_to_head_page(buf));
bgmac->net_dev->stats.rx_length_errors++;
bgmac->net_dev->stats.rx_errors++;
@@ -487,7 +463,7 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
skb = build_skb(buf, BGMAC_RX_ALLOC_SIZE);
if (unlikely(!skb)) {
- bgmac_err(bgmac, "build_skb failed\n");
+ netdev_err(bgmac->net_dev, "build_skb failed\n");
put_page(virt_to_head_page(buf));
bgmac->net_dev->stats.rx_errors++;
break;
@@ -544,7 +520,7 @@ static bool bgmac_dma_unaligned(struct bgmac *bgmac,
static void bgmac_dma_tx_ring_free(struct bgmac *bgmac,
struct bgmac_dma_ring *ring)
{
- struct device *dma_dev = bgmac->core->dma_dev;
+ struct device *dma_dev = bgmac->dma_dev;
struct bgmac_dma_desc *dma_desc = ring->cpu_base;
struct bgmac_slot_info *slot;
int i;
@@ -570,7 +546,7 @@ static void bgmac_dma_tx_ring_free(struct bgmac *bgmac,
static void bgmac_dma_rx_ring_free(struct bgmac *bgmac,
struct bgmac_dma_ring *ring)
{
- struct device *dma_dev = bgmac->core->dma_dev;
+ struct device *dma_dev = bgmac->dma_dev;
struct bgmac_slot_info *slot;
int i;
@@ -591,7 +567,7 @@ static void bgmac_dma_ring_desc_free(struct bgmac *bgmac,
struct bgmac_dma_ring *ring,
int num_slots)
{
- struct device *dma_dev = bgmac->core->dma_dev;
+ struct device *dma_dev = bgmac->dma_dev;
int size;
if (!ring->cpu_base)
@@ -629,7 +605,7 @@ static void bgmac_dma_free(struct bgmac *bgmac)
static int bgmac_dma_alloc(struct bgmac *bgmac)
{
- struct device *dma_dev = bgmac->core->dma_dev;
+ struct device *dma_dev = bgmac->dma_dev;
struct bgmac_dma_ring *ring;
static const u16 ring_base[] = { BGMAC_DMA_BASE0, BGMAC_DMA_BASE1,
BGMAC_DMA_BASE2, BGMAC_DMA_BASE3, };
@@ -640,8 +616,8 @@ static int bgmac_dma_alloc(struct bgmac *bgmac)
BUILD_BUG_ON(BGMAC_MAX_TX_RINGS > ARRAY_SIZE(ring_base));
BUILD_BUG_ON(BGMAC_MAX_RX_RINGS > ARRAY_SIZE(ring_base));
- if (!(bcma_aread32(bgmac->core, BCMA_IOST) & BCMA_IOST_DMA64)) {
- bgmac_err(bgmac, "Core does not report 64-bit DMA\n");
+ if (!(bgmac_idm_read(bgmac, BCMA_IOST) & BCMA_IOST_DMA64)) {
+ dev_err(bgmac->dev, "Core does not report 64-bit DMA\n");
return -ENOTSUPP;
}
@@ -655,8 +631,8 @@ static int bgmac_dma_alloc(struct bgmac *bgmac)
&ring->dma_base,
GFP_KERNEL);
if (!ring->cpu_base) {
- bgmac_err(bgmac, "Allocation of TX ring 0x%X failed\n",
- ring->mmio_base);
+ dev_err(bgmac->dev, "Allocation of TX ring 0x%X failed\n",
+ ring->mmio_base);
goto err_dma_free;
}
@@ -680,8 +656,8 @@ static int bgmac_dma_alloc(struct bgmac *bgmac)
&ring->dma_base,
GFP_KERNEL);
if (!ring->cpu_base) {
- bgmac_err(bgmac, "Allocation of RX ring 0x%X failed\n",
- ring->mmio_base);
+ dev_err(bgmac->dev, "Allocation of RX ring 0x%X failed\n",
+ ring->mmio_base);
err = -ENOMEM;
goto err_dma_free;
}
@@ -756,150 +732,6 @@ error:
return err;
}
-/**************************************************
- * PHY ops
- **************************************************/
-
-static u16 bgmac_phy_read(struct bgmac *bgmac, u8 phyaddr, u8 reg)
-{
- struct bcma_device *core;
- u16 phy_access_addr;
- u16 phy_ctl_addr;
- u32 tmp;
-
- BUILD_BUG_ON(BGMAC_PA_DATA_MASK != BCMA_GMAC_CMN_PA_DATA_MASK);
- BUILD_BUG_ON(BGMAC_PA_ADDR_MASK != BCMA_GMAC_CMN_PA_ADDR_MASK);
- BUILD_BUG_ON(BGMAC_PA_ADDR_SHIFT != BCMA_GMAC_CMN_PA_ADDR_SHIFT);
- BUILD_BUG_ON(BGMAC_PA_REG_MASK != BCMA_GMAC_CMN_PA_REG_MASK);
- BUILD_BUG_ON(BGMAC_PA_REG_SHIFT != BCMA_GMAC_CMN_PA_REG_SHIFT);
- BUILD_BUG_ON(BGMAC_PA_WRITE != BCMA_GMAC_CMN_PA_WRITE);
- BUILD_BUG_ON(BGMAC_PA_START != BCMA_GMAC_CMN_PA_START);
- BUILD_BUG_ON(BGMAC_PC_EPA_MASK != BCMA_GMAC_CMN_PC_EPA_MASK);
- BUILD_BUG_ON(BGMAC_PC_MCT_MASK != BCMA_GMAC_CMN_PC_MCT_MASK);
- BUILD_BUG_ON(BGMAC_PC_MCT_SHIFT != BCMA_GMAC_CMN_PC_MCT_SHIFT);
- BUILD_BUG_ON(BGMAC_PC_MTE != BCMA_GMAC_CMN_PC_MTE);
-
- if (bgmac->core->id.id == BCMA_CORE_4706_MAC_GBIT) {
- core = bgmac->core->bus->drv_gmac_cmn.core;
- phy_access_addr = BCMA_GMAC_CMN_PHY_ACCESS;
- phy_ctl_addr = BCMA_GMAC_CMN_PHY_CTL;
- } else {
- core = bgmac->core;
- phy_access_addr = BGMAC_PHY_ACCESS;
- phy_ctl_addr = BGMAC_PHY_CNTL;
- }
-
- tmp = bcma_read32(core, phy_ctl_addr);
- tmp &= ~BGMAC_PC_EPA_MASK;
- tmp |= phyaddr;
- bcma_write32(core, phy_ctl_addr, tmp);
-
- tmp = BGMAC_PA_START;
- tmp |= phyaddr << BGMAC_PA_ADDR_SHIFT;
- tmp |= reg << BGMAC_PA_REG_SHIFT;
- bcma_write32(core, phy_access_addr, tmp);
-
- if (!bgmac_wait_value(core, phy_access_addr, BGMAC_PA_START, 0, 1000)) {
- bgmac_err(bgmac, "Reading PHY %d register 0x%X failed\n",
- phyaddr, reg);
- return 0xffff;
- }
-
- return bcma_read32(core, phy_access_addr) & BGMAC_PA_DATA_MASK;
-}
-
-/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphywr */
-static int bgmac_phy_write(struct bgmac *bgmac, u8 phyaddr, u8 reg, u16 value)
-{
- struct bcma_device *core;
- u16 phy_access_addr;
- u16 phy_ctl_addr;
- u32 tmp;
-
- if (bgmac->core->id.id == BCMA_CORE_4706_MAC_GBIT) {
- core = bgmac->core->bus->drv_gmac_cmn.core;
- phy_access_addr = BCMA_GMAC_CMN_PHY_ACCESS;
- phy_ctl_addr = BCMA_GMAC_CMN_PHY_CTL;
- } else {
- core = bgmac->core;
- phy_access_addr = BGMAC_PHY_ACCESS;
- phy_ctl_addr = BGMAC_PHY_CNTL;
- }
-
- tmp = bcma_read32(core, phy_ctl_addr);
- tmp &= ~BGMAC_PC_EPA_MASK;
- tmp |= phyaddr;
- bcma_write32(core, phy_ctl_addr, tmp);
-
- bgmac_write(bgmac, BGMAC_INT_STATUS, BGMAC_IS_MDIO);
- if (bgmac_read(bgmac, BGMAC_INT_STATUS) & BGMAC_IS_MDIO)
- bgmac_warn(bgmac, "Error setting MDIO int\n");
-
- tmp = BGMAC_PA_START;
- tmp |= BGMAC_PA_WRITE;
- tmp |= phyaddr << BGMAC_PA_ADDR_SHIFT;
- tmp |= reg << BGMAC_PA_REG_SHIFT;
- tmp |= value;
- bcma_write32(core, phy_access_addr, tmp);
-
- if (!bgmac_wait_value(core, phy_access_addr, BGMAC_PA_START, 0, 1000)) {
- bgmac_err(bgmac, "Writing to PHY %d register 0x%X failed\n",
- phyaddr, reg);
- return -ETIMEDOUT;
- }
-
- return 0;
-}
-
-/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyinit */
-static void bgmac_phy_init(struct bgmac *bgmac)
-{
- struct bcma_chipinfo *ci = &bgmac->core->bus->chipinfo;
- struct bcma_drv_cc *cc = &bgmac->core->bus->drv_cc;
- u8 i;
-
- if (ci->id == BCMA_CHIP_ID_BCM5356) {
- for (i = 0; i < 5; i++) {
- bgmac_phy_write(bgmac, i, 0x1f, 0x008b);
- bgmac_phy_write(bgmac, i, 0x15, 0x0100);
- bgmac_phy_write(bgmac, i, 0x1f, 0x000f);
- bgmac_phy_write(bgmac, i, 0x12, 0x2aaa);
- bgmac_phy_write(bgmac, i, 0x1f, 0x000b);
- }
- }
- if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg != 10) ||
- (ci->id == BCMA_CHIP_ID_BCM4749 && ci->pkg != 10) ||
- (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg != 9)) {
- bcma_chipco_chipctl_maskset(cc, 2, ~0xc0000000, 0);
- bcma_chipco_chipctl_maskset(cc, 4, ~0x80000000, 0);
- for (i = 0; i < 5; i++) {
- bgmac_phy_write(bgmac, i, 0x1f, 0x000f);
- bgmac_phy_write(bgmac, i, 0x16, 0x5284);
- bgmac_phy_write(bgmac, i, 0x1f, 0x000b);
- bgmac_phy_write(bgmac, i, 0x17, 0x0010);
- bgmac_phy_write(bgmac, i, 0x1f, 0x000f);
- bgmac_phy_write(bgmac, i, 0x16, 0x5296);
- bgmac_phy_write(bgmac, i, 0x17, 0x1073);
- bgmac_phy_write(bgmac, i, 0x17, 0x9073);
- bgmac_phy_write(bgmac, i, 0x16, 0x52b6);
- bgmac_phy_write(bgmac, i, 0x17, 0x9273);
- bgmac_phy_write(bgmac, i, 0x1f, 0x000b);
- }
- }
-}
-
-/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyreset */
-static void bgmac_phy_reset(struct bgmac *bgmac)
-{
- if (bgmac->phyaddr == BGMAC_PHY_NOREGS)
- return;
-
- bgmac_phy_write(bgmac, bgmac->phyaddr, MII_BMCR, BMCR_RESET);
- udelay(100);
- if (bgmac_phy_read(bgmac, bgmac->phyaddr, MII_BMCR) & BMCR_RESET)
- bgmac_err(bgmac, "PHY reset failed\n");
- bgmac_phy_init(bgmac);
-}
/**************************************************
* Chip ops
@@ -913,14 +745,20 @@ static void bgmac_cmdcfg_maskset(struct bgmac *bgmac, u32 mask, u32 set,
{
u32 cmdcfg = bgmac_read(bgmac, BGMAC_CMDCFG);
u32 new_val = (cmdcfg & mask) | set;
+ u32 cmdcfg_sr;
+
+ if (bgmac->feature_flags & BGMAC_FEAT_CMDCFG_SR_REV4)
+ cmdcfg_sr = BGMAC_CMDCFG_SR_REV4;
+ else
+ cmdcfg_sr = BGMAC_CMDCFG_SR_REV0;
- bgmac_set(bgmac, BGMAC_CMDCFG, BGMAC_CMDCFG_SR(bgmac->core->id.rev));
+ bgmac_set(bgmac, BGMAC_CMDCFG, cmdcfg_sr);
udelay(2);
if (new_val != cmdcfg || force)
bgmac_write(bgmac, BGMAC_CMDCFG, new_val);
- bgmac_mask(bgmac, BGMAC_CMDCFG, ~BGMAC_CMDCFG_SR(bgmac->core->id.rev));
+ bgmac_mask(bgmac, BGMAC_CMDCFG, ~cmdcfg_sr);
udelay(2);
}
@@ -949,7 +787,7 @@ static void bgmac_chip_stats_update(struct bgmac *bgmac)
{
int i;
- if (bgmac->core->id.id != BCMA_CORE_4706_MAC_GBIT) {
+ if (!(bgmac->feature_flags & BGMAC_FEAT_NO_CLR_MIB)) {
for (i = 0; i < BGMAC_NUM_MIB_TX_REGS; i++)
bgmac->mib_tx_regs[i] =
bgmac_read(bgmac,
@@ -968,7 +806,7 @@ static void bgmac_clear_mib(struct bgmac *bgmac)
{
int i;
- if (bgmac->core->id.id == BCMA_CORE_4706_MAC_GBIT)
+ if (bgmac->feature_flags & BGMAC_FEAT_NO_CLR_MIB)
return;
bgmac_set(bgmac, BGMAC_DEV_CTL, BGMAC_DC_MROR);
@@ -998,7 +836,8 @@ static void bgmac_mac_speed(struct bgmac *bgmac)
set |= BGMAC_CMDCFG_ES_2500;
break;
default:
- bgmac_err(bgmac, "Unsupported speed: %d\n", bgmac->mac_speed);
+ dev_err(bgmac->dev, "Unsupported speed: %d\n",
+ bgmac->mac_speed);
}
if (bgmac->mac_duplex == DUPLEX_HALF)
@@ -1009,17 +848,16 @@ static void bgmac_mac_speed(struct bgmac *bgmac)
static void bgmac_miiconfig(struct bgmac *bgmac)
{
- struct bcma_device *core = bgmac->core;
- u8 imode;
-
- if (bgmac_is_bcm4707_family(bgmac)) {
- bcma_awrite32(core, BCMA_IOCTL,
- bcma_aread32(core, BCMA_IOCTL) | 0x40 |
- BGMAC_BCMA_IOCTL_SW_CLKEN);
+ if (bgmac->feature_flags & BGMAC_FEAT_FORCE_SPEED_2500) {
+ bgmac_idm_write(bgmac, BCMA_IOCTL,
+ bgmac_idm_read(bgmac, BCMA_IOCTL) | 0x40 |
+ BGMAC_BCMA_IOCTL_SW_CLKEN);
bgmac->mac_speed = SPEED_2500;
bgmac->mac_duplex = DUPLEX_FULL;
bgmac_mac_speed(bgmac);
} else {
+ u8 imode;
+
imode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) &
BGMAC_DS_MM_MASK) >> BGMAC_DS_MM_SHIFT;
if (imode == 0 || imode == 1) {
@@ -1033,14 +871,11 @@ static void bgmac_miiconfig(struct bgmac *bgmac)
/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipreset */
static void bgmac_chip_reset(struct bgmac *bgmac)
{
- struct bcma_device *core = bgmac->core;
- struct bcma_bus *bus = core->bus;
- struct bcma_chipinfo *ci = &bus->chipinfo;
- u32 flags;
+ u32 cmdcfg_sr;
u32 iost;
int i;
- if (bcma_core_is_enabled(core)) {
+ if (bgmac_clk_enabled(bgmac)) {
if (!bgmac->stats_grabbed) {
/* bgmac_chip_stats_update(bgmac); */
bgmac->stats_grabbed = true;
@@ -1058,38 +893,32 @@ static void bgmac_chip_reset(struct bgmac *bgmac)
/* TODO: Clear software multicast filter list */
}
- iost = bcma_aread32(core, BCMA_IOST);
- if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM47186) ||
- (ci->id == BCMA_CHIP_ID_BCM4749 && ci->pkg == 10) ||
- (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg == BCMA_PKG_ID_BCM47188))
+ iost = bgmac_idm_read(bgmac, BCMA_IOST);
+ if (bgmac->feature_flags & BGMAC_FEAT_IOST_ATTACHED)
iost &= ~BGMAC_BCMA_IOST_ATTACHED;
/* 3GMAC: for BCM4707 & BCM47094, only do core reset at bgmac_probe() */
- if (ci->id != BCMA_CHIP_ID_BCM4707 &&
- ci->id != BCMA_CHIP_ID_BCM47094) {
- flags = 0;
+ if (!(bgmac->feature_flags & BGMAC_FEAT_NO_RESET)) {
+ u32 flags = 0;
if (iost & BGMAC_BCMA_IOST_ATTACHED) {
flags = BGMAC_BCMA_IOCTL_SW_CLKEN;
if (!bgmac->has_robosw)
flags |= BGMAC_BCMA_IOCTL_SW_RESET;
}
- bcma_core_enable(core, flags);
+ bgmac_clk_enable(bgmac, flags);
}
/* Request Misc PLL for corerev > 2 */
- if (core->id.rev > 2 && !bgmac_is_bcm4707_family(bgmac)) {
+ if (bgmac->feature_flags & BGMAC_FEAT_MISC_PLL_REQ) {
bgmac_set(bgmac, BCMA_CLKCTLST,
BGMAC_BCMA_CLKCTLST_MISC_PLL_REQ);
- bgmac_wait_value(bgmac->core, BCMA_CLKCTLST,
+ bgmac_wait_value(bgmac, BCMA_CLKCTLST,
BGMAC_BCMA_CLKCTLST_MISC_PLL_ST,
BGMAC_BCMA_CLKCTLST_MISC_PLL_ST,
1000);
}
- if (ci->id == BCMA_CHIP_ID_BCM5357 ||
- ci->id == BCMA_CHIP_ID_BCM4749 ||
- ci->id == BCMA_CHIP_ID_BCM53572) {
- struct bcma_drv_cc *cc = &bgmac->core->bus->drv_cc;
+ if (bgmac->feature_flags & BGMAC_FEAT_SW_TYPE_PHY) {
u8 et_swtype = 0;
u8 sw_type = BGMAC_CHIPCTL_1_SW_TYPE_EPHY |
BGMAC_CHIPCTL_1_IF_TYPE_MII;
@@ -1097,35 +926,37 @@ static void bgmac_chip_reset(struct bgmac *bgmac)
if (bcm47xx_nvram_getenv("et_swtype", buf, sizeof(buf)) > 0) {
if (kstrtou8(buf, 0, &et_swtype))
- bgmac_err(bgmac, "Failed to parse et_swtype (%s)\n",
- buf);
+ dev_err(bgmac->dev, "Failed to parse et_swtype (%s)\n",
+ buf);
et_swtype &= 0x0f;
et_swtype <<= 4;
sw_type = et_swtype;
- } else if (ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM5358) {
+ } else if (bgmac->feature_flags & BGMAC_FEAT_SW_TYPE_EPHYRMII) {
sw_type = BGMAC_CHIPCTL_1_SW_TYPE_EPHYRMII;
- } else if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM47186) ||
- (ci->id == BCMA_CHIP_ID_BCM4749 && ci->pkg == 10) ||
- (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg == BCMA_PKG_ID_BCM47188)) {
+ } else if (bgmac->feature_flags & BGMAC_FEAT_SW_TYPE_RGMII) {
sw_type = BGMAC_CHIPCTL_1_IF_TYPE_RGMII |
BGMAC_CHIPCTL_1_SW_TYPE_RGMII;
}
- bcma_chipco_chipctl_maskset(cc, 1,
- ~(BGMAC_CHIPCTL_1_IF_TYPE_MASK |
- BGMAC_CHIPCTL_1_SW_TYPE_MASK),
- sw_type);
+ bgmac_cco_ctl_maskset(bgmac, 1, ~(BGMAC_CHIPCTL_1_IF_TYPE_MASK |
+ BGMAC_CHIPCTL_1_SW_TYPE_MASK),
+ sw_type);
}
if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw)
- bcma_awrite32(core, BCMA_IOCTL,
- bcma_aread32(core, BCMA_IOCTL) &
- ~BGMAC_BCMA_IOCTL_SW_RESET);
+ bgmac_idm_write(bgmac, BCMA_IOCTL,
+ bgmac_idm_read(bgmac, BCMA_IOCTL) &
+ ~BGMAC_BCMA_IOCTL_SW_RESET);
/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/gmac_reset
* Specs don't say about using BGMAC_CMDCFG_SR, but in this routine
* BGMAC_CMDCFG is read _after_ putting chip in a reset. So it has to
* be keps until taking MAC out of the reset.
*/
+ if (bgmac->feature_flags & BGMAC_FEAT_CMDCFG_SR_REV4)
+ cmdcfg_sr = BGMAC_CMDCFG_SR_REV4;
+ else
+ cmdcfg_sr = BGMAC_CMDCFG_SR_REV0;
+
bgmac_cmdcfg_maskset(bgmac,
~(BGMAC_CMDCFG_TE |
BGMAC_CMDCFG_RE |
@@ -1143,19 +974,20 @@ static void bgmac_chip_reset(struct bgmac *bgmac)
BGMAC_CMDCFG_PROM |
BGMAC_CMDCFG_NLC |
BGMAC_CMDCFG_CFE |
- BGMAC_CMDCFG_SR(core->id.rev),
+ cmdcfg_sr,
false);
bgmac->mac_speed = SPEED_UNKNOWN;
bgmac->mac_duplex = DUPLEX_UNKNOWN;
bgmac_clear_mib(bgmac);
- if (core->id.id == BCMA_CORE_4706_MAC_GBIT)
- bcma_maskset32(bgmac->cmn, BCMA_GMAC_CMN_PHY_CTL, ~0,
- BCMA_GMAC_CMN_PC_MTE);
+ if (bgmac->feature_flags & BGMAC_FEAT_CMN_PHY_CTL)
+ bgmac_cmn_maskset32(bgmac, BCMA_GMAC_CMN_PHY_CTL, ~0,
+ BCMA_GMAC_CMN_PC_MTE);
else
bgmac_set(bgmac, BGMAC_PHY_CNTL, BGMAC_PC_MTE);
bgmac_miiconfig(bgmac);
- bgmac_phy_init(bgmac);
+ if (bgmac->mii_bus)
+ bgmac->mii_bus->reset(bgmac->mii_bus);
netdev_reset_queue(bgmac->net_dev);
}
@@ -1174,50 +1006,51 @@ static void bgmac_chip_intrs_off(struct bgmac *bgmac)
/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/gmac_enable */
static void bgmac_enable(struct bgmac *bgmac)
{
- struct bcma_chipinfo *ci = &bgmac->core->bus->chipinfo;
+ u32 cmdcfg_sr;
u32 cmdcfg;
u32 mode;
- u32 rxq_ctl;
- u32 fl_ctl;
- u16 bp_clk;
- u8 mdp;
+
+ if (bgmac->feature_flags & BGMAC_FEAT_CMDCFG_SR_REV4)
+ cmdcfg_sr = BGMAC_CMDCFG_SR_REV4;
+ else
+ cmdcfg_sr = BGMAC_CMDCFG_SR_REV0;
cmdcfg = bgmac_read(bgmac, BGMAC_CMDCFG);
bgmac_cmdcfg_maskset(bgmac, ~(BGMAC_CMDCFG_TE | BGMAC_CMDCFG_RE),
- BGMAC_CMDCFG_SR(bgmac->core->id.rev), true);
+ cmdcfg_sr, true);
udelay(2);
cmdcfg |= BGMAC_CMDCFG_TE | BGMAC_CMDCFG_RE;
bgmac_write(bgmac, BGMAC_CMDCFG, cmdcfg);
mode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >>
BGMAC_DS_MM_SHIFT;
- if (ci->id != BCMA_CHIP_ID_BCM47162 || mode != 0)
+ if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST || mode != 0)
bgmac_set(bgmac, BCMA_CLKCTLST, BCMA_CLKCTLST_FORCEHT);
- if (ci->id == BCMA_CHIP_ID_BCM47162 && mode == 2)
- bcma_chipco_chipctl_maskset(&bgmac->core->bus->drv_cc, 1, ~0,
- BGMAC_CHIPCTL_1_RXC_DLL_BYPASS);
-
- switch (ci->id) {
- case BCMA_CHIP_ID_BCM5357:
- case BCMA_CHIP_ID_BCM4749:
- case BCMA_CHIP_ID_BCM53572:
- case BCMA_CHIP_ID_BCM4716:
- case BCMA_CHIP_ID_BCM47162:
- fl_ctl = 0x03cb04cb;
- if (ci->id == BCMA_CHIP_ID_BCM5357 ||
- ci->id == BCMA_CHIP_ID_BCM4749 ||
- ci->id == BCMA_CHIP_ID_BCM53572)
+ if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST && mode == 2)
+ bgmac_cco_ctl_maskset(bgmac, 1, ~0,
+ BGMAC_CHIPCTL_1_RXC_DLL_BYPASS);
+
+ if (bgmac->feature_flags & (BGMAC_FEAT_FLW_CTRL1 |
+ BGMAC_FEAT_FLW_CTRL2)) {
+ u32 fl_ctl;
+
+ if (bgmac->feature_flags & BGMAC_FEAT_FLW_CTRL1)
fl_ctl = 0x2300e1;
+ else
+ fl_ctl = 0x03cb04cb;
+
bgmac_write(bgmac, BGMAC_FLOW_CTL_THRESH, fl_ctl);
bgmac_write(bgmac, BGMAC_PAUSE_CTL, 0x27fff);
- break;
}
- if (!bgmac_is_bcm4707_family(bgmac)) {
+ if (bgmac->feature_flags & BGMAC_FEAT_SET_RXQ_CLK) {
+ u32 rxq_ctl;
+ u16 bp_clk;
+ u8 mdp;
+
rxq_ctl = bgmac_read(bgmac, BGMAC_RXQ_CTL);
rxq_ctl &= ~BGMAC_RXQ_CTL_MDP_MASK;
- bp_clk = bcma_pmu_get_bus_clock(&bgmac->core->bus->drv_cc) /
- 1000000;
+ bp_clk = bgmac_get_bus_clock(bgmac) / 1000000;
mdp = (bp_clk * 128 / 1000) - 3;
rxq_ctl |= (mdp << BGMAC_RXQ_CTL_MDP_SHIFT);
bgmac_write(bgmac, BGMAC_RXQ_CTL, rxq_ctl);
@@ -1261,7 +1094,7 @@ static irqreturn_t bgmac_interrupt(int irq, void *dev_id)
int_status &= ~(BGMAC_IS_TX0 | BGMAC_IS_RX);
if (int_status)
- bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", int_status);
+ dev_err(bgmac->dev, "Unknown IRQs: 0x%08X\n", int_status);
/* Disable new interrupts until handling existing ones */
bgmac_chip_intrs_off(bgmac);
@@ -1312,10 +1145,10 @@ static int bgmac_open(struct net_device *net_dev)
/* Specs say about reclaiming rings here, but we do that in DMA init */
bgmac_chip_init(bgmac);
- err = request_irq(bgmac->core->irq, bgmac_interrupt, IRQF_SHARED,
+ err = request_irq(bgmac->irq, bgmac_interrupt, IRQF_SHARED,
KBUILD_MODNAME, net_dev);
if (err < 0) {
- bgmac_err(bgmac, "IRQ request error: %d!\n", err);
+ dev_err(bgmac->dev, "IRQ request error: %d!\n", err);
bgmac_dma_cleanup(bgmac);
return err;
}
@@ -1338,7 +1171,7 @@ static int bgmac_stop(struct net_device *net_dev)
napi_disable(&bgmac->napi);
bgmac_chip_intrs_off(bgmac);
- free_irq(bgmac->core->irq, net_dev);
+ free_irq(bgmac->irq, net_dev);
bgmac_chip_reset(bgmac);
bgmac_dma_cleanup(bgmac);
@@ -1517,7 +1350,7 @@ static void bgmac_get_drvinfo(struct net_device *net_dev,
struct ethtool_drvinfo *info)
{
strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
- strlcpy(info->bus_info, "BCMA", sizeof(info->bus_info));
+ strlcpy(info->bus_info, "AXI", sizeof(info->bus_info));
}
static const struct ethtool_ops bgmac_ethtool_ops = {
@@ -1533,17 +1366,6 @@ static const struct ethtool_ops bgmac_ethtool_ops = {
* MII
**************************************************/
-static int bgmac_mii_read(struct mii_bus *bus, int mii_id, int regnum)
-{
- return bgmac_phy_read(bus->priv, mii_id, regnum);
-}
-
-static int bgmac_mii_write(struct mii_bus *bus, int mii_id, int regnum,
- u16 value)
-{
- return bgmac_phy_write(bus->priv, mii_id, regnum, value);
-}
-
static void bgmac_adjust_link(struct net_device *net_dev)
{
struct bgmac *bgmac = netdev_priv(net_dev);
@@ -1568,7 +1390,7 @@ static void bgmac_adjust_link(struct net_device *net_dev)
}
}
-static int bgmac_fixed_phy_register(struct bgmac *bgmac)
+static int bgmac_phy_connect_direct(struct bgmac *bgmac)
{
struct fixed_phy_status fphy_status = {
.link = 1,
@@ -1580,194 +1402,76 @@ static int bgmac_fixed_phy_register(struct bgmac *bgmac)
phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, -1, NULL);
if (!phy_dev || IS_ERR(phy_dev)) {
- bgmac_err(bgmac, "Failed to register fixed PHY device\n");
+ dev_err(bgmac->dev, "Failed to register fixed PHY device\n");
return -ENODEV;
}
err = phy_connect_direct(bgmac->net_dev, phy_dev, bgmac_adjust_link,
PHY_INTERFACE_MODE_MII);
if (err) {
- bgmac_err(bgmac, "Connecting PHY failed\n");
+ dev_err(bgmac->dev, "Connecting PHY failed\n");
return err;
}
return err;
}
-static int bgmac_mii_register(struct bgmac *bgmac)
+static int bgmac_phy_connect(struct bgmac *bgmac)
{
- struct mii_bus *mii_bus;
struct phy_device *phy_dev;
char bus_id[MII_BUS_ID_SIZE + 3];
- int err = 0;
-
- if (bgmac_is_bcm4707_family(bgmac))
- return bgmac_fixed_phy_register(bgmac);
-
- mii_bus = mdiobus_alloc();
- if (!mii_bus)
- return -ENOMEM;
-
- mii_bus->name = "bgmac mii bus";
- sprintf(mii_bus->id, "%s-%d-%d", "bgmac", bgmac->core->bus->num,
- bgmac->core->core_unit);
- mii_bus->priv = bgmac;
- mii_bus->read = bgmac_mii_read;
- mii_bus->write = bgmac_mii_write;
- mii_bus->parent = &bgmac->core->dev;
- mii_bus->phy_mask = ~(1 << bgmac->phyaddr);
-
- err = mdiobus_register(mii_bus);
- if (err) {
- bgmac_err(bgmac, "Registration of mii bus failed\n");
- goto err_free_bus;
- }
-
- bgmac->mii_bus = mii_bus;
/* Connect to the PHY */
- snprintf(bus_id, sizeof(bus_id), PHY_ID_FMT, mii_bus->id,
+ snprintf(bus_id, sizeof(bus_id), PHY_ID_FMT, bgmac->mii_bus->id,
bgmac->phyaddr);
phy_dev = phy_connect(bgmac->net_dev, bus_id, &bgmac_adjust_link,
PHY_INTERFACE_MODE_MII);
if (IS_ERR(phy_dev)) {
- bgmac_err(bgmac, "PHY connection failed\n");
- err = PTR_ERR(phy_dev);
- goto err_unregister_bus;
+ dev_err(bgmac->dev, "PHY connection failed\n");
+ return PTR_ERR(phy_dev);
}
- return err;
-
-err_unregister_bus:
- mdiobus_unregister(mii_bus);
-err_free_bus:
- mdiobus_free(mii_bus);
- return err;
-}
-
-static void bgmac_mii_unregister(struct bgmac *bgmac)
-{
- struct mii_bus *mii_bus = bgmac->mii_bus;
-
- mdiobus_unregister(mii_bus);
- mdiobus_free(mii_bus);
+ return 0;
}
-/**************************************************
- * BCMA bus ops
- **************************************************/
-
-/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipattach */
-static int bgmac_probe(struct bcma_device *core)
+int bgmac_enet_probe(struct bgmac *info)
{
struct net_device *net_dev;
struct bgmac *bgmac;
- struct ssb_sprom *sprom = &core->bus->sprom;
- u8 *mac;
int err;
- switch (core->core_unit) {
- case 0:
- mac = sprom->et0mac;
- break;
- case 1:
- mac = sprom->et1mac;
- break;
- case 2:
- mac = sprom->et2mac;
- break;
- default:
- pr_err("Unsupported core_unit %d\n", core->core_unit);
- return -ENOTSUPP;
- }
-
- if (!is_valid_ether_addr(mac)) {
- dev_err(&core->dev, "Invalid MAC addr: %pM\n", mac);
- eth_random_addr(mac);
- dev_warn(&core->dev, "Using random MAC: %pM\n", mac);
- }
-
- /* This (reset &) enable is not preset in specs or reference driver but
- * Broadcom does it in arch PCI code when enabling fake PCI device.
- */
- bcma_core_enable(core, 0);
-
/* Allocation and references */
net_dev = alloc_etherdev(sizeof(*bgmac));
if (!net_dev)
return -ENOMEM;
+
net_dev->netdev_ops = &bgmac_netdev_ops;
- net_dev->irq = core->irq;
net_dev->ethtool_ops = &bgmac_ethtool_ops;
bgmac = netdev_priv(net_dev);
+ memcpy(bgmac, info, sizeof(*bgmac));
bgmac->net_dev = net_dev;
- bgmac->core = core;
- bcma_set_drvdata(core, bgmac);
- SET_NETDEV_DEV(net_dev, &core->dev);
-
- /* Defaults */
- memcpy(bgmac->net_dev->dev_addr, mac, ETH_ALEN);
-
- /* On BCM4706 we need common core to access PHY */
- if (core->id.id == BCMA_CORE_4706_MAC_GBIT &&
- !core->bus->drv_gmac_cmn.core) {
- bgmac_err(bgmac, "GMAC CMN core not found (required for BCM4706)\n");
- err = -ENODEV;
- goto err_netdev_free;
+ net_dev->irq = bgmac->irq;
+ SET_NETDEV_DEV(net_dev, bgmac->dev);
+
+ if (!is_valid_ether_addr(bgmac->mac_addr)) {
+ dev_err(bgmac->dev, "Invalid MAC addr: %pM\n",
+ bgmac->mac_addr);
+ eth_random_addr(bgmac->mac_addr);
+ dev_warn(bgmac->dev, "Using random MAC: %pM\n",
+ bgmac->mac_addr);
}
- bgmac->cmn = core->bus->drv_gmac_cmn.core;
+ ether_addr_copy(net_dev->dev_addr, bgmac->mac_addr);
- switch (core->core_unit) {
- case 0:
- bgmac->phyaddr = sprom->et0phyaddr;
- break;
- case 1:
- bgmac->phyaddr = sprom->et1phyaddr;
- break;
- case 2:
- bgmac->phyaddr = sprom->et2phyaddr;
- break;
- }
- bgmac->phyaddr &= BGMAC_PHY_MASK;
- if (bgmac->phyaddr == BGMAC_PHY_MASK) {
- bgmac_err(bgmac, "No PHY found\n");
- err = -ENODEV;
- goto err_netdev_free;
- }
- bgmac_info(bgmac, "Found PHY addr: %d%s\n", bgmac->phyaddr,
- bgmac->phyaddr == BGMAC_PHY_NOREGS ? " (NOREGS)" : "");
-
- if (core->bus->hosttype == BCMA_HOSTTYPE_PCI) {
- bgmac_err(bgmac, "PCI setup not implemented\n");
- err = -ENOTSUPP;
- goto err_netdev_free;
- }
+ /* This (reset &) enable is not present in specs or reference driver but
+ * Broadcom does it in arch PCI code when enabling fake PCI device.
+ */
+ bgmac_clk_enable(bgmac, 0);
bgmac_chip_reset(bgmac);
- /* For Northstar, we have to take all GMAC core out of reset */
- if (bgmac_is_bcm4707_family(bgmac)) {
- struct bcma_device *ns_core;
- int ns_gmac;
-
- /* Northstar has 4 GMAC cores */
- for (ns_gmac = 0; ns_gmac < 4; ns_gmac++) {
- /* As Northstar requirement, we have to reset all GMACs
- * before accessing one. bgmac_chip_reset() call
- * bcma_core_enable() for this core. Then the other
- * three GMACs didn't reset. We do it here.
- */
- ns_core = bcma_find_core_unit(core->bus,
- BCMA_CORE_MAC_GBIT,
- ns_gmac);
- if (ns_core && !bcma_core_is_enabled(ns_core))
- bcma_core_enable(ns_core, 0);
- }
- }
-
err = bgmac_dma_alloc(bgmac);
if (err) {
- bgmac_err(bgmac, "Unable to alloc memory for DMA\n");
+ dev_err(bgmac->dev, "Unable to alloc memory for DMA\n");
goto err_netdev_free;
}
@@ -1775,22 +1479,14 @@ static int bgmac_probe(struct bcma_device *core)
if (bcm47xx_nvram_getenv("et0_no_txint", NULL, 0) == 0)
bgmac->int_mask &= ~BGMAC_IS_TX_MASK;
- /* TODO: reset the external phy. Specs are needed */
- bgmac_phy_reset(bgmac);
-
- bgmac->has_robosw = !!(core->bus->sprom.boardflags_lo &
- BGMAC_BFL_ENETROBO);
- if (bgmac->has_robosw)
- bgmac_warn(bgmac, "Support for Roboswitch not implemented\n");
-
- if (core->bus->sprom.boardflags_lo & BGMAC_BFL_ENETADM)
- bgmac_warn(bgmac, "Support for ADMtek ethernet switch not implemented\n");
-
netif_napi_add(net_dev, &bgmac->napi, bgmac_poll, BGMAC_WEIGHT);
- err = bgmac_mii_register(bgmac);
+ if (!bgmac->mii_bus)
+ err = bgmac_phy_connect_direct(bgmac);
+ else
+ err = bgmac_phy_connect(bgmac);
if (err) {
- bgmac_err(bgmac, "Cannot register MDIO\n");
+ dev_err(bgmac->dev, "Cannot connect to phy\n");
goto err_dma_free;
}
@@ -1800,64 +1496,34 @@ static int bgmac_probe(struct bcma_device *core)
err = register_netdev(bgmac->net_dev);
if (err) {
- bgmac_err(bgmac, "Cannot register net device\n");
- goto err_mii_unregister;
+ dev_err(bgmac->dev, "Cannot register net device\n");
+ goto err_phy_disconnect;
}
netif_carrier_off(net_dev);
return 0;
-err_mii_unregister:
- bgmac_mii_unregister(bgmac);
+err_phy_disconnect:
+ phy_disconnect(net_dev->phydev);
err_dma_free:
bgmac_dma_free(bgmac);
-
err_netdev_free:
- bcma_set_drvdata(core, NULL);
free_netdev(net_dev);
return err;
}
+EXPORT_SYMBOL_GPL(bgmac_enet_probe);
-static void bgmac_remove(struct bcma_device *core)
+void bgmac_enet_remove(struct bgmac *bgmac)
{
- struct bgmac *bgmac = bcma_get_drvdata(core);
-
unregister_netdev(bgmac->net_dev);
- bgmac_mii_unregister(bgmac);
+ phy_disconnect(bgmac->net_dev->phydev);
netif_napi_del(&bgmac->napi);
bgmac_dma_free(bgmac);
- bcma_set_drvdata(core, NULL);
free_netdev(bgmac->net_dev);
}
-
-static struct bcma_driver bgmac_bcma_driver = {
- .name = KBUILD_MODNAME,
- .id_table = bgmac_bcma_tbl,
- .probe = bgmac_probe,
- .remove = bgmac_remove,
-};
-
-static int __init bgmac_init(void)
-{
- int err;
-
- err = bcma_driver_register(&bgmac_bcma_driver);
- if (err)
- return err;
- pr_info("Broadcom 47xx GBit MAC driver loaded\n");
-
- return 0;
-}
-
-static void __exit bgmac_exit(void)
-{
- bcma_driver_unregister(&bgmac_bcma_driver);
-}
-
-module_init(bgmac_init)
-module_exit(bgmac_exit)
+EXPORT_SYMBOL_GPL(bgmac_enet_remove);
MODULE_AUTHOR("Rafał Miłecki");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
index 99beb181f577..24a250267b88 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -1,19 +1,6 @@
#ifndef _BGMAC_H
#define _BGMAC_H
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#define bgmac_err(bgmac, fmt, ...) \
- dev_err(&(bgmac)->core->dev, fmt, ##__VA_ARGS__)
-#define bgmac_warn(bgmac, fmt, ...) \
- dev_warn(&(bgmac)->core->dev, fmt, ##__VA_ARGS__)
-#define bgmac_info(bgmac, fmt, ...) \
- dev_info(&(bgmac)->core->dev, fmt, ##__VA_ARGS__)
-#define bgmac_dbg(bgmac, fmt, ...) \
- dev_dbg(&(bgmac)->core->dev, fmt, ##__VA_ARGS__)
-
-#include <linux/bcma/bcma.h>
-#include <linux/brcmphy.h>
#include <linux/netdevice.h>
#define BGMAC_DEV_CTL 0x000
@@ -201,7 +188,6 @@
#define BGMAC_CMDCFG_HD_SHIFT 10
#define BGMAC_CMDCFG_SR_REV0 0x00000800 /* Set to reset mode, for core rev 0-3 */
#define BGMAC_CMDCFG_SR_REV4 0x00002000 /* Set to reset mode, for core rev >= 4 */
-#define BGMAC_CMDCFG_SR(rev) ((rev >= 4) ? BGMAC_CMDCFG_SR_REV4 : BGMAC_CMDCFG_SR_REV0)
#define BGMAC_CMDCFG_ML 0x00008000 /* Set to activate mac loopback mode */
#define BGMAC_CMDCFG_AE 0x00400000
#define BGMAC_CMDCFG_CFE 0x00800000
@@ -387,6 +373,24 @@
#define ETHER_MAX_LEN 1518
+/* Feature Flags */
+#define BGMAC_FEAT_TX_MASK_SETUP BIT(0)
+#define BGMAC_FEAT_RX_MASK_SETUP BIT(1)
+#define BGMAC_FEAT_IOST_ATTACHED BIT(2)
+#define BGMAC_FEAT_NO_RESET BIT(3)
+#define BGMAC_FEAT_MISC_PLL_REQ BIT(4)
+#define BGMAC_FEAT_SW_TYPE_PHY BIT(5)
+#define BGMAC_FEAT_SW_TYPE_EPHYRMII BIT(6)
+#define BGMAC_FEAT_SW_TYPE_RGMII BIT(7)
+#define BGMAC_FEAT_CMN_PHY_CTL BIT(8)
+#define BGMAC_FEAT_FLW_CTRL1 BIT(9)
+#define BGMAC_FEAT_FLW_CTRL2 BIT(10)
+#define BGMAC_FEAT_SET_RXQ_CLK BIT(11)
+#define BGMAC_FEAT_CLKCTLST BIT(12)
+#define BGMAC_FEAT_NO_CLR_MIB BIT(13)
+#define BGMAC_FEAT_FORCE_SPEED_2500 BIT(14)
+#define BGMAC_FEAT_CMDCFG_SR_REV4 BIT(15)
+
struct bgmac_slot_info {
union {
struct sk_buff *skb;
@@ -436,8 +440,23 @@ struct bgmac_rx_header {
};
struct bgmac {
- struct bcma_device *core;
- struct bcma_device *cmn; /* Reference to CMN core for BCM4706 */
+ union {
+ struct {
+ void *base;
+ void *idm_base;
+ } plat;
+ struct {
+ struct bcma_device *core;
+ /* Reference to CMN core for BCM4706 */
+ struct bcma_device *cmn;
+ } bcma;
+ };
+
+ struct device *dev;
+ struct device *dma_dev;
+ unsigned char mac_addr[ETH_ALEN];
+ u32 feature_flags;
+
struct net_device *net_dev;
struct napi_struct napi;
struct mii_bus *mii_bus;
@@ -452,6 +471,7 @@ struct bgmac {
u32 mib_rx_regs[BGMAC_NUM_MIB_RX_REGS];
/* Int */
+ int irq;
u32 int_mask;
/* Current MAC state */
@@ -462,16 +482,71 @@ struct bgmac {
bool has_robosw;
bool loopback;
+
+ u32 (*read)(struct bgmac *bgmac, u16 offset);
+ void (*write)(struct bgmac *bgmac, u16 offset, u32 value);
+ u32 (*idm_read)(struct bgmac *bgmac, u16 offset);
+ void (*idm_write)(struct bgmac *bgmac, u16 offset, u32 value);
+ bool (*clk_enabled)(struct bgmac *bgmac);
+ void (*clk_enable)(struct bgmac *bgmac, u32 flags);
+ void (*cco_ctl_maskset)(struct bgmac *bgmac, u32 offset, u32 mask,
+ u32 set);
+ u32 (*get_bus_clock)(struct bgmac *bgmac);
+ void (*cmn_maskset32)(struct bgmac *bgmac, u16 offset, u32 mask,
+ u32 set);
};
+int bgmac_enet_probe(struct bgmac *info);
+void bgmac_enet_remove(struct bgmac *bgmac);
+
+struct mii_bus *bcma_mdio_mii_register(struct bcma_device *core, u8 phyaddr);
+void bcma_mdio_mii_unregister(struct mii_bus *mii_bus);
+
static inline u32 bgmac_read(struct bgmac *bgmac, u16 offset)
{
- return bcma_read32(bgmac->core, offset);
+ return bgmac->read(bgmac, offset);
}
static inline void bgmac_write(struct bgmac *bgmac, u16 offset, u32 value)
{
- bcma_write32(bgmac->core, offset, value);
+ bgmac->write(bgmac, offset, value);
+}
+
+static inline u32 bgmac_idm_read(struct bgmac *bgmac, u16 offset)
+{
+ return bgmac->idm_read(bgmac, offset);
+}
+
+static inline void bgmac_idm_write(struct bgmac *bgmac, u16 offset, u32 value)
+{
+ bgmac->idm_write(bgmac, offset, value);
+}
+
+static inline bool bgmac_clk_enabled(struct bgmac *bgmac)
+{
+ return bgmac->clk_enabled(bgmac);
+}
+
+static inline void bgmac_clk_enable(struct bgmac *bgmac, u32 flags)
+{
+ bgmac->clk_enable(bgmac, flags);
+}
+
+static inline void bgmac_cco_ctl_maskset(struct bgmac *bgmac, u32 offset,
+ u32 mask, u32 set)
+{
+ bgmac->cco_ctl_maskset(bgmac, offset, mask, set);
+}
+
+static inline u32 bgmac_get_bus_clock(struct bgmac *bgmac)
+{
+ return bgmac->get_bus_clock(bgmac);
+}
+
+static inline void bgmac_cmn_maskset32(struct bgmac *bgmac, u16 offset,
+ u32 mask, u32 set)
+{
+ bgmac->cmn_maskset32(bgmac, offset, mask, set);
}
static inline void bgmac_maskset(struct bgmac *bgmac, u16 offset, u32 mask,
@@ -489,5 +564,4 @@ static inline void bgmac_set(struct bgmac *bgmac, u16 offset, u32 set)
{
bgmac_maskset(bgmac, offset, ~0, set);
}
-
#endif /* _BGMAC_H */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 70b148a10ec8..659faa6511c0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -997,7 +997,7 @@ static struct sk_buff *bnxt_gro_func_5731x(struct bnxt_tpa_info *tpa_info,
* correct protocol ID, it must be a loopback packet where
* the offsets are off by 4.
*/
- if (proto != htons(ETH_P_IP) && proto && htons(ETH_P_IPV6))
+ if (proto != htons(ETH_P_IP) && proto != htons(ETH_P_IPV6))
loopback = true;
}
if (loopback) {
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 76ed6df0fe53..8d4f8495dbb3 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -450,6 +450,30 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv,
genet_dma_ring_regs[r]);
}
+static int bcmgenet_get_settings(struct net_device *dev,
+ struct ethtool_cmd *cmd)
+{
+ if (!netif_running(dev))
+ return -EINVAL;
+
+ if (!dev->phydev)
+ return -ENODEV;
+
+ return phy_ethtool_gset(dev->phydev, cmd);
+}
+
+static int bcmgenet_set_settings(struct net_device *dev,
+ struct ethtool_cmd *cmd)
+{
+ if (!netif_running(dev))
+ return -EINVAL;
+
+ if (!dev->phydev)
+ return -ENODEV;
+
+ return phy_ethtool_sset(dev->phydev, cmd);
+}
+
static int bcmgenet_set_rx_csum(struct net_device *dev,
netdev_features_t wanted)
{
@@ -953,6 +977,8 @@ static struct ethtool_ops bcmgenet_ethtool_ops = {
.get_strings = bcmgenet_get_strings,
.get_sset_count = bcmgenet_get_sset_count,
.get_ethtool_stats = bcmgenet_get_ethtool_stats,
+ .get_settings = bcmgenet_get_settings,
+ .set_settings = bcmgenet_set_settings,
.get_drvinfo = bcmgenet_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_msglevel = bcmgenet_get_msglevel,
@@ -964,8 +990,6 @@ static struct ethtool_ops bcmgenet_ethtool_ops = {
.nway_reset = bcmgenet_nway_reset,
.get_coalesce = bcmgenet_get_coalesce,
.set_coalesce = bcmgenet_set_coalesce,
- .get_link_ksettings = phy_ethtool_get_link_ksettings,
- .set_link_ksettings = phy_ethtool_set_link_ksettings,
};
/* Power down the unimac, based on mode. */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index d00cb193da9a..20a5bbe3f536 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -432,9 +432,7 @@ static void fm10k_restore_vxlan_port(struct fm10k_intfc *interface)
/**
* fm10k_add_vxlan_port
* @netdev: network interface device structure
- * @sa_family: Address family of new port
- * @port: port number used for VXLAN
- * @type: Enumerated value specifying udp encapsulation type
+ * @ti: Tunnel endpoint information
*
* This function is called when a new VXLAN interface has added a new port
* number to the range that is currently in use for VXLAN. The new port
@@ -480,9 +478,7 @@ insert_tail:
/**
* fm10k_del_vxlan_port
* @netdev: network interface device structure
- * @sa_family: Address family of freed port
- * @port: port number used for VXLAN
- * @type: Enumerated value specifying udp encapsulation type
+ * @ti: Tunnel endpoint information
*
* This function is called when a new VXLAN interface has freed a port
* number from the range that is currently in use for VXLAN. The freed
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index fd5a761c68f3..918b94baca81 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8851,9 +8851,7 @@ static int ixgbe_set_features(struct net_device *netdev,
/**
* ixgbe_add_vxlan_port - Get notifications about VXLAN ports that come up
* @dev: The port's netdev
- * @sa_family: Socket Family that VXLAN is notifiying us about
- * @port: New UDP port number that VXLAN started listening to
- * @type: Enumerated type specifying UDP tunnel type
+ * @ti: Tunnel endpoint information
**/
static void ixgbe_add_vxlan_port(struct net_device *dev,
struct udp_tunnel_info *ti)
@@ -8888,9 +8886,7 @@ static void ixgbe_add_vxlan_port(struct net_device *dev,
/**
* ixgbe_del_vxlan_port - Get notifications about VXLAN ports that go away
* @dev: The port's netdev
- * @sa_family: Socket Family that VXLAN is notifying us about
- * @port: UDP port number that VXLAN stopped listening to
- * @type: Enumerated type specifying UDP tunnel type
+ * @ti: Tunnel endpoint information
**/
static void ixgbe_del_vxlan_port(struct net_device *dev,
struct udp_tunnel_info *ti)
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 760f3d71eda3..b57ae3afb994 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -76,8 +76,8 @@ static int mtk_mdio_busy_wait(struct mtk_eth *eth)
return -1;
}
-u32 _mtk_mdio_write(struct mtk_eth *eth, u32 phy_addr,
- u32 phy_register, u32 write_data)
+static u32 _mtk_mdio_write(struct mtk_eth *eth, u32 phy_addr,
+ u32 phy_register, u32 write_data)
{
if (mtk_mdio_busy_wait(eth))
return -1;
@@ -95,7 +95,7 @@ u32 _mtk_mdio_write(struct mtk_eth *eth, u32 phy_addr,
return 0;
}
-u32 _mtk_mdio_read(struct mtk_eth *eth, int phy_addr, int phy_reg)
+static u32 _mtk_mdio_read(struct mtk_eth *eth, int phy_addr, int phy_reg)
{
u32 d;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 01ae54826d5c..480a3ba714dd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -58,6 +58,7 @@
#include <linux/workqueue.h>
#include <asm/byteorder.h>
#include <net/devlink.h>
+#include <trace/events/devlink.h>
#include "core.h"
#include "item.h"
@@ -447,6 +448,10 @@ static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core,
if (!skb)
return -ENOMEM;
+ trace_devlink_hwmsg(priv_to_devlink(mlxsw_core), false, 0,
+ skb->data + mlxsw_core->driver->txhdr_len,
+ skb->len - mlxsw_core->driver->txhdr_len);
+
atomic_set(&trans->active, 1);
err = mlxsw_core_skb_transmit(mlxsw_core, skb, &trans->tx_info);
if (err) {
@@ -529,6 +534,9 @@ static void mlxsw_emad_rx_listener_func(struct sk_buff *skb, u8 local_port,
struct mlxsw_core *mlxsw_core = priv;
struct mlxsw_reg_trans *trans;
+ trace_devlink_hwmsg(priv_to_devlink(mlxsw_core), true, 0,
+ skb->data, skb->len);
+
if (!mlxsw_emad_is_resp(skb))
goto free_skb;
@@ -1110,14 +1118,14 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
if (err)
goto err_emad_init;
- err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon);
- if (err)
- goto err_hwmon_init;
-
err = devlink_register(devlink, mlxsw_bus_info->dev);
if (err)
goto err_devlink_register;
+ err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon);
+ if (err)
+ goto err_hwmon_init;
+
err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
if (err)
goto err_driver_init;
@@ -1131,9 +1139,9 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
err_debugfs_init:
mlxsw_core->driver->fini(mlxsw_core);
err_driver_init:
+err_hwmon_init:
devlink_unregister(devlink);
err_devlink_register:
-err_hwmon_init:
mlxsw_emad_fini(mlxsw_core);
err_emad_init:
mlxsw_bus->fini(bus_priv);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 1e74b911accb..88678c172b19 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2578,7 +2578,7 @@ static void nfp_net_del_vxlan_port(struct net_device *netdev,
return;
idx = nfp_net_find_vxlan_idx(nn, ti->port);
- if (!nn->vxlan_usecnt[idx] || idx == -ENOSPC)
+ if (idx == -ENOSPC || !nn->vxlan_usecnt[idx])
return;
if (!--nn->vxlan_usecnt[idx])
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 18ac52ded696..726b80f45906 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -2195,6 +2195,12 @@ static void smc_release_datacs(struct platform_device *pdev, struct net_device *
}
}
+/* ACPI IDs this driver binds to; exported for module autoloading. */
+static const struct acpi_device_id smc91x_acpi_match[] = {
+ { "LNRO0003", 0 },
+ { } /* empty entry ends the table */
+};
+MODULE_DEVICE_TABLE(acpi, smc91x_acpi_match);
+
#if IS_BUILTIN(CONFIG_OF)
static const struct of_device_id smc91x_match[] = {
{ .compatible = "smsc,lan91c94", },
@@ -2274,7 +2280,6 @@ static int smc_drv_probe(struct platform_device *pdev)
#if IS_BUILTIN(CONFIG_OF)
match = of_match_device(of_match_ptr(smc91x_match), &pdev->dev);
if (match) {
- struct device_node *np = pdev->dev.of_node;
u32 val;
/* Optional pwrdwn GPIO configured? */
@@ -2300,7 +2305,8 @@ static int smc_drv_probe(struct platform_device *pdev)
usleep_range(750, 1000);
/* Combination of IO widths supported, default to 16-bit */
- if (!of_property_read_u32(np, "reg-io-width", &val)) {
+ if (!device_property_read_u32(&pdev->dev, "reg-io-width",
+ &val)) {
if (val & 1)
lp->cfg.flags |= SMC91X_USE_8BIT;
if ((val == 0) || (val & 2))
@@ -2478,7 +2484,8 @@ static struct platform_driver smc_driver = {
.driver = {
.name = CARDNAME,
.pm = &smc_drv_pm_ops,
- .of_match_table = of_match_ptr(smc91x_match),
+ .of_match_table = of_match_ptr(smc91x_match),
+ .acpi_match_table = smc91x_acpi_match,
},
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 0fb362d5a722..44b630cd1755 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -11,11 +11,12 @@ obj-$(CONFIG_DWMAC_IPQ806X) += dwmac-ipq806x.o
obj-$(CONFIG_DWMAC_LPC18XX) += dwmac-lpc18xx.o
obj-$(CONFIG_DWMAC_MESON) += dwmac-meson.o
obj-$(CONFIG_DWMAC_ROCKCHIP) += dwmac-rk.o
-obj-$(CONFIG_DWMAC_SOCFPGA) += dwmac-socfpga.o
+obj-$(CONFIG_DWMAC_SOCFPGA) += dwmac-altr-socfpga.o
obj-$(CONFIG_DWMAC_STI) += dwmac-sti.o
obj-$(CONFIG_DWMAC_SUNXI) += dwmac-sunxi.o
obj-$(CONFIG_DWMAC_GENERIC) += dwmac-generic.o
stmmac-platform-objs:= stmmac_platform.o
+dwmac-altr-socfpga-objs := altr_tse_pcs.o dwmac-socfpga.o
obj-$(CONFIG_STMMAC_PCI) += stmmac-pci.o
stmmac-pci-objs:= stmmac_pci.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
new file mode 100644
index 000000000000..2920e2ee3864
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
@@ -0,0 +1,274 @@
+/* Copyright Altera Corporation (C) 2016. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Tien Hock Loh <thloh@altera.com>
+ */
+
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_net.h>
+#include <linux/phy.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/stmmac.h>
+
+#include "stmmac.h"
+#include "stmmac_platform.h"
+#include "altr_tse_pcs.h"
+
+#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0
+#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII BIT(1)
+#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII BIT(2)
+#define SYSMGR_EMACGRP_CTRL_PHYSEL_WIDTH 2
+#define SYSMGR_EMACGRP_CTRL_PHYSEL_MASK GENMASK(1, 0)
+
+#define TSE_PCS_CONTROL_AN_EN_MASK BIT(12)
+#define TSE_PCS_CONTROL_REG 0x00
+#define TSE_PCS_CONTROL_RESTART_AN_MASK BIT(9)
+#define TSE_PCS_IF_MODE_REG 0x28
+#define TSE_PCS_LINK_TIMER_0_REG 0x24
+#define TSE_PCS_LINK_TIMER_1_REG 0x26
+#define TSE_PCS_SIZE 0x40
+#define TSE_PCS_STATUS_AN_COMPLETED_MASK BIT(5)
+#define TSE_PCS_STATUS_LINK_MASK 0x0004
+#define TSE_PCS_STATUS_REG 0x02
+#define TSE_PCS_SGMII_SPEED_1000 BIT(3)
+#define TSE_PCS_SGMII_SPEED_100 BIT(2)
+#define TSE_PCS_SGMII_SPEED_10 0x0
+#define TSE_PCS_SW_RST_MASK 0x8000
+/* Link partner ability register, read once auto-negotiation completes. */
+#define TSE_PCS_PARTNER_ABILITY_REG		0x0A
+/* Duplex is reported in bit 12 (0x1000): set = full, clear = half. */
+#define TSE_PCS_PARTNER_DUPLEX_FULL		0x1000
+#define TSE_PCS_PARTNER_DUPLEX_HALF		0x0000
+#define TSE_PCS_PARTNER_DUPLEX_MASK		0x1000
+/* Speed is reported in bits 11:10. */
+#define TSE_PCS_PARTNER_SPEED_MASK		GENMASK(11, 10)
+#define TSE_PCS_PARTNER_SPEED_1000		BIT(11)
+#define TSE_PCS_PARTNER_SPEED_100		BIT(10)
+#define TSE_PCS_PARTNER_SPEED_10		0x0000
+#define TSE_PCS_SGMII_SPEED_MASK GENMASK(3, 2)
+#define TSE_PCS_SGMII_LINK_TIMER_0 0x0D40
+#define TSE_PCS_SGMII_LINK_TIMER_1 0x0003
+#define TSE_PCS_SW_RESET_TIMEOUT 100
+#define TSE_PCS_USE_SGMII_AN_MASK BIT(2)
+#define TSE_PCS_USE_SGMII_ENA BIT(1)
+
+#define SGMII_ADAPTER_CTRL_REG 0x00
+#define SGMII_ADAPTER_DISABLE 0x0001
+#define SGMII_ADAPTER_ENABLE 0x0000
+
+#define AUTONEGO_LINK_TIMER 20
+
+/**
+ * tse_pcs_reset - soft-reset the TSE PCS and wait for it to finish
+ * @base: mapped base of the PCS register block
+ * @pcs: PCS state; only pcs->dev is used, for the error message
+ *
+ * Sets TSE_PCS_SW_RST_MASK, then polls it every 1us until it clears.
+ * Return: 0 on success, -ETIMEDOUT after TSE_PCS_SW_RESET_TIMEOUT polls.
+ */
+static int tse_pcs_reset(void __iomem *base, struct tse_pcs *pcs)
+{
+	void __iomem *ctrl = base + TSE_PCS_CONTROL_REG;
+	int tries;
+
+	writew(readw(ctrl) | TSE_PCS_SW_RST_MASK, ctrl);
+
+	for (tries = 0; tries < TSE_PCS_SW_RESET_TIMEOUT; tries++) {
+		if (!(readw(ctrl) & TSE_PCS_SW_RST_MASK))
+			return 0;
+		udelay(1);
+	}
+
+	dev_err(pcs->dev, "PCS could not get out of sw reset\n");
+	return -ETIMEDOUT;
+}
+
+/* Set up the PCS for SGMII, reset it, then enable the SGMII adapter. */
+int tse_pcs_init(void __iomem *base, struct tse_pcs *pcs)
+{
+	int err;
+
+	writew(TSE_PCS_USE_SGMII_ENA, base + TSE_PCS_IF_MODE_REG);
+	writew(TSE_PCS_SGMII_LINK_TIMER_0, base + TSE_PCS_LINK_TIMER_0_REG);
+	writew(TSE_PCS_SGMII_LINK_TIMER_1, base + TSE_PCS_LINK_TIMER_1_REG);
+
+	err = tse_pcs_reset(base, pcs);
+	if (err)
+		return err;	/* adapter is left alone if the reset timed out */
+	writew(SGMII_ADAPTER_ENABLE,
+	       pcs->sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+	return 0;
+}
+
+/*
+ * Link poll used when autoneg is disabled: enable the SGMII adapter once
+ * the PCS status register reports link, otherwise re-arm the timer.
+ */
+static void pcs_link_timer_callback(unsigned long data)
+{
+	struct tse_pcs *pcs = (struct tse_pcs *)data;
+	u16 status = readw(pcs->tse_pcs_base + TSE_PCS_STATUS_REG);
+
+	if (!(status & TSE_PCS_STATUS_LINK_MASK)) {
+		mod_timer(&pcs->aneg_link_timer,
+			  jiffies + msecs_to_jiffies(AUTONEGO_LINK_TIMER));
+		return;
+	}
+
+	dev_dbg(pcs->dev, "Adapter: Link is established\n");
+	writew(SGMII_ADAPTER_ENABLE,
+	       pcs->sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+}
+
+/*
+ * Auto-negotiation poll, run from the aneg/link timer.
+ * While the PCS status register does not report AN complete, restart
+ * negotiation, soft-reset the PCS and re-arm the timer. Once it completes,
+ * decode the partner ability word: the SGMII adapter is enabled only for
+ * full duplex at 10, 100 or 1000; half duplex and unknown speed encodings
+ * are logged as errors and leave the adapter untouched.
+ */
+static void auto_nego_timer_callback(unsigned long data)
+{
+	struct tse_pcs *pcs = (struct tse_pcs *)data;
+	void __iomem *pcs_regs = pcs->tse_pcs_base;
+	bool full_duplex;
+	u16 ability, ctrl;
+
+	if (!(readw(pcs_regs + TSE_PCS_STATUS_REG) &
+	      TSE_PCS_STATUS_AN_COMPLETED_MASK)) {
+		ctrl = readw(pcs_regs + TSE_PCS_CONTROL_REG);
+		ctrl |= TSE_PCS_CONTROL_RESTART_AN_MASK;
+		writew(ctrl, pcs_regs + TSE_PCS_CONTROL_REG);
+
+		tse_pcs_reset(pcs_regs, pcs);
+		mod_timer(&pcs->aneg_link_timer,
+			  jiffies + msecs_to_jiffies(AUTONEGO_LINK_TIMER));
+		return;
+	}
+
+	dev_dbg(pcs->dev, "Adapter: Auto Negotiation is completed\n");
+	ability = readw(pcs_regs + TSE_PCS_PARTNER_ABILITY_REG);
+	full_duplex = (ability & TSE_PCS_PARTNER_DUPLEX_MASK) ==
+		      TSE_PCS_PARTNER_DUPLEX_FULL;
+
+	switch (ability & TSE_PCS_PARTNER_SPEED_MASK) {
+	case TSE_PCS_PARTNER_SPEED_10:
+		if (full_duplex)
+			dev_dbg(pcs->dev,
+				"Adapter: Link Partner is Up - 10/Full\n");
+		break;
+	case TSE_PCS_PARTNER_SPEED_100:
+		if (full_duplex)
+			dev_dbg(pcs->dev,
+				"Adapter: Link Partner is Up - 100/Full\n");
+		break;
+	case TSE_PCS_PARTNER_SPEED_1000:
+		if (full_duplex)
+			dev_dbg(pcs->dev,
+				"Adapter: Link Partner is Up - 1000/Full\n");
+		break;
+	default:
+		dev_err(pcs->dev,
+			"Adapter: Invalid Partner Speed and Duplex\n");
+		return;
+	}
+
+	if (!full_duplex) {
+		dev_err(pcs->dev, "Adapter does not support Half Duplex\n");
+		return;
+	}
+
+	writew(SGMII_ADAPTER_ENABLE,
+	       pcs->sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+}
+
+/* Timer entry point: run the poller matching the cached autoneg mode. */
+static void aneg_link_timer_callback(unsigned long data)
+{
+	switch (((struct tse_pcs *)data)->autoneg) {
+	case AUTONEG_ENABLE:
+		auto_nego_timer_callback(data);
+		break;
+	case AUTONEG_DISABLE:
+		pcs_link_timer_callback(data);
+		break;
+	}
+}
+
+/**
+ * tse_pcs_fix_mac_speed - reprogram the TSE PCS after a PHY speed change
+ * @pcs: PCS state holding the mapped PCS and SGMII adapter registers
+ * @phy_dev: PHY whose autoneg setting selects the configuration path
+ * @speed: link speed (10, 100 or 1000); only used when autoneg is disabled
+ *
+ * The SGMII adapter is enabled first. With autoneg enabled, SGMII
+ * auto-negotiation is switched on and restarted. With autoneg disabled, it
+ * is switched off and @speed is forced into the IF_MODE register; an
+ * unsupported @speed returns early without writing the speed, resetting
+ * the PCS or arming the timer. Both paths otherwise finish by soft-resetting
+ * the PCS and arming aneg_link_timer to poll for completion.
+ */
+void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev,
+			   unsigned int speed)
+{
+	void __iomem *tse_pcs_base = pcs->tse_pcs_base;
+	void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base;
+	u32 val;
+
+	writew(SGMII_ADAPTER_ENABLE,
+	       sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+
+	/* Cache the mode: the timer callback dispatches on pcs->autoneg. */
+	pcs->autoneg = phy_dev->autoneg;
+
+	/*
+	 * NOTE(review): setup_timer() is re-run on every call below; confirm
+	 * the timer cannot still be pending when this function is re-entered.
+	 */
+	if (phy_dev->autoneg == AUTONEG_ENABLE) {
+		val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG);
+		val |= TSE_PCS_CONTROL_AN_EN_MASK;
+		writew(val, tse_pcs_base + TSE_PCS_CONTROL_REG);
+
+		val = readw(tse_pcs_base + TSE_PCS_IF_MODE_REG);
+		val |= TSE_PCS_USE_SGMII_AN_MASK;
+		writew(val, tse_pcs_base + TSE_PCS_IF_MODE_REG);
+
+		/*
+		 * Request the AN restart. The value must be written back,
+		 * otherwise the bit set here is silently discarded; this is
+		 * the same sequence auto_nego_timer_callback() uses to retry.
+		 */
+		val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG);
+		val |= TSE_PCS_CONTROL_RESTART_AN_MASK;
+		writew(val, tse_pcs_base + TSE_PCS_CONTROL_REG);
+
+		tse_pcs_reset(tse_pcs_base, pcs);
+
+		setup_timer(&pcs->aneg_link_timer,
+			    aneg_link_timer_callback, (unsigned long)pcs);
+		mod_timer(&pcs->aneg_link_timer, jiffies +
+			  msecs_to_jiffies(AUTONEGO_LINK_TIMER));
+	} else if (phy_dev->autoneg == AUTONEG_DISABLE) {
+		val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG);
+		val &= ~TSE_PCS_CONTROL_AN_EN_MASK;
+		writew(val, tse_pcs_base + TSE_PCS_CONTROL_REG);
+
+		val = readw(tse_pcs_base + TSE_PCS_IF_MODE_REG);
+		val &= ~TSE_PCS_USE_SGMII_AN_MASK;
+		writew(val, tse_pcs_base + TSE_PCS_IF_MODE_REG);
+
+		/* Replace the forced-speed field with the requested speed. */
+		val = readw(tse_pcs_base + TSE_PCS_IF_MODE_REG);
+		val &= ~TSE_PCS_SGMII_SPEED_MASK;
+
+		switch (speed) {
+		case 1000:
+			val |= TSE_PCS_SGMII_SPEED_1000;
+			break;
+		case 100:
+			val |= TSE_PCS_SGMII_SPEED_100;
+			break;
+		case 10:
+			val |= TSE_PCS_SGMII_SPEED_10;
+			break;
+		default:
+			/* Unknown speed: leave IF_MODE speed bits as they were. */
+			return;
+		}
+		writew(val, tse_pcs_base + TSE_PCS_IF_MODE_REG);
+
+		tse_pcs_reset(tse_pcs_base, pcs);
+
+		setup_timer(&pcs->aneg_link_timer,
+			    aneg_link_timer_callback, (unsigned long)pcs);
+		mod_timer(&pcs->aneg_link_timer, jiffies +
+			  msecs_to_jiffies(AUTONEGO_LINK_TIMER));
+	}
+}
diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h
new file mode 100644
index 000000000000..2f5882450b06
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h
@@ -0,0 +1,36 @@
+/* Copyright Altera Corporation (C) 2016. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Tien Hock Loh <thloh@altera.com>
+ */
+
+#ifndef __TSE_PCS_H__
+#define __TSE_PCS_H__
+
+#include <linux/phy.h>
+#include <linux/timer.h>
+
+/* State shared between the socfpga glue code and the TSE PCS helpers. */
+struct tse_pcs {
+ struct device *dev; /* passed to dev_dbg()/dev_err() for log messages */
+ void __iomem *tse_pcs_base; /* base for the TSE_PCS_* register accesses */
+ void __iomem *sgmii_adapter_base; /* base for SGMII_ADAPTER_CTRL_REG */
+ struct timer_list aneg_link_timer; /* polls for AN/link completion */
+ int autoneg; /* copy of phy_dev->autoneg; selects the timer's poller */
+};
+
+int tse_pcs_init(void __iomem *base, struct tse_pcs *pcs);
+void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev,
+ unsigned int speed);
+
+#endif /* __TSE_PCS_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index f13499fa1f58..3bc1fa2c1001 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -27,6 +27,11 @@
#include "stmmac.h"
#include "stmmac_platform.h"
+#include "altr_tse_pcs.h"
+
+#define SGMII_ADAPTER_CTRL_REG 0x00
+#define SGMII_ADAPTER_DISABLE 0x0001
+
#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0
#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1
#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII 0x2
@@ -52,35 +57,46 @@ struct socfpga_dwmac {
struct reset_control *stmmac_rst;
void __iomem *splitter_base;
bool f2h_ptp_ref_clk;
+ struct tse_pcs pcs;
};
static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed)
{
struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv;
void __iomem *splitter_base = dwmac->splitter_base;
+ void __iomem *tse_pcs_base = dwmac->pcs.tse_pcs_base;
+ void __iomem *sgmii_adapter_base = dwmac->pcs.sgmii_adapter_base;
+ struct device *dev = dwmac->dev;
+ struct net_device *ndev = dev_get_drvdata(dev);
+ struct phy_device *phy_dev = ndev->phydev;
u32 val;
- if (!splitter_base)
- return;
-
- val = readl(splitter_base + EMAC_SPLITTER_CTRL_REG);
- val &= ~EMAC_SPLITTER_CTRL_SPEED_MASK;
-
- switch (speed) {
- case 1000:
- val |= EMAC_SPLITTER_CTRL_SPEED_1000;
- break;
- case 100:
- val |= EMAC_SPLITTER_CTRL_SPEED_100;
- break;
- case 10:
- val |= EMAC_SPLITTER_CTRL_SPEED_10;
- break;
- default:
- return;
+ if ((tse_pcs_base) && (sgmii_adapter_base))
+ writew(SGMII_ADAPTER_DISABLE,
+ sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+
+ if (splitter_base) {
+ val = readl(splitter_base + EMAC_SPLITTER_CTRL_REG);
+ val &= ~EMAC_SPLITTER_CTRL_SPEED_MASK;
+
+ switch (speed) {
+ case 1000:
+ val |= EMAC_SPLITTER_CTRL_SPEED_1000;
+ break;
+ case 100:
+ val |= EMAC_SPLITTER_CTRL_SPEED_100;
+ break;
+ case 10:
+ val |= EMAC_SPLITTER_CTRL_SPEED_10;
+ break;
+ default:
+ return;
+ }
+ writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG);
}
- writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG);
+ if (tse_pcs_base && sgmii_adapter_base)
+ tse_pcs_fix_mac_speed(&dwmac->pcs, phy_dev, speed);
}
static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *dev)
@@ -88,9 +104,12 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *
struct device_node *np = dev->of_node;
struct regmap *sys_mgr_base_addr;
u32 reg_offset, reg_shift;
- int ret;
- struct device_node *np_splitter;
+ int ret, index;
+ struct device_node *np_splitter = NULL;
+ struct device_node *np_sgmii_adapter = NULL;
struct resource res_splitter;
+ struct resource res_tse_pcs;
+ struct resource res_sgmii_adapter;
dwmac->interface = of_get_phy_mode(np);
@@ -128,6 +147,77 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *
}
}
+ np_sgmii_adapter = of_parse_phandle(np,
+ "altr,gmii-to-sgmii-converter", 0);
+ if (np_sgmii_adapter) {
+ index = of_property_match_string(np_sgmii_adapter, "reg-names",
+ "hps_emac_interface_splitter_avalon_slave");
+
+ if (index >= 0) {
+ if (of_address_to_resource(np_sgmii_adapter, index,
+ &res_splitter)) {
+ dev_err(dev,
+ "%s: ERROR: missing emac splitter address\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ dwmac->splitter_base =
+ devm_ioremap_resource(dev, &res_splitter);
+
+ if (IS_ERR(dwmac->splitter_base)) {
+ dev_err(dev,
+ "%s: ERROR: failed mapping emac splitter\n",
+ __func__);
+ return PTR_ERR(dwmac->splitter_base);
+ }
+ }
+
+ index = of_property_match_string(np_sgmii_adapter, "reg-names",
+ "gmii_to_sgmii_adapter_avalon_slave");
+
+ if (index >= 0) {
+ if (of_address_to_resource(np_sgmii_adapter, index,
+ &res_sgmii_adapter)) {
+ dev_err(dev,
+ "%s: ERROR: failed mapping adapter\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ dwmac->pcs.sgmii_adapter_base =
+ devm_ioremap_resource(dev, &res_sgmii_adapter);
+
+ if (IS_ERR(dwmac->pcs.sgmii_adapter_base)) {
+ dev_err(dev, "%s: failed to mapping adapter\n",
+ __func__);
+ return PTR_ERR(dwmac->pcs.sgmii_adapter_base);
+ }
+ }
+
+ index = of_property_match_string(np_sgmii_adapter, "reg-names",
+ "eth_tse_control_port");
+
+ if (index >= 0) {
+ if (of_address_to_resource(np_sgmii_adapter, index,
+ &res_tse_pcs)) {
+ dev_err(dev,
+ "%s: ERROR: failed mapping tse control port\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ dwmac->pcs.tse_pcs_base =
+ devm_ioremap_resource(dev, &res_tse_pcs);
+
+ if (IS_ERR(dwmac->pcs.tse_pcs_base)) {
+ dev_err(dev,
+ "%s: ERROR: failed mapping tse control port\n",
+ __func__);
+ return PTR_ERR(dwmac->pcs.tse_pcs_base);
+ }
+ }
+ }
dwmac->reg_offset = reg_offset;
dwmac->reg_shift = reg_shift;
dwmac->sys_mgr_base_addr = sys_mgr_base_addr;
@@ -151,6 +241,7 @@ static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac)
break;
case PHY_INTERFACE_MODE_MII:
case PHY_INTERFACE_MODE_GMII:
+ case PHY_INTERFACE_MODE_SGMII:
val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII;
break;
default:
@@ -191,6 +282,12 @@ static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac)
*/
if (dwmac->stmmac_rst)
reset_control_deassert(dwmac->stmmac_rst);
+ if (phymode == PHY_INTERFACE_MODE_SGMII) {
+ if (tse_pcs_init(dwmac->pcs.tse_pcs_base, &dwmac->pcs) != 0) {
+ dev_err(dwmac->dev, "Unable to initialize TSE PCS");
+ return -EINVAL;
+ }
+ }
return 0;
}
@@ -225,6 +322,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev)
plat_dat->fix_mac_speed = socfpga_dwmac_fix_mac_speed;
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+
if (!ret) {
struct net_device *ndev = platform_get_drvdata(pdev);
struct stmmac_priv *stpriv = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c
index c34111b390c7..fc1ea8083f1f 100644
--- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c
+++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c
@@ -2877,7 +2877,7 @@ static int dwceqos_probe(struct platform_device *pdev)
ret = of_phy_register_fixed_link(lp->pdev->dev.of_node);
if (ret < 0) {
dev_err(&pdev->dev, "invalid fixed-link");
- goto err_out_unregister_netdev;
+ goto err_out_unregister_clk_notifier;
}
lp->phy_node = of_node_get(lp->pdev->dev.of_node);
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 5de892f3c0e0..3c20e87bb761 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1134,9 +1134,9 @@ static struct device_type geneve_type = {
.name = "geneve",
};
-/* Calls the ndo_add_udp_enc_port of the caller in order to
+/* Calls the ndo_udp_tunnel_add of the caller in order to
* supply the listening GENEVE udp ports. Callers are expected
- * to implement the ndo_add_udp_enc_port.
+ * to implement the ndo_udp_tunnel_add.
*/
static void geneve_push_rx_ports(struct net_device *dev)
{
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 6909c322de4e..20e09174ff62 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1128,6 +1128,39 @@ static inline void netvsc_receive_inband(struct hv_device *hdev,
}
}
+/*
+ * Hand one VMBus packet descriptor to the netvsc handler for its type.
+ * Packet types with no handler are reported via netdev_err() together
+ * with the caller-supplied request_id.
+ */
+static void netvsc_process_raw_pkt(struct hv_device *device,
+				   struct vmbus_channel *channel,
+				   struct netvsc_device *net_device,
+				   struct net_device *ndev,
+				   u64 request_id,
+				   struct vmpacket_descriptor *desc)
+{
+	unsigned long payload;
+
+	switch (desc->type) {
+	case VM_PKT_DATA_INBAND:
+		/* The NVSP message starts offset8 * 8 bytes into the packet. */
+		payload = (unsigned long)desc + (desc->offset8 << 3);
+		netvsc_receive_inband(device, net_device,
+				      (struct nvsp_message *)payload);
+		break;
+
+	case VM_PKT_DATA_USING_XFER_PAGES:
+		netvsc_receive(net_device, channel, device, desc);
+		break;
+
+	case VM_PKT_COMP:
+		netvsc_send_completion(net_device, channel, device, desc);
+		break;
+
+	default:
+		netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
+			   desc->type, request_id);
+		break;
+	}
+}
+
+
void netvsc_channel_cb(void *context)
{
int ret;
@@ -1140,7 +1173,7 @@ void netvsc_channel_cb(void *context)
unsigned char *buffer;
int bufferlen = NETVSC_PACKET_SIZE;
struct net_device *ndev;
- struct nvsp_message *nvmsg;
+ bool need_to_commit = false;
if (channel->primary_channel != NULL)
device = channel->primary_channel->device_obj;
@@ -1154,39 +1187,36 @@ void netvsc_channel_cb(void *context)
buffer = get_per_channel_state(channel);
do {
+ desc = get_next_pkt_raw(channel);
+ if (desc != NULL) {
+ netvsc_process_raw_pkt(device,
+ channel,
+ net_device,
+ ndev,
+ desc->trans_id,
+ desc);
+
+ put_pkt_raw(channel, desc);
+ need_to_commit = true;
+ continue;
+ }
+ if (need_to_commit) {
+ need_to_commit = false;
+ commit_rd_index(channel);
+ }
+
ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
&bytes_recvd, &request_id);
if (ret == 0) {
if (bytes_recvd > 0) {
desc = (struct vmpacket_descriptor *)buffer;
- nvmsg = (struct nvsp_message *)((unsigned long)
- desc + (desc->offset8 << 3));
- switch (desc->type) {
- case VM_PKT_COMP:
- netvsc_send_completion(net_device,
- channel,
- device, desc);
- break;
-
- case VM_PKT_DATA_USING_XFER_PAGES:
- netvsc_receive(net_device, channel,
- device, desc);
- break;
-
- case VM_PKT_DATA_INBAND:
- netvsc_receive_inband(device,
- net_device,
- nvmsg);
- break;
-
- default:
- netdev_err(ndev,
- "unhandled packet type %d, "
- "tid %llx len %d\n",
- desc->type, request_id,
- bytes_recvd);
- break;
- }
+ netvsc_process_raw_pkt(device,
+ channel,
+ net_device,
+ ndev,
+ request_id,
+ desc);
+
} else {
/*
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 5eadb7a1ad7b..9c8b5bc2b9d8 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2503,6 +2503,9 @@ static int tun_device_event(struct notifier_block *unused,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct tun_struct *tun = netdev_priv(dev);
+ if (dev->rtnl_link_ops != &tun_link_ops)
+ return NOTIFY_DONE;
+
switch (event) {
case NETDEV_CHANGE_TX_QUEUE_LEN:
if (tun_queue_resize(tun))
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index b225bc27fbe2..168a8e2e15c8 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -2450,27 +2450,6 @@ static void rtl8153_runtime_enable(struct r8152 *tp, bool enable)
}
}
-static void rtl_phy_reset(struct r8152 *tp)
-{
- u16 data;
- int i;
-
- data = r8152_mdio_read(tp, MII_BMCR);
-
- /* don't reset again before the previous one complete */
- if (data & BMCR_RESET)
- return;
-
- data |= BMCR_RESET;
- r8152_mdio_write(tp, MII_BMCR, data);
-
- for (i = 0; i < 50; i++) {
- msleep(20);
- if ((r8152_mdio_read(tp, MII_BMCR) & BMCR_RESET) == 0)
- break;
- }
-}
-
static void r8153_teredo_off(struct r8152 *tp)
{
u32 ocp_data;
@@ -2850,7 +2829,6 @@ static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u16 speed, u8 duplex)
u16 bmcr, anar, gbcr;
int ret = 0;
- cancel_delayed_work_sync(&tp->schedule);
anar = r8152_mdio_read(tp, MII_ADVERTISE);
anar &= ~(ADVERTISE_10HALF | ADVERTISE_10FULL |
ADVERTISE_100HALF | ADVERTISE_100FULL);
@@ -3069,9 +3047,6 @@ static void rtl_work_func_t(struct work_struct *work)
netif_carrier_ok(tp->netdev))
napi_schedule(&tp->napi);
- if (test_and_clear_bit(PHY_RESET, &tp->flags))
- rtl_phy_reset(tp);
-
mutex_unlock(&tp->control);
out1:
@@ -3135,8 +3110,6 @@ static int rtl8152_open(struct net_device *netdev)
if (res)
goto out;
- netif_carrier_off(netdev);
-
res = usb_autopm_get_interface(tp->intf);
if (res < 0) {
free_all_mem(tp);
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index ae7455da1687..da4e3d6632f6 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2475,9 +2475,9 @@ static struct device_type vxlan_type = {
.name = "vxlan",
};
-/* Calls the ndo_add_udp_enc_port of the caller in order to
+/* Calls the ndo_udp_tunnel_add of the caller in order to
* supply the listening VXLAN udp ports. Callers are expected
- * to implement the ndo_add_udp_enc_port.
+ * to implement the ndo_udp_tunnel_add.
*/
static void vxlan_push_rx_ports(struct net_device *dev)
{
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index b6900099ea81..e585018498d5 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -76,6 +76,8 @@ extern struct rb_node *rb_next_postorder(const struct rb_node *);
/* Fast replacement of a single node without remove/rebalance/add/rebalance */
extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
struct rb_root *root);
+extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
+ struct rb_root *root);
static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
struct rb_node **rb_link)
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 14d7b831b63a..d076183e49be 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -130,6 +130,19 @@ __rb_change_child(struct rb_node *old, struct rb_node *new,
WRITE_ONCE(root->rb_node, new);
}
+/*
+ * Point whichever link currently refers to @old (the parent's left or right
+ * child, or the root when @parent is NULL) at @new, publishing the store
+ * with rcu_assign_pointer().
+ */
+static inline void
+__rb_change_child_rcu(struct rb_node *old, struct rb_node *new,
+		      struct rb_node *parent, struct rb_root *root)
+{
+	if (!parent) {
+		rcu_assign_pointer(root->rb_node, new);
+		return;
+	}
+
+	if (parent->rb_left == old)
+		rcu_assign_pointer(parent->rb_left, new);
+	else
+		rcu_assign_pointer(parent->rb_right, new);
+}
+
extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 5f1533e3d032..85830e6c797b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -611,6 +611,12 @@ static inline void rcu_preempt_sleep_check(void)
rcu_dereference_sparse(p, space); \
((typeof(*p) __force __kernel *)(p)); \
})
+/*
+ * rcu_dereference_raw() - load pointer p through lockless_dereference()
+ * and return it cast to a plain __kernel pointer. No condition argument is
+ * taken or evaluated here, unlike the __rcu_dereference_check() wrappers.
+ */
+#define rcu_dereference_raw(p) \
+({ \
+ /* Dependency order vs. p above. */ \
+ typeof(p) ________p1 = lockless_dereference(p); \
+ ((typeof(*p) __force __kernel *)(________p1)); \
+})
/**
* RCU_INITIALIZER() - statically initialize an RCU-protected global variable
@@ -729,8 +735,6 @@ static inline void rcu_preempt_sleep_check(void)
__rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \
__rcu)
-#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/
-
/*
* The tracing infrastructure traces RCU (we want that), but unfortunately
* some of the RCU checks causes tracing to lock up the system.
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 20b3087ad193..52ab18bc2b0d 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -32,6 +32,8 @@ enum dsa_tag_protocol {
#define DSA_MAX_SWITCHES 4
#define DSA_MAX_PORTS 12
+#define DSA_RTABLE_NONE -1
+
struct dsa_chip_data {
/*
* How to access the switch configuration registers.
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 83c5ec58b93a..8626bdd3249a 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -602,6 +602,16 @@ struct sctp_chunk {
/* This needs to be recoverable for SCTP_SEND_FAILED events. */
struct sctp_sndrcvinfo sinfo;
+ /* We use this field to record param for prsctp policies,
+ * for TTL policy, it is the time_to_drop of this chunk,
+ * for RTX policy, it is the max_sent_count of this chunk,
+ * for PRIO policy, it is the priority of this chunk.
+ */
+ unsigned long prsctp_param;
+
+ /* How many times this chunk has been sent, for prsctp RTX policy */
+ int sent_count;
+
/* Which association does this belong to? */
struct sctp_association *asoc;
@@ -1074,6 +1084,8 @@ void sctp_retransmit(struct sctp_outq *, struct sctp_transport *,
sctp_retransmit_reason_t);
void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8);
int sctp_outq_uncork(struct sctp_outq *, gfp_t gfp);
+void sctp_prsctp_prune(struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *sinfo, int msg_len);
/* Uncork and flush an outqueue. */
static inline void sctp_outq_cork(struct sctp_outq *q)
{
@@ -1256,7 +1268,8 @@ struct sctp_endpoint {
/* SCTP-AUTH: endpoint shared keys */
struct list_head endpoint_shared_keys;
__u16 active_key_id;
- __u8 auth_enable;
+ __u8 auth_enable:1,
+ prsctp_enable:1;
};
/* Recover the outter endpoint structure. */
@@ -1848,9 +1861,15 @@ struct sctp_association {
__u16 active_key_id;
__u8 need_ecne:1, /* Need to send an ECNE Chunk? */
- temp:1; /* Is it a temporary association? */
+ temp:1, /* Is it a temporary association? */
+ prsctp_enable:1;
struct sctp_priv_assoc_stats stats;
+
+ int sent_cnt_removable;
+
+ __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
+ __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
};
diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h
new file mode 100644
index 000000000000..333c32ac9bfa
--- /dev/null
+++ b/include/trace/events/devlink.h
@@ -0,0 +1,68 @@
+#if IS_ENABLED(CONFIG_NET_DEVLINK)
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM devlink
+
+#if !defined(_TRACE_DEVLINK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_DEVLINK_H
+
+#include <linux/device.h>
+#include <net/devlink.h>
+#include <linux/tracepoint.h>
+
+/*
+ * Tracepoint for devlink hardware message; driver->owner may be NULL (built-in):
+ */
+TRACE_EVENT(devlink_hwmsg,
+ TP_PROTO(const struct devlink *devlink, bool incoming,
+ unsigned long type, const u8 *buf, size_t len),
+
+ TP_ARGS(devlink, incoming, type, buf, len),
+
+ TP_STRUCT__entry(
+ __string(bus_name, devlink->dev->bus->name)
+ __string(dev_name, dev_name(devlink->dev))
+ __string(owner_name, devlink->dev->driver->owner ? devlink->dev->driver->owner->name : "(none)") /* owner is NULL when THIS_MODULE is NULL */
+ __field(bool, incoming)
+ __field(unsigned long, type)
+ __dynamic_array(u8, buf, len)
+ __field(size_t, len)
+ ),
+
+ TP_fast_assign(
+ __assign_str(bus_name, devlink->dev->bus->name);
+ __assign_str(dev_name, dev_name(devlink->dev));
+ __assign_str(owner_name, devlink->dev->driver->owner ? devlink->dev->driver->owner->name : "(none)"); /* must match the __string() source above */
+ __entry->incoming = incoming;
+ __entry->type = type;
+ memcpy(__get_dynamic_array(buf), buf, len); /* snapshot the message bytes into the ring buffer */
+ __entry->len = len;
+ ),
+
+ TP_printk("bus_name=%s dev_name=%s owner_name=%s incoming=%d type=%lu buf=0x[%*phD] len=%zu", /* len is size_t, hence %zu */
+ __get_str(bus_name), __get_str(dev_name),
+ __get_str(owner_name), __entry->incoming, __entry->type,
+ (int) __entry->len, __get_dynamic_array(buf), __entry->len)
+);
+
+#endif /* _TRACE_DEVLINK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
+#else /* CONFIG_NET_DEVLINK */
+
+#if !defined(_TRACE_DEVLINK_H)
+#define _TRACE_DEVLINK_H
+
+#include <net/devlink.h>
+
+static inline void trace_devlink_hwmsg(const struct devlink *devlink,
+ bool incoming, unsigned long type,
+ const u8 *buf, size_t len)
+{
+}
+
+#endif /* _TRACE_DEVLINK_H */
+
+#endif
diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h
index 8fe1e93f531d..118ed7767639 100644
--- a/include/trace/events/napi.h
+++ b/include/trace/events/napi.h
@@ -12,22 +12,27 @@
TRACE_EVENT(napi_poll,
- TP_PROTO(struct napi_struct *napi),
+ TP_PROTO(struct napi_struct *napi, int work, int budget),
- TP_ARGS(napi),
+ TP_ARGS(napi, work, budget),
TP_STRUCT__entry(
__field( struct napi_struct *, napi)
+ __field( int, work)
+ __field( int, budget)
__string( dev_name, napi->dev ? napi->dev->name : NO_DEV)
),
TP_fast_assign(
__entry->napi = napi;
+ __entry->work = work;
+ __entry->budget = budget;
__assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV);
),
- TP_printk("napi poll on napi struct %p for device %s",
- __entry->napi, __get_str(dev_name))
+ TP_printk("napi poll on napi struct %p for device %s work %d budget %d",
+ __entry->napi, __get_str(dev_name),
+ __entry->work, __entry->budget)
);
#undef NO_DEV
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c14ca1cd6297..262a7e883b19 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -357,6 +357,13 @@ enum bpf_func_id {
*/
BPF_FUNC_get_hash_recalc,
+ /**
+ * u64 bpf_get_current_task(void)
+ * Returns current task_struct
+ * Return: current
+ */
+ BPF_FUNC_get_current_task,
+
__BPF_FUNC_MAX_ID,
};
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 8304fe6f0561..c186f64fffca 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -261,14 +261,17 @@ enum {
/* IGMP/MLD statistics */
struct br_mcast_stats {
- __u64 igmp_queries[BR_MCAST_DIR_SIZE];
+ __u64 igmp_v1queries[BR_MCAST_DIR_SIZE];
+ __u64 igmp_v2queries[BR_MCAST_DIR_SIZE];
+ __u64 igmp_v3queries[BR_MCAST_DIR_SIZE];
__u64 igmp_leaves[BR_MCAST_DIR_SIZE];
__u64 igmp_v1reports[BR_MCAST_DIR_SIZE];
__u64 igmp_v2reports[BR_MCAST_DIR_SIZE];
__u64 igmp_v3reports[BR_MCAST_DIR_SIZE];
__u64 igmp_parse_errors;
- __u64 mld_queries[BR_MCAST_DIR_SIZE];
+ __u64 mld_v1queries[BR_MCAST_DIR_SIZE];
+ __u64 mld_v2queries[BR_MCAST_DIR_SIZE];
__u64 mld_leaves[BR_MCAST_DIR_SIZE];
__u64 mld_v1reports[BR_MCAST_DIR_SIZE];
__u64 mld_v2reports[BR_MCAST_DIR_SIZE];
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index ce70fe6b45df..d304f4c9792c 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -112,6 +112,31 @@ typedef __s32 sctp_assoc_t;
#define SCTP_SOCKOPT_CONNECTX 110 /* CONNECTX requests. */
#define SCTP_SOCKOPT_CONNECTX3 111 /* CONNECTX requests (updated) */
#define SCTP_GET_ASSOC_STATS 112 /* Read only */
+#define SCTP_PR_SUPPORTED 113
+#define SCTP_DEFAULT_PRINFO 114
+#define SCTP_PR_ASSOC_STATUS 115
+
+/* PR-SCTP policies (encoded in the SCTP_PR_SCTP_MASK bits of a flags word) */
+#define SCTP_PR_SCTP_NONE 0x0000
+#define SCTP_PR_SCTP_TTL 0x0010
+#define SCTP_PR_SCTP_RTX 0x0020
+#define SCTP_PR_SCTP_PRIO 0x0030
+#define SCTP_PR_SCTP_MAX SCTP_PR_SCTP_PRIO
+#define SCTP_PR_SCTP_MASK 0x0030
+
+#define __SCTP_PR_INDEX(x) (((x) >> 4) - 1) /* TTL -> 0, RTX -> 1, PRIO -> 2 */
+#define SCTP_PR_INDEX(x) __SCTP_PR_INDEX(SCTP_PR_SCTP_ ## x) /* takes the suffix: TTL, RTX, PRIO, MAX */
+
+#define SCTP_PR_POLICY(x) ((x) & SCTP_PR_SCTP_MASK) /* extract the policy bits */
+#define SCTP_PR_SET_POLICY(flags, x) /* replace the policy bits of flags with x */ \
+ do { \
+ (flags) &= ~SCTP_PR_SCTP_MASK; \
+ (flags) |= (x); \
+ } while (0)
+
+#define SCTP_PR_TTL_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_TTL)
+#define SCTP_PR_RTX_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_RTX)
+#define SCTP_PR_PRIO_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_PRIO)
/* These are bit fields for msghdr->msg_flags. See section 5.1. */
/* On user space Linux, these live in <bits/socket.h> as an enum. */
@@ -902,4 +927,21 @@ struct sctp_paddrthlds {
__u16 spt_pathpfthld;
};
+/*
+ * Socket Option for Getting the Association/Stream-Specific PR-SCTP Status
+ */
+struct sctp_prstatus {
+ sctp_assoc_t sprstat_assoc_id;
+ __u16 sprstat_sid;
+ __u16 sprstat_policy;
+ __u64 sprstat_abandoned_unsent;
+ __u64 sprstat_abandoned_sent;
+};
+
+struct sctp_default_prinfo {
+ sctp_assoc_t pr_assoc_id;
+ __u32 pr_value;
+ __u16 pr_policy;
+};
+
#endif /* _UAPI_SCTP_H */
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 318858edb1cd..5967b870a895 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -11,7 +11,7 @@
* version 2 as published by the Free Software Foundation.
*/
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/magic.h>
#include <linux/major.h>
#include <linux/mount.h>
@@ -367,8 +367,6 @@ static struct file_system_type bpf_fs_type = {
.kill_sb = kill_litter_super,
};
-MODULE_ALIAS_FS("bpf");
-
static int __init bpf_init(void)
{
int ret;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 19c5b4a5c3eb..094c716154ed 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -312,6 +312,17 @@ const struct bpf_func_proto *bpf_get_event_output_proto(void)
return &bpf_event_output_proto;
}
+static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ return (long) current;
+}
+
+static const struct bpf_func_proto bpf_get_current_task_proto = {
+ .func = bpf_get_current_task,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+};
+
static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -329,6 +340,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
return &bpf_tail_call_proto;
case BPF_FUNC_get_current_pid_tgid:
return &bpf_get_current_pid_tgid_proto;
+ case BPF_FUNC_get_current_task:
+ return &bpf_get_current_task_proto;
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
case BPF_FUNC_get_current_comm:
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 1356454e36de..eb8a19fee110 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -539,17 +539,39 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new,
{
struct rb_node *parent = rb_parent(victim);
+ /* Copy the pointers/colour from the victim to the replacement */
+ *new = *victim;
+
/* Set the surrounding nodes to point to the replacement */
- __rb_change_child(victim, new, parent, root);
if (victim->rb_left)
rb_set_parent(victim->rb_left, new);
if (victim->rb_right)
rb_set_parent(victim->rb_right, new);
+ __rb_change_child(victim, new, parent, root);
+}
+EXPORT_SYMBOL(rb_replace_node);
+
+void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
+ struct rb_root *root)
+{
+ struct rb_node *parent = rb_parent(victim);
/* Copy the pointers/colour from the victim to the replacement */
*new = *victim;
+
+ /* Set the surrounding nodes to point to the replacement */
+ if (victim->rb_left)
+ rb_set_parent(victim->rb_left, new);
+ if (victim->rb_right)
+ rb_set_parent(victim->rb_right, new);
+
+ /* Set the parent's pointer to the new node last after an RCU barrier
+ * so that the pointers onwards are seen to be set correctly when doing
+ * an RCU walk over the tree.
+ */
+ __rb_change_child_rcu(victim, new, parent, root);
}
-EXPORT_SYMBOL(rb_replace_node);
+EXPORT_SYMBOL(rb_replace_node_rcu);
static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
{
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 6c196037d818..d610644368b9 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -199,7 +199,6 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
bool unicast)
{
u8 igmp_type = br_multicast_igmp_type(skb);
- __be16 proto = skb->protocol;
struct net_bridge_port *prev;
struct net_bridge_port *p;
@@ -221,7 +220,7 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
if (IS_ERR(prev))
goto out;
if (prev == p)
- br_multicast_count(p->br, p, proto, igmp_type,
+ br_multicast_count(p->br, p, skb, igmp_type,
BR_MCAST_DIR_TX);
}
@@ -266,8 +265,6 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *prev = NULL;
struct net_bridge_port_group *p;
- __be16 proto = skb->protocol;
-
struct hlist_node *rp;
rp = rcu_dereference(hlist_first_rcu(&br->router_list));
@@ -286,7 +283,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
if (IS_ERR(prev))
goto out;
if (prev == port)
- br_multicast_count(port->br, port, proto, igmp_type,
+ br_multicast_count(port->br, port, skb, igmp_type,
BR_MCAST_DIR_TX);
if ((unsigned long)lport >= (unsigned long)port)
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 786602bc0567..a7817e6f306f 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -61,7 +61,7 @@ static int br_pass_frame_up(struct sk_buff *skb)
if (!skb)
return NET_RX_DROP;
/* update the multicast stats if the packet is IGMP/MLD */
- br_multicast_count(br, NULL, skb->protocol, br_multicast_igmp_type(skb),
+ br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
BR_MCAST_DIR_TX);
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index e405eef0ae2e..a5423a1eec05 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -843,14 +843,14 @@ static void __br_multicast_send_query(struct net_bridge *br,
if (port) {
skb->dev = port->dev;
- br_multicast_count(br, port, skb->protocol, igmp_type,
+ br_multicast_count(br, port, skb, igmp_type,
BR_MCAST_DIR_TX);
NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
dev_net(port->dev), NULL, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
} else {
br_multicast_select_own_querier(br, ip, skb);
- br_multicast_count(br, port, skb->protocol, igmp_type,
+ br_multicast_count(br, port, skb, igmp_type,
BR_MCAST_DIR_RX);
netif_rx(skb);
}
@@ -1676,7 +1676,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
if (skb_trimmed && skb_trimmed != skb)
kfree_skb(skb_trimmed);
- br_multicast_count(br, port, skb->protocol, BR_INPUT_SKB_CB(skb)->igmp,
+ br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
BR_MCAST_DIR_RX);
return err;
@@ -1725,7 +1725,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
if (skb_trimmed && skb_trimmed != skb)
kfree_skb(skb_trimmed);
- br_multicast_count(br, port, skb->protocol, BR_INPUT_SKB_CB(skb)->igmp,
+ br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
BR_MCAST_DIR_RX);
return err;
@@ -2251,13 +2251,16 @@ unlock:
EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent);
static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
- __be16 proto, u8 type, u8 dir)
+ const struct sk_buff *skb, u8 type, u8 dir)
{
struct bridge_mcast_stats *pstats = this_cpu_ptr(stats);
+ __be16 proto = skb->protocol;
+ unsigned int t_len;
u64_stats_update_begin(&pstats->syncp);
switch (proto) {
case htons(ETH_P_IP):
+ t_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb);
switch (type) {
case IGMP_HOST_MEMBERSHIP_REPORT:
pstats->mstats.igmp_v1reports[dir]++;
@@ -2269,7 +2272,21 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
pstats->mstats.igmp_v3reports[dir]++;
break;
case IGMP_HOST_MEMBERSHIP_QUERY:
- pstats->mstats.igmp_queries[dir]++;
+ if (t_len != sizeof(struct igmphdr)) {
+ pstats->mstats.igmp_v3queries[dir]++;
+ } else {
+ unsigned int offset = skb_transport_offset(skb);
+ struct igmphdr *ih, _ihdr;
+
+ ih = skb_header_pointer(skb, offset,
+ sizeof(_ihdr), &_ihdr);
+ if (!ih)
+ break;
+ if (!ih->code)
+ pstats->mstats.igmp_v1queries[dir]++;
+ else
+ pstats->mstats.igmp_v2queries[dir]++;
+ }
break;
case IGMP_HOST_LEAVE_MESSAGE:
pstats->mstats.igmp_leaves[dir]++;
@@ -2278,6 +2295,9 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
+ t_len = ntohs(ipv6_hdr(skb)->payload_len) +
+ sizeof(struct ipv6hdr);
+ t_len -= skb_network_header_len(skb);
switch (type) {
case ICMPV6_MGM_REPORT:
pstats->mstats.mld_v1reports[dir]++;
@@ -2286,7 +2306,10 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
pstats->mstats.mld_v2reports[dir]++;
break;
case ICMPV6_MGM_QUERY:
- pstats->mstats.mld_queries[dir]++;
+ if (t_len != sizeof(struct mld_msg))
+ pstats->mstats.mld_v2queries[dir]++;
+ else
+ pstats->mstats.mld_v1queries[dir]++;
break;
case ICMPV6_MGM_REDUCTION:
pstats->mstats.mld_leaves[dir]++;
@@ -2299,7 +2322,7 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
}
void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
- __be16 proto, u8 type, u8 dir)
+ const struct sk_buff *skb, u8 type, u8 dir)
{
struct bridge_mcast_stats __percpu *stats;
@@ -2314,7 +2337,7 @@ void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
if (WARN_ON(!stats))
return;
- br_mcast_stats_add(stats, proto, type, dir);
+ br_mcast_stats_add(stats, skb, type, dir);
}
int br_multicast_init_stats(struct net_bridge *br)
@@ -2359,14 +2382,17 @@ void br_multicast_get_stats(const struct net_bridge *br,
memcpy(&temp, &cpu_stats->mstats, sizeof(temp));
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
- mcast_stats_add_dir(tdst.igmp_queries, temp.igmp_queries);
+ mcast_stats_add_dir(tdst.igmp_v1queries, temp.igmp_v1queries);
+ mcast_stats_add_dir(tdst.igmp_v2queries, temp.igmp_v2queries);
+ mcast_stats_add_dir(tdst.igmp_v3queries, temp.igmp_v3queries);
mcast_stats_add_dir(tdst.igmp_leaves, temp.igmp_leaves);
mcast_stats_add_dir(tdst.igmp_v1reports, temp.igmp_v1reports);
mcast_stats_add_dir(tdst.igmp_v2reports, temp.igmp_v2reports);
mcast_stats_add_dir(tdst.igmp_v3reports, temp.igmp_v3reports);
tdst.igmp_parse_errors += temp.igmp_parse_errors;
- mcast_stats_add_dir(tdst.mld_queries, temp.mld_queries);
+ mcast_stats_add_dir(tdst.mld_v1queries, temp.mld_v1queries);
+ mcast_stats_add_dir(tdst.mld_v2queries, temp.mld_v2queries);
mcast_stats_add_dir(tdst.mld_leaves, temp.mld_leaves);
mcast_stats_add_dir(tdst.mld_v1reports, temp.mld_v1reports);
mcast_stats_add_dir(tdst.mld_v2reports, temp.mld_v2reports);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 4dc851166ad1..40f200947ddc 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -586,7 +586,7 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
int type);
void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
- __be16 proto, u8 type, u8 dir);
+ const struct sk_buff *skb, u8 type, u8 dir);
int br_multicast_init_stats(struct net_bridge *br);
void br_multicast_get_stats(const struct net_bridge *br,
const struct net_bridge_port *p,
@@ -719,7 +719,8 @@ static inline void br_mdb_uninit(void)
static inline void br_multicast_count(struct net_bridge *br,
const struct net_bridge_port *p,
- __be16 proto, u8 type, u8 dir)
+ const struct sk_buff *skb,
+ u8 type, u8 dir)
{
}
diff --git a/net/core/dev.c b/net/core/dev.c
index b92d63bfde7a..7894e406c806 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4972,7 +4972,7 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
rc = napi->poll(napi, BUSY_POLL_BUDGET);
- trace_napi_poll(napi);
+ trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
if (rc == BUSY_POLL_BUDGET) {
napi_complete_done(napi, rc);
napi_schedule(napi);
@@ -5128,7 +5128,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
work = 0;
if (test_bit(NAPI_STATE_SCHED, &n->state)) {
work = n->poll(n, weight);
- trace_napi_poll(n);
+ trace_napi_poll(n, work, weight);
}
WARN_ON_ONCE(work > weight);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index b2e592a198c0..1b5063088f1a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -26,6 +26,10 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/devlink.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/devlink.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
static LIST_HEAD(devlink_list);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 252e155c837b..d6b3b579560d 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -187,7 +187,8 @@ static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *locatio
trace_drop_common(skb, location);
}
-static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
+static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
+ int work, int budget)
{
struct dm_hw_stat_delta *new_stat;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 94acfc89ad97..53599bd0c82d 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -163,7 +163,7 @@ static void poll_one_napi(struct napi_struct *napi)
*/
work = napi->poll(napi, 0);
WARN_ONCE(work, "%pF exceeded budget in poll\n", napi->poll);
- trace_napi_poll(napi);
+ trace_napi_poll(napi, work, 0);
clear_bit(NAPI_STATE_NPSVC, &napi->state);
}
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 766d2a525ada..7e68bc6bc853 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -774,11 +774,17 @@ static int dsa_of_probe(struct device *dev)
chip_index = -1;
for_each_available_child_of_node(np, child) {
+ int i;
+
chip_index++;
cd = &pd->chip[chip_index];
cd->of_node = child;
+ /* Initialize the routing table */
+ for (i = 0; i < DSA_MAX_SWITCHES; ++i)
+ cd->rtable[i] = DSA_RTABLE_NONE;
+
/* When assigning the host device, increment its refcount */
cd->host_dev = get_device(&mdio_bus->dev);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 83b95fc4cede..f30bad9678f0 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -595,7 +595,7 @@ static int _dsa_register_switch(struct dsa_switch *ds, struct device_node *np)
struct device_node *ports = dsa_get_ports(ds, np);
struct dsa_switch_tree *dst;
u32 tree, index;
- int err;
+ int i, err;
err = dsa_parse_member(np, &tree, &index);
if (err)
@@ -622,6 +622,11 @@ static int _dsa_register_switch(struct dsa_switch *ds, struct device_node *np)
ds->dst = dst;
ds->index = index;
+
+ /* Initialize the routing table */
+ for (i = 0; i < DSA_MAX_SWITCHES; ++i)
+ ds->rtable[i] = DSA_RTABLE_NONE;
+
dsa_dst_add_ds(dst, ds, index);
err = dsa_dst_complete(dst);
@@ -672,7 +677,7 @@ int dsa_register_switch(struct dsa_switch *ds, struct device_node *np)
}
EXPORT_SYMBOL_GPL(dsa_register_switch);
-void _dsa_unregister_switch(struct dsa_switch *ds)
+static void _dsa_unregister_switch(struct dsa_switch *ds)
{
struct dsa_switch_tree *dst = ds->dst;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d39e9e47a26e..55513e654d79 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -73,7 +73,7 @@
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/kmod.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
@@ -1916,6 +1916,3 @@ static int __init ipv4_proc_init(void)
return 0;
}
#endif /* CONFIG_PROC_FS */
-
-MODULE_ALIAS_NETPROTO(PF_INET);
-
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e333bc86bd39..415e117967c7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1834,7 +1834,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
+ skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
if (!skb)
goto errout;
@@ -1846,7 +1846,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
return;
errout:
if (err < 0)
@@ -1903,7 +1903,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
}
err = -ENOBUFS;
- skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC);
+ skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
if (!skb)
goto errout;
@@ -2027,16 +2027,16 @@ static void inet_forward_change(struct net *net)
for_each_netdev(net, dev) {
struct in_device *in_dev;
+
if (on)
dev_disable_lro(dev);
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(dev);
+
+ in_dev = __in_dev_get_rtnl(dev);
if (in_dev) {
IN_DEV_CONF_SET(in_dev, FORWARDING, on);
inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
dev->ifindex, &in_dev->cnf);
}
- rcu_read_unlock();
}
}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 978370132f29..4ae3f8e6c6cc 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -148,14 +148,14 @@ static int ipip_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->parms.link, 0, IPPROTO_IPIP, 0);
+ t->parms.link, 0, iph->protocol, 0);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
- IPPROTO_IPIP, 0);
+ iph->protocol, 0);
err = 0;
goto out;
}
@@ -177,12 +177,19 @@ out:
return err;
}
-static const struct tnl_ptk_info tpi = {
+static const struct tnl_ptk_info ipip_tpi = {
/* no tunnel info required for ipip. */
.proto = htons(ETH_P_IP),
};
-static int ipip_rcv(struct sk_buff *skb)
+#if IS_ENABLED(CONFIG_MPLS)
+static const struct tnl_ptk_info mplsip_tpi = {
+ /* no tunnel info required for mplsip. */
+ .proto = htons(ETH_P_MPLS_UC),
+};
+#endif
+
+static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
{
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
@@ -193,11 +200,23 @@ static int ipip_rcv(struct sk_buff *skb)
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
if (tunnel) {
+ const struct tnl_ptk_info *tpi;
+
+ if (tunnel->parms.iph.protocol != ipproto &&
+ tunnel->parms.iph.protocol != 0)
+ goto drop;
+
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
- if (iptunnel_pull_header(skb, 0, tpi.proto, false))
+#if IS_ENABLED(CONFIG_MPLS)
+ if (ipproto == IPPROTO_MPLS)
+ tpi = &mplsip_tpi;
+ else
+#endif
+ tpi = &ipip_tpi;
+ if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
- return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
+ return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
}
return -1;
@@ -207,24 +226,51 @@ drop:
return 0;
}
+static int ipip_rcv(struct sk_buff *skb)
+{
+ return ipip_tunnel_rcv(skb, IPPROTO_IPIP);
+}
+
+#if IS_ENABLED(CONFIG_MPLS)
+static int mplsip_rcv(struct sk_buff *skb)
+{
+ return ipip_tunnel_rcv(skb, IPPROTO_MPLS);
+}
+#endif
+
/*
* This function assumes it is being called from dev_queue_xmit()
* and that skb is filled properly by that function.
*/
-static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
+ struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tiph = &tunnel->parms.iph;
+ u8 ipproto;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ ipproto = IPPROTO_IPIP;
+ break;
+#if IS_ENABLED(CONFIG_MPLS)
+ case htons(ETH_P_MPLS_UC):
+ ipproto = IPPROTO_MPLS;
+ break;
+#endif
+ default:
+ goto tx_error;
+ }
- if (unlikely(skb->protocol != htons(ETH_P_IP)))
+ if (tiph->protocol != ipproto && tiph->protocol != 0)
goto tx_error;
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
goto tx_error;
- skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+ skb_set_inner_ipproto(skb, ipproto);
- ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
+ ip_tunnel_xmit(skb, dev, tiph, ipproto);
return NETDEV_TX_OK;
tx_error:
@@ -234,6 +280,20 @@ tx_error:
return NETDEV_TX_OK;
}
+static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
+{
+ switch (ipproto) {
+ case 0:
+ case IPPROTO_IPIP:
+#if IS_ENABLED(CONFIG_MPLS)
+ case IPPROTO_MPLS:
+#endif
+ return true;
+ }
+
+ return false;
+}
+
static int
ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
@@ -244,7 +304,8 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EFAULT;
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
- if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
+ if (p.iph.version != 4 ||
+ !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
return -EINVAL;
}
@@ -301,10 +362,23 @@ static int ipip_tunnel_init(struct net_device *dev)
tunnel->tun_hlen = 0;
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
- tunnel->parms.iph.protocol = IPPROTO_IPIP;
return ip_tunnel_init(dev);
}
+static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) /* rtnl .validate hook: checks IFLA_IPTUN_PROTO only */
+{
+ u8 proto;
+
+ if (!data || !data[IFLA_IPTUN_PROTO]) /* attribute absent: nothing to validate */
+ return 0;
+
+ proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+ if (!ipip_tunnel_ioctl_verify_protocol(proto)) /* same accepted set as the ioctl path; MPLS only with CONFIG_MPLS */
+ return -EINVAL;
+
+ return 0;
+}
+
static void ipip_netlink_parms(struct nlattr *data[],
struct ip_tunnel_parm *parms)
{
@@ -335,6 +409,9 @@ static void ipip_netlink_parms(struct nlattr *data[],
if (data[IFLA_IPTUN_TOS])
parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
+ if (data[IFLA_IPTUN_PROTO])
+ parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+
if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
parms->iph.frag_off = htons(IP_DF);
}
@@ -427,6 +504,8 @@ static size_t ipip_get_size(const struct net_device *dev)
nla_total_size(1) +
/* IFLA_IPTUN_TOS */
nla_total_size(1) +
+ /* IFLA_IPTUN_PROTO */
+ nla_total_size(1) +
/* IFLA_IPTUN_PMTUDISC */
nla_total_size(1) +
/* IFLA_IPTUN_ENCAP_TYPE */
@@ -450,6 +529,7 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
+ nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))))
goto nla_put_failure;
@@ -476,6 +556,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
[IFLA_IPTUN_TTL] = { .type = NLA_U8 },
[IFLA_IPTUN_TOS] = { .type = NLA_U8 },
+ [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
[IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
[IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
@@ -489,6 +570,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = {
.policy = ipip_policy,
.priv_size = sizeof(struct ip_tunnel),
.setup = ipip_tunnel_setup,
+ .validate = ipip_tunnel_validate,
.newlink = ipip_newlink,
.changelink = ipip_changelink,
.dellink = ip_tunnel_dellink,
@@ -503,6 +585,14 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
.priority = 1,
};
+#if IS_ENABLED(CONFIG_MPLS)
+static struct xfrm_tunnel mplsip_handler __read_mostly = {
+ .handler = mplsip_rcv,
+ .err_handler = ipip_err,
+ .priority = 1,
+};
+#endif
+
static int __net_init ipip_init_net(struct net *net)
{
return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
@@ -525,7 +615,7 @@ static int __init ipip_init(void)
{
int err;
- pr_info("ipip: IPv4 over IPv4 tunneling driver\n");
+ pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
err = register_pernet_device(&ipip_net_ops);
if (err < 0)
@@ -533,8 +623,15 @@ static int __init ipip_init(void)
err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
if (err < 0) {
pr_info("%s: can't register tunnel\n", __func__);
- goto xfrm_tunnel_failed;
+ goto xfrm_tunnel_ipip_failed;
+ }
+#if IS_ENABLED(CONFIG_MPLS)
+ err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
+ if (err < 0) {
+ pr_info("%s: can't register tunnel\n", __func__);
+ goto xfrm_tunnel_mplsip_failed;
}
+#endif
err = rtnl_link_register(&ipip_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -543,8 +640,13 @@ out:
return err;
rtnl_link_failed:
+#if IS_ENABLED(CONFIG_MPLS)
+ xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
+xfrm_tunnel_mplsip_failed:
+
+#endif
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
-xfrm_tunnel_failed:
+xfrm_tunnel_ipip_failed:
unregister_pernet_device(&ipip_net_ops);
goto out;
}
@@ -554,7 +656,10 @@ static void __exit ipip_fini(void)
rtnl_link_unregister(&ipip_link_ops);
if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
pr_info("%s: can't deregister tunnel\n", __func__);
-
+#if IS_ENABLED(CONFIG_MPLS)
+ if (xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS))
+ pr_info("%s: can't deregister tunnel\n", __func__);
+#endif
unregister_pernet_device(&ipip_net_ops);
}
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 0d0171830620..ec35eaa5c029 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -6,6 +6,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/mpls.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
@@ -16,11 +17,14 @@
static struct xfrm_tunnel __rcu *tunnel4_handlers __read_mostly;
static struct xfrm_tunnel __rcu *tunnel64_handlers __read_mostly;
+static struct xfrm_tunnel __rcu *tunnelmpls4_handlers __read_mostly;
static DEFINE_MUTEX(tunnel4_mutex);
static inline struct xfrm_tunnel __rcu **fam_handlers(unsigned short family)
{
- return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
+ return (family == AF_INET) ? &tunnel4_handlers :
+ (family == AF_INET6) ? &tunnel64_handlers :
+ &tunnelmpls4_handlers;
}
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
@@ -125,6 +129,26 @@ drop:
}
#endif
+#if IS_ENABLED(CONFIG_MPLS)
+static int tunnelmpls4_rcv(struct sk_buff *skb)
+{
+ struct xfrm_tunnel *handler;
+
+ if (!pskb_may_pull(skb, sizeof(struct mpls_label)))
+ goto drop;
+
+ for_each_tunnel_rcu(tunnelmpls4_handlers, handler)
+ if (!handler->handler(skb))
+ return 0;
+
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+#endif
+
static void tunnel4_err(struct sk_buff *skb, u32 info)
{
struct xfrm_tunnel *handler;
@@ -145,6 +169,17 @@ static void tunnel64_err(struct sk_buff *skb, u32 info)
}
#endif
+#if IS_ENABLED(CONFIG_MPLS)
+static void tunnelmpls4_err(struct sk_buff *skb, u32 info)
+{
+ struct xfrm_tunnel *handler;
+
+ for_each_tunnel_rcu(tunnelmpls4_handlers, handler)
+ if (!handler->err_handler(skb, info))
+ break;
+}
+#endif
+
static const struct net_protocol tunnel4_protocol = {
.handler = tunnel4_rcv,
.err_handler = tunnel4_err,
@@ -161,24 +196,47 @@ static const struct net_protocol tunnel64_protocol = {
};
#endif
+#if IS_ENABLED(CONFIG_MPLS)
+static const struct net_protocol tunnelmpls4_protocol = {
+ .handler = tunnelmpls4_rcv,
+ .err_handler = tunnelmpls4_err,
+ .no_policy = 1,
+ .netns_ok = 1,
+};
+#endif
+
static int __init tunnel4_init(void)
{
- if (inet_add_protocol(&tunnel4_protocol, IPPROTO_IPIP)) {
- pr_err("%s: can't add protocol\n", __func__);
- return -EAGAIN;
- }
+ if (inet_add_protocol(&tunnel4_protocol, IPPROTO_IPIP))
+ goto err;
#if IS_ENABLED(CONFIG_IPV6)
if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) {
- pr_err("tunnel64 init: can't add protocol\n");
inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
- return -EAGAIN;
+ goto err;
+ }
+#endif
+#if IS_ENABLED(CONFIG_MPLS)
+ if (inet_add_protocol(&tunnelmpls4_protocol, IPPROTO_MPLS)) {
+ inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
+#if IS_ENABLED(CONFIG_IPV6)
+ inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6);
+#endif
+ goto err;
}
#endif
return 0;
+
+err:
+ pr_err("%s: can't add protocol\n", __func__);
+ return -EAGAIN;
}
static void __exit tunnel4_fini(void)
{
+#if IS_ENABLED(CONFIG_MPLS)
+ if (inet_del_protocol(&tunnelmpls4_protocol, IPPROTO_MPLS))
+ pr_err("tunnelmpls4 close: can't remove protocol\n");
+#endif
#if IS_ENABLED(CONFIG_IPV6)
if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6))
pr_err("tunnel64 close: can't remove protocol\n");
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a1f6b7b31531..24f1b0898e40 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -547,7 +547,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC);
+ skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_KERNEL);
if (!skb)
goto errout;
@@ -559,7 +559,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC);
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_KERNEL);
return;
errout:
rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 487ef3bc7bbc..c7ca0f5d1a3b 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1592,14 +1592,15 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
if (likely(mrt->mroute6_sk == NULL)) {
mrt->mroute6_sk = sk;
net->ipv6.devconf_all->mc_forwarding++;
- inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
- NETCONFA_IFINDEX_ALL,
- net->ipv6.devconf_all);
- }
- else
+ } else {
err = -EADDRINUSE;
+ }
write_unlock_bh(&mrt_lock);
+ if (!err)
+ inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all);
rtnl_unlock();
return err;
@@ -1617,11 +1618,11 @@ int ip6mr_sk_done(struct sock *sk)
write_lock_bh(&mrt_lock);
mrt->mroute6_sk = NULL;
net->ipv6.devconf_all->mc_forwarding--;
+ write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net,
NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
- write_unlock_bh(&mrt_lock);
mroute_clean_tables(mrt, false);
err = 0;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 917a5cd4b8fc..182b6a9be29d 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -688,12 +688,19 @@ out:
return 0;
}
-static const struct tnl_ptk_info tpi = {
+static const struct tnl_ptk_info ipip_tpi = {
/* no tunnel info required for ipip. */
.proto = htons(ETH_P_IP),
};
-static int ipip_rcv(struct sk_buff *skb)
+#if IS_ENABLED(CONFIG_MPLS)
+static const struct tnl_ptk_info mplsip_tpi = {
+ /* no tunnel info required for mplsip. */
+ .proto = htons(ETH_P_MPLS_UC),
+};
+#endif
+
+static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
{
const struct iphdr *iph;
struct ip_tunnel *tunnel;
@@ -702,15 +709,23 @@ static int ipip_rcv(struct sk_buff *skb)
tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
iph->saddr, iph->daddr);
if (tunnel) {
- if (tunnel->parms.iph.protocol != IPPROTO_IPIP &&
+ const struct tnl_ptk_info *tpi;
+
+ if (tunnel->parms.iph.protocol != ipproto &&
tunnel->parms.iph.protocol != 0)
goto drop;
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
- if (iptunnel_pull_header(skb, 0, tpi.proto, false))
+#if IS_ENABLED(CONFIG_MPLS)
+ if (ipproto == IPPROTO_MPLS)
+ tpi = &mplsip_tpi;
+ else
+#endif
+ tpi = &ipip_tpi;
+ if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
- return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
+ return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
}
return 1;
@@ -720,6 +735,18 @@ drop:
return 0;
}
+static int ipip_rcv(struct sk_buff *skb)
+{
+ return sit_tunnel_rcv(skb, IPPROTO_IPIP);
+}
+
+#if IS_ENABLED(CONFIG_MPLS)
+static int mplsip_rcv(struct sk_buff *skb)
+{
+ return sit_tunnel_rcv(skb, IPPROTO_MPLS);
+}
+#endif
+
/*
* If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function
* stores the embedded IPv4 address in v4dst and returns true.
@@ -958,7 +985,8 @@ tx_error:
return NETDEV_TX_OK;
}
-static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t sit_tunnel_xmit__(struct sk_buff *skb,
+ struct net_device *dev, u8 ipproto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tiph = &tunnel->parms.iph;
@@ -966,9 +994,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
goto tx_error;
- skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+ skb_set_inner_ipproto(skb, ipproto);
- ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
+ ip_tunnel_xmit(skb, dev, tiph, ipproto);
return NETDEV_TX_OK;
tx_error:
kfree_skb(skb);
@@ -981,11 +1009,16 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
{
switch (skb->protocol) {
case htons(ETH_P_IP):
- ipip_tunnel_xmit(skb, dev);
+ sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP);
break;
case htons(ETH_P_IPV6):
ipip6_tunnel_xmit(skb, dev);
break;
+#if IS_ENABLED(CONFIG_MPLS)
+ case htons(ETH_P_MPLS_UC):
+ sit_tunnel_xmit__(skb, dev, IPPROTO_MPLS);
+ break;
+#endif
default:
goto tx_err;
}
@@ -1093,6 +1126,16 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
}
#endif
+bool ipip6_valid_ip_proto(u8 ipproto)
+{
+ return ipproto == IPPROTO_IPV6 ||
+ ipproto == IPPROTO_IPIP ||
+#if IS_ENABLED(CONFIG_MPLS)
+ ipproto == IPPROTO_MPLS ||
+#endif
+ ipproto == 0;
+}
+
static int
ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
@@ -1152,9 +1195,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
goto done;
err = -EINVAL;
- if (p.iph.protocol != IPPROTO_IPV6 &&
- p.iph.protocol != IPPROTO_IPIP &&
- p.iph.protocol != 0)
+ if (!ipip6_valid_ip_proto(p.iph.protocol))
goto done;
if (p.iph.version != 4 ||
p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
@@ -1379,9 +1420,7 @@ static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[])
return 0;
proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
- if (proto != IPPROTO_IPV6 &&
- proto != IPPROTO_IPIP &&
- proto != 0)
+ if (!ipip6_valid_ip_proto(proto))
return -EINVAL;
return 0;
@@ -1723,6 +1762,14 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
.priority = 2,
};
+#if IS_ENABLED(CONFIG_MPLS)
+static struct xfrm_tunnel mplsip_handler __read_mostly = {
+ .handler = mplsip_rcv,
+ .err_handler = ipip6_err,
+ .priority = 2,
+};
+#endif
+
static void __net_exit sit_destroy_tunnels(struct net *net,
struct list_head *head)
{
@@ -1818,6 +1865,9 @@ static void __exit sit_cleanup(void)
rtnl_link_unregister(&sit_link_ops);
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+#if IS_ENABLED(CONFIG_MPLS)
+ xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
+#endif
unregister_pernet_device(&sit_net_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
@@ -1827,7 +1877,7 @@ static int __init sit_init(void)
{
int err;
- pr_info("IPv6 over IPv4 tunneling driver\n");
+ pr_info("IPv6, IPv4 and MPLS over IPv4 tunneling driver\n");
err = register_pernet_device(&sit_net_ops);
if (err < 0)
@@ -1842,6 +1892,13 @@ static int __init sit_init(void)
pr_info("%s: can't register ip4ip4\n", __func__);
goto xfrm_tunnel4_failed;
}
+#if IS_ENABLED(CONFIG_MPLS)
+ err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
+ if (err < 0) {
+ pr_info("%s: can't register mplsip\n", __func__);
+ goto xfrm_tunnel_mpls_failed;
+ }
+#endif
err = rtnl_link_register(&sit_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -1850,6 +1907,10 @@ out:
return err;
rtnl_link_failed:
+#if IS_ENABLED(CONFIG_MPLS)
+ xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
+xfrm_tunnel_mpls_failed:
+#endif
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
xfrm_tunnel4_failed:
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index e9beaa58573c..5c161e7759b5 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1009,10 +1009,12 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
unsigned int flags;
if (event == NETDEV_REGISTER) {
- /* For now just support Ethernet and IPGRE devices */
+ /* For now just support Ethernet, IPGRE, SIT and IPIP devices */
if (dev->type == ARPHRD_ETHER ||
dev->type == ARPHRD_LOOPBACK ||
- dev->type == ARPHRD_IPGRE) {
+ dev->type == ARPHRD_IPGRE ||
+ dev->type == ARPHRD_SIT ||
+ dev->type == ARPHRD_TUNNEL) {
mdev = mpls_add_dev(dev);
if (IS_ERR(mdev))
return notifier_from_errno(PTR_ERR(mdev));
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 6522e50fb750..10f3f48a16a8 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -10,6 +10,7 @@ af-rxrpc-y := \
conn_client.o \
conn_event.o \
conn_object.o \
+ conn_service.o \
input.o \
insecure.o \
key.o \
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 5d3e795a7c48..88effadd4b16 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -766,9 +766,9 @@ error_key_type:
error_sock:
proto_unregister(&rxrpc_proto);
error_proto:
- destroy_workqueue(rxrpc_workqueue);
-error_security:
rxrpc_exit_security();
+error_security:
+ destroy_workqueue(rxrpc_workqueue);
error_work_queue:
kmem_cache_destroy(rxrpc_call_jar);
error_call_jar:
@@ -788,27 +788,7 @@ static void __exit af_rxrpc_exit(void)
proto_unregister(&rxrpc_proto);
rxrpc_destroy_all_calls();
rxrpc_destroy_all_connections();
-
ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0);
-
- /* We need to flush the scheduled work twice because the local endpoint
- * records involve a work item in their destruction as they can only be
- * destroyed from process context. However, a connection may have a
- * work item outstanding - and this will pin the local endpoint record
- * until the connection goes away.
- *
- * Peers don't pin locals and calls pin sockets - which prevents the
- * module from being unloaded - so we should only need two flushes.
- */
- _debug("flush scheduled work");
- flush_workqueue(rxrpc_workqueue);
- _debug("flush scheduled work 2");
- flush_workqueue(rxrpc_workqueue);
- _debug("synchronise RCU");
- rcu_barrier();
- _debug("destroy locals");
- ASSERT(idr_is_empty(&rxrpc_client_conn_ids));
- idr_destroy(&rxrpc_client_conn_ids);
rxrpc_destroy_all_locals();
remove_proc_entry("rxrpc_conns", init_net.proc_net);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 702db72196fb..1bb9e7ac9e14 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -10,6 +10,7 @@
*/
#include <linux/atomic.h>
+#include <linux/seqlock.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <rxrpc/packet.h>
@@ -35,7 +36,6 @@ struct rxrpc_crypt {
queue_delayed_work(rxrpc_workqueue, (WS), (D))
#define rxrpc_queue_call(CALL) rxrpc_queue_work(&(CALL)->processor)
-#define rxrpc_queue_conn(CONN) rxrpc_queue_work(&(CONN)->processor)
struct rxrpc_connection;
@@ -141,17 +141,16 @@ struct rxrpc_security {
int (*init_connection_security)(struct rxrpc_connection *);
/* prime a connection's packet security */
- void (*prime_packet_security)(struct rxrpc_connection *);
+ int (*prime_packet_security)(struct rxrpc_connection *);
/* impose security on a packet */
- int (*secure_packet)(const struct rxrpc_call *,
+ int (*secure_packet)(struct rxrpc_call *,
struct sk_buff *,
size_t,
void *);
/* verify the security on a received packet */
- int (*verify_packet)(const struct rxrpc_call *, struct sk_buff *,
- u32 *);
+ int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, u32 *);
/* issue a challenge */
int (*issue_challenge)(struct rxrpc_connection *);
@@ -208,7 +207,7 @@ struct rxrpc_peer {
struct hlist_head error_targets; /* targets for net error distribution */
struct work_struct error_distributor;
struct rb_root service_conns; /* Service connections */
- rwlock_t conn_lock;
+ seqlock_t service_conn_lock;
spinlock_t lock; /* access lock */
unsigned int if_mtu; /* interface MTU for this peer */
unsigned int mtu; /* network MTU for this peer */
@@ -231,18 +230,12 @@ struct rxrpc_peer {
* Keys for matching a connection.
*/
struct rxrpc_conn_proto {
- unsigned long hash_key;
- struct rxrpc_local *local; /* Representation of local endpoint */
- u32 epoch; /* epoch of this connection */
- u32 cid; /* connection ID */
- u8 in_clientflag; /* RXRPC_CLIENT_INITIATED if we are server */
- u8 addr_size; /* Size of the address */
- sa_family_t family; /* Transport protocol */
- __be16 port; /* Peer UDP/UDP6 port */
- union { /* Peer address */
- struct in_addr ipv4_addr;
- struct in6_addr ipv6_addr;
- u32 raw_addr[0];
+ union {
+ struct {
+ u32 epoch; /* epoch of this connection */
+ u32 cid; /* connection ID */
+ };
+ u64 index_key;
};
};
@@ -256,6 +249,37 @@ struct rxrpc_conn_parameters {
};
/*
+ * Bits in the connection flags.
+ */
+enum rxrpc_conn_flag {
+ RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */
+ RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */
+ RXRPC_CONN_IN_CLIENT_CONNS, /* Conn is in local->client_conns */
+};
+
+/*
+ * Events that can be raised upon a connection.
+ */
+enum rxrpc_conn_event {
+ RXRPC_CONN_EV_CHALLENGE, /* Send challenge packet */
+};
+
+/*
+ * The connection protocol state.
+ */
+enum rxrpc_conn_proto_state {
+ RXRPC_CONN_UNUSED, /* Connection not yet attempted */
+ RXRPC_CONN_CLIENT, /* Client connection */
+ RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */
+ RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */
+ RXRPC_CONN_SERVICE, /* Service secured connection */
+ RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */
+ RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */
+ RXRPC_CONN_NETWORK_ERROR, /* Conn terminated by network error */
+ RXRPC_CONN__NR_STATES
+};
+
+/*
* RxRPC connection definition
* - matched by { local, peer, epoch, conn_id, direction }
* - each connection can only handle four simultaneous calls
@@ -265,44 +289,38 @@ struct rxrpc_connection {
struct rxrpc_conn_parameters params;
spinlock_t channel_lock;
- struct rxrpc_call *channels[RXRPC_MAXCALLS]; /* active calls */
+
+ struct rxrpc_channel {
+ struct rxrpc_call __rcu *call; /* Active call */
+ u32 call_id; /* ID of current call */
+ u32 call_counter; /* Call ID counter */
+ u32 last_call; /* ID of last call */
+ u32 last_result; /* Result of last call (0/abort) */
+ } channels[RXRPC_MAXCALLS];
wait_queue_head_t channel_wq; /* queue to wait for channel to become available */
+ struct rcu_head rcu;
struct work_struct processor; /* connection event processor */
union {
struct rb_node client_node; /* Node in local->client_conns */
struct rb_node service_node; /* Node in peer->service_conns */
};
struct list_head link; /* link in master connection list */
- struct rb_root calls; /* calls on this connection */
struct sk_buff_head rx_queue; /* received conn-level packets */
const struct rxrpc_security *security; /* applied security module */
struct key *server_key; /* security for this service */
struct crypto_skcipher *cipher; /* encryption handle */
struct rxrpc_crypt csum_iv; /* packet checksum base */
unsigned long flags;
-#define RXRPC_CONN_HAS_IDR 0 /* - Has a client conn ID assigned */
unsigned long events;
-#define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */
unsigned long put_time; /* Time at which last put */
- rwlock_t lock; /* access lock */
spinlock_t state_lock; /* state-change lock */
atomic_t usage;
- enum { /* current state of connection */
- RXRPC_CONN_UNUSED, /* - connection not yet attempted */
- RXRPC_CONN_CLIENT, /* - client connection */
- RXRPC_CONN_SERVER_UNSECURED, /* - server unsecured connection */
- RXRPC_CONN_SERVER_CHALLENGING, /* - server challenging for security */
- RXRPC_CONN_SERVER, /* - server secured connection */
- RXRPC_CONN_REMOTELY_ABORTED, /* - conn aborted by peer */
- RXRPC_CONN_LOCALLY_ABORTED, /* - conn aborted locally */
- RXRPC_CONN_NETWORK_ERROR, /* - conn terminated by network error */
- } state;
+ enum rxrpc_conn_proto_state state : 8; /* current state of connection */
u32 local_abort; /* local abort code */
u32 remote_abort; /* remote abort code */
int error; /* local error incurred */
int debug_id; /* debug ID for printks */
- unsigned int call_counter; /* call ID counter */
atomic_t serial; /* packet serial number counter */
atomic_t hi_serial; /* highest serial number received */
atomic_t avail_chans; /* number of channels available */
@@ -382,6 +400,7 @@ enum rxrpc_call_state {
* - matched by { connection, call_id }
*/
struct rxrpc_call {
+ struct rcu_head rcu;
struct rxrpc_connection *conn; /* connection carrying call */
struct rxrpc_sock *socket; /* socket responsible */
struct timer_list lifetimer; /* lifetime remaining on call */
@@ -394,11 +413,11 @@ struct rxrpc_call {
struct hlist_node error_link; /* link in error distribution list */
struct list_head accept_link; /* calls awaiting acceptance */
struct rb_node sock_node; /* node in socket call tree */
- struct rb_node conn_node; /* node in connection call tree */
struct sk_buff_head rx_queue; /* received packets */
struct sk_buff_head rx_oos_queue; /* packets received out of sequence */
struct sk_buff *tx_pending; /* Tx socket buffer being filled */
wait_queue_head_t tx_waitq; /* wait for Tx window space to become available */
+ __be32 crypto_buf[2]; /* Temporary packet crypto buffer */
unsigned long user_call_ID; /* user-defined call ID */
unsigned long creation_jif; /* time of call creation */
unsigned long flags;
@@ -442,19 +461,12 @@ struct rxrpc_call {
#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
- struct hlist_node hash_node;
- unsigned long hash_key; /* Full hash key */
- u8 in_clientflag; /* Copy of conn->in_clientflag for hashing */
- struct rxrpc_local *local; /* Local endpoint. Used for hashing. */
- sa_family_t family; /* Frame protocol */
+ u8 in_clientflag; /* Copy of conn->in_clientflag */
+ struct rxrpc_local *local; /* Local endpoint. */
u32 call_id; /* call ID on connection */
u32 cid; /* connection ID plus channel index */
u32 epoch; /* epoch of this connection */
u16 service_id; /* service ID */
- union { /* Peer IP address for hashing */
- __be32 ipv4_addr;
- __u8 ipv6_addr[16]; /* Anticipates eventual IPv6 support */
- } peer_ip;
};
/*
@@ -502,8 +514,6 @@ extern struct kmem_cache *rxrpc_call_jar;
extern struct list_head rxrpc_calls;
extern rwlock_t rxrpc_call_lock;
-struct rxrpc_call *rxrpc_find_call_hash(struct rxrpc_host_header *,
- void *, sa_family_t, const void *);
struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct rxrpc_conn_parameters *,
@@ -522,8 +532,10 @@ void __exit rxrpc_destroy_all_calls(void);
*/
extern struct idr rxrpc_client_conn_ids;
-int rxrpc_get_client_connection_id(struct rxrpc_connection *, gfp_t);
-void rxrpc_put_client_connection_id(struct rxrpc_connection *);
+void rxrpc_destroy_client_conn_ids(void);
+int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
+ struct sockaddr_rxrpc *, gfp_t);
+void rxrpc_unpublish_client_conn(struct rxrpc_connection *);
/*
* conn_event.c
@@ -539,17 +551,14 @@ extern unsigned int rxrpc_connection_expiry;
extern struct list_head rxrpc_connections;
extern rwlock_t rxrpc_connection_lock;
-int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
- struct sockaddr_rxrpc *, gfp_t);
-struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_local *,
- struct rxrpc_peer *,
- struct sk_buff *);
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
+struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
+struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
+ struct sk_buff *);
+void __rxrpc_disconnect_call(struct rxrpc_call *);
void rxrpc_disconnect_call(struct rxrpc_call *);
void rxrpc_put_connection(struct rxrpc_connection *);
void __exit rxrpc_destroy_all_connections(void);
-struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *,
- struct rxrpc_peer *,
- struct sk_buff *);
static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn)
{
@@ -558,7 +567,7 @@ static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn)
static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn)
{
- return conn->proto.in_clientflag;
+ return !rxrpc_conn_is_client(conn);
}
static inline void rxrpc_get_connection(struct rxrpc_connection *conn)
@@ -566,6 +575,31 @@ static inline void rxrpc_get_connection(struct rxrpc_connection *conn)
atomic_inc(&conn->usage);
}
+static inline
+struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
+{
+ return atomic_inc_not_zero(&conn->usage) ? conn : NULL;
+}
+
+static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn)
+{
+ if (!rxrpc_get_connection_maybe(conn))
+ return false;
+ if (!rxrpc_queue_work(&conn->processor))
+ rxrpc_put_connection(conn);
+ return true;
+}
+
+/*
+ * conn_service.c
+ */
+struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *,
+ struct sk_buff *);
+struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *,
+ struct sockaddr_rxrpc *,
+ struct sk_buff *);
+void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
+
/*
* input.c
*/
@@ -618,6 +652,11 @@ static inline void rxrpc_put_local(struct rxrpc_local *local)
__rxrpc_put_local(local);
}
+static inline void rxrpc_queue_local(struct rxrpc_local *local)
+{
+ rxrpc_queue_work(&local->processor);
+}
+
/*
* misc.c
*/
@@ -722,8 +761,7 @@ static inline void rxrpc_sysctl_exit(void) {}
/*
* utils.c
*/
-void rxrpc_get_addr_from_skb(struct rxrpc_local *, const struct sk_buff *,
- struct sockaddr_rxrpc *);
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
/*
* debug tracing
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 202e053a3c6d..0b2832141bd0 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -75,7 +75,6 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
{
struct rxrpc_connection *conn;
struct rxrpc_skb_priv *sp, *nsp;
- struct rxrpc_peer *peer;
struct rxrpc_call *call;
struct sk_buff *notification;
int ret;
@@ -94,15 +93,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
rxrpc_new_skb(notification);
notification->mark = RXRPC_SKB_MARK_NEW_CALL;
- peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
- if (!peer) {
- _debug("no peer");
- ret = -EBUSY;
- goto error;
- }
-
- conn = rxrpc_incoming_connection(local, peer, skb);
- rxrpc_put_peer(peer);
+ conn = rxrpc_incoming_connection(local, srx, skb);
if (IS_ERR(conn)) {
_debug("no conn");
ret = PTR_ERR(conn);
@@ -128,12 +119,11 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
spin_lock(&call->conn->state_lock);
if (sp->hdr.securityIndex > 0 &&
- call->conn->state == RXRPC_CONN_SERVER_UNSECURED) {
+ call->conn->state == RXRPC_CONN_SERVICE_UNSECURED) {
_debug("await conn sec");
list_add_tail(&call->accept_link, &rx->secureq);
- call->conn->state = RXRPC_CONN_SERVER_CHALLENGING;
- rxrpc_get_connection(call->conn);
- set_bit(RXRPC_CONN_CHALLENGE, &call->conn->events);
+ call->conn->state = RXRPC_CONN_SERVICE_CHALLENGING;
+ set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events);
rxrpc_queue_conn(call->conn);
} else {
_debug("conn ready");
@@ -227,20 +217,8 @@ void rxrpc_accept_incoming_calls(struct rxrpc_local *local)
whdr._rsvd = 0;
whdr.serviceId = htons(sp->hdr.serviceId);
- /* determine the remote address */
- memset(&srx, 0, sizeof(srx));
- srx.srx_family = AF_RXRPC;
- srx.transport.family = local->srx.transport.family;
- srx.transport_type = local->srx.transport_type;
- switch (srx.transport.family) {
- case AF_INET:
- srx.transport_len = sizeof(struct sockaddr_in);
- srx.transport.sin.sin_port = udp_hdr(skb)->source;
- srx.transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
- break;
- default:
- goto busy;
- }
+ if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+ goto drop;
/* get the socket providing the service */
read_lock_bh(&local->services_lock);
@@ -286,6 +264,10 @@ busy:
rxrpc_free_skb(skb);
return;
+drop:
+ rxrpc_free_skb(skb);
+ return;
+
invalid_service:
skb->priority = RX_INVALID_OPERATION;
rxrpc_reject_packet(local, skb);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 0ba84295f913..fc32aa5764a2 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -858,11 +858,6 @@ void rxrpc_process_call(struct work_struct *work)
iov[0].iov_len = sizeof(whdr);
/* deal with events of a final nature */
- if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
- rxrpc_release_call(call);
- clear_bit(RXRPC_CALL_EV_RELEASE, &call->events);
- }
-
if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) {
enum rxrpc_skb_mark mark;
int error;
@@ -1094,7 +1089,7 @@ void rxrpc_process_call(struct work_struct *work)
if (call->state == RXRPC_CALL_SERVER_SECURING) {
_debug("securing");
- write_lock(&call->conn->lock);
+ write_lock(&call->socket->call_lock);
if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
!test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
_debug("not released");
@@ -1102,7 +1097,7 @@ void rxrpc_process_call(struct work_struct *work)
list_move_tail(&call->accept_link,
&call->socket->acceptq);
}
- write_unlock(&call->conn->lock);
+ write_unlock(&call->socket->call_lock);
read_lock(&call->state_lock);
if (call->state < RXRPC_CALL_COMPLETE)
set_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events);
@@ -1144,6 +1139,11 @@ void rxrpc_process_call(struct work_struct *work)
goto maybe_reschedule;
}
+ if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
+ rxrpc_release_call(call);
+ clear_bit(RXRPC_CALL_EV_RELEASE, &call->events);
+ }
+
/* other events may have been raised since we started checking */
goto maybe_reschedule;
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index ad933daae13b..91287c9d01bb 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -14,7 +14,6 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/circ_buf.h>
-#include <linux/hashtable.h>
#include <linux/spinlock_types.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
@@ -61,142 +60,6 @@ static void rxrpc_dead_call_expired(unsigned long _call);
static void rxrpc_ack_time_expired(unsigned long _call);
static void rxrpc_resend_time_expired(unsigned long _call);
-static DEFINE_SPINLOCK(rxrpc_call_hash_lock);
-static DEFINE_HASHTABLE(rxrpc_call_hash, 10);
-
-/*
- * Hash function for rxrpc_call_hash
- */
-static unsigned long rxrpc_call_hashfunc(
- u8 in_clientflag,
- u32 cid,
- u32 call_id,
- u32 epoch,
- u16 service_id,
- sa_family_t family,
- void *localptr,
- unsigned int addr_size,
- const u8 *peer_addr)
-{
- const u16 *p;
- unsigned int i;
- unsigned long key;
-
- _enter("");
-
- key = (unsigned long)localptr;
- /* We just want to add up the __be32 values, so forcing the
- * cast should be okay.
- */
- key += epoch;
- key += service_id;
- key += call_id;
- key += (cid & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT;
- key += cid & RXRPC_CHANNELMASK;
- key += in_clientflag;
- key += family;
- /* Step through the peer address in 16-bit portions for speed */
- for (i = 0, p = (const u16 *)peer_addr; i < addr_size >> 1; i++, p++)
- key += *p;
- _leave(" key = 0x%lx", key);
- return key;
-}
-
-/*
- * Add a call to the hashtable
- */
-static void rxrpc_call_hash_add(struct rxrpc_call *call)
-{
- unsigned long key;
- unsigned int addr_size = 0;
-
- _enter("");
- switch (call->family) {
- case AF_INET:
- addr_size = sizeof(call->peer_ip.ipv4_addr);
- break;
- case AF_INET6:
- addr_size = sizeof(call->peer_ip.ipv6_addr);
- break;
- default:
- break;
- }
- key = rxrpc_call_hashfunc(call->in_clientflag, call->cid,
- call->call_id, call->epoch,
- call->service_id, call->family,
- call->conn->params.local, addr_size,
- call->peer_ip.ipv6_addr);
- /* Store the full key in the call */
- call->hash_key = key;
- spin_lock(&rxrpc_call_hash_lock);
- hash_add_rcu(rxrpc_call_hash, &call->hash_node, key);
- spin_unlock(&rxrpc_call_hash_lock);
- _leave("");
-}
-
-/*
- * Remove a call from the hashtable
- */
-static void rxrpc_call_hash_del(struct rxrpc_call *call)
-{
- _enter("");
- spin_lock(&rxrpc_call_hash_lock);
- hash_del_rcu(&call->hash_node);
- spin_unlock(&rxrpc_call_hash_lock);
- _leave("");
-}
-
-/*
- * Find a call in the hashtable and return it, or NULL if it
- * isn't there.
- */
-struct rxrpc_call *rxrpc_find_call_hash(
- struct rxrpc_host_header *hdr,
- void *localptr,
- sa_family_t family,
- const void *peer_addr)
-{
- unsigned long key;
- unsigned int addr_size = 0;
- struct rxrpc_call *call = NULL;
- struct rxrpc_call *ret = NULL;
- u8 in_clientflag = hdr->flags & RXRPC_CLIENT_INITIATED;
-
- _enter("");
- switch (family) {
- case AF_INET:
- addr_size = sizeof(call->peer_ip.ipv4_addr);
- break;
- case AF_INET6:
- addr_size = sizeof(call->peer_ip.ipv6_addr);
- break;
- default:
- break;
- }
-
- key = rxrpc_call_hashfunc(in_clientflag, hdr->cid, hdr->callNumber,
- hdr->epoch, hdr->serviceId,
- family, localptr, addr_size,
- peer_addr);
- hash_for_each_possible_rcu(rxrpc_call_hash, call, hash_node, key) {
- if (call->hash_key == key &&
- call->call_id == hdr->callNumber &&
- call->cid == hdr->cid &&
- call->in_clientflag == in_clientflag &&
- call->service_id == hdr->serviceId &&
- call->family == family &&
- call->local == localptr &&
- memcmp(call->peer_ip.ipv6_addr, peer_addr,
- addr_size) == 0 &&
- call->epoch == hdr->epoch) {
- ret = call;
- break;
- }
- }
- _leave(" = %p", ret);
- return ret;
-}
-
/*
* find an extant server call
* - called in process context with IRQs enabled
@@ -305,20 +168,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
call->socket = rx;
call->rx_data_post = 1;
- /* Record copies of information for hashtable lookup */
- call->family = rx->family;
call->local = rx->local;
- switch (call->family) {
- case AF_INET:
- call->peer_ip.ipv4_addr = srx->transport.sin.sin_addr.s_addr;
- break;
- case AF_INET6:
- memcpy(call->peer_ip.ipv6_addr,
- srx->transport.sin6.sin6_addr.in6_u.u6_addr8,
- sizeof(call->peer_ip.ipv6_addr));
- break;
- }
-
call->service_id = srx->srx_service;
call->in_clientflag = 0;
@@ -345,9 +195,6 @@ static int rxrpc_begin_client_call(struct rxrpc_call *call,
call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
- /* Add the new call to the hashtable */
- rxrpc_call_hash_add(call);
-
spin_lock(&call->conn->params.peer->lock);
hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets);
spin_unlock(&call->conn->params.peer->lock);
@@ -425,9 +272,10 @@ error:
rxrpc_put_call(call);
write_lock_bh(&rxrpc_call_lock);
- list_del(&call->link);
+ list_del_init(&call->link);
write_unlock_bh(&rxrpc_call_lock);
+ call->state = RXRPC_CALL_DEAD;
rxrpc_put_call(call);
_leave(" = %d", ret);
return ERR_PTR(ret);
@@ -439,6 +287,7 @@ error:
*/
found_user_ID_now_present:
write_unlock(&rx->call_lock);
+ call->state = RXRPC_CALL_DEAD;
rxrpc_put_call(call);
_leave(" = -EEXIST [%p]", call);
return ERR_PTR(-EEXIST);
@@ -454,8 +303,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_call *call, *candidate;
- struct rb_node **p, *parent;
- u32 call_id;
+ u32 call_id, chan;
_enter(",%d", conn->debug_id);
@@ -465,20 +313,23 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
if (!candidate)
return ERR_PTR(-EBUSY);
+ chan = sp->hdr.cid & RXRPC_CHANNELMASK;
candidate->socket = rx;
candidate->conn = conn;
candidate->cid = sp->hdr.cid;
candidate->call_id = sp->hdr.callNumber;
- candidate->channel = sp->hdr.cid & RXRPC_CHANNELMASK;
+ candidate->channel = chan;
candidate->rx_data_post = 0;
candidate->state = RXRPC_CALL_SERVER_ACCEPTING;
if (conn->security_ix > 0)
candidate->state = RXRPC_CALL_SERVER_SECURING;
- write_lock_bh(&conn->lock);
+ spin_lock(&conn->channel_lock);
/* set the channel for this call */
- call = conn->channels[candidate->channel];
+ call = rcu_dereference_protected(conn->channels[chan].call,
+ lockdep_is_held(&conn->channel_lock));
+
_debug("channel[%u] is %p", candidate->channel, call);
if (call && call->call_id == sp->hdr.callNumber) {
/* already set; must've been a duplicate packet */
@@ -507,9 +358,9 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
call->debug_id, rxrpc_call_states[call->state]);
if (call->state >= RXRPC_CALL_COMPLETE) {
- conn->channels[call->channel] = NULL;
+ __rxrpc_disconnect_call(call);
} else {
- write_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
kmem_cache_free(rxrpc_call_jar, candidate);
_leave(" = -EBUSY");
return ERR_PTR(-EBUSY);
@@ -519,33 +370,22 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
/* check the call number isn't duplicate */
_debug("check dup");
call_id = sp->hdr.callNumber;
- p = &conn->calls.rb_node;
- parent = NULL;
- while (*p) {
- parent = *p;
- call = rb_entry(parent, struct rxrpc_call, conn_node);
-
- /* The tree is sorted in order of the __be32 value without
- * turning it into host order.
- */
- if (call_id < call->call_id)
- p = &(*p)->rb_left;
- else if (call_id > call->call_id)
- p = &(*p)->rb_right;
- else
- goto old_call;
- }
+
+ /* We just ignore calls prior to the current call ID. Terminated calls
+ * are handled via the connection.
+ */
+ if (call_id <= conn->channels[chan].call_counter)
+ goto old_call; /* TODO: Just drop packet */
/* make the call available */
_debug("new call");
call = candidate;
candidate = NULL;
- rb_link_node(&call->conn_node, parent, p);
- rb_insert_color(&call->conn_node, &conn->calls);
- conn->channels[call->channel] = call;
+ conn->channels[chan].call_counter = call_id;
+ rcu_assign_pointer(conn->channels[chan].call, call);
sock_hold(&rx->sk);
rxrpc_get_connection(conn);
- write_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
spin_lock(&conn->params.peer->lock);
hlist_add_head(&call->error_link, &conn->params.peer->error_targets);
@@ -555,27 +395,10 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
list_add_tail(&call->link, &rxrpc_calls);
write_unlock_bh(&rxrpc_call_lock);
- /* Record copies of information for hashtable lookup */
- call->family = rx->family;
call->local = conn->params.local;
- switch (call->family) {
- case AF_INET:
- call->peer_ip.ipv4_addr =
- conn->params.peer->srx.transport.sin.sin_addr.s_addr;
- break;
- case AF_INET6:
- memcpy(call->peer_ip.ipv6_addr,
- conn->params.peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8,
- sizeof(call->peer_ip.ipv6_addr));
- break;
- default:
- break;
- }
call->epoch = conn->proto.epoch;
call->service_id = conn->params.service_id;
- call->in_clientflag = conn->proto.in_clientflag;
- /* Add the new call to the hashtable */
- rxrpc_call_hash_add(call);
+ call->in_clientflag = RXRPC_CLIENT_INITIATED;
_net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
@@ -585,19 +408,19 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
return call;
extant_call:
- write_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
kmem_cache_free(rxrpc_call_jar, candidate);
_leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1);
return call;
aborted_call:
- write_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
kmem_cache_free(rxrpc_call_jar, candidate);
_leave(" = -ECONNABORTED");
return ERR_PTR(-ECONNABORTED);
old_call:
- write_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
kmem_cache_free(rxrpc_call_jar, candidate);
_leave(" = -ECONNRESET [old]");
return ERR_PTR(-ECONNRESET);
@@ -626,6 +449,10 @@ void rxrpc_release_call(struct rxrpc_call *call)
*/
_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
+ spin_lock(&conn->params.peer->lock);
+ hlist_del_init(&call->error_link);
+ spin_unlock(&conn->params.peer->lock);
+
write_lock_bh(&rx->call_lock);
if (!list_empty(&call->accept_link)) {
_debug("unlinking once-pending call %p { e=%lx f=%lx }",
@@ -641,24 +468,17 @@ void rxrpc_release_call(struct rxrpc_call *call)
write_unlock_bh(&rx->call_lock);
/* free up the channel for reuse */
- spin_lock(&conn->channel_lock);
- write_lock_bh(&conn->lock);
- write_lock(&call->state_lock);
-
- rxrpc_disconnect_call(call);
-
- spin_unlock(&conn->channel_lock);
+ write_lock_bh(&call->state_lock);
if (call->state < RXRPC_CALL_COMPLETE &&
call->state != RXRPC_CALL_CLIENT_FINAL_ACK) {
_debug("+++ ABORTING STATE %d +++\n", call->state);
call->state = RXRPC_CALL_LOCALLY_ABORTED;
call->local_abort = RX_CALL_DEAD;
- set_bit(RXRPC_CALL_EV_ABORT, &call->events);
- rxrpc_queue_call(call);
}
- write_unlock(&call->state_lock);
- write_unlock_bh(&conn->lock);
+ write_unlock_bh(&call->state_lock);
+
+ rxrpc_disconnect_call(call);
/* clean up the Rx queue */
if (!skb_queue_empty(&call->rx_queue) ||
@@ -792,6 +612,17 @@ void __rxrpc_put_call(struct rxrpc_call *call)
}
/*
+ * Final call destruction under RCU.
+ */
+static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
+{
+	struct rxrpc_call *dead;
+
+	/* Recover the call record that embeds this RCU head. */
+	dead = container_of(rcu, struct rxrpc_call, rcu);
+
+	/* Discard whatever is still on the Rx queue, then hand the record
+	 * back to the call slab.
+	 */
+	rxrpc_purge_queue(&dead->rx_queue);
+	kmem_cache_free(rxrpc_call_jar, dead);
+}
+
+/*
* clean up a call
*/
static void rxrpc_cleanup_call(struct rxrpc_call *call)
@@ -815,19 +646,7 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call)
return;
}
- if (call->conn) {
- spin_lock(&call->conn->params.peer->lock);
- hlist_del_init(&call->error_link);
- spin_unlock(&call->conn->params.peer->lock);
-
- write_lock_bh(&call->conn->lock);
- rb_erase(&call->conn_node, &call->conn->calls);
- write_unlock_bh(&call->conn->lock);
- rxrpc_put_connection(call->conn);
- }
-
- /* Remove the call from the hash */
- rxrpc_call_hash_del(call);
+ ASSERTCMP(call->conn, ==, NULL);
if (call->acks_window) {
_debug("kill Tx window %d",
@@ -855,7 +674,7 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call)
rxrpc_purge_queue(&call->rx_queue);
ASSERT(skb_queue_empty(&call->rx_oos_queue));
sock_put(&call->socket->sk);
- kmem_cache_free(rxrpc_call_jar, call);
+ call_rcu(&call->rcu, rxrpc_rcu_destroy_call);
}
/*
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 82488d6adb83..9e91f27b0d0f 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -33,7 +33,8 @@ static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
* client conns away from the current allocation point to try and keep the IDs
* concentrated. We will also need to retire connections from an old epoch.
*/
-int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, gfp_t gfp)
+static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
+ gfp_t gfp)
{
u32 epoch;
int id;
@@ -83,7 +84,7 @@ error:
/*
* Release a connection ID for a client connection from the global pool.
*/
-void rxrpc_put_client_connection_id(struct rxrpc_connection *conn)
+static void rxrpc_put_client_connection_id(struct rxrpc_connection *conn)
{
if (test_bit(RXRPC_CONN_HAS_IDR, &conn->flags)) {
spin_lock(&rxrpc_conn_id_lock);
@@ -92,3 +93,280 @@ void rxrpc_put_client_connection_id(struct rxrpc_connection *conn)
spin_unlock(&rxrpc_conn_id_lock);
}
}
+
+/*
+ * Tear down the client connection ID tree.
+ *
+ * Every connection still registered in the IDR is reported with pr_err()
+ * and BUG() is then raised.  An empty IDR is simply destroyed.
+ */
+void rxrpc_destroy_client_conn_ids(void)
+{
+	struct rxrpc_connection *leaked;
+	int slot;
+
+	if (!idr_is_empty(&rxrpc_client_conn_ids)) {
+		/* Name each straggler and its usage count before giving up. */
+		idr_for_each_entry(&rxrpc_client_conn_ids, leaked, slot)
+			pr_err("AF_RXRPC: Leaked client conn %p {%d}\n",
+			       leaked, atomic_read(&leaked->usage));
+		BUG();
+	}
+
+	idr_destroy(&rxrpc_client_conn_ids);
+}
+
+/*
+ * Allocate and set up a connection record for client use.
+ *
+ * The parameters in *cp are copied into the new connection; the caller must
+ * take care to clear any padding bytes in *cp.  On success the connection
+ * takes over the caller's peer ref (cp->peer is set to NULL) and gets its own
+ * refs on the local endpoint and the key.
+ *
+ * Returns the new connection or an ERR_PTR()-encoded error; never NULL.
+ */
+static struct rxrpc_connection *
+rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
+{
+	struct rxrpc_connection *new_conn;
+	int err;
+
+	_enter("");
+
+	new_conn = rxrpc_alloc_connection(gfp);
+	if (!new_conn) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	new_conn->params = *cp;
+	new_conn->out_clientflag = RXRPC_CLIENT_INITIATED;
+	new_conn->state = RXRPC_CONN_CLIENT;
+
+	err = rxrpc_get_client_connection_id(new_conn, gfp);
+	if (err < 0)
+		goto free_conn;
+
+	err = rxrpc_init_client_conn_security(new_conn);
+	if (err < 0)
+		goto release_id;
+
+	err = new_conn->security->prime_packet_security(new_conn);
+	if (err < 0)
+		goto clear_security;
+
+	/* Setup succeeded: put the connection on the global list. */
+	write_lock(&rxrpc_connection_lock);
+	list_add_tail(&new_conn->link, &rxrpc_connections);
+	write_unlock(&rxrpc_connection_lock);
+
+	/* We steal the caller's peer ref. */
+	cp->peer = NULL;
+	rxrpc_get_local(new_conn->params.local);
+	key_get(new_conn->params.key);
+
+	_leave(" = %p", new_conn);
+	return new_conn;
+
+	/* Unwind in the reverse order of setup. */
+clear_security:
+	new_conn->security->clear(new_conn);
+release_id:
+	rxrpc_put_client_connection_id(new_conn);
+free_conn:
+	kfree(new_conn);
+	_leave(" = %d", err);
+	return ERR_PTR(err);
+}
+
+/*
+ * Find or create a client connection for a call and attach the call to one
+ * of its channels.
+ * - called in process context with IRQs enabled
+ *
+ * Unless cp->exclusive is set, a shareable connection matching (peer, key,
+ * security_level) is looked up in the local endpoint's client_conns tree and
+ * reused if it can still be referenced.  Otherwise a new connection is
+ * allocated; exclusive connections are kept out of the tree.  If an existing
+ * connection has no free channel, the caller sleeps interruptibly until one
+ * becomes available, provided gfp allows blocking.
+ *
+ * Returns 0 on success with call->conn, ->channel, ->epoch, ->cid and
+ * ->call_id filled in; -ENOMEM if the peer lookup fails; the error from
+ * rxrpc_alloc_client_connection() if a new connection cannot be set up;
+ * -EAGAIN if no channel is free and blocking is not allowed; -ERESTARTSYS if
+ * a signal arrives while waiting.  cp->peer is NULL on return in all cases.
+ */
+int rxrpc_connect_call(struct rxrpc_call *call,
+		       struct rxrpc_conn_parameters *cp,
+		       struct sockaddr_rxrpc *srx,
+		       gfp_t gfp)
+{
+	struct rxrpc_connection *conn, *candidate = NULL;
+	/* Parameter set the tree searches compare against.  This starts as
+	 * the caller's *cp but must switch to the candidate's private copy
+	 * once a candidate exists, because allocation clears cp->peer.
+	 */
+	const struct rxrpc_conn_parameters *want = cp;
+	struct rxrpc_local *local = cp->local;
+	struct rb_node *p, **pp, *parent;
+	long diff;
+	int chan, ret;
+
+	DECLARE_WAITQUEUE(myself, current);
+
+	_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+
+	cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp);
+	if (!cp->peer)
+		return -ENOMEM;
+
+	if (!cp->exclusive) {
+		/* Search for an existing client connection unless this is
+		 * going to be a connection that's used exclusively for a
+		 * single call.
+		 */
+		_debug("search 1");
+		spin_lock(&local->client_conns_lock);
+		p = local->client_conns.rb_node;
+		while (p) {
+			conn = rb_entry(p, struct rxrpc_connection, client_node);
+
+#define cmp(X) ((long)conn->params.X - (long)want->X)
+			diff = (cmp(peer) ?:
+				cmp(key) ?:
+				cmp(security_level));
+			if (diff < 0)
+				p = p->rb_left;
+			else if (diff > 0)
+				p = p->rb_right;
+			else
+				goto found_extant_conn;
+		}
+		spin_unlock(&local->client_conns_lock);
+	}
+
+	/* We didn't find a usable connection or we want an exclusive one. */
+alloc_candidate:
+	_debug("get new conn");
+	candidate = rxrpc_alloc_client_connection(cp, gfp);
+	if (IS_ERR(candidate)) {
+		/* Failure is reported as ERR_PTR(), never NULL, and leaves
+		 * the peer ref taken above in our hands, so drop it here.
+		 */
+		ret = PTR_ERR(candidate);
+		rxrpc_put_peer(cp->peer);
+		cp->peer = NULL;
+		_leave(" = %d", ret);
+		return ret;
+	}
+
+	/* The allocator stole cp->peer, so from here on the candidate's copy
+	 * of the parameters is the only complete search key.
+	 */
+	want = &candidate->params;
+
+	if (cp->exclusive) {
+		/* Assign the call on an exclusive connection to channel 0 and
+		 * don't add the connection to the endpoint's shareable conn
+		 * lookup tree.
+		 */
+		_debug("exclusive chan 0");
+		conn = candidate;
+		atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
+		spin_lock(&conn->channel_lock);
+		chan = 0;
+		goto found_channel;
+	}
+
+	/* We need to redo the search before attempting to add a new connection
+	 * lest we race with someone else adding a conflicting instance.
+	 */
+	_debug("search 2");
+	spin_lock(&local->client_conns_lock);
+
+	pp = &local->client_conns.rb_node;
+	parent = NULL;
+	while (*pp) {
+		parent = *pp;
+		conn = rb_entry(parent, struct rxrpc_connection, client_node);
+
+		diff = (cmp(peer) ?:
+			cmp(key) ?:
+			cmp(security_level));
+		if (diff < 0)
+			pp = &(*pp)->rb_left;
+		else if (diff > 0)
+			pp = &(*pp)->rb_right;
+		else
+			goto found_extant_conn;
+	}
+#undef cmp
+
+	/* The second search also failed; simply add the new connection with
+	 * the new call in channel 0.  Note that we need to take the channel
+	 * lock before dropping the client conn lock.
+	 */
+	_debug("new conn");
+	set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
+	rb_link_node(&candidate->client_node, parent, pp);
+	rb_insert_color(&candidate->client_node, &local->client_conns);
+attached:
+	conn = candidate;
+	candidate = NULL;
+
+	atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
+	spin_lock(&conn->channel_lock);
+	spin_unlock(&local->client_conns_lock);
+	chan = 0;
+
+found_channel:
+	_debug("found chan");
+	call->conn = conn;
+	call->channel = chan;
+	call->epoch = conn->proto.epoch;
+	call->cid = conn->proto.cid | chan;
+	call->call_id = ++conn->channels[chan].call_counter;
+	conn->channels[chan].call_id = call->call_id;
+	rcu_assign_pointer(conn->channels[chan].call, call);
+
+	_net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id);
+
+	spin_unlock(&conn->channel_lock);
+	rxrpc_put_peer(cp->peer);
+	cp->peer = NULL;
+	_leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
+	return 0;
+
+	/* We found a potentially suitable connection already in existence.  If
+	 * we can reuse it (ie. its usage count hasn't been reduced to 0 by the
+	 * reaper), discard any candidate we may have allocated, and try to get
+	 * a channel on this one, otherwise we have to replace it.
+	 */
+found_extant_conn:
+	_debug("found conn");
+	if (!rxrpc_get_connection_maybe(conn)) {
+		if (!candidate) {
+			/* The first search hit a dying connection before we
+			 * had anything to replace it with.  Go and allocate a
+			 * candidate; the second search will find the dying
+			 * connection again and perform the swap.
+			 */
+			spin_unlock(&local->client_conns_lock);
+			goto alloc_candidate;
+		}
+		set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
+		rb_replace_node(&conn->client_node,
+				&candidate->client_node,
+				&local->client_conns);
+		clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags);
+		goto attached;
+	}
+
+	spin_unlock(&local->client_conns_lock);
+
+	/* There is only a candidate to discard if the match came from the
+	 * second search.
+	 */
+	if (candidate)
+		rxrpc_put_connection(candidate);
+
+	if (!atomic_add_unless(&conn->avail_chans, -1, 0)) {
+		if (!gfpflags_allow_blocking(gfp)) {
+			/* Release the same refs as the interrupted path. */
+			rxrpc_put_connection(conn);
+			rxrpc_put_peer(cp->peer);
+			cp->peer = NULL;
+			_leave(" = -EAGAIN");
+			return -EAGAIN;
+		}
+
+		add_wait_queue(&conn->channel_wq, &myself);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (atomic_add_unless(&conn->avail_chans, -1, 0))
+				break;
+			if (signal_pending(current))
+				goto interrupted;
+			schedule();
+		}
+		remove_wait_queue(&conn->channel_wq, &myself);
+		__set_current_state(TASK_RUNNING);
+	}
+
+	/* The connection allegedly now has a free channel and we can now
+	 * attach the call to it.
+	 */
+	spin_lock(&conn->channel_lock);
+
+	for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
+		if (!conn->channels[chan].call)
+			goto found_channel;
+	BUG();
+
+interrupted:
+	remove_wait_queue(&conn->channel_wq, &myself);
+	__set_current_state(TASK_RUNNING);
+	rxrpc_put_connection(conn);
+	rxrpc_put_peer(cp->peer);
+	cp->peer = NULL;
+	_leave(" = -ERESTARTSYS");
+	return -ERESTARTSYS;
+}
+
+/*
+ * Withdraw a client connection from reuse by new client calls.
+ *
+ * If the connection is flagged as being in its local endpoint's client
+ * connection tree, the flag is cleared and the connection is erased from
+ * that tree under client_conns_lock.  The connection's client ID is released
+ * afterwards in either case.
+ */
+void rxrpc_unpublish_client_conn(struct rxrpc_connection *conn)
+{
+	struct rxrpc_local *endpoint = conn->params.local;
+	bool was_published;
+
+	spin_lock(&endpoint->client_conns_lock);
+	was_published = test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS,
+					   &conn->flags);
+	if (was_published)
+		rb_erase(&conn->client_node, &endpoint->client_conns);
+	spin_unlock(&endpoint->client_conns_lock);
+
+	rxrpc_put_client_connection_id(conn);
+}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index bf6971555eac..cee0f35bc1cf 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -31,15 +31,17 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
u32 abort_code)
{
struct rxrpc_call *call;
- struct rb_node *p;
+ int i;
_enter("{%d},%x", conn->debug_id, abort_code);
- read_lock_bh(&conn->lock);
+ spin_lock(&conn->channel_lock);
- for (p = rb_first(&conn->calls); p; p = rb_next(p)) {
- call = rb_entry(p, struct rxrpc_call, conn_node);
- write_lock(&call->state_lock);
+ for (i = 0; i < RXRPC_MAXCALLS; i++) {
+ call = rcu_dereference_protected(
+ conn->channels[i].call,
+ lockdep_is_held(&conn->channel_lock));
+ write_lock_bh(&call->state_lock);
if (call->state <= RXRPC_CALL_COMPLETE) {
call->state = state;
if (state == RXRPC_CALL_LOCALLY_ABORTED) {
@@ -51,10 +53,10 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
}
rxrpc_queue_call(call);
}
- write_unlock(&call->state_lock);
+ write_unlock_bh(&call->state_lock);
}
- read_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
_leave("");
}
@@ -188,18 +190,24 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
if (ret < 0)
return ret;
- conn->security->prime_packet_security(conn);
- read_lock_bh(&conn->lock);
+ ret = conn->security->prime_packet_security(conn);
+ if (ret < 0)
+ return ret;
+
+ spin_lock(&conn->channel_lock);
spin_lock(&conn->state_lock);
- if (conn->state == RXRPC_CONN_SERVER_CHALLENGING) {
- conn->state = RXRPC_CONN_SERVER;
+ if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
+ conn->state = RXRPC_CONN_SERVICE;
for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
- rxrpc_call_is_secure(conn->channels[loop]);
+ rxrpc_call_is_secure(
+ rcu_dereference_protected(
+ conn->channels[loop].call,
+ lockdep_is_held(&conn->channel_lock)));
}
spin_unlock(&conn->state_lock);
- read_unlock_bh(&conn->lock);
+ spin_unlock(&conn->channel_lock);
return 0;
default:
@@ -263,12 +271,8 @@ void rxrpc_process_connection(struct work_struct *work)
_enter("{%d}", conn->debug_id);
- rxrpc_get_connection(conn);
-
- if (test_and_clear_bit(RXRPC_CONN_CHALLENGE, &conn->events)) {
+ if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
rxrpc_secure_connection(conn);
- rxrpc_put_connection(conn);
- }
/* go through the conn-level event packets, releasing the ref on this
* connection that each one has when we've finished with it */
@@ -283,7 +287,6 @@ void rxrpc_process_connection(struct work_struct *work)
goto requeue_and_leave;
case -ECONNABORTED:
default:
- rxrpc_put_connection(conn);
rxrpc_free_skb(skb);
break;
}
@@ -301,7 +304,6 @@ requeue_and_leave:
protocol_error:
if (rxrpc_abort_connection(conn, -ret, abort_code) < 0)
goto requeue_and_leave;
- rxrpc_put_connection(conn);
rxrpc_free_skb(skb);
_leave(" [EPROTO]");
goto out;
@@ -315,7 +317,7 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
CHECK_SLAB_OKAY(&local->usage);
skb_queue_tail(&local->reject_queue, skb);
- rxrpc_queue_work(&local->processor);
+ rxrpc_queue_local(local);
}
/*
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 4bfad7cf96cb..896d84493a05 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -15,7 +15,6 @@
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/skbuff.h>
-#include <linux/crypto.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"
@@ -34,7 +33,7 @@ static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
/*
* allocate a new connection
*/
-static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
+struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
{
struct rxrpc_connection *conn;
@@ -46,12 +45,13 @@ static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
init_waitqueue_head(&conn->channel_wq);
INIT_WORK(&conn->processor, &rxrpc_process_connection);
INIT_LIST_HEAD(&conn->link);
- conn->calls = RB_ROOT;
skb_queue_head_init(&conn->rx_queue);
conn->security = &rxrpc_no_security;
- rwlock_init(&conn->lock);
spin_lock_init(&conn->state_lock);
- atomic_set(&conn->usage, 1);
+ /* We maintain an extra ref on the connection whilst it is
+ * on the rxrpc_connections list.
+ */
+ atomic_set(&conn->usage, 2);
conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
atomic_set(&conn->avail_chans, RXRPC_MAXCALLS);
conn->size_align = 4;
@@ -63,465 +63,118 @@ static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
}
/*
- * add a call to a connection's call-by-ID tree
- */
-static void rxrpc_add_call_ID_to_conn(struct rxrpc_connection *conn,
- struct rxrpc_call *call)
-{
- struct rxrpc_call *xcall;
- struct rb_node *parent, **p;
- __be32 call_id;
-
- write_lock_bh(&conn->lock);
-
- call_id = call->call_id;
- p = &conn->calls.rb_node;
- parent = NULL;
- while (*p) {
- parent = *p;
- xcall = rb_entry(parent, struct rxrpc_call, conn_node);
-
- if (call_id < xcall->call_id)
- p = &(*p)->rb_left;
- else if (call_id > xcall->call_id)
- p = &(*p)->rb_right;
- else
- BUG();
- }
-
- rb_link_node(&call->conn_node, parent, p);
- rb_insert_color(&call->conn_node, &conn->calls);
-
- write_unlock_bh(&conn->lock);
-}
-
-/*
- * Allocate a client connection. The caller must take care to clear any
- * padding bytes in *cp.
+ * Look up a connection in the cache by protocol parameters.
+ *
+ * If successful, a pointer to the connection is returned, but no ref is taken.
+ * NULL is returned if there is no match.
+ *
+ * The caller must be holding the RCU read lock.
*/
-static struct rxrpc_connection *
-rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
+struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
+ struct sk_buff *skb)
{
struct rxrpc_connection *conn;
- int ret;
-
- _enter("");
-
- conn = rxrpc_alloc_connection(gfp);
- if (!conn) {
- _leave(" = -ENOMEM");
- return ERR_PTR(-ENOMEM);
- }
-
- conn->params = *cp;
- conn->proto.local = cp->local;
- conn->proto.epoch = rxrpc_epoch;
- conn->proto.cid = 0;
- conn->proto.in_clientflag = 0;
- conn->proto.family = cp->peer->srx.transport.family;
- conn->out_clientflag = RXRPC_CLIENT_INITIATED;
- conn->state = RXRPC_CONN_CLIENT;
-
- switch (conn->proto.family) {
- case AF_INET:
- conn->proto.addr_size = sizeof(conn->proto.ipv4_addr);
- conn->proto.ipv4_addr = cp->peer->srx.transport.sin.sin_addr;
- conn->proto.port = cp->peer->srx.transport.sin.sin_port;
- break;
- }
-
- ret = rxrpc_get_client_connection_id(conn, gfp);
- if (ret < 0)
- goto error_0;
-
- ret = rxrpc_init_client_conn_security(conn);
- if (ret < 0)
- goto error_1;
-
- conn->security->prime_packet_security(conn);
-
- write_lock(&rxrpc_connection_lock);
- list_add_tail(&conn->link, &rxrpc_connections);
- write_unlock(&rxrpc_connection_lock);
-
- /* We steal the caller's peer ref. */
- cp->peer = NULL;
- rxrpc_get_local(conn->params.local);
- key_get(conn->params.key);
-
- _leave(" = %p", conn);
- return conn;
-
-error_1:
- rxrpc_put_client_connection_id(conn);
-error_0:
- kfree(conn);
- _leave(" = %d", ret);
- return ERR_PTR(ret);
-}
-
-/*
- * find a connection for a call
- * - called in process context with IRQs enabled
- */
-int rxrpc_connect_call(struct rxrpc_call *call,
- struct rxrpc_conn_parameters *cp,
- struct sockaddr_rxrpc *srx,
- gfp_t gfp)
-{
- struct rxrpc_connection *conn, *candidate = NULL;
- struct rxrpc_local *local = cp->local;
- struct rb_node *p, **pp, *parent;
- long diff;
- int chan;
+ struct rxrpc_conn_proto k;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct sockaddr_rxrpc srx;
+ struct rxrpc_peer *peer;
- DECLARE_WAITQUEUE(myself, current);
+ _enter(",%x", sp->hdr.cid & RXRPC_CIDMASK);
- _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+ if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+ goto not_found;
- cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp);
- if (!cp->peer)
- return -ENOMEM;
+ k.epoch = sp->hdr.epoch;
+ k.cid = sp->hdr.cid & RXRPC_CIDMASK;
- if (!cp->exclusive) {
- /* Search for a existing client connection unless this is going
- * to be a connection that's used exclusively for a single call.
- */
- _debug("search 1");
- spin_lock(&local->client_conns_lock);
- p = local->client_conns.rb_node;
- while (p) {
- conn = rb_entry(p, struct rxrpc_connection, client_node);
-
-#define cmp(X) ((long)conn->params.X - (long)cp->X)
- diff = (cmp(peer) ?:
- cmp(key) ?:
- cmp(security_level));
- if (diff < 0)
- p = p->rb_left;
- else if (diff > 0)
- p = p->rb_right;
- else
- goto found_extant_conn;
- }
- spin_unlock(&local->client_conns_lock);
+ /* We may have to handle mixing IPv4 and IPv6 */
+ if (srx.transport.family != local->srx.transport.family) {
+ pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n",
+ srx.transport.family,
+ local->srx.transport.family);
+ goto not_found;
}
- /* We didn't find a connection or we want an exclusive one. */
- _debug("get new conn");
- candidate = rxrpc_alloc_client_connection(cp, gfp);
- if (!candidate) {
- _leave(" = -ENOMEM");
- return -ENOMEM;
- }
+ k.epoch = sp->hdr.epoch;
+ k.cid = sp->hdr.cid & RXRPC_CIDMASK;
- if (cp->exclusive) {
- /* Assign the call on an exclusive connection to channel 0 and
- * don't add the connection to the endpoint's shareable conn
- * lookup tree.
+ if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) {
+ /* We need to look up service connections by the full protocol
+ * parameter set. We look up the peer first as an intermediate
+ * step and then the connection from the peer's tree.
*/
- _debug("exclusive chan 0");
- conn = candidate;
- atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
- spin_lock(&conn->channel_lock);
- chan = 0;
- goto found_channel;
- }
-
- /* We need to redo the search before attempting to add a new connection
- * lest we race with someone else adding a conflicting instance.
- */
- _debug("search 2");
- spin_lock(&local->client_conns_lock);
-
- pp = &local->client_conns.rb_node;
- parent = NULL;
- while (*pp) {
- parent = *pp;
- conn = rb_entry(parent, struct rxrpc_connection, client_node);
-
- diff = (cmp(peer) ?:
- cmp(key) ?:
- cmp(security_level));
- if (diff < 0)
- pp = &(*pp)->rb_left;
- else if (diff > 0)
- pp = &(*pp)->rb_right;
- else
- goto found_extant_conn;
- }
-
- /* The second search also failed; simply add the new connection with
- * the new call in channel 0. Note that we need to take the channel
- * lock before dropping the client conn lock.
- */
- _debug("new conn");
- conn = candidate;
- candidate = NULL;
-
- rb_link_node(&conn->client_node, parent, pp);
- rb_insert_color(&conn->client_node, &local->client_conns);
-
- atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
- spin_lock(&conn->channel_lock);
- spin_unlock(&local->client_conns_lock);
- chan = 0;
-
-found_channel:
- _debug("found chan");
- call->conn = conn;
- call->channel = chan;
- call->epoch = conn->proto.epoch;
- call->cid = conn->proto.cid | chan;
- call->call_id = ++conn->call_counter;
- rcu_assign_pointer(conn->channels[chan], call);
-
- _net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id);
-
- rxrpc_add_call_ID_to_conn(conn, call);
- spin_unlock(&conn->channel_lock);
- rxrpc_put_peer(cp->peer);
- cp->peer = NULL;
- _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
- return 0;
-
- /* We found a suitable connection already in existence. Discard any
- * candidate we may have allocated, and try to get a channel on this
- * one.
- */
-found_extant_conn:
- _debug("found conn");
- rxrpc_get_connection(conn);
- spin_unlock(&local->client_conns_lock);
-
- rxrpc_put_connection(candidate);
-
- if (!atomic_add_unless(&conn->avail_chans, -1, 0)) {
- if (!gfpflags_allow_blocking(gfp)) {
- rxrpc_put_connection(conn);
- _leave(" = -EAGAIN");
- return -EAGAIN;
+ peer = rxrpc_lookup_peer_rcu(local, &srx);
+ if (!peer)
+ goto not_found;
+ conn = rxrpc_find_service_conn_rcu(peer, skb);
+ if (!conn || atomic_read(&conn->usage) == 0)
+ goto not_found;
+ _leave(" = %p", conn);
+ return conn;
+ } else {
+ /* Look up client connections by connection ID alone as their
+ * IDs are unique for this machine.
+ */
+ conn = idr_find(&rxrpc_client_conn_ids,
+ sp->hdr.cid >> RXRPC_CIDSHIFT);
+ if (!conn || atomic_read(&conn->usage) == 0) {
+ _debug("no conn");
+ goto not_found;
}
- add_wait_queue(&conn->channel_wq, &myself);
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (atomic_add_unless(&conn->avail_chans, -1, 0))
- break;
- if (signal_pending(current))
- goto interrupted;
- schedule();
+ if (conn->proto.epoch != k.epoch ||
+ conn->params.local != local)
+ goto not_found;
+
+ peer = conn->params.peer;
+ switch (srx.transport.family) {
+ case AF_INET:
+ if (peer->srx.transport.sin.sin_port !=
+ srx.transport.sin.sin_port ||
+ peer->srx.transport.sin.sin_addr.s_addr !=
+ srx.transport.sin.sin_addr.s_addr)
+ goto not_found;
+ break;
+ default:
+ BUG();
}
- remove_wait_queue(&conn->channel_wq, &myself);
- __set_current_state(TASK_RUNNING);
- }
-
- /* The connection allegedly now has a free channel and we can now
- * attach the call to it.
- */
- spin_lock(&conn->channel_lock);
-
- for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
- if (!conn->channels[chan])
- goto found_channel;
- BUG();
-
-interrupted:
- remove_wait_queue(&conn->channel_wq, &myself);
- __set_current_state(TASK_RUNNING);
- rxrpc_put_connection(conn);
- rxrpc_put_peer(cp->peer);
- cp->peer = NULL;
- _leave(" = -ERESTARTSYS");
- return -ERESTARTSYS;
-}
-
-/*
- * get a record of an incoming connection
- */
-struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local,
- struct rxrpc_peer *peer,
- struct sk_buff *skb)
-{
- struct rxrpc_connection *conn, *candidate = NULL;
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rb_node *p, **pp;
- const char *new = "old";
- __be32 epoch;
- u32 cid;
-
- _enter("");
-
- ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED);
-
- epoch = sp->hdr.epoch;
- cid = sp->hdr.cid & RXRPC_CIDMASK;
-
- /* search the connection list first */
- read_lock_bh(&peer->conn_lock);
-
- p = peer->service_conns.rb_node;
- while (p) {
- conn = rb_entry(p, struct rxrpc_connection, service_node);
- _debug("maybe %x", conn->proto.cid);
-
- if (epoch < conn->proto.epoch)
- p = p->rb_left;
- else if (epoch > conn->proto.epoch)
- p = p->rb_right;
- else if (cid < conn->proto.cid)
- p = p->rb_left;
- else if (cid > conn->proto.cid)
- p = p->rb_right;
- else
- goto found_extant_connection;
- }
- read_unlock_bh(&peer->conn_lock);
-
- /* not yet present - create a candidate for a new record and then
- * redo the search */
- candidate = rxrpc_alloc_connection(GFP_NOIO);
- if (!candidate) {
- _leave(" = -ENOMEM");
- return ERR_PTR(-ENOMEM);
+ _leave(" = %p", conn);
+ return conn;
}
- candidate->proto.local = local;
- candidate->proto.epoch = sp->hdr.epoch;
- candidate->proto.cid = sp->hdr.cid & RXRPC_CIDMASK;
- candidate->proto.in_clientflag = RXRPC_CLIENT_INITIATED;
- candidate->params.local = local;
- candidate->params.peer = peer;
- candidate->params.service_id = sp->hdr.serviceId;
- candidate->security_ix = sp->hdr.securityIndex;
- candidate->out_clientflag = 0;
- candidate->state = RXRPC_CONN_SERVER;
- if (candidate->params.service_id)
- candidate->state = RXRPC_CONN_SERVER_UNSECURED;
-
- write_lock_bh(&peer->conn_lock);
-
- pp = &peer->service_conns.rb_node;
- p = NULL;
- while (*pp) {
- p = *pp;
- conn = rb_entry(p, struct rxrpc_connection, service_node);
-
- if (epoch < conn->proto.epoch)
- pp = &(*pp)->rb_left;
- else if (epoch > conn->proto.epoch)
- pp = &(*pp)->rb_right;
- else if (cid < conn->proto.cid)
- pp = &(*pp)->rb_left;
- else if (cid > conn->proto.cid)
- pp = &(*pp)->rb_right;
- else
- goto found_extant_second;
- }
-
- /* we can now add the new candidate to the list */
- conn = candidate;
- candidate = NULL;
- rb_link_node(&conn->service_node, p, pp);
- rb_insert_color(&conn->service_node, &peer->service_conns);
- rxrpc_get_peer(peer);
- rxrpc_get_local(local);
-
- write_unlock_bh(&peer->conn_lock);
-
- write_lock(&rxrpc_connection_lock);
- list_add_tail(&conn->link, &rxrpc_connections);
- write_unlock(&rxrpc_connection_lock);
-
- new = "new";
-
-success:
- _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid);
-
- _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
- return conn;
-
- /* we found the connection in the list immediately */
-found_extant_connection:
- if (sp->hdr.securityIndex != conn->security_ix) {
- read_unlock_bh(&peer->conn_lock);
- goto security_mismatch;
- }
- rxrpc_get_connection(conn);
- read_unlock_bh(&peer->conn_lock);
- goto success;
-
- /* we found the connection on the second time through the list */
-found_extant_second:
- if (sp->hdr.securityIndex != conn->security_ix) {
- write_unlock_bh(&peer->conn_lock);
- goto security_mismatch;
- }
- rxrpc_get_connection(conn);
- write_unlock_bh(&peer->conn_lock);
- kfree(candidate);
- goto success;
-
-security_mismatch:
- kfree(candidate);
- _leave(" = -EKEYREJECTED");
- return ERR_PTR(-EKEYREJECTED);
+not_found:
+ _leave(" = NULL");
+ return NULL;
}
/*
- * find a connection based on transport and RxRPC connection ID for an incoming
- * packet
+ * Disconnect a call and clear any channel it occupies when that call
+ * terminates. The caller must hold the channel_lock and must release the
+ * call's ref on the connection.
*/
-struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_local *local,
- struct rxrpc_peer *peer,
- struct sk_buff *skb)
+void __rxrpc_disconnect_call(struct rxrpc_call *call)
{
- struct rxrpc_connection *conn;
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rb_node *p;
- u32 epoch, cid;
-
- _enter(",{%x,%x}", sp->hdr.cid, sp->hdr.flags);
+ struct rxrpc_connection *conn = call->conn;
+ struct rxrpc_channel *chan = &conn->channels[call->channel];
- read_lock_bh(&peer->conn_lock);
+ _enter("%d,%d", conn->debug_id, call->channel);
- cid = sp->hdr.cid & RXRPC_CIDMASK;
- epoch = sp->hdr.epoch;
+ if (rcu_access_pointer(chan->call) == call) {
+ /* Save the result of the call so that we can repeat it if necessary
+ * through the channel, whilst disposing of the actual call record.
+ */
+ chan->last_result = call->local_abort;
+ smp_wmb();
+ chan->last_call = chan->call_id;
+ chan->call_id = chan->call_counter;
- if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) {
- p = peer->service_conns.rb_node;
- while (p) {
- conn = rb_entry(p, struct rxrpc_connection, service_node);
-
- _debug("maybe %x", conn->proto.cid);
-
- if (epoch < conn->proto.epoch)
- p = p->rb_left;
- else if (epoch > conn->proto.epoch)
- p = p->rb_right;
- else if (cid < conn->proto.cid)
- p = p->rb_left;
- else if (cid > conn->proto.cid)
- p = p->rb_right;
- else
- goto found;
- }
- } else {
- conn = idr_find(&rxrpc_client_conn_ids, cid >> RXRPC_CIDSHIFT);
- if (conn && conn->proto.epoch == epoch)
- goto found;
+ rcu_assign_pointer(chan->call, NULL);
+ atomic_inc(&conn->avail_chans);
+ wake_up(&conn->channel_wq);
}
- read_unlock_bh(&peer->conn_lock);
- _leave(" = NULL");
- return NULL;
-
-found:
- rxrpc_get_connection(conn);
- read_unlock_bh(&peer->conn_lock);
- _leave(" = %p", conn);
- return conn;
+ _leave("");
}
/*
@@ -531,15 +184,13 @@ found:
void rxrpc_disconnect_call(struct rxrpc_call *call)
{
struct rxrpc_connection *conn = call->conn;
- unsigned chan = call->channel;
- _enter("%d,%d", conn->debug_id, call->channel);
+ spin_lock(&conn->channel_lock);
+ __rxrpc_disconnect_call(call);
+ spin_unlock(&conn->channel_lock);
- if (conn->channels[chan] == call) {
- rcu_assign_pointer(conn->channels[chan], NULL);
- atomic_inc(&conn->avail_chans);
- wake_up(&conn->channel_wq);
- }
+ call->conn = NULL;
+ rxrpc_put_connection(conn);
}
/*
@@ -553,10 +204,10 @@ void rxrpc_put_connection(struct rxrpc_connection *conn)
_enter("%p{u=%d,d=%d}",
conn, atomic_read(&conn->usage), conn->debug_id);
- ASSERTCMP(atomic_read(&conn->usage), >, 0);
+ ASSERTCMP(atomic_read(&conn->usage), >, 1);
conn->put_time = ktime_get_seconds();
- if (atomic_dec_and_test(&conn->usage)) {
+ if (atomic_dec_return(&conn->usage) == 1) {
_debug("zombie");
rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
}
@@ -567,15 +218,17 @@ void rxrpc_put_connection(struct rxrpc_connection *conn)
/*
* destroy a virtual connection
*/
-static void rxrpc_destroy_connection(struct rxrpc_connection *conn)
+static void rxrpc_destroy_connection(struct rcu_head *rcu)
{
- _enter("%p{%d}", conn, atomic_read(&conn->usage));
+ struct rxrpc_connection *conn =
+ container_of(rcu, struct rxrpc_connection, rcu);
+
+ _enter("{%d,u=%d}", conn->debug_id, atomic_read(&conn->usage));
ASSERTCMP(atomic_read(&conn->usage), ==, 0);
_net("DESTROY CONN %d", conn->debug_id);
- ASSERT(RB_EMPTY_ROOT(&conn->calls));
rxrpc_purge_queue(&conn->rx_queue);
conn->security->clear(conn);
@@ -594,59 +247,41 @@ static void rxrpc_destroy_connection(struct rxrpc_connection *conn)
static void rxrpc_connection_reaper(struct work_struct *work)
{
struct rxrpc_connection *conn, *_p;
- struct rxrpc_peer *peer;
- unsigned long now, earliest, reap_time;
+ unsigned long reap_older_than, earliest, put_time, now;
LIST_HEAD(graveyard);
_enter("");
now = ktime_get_seconds();
+ reap_older_than = now - rxrpc_connection_expiry;
earliest = ULONG_MAX;
write_lock(&rxrpc_connection_lock);
list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
- _debug("reap CONN %d { u=%d,t=%ld }",
- conn->debug_id, atomic_read(&conn->usage),
- (long) now - (long) conn->put_time);
-
- if (likely(atomic_read(&conn->usage) > 0))
+ ASSERTCMP(atomic_read(&conn->usage), >, 0);
+ if (likely(atomic_read(&conn->usage) > 1))
continue;
- if (rxrpc_conn_is_client(conn)) {
- struct rxrpc_local *local = conn->params.local;
- spin_lock(&local->client_conns_lock);
- reap_time = conn->put_time + rxrpc_connection_expiry;
-
- if (atomic_read(&conn->usage) > 0) {
- ;
- } else if (reap_time <= now) {
- list_move_tail(&conn->link, &graveyard);
- rxrpc_put_client_connection_id(conn);
- rb_erase(&conn->client_node,
- &local->client_conns);
- } else if (reap_time < earliest) {
- earliest = reap_time;
- }
-
- spin_unlock(&local->client_conns_lock);
- } else {
- peer = conn->params.peer;
- write_lock_bh(&peer->conn_lock);
- reap_time = conn->put_time + rxrpc_connection_expiry;
-
- if (atomic_read(&conn->usage) > 0) {
- ;
- } else if (reap_time <= now) {
- list_move_tail(&conn->link, &graveyard);
- rb_erase(&conn->service_node,
- &peer->service_conns);
- } else if (reap_time < earliest) {
- earliest = reap_time;
- }
-
- write_unlock_bh(&peer->conn_lock);
+ put_time = READ_ONCE(conn->put_time);
+ if (time_after(put_time, reap_older_than)) {
+ if (time_before(put_time, earliest))
+ earliest = put_time;
+ continue;
}
+
+ /* The usage count sits at 1 whilst the object is unused on the
+ * list; we reduce that to 0 to make the object unavailable.
+ */
+ if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
+ continue;
+
+ if (rxrpc_conn_is_client(conn))
+ rxrpc_unpublish_client_conn(conn);
+ else
+ rxrpc_unpublish_service_conn(conn);
+
+ list_move_tail(&conn->link, &graveyard);
}
write_unlock(&rxrpc_connection_lock);
@@ -657,14 +292,14 @@ static void rxrpc_connection_reaper(struct work_struct *work)
(earliest - now) * HZ);
}
- /* then destroy all those pulled out */
while (!list_empty(&graveyard)) {
conn = list_entry(graveyard.next, struct rxrpc_connection,
link);
list_del_init(&conn->link);
ASSERTCMP(atomic_read(&conn->usage), ==, 0);
- rxrpc_destroy_connection(conn);
+ skb_queue_purge(&conn->rx_queue);
+ call_rcu(&conn->rcu, rxrpc_destroy_connection);
}
_leave("");
@@ -676,11 +311,30 @@ static void rxrpc_connection_reaper(struct work_struct *work)
*/
void __exit rxrpc_destroy_all_connections(void)
{
+ struct rxrpc_connection *conn, *_p;
+ bool leak = false;
+
_enter("");
rxrpc_connection_expiry = 0;
cancel_delayed_work(&rxrpc_connection_reap);
rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+ flush_workqueue(rxrpc_workqueue);
+
+ write_lock(&rxrpc_connection_lock);
+ list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
+ pr_err("AF_RXRPC: Leaked conn %p {%d}\n",
+ conn, atomic_read(&conn->usage));
+ leak = true;
+ }
+ write_unlock(&rxrpc_connection_lock);
+ BUG_ON(leak);
+
+ /* Make sure the local and peer records pinned by any dying connections
+ * are released.
+ */
+ rcu_barrier();
+ rxrpc_destroy_client_conn_ids();
_leave("");
}
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
new file mode 100644
index 000000000000..7cbd612be0d7
--- /dev/null
+++ b/net/rxrpc/conn_service.c
@@ -0,0 +1,230 @@
+/* Service connection management
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include "ar-internal.h"
+
+/*
+ * Find a service connection under RCU conditions.
+ *
+ * We could use a hash table, but that is subject to bucket stuffing by an
+ * attacker as the client gets to pick the epoch and cid values and would know
+ * the hash function. So, instead, we use a hash table for the peer and from
+ * that an rbtree to find the service connection. Under ordinary circumstances
+ * it might be slower than a large hash table, but it is at least limited in
+ * depth.
+ */
+struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer,
+ struct sk_buff *skb)
+{
+ struct rxrpc_connection *conn = NULL;
+ struct rxrpc_conn_proto k;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rb_node *p;
+ unsigned int seq = 0;
+
+ k.epoch = sp->hdr.epoch;
+ k.cid = sp->hdr.cid & RXRPC_CIDMASK;
+
+ do {
+ /* Unfortunately, rbtree walking doesn't give reliable results
+ * under just the RCU read lock, so we have to check for
+ * changes.
+ */
+ read_seqbegin_or_lock(&peer->service_conn_lock, &seq);
+
+ p = rcu_dereference_raw(peer->service_conns.rb_node);
+ while (p) {
+ conn = rb_entry(p, struct rxrpc_connection, service_node);
+
+ if (conn->proto.index_key < k.index_key)
+ p = rcu_dereference_raw(p->rb_left);
+ else if (conn->proto.index_key > k.index_key)
+ p = rcu_dereference_raw(p->rb_right);
+ else
+ goto done;
+ conn = NULL;
+ }
+ } while (need_seqretry(&peer->service_conn_lock, seq));
+
+done:
+ done_seqretry(&peer->service_conn_lock, seq);
+ _leave(" = %d", conn ? conn->debug_id : -1);
+ return conn;
+}
+
+/*
+ * Insert a service connection into a peer's tree, thereby making it a target
+ * for incoming packets.
+ */
+static struct rxrpc_connection *
+rxrpc_publish_service_conn(struct rxrpc_peer *peer,
+ struct rxrpc_connection *conn)
+{
+ struct rxrpc_connection *cursor = NULL;
+ struct rxrpc_conn_proto k = conn->proto;
+ struct rb_node **pp, *parent;
+
+ write_seqlock_bh(&peer->service_conn_lock);
+
+ pp = &peer->service_conns.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ cursor = rb_entry(parent,
+ struct rxrpc_connection, service_node);
+
+ if (cursor->proto.index_key < k.index_key)
+ pp = &(*pp)->rb_left;
+ else if (cursor->proto.index_key > k.index_key)
+ pp = &(*pp)->rb_right;
+ else
+ goto found_extant_conn;
+ }
+
+ rb_link_node_rcu(&conn->service_node, parent, pp);
+ rb_insert_color(&conn->service_node, &peer->service_conns);
+conn_published:
+ set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags);
+ write_sequnlock_bh(&peer->service_conn_lock);
+ _leave(" = %d [new]", conn->debug_id);
+ return conn;
+
+found_extant_conn:
+ if (atomic_read(&cursor->usage) == 0)
+ goto replace_old_connection;
+ write_sequnlock_bh(&peer->service_conn_lock);
+ /* We should not be able to get here. rxrpc_incoming_connection() is
+ * called in a non-reentrant context, so there can't be a race to
+ * insert a new connection.
+ */
+ BUG();
+
+replace_old_connection:
+ /* The old connection has no users left (usage == 0), so replace it. */
+ _debug("replace conn");
+ rb_replace_node_rcu(&cursor->service_node,
+ &conn->service_node,
+ &peer->service_conns);
+ clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &cursor->flags);
+ goto conn_published;
+}
+
+/*
+ * Get the connection record for an incoming packet, creating one if needed.
+ */
+struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local,
+ struct sockaddr_rxrpc *srx,
+ struct sk_buff *skb)
+{
+ struct rxrpc_connection *conn;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_peer *peer;
+ const char *new = "old";
+
+ _enter("");
+
+ peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
+ if (!peer) {
+ _debug("no peer");
+ return ERR_PTR(-EBUSY);
+ }
+
+ ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED);
+
+ rcu_read_lock();
+ peer = rxrpc_lookup_peer_rcu(local, srx);
+ if (peer) {
+ conn = rxrpc_find_service_conn_rcu(peer, skb);
+ if (conn) {
+ if (sp->hdr.securityIndex != conn->security_ix)
+ goto security_mismatch_rcu;
+ if (rxrpc_get_connection_maybe(conn))
+ goto found_extant_connection_rcu;
+
+ /* The conn has expired but we can't remove it without
+ * the appropriate lock, so we attempt to replace it
+ * when we have a new candidate.
+ */
+ }
+
+ if (!rxrpc_get_peer_maybe(peer))
+ peer = NULL;
+ }
+ rcu_read_unlock();
+
+ if (!peer) {
+ peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
+ if (IS_ERR(peer))
+ goto enomem;
+ }
+
+ /* We don't have a matching record yet. */
+ conn = rxrpc_alloc_connection(GFP_NOIO);
+ if (!conn)
+ goto enomem_peer;
+
+ conn->proto.epoch = sp->hdr.epoch;
+ conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK;
+ conn->params.local = local;
+ conn->params.peer = peer;
+ conn->params.service_id = sp->hdr.serviceId;
+ conn->security_ix = sp->hdr.securityIndex;
+ conn->out_clientflag = 0;
+ conn->state = RXRPC_CONN_SERVICE;
+ if (conn->params.service_id)
+ conn->state = RXRPC_CONN_SERVICE_UNSECURED;
+
+ rxrpc_get_local(local);
+
+ write_lock(&rxrpc_connection_lock);
+ list_add_tail(&conn->link, &rxrpc_connections);
+ write_unlock(&rxrpc_connection_lock);
+
+ /* Make the connection a target for incoming packets. */
+ rxrpc_publish_service_conn(peer, conn);
+
+ new = "new";
+
+success:
+ _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid);
+ _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
+ return conn;
+
+found_extant_connection_rcu:
+ rcu_read_unlock();
+ goto success;
+
+security_mismatch_rcu:
+ rcu_read_unlock();
+ _leave(" = -EKEYREJECTED");
+ return ERR_PTR(-EKEYREJECTED);
+
+enomem_peer:
+ rxrpc_put_peer(peer);
+enomem:
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Remove the service connection from the peer's tree, thereby removing it as a
+ * target for incoming packets.
+ */
+void rxrpc_unpublish_service_conn(struct rxrpc_connection *conn)
+{
+ struct rxrpc_peer *peer = conn->params.peer;
+
+ write_seqlock_bh(&peer->service_conn_lock);
+ if (test_and_clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags))
+ rb_erase(&conn->service_node, &peer->service_conns);
+ write_sequnlock_bh(&peer->service_conn_lock);
+}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index f4bd57b77b93..991a20d25093 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -476,7 +476,7 @@ static void rxrpc_process_jumbo_packet(struct rxrpc_call *call,
sp->hdr.seq += 1;
sp->hdr.serial += 1;
sp->hdr.flags = jhdr.flags;
- sp->hdr._rsvd = jhdr._rsvd;
+ sp->hdr._rsvd = ntohs(jhdr._rsvd);
_proto("Rx DATA Jumbo %%%u", sp->hdr.serial - 1);
@@ -575,14 +575,13 @@ done:
* post connection-level events to the connection
* - this includes challenges, responses and some aborts
*/
-static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
+static bool rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
struct sk_buff *skb)
{
_enter("%p,%p", conn, skb);
- rxrpc_get_connection(conn);
skb_queue_tail(&conn->rx_queue, skb);
- rxrpc_queue_conn(conn);
+ return rxrpc_queue_conn(conn);
}
/*
@@ -595,7 +594,7 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
_enter("%p,%p", local, skb);
skb_queue_tail(&local->event_queue, skb);
- rxrpc_queue_work(&local->processor);
+ rxrpc_queue_local(local);
}
/*
@@ -627,32 +626,6 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
return 0;
}
-static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local,
- struct sk_buff *skb)
-{
- struct rxrpc_peer *peer;
- struct rxrpc_connection *conn;
- struct sockaddr_rxrpc srx;
-
- rxrpc_get_addr_from_skb(local, skb, &srx);
- rcu_read_lock();
- peer = rxrpc_lookup_peer_rcu(local, &srx);
- if (!peer)
- goto cant_find_peer;
-
- conn = rxrpc_find_connection(local, peer, skb);
- rcu_read_unlock();
- if (!conn)
- goto cant_find_conn;
-
- return conn;
-
-cant_find_peer:
- rcu_read_unlock();
-cant_find_conn:
- return NULL;
-}
-
/*
* handle data received on the local endpoint
* - may be called in interrupt context
@@ -663,6 +636,7 @@ cant_find_conn:
*/
void rxrpc_data_ready(struct sock *sk)
{
+ struct rxrpc_connection *conn;
struct rxrpc_skb_priv *sp;
struct rxrpc_local *local = sk->sk_user_data;
struct sk_buff *skb;
@@ -726,34 +700,37 @@ void rxrpc_data_ready(struct sock *sk)
(sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
goto bad_message;
- if (sp->hdr.callNumber == 0) {
- /* This is a connection-level packet. These should be
- * fairly rare, so the extra overhead of looking them up the
- * old-fashioned way doesn't really hurt */
- struct rxrpc_connection *conn;
+ rcu_read_lock();
- conn = rxrpc_conn_from_local(local, skb);
- if (!conn)
- goto cant_route_call;
+retry_find_conn:
+ conn = rxrpc_find_connection_rcu(local, skb);
+ if (!conn)
+ goto cant_route_call;
+ if (sp->hdr.callNumber == 0) {
+ /* Connection-level packet */
_debug("CONN %p {%d}", conn, conn->debug_id);
- rxrpc_post_packet_to_conn(conn, skb);
- rxrpc_put_connection(conn);
+ if (!rxrpc_post_packet_to_conn(conn, skb))
+ goto retry_find_conn;
} else {
- struct rxrpc_call *call;
+ /* Call-bound packets are routed by connection channel. */
+ unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK;
+ struct rxrpc_channel *chan = &conn->channels[channel];
+ struct rxrpc_call *call = rcu_dereference(chan->call);
- call = rxrpc_find_call_hash(&sp->hdr, local,
- AF_INET, &ip_hdr(skb)->saddr);
- if (call)
- rxrpc_post_packet_to_call(call, skb);
- else
+ if (!call || atomic_read(&call->usage) == 0)
goto cant_route_call;
+
+ rxrpc_post_packet_to_call(call, skb);
}
+ rcu_read_unlock();
out:
return;
cant_route_call:
+ rcu_read_unlock();
+
_debug("can't route call");
if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index e571403613c1..c21ad213b337 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -17,11 +17,12 @@ static int none_init_connection_security(struct rxrpc_connection *conn)
return 0;
}
-static void none_prime_packet_security(struct rxrpc_connection *conn)
+static int none_prime_packet_security(struct rxrpc_connection *conn)
{
+ return 0;
}
-static int none_secure_packet(const struct rxrpc_call *call,
+static int none_secure_packet(struct rxrpc_call *call,
struct sk_buff *skb,
size_t data_size,
void *sechdr)
@@ -29,7 +30,7 @@ static int none_secure_packet(const struct rxrpc_call *call,
return 0;
}
-static int none_verify_packet(const struct rxrpc_call *call,
+static int none_verify_packet(struct rxrpc_call *call,
struct sk_buff *skb,
u32 *_abort_code)
{
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 3ab7764f7cd8..a753796fbe8f 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -374,14 +374,17 @@ void __exit rxrpc_destroy_all_locals(void)
_enter("");
- if (list_empty(&rxrpc_local_endpoints))
- return;
+ flush_workqueue(rxrpc_workqueue);
- mutex_lock(&rxrpc_local_mutex);
- list_for_each_entry(local, &rxrpc_local_endpoints, link) {
- pr_err("AF_RXRPC: Leaked local %p {%d}\n",
- local, atomic_read(&local->usage));
+ if (!list_empty(&rxrpc_local_endpoints)) {
+ mutex_lock(&rxrpc_local_mutex);
+ list_for_each_entry(local, &rxrpc_local_endpoints, link) {
+ pr_err("AF_RXRPC: Leaked local %p {%d}\n",
+ local, atomic_read(&local->usage));
+ }
+ mutex_unlock(&rxrpc_local_mutex);
+ BUG();
}
- mutex_unlock(&rxrpc_local_mutex);
- BUG();
+
+ rcu_barrier();
}
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 01d4930a11f7..538e9831c699 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -189,7 +189,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
INIT_WORK(&peer->error_distributor,
&rxrpc_peer_error_distributor);
peer->service_conns = RB_ROOT;
- rwlock_init(&peer->conn_lock);
+ seqlock_init(&peer->service_conn_lock);
spin_lock_init(&peer->lock);
peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
}
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 500cdcdc843c..ced5f07444e5 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -14,15 +14,15 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-static const char *const rxrpc_conn_states[] = {
- [RXRPC_CONN_UNUSED] = "Unused ",
- [RXRPC_CONN_CLIENT] = "Client ",
- [RXRPC_CONN_SERVER_UNSECURED] = "SvUnsec ",
- [RXRPC_CONN_SERVER_CHALLENGING] = "SvChall ",
- [RXRPC_CONN_SERVER] = "SvSecure",
- [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort",
- [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort",
- [RXRPC_CONN_NETWORK_ERROR] = "NetError",
+static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
+ [RXRPC_CONN_UNUSED] = "Unused ",
+ [RXRPC_CONN_CLIENT] = "Client ",
+ [RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ",
+ [RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ",
+ [RXRPC_CONN_SERVICE] = "SvSecure",
+ [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort",
+ [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort",
+ [RXRPC_CONN_NETWORK_ERROR] = "NetError",
};
/*
@@ -137,7 +137,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
if (v == &rxrpc_connections) {
seq_puts(seq,
"Proto Local Remote "
- " SvID ConnID Calls End Use State Key "
+ " SvID ConnID End Use State Key "
" Serial ISerial\n"
);
return 0;
@@ -154,13 +154,12 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
ntohs(conn->params.peer->srx.transport.sin.sin_port));
seq_printf(seq,
- "UDP %-22.22s %-22.22s %4x %08x %08x %s %3u"
+ "UDP %-22.22s %-22.22s %4x %08x %s %3u"
" %s %08x %08x %08x\n",
lbuff,
rbuff,
conn->params.service_id,
conn->proto.cid,
- conn->call_counter,
rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
atomic_read(&conn->usage),
rxrpc_conn_states[conn->state],
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 23c05ec6fa28..63afa9e9cc08 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -103,43 +103,43 @@ error:
* prime the encryption state with the invariant parts of a connection's
* description
*/
-static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
+static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
{
struct rxrpc_key_token *token;
SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
- struct scatterlist sg[2];
+ struct scatterlist sg;
struct rxrpc_crypt iv;
- struct {
- __be32 x[4];
- } tmpbuf __attribute__((aligned(16))); /* must all be in same page */
+ __be32 *tmpbuf;
+ size_t tmpsize = 4 * sizeof(__be32);
_enter("");
if (!conn->params.key)
- return;
+ return 0;
+
+ tmpbuf = kmalloc(tmpsize, GFP_KERNEL);
+ if (!tmpbuf)
+ return -ENOMEM;
token = conn->params.key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
- tmpbuf.x[0] = htonl(conn->proto.epoch);
- tmpbuf.x[1] = htonl(conn->proto.cid);
- tmpbuf.x[2] = 0;
- tmpbuf.x[3] = htonl(conn->security_ix);
-
- sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
- sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ tmpbuf[0] = htonl(conn->proto.epoch);
+ tmpbuf[1] = htonl(conn->proto.cid);
+ tmpbuf[2] = 0;
+ tmpbuf[3] = htonl(conn->security_ix);
+ sg_init_one(&sg, tmpbuf, tmpsize);
skcipher_request_set_tfm(req, conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
-
+ skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x);
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
- memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv));
- ASSERTCMP((u32 __force)conn->csum_iv.n[0], ==, (u32 __force)tmpbuf.x[2]);
-
- _leave("");
+ memcpy(&conn->csum_iv, tmpbuf + 2, sizeof(conn->csum_iv));
+ kfree(tmpbuf);
+ _leave(" = 0");
+ return 0;
}
/*
@@ -152,12 +152,9 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
{
struct rxrpc_skb_priv *sp;
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
+ struct rxkad_level1_hdr hdr;
struct rxrpc_crypt iv;
- struct scatterlist sg[2];
- struct {
- struct rxkad_level1_hdr hdr;
- __be32 first; /* first four bytes of data and padding */
- } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+ struct scatterlist sg;
u16 check;
sp = rxrpc_skb(skb);
@@ -167,24 +164,19 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
check = sp->hdr.seq ^ sp->hdr.callNumber;
data_size |= (u32)check << 16;
- tmpbuf.hdr.data_size = htonl(data_size);
- memcpy(&tmpbuf.first, sechdr + 4, sizeof(tmpbuf.first));
+ hdr.data_size = htonl(data_size);
+ memcpy(sechdr, &hdr, sizeof(hdr));
/* start the encryption afresh */
memset(&iv, 0, sizeof(iv));
- sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
- sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
-
+ sg_init_one(&sg, sechdr, 8);
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
-
+ skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
- memcpy(sechdr, &tmpbuf, sizeof(tmpbuf));
-
_leave(" = 0");
return 0;
}
@@ -198,8 +190,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
void *sechdr)
{
const struct rxrpc_key_token *token;
- struct rxkad_level2_hdr rxkhdr
- __attribute__((aligned(8))); /* must be all on one page */
+ struct rxkad_level2_hdr rxkhdr;
struct rxrpc_skb_priv *sp;
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
@@ -218,18 +209,16 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
rxkhdr.data_size = htonl(data_size | (u32)check << 16);
rxkhdr.checksum = 0;
+ memcpy(sechdr, &rxkhdr, sizeof(rxkhdr));
/* encrypt from the session key */
token = call->conn->params.key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
sg_init_one(&sg[0], sechdr, sizeof(rxkhdr));
- sg_init_one(&sg[1], &rxkhdr, sizeof(rxkhdr));
-
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(rxkhdr), iv.x);
-
+ skcipher_request_set_crypt(req, &sg[0], &sg[0], sizeof(rxkhdr), iv.x);
crypto_skcipher_encrypt(req);
/* we want to encrypt the skbuff in-place */
@@ -243,9 +232,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
sg_init_table(sg, nsg);
skb_to_sgvec(skb, sg, 0, len);
-
skcipher_request_set_crypt(req, sg, sg, len, iv.x);
-
crypto_skcipher_encrypt(req);
_leave(" = 0");
@@ -259,7 +246,7 @@ out:
/*
* checksum an RxRPC packet header
*/
-static int rxkad_secure_packet(const struct rxrpc_call *call,
+static int rxkad_secure_packet(struct rxrpc_call *call,
struct sk_buff *skb,
size_t data_size,
void *sechdr)
@@ -267,10 +254,7 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
struct rxrpc_skb_priv *sp;
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
- struct scatterlist sg[2];
- struct {
- __be32 x[2];
- } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+ struct scatterlist sg;
u32 x, y;
int ret;
@@ -293,20 +277,17 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
/* calculate the security checksum */
x = call->channel << (32 - RXRPC_CIDSHIFT);
x |= sp->hdr.seq & 0x3fffffff;
- tmpbuf.x[0] = htonl(sp->hdr.callNumber);
- tmpbuf.x[1] = htonl(x);
-
- sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
- sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ call->crypto_buf[0] = htonl(sp->hdr.callNumber);
+ call->crypto_buf[1] = htonl(x);
+ sg_init_one(&sg, call->crypto_buf, 8);
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
-
+ skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
- y = ntohl(tmpbuf.x[1]);
+ y = ntohl(call->crypto_buf[1]);
y = (y >> 16) & 0xffff;
if (y == 0)
y = 1; /* zero checksums are not permitted */
@@ -367,7 +348,6 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, 8, iv.x);
-
crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
@@ -452,7 +432,6 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, skb->len, iv.x);
-
crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
if (sg != _sg)
@@ -498,17 +477,14 @@ nomem:
/*
* verify the security on a received packet
*/
-static int rxkad_verify_packet(const struct rxrpc_call *call,
+static int rxkad_verify_packet(struct rxrpc_call *call,
struct sk_buff *skb,
u32 *_abort_code)
{
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_skb_priv *sp;
struct rxrpc_crypt iv;
- struct scatterlist sg[2];
- struct {
- __be32 x[2];
- } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+ struct scatterlist sg;
u16 cksum;
u32 x, y;
int ret;
@@ -533,20 +509,17 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
/* validate the security checksum */
x = call->channel << (32 - RXRPC_CIDSHIFT);
x |= sp->hdr.seq & 0x3fffffff;
- tmpbuf.x[0] = htonl(call->call_id);
- tmpbuf.x[1] = htonl(x);
-
- sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
- sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ call->crypto_buf[0] = htonl(call->call_id);
+ call->crypto_buf[1] = htonl(x);
+ sg_init_one(&sg, call->crypto_buf, 8);
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
-
+ skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
- y = ntohl(tmpbuf.x[1]);
+ y = ntohl(call->crypto_buf[1]);
cksum = (y >> 16) & 0xffff;
if (cksum == 0)
cksum = 1; /* zero checksums are not permitted */
@@ -710,29 +683,6 @@ static void rxkad_calc_response_checksum(struct rxkad_response *response)
}
/*
- * load a scatterlist with a potentially split-page buffer
- */
-static void rxkad_sg_set_buf2(struct scatterlist sg[2],
- void *buf, size_t buflen)
-{
- int nsg = 1;
-
- sg_init_table(sg, 2);
-
- sg_set_buf(&sg[0], buf, buflen);
- if (sg[0].offset + buflen > PAGE_SIZE) {
- /* the buffer was split over two pages */
- sg[0].length = PAGE_SIZE - sg[0].offset;
- sg_set_buf(&sg[1], buf + sg[0].length, buflen - sg[0].length);
- nsg++;
- }
-
- sg_mark_end(&sg[nsg - 1]);
-
- ASSERTCMP(sg[0].length + sg[1].length, ==, buflen);
-}
-
-/*
* encrypt the response packet
*/
static void rxkad_encrypt_response(struct rxrpc_connection *conn,
@@ -741,17 +691,16 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn,
{
SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
struct rxrpc_crypt iv;
- struct scatterlist sg[2];
+ struct scatterlist sg[1];
/* continue encrypting from where we left off */
memcpy(&iv, s2->session_key, sizeof(iv));
- rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted));
-
+ sg_init_table(sg, 1);
+ sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
skcipher_request_set_tfm(req, conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
-
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
}
@@ -818,14 +767,10 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
resp.kvno = htonl(token->kad->kvno);
resp.ticket_len = htonl(token->kad->ticket_len);
- resp.encrypted.call_id[0] =
- htonl(conn->channels[0] ? conn->channels[0]->call_id : 0);
- resp.encrypted.call_id[1] =
- htonl(conn->channels[1] ? conn->channels[1]->call_id : 0);
- resp.encrypted.call_id[2] =
- htonl(conn->channels[2] ? conn->channels[2]->call_id : 0);
- resp.encrypted.call_id[3] =
- htonl(conn->channels[3] ? conn->channels[3]->call_id : 0);
+ resp.encrypted.call_id[0] = htonl(conn->channels[0].call_counter);
+ resp.encrypted.call_id[1] = htonl(conn->channels[1].call_counter);
+ resp.encrypted.call_id[2] = htonl(conn->channels[2].call_counter);
+ resp.encrypted.call_id[3] = htonl(conn->channels[3].call_counter);
/* calculate the response checksum and then do the encryption */
rxkad_calc_response_checksum(&resp);
@@ -887,10 +832,8 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
}
sg_init_one(&sg[0], ticket, ticket_len);
-
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, ticket_len, iv.x);
-
crypto_skcipher_decrypt(req);
skcipher_request_free(req);
@@ -1001,7 +944,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
const struct rxrpc_crypt *session_key)
{
SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci);
- struct scatterlist sg[2];
+ struct scatterlist sg[1];
struct rxrpc_crypt iv;
_enter(",,%08x%08x",
@@ -1016,12 +959,11 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
memcpy(&iv, session_key, sizeof(iv));
- rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted));
-
+ sg_init_table(sg, 1);
+ sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
skcipher_request_set_tfm(req, rxkad_ci);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
-
crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
@@ -1045,7 +987,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
void *ticket;
u32 abort_code, version, kvno, ticket_len, level;
__be32 csum;
- int ret;
+ int ret, i;
_enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
@@ -1108,11 +1050,26 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
if (response.encrypted.checksum != csum)
goto protocol_error_free;
- if (ntohl(response.encrypted.call_id[0]) > INT_MAX ||
- ntohl(response.encrypted.call_id[1]) > INT_MAX ||
- ntohl(response.encrypted.call_id[2]) > INT_MAX ||
- ntohl(response.encrypted.call_id[3]) > INT_MAX)
- goto protocol_error_free;
+ spin_lock(&conn->channel_lock);
+ for (i = 0; i < RXRPC_MAXCALLS; i++) {
+ struct rxrpc_call *call;
+ u32 call_id = ntohl(response.encrypted.call_id[i]);
+
+ if (call_id > INT_MAX)
+ goto protocol_error_unlock;
+
+ if (call_id < conn->channels[i].call_counter)
+ goto protocol_error_unlock;
+ if (call_id > conn->channels[i].call_counter) {
+ call = rcu_dereference_protected(
+ conn->channels[i].call,
+ lockdep_is_held(&conn->channel_lock));
+ if (call && call->state < RXRPC_CALL_COMPLETE)
+ goto protocol_error_unlock;
+ conn->channels[i].call_counter = call_id;
+ }
+ }
+ spin_unlock(&conn->channel_lock);
abort_code = RXKADOUTOFSEQUENCE;
if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1)
@@ -1137,6 +1094,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
_leave(" = 0");
return 0;
+protocol_error_unlock:
+ spin_unlock(&conn->channel_lock);
protocol_error_free:
kfree(ticket);
protocol_error:
diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c
index f28122a15a24..b88914d53ca5 100644
--- a/net/rxrpc/utils.c
+++ b/net/rxrpc/utils.c
@@ -10,32 +10,37 @@
*/
#include <linux/ip.h>
+#include <linux/ipv6.h>
#include <linux/udp.h>
#include "ar-internal.h"
/*
- * Set up an RxRPC address from a socket buffer.
+ * Fill out a peer address from a socket buffer containing a packet.
*/
-void rxrpc_get_addr_from_skb(struct rxrpc_local *local,
- const struct sk_buff *skb,
- struct sockaddr_rxrpc *srx)
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
{
memset(srx, 0, sizeof(*srx));
- srx->transport_type = local->srx.transport_type;
- srx->transport.family = local->srx.transport.family;
- /* Can we see an ipv4 UDP packet on an ipv6 UDP socket? and vice
- * versa?
- */
- switch (srx->transport.family) {
- case AF_INET:
+ switch (ntohs(skb->protocol)) {
+ case ETH_P_IP:
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.sin.sin_family = AF_INET;
srx->transport.sin.sin_port = udp_hdr(skb)->source;
- srx->transport_len = sizeof(struct sockaddr_in);
- memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr,
- sizeof(struct in_addr));
- break;
+ srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ return 0;
+
+ case ETH_P_IPV6:
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin6);
+ srx->transport.sin6.sin6_family = AF_INET6;
+ srx->transport.sin6.sin6_port = udp_hdr(skb)->source;
+ srx->transport.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
+ return 0;
default:
- BUG();
+ pr_warn_ratelimited("AF_RXRPC: Unknown eth protocol %u\n",
+ ntohs(skb->protocol));
+ return -EAFNOSUPPORT;
}
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index dff92ea772fe..3ddc7bd74ecb 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -115,9 +115,9 @@ struct hfsc_class {
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
struct gnet_stats_rate_est64 rate_est;
- unsigned int level; /* class level in hierarchy */
struct tcf_proto __rcu *filter_list; /* filter list */
unsigned int filter_cnt; /* filter count */
+ unsigned int level; /* class level in hierarchy */
struct hfsc_sched *sched; /* scheduler data */
struct hfsc_class *cl_parent; /* parent class */
@@ -165,10 +165,10 @@ struct hfsc_class {
struct runtime_sc cl_virtual; /* virtual curve */
struct runtime_sc cl_ulimit; /* upperlimit curve */
- unsigned long cl_flags; /* which curves are valid */
- unsigned long cl_vtperiod; /* vt period sequence number */
- unsigned long cl_parentperiod;/* parent's vt period sequence number*/
- unsigned long cl_nactive; /* number of active children */
+ u8 cl_flags; /* which curves are valid */
+ u32 cl_vtperiod; /* vt period sequence number */
+ u32 cl_parentperiod;/* parent's vt period sequence number*/
+ u32 cl_nactive; /* number of active children */
};
struct hfsc_sched {
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index e1849f3714ad..1c23060c41a6 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -268,6 +268,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
goto fail_init;
asoc->active_key_id = ep->active_key_id;
+ asoc->prsctp_enable = ep->prsctp_enable;
/* Save the hmacs and chunks list into this association */
if (ep->auth_hmacs_list)
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 1eb94bf18ef4..a55e54738b81 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -335,13 +335,32 @@ errout:
/* Check whether this message has expired. */
int sctp_chunk_abandoned(struct sctp_chunk *chunk)
{
- struct sctp_datamsg *msg = chunk->msg;
+ if (!chunk->asoc->prsctp_enable ||
+ !SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) {
+ struct sctp_datamsg *msg = chunk->msg;
+
+ if (!msg->can_abandon)
+ return 0;
+
+ if (time_after(jiffies, msg->expires_at))
+ return 1;
- if (!msg->can_abandon)
return 0;
+ }
- if (time_after(jiffies, msg->expires_at))
+ if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
+ time_after(jiffies, chunk->prsctp_param)) {
+ if (chunk->sent_count)
+ chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
+ else
+ chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
+ return 1;
+ } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
+ chunk->sent_count > chunk->prsctp_param) {
+ chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
return 1;
+ }
+ /* PRIO policy is processed by sendmsg, not here */
return 0;
}
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 9d494e35e7f9..1f03065686fe 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -163,6 +163,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
*/
ep->auth_hmacs_list = auth_hmacs;
ep->auth_chunk_list = auth_chunks;
+ ep->prsctp_enable = net->sctp.prsctp_enable;
return ep;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 1541a91d6d9d..7425f6c23888 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -316,6 +316,8 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
packet->has_data = 1;
/* timestamp the chunk for rtx purposes */
chunk->sent_at = jiffies;
+ /* Mainly used for prsctp RTX policy */
+ chunk->sent_count++;
break;
case SCTP_CID_COOKIE_ECHO:
packet->has_cookie_echo = 1;
@@ -582,9 +584,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
*/
pkt_size -= WORD_ROUND(chunk->skb->len);
- if (chunk == packet->auth && !list_empty(&packet->chunk_list))
- list_add(&chunk->list, &packet->chunk_list);
- else if (!sctp_chunk_is_data(chunk))
+ if (!sctp_chunk_is_data(chunk) && chunk != packet->auth)
sctp_chunk_free(chunk);
if (!pkt_size)
@@ -605,6 +605,18 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
(struct sctp_auth_chunk *)auth,
gfp);
+ if (packet->auth) {
+ if (!list_empty(&packet->chunk_list)) {
+ /* We will generate more packets, so re-queue
+ * auth chunk.
+ */
+ list_add(&chunk->list, &packet->chunk_list);
+ } else {
+ sctp_chunk_free(packet->auth);
+ packet->auth = NULL;
+ }
+ }
+
if (!gso)
break;
@@ -735,6 +747,8 @@ err:
}
goto out;
nomem:
+ if (packet->auth && list_empty(&packet->auth->list))
+ sctp_chunk_free(packet->auth);
err = -ENOMEM;
goto err;
}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 084718f9b3da..72e54a416af6 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -326,6 +326,9 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
sctp_chunk_hold(chunk);
sctp_outq_tail_data(q, chunk);
+ if (chunk->asoc->prsctp_enable &&
+ SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
+ chunk->asoc->sent_cnt_removable++;
if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
else
@@ -372,6 +375,96 @@ static void sctp_insert_list(struct list_head *head, struct list_head *new)
list_add_tail(new, head);
}
+static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *sinfo,
+ struct list_head *queue, int msg_len)
+{
+ struct sctp_chunk *chk, *temp;
+
+ list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
+ if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+ chk->prsctp_param <= sinfo->sinfo_timetolive)
+ continue;
+
+ list_del_init(&chk->transmitted_list);
+ sctp_insert_list(&asoc->outqueue.abandoned,
+ &chk->transmitted_list);
+
+ asoc->sent_cnt_removable--;
+ asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
+
+ if (!chk->tsn_gap_acked) {
+ if (chk->transport)
+ chk->transport->flight_size -=
+ sctp_data_size(chk);
+ asoc->outqueue.outstanding_bytes -= sctp_data_size(chk);
+ }
+
+ msg_len -= SCTP_DATA_SNDSIZE(chk) +
+ sizeof(struct sk_buff) +
+ sizeof(struct sctp_chunk);
+ if (msg_len <= 0)
+ break;
+ }
+
+ return msg_len;
+}
+
+static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *sinfo,
+ struct list_head *queue, int msg_len)
+{
+ struct sctp_chunk *chk, *temp;
+
+ list_for_each_entry_safe(chk, temp, queue, list) {
+ if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+ chk->prsctp_param <= sinfo->sinfo_timetolive)
+ continue;
+
+ list_del_init(&chk->list);
+ asoc->sent_cnt_removable--;
+ asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+
+ msg_len -= SCTP_DATA_SNDSIZE(chk) +
+ sizeof(struct sk_buff) +
+ sizeof(struct sctp_chunk);
+ sctp_chunk_free(chk);
+ if (msg_len <= 0)
+ break;
+ }
+
+ return msg_len;
+}
+
+/* Abandon the chunks according to their priorities */
+void sctp_prsctp_prune(struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *sinfo, int msg_len)
+{
+ struct sctp_transport *transport;
+
+ if (!asoc->prsctp_enable || !asoc->sent_cnt_removable)
+ return;
+
+ msg_len = sctp_prsctp_prune_sent(asoc, sinfo,
+ &asoc->outqueue.retransmit,
+ msg_len);
+ if (msg_len <= 0)
+ return;
+
+ list_for_each_entry(transport, &asoc->peer.transport_addr_list,
+ transports) {
+ msg_len = sctp_prsctp_prune_sent(asoc, sinfo,
+ &transport->transmitted,
+ msg_len);
+ if (msg_len <= 0)
+ return;
+ }
+
+ sctp_prsctp_prune_unsent(asoc, sinfo,
+ &asoc->outqueue.out_chunk_list,
+ msg_len);
+}
+
/* Mark all the eligible packets on a transport for retransmission. */
void sctp_retransmit_mark(struct sctp_outq *q,
struct sctp_transport *transport,
@@ -962,6 +1055,9 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
/* Mark as failed send. */
sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
+ if (asoc->prsctp_enable &&
+ SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
+ asoc->sent_cnt_removable--;
sctp_chunk_free(chunk);
continue;
}
@@ -1251,6 +1347,9 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
tsn = ntohl(tchunk->subh.data_hdr->tsn);
if (TSN_lte(tsn, ctsn)) {
list_del_init(&tchunk->transmitted_list);
+ if (asoc->prsctp_enable &&
+ SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
+ asoc->sent_cnt_removable--;
sctp_chunk_free(tchunk);
}
}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 56f364d8f932..1c96f4740e67 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -261,7 +261,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types));
chunksize += sizeof(ecap_param);
- if (net->sctp.prsctp_enable)
+ if (asoc->prsctp_enable)
chunksize += sizeof(prsctp_param);
/* ADDIP: Section 4.2.7:
@@ -355,7 +355,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
sctp_addto_param(retval, num_ext, extensions);
}
- if (net->sctp.prsctp_enable)
+ if (asoc->prsctp_enable)
sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param);
if (sp->adaptation_ind) {
@@ -711,6 +711,20 @@ nodata:
return retval;
}
+static void sctp_set_prsctp_policy(struct sctp_chunk *chunk,
+ const struct sctp_sndrcvinfo *sinfo)
+{
+ if (!chunk->asoc->prsctp_enable)
+ return;
+
+ if (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags))
+ chunk->prsctp_param =
+ jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive);
+ else if (SCTP_PR_RTX_ENABLED(sinfo->sinfo_flags) ||
+ SCTP_PR_PRIO_ENABLED(sinfo->sinfo_flags))
+ chunk->prsctp_param = sinfo->sinfo_timetolive;
+}
+
/* Make a DATA chunk for the given association from the provided
* parameters. However, do not populate the data payload.
*/
@@ -744,6 +758,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp);
memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo));
+ sctp_set_prsctp_policy(retval, sinfo);
nodata:
return retval;
@@ -2024,8 +2039,8 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
for (i = 0; i < num_ext; i++) {
switch (param.ext->chunks[i]) {
case SCTP_CID_FWD_TSN:
- if (net->sctp.prsctp_enable && !asoc->peer.prsctp_capable)
- asoc->peer.prsctp_capable = 1;
+ if (asoc->prsctp_enable && !asoc->peer.prsctp_capable)
+ asoc->peer.prsctp_capable = 1;
break;
case SCTP_CID_AUTH:
/* if the peer reports AUTH, assume that he
@@ -2169,7 +2184,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
break;
case SCTP_PARAM_FWD_TSN_SUPPORT:
- if (net->sctp.prsctp_enable)
+ if (ep->prsctp_enable)
break;
goto fallthrough;
@@ -2653,7 +2668,7 @@ do_addr_param:
break;
case SCTP_PARAM_FWD_TSN_SUPPORT:
- if (net->sctp.prsctp_enable) {
+ if (asoc->prsctp_enable) {
asoc->peer.prsctp_capable = 1;
break;
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index cdabbd8219b1..71c7dc5ea62e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1914,6 +1914,9 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
goto out_free;
}
+ if (sctp_wspace(asoc) < msg_len)
+ sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
+
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if (!sctp_wspace(asoc)) {
err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
@@ -3661,6 +3664,80 @@ static int sctp_setsockopt_recvnxtinfo(struct sock *sk,
return 0;
}
+static int sctp_setsockopt_pr_supported(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EINVAL;
+
+ if (optlen != sizeof(params))
+ goto out;
+
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (asoc) {
+ asoc->prsctp_enable = !!params.assoc_value;
+ } else if (!params.assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ sp->ep->prsctp_enable = !!params.assoc_value;
+ } else {
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_setsockopt_default_prinfo(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_default_prinfo info;
+ struct sctp_association *asoc;
+ int retval = -EINVAL;
+
+ if (optlen != sizeof(info))
+ goto out;
+
+ if (copy_from_user(&info, optval, sizeof(info))) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (info.pr_policy & ~SCTP_PR_SCTP_MASK)
+ goto out;
+
+ if (info.pr_policy == SCTP_PR_SCTP_NONE)
+ info.pr_value = 0;
+
+ asoc = sctp_id2assoc(sk, info.pr_assoc_id);
+ if (asoc) {
+ SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy);
+ asoc->default_timetolive = info.pr_value;
+ } else if (!info.pr_assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy);
+ sp->default_timetolive = info.pr_value;
+ } else {
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -3821,6 +3898,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_RECVNXTINFO:
retval = sctp_setsockopt_recvnxtinfo(sk, optval, optlen);
break;
+ case SCTP_PR_SUPPORTED:
+ retval = sctp_setsockopt_pr_supported(sk, optval, optlen);
+ break;
+ case SCTP_DEFAULT_PRINFO:
+ retval = sctp_setsockopt_default_prinfo(sk, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -6166,6 +6249,148 @@ static int sctp_getsockopt_recvnxtinfo(struct sock *sk, int len,
return 0;
}
+static int sctp_getsockopt_pr_supported(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (asoc) {
+ params.assoc_value = asoc->prsctp_enable;
+ } else if (!params.assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ params.assoc_value = sp->ep->prsctp_enable;
+ } else {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &params, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_getsockopt_default_prinfo(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_default_prinfo info;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(info)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(info);
+ if (copy_from_user(&info, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, info.pr_assoc_id);
+ if (asoc) {
+ info.pr_policy = SCTP_PR_POLICY(asoc->default_flags);
+ info.pr_value = asoc->default_timetolive;
+ } else if (!info.pr_assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ info.pr_policy = SCTP_PR_POLICY(sp->default_flags);
+ info.pr_value = sp->default_timetolive;
+ } else {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &info, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_prstatus params;
+ struct sctp_association *asoc;
+ int policy;
+ int retval = -EINVAL;
+
+ if (len < sizeof(params))
+ goto out;
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ policy = params.sprstat_policy;
+ if (policy & ~SCTP_PR_SCTP_MASK)
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
+ if (!asoc)
+ goto out;
+
+ if (policy == SCTP_PR_SCTP_NONE) {
+ params.sprstat_abandoned_unsent = 0;
+ params.sprstat_abandoned_sent = 0;
+ for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
+ params.sprstat_abandoned_unsent +=
+ asoc->abandoned_unsent[policy];
+ params.sprstat_abandoned_sent +=
+ asoc->abandoned_sent[policy];
+ }
+ } else {
+ params.sprstat_abandoned_unsent =
+ asoc->abandoned_unsent[__SCTP_PR_INDEX(policy)];
+ params.sprstat_abandoned_sent =
+ asoc->abandoned_sent[__SCTP_PR_INDEX(policy)];
+ }
+
+ if (put_user(len, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (copy_to_user(optval, &params, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -6319,6 +6544,17 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_RECVNXTINFO:
retval = sctp_getsockopt_recvnxtinfo(sk, len, optval, optlen);
break;
+ case SCTP_PR_SUPPORTED:
+ retval = sctp_getsockopt_pr_supported(sk, len, optval, optlen);
+ break;
+ case SCTP_DEFAULT_PRINFO:
+ retval = sctp_getsockopt_default_prinfo(sk, len, optval,
+ optlen);
+ break;
+ case SCTP_PR_ASSOC_STATUS:
+ retval = sctp_getsockopt_pr_assocstatus(sk, len, optval,
+ optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -6866,7 +7102,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
if (cmsgs->srinfo->sinfo_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
- SCTP_SACK_IMMEDIATELY |
+ SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
@@ -6890,7 +7126,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
if (cmsgs->sinfo->snd_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
- SCTP_SACK_IMMEDIATELY |
+ SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh
new file mode 100755
index 000000000000..8e960234013d
--- /dev/null
+++ b/tools/hv/bondvf.sh
@@ -0,0 +1,193 @@
+#!/bin/bash
+
+# This example script creates bonding network devices based on synthetic NIC
+# (the virtual network adapter usually provided by Hyper-V) and the matching
+# VF NIC (SRIOV virtual function). So the synthetic NIC and VF NIC can
+# function as one network device, and fail over to the synthetic NIC if VF is
+# down.
+#
+# Usage:
+# - After configuring the vSwitch and vNIC with SRIOV, start the Linux
+# virtual machine (VM)
+# - Run this script on the VM. It will create configuration files in
+# the distro-specific directory.
+# - Reboot the VM, so that the bonding config is enabled.
+#
+# The config files use DHCP by default. You may edit them if you need to
+# switch to a static IP or change other settings.
+#
+
+sysdir=/sys/class/net
+netvsc_cls={f8615163-df3e-46c5-913f-f2d2f965ed0e}
+bondcnt=0
+
+# Detect Distro
+if [ -f /etc/redhat-release ];
+then
+ cfgdir=/etc/sysconfig/network-scripts
+ distro=redhat
+elif grep -q 'Ubuntu' /etc/issue
+then
+ cfgdir=/etc/network
+ distro=ubuntu
+elif grep -q 'SUSE' /etc/issue
+then
+ cfgdir=/etc/sysconfig/network
+ distro=suse
+else
+ echo "Unsupported Distro"
+ exit 1
+fi
+
+echo Detected Distro: $distro, or compatible
+
+# Get a list of ethernet names
+list_eth=(`cd $sysdir && ls -d */ | cut -d/ -f1 | grep -v bond`)
+eth_cnt=${#list_eth[@]}
+
+echo List of net devices:
+
+# Get the MAC addresses
+for (( i=0; i < $eth_cnt; i++ ))
+do
+ list_mac[$i]=`cat $sysdir/${list_eth[$i]}/address`
+ echo ${list_eth[$i]}, ${list_mac[$i]}
+done
+
+# Find NIC with matching MAC
+for (( i=0; i < $eth_cnt-1; i++ ))
+do
+ for (( j=i+1; j < $eth_cnt; j++ ))
+ do
+ if [ "${list_mac[$i]}" = "${list_mac[$j]}" ]
+ then
+ list_match[$i]=${list_eth[$j]}
+ break
+ fi
+ done
+done
+
+function create_eth_cfg_redhat {
+ local fn=$cfgdir/ifcfg-$1
+
+ rm -f $fn
+ echo DEVICE=$1 >>$fn
+ echo TYPE=Ethernet >>$fn
+ echo BOOTPROTO=none >>$fn
+ echo ONBOOT=yes >>$fn
+ echo NM_CONTROLLED=no >>$fn
+ echo PEERDNS=yes >>$fn
+ echo IPV6INIT=yes >>$fn
+ echo MASTER=$2 >>$fn
+ echo SLAVE=yes >>$fn
+}
+
+function create_eth_cfg_pri_redhat {
+ create_eth_cfg_redhat $1 $2
+}
+
+function create_bond_cfg_redhat {
+ local fn=$cfgdir/ifcfg-$1
+
+ rm -f $fn
+ echo DEVICE=$1 >>$fn
+ echo TYPE=Bond >>$fn
+ echo BOOTPROTO=dhcp >>$fn
+ echo ONBOOT=yes >>$fn
+ echo NM_CONTROLLED=no >>$fn
+ echo PEERDNS=yes >>$fn
+ echo IPV6INIT=yes >>$fn
+ echo BONDING_MASTER=yes >>$fn
+ echo BONDING_OPTS=\"mode=active-backup miimon=100 primary=$2\" >>$fn
+}
+
+function create_eth_cfg_ubuntu {
+ local fn=$cfgdir/interfaces
+
+ echo $'\n'auto $1 >>$fn
+ echo iface $1 inet manual >>$fn
+ echo bond-master $2 >>$fn
+}
+
+function create_eth_cfg_pri_ubuntu {
+ local fn=$cfgdir/interfaces
+
+ create_eth_cfg_ubuntu $1 $2
+ echo bond-primary $1 >>$fn
+}
+
+function create_bond_cfg_ubuntu {
+ local fn=$cfgdir/interfaces
+
+ echo $'\n'auto $1 >>$fn
+ echo iface $1 inet dhcp >>$fn
+ echo bond-mode active-backup >>$fn
+ echo bond-miimon 100 >>$fn
+ echo bond-slaves none >>$fn
+}
+
+function create_eth_cfg_suse {
+ local fn=$cfgdir/ifcfg-$1
+
+ rm -f $fn
+ echo BOOTPROTO=none >>$fn
+ echo STARTMODE=auto >>$fn
+}
+
+function create_eth_cfg_pri_suse {
+ create_eth_cfg_suse $1
+}
+
+function create_bond_cfg_suse {
+ local fn=$cfgdir/ifcfg-$1
+
+ rm -f $fn
+ echo BOOTPROTO=dhcp >>$fn
+ echo STARTMODE=auto >>$fn
+ echo BONDING_MASTER=yes >>$fn
+ echo BONDING_SLAVE_0=$2 >>$fn
+ echo BONDING_SLAVE_1=$3 >>$fn
+ echo BONDING_MODULE_OPTS=\'mode=active-backup miimon=100 primary=$2\' >>$fn
+}
+
+function create_bond {
+ local bondname=bond$bondcnt
+ local primary
+ local secondary
+
+ local class_id1=`cat $sysdir/$1/device/class_id 2>/dev/null`
+ local class_id2=`cat $sysdir/$2/device/class_id 2>/dev/null`
+
+ if [ "$class_id1" = "$netvsc_cls" ]
+ then
+ primary=$2
+ secondary=$1
+ elif [ "$class_id2" = "$netvsc_cls" ]
+ then
+ primary=$1
+ secondary=$2
+ else
+ return 0
+ fi
+
+ echo $'\nBond name:' $bondname
+
+ echo configuring $primary
+ create_eth_cfg_pri_$distro $primary $bondname
+
+ echo configuring $secondary
+ create_eth_cfg_$distro $secondary $bondname
+
+ echo creating: $bondname with primary slave: $primary
+ create_bond_cfg_$distro $bondname $primary $secondary
+
+ let bondcnt=bondcnt+1
+}
+
+for (( i=0; i < $eth_cnt-1; i++ ))
+do
+ if [ -n "${list_match[$i]}" ]
+ then
+ create_bond ${list_eth[$i]} ${list_match[$i]}
+ fi
+done
diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py
index 4d21ef2d601d..4c6f09ac7d12 100644
--- a/tools/perf/scripts/python/netdev-times.py
+++ b/tools/perf/scripts/python/netdev-times.py
@@ -252,9 +252,10 @@ def irq__irq_handler_exit(name, context, cpu, sec, nsec, pid, comm, callchain, i
event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, irq, ret)
all_event_list.append(event_info)
-def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, callchain, napi, dev_name):
+def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, callchain, napi,
+ dev_name, work=None, budget=None):
event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
- napi, dev_name)
+ napi, dev_name, work, budget)
all_event_list.append(event_info)
def net__netif_receive_skb(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr,
@@ -354,11 +355,13 @@ def handle_irq_softirq_exit(event_info):
receive_hunk_list.append(rec_data)
def handle_napi_poll(event_info):
- (name, context, cpu, time, pid, comm, napi, dev_name) = event_info
+ (name, context, cpu, time, pid, comm, napi, dev_name,
+ work, budget) = event_info
if cpu in net_rx_dic.keys():
event_list = net_rx_dic[cpu]['event_list']
rec_data = {'event_name':'napi_poll',
- 'dev':dev_name, 'event_t':time}
+ 'dev':dev_name, 'event_t':time,
+ 'work':work, 'budget':budget}
event_list.append(rec_data)
def handle_netif_rx(event_info):